Skip to content

[Parse] Allow prefix operators containing / with /.../ regex literals #58835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions include/swift/AST/DiagnosticsParse.def
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ ERROR(forbidden_extended_escaping_string,none,
ERROR(regex_literal_parsing_error,none,
"%0", (StringRef))

ERROR(prefix_slash_not_allowed,none,
"prefix operator may not contain '/'", ())

//------------------------------------------------------------------------------
// MARK: Lexer diagnostics
//------------------------------------------------------------------------------
Expand Down
5 changes: 1 addition & 4 deletions include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -1763,10 +1763,7 @@ class Parser {
/// Try re-lex a '/' operator character as a regex literal. This should be
/// called when parsing in an expression position to ensure a regex literal is
/// correctly parsed.
///
/// If \p mustBeRegex is set to true, a regex literal will always be lexed if
/// enabled. Otherwise, it will not be lexed if it may be ambiguous.
void tryLexRegexLiteral(bool mustBeRegex);
void tryLexRegexLiteral(bool forUnappliedOperator);

void validateCollectionElement(ParserResult<Expr> element);

Expand Down
56 changes: 43 additions & 13 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2000,23 +2000,11 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
// 2
// }
//
// This takes advantage of the consistent operator spacing rule. We also
// need to ban ')' to avoid ambiguity with unapplied operator references e.g
// `reduce(1, /)`. This would be invalid regex syntax anyways. Note this
// doesn't totally save us from e.g `foo(/, 0)`, but it should at least
// help, and it ensures users can always surround their operator ref in
// parens `(/)` to fix the issue.
// This takes advantage of the consistent operator spacing rule.
// TODO: This heuristic should be sunk into the Swift library once we have a
// way of doing fix-its from there.
auto *RegexContentStart = TokStart + 1;
switch (*RegexContentStart) {
case ')': {
if (!MustBeRegex)
return false;

// ')' is invalid anyway, so we can let the parser diagnose it.
break;
}
case ' ':
case '\t': {
if (!MustBeRegex)
Expand Down Expand Up @@ -2072,6 +2060,48 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
Ptr--;
}

// If we're tentatively lexing `/.../`, scan to make sure we don't have any
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
// regex syntax anyways. This ensures users can surround their operator ref
// in parens `(/)` to fix the issue. This also applies to prefix operators
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
// or not we're in a custom character class `[...]`, as parens are literal
// there.
// TODO: This should be sunk into the Swift library.
if (IsForwardSlash && !MustBeRegex) {
unsigned CharClassDepth = 0;
unsigned GroupDepth = 0;
for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
switch (*Cursor) {
case '\\':
// Skip over the next character of an escape.
Cursor++;
break;
case '(':
if (CharClassDepth == 0)
GroupDepth += 1;
break;
case ')':
if (CharClassDepth != 0)
break;

// Invalid, so bail.
if (GroupDepth == 0)
return false;

GroupDepth -= 1;
break;
case '[':
CharClassDepth += 1;
break;
case ']':
if (CharClassDepth != 0)
CharClassDepth -= 1;
}
}
}

// Update to point to where we ended regex lexing.
assert(Ptr > TokStart && Ptr <= BufferEnd);
CurPtr = Ptr;
Expand Down
7 changes: 0 additions & 7 deletions lib/Parse/ParseDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8541,13 +8541,6 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) {
Tok.getRawText().front() == '!'))
diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap);

// Prefix operators may not contain the `/` character when `/.../` regex
// literals are enabled.
if (Context.LangOpts.EnableBareSlashRegexLiterals) {
if (Attributes.hasAttribute<PrefixAttr>() && Tok.getText().contains("/"))
diagnose(Tok, diag::prefix_slash_not_allowed);
}

// A common error is to try to define an operator with something in the
// unicode plane considered to be an operator, or to try to define an
// operator like "not". Analyze and diagnose this specifically.
Expand Down
106 changes: 60 additions & 46 deletions lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
UnresolvedDeclRefExpr *Operator;

// First check to see if we have the start of a regex literal `/.../`.
tryLexRegexLiteral(/*mustBeRegex*/ true);
tryLexRegexLiteral(/*forUnappliedOperator*/ false);

switch (Tok.getKind()) {
default:
Expand Down Expand Up @@ -880,56 +880,64 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() {
return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
}

void Parser::tryLexRegexLiteral(bool mustBeRegex) {
void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
if (!Context.LangOpts.EnableBareSlashRegexLiterals)
return;

// Check to see if we have a regex literal `/.../`, optionally with a prefix
// operator e.g `!/.../`.
bool mustBeRegex = false;
switch (Tok.getKind()) {
case tok::oper_prefix:
// Prefix operators may contain `/` characters, so this may not be a regex,
// and as such need to make sure we have a closing `/`.
break;
case tok::oper_binary_spaced:
case tok::oper_binary_unspaced: {
// Check to see if we have an operator containing '/'.
auto slashIdx = Tok.getText().find("/");
if (slashIdx == StringRef::npos)
break;
case tok::oper_binary_unspaced:
// When re-lexing for a unary expression, binary operators are always
// invalid, so we can be confident in always lexing a regex literal.
mustBeRegex = !forUnappliedOperator;
break;
default:
// We only re-lex regex literals for operator tokens.
return;
}

CancellableBacktrackingScope backtrack(*this);
{
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
regexScope.emplace(*L, mustBeRegex);

// Try re-lex as a `/.../` regex literal, this will split an operator if
// necessary.
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);

// If we didn't split a prefix operator, reset the regex lexing scope.
// Otherwise, we want to keep it in place for the next token.
auto didSplit = L->peekNextToken().getLength() == slashIdx;
if (!didSplit)
regexScope.reset();

// Discard the current token, which will be replaced by the re-lexed
// token, which will either be a regex literal token, a prefix operator,
// or the original unchanged token.
discardToken();

// If we split a prefix operator from the regex literal, and are not sure
// whether this should be a regex, backtrack if we didn't end up lexing a
// regex literal.
if (didSplit && !mustBeRegex &&
!L->peekNextToken().is(tok::regex_literal)) {
return;
}
// Check to see if we have an operator containing '/'.
auto slashIdx = Tok.getText().find("/");
if (slashIdx == StringRef::npos)
return;

CancellableBacktrackingScope backtrack(*this);
{
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
regexScope.emplace(*L, mustBeRegex);

// Otherwise, accept the result.
backtrack.cancelBacktrack();
// Try re-lex as a `/.../` regex literal, this will split an operator if
// necessary.
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);

// If we didn't split a prefix operator, reset the regex lexing scope.
// Otherwise, we want to keep it in place for the next token.
auto didSplit = L->peekNextToken().getLength() == slashIdx;
if (!didSplit)
regexScope.reset();

// Discard the current token, which will be replaced by the re-lexed
// token, which will either be a regex literal token, a prefix operator,
// or the original unchanged token.
discardToken();

// If we split a prefix operator from the regex literal, and are not sure
// whether this should be a regex, backtrack if we didn't end up lexing a
// regex literal.
if (didSplit && !mustBeRegex &&
!L->peekNextToken().is(tok::regex_literal)) {
return;
}
break;
}
default:
break;

// Otherwise, accept the result.
backtrack.cancelBacktrack();
}
}

Expand Down Expand Up @@ -3223,17 +3231,23 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
SourceLoc FieldNameLoc;
parseOptionalArgumentLabel(FieldName, FieldNameLoc);

// First check to see if we have the start of a regex literal `/.../`. We
// need to do this before handling unapplied operator references, as e.g
// `(/, /)` might be a regex literal.
tryLexRegexLiteral(/*mustBeRegex*/ false);

// See if we have an operator decl ref '(<op>)'. The operator token in
// this case lexes as a binary operator because it neither leads nor
// follows a proper subexpression.
auto isUnappliedOperator = [&]() {
return Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma);
};

if (isUnappliedOperator()) {
// Check to see if we have the start of a regex literal `/.../`. We need
// to do this for an unapplied operator reference, as e.g `(/, /)` might
// be a regex literal.
tryLexRegexLiteral(/*forUnappliedOperator*/ true);
}

ParserStatus Status;
Expr *SubExpr = nullptr;
if (Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma)) {
if (isUnappliedOperator()) {
SyntaxParsingContext operatorContext(SyntaxContext,
SyntaxKind::IdentifierExpr);
DeclNameLoc Loc;
Expand Down
2 changes: 1 addition & 1 deletion test/StringProcessing/Frontend/enable-flag.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

// REQUIRES: swift_in_compiler

prefix operator / // expected-error {{prefix operator may not contain '/'}}
prefix operator /

_ = /x/
_ = #/x/#
Expand Down
Loading