From 3b23606ebbb790d7690da5360364ba0aa3a9adbc Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 11 Aug 2022 11:02:19 -0700 Subject: [PATCH 1/2] Backreferences do not guarantee forward progress --- Sources/_StringProcessing/ByteCodeGen.swift | 2 ++ Tests/RegexTests/MatchTests.swift | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index e8c92f2b5..51a0182ed 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -1061,6 +1061,8 @@ extension DSLTree.Node { case .atom(let atom): switch atom { case .changeMatchingOptions, .assertion: return false + // Captures may be nil so backreferences may be zero length matches + case .backreference: return false default: return true } case .trivia, .empty: diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 8e01582a9..b3b99b099 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2534,4 +2534,8 @@ extension RegexTests { expectCompletion(regex: #"(a{,4})*"#, in: "aa") expectCompletion(regex: #"((|)+)*"#, in: "aa") } + + func testFuzzerArtifacts() throws { + expectCompletion(regex: #"(b?)\1*"#, in: "a") + } } From b4df644dbe88558c16f85bde9ad1cb64bda4f836 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Mon, 15 Aug 2022 15:25:03 -0700 Subject: [PATCH 2/2] Don't assume quoted literals are non-empty during bytecodegen --- Sources/_StringProcessing/ByteCodeGen.swift | 2 +- Tests/RegexTests/MatchTests.swift | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 51a0182ed..8d7224710 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -109,7 +109,7 @@ fileprivate extension Compiler.ByteCodeGen { } // Fast path for eliding boundary checks for an all ascii quoted literal - if optimizationsEnabled && s.allSatisfy(\.isASCII) { + if optimizationsEnabled && s.allSatisfy(\.isASCII) && !s.isEmpty { let lastIdx = s.unicodeScalars.indices.last! for idx in s.unicodeScalars.indices { let boundaryCheck = idx == lastIdx diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index b3b99b099..f2a8f9e82 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2465,6 +2465,9 @@ extension RegexTests { // case insensitive tests firstMatchTest(#"(?i)abc\u{301}d"#, input: "AbC\u{301}d", match: "AbC\u{301}d", semanticLevel: .unicodeScalar) + + // check that we don't crash on empty strings + firstMatchTest(#"\Q\E"#, input: "", match: "") } func testCase() {