From fc72a723b558b801f36c1ff70c690207d023d051 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 30 May 2022 13:15:36 -0500 Subject: [PATCH 01/13] Add a regex-match-specific collection This prepares for adopting an opaque result type for matches(of:). The old, CollectionConsumer-based model moves index-by-index, and isn't aware of the regex's semantic level, which results in inaccurate results for regexes that match at a mid-character index. --- .../Algorithms/Matching/Matches.swift | 123 ++++++++++++++---- 1 file changed, 96 insertions(+), 27 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift index a7cd17779..6befa3841 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -183,13 +183,106 @@ extension BidirectionalCollection { // MARK: Regex algorithms +@available(SwiftStdlib 5.7, *) +struct RegexMatchesCollection { + let base: Substring + let regex: Regex + let startIndex: Index + + init(base: Substring, regex: Regex) { + self.base = base + self.regex = regex + self.startIndex = base.firstMatch(of: regex).map(Index.match) ?? .end + } +} + +@available(SwiftStdlib 5.7, *) +extension RegexMatchesCollection: Collection { + enum Index: Comparable { + case match(Regex.Match) + case end + + static func == (lhs: Self, rhs: Self) -> Bool { + switch (lhs, rhs) { + case (.match(let lhs), .match(let rhs)): + return lhs.range == rhs.range + case (.end, .end): + return true + case (.end, .match), (.match, .end): + return false + } + } + + static func < (lhs: Self, rhs: Self) -> Bool { + switch (lhs, rhs) { + case (.match(let lhs), .match(let rhs)): + // This implementation uses a tuple comparison so that an empty + // range `i.. 
Index { + let currentMatch: Element + switch i { + case .match(let match): + currentMatch = match + case .end: + fatalError("Can't advance past the 'endIndex' of a match collection.") + } + + let start: String.Index + if currentMatch.range.isEmpty { + if currentMatch.range.lowerBound == base.endIndex { + return .end + } + + switch regex.initialOptions.semanticLevel { + case .graphemeCluster: + start = base.index(after: currentMatch.range.upperBound) + case .unicodeScalar: + start = base.unicodeScalars.index(after: currentMatch.range.upperBound) + } + } else { + start = currentMatch.range.upperBound + } + + guard let nextMatch = try? regex.firstMatch(in: base[start...]) else { + return .end + } + return Index.match(nextMatch) + } + + subscript(position: Index) -> Regex.Match { + switch position { + case .match(let match): + return match + case .end: + fatalError("Can't subscript the 'endIndex' of a match collection.") + } + } +} + extension BidirectionalCollection where SubSequence == Substring { @available(SwiftStdlib 5.7, *) @_disfavoredOverload func _matches( of regex: R - ) -> MatchesCollection> { - _matches(of: RegexConsumer(regex)) + ) -> RegexMatchesCollection { + RegexMatchesCollection(base: self[...], regex: regex.regex) } @available(SwiftStdlib 5.7, *) @@ -207,30 +300,6 @@ extension BidirectionalCollection where SubSequence == Substring { public func matches( of r: some RegexComponent ) -> [Regex.Match] { - let slice = self[...] - var start = self.startIndex - let end = self.endIndex - let regex = r.regex - - var result = [Regex.Match]() - while start <= end { - guard let match = try? regex._firstMatch( - slice.base, in: start.. 
Date: Mon, 30 May 2022 14:03:41 -0500 Subject: [PATCH 02/13] Add a RegexRangesCollection based on RegexMatches --- .../Algorithms/Algorithms/Ranges.swift | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 36285d7cc..80c33c71a 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -226,17 +226,38 @@ extension BidirectionalCollection where Element: Comparable { // } } +@available(SwiftStdlib 5.7, *) +struct RegexRangesCollection { + let base: RegexMatchesCollection + + init(string: Substring, regex: Regex) { + self.base = RegexMatchesCollection(base: string, regex: regex) + } +} + +@available(SwiftStdlib 5.7, *) +extension RegexRangesCollection: Collection { + typealias Index = RegexMatchesCollection.Index + + var startIndex: Index { base.startIndex } + var endIndex: Index { base.endIndex } + func index(after i: Index) -> Index { base.index(after: i) } + subscript(position: Index) -> Range { base[position].range } +} + // MARK: Regex algorithms -extension BidirectionalCollection where SubSequence == Substring { +extension Collection where SubSequence == Substring { @available(SwiftStdlib 5.7, *) @_disfavoredOverload func _ranges( of regex: R - ) -> RangesCollection> { - _ranges(of: RegexConsumer(regex)) + ) -> RegexRangesCollection { + RegexRangesCollection(string: self[...], regex: regex.regex) } +} +extension BidirectionalCollection where SubSequence == Substring { @available(SwiftStdlib 5.7, *) func _rangesFromBack( of regex: R From a94c72c421b7ce8af546c34fb8797cc4e457bc61 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 30 May 2022 14:05:34 -0500 Subject: [PATCH 03/13] Build `replace`/`replacing` on top of _ranges These can be higher-order operations that replace each range from a given collection instead of 
doing the searching directly. --- .../Algorithms/Algorithms/Replace.swift | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index ccc0962d5..e34f74634 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -12,12 +12,12 @@ // MARK: `CollectionSearcher` algorithms extension RangeReplaceableCollection { - func _replacing( - _ searcher: Searcher, + func _replacing( + _ ranges: Ranges, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where Searcher.Searched == SubSequence, + ) -> Self where Ranges.Element == Range, Replacement.Element == Element { precondition(maxReplacements >= 0) @@ -26,7 +26,7 @@ extension RangeReplaceableCollection { var result = Self() result.append(contentsOf: self[..( - _ searcher: Searcher, + func _replacing( + _ ranges: Ranges, with replacement: Replacement, maxReplacements: Int = .max - ) -> Self where Searcher.Searched == SubSequence, + ) -> Self where Ranges.Element == Range, Replacement.Element == Element { _replacing( - searcher, + ranges, with: replacement, subrange: startIndex..( - _ searcher: Searcher, + _ ranges: Ranges, with replacement: Replacement, maxReplacements: Int = .max - ) where Searcher.Searched == SubSequence, Replacement.Element == Element { + ) where Ranges.Element == Range, Replacement.Element == Element { self = _replacing( - searcher, + ranges, with: replacement, maxReplacements: maxReplacements) } @@ -85,7 +85,7 @@ extension RangeReplaceableCollection where Element: Equatable { maxReplacements: Int = .max ) -> Self where C.Element == Element, Replacement.Element == Element { _replacing( - ZSearcher(pattern: Array(other), by: ==), + _ranges(of: other), with: replacement, subrange: subrange, maxReplacements: maxReplacements) @@ 
-143,7 +143,7 @@ extension RangeReplaceableCollection maxReplacements: Int = .max ) -> Self where C.Element == Element, Replacement.Element == Element { _replacing( - PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other))), + _ranges(of: other), with: replacement, subrange: subrange, maxReplacements: maxReplacements) @@ -195,7 +195,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: Int = .max ) -> Self where Replacement.Element == Element { _replacing( - RegexConsumer(regex), + _ranges(of: regex), with: replacement, subrange: subrange, maxReplacements: maxReplacements) From 5dff33a2fb8eaef680da7442caeb31e83708c204 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 30 May 2022 14:06:08 -0500 Subject: [PATCH 04/13] Test matches/ranges/replace w/ scalar semantics --- Tests/RegexTests/AlgorithmsTests.swift | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index 1a5bc34df..e92a86437 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -498,6 +498,25 @@ class AlgorithmTests: XCTestCase { s2.ranges(of: try Regex("a*?")).map(s2.offsets(of:)), [0..<0, 1..<1, 2..<2]) } + func testUnicodeScalarSemantics() throws { + let regex = try Regex(#"(?u)."#, as: Substring.self) + let emptyRegex = try Regex(#"(?u)z?"#, as: Substring.self) + + XCTAssertEqual("".matches(of: regex).map(\.output), []) + XCTAssertEqual("Café".matches(of: regex).map(\.output), ["C", "a", "f", "é"]) + XCTAssertEqual("Cafe\u{301}".matches(of: regex).map(\.output), ["C", "a", "f", "e", "\u{301}"]) + XCTAssertEqual("Cafe\u{301}".matches(of: emptyRegex).count, 6) + + XCTAssertEqual("Café".ranges(of: regex).count, 4) + XCTAssertEqual("Cafe\u{301}".ranges(of: regex).count, 5) + XCTAssertEqual("Cafe\u{301}".ranges(of: emptyRegex).count, 6) + + XCTAssertEqual("Café".replacing(regex, with: "-"), "----") + 
XCTAssertEqual("Cafe\u{301}".replacing(regex, with: "-"), "-----") + XCTAssertEqual("Café".replacing(emptyRegex, with: "-"), "-C-a-f-é-") + XCTAssertEqual("Cafe\u{301}".replacing(emptyRegex, with: "-"), "-C-a-f-e-\u{301}-") + } + func testSwitches() { switch "abcde" { case try! Regex("a.*f"): From 30c540a9165c9e936f676e034179e9b3df3abc33 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 30 May 2022 23:37:20 -0500 Subject: [PATCH 05/13] Correctly replace only within specified subrange --- .../Algorithms/Algorithms/Replace.swift | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index e34f74634..fe87334a1 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -15,16 +15,14 @@ extension RangeReplaceableCollection { func _replacing( _ ranges: Ranges, with replacement: Replacement, - subrange: Range, maxReplacements: Int = .max ) -> Self where Ranges.Element == Range, Replacement.Element == Element { precondition(maxReplacements >= 0) - var index = subrange.lowerBound var result = Self() - result.append(contentsOf: self[..( - _ ranges: Ranges, - with replacement: Replacement, - maxReplacements: Int = .max - ) -> Self where Ranges.Element == Range, - Replacement.Element == Element - { - _replacing( - ranges, - with: replacement, - subrange: startIndex..( @@ -85,9 +69,8 @@ extension RangeReplaceableCollection where Element: Equatable { maxReplacements: Int = .max ) -> Self where C.Element == Element, Replacement.Element == Element { _replacing( - _ranges(of: other), + self[subrange]._ranges(of: other), with: replacement, - subrange: subrange, maxReplacements: maxReplacements) } @@ -143,9 +126,8 @@ extension RangeReplaceableCollection maxReplacements: Int = .max ) -> Self where C.Element == Element, Replacement.Element == 
Element { _replacing( - _ranges(of: other), + self[subrange]._ranges(of: other), with: replacement, - subrange: subrange, maxReplacements: maxReplacements) } @@ -195,9 +177,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: Int = .max ) -> Self where Replacement.Element == Element { _replacing( - _ranges(of: regex), + self[subrange]._ranges(of: regex), with: replacement, - subrange: subrange, maxReplacements: maxReplacements) } From 83d1a752b8273a5ea95e60b84a9588c13b560d15 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 1 Jun 2022 09:29:57 -0500 Subject: [PATCH 06/13] Test RegexMatchesCollection conformances --- .../RegexTests/AlgorithmsInternalsTests.swift | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Tests/RegexTests/AlgorithmsInternalsTests.swift b/Tests/RegexTests/AlgorithmsInternalsTests.swift index 31e082bce..96cd12076 100644 --- a/Tests/RegexTests/AlgorithmsInternalsTests.swift +++ b/Tests/RegexTests/AlgorithmsInternalsTests.swift @@ -44,4 +44,39 @@ extension AlgorithmTests { XCTAssertEqual("x", "axb"._trimming(r)) XCTAssertEqual("x", "axbb"._trimming(r)) } + + func testMatchesCollection() { + let r = try! 
Regex("a|b+|c*", as: Substring.self) + + let str = "zaabbbbbbcde" + let matches = str._matches(of: r) + let expected: [Substring] = [ + "", // before 'z' + "a", + "a", + "bbbbbb", + "c", + "", // after 'c' + "", // after 'd' + "", // after 'e' + ] + + // Make sure we're getting the right collection type + let _: RegexMatchesCollection = matches + + XCTAssertEqual(matches.map(\.output), expected) + + let i = matches.index(matches.startIndex, offsetBy: 3) + XCTAssertEqual(matches[i].output, expected[3]) + let j = matches.index(i, offsetBy: 5) + XCTAssertEqual(j, matches.endIndex) + + var index = matches.startIndex + while index < matches.endIndex { + XCTAssertEqual( + matches[index].output, + expected[matches.distance(from: matches.startIndex, to: index)]) + matches.formIndex(after: &index) + } + } } From b200e9a01dbb969b9368c36f1a036e81b2c806d6 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 1 Jun 2022 11:07:21 -0500 Subject: [PATCH 07/13] Address RegexMatchesCollection feedback Adds a custom iterator to avoid the heavy indexing cost in many cases --- .../Algorithms/Matching/Matches.swift | 106 +++++++++++++----- 1 file changed, 78 insertions(+), 28 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift index 6befa3841..58de2750b 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -185,23 +185,92 @@ extension BidirectionalCollection { @available(SwiftStdlib 5.7, *) struct RegexMatchesCollection { - let base: Substring + let input: Substring let regex: Regex let startIndex: Index init(base: Substring, regex: Regex) { - self.base = base + self.input = base self.regex = regex self.startIndex = base.firstMatch(of: regex).map(Index.match) ?? 
.end } } +@available(SwiftStdlib 5.7, *) +extension RegexMatchesCollection: Sequence { + /// Returns the index to start searching for the next match after `match`. + fileprivate func searchIndex(after match: Regex.Match) -> String.Index? { + if !match.range.isEmpty { + return match.range.upperBound + } + + // If the last match was an empty match, advance by one position and + // run again, unless at the end of `input`. + if match.range.lowerBound == input.endIndex { + return nil + } + + switch regex.initialOptions.semanticLevel { + case .graphemeCluster: + return input.index(after: match.range.upperBound) + case .unicodeScalar: + return input.unicodeScalars.index(after: match.range.upperBound) + } + } + + struct Iterator: IteratorProtocol { + let base: RegexMatchesCollection + + // Because `RegexMatchesCollection` eagerly computes the first match for + // its `startIndex`, the iterator begins with this current match populated. + // For subsequent calls to `next()`, this value is `nil`, and `nextStart` + // is used to search for the next match. + var currentMatch: Regex.Match? + var nextStart: String.Index? + + init(_ matches: RegexMatchesCollection) { + self.base = matches + self.currentMatch = matches.startIndex.match + self.nextStart = currentMatch.flatMap(base.searchIndex(after:)) + } + + mutating func next() -> Regex.Match? { + // Initial case with pre-computed first match + if let match = currentMatch { + currentMatch = nil + return match + } + + // `nextStart` is `nil` when iteration has completed + guard let start = nextStart else { + return nil + } + + // Otherwise, find the next match (if any) and compute `nextStart` + let match = try! 
base.regex.firstMatch(in: base.input[start...]) + nextStart = match.flatMap(base.searchIndex(after:)) + return match + } + } + + func makeIterator() -> Iterator { + Iterator(self) + } +} + @available(SwiftStdlib 5.7, *) extension RegexMatchesCollection: Collection { enum Index: Comparable { case match(Regex.Match) case end + var match: Regex.Match? { + switch self { + case .match(let match): return match + case .end: return nil + } + } + static func == (lhs: Self, rhs: Self) -> Bool { switch (lhs, rhs) { case (.match(let lhs), .match(let rhs)): @@ -236,43 +305,24 @@ extension RegexMatchesCollection: Collection { } func index(after i: Index) -> Index { - let currentMatch: Element - switch i { - case .match(let match): - currentMatch = match - case .end: + guard let currentMatch = i.match else { fatalError("Can't advance past the 'endIndex' of a match collection.") } - let start: String.Index - if currentMatch.range.isEmpty { - if currentMatch.range.lowerBound == base.endIndex { - return .end - } - - switch regex.initialOptions.semanticLevel { - case .graphemeCluster: - start = base.index(after: currentMatch.range.upperBound) - case .unicodeScalar: - start = base.unicodeScalars.index(after: currentMatch.range.upperBound) - } - } else { - start = currentMatch.range.upperBound - } - - guard let nextMatch = try? regex.firstMatch(in: base[start...]) else { + guard + let start = searchIndex(after: currentMatch), + let nextMatch = try! 
regex.firstMatch(in: input[start...]) + else { return .end } return Index.match(nextMatch) } subscript(position: Index) -> Regex.Match { - switch position { - case .match(let match): - return match - case .end: + guard let match = position.match else { fatalError("Can't subscript the 'endIndex' of a match collection.") } + return match } } From c7dd144f3844cc9ba24f3dc77bb2a84773c2c524 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 1 Jun 2022 11:07:32 -0500 Subject: [PATCH 08/13] Add a custom iterator for RegexRangesCollection --- .../Algorithms/Algorithms/Ranges.swift | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 80c33c71a..40732255c 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -234,7 +234,22 @@ struct RegexRangesCollection { self.base = RegexMatchesCollection(base: string, regex: regex) } } + +@available(SwiftStdlib 5.7, *) +extension RegexRangesCollection: Sequence { + struct Iterator: IteratorProtocol { + var matchesBase: RegexMatchesCollection.Iterator + + mutating func next() -> Range? 
{ + matchesBase.next().map(\.range) + } + } + func makeIterator() -> Iterator { + Iterator(matchesBase: base.makeIterator()) + } +} + @available(SwiftStdlib 5.7, *) extension RegexRangesCollection: Collection { typealias Index = RegexMatchesCollection.Index From 1c591737cc4316b837a3ee24a9d9ba248583bcbe Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 2 Jun 2022 09:55:14 -0500 Subject: [PATCH 09/13] Switch away from storing a match on the iterator --- .../Algorithms/Matching/Matches.swift | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift index 58de2750b..ef5ec28e8 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -222,23 +222,22 @@ extension RegexMatchesCollection: Sequence { let base: RegexMatchesCollection // Because `RegexMatchesCollection` eagerly computes the first match for - // its `startIndex`, the iterator begins with this current match populated. - // For subsequent calls to `next()`, this value is `nil`, and `nextStart` - // is used to search for the next match. - var currentMatch: Regex.Match? + // its `startIndex`, the iterator can use that match for its initial + // iteration. For subsequent calls to `next()`, this value is `false`, and + // `nextStart` is used to search for the next match. + var initialIteration = true var nextStart: String.Index? init(_ matches: RegexMatchesCollection) { self.base = matches - self.currentMatch = matches.startIndex.match - self.nextStart = currentMatch.flatMap(base.searchIndex(after:)) + self.nextStart = base.startIndex.match.flatMap(base.searchIndex(after:)) } mutating func next() -> Regex.Match? 
{ // Initial case with pre-computed first match - if let match = currentMatch { - currentMatch = nil - return match + if initialIteration { + initialIteration = false + return base.startIndex.match } // `nextStart` is `nil` when iteration has completed From ad2dae2209a981e2a4a5b6ddebc9f3922993c067 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 16 Jun 2022 15:33:31 -0500 Subject: [PATCH 10/13] Use try? instead of try! --- Sources/_StringProcessing/Algorithms/Matching/Matches.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift index ef5ec28e8..35cf4bd02 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -246,7 +246,7 @@ extension RegexMatchesCollection: Sequence { } // Otherwise, find the next match (if any) and compute `nextStart` - let match = try! base.regex.firstMatch(in: base.input[start...]) + let match = try? base.regex.firstMatch(in: base.input[start...]) nextStart = match.flatMap(base.searchIndex(after:)) return match } From eb010f2a7816ab55568b6dddd8fdd1707865731d Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 16 Jun 2022 20:22:37 -0500 Subject: [PATCH 11/13] Missed the second try! last time --- Sources/_StringProcessing/Algorithms/Matching/Matches.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift index 35cf4bd02..094d3dfdd 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -310,7 +310,7 @@ extension RegexMatchesCollection: Collection { guard let start = searchIndex(after: currentMatch), - let nextMatch = try! regex.firstMatch(in: input[start...]) + let nextMatch = try? 
regex.firstMatch(in: input[start...]) else { return .end } From 00782646ece209880126b33edeb90c9dfab4f58f Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Fri, 17 Jun 2022 02:13:45 -0500 Subject: [PATCH 12/13] Work around a type checking issue This moves a call to `prefix(_:)` to before a for-in loop to work around the issue described in https://github.com/apple/swift/issues/59522. --- Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index fe87334a1..a3f876b0e 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -24,7 +24,9 @@ extension RangeReplaceableCollection { var result = Self() var index = startIndex - for range in ranges.prefix(maxReplacements) { + // `maxRanges` is a workaround for https://github.com/apple/swift/issues/59522 + let maxRanges = ranges.prefix(maxReplacements) + for range in maxRanges { result.append(contentsOf: self[index.. 
Date: Fri, 17 Jun 2022 08:18:09 -0500 Subject: [PATCH 13/13] Update tests to use matchingSemantics API --- Tests/RegexTests/AlgorithmsTests.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index e92a86437..175746f71 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -499,8 +499,8 @@ class AlgorithmTests: XCTestCase { } func testUnicodeScalarSemantics() throws { - let regex = try Regex(#"(?u)."#, as: Substring.self) - let emptyRegex = try Regex(#"(?u)z?"#, as: Substring.self) + let regex = try Regex(#"."#, as: Substring.self).matchingSemantics(.unicodeScalar) + let emptyRegex = try Regex(#"z?"#, as: Substring.self).matchingSemantics(.unicodeScalar) XCTAssertEqual("".matches(of: regex).map(\.output), []) XCTAssertEqual("Café".matches(of: regex).map(\.output), ["C", "a", "f", "é"])