diff --git a/Sources/PatternConverter/PatternConverter.swift b/Sources/PatternConverter/PatternConverter.swift index f66204884..a10698526 100644 --- a/Sources/PatternConverter/PatternConverter.swift +++ b/Sources/PatternConverter/PatternConverter.swift @@ -30,9 +30,6 @@ struct PatternConverter: ParsableCommand { @Flag(help: "Whether to show canonical regex literal") var showCanonical: Bool = false - @Flag(help: "Whether to show capture structure") - var showCaptureStructure: Bool = false - @Flag(help: "Whether to skip result builder DSL") var skipDSL: Bool = false @@ -71,13 +68,6 @@ struct PatternConverter: ParsableCommand { print() } - if showCaptureStructure { - print("Capture structure:") - print() - print(ast.captureStructure) - print() - } - print() if !skipDSL { let render = ast.renderAsBuilderDSL( diff --git a/Sources/_RegexParser/Regex/AST/AST.swift b/Sources/_RegexParser/Regex/AST/AST.swift index 409d5a7ee..ebb64736a 100644 --- a/Sources/_RegexParser/Regex/AST/AST.swift +++ b/Sources/_RegexParser/Regex/AST/AST.swift @@ -24,12 +24,6 @@ public struct AST: Hashable { extension AST { /// Whether this AST tree has nested somewhere inside it a capture. public var hasCapture: Bool { root.hasCapture } - - /// The capture structure of this AST tree. - public var captureStructure: CaptureStructure { - var constructor = CaptureStructure.Constructor(.flatten) - return root._captureStructure(&constructor) - } } extension AST { diff --git a/Sources/_RegexParser/Regex/Parse/CaptureList.swift b/Sources/_RegexParser/Regex/Parse/CaptureList.swift new file mode 100644 index 000000000..d112b2010 --- /dev/null +++ b/Sources/_RegexParser/Regex/Parse/CaptureList.swift @@ -0,0 +1,154 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public struct CaptureList { + public var captures: [Capture] + + public init(_ s: S) where S.Element == Capture { + captures = Array(s) + } + + public mutating func append(_ c: Capture) { + captures.append(c) + } +} + +extension CaptureList { + public struct Capture { + public var name: String? + public var type: Any.Type? + public var optionalDepth: Int + + public init( + name: String? = nil, + type: Any.Type? = nil, + optionalDepth: Int + ) { + self.name = name + self.type = type + self.optionalDepth = optionalDepth + } + } +} + +// MARK: Generating from AST + +extension AST.Node { + public func _addCaptures( + to list: inout CaptureList, + optionalNesting nesting: Int + ) { + let addOptional = nesting+1 + switch self { + case let .alternation(a): + for child in a.children { + child._addCaptures(to: &list, optionalNesting: addOptional) + } + + case let .concatenation(c): + for child in c.children { + child._addCaptures(to: &list, optionalNesting: nesting) + } + + case let .group(g): + switch g.kind.value { + case .capture: + list.append(.init(optionalDepth: nesting)) + + case .namedCapture(let name): + list.append(.init(name: name.value, optionalDepth: nesting)) + + case .balancedCapture(let b): + list.append(.init(name: b.name?.value, optionalDepth: nesting)) + + default: break + } + g.child._addCaptures(to: &list, optionalNesting: nesting) + + case .conditional(let c): + switch c.condition.kind { + case .group(let g): + AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting) + default: + break + } + + c.trueBranch._addCaptures(to: &list, optionalNesting: addOptional) + c.falseBranch._addCaptures(to: &list, optionalNesting: addOptional) + + case .quantification(let q): + var optNesting = nesting + if q.amount.value.bounds.atLeast == 0 { + optNesting += 1 + } + q.child._addCaptures(to: &list, optionalNesting: optNesting) + + case .absentFunction(let abs): + switch abs.kind { + case .expression(_, _, let child): + child._addCaptures(to: &list, optionalNesting: nesting) + case .clearer, .repeater, .stopper: + break + } + + case .quote, .trivia, .atom, .customCharacterClass, .empty: + break + } + } + + public var _captureList: CaptureList { + var caps = CaptureList() + self._addCaptures(to: &caps, optionalNesting: 0) + return caps + } +} + +extension AST { + /// Get the capture list for this AST + public var captureList: CaptureList { + root._captureList + } +} + +// MARK: Convenience for testing and inspection + +extension CaptureList.Capture: Equatable { + public static func == (lhs: Self, rhs: Self) -> Bool { + lhs.name == rhs.name && + lhs.optionalDepth == rhs.optionalDepth && + lhs.type == rhs.type + } +} +extension CaptureList: Equatable {} + +extension CaptureList.Capture: CustomStringConvertible { + public var description: String { + let typeStr: String + if let ty = type { + typeStr = "\(ty)" + } else { + typeStr = "Substring" + } + let suffix = String(repeating: "?", count: optionalDepth) + return typeStr + suffix + } +} +extension CaptureList: CustomStringConvertible { + public var description: String { + "(" + captures.map(\.description).joined(separator: ", ") + ")" + } +} + +extension CaptureList: ExpressibleByArrayLiteral { + public init(arrayLiteral elements: Capture...) { + self.init(elements) + } +} diff --git a/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift b/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift index 8298dc207..163dbbf7b 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift @@ -9,258 +9,35 @@ // //===----------------------------------------------------------------------===// -// A tree representing the type of some captures. -public enum CaptureStructure: Equatable { +// TODO: Remove and directly serialize CaptureList instead + +// A tree representing the type of some captures, used for communication +// with the compiler. +enum CaptureStructure: Equatable { case atom(name: String? = nil, type: AnyType? = nil) indirect case optional(CaptureStructure) indirect case tuple([CaptureStructure]) - public static func tuple(_ children: CaptureStructure...) -> Self { + static func tuple(_ children: CaptureStructure...) -> Self { tuple(children) } - public static var empty: Self { + static var empty: Self { .tuple([]) } } -// TODO: Below are all flattening constructors. Instead create -// a builder/visitor that can store the structuralization -// approach - -extension CaptureStructure { - public struct Constructor { - var strategy: Strategy - - public init(_ strategy: Strategy = .flatten) { - guard strategy == .flatten else { - fatalError("TODO: adjust creator methods") - } - self.strategy = strategy - } - } -} - -extension CaptureStructure.Constructor { - public mutating func alternating( - _ children: C - ) -> CaptureStructure where C.Element: _TreeNode { - return children.map { - $0._captureStructure(&self) - }.reduce(.empty, +) - .map(CaptureStructure.optional) - } - public mutating func concatenating( - _ children: C - ) -> CaptureStructure where C.Element: _TreeNode { - return children.map { - $0._captureStructure(&self) - }.reduce(.empty, +) - } - - public mutating func grouping( - _ child: T, - as kind: AST.Group.Kind - ) -> CaptureStructure { - switch kind { - case .capture: - return capturing(child) - case .namedCapture(let name): - return capturing(name: name.value, child) - case .balancedCapture(let b): - return capturing(name: b.name?.value, child) - default: - precondition(!kind.isCapturing) - return child._captureStructure(&self) - } - } - - public mutating func capturing( - name: String? = nil, - _ child: T, - withType type: AnyType? = nil - ) -> CaptureStructure { - .atom(name: name, type: type) - + child._captureStructure(&self) - } - - // TODO: We'll likely want/need a generalization of - // conditional's condition kind. - public mutating func condition( - _ condition: AST.Conditional.Condition.Kind, - trueBranch: T, - falseBranch: T - ) -> CaptureStructure { - // A conditional's capture structure is effectively that of an alternation - // between the true and false branches. However the condition may also - // have captures in the case of a group condition. - var captures = CaptureStructure.empty - switch condition { - case .group(let g): - captures = captures + AST.Node.group(g)._captureStructure(&self) - default: - break - } - let branchCaptures = trueBranch._captureStructure(&self) + - falseBranch._captureStructure(&self) - return captures + branchCaptures.map(CaptureStructure.optional) - } - - public mutating func quantifying( - _ child: T, amount: AST.Quantification.Amount - ) -> CaptureStructure { - let result = child._captureStructure(&self) - return amount.bounds.atLeast == 0 - ? result.map(CaptureStructure.optional) : result - } - - // TODO: Will need to adjust for DSLTree support, and - // "absent" isn't the best name for these. - public mutating func absent( - _ kind: AST.AbsentFunction.Kind - ) -> CaptureStructure { - // Only the child of an expression absent function is relevant, as the - // other expressions don't actually get matched against. - switch kind { - case .expression(_, _, let child): - return child._captureStructure(&self) - case .clearer, .repeater, .stopper: - return .empty - } - } - -} - -extension AST.Node { - public func _captureStructure( - _ constructor: inout CaptureStructure.Constructor - ) -> CaptureStructure { - guard constructor.strategy == .flatten else { - fatalError("TODO") - } - - // Note: This implementation could be more optimized. - switch self { - case let .alternation(a): - return constructor.alternating(a.children) - - case let .concatenation(c): - return constructor.concatenating(c.children) - - case let .group(g): - return constructor.grouping(g.child, as: g.kind.value) - - case .conditional(let c): - return constructor.condition( - c.condition.kind, - trueBranch: c.trueBranch, - falseBranch: c.falseBranch) - - case .quantification(let q): - return constructor.quantifying( - q.child, amount: q.amount.value) - - case .absentFunction(let abs): - return constructor.absent(abs.kind) - - case .quote, .trivia, .atom, .customCharacterClass, .empty: - return .empty - } - } -} - -// MARK: - Combination and transformation - -extension CaptureStructure { - /// Returns a capture structure by concatenating any tuples in `self` and - /// `other`. - func concatenating(with other: CaptureStructure) -> CaptureStructure { - switch (self, other) { - // (T...) + (U...) ==> (T..., U...) - case let (.tuple(lhs), .tuple(rhs)): - return .tuple(lhs + rhs) - // T + () ==> T - case (_, .tuple(let rhs)) where rhs.isEmpty: - return self - // () + T ==> T - case (.tuple(let lhs), _) where lhs.isEmpty: - return other - // (T...) + U ==> (T..., U) - case let (.tuple(lhs), _): - return .tuple(lhs + [other]) - // T + (U...) ==> (T, U...) - case let (_, .tuple(rhs)): - return .tuple([self] + rhs) - // T + U ==> (T, U) - default: - return .tuple([self, other]) - } - } - - static func + ( - lhs: CaptureStructure, rhs: CaptureStructure - ) -> CaptureStructure { - lhs.concatenating(with: rhs) - } - - /// Returns a capture structure by transforming any tuple element of `self` - /// or transforming `self` directly if it is not a tuple. - func map( - _ transform: (CaptureStructure) -> CaptureStructure - ) -> CaptureStructure { - if case .tuple(let children) = self { - return .tuple(children.map(transform)) - } - return transform(self) - } -} - // MARK: - Common properties extension CaptureStructure { /// Returns a Boolean indicating whether the structure does not contain any /// captures. - public var isEmpty: Bool { + private var isEmpty: Bool { if case .tuple(let elements) = self, elements.isEmpty { return true } return false } - - public func type(withAtomType atomType: Any.Type) -> Any.Type { - switch self { - case .atom(_, type: nil): - return atomType - case .atom(_, type: let type?): - return type.base - case .optional(let child): - return TypeConstruction.optionalType(of: child.type(withAtomType: atomType)) - case .tuple(let children): - return TypeConstruction.tupleType(of: children.map { - $0.type(withAtomType: atomType) - }) - } - } - - public typealias DefaultAtomType = Substring - - public var type: Any.Type { - type(withAtomType: DefaultAtomType.self) - } - - public var atomType: AnyType { - switch self { - case .atom(_, type: nil): - return .init(Substring.self) - case .atom(_, type: let type?): - return type - case .optional(let child): - return child.atomType - case .tuple: - fatalError("Recursive nesting has no single atom type") - } - - } } // MARK: - Serialization @@ -280,7 +57,7 @@ extension CaptureStructure { private typealias SerializationVersion = UInt16 private static let currentSerializationVersion: SerializationVersion = 1 - public static func serializationBufferSize( + static func serializationBufferSize( forInputUTF8CodeUnitCount inputUTF8CodeUnitCount: Int ) -> Int { MemoryLayout.stride + inputUTF8CodeUnitCount + 1 @@ -301,7 +78,7 @@ extension CaptureStructure { /// /// - Parameter buffer: A buffer whose byte count is at least the byte count /// of the regular expression string that produced this capture structure. - public func encode(to buffer: UnsafeMutableRawBufferPointer) { + func encode(to buffer: UnsafeMutableRawBufferPointer) { assert(!buffer.isEmpty, "Buffer must not be empty") assert( buffer.count >= @@ -360,7 +137,7 @@ extension CaptureStructure { /// Creates a capture structure by decoding a serialized representation from /// the given buffer. - public init?(decoding buffer: UnsafeRawBufferPointer) { + init?(decoding buffer: UnsafeRawBufferPointer) { var scopes: [[CaptureStructure]] = [[]] var currentScope: [CaptureStructure] { get { scopes[scopes.endIndex - 1] } @@ -414,13 +191,13 @@ extension CaptureStructure { } extension CaptureStructure: CustomStringConvertible { - public var description: String { + var description: String { var printer = PrettyPrinter() _print(&printer) return printer.finish() } - private func _print(_ printer: inout PrettyPrinter) { + func _print(_ printer: inout PrettyPrinter) { switch self { case let .atom(name, type): let name = name ?? "" @@ -444,10 +221,41 @@ extension CaptureStructure: CustomStringConvertible { } } -extension CaptureStructure.Constructor { - public enum Strategy { - case flatten - case nest - // case drop(after: Int)... +extension AST { + /// The capture structure of this AST for compiler communication. + var captureStructure: CaptureStructure { + root._captureList._captureStructure(nestOptionals: true) + } +} + +// MARK: Convert CaptureList into CaptureStructure + +extension CaptureList { + func _captureStructure(nestOptionals: Bool) -> CaptureStructure { + if captures.isEmpty { return .empty } + if captures.count == 1 { + return captures.first!._captureStructure(nestOptionals: nestOptionals) + } + return .tuple(captures.map { + $0._captureStructure(nestOptionals: nestOptionals) + }) + } +} + +extension CaptureList.Capture { + func _captureStructure(nestOptionals: Bool) -> CaptureStructure { + if optionalDepth == 0 { + if let ty = type { + return .atom(name: name, type: .init(ty)) + } + return .atom(name: name) + } + var copy = self + copy.optionalDepth = 0 + var base = copy._captureStructure(nestOptionals: false) + for _ in 0..<(nestOptionals ? optionalDepth : 1) { + base = .optional(base) + } + return base } } diff --git a/Sources/_RegexParser/Regex/TreeProtocols.swift b/Sources/_RegexParser/Regex/TreeProtocols.swift index c14db65ce..7f1ccb5f7 100644 --- a/Sources/_RegexParser/Regex/TreeProtocols.swift +++ b/Sources/_RegexParser/Regex/TreeProtocols.swift @@ -2,10 +2,6 @@ public protocol _TreeNode { var children: [Self]? { get } - - func _captureStructure( - _: inout CaptureStructure.Constructor - ) -> CaptureStructure } extension _TreeNode { diff --git a/Sources/_RegexParser/Utility/Misc.swift b/Sources/_RegexParser/Utility/Misc.swift index 55d3d3adc..65aca42f1 100644 --- a/Sources/_RegexParser/Utility/Misc.swift +++ b/Sources/_RegexParser/Utility/Misc.swift @@ -161,7 +161,7 @@ extension BinaryInteger { } /// A wrapper of an existential metatype, equatable and hashable by reference. -public struct AnyType: Equatable, Hashable { +public struct AnyType: Hashable { public var base: Any.Type public init(_ type: Any.Type) { @@ -176,3 +176,5 @@ public struct AnyType: Equatable, Hashable { hasher.combine(ObjectIdentifier(base)) } } + + diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index 96476f42b..47faa23ed 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -28,7 +28,7 @@ class Compiler { __consuming func emit() throws -> Program { // TODO: Handle global options var codegen = ByteCodeGen(options: options) - codegen.builder.captureStructure = tree.captureStructure + codegen.builder.captureList = tree.root._captureList try codegen.emitNode(tree.root) let program = try codegen.finish() return program diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 2b38ace0a..cae8194bd 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -38,9 +38,7 @@ extension MEProgram where Input.Element: Hashable { // Special addresses or instructions var failAddressToken: AddressToken? = nil - // TODO: Should we have better API for building this up - // as we compile? - var captureStructure: CaptureStructure = .empty + var captureList = CaptureList() // Symbolic reference resolution var unresolvedReferences: [ReferenceID: [InstructionAddress]] = [:] @@ -353,7 +351,7 @@ extension MEProgram.Builder { staticTransformFunctions: transformFunctions, staticMatcherFunctions: matcherFunctions, registerInfo: regInfo, - captureStructure: captureStructure, + captureList: captureList, referencedCaptureOffsets: referencedCaptureOffsets, namedCaptureOffsets: namedCaptureOffsets) } diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 807598637..e3a542c1e 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -142,7 +142,7 @@ extension Processor._StoredCapture: CustomStringConvertible { } } -struct CaptureList { +struct MECaptureList { var values: Array._StoredCapture> var referencedCaptureOffsets: [ReferenceID: Int] var namedCaptureOffsets: [String: Int] diff --git a/Sources/_StringProcessing/Engine/MEProgram.swift b/Sources/_StringProcessing/Engine/MEProgram.swift index 0bfa0ecba..8f1c721b0 100644 --- a/Sources/_StringProcessing/Engine/MEProgram.swift +++ b/Sources/_StringProcessing/Engine/MEProgram.swift @@ -34,7 +34,7 @@ struct MEProgram where Input.Element: Equatable { var enableTracing: Bool = false - let captureStructure: CaptureStructure + let captureList: CaptureList let referencedCaptureOffsets: [ReferenceID: Int] let namedCaptureOffsets: [String: Int] } diff --git a/Sources/_StringProcessing/Engine/Structuralize.swift b/Sources/_StringProcessing/Engine/Structuralize.swift index 12d2e1242..a8cfeb20c 100644 --- a/Sources/_StringProcessing/Engine/Structuralize.swift +++ b/Sources/_StringProcessing/Engine/Structuralize.swift @@ -1,78 +1,21 @@ @_implementationOnly import _RegexParser -extension CaptureStructure { - var optionalCount: Int { - switch self { - case .atom: return 0 - case .optional(let o): - return 1 + o.optionalCount - case .tuple: - // FIXME: Separate CaptureStructure and a component - fatalError("Recursive nesting") - @unknown default: - fatalError("Unknown default") - } - } - - // FIXME: Do it all in one pass, no need for all these - // intermediary arrays +extension CaptureList { func structuralize( - _ list: CaptureList, + _ list: MECaptureList, _ input: String - ) throws -> [StructuredCapture] { - - func mapCap( - _ cap: CaptureStructure, - _ storedCap: Processor._StoredCapture - ) -> StructuredCapture { - // TODO: CaptureList perhaps should store a - // metatype or relevant info... - let optCount = cap.optionalCount - - if cap.atomType.base == Substring.self { - // FIXME: What if a typed capture is Substring? - assert(!storedCap.hasValues) - - if let r = storedCap.latest { - return StructuredCapture( - optionalCount: optCount, - storedCapture: StoredCapture(range: r)) - } + ) -> [StructuredCapture] { + assert(list.values.count == captures.count) - return StructuredCapture( - optionalCount: optCount, - storedCapture: nil) - } + var result = [StructuredCapture]() + for (cap, meStored) in zip(self.captures, list.values) { + let stored = StoredCapture( + range: meStored.latest, value: meStored.latestValue) - guard (storedCap.isEmpty || storedCap.hasValues) else { - print(storedCap) - fatalError() - } - // TODO: assert types are the same, under all the - // optionals - - if let v = storedCap.latestValue { - return StructuredCapture( - optionalCount: optCount, - storedCapture: StoredCapture(range: storedCap.latest, value: v)) - } - return StructuredCapture( - optionalCount: optCount, - storedCapture: nil) - } - - switch self { - case let .tuple(values): - assert(list.values.count == values.count) - var result = Array() - for (cap, storedCap) in zip(values, list.values) { - result.append(mapCap(cap, storedCap)) - } - return result - - default: - assert(list.values.count == 1) - return [mapCap(self, list.values.first!)] + result.append(.init( + optionalCount: cap.optionalDepth, storedCapture: stored)) } + return result } } + diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 6ebb93f5c..e44b110e5 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -35,15 +35,13 @@ struct Executor { return nil } - let capList = CaptureList( + let capList = MECaptureList( values: cpu.storedCaptures, referencedCaptureOffsets: engine.program.referencedCaptureOffsets, namedCaptureOffsets: engine.program.namedCaptureOffsets) - let capStruct = engine.program.captureStructure let range = inputRange.lowerBound.. CaptureStructure { - switch node { - case let .orderedChoice(children): - return constructor.alternating(children.map(_Tree.init)) - - case let .concatenation(children): - return constructor.concatenating(children.map(_Tree.init)) - - case let .capture(name, _, child): - if let type = child.valueCaptureType { - return constructor.capturing( - name: name, _Tree(child), withType: type) - } - return constructor.capturing(name: name, _Tree(child)) - - case let .nonCapturingGroup(kind, child): - assert(!kind.ast.isCapturing) - return constructor.grouping(_Tree(child), as: kind.ast) - - case let .conditional(cond, trueBranch, falseBranch): - return constructor.condition( - cond.ast, - trueBranch: _Tree(trueBranch), - falseBranch: _Tree(falseBranch)) - - case let .quantification(amount, _, child): - return constructor.quantifying( - Self(child), amount: amount.ast) - - case let .regexLiteral(re): - // TODO: Force a re-nesting? - return re.ast._captureStructure(&constructor) - - case let .absentFunction(abs): - return constructor.absent(abs.ast.kind) - - case let .convertedRegexLiteral(n, _): - // TODO: Switch nesting strategy? - return Self(n)._captureStructure(&constructor) - - case .matcher: - return .empty - - case .transform(_, let child): - return Self(child)._captureStructure(&constructor) - - case .customCharacterClass, .atom, .trivia, .empty, - .quotedLiteral, .consumer, .characterPredicate: - return .empty - } - } } @_spi(RegexBuilder) diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index b646f16f7..4e08ea103 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -570,6 +570,7 @@ class RegexDSLTests: XCTestCase { } let _: (Substring, Substring, Substring).Type = type(of: regex1).RegexOutput.self + let regex2 = Regex { OneOrMore("a") Capture { @@ -581,6 +582,7 @@ class RegexDSLTests: XCTestCase { } let _: (Substring, Substring, Int?).Type = type(of: regex2).RegexOutput.self + let regex3 = Regex { OneOrMore("a") Capture { @@ -593,6 +595,7 @@ class RegexDSLTests: XCTestCase { } let _: (Substring, Substring, Int, Double?).Type = type(of: regex3).RegexOutput.self + let regex4 = Regex { OneOrMore("a") Capture { diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 7d4266071..b48e1f0a5 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -11,7 +11,42 @@ import XCTest @testable @_spi(RegexBuilder) import _StringProcessing -import _RegexParser +@testable import _RegexParser + + +extension CaptureList.Capture { + static var cap: Self { + return Self(optionalDepth: 0) + } + + static var opt: Self { + return Self(optionalDepth: 1) + } + static var opt_opt: Self { + return Self(optionalDepth: 2) + } + static var opt_opt_opt: Self { + return Self(optionalDepth: 3) + } + static var opt_opt_opt_opt: Self { + return Self(optionalDepth: 4) + } + static var opt_opt_opt_opt_opt: Self { + return Self(optionalDepth: 5) + } + static var opt_opt_opt_opt_opt_opt: Self { + return Self(optionalDepth: 6) + } + + static func named(_ name: String) -> Self { + return Self(name: name, optionalDepth: 0) + } +} +extension CaptureList { + static func caps(count: Int) -> Self { + Self(Array(repeating: .cap, count: count)) + } +} extension StructuredCapture { func formatStringCapture(input: String) -> String { @@ -109,36 +144,35 @@ func compile(_ ast: AST) -> Executor { func captureTest( _ regex: String, - _ expected: CaptureStructure, + _ expected: CaptureList, _ tests: (input: String, output: [StringCapture])..., skipEngine: Bool = false, file: StaticString = #file, line: UInt = #line ) { - let ast = try! parse(regex, .traditional) - let capStructure = ast.captureStructure - guard capStructure == expected else { + let capList = ast.root._captureList + guard capList == expected else { XCTFail(""" - Expected: - \(expected) - Actual: - \(capStructure) - """, - file: file, - line: line) + Expected: + \(expected) + Actual: + \(capList) + """, + file: file, + line: line) return } // Ensure DSLTree preserves literal captures - let dslCapStructure = ast.dslTree.captureStructure - guard dslCapStructure == capStructure else { + let dslCapList = ast.dslTree.root._captureList + guard dslCapList == capList else { XCTFail(""" DSLTree did not preserve structure: AST: - \(capStructure) + \(capList) DSLTree: - \(dslCapStructure) + \(dslCapList) """, file: file, line: line) @@ -192,168 +226,150 @@ extension RegexTests { func testLiteralStructuredCaptures() throws { captureTest( "abc", - .empty, + [], ("abc", [])) captureTest( "a(b)c", - .atom(), + [.cap], ("abc", ["b"])) captureTest( "a(b*)c", - .atom(), + [.cap], ("abc", ["b"]), ("ac", [""]), ("abbc", ["bb"])) captureTest( "a(b)*c", - .optional(.atom()), + [.opt], ("abc", [.some("b")]), ("ac", [.none]), ("abbc", [.some("b")])) captureTest( "a(b)+c", - .atom(), + [.cap], ("abc", ["b"]), ("abbc", ["b"])) captureTest( "a(b)?c", - .optional(.atom()), + [.opt], ("ac", [.none]), ("abc", [.some("b")])) captureTest( "(a)(b)(c)", - .tuple([.atom(),.atom(),.atom()]), + [.cap, .cap, .cap], ("abc", ["a", "b", "c"])) captureTest( "a|(b)", - .optional(.atom()), + [.opt], ("a", [.none]), ("b", [.some("b")])) captureTest( "(a)|(b)", - .tuple(.optional(.atom()), .optional(.atom())), + [.opt, .opt], ("a", [.some("a"), .none]), ("b", [.none, .some("b")])) captureTest( "((a)|(b))", - .tuple(.atom(), .optional(.atom()), .optional(.atom())), + [.cap, .opt, .opt], ("a", ["a", .some("a"), .none]), ("b", ["b", .none, .some("b")])) captureTest( "((a)|(b))?", - .tuple( - .optional(.atom()), - .optional(.optional(.atom())), - .optional(.optional(.atom()))), + [.opt, .opt_opt, .opt_opt], ("a", [.some("a"), .some(.some("a")), .some(.none)]), ("b", [.some("b"), .some(.none), .some(.some("b"))])) + // FIXME captureTest( "((a)|(b))*", - .tuple( - .optional(.atom()), - .optional(.optional(.atom())), - .optional(.optional(.atom()))), + [.opt, .opt_opt, .opt_opt], ("a", [.some("a"), .some(.some("a")), .some(.none)]), skipEngine: true) + // FIXME captureTest( "((a)|(b))+", - .tuple( - .atom(), - .optional(.atom()), - .optional(.atom())), + [.cap, .opt, .opt], // TODO: test cases skipEngine: true) + // FIXME captureTest( "(((a)|(b))*)", - .tuple( - .atom(), - .optional(.atom()), - .optional(.optional(.atom())), - .optional(.optional(.atom()))), + [.cap, .opt, .opt_opt, .opt_opt], // TODO: test cases skipEngine: true) - + // FIXME captureTest( "(((a)|(b))?)", - .tuple( - .atom(), - .optional(.atom()), - .optional(.optional(.atom())), - .optional(.optional(.atom()))), + [.cap, .opt, .opt_opt, .opt_opt], // TODO: test cases skipEngine: true) captureTest( "(a)", - .atom(), + [.cap], ("a", ["a"])) captureTest( "((a))", - .tuple([.atom(), .atom()]), + [.cap, .cap], ("a", ["a", "a"])) captureTest( "(((a)))", - .tuple([.atom(), .atom(), .atom()]), + [.cap, .cap, .cap], ("a", ["a", "a", "a"])) - - // broke + // FIXME captureTest( "((((a)*)?)*)?", - .tuple([ - .optional(.atom()), - .optional(.optional(.atom())), - .optional(.optional(.optional(.atom()))), - .optional(.optional(.optional(.optional(.atom())))), - ]), + [.opt, .opt_opt, .opt_opt_opt, .opt_opt_opt_opt], // TODO: test cases skipEngine: true) - captureTest( "a|(b*)", - .optional(.atom()), + [.opt], ("a", [.none]), ("", [.some("")]), ("b", [.some("b")]), ("bbb", [.some("bbb")])) + // FIXME captureTest( "a|(b)*", - .optional(.optional(.atom())), + [.opt_opt], ("a", [.none]), ("", [.some("")]), ("b", [.some("b")]), ("bbb", [.some("b")]), skipEngine: true) + // FIXME captureTest( "a|(b)+", - .optional(.atom()), + [.opt], ("a", [.none]), ("b", [.some("b")]), ("bbb", [.some("b")]), skipEngine: true) + // FIXME captureTest( "a|(b)?", - .optional(.optional(.atom())), + [.opt_opt], ("a", [.none]), ("", [.none]), ("b", [.some(.some("b"))]), @@ -361,78 +377,78 @@ extension RegexTests { captureTest( "a|(b|c)", - .optional(.atom()), + [.opt], ("a", [.none]), ("b", [.some("b")]), ("c", [.some("c")])) captureTest( "a|(b*|c)", - .optional(.atom()), + [.opt], ("a", [.none]), ("b", [.some("b")]), ("c", [.some("c")])) + // FIXME captureTest( "a|(b|c)*", - .optional(.optional(.atom())), + [.opt_opt], ("a", [.none]), ("", [.some("")]), ("b", [.some("b")]), ("bbb", [.some("b")]), skipEngine: true) + // FIXME captureTest( "a|(b|c)?", - .optional(.optional(.atom())), + [.opt_opt], ("a", [.none]), ("", [.none]), ("b", [.some(.some("b"))]), ("c", [.some(.some("c"))]), skipEngine: true) - captureTest( "a(b(c))", - .tuple(.atom(), .atom()), + [.cap, .cap], ("abc", ["bc", "c"])) captureTest( "a(b(c*))", - .tuple(.atom(), .atom()), + [.cap, .cap], ("ab", ["b", ""]), ("abc", ["bc", "c"]), ("abcc", ["bcc", "cc"])) captureTest( "a(b(c)*)", - .tuple(.atom(), .optional(.atom())), + [.cap, .opt], ("ab", ["b", .none]), ("abc", ["bc", .some("c")]), ("abcc", ["bcc", .some("c")])) captureTest( "a(b(c)?)", - .tuple(.atom(), .optional(.atom())), + [.cap, .opt], ("ab", ["b", .none]), ("abc", ["bc", .some("c")])) - captureTest( "a(b(c))*", - .tuple(.optional(.atom()), .optional(.atom())), + [.opt, .opt], ("a", [.none, .none]), ("abc", [.some("bc"), .some("c")]), ("abcbc", [.some("bc"), .some("c")])) captureTest( "a(b(c))?", - .tuple(.optional(.atom()), .optional(.atom())), + [.opt, .opt], ("a", [.none, .none]), ("abc", [.some("bc"), .some("c")])) -// TODO: "((a|b)*|c)*" -// TODO: "((a|b)|c)*" + // TODO: "((a|b)*|c)*" + // TODO: "((a|b)|c)*" } diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 94c134853..648796683 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -39,7 +39,7 @@ class RegexTests: XCTestCase {} func parseTest( _ input: String, _ expectedAST: AST.Node, syntax: SyntaxOptions = .traditional, - captures expectedCaptures: CaptureStructure = .empty, + captures expectedCaptures: CaptureList = [], file: StaticString = #file, line: UInt = #line ) { @@ -52,7 +52,7 @@ func parseTest( func parseTest( _ input: String, _ expectedAST: AST, syntax: SyntaxOptions = .traditional, - captures expectedCaptures: CaptureStructure = .empty, + captures expectedCaptures: CaptureList = [], file: StaticString = #file, line: UInt = #line ) { @@ -68,7 +68,7 @@ func parseTest( file: file, line: line) return } - let captures = ast.captureStructure + let captures = ast.captureList guard captures == expectedCaptures else { XCTFail(""" @@ -78,13 +78,16 @@ func parseTest( file: file, line: line) return } + // Test capture structure round trip serialization. + let capStruct = captures._captureStructure(nestOptionals: true) let serializedCapturesSize = CaptureStructure.serializationBufferSize( forInputUTF8CodeUnitCount: input.utf8.count) let serializedCaptures = UnsafeMutableRawBufferPointer.allocate( byteCount: serializedCapturesSize, alignment: MemoryLayout.alignment) - captures.encode(to: serializedCaptures) + + capStruct.encode(to: serializedCaptures) guard let decodedCaptures = CaptureStructure( decoding: UnsafeRawBufferPointer(serializedCaptures) ) else { @@ -95,7 +98,7 @@ func parseTest( """) return } - guard decodedCaptures == captures else { + guard decodedCaptures == capStruct else { XCTFail(""" Expected captures: \(expectedCaptures) @@ -310,7 +313,7 @@ extension RegexTests { concat("a", "b", "c", "+", zeroOrMore(of: "d"))) parseTest( "a(b)", concat("a", capture("b")), - captures: .atom()) + captures: [.cap]) parseTest( "abc(?:de)+fghi*k|j", alt( @@ -336,15 +339,13 @@ extension RegexTests { concat( zeroOrMore(of: capture(atom(.any))), capture(zeroOrMore(of: atom(.any)))), - captures: .tuple([.optional(.atom()), .atom()])) + captures: [.opt, .cap]) parseTest( "((.))*((.)?)", concat( zeroOrMore(of: capture(capture(atom(.any)))), capture(zeroOrOne(of: capture(atom(.any))))), - captures: .tuple([ - .optional(.atom()), .optional(.atom()), .atom(), .optional(.atom()) - ])) + captures: [.opt, .opt, .cap, .opt]) parseTest( #"abc\d"#, concat("a", "b", "c", escaped(.decimalDigit))) @@ -357,33 +358,33 @@ extension RegexTests { parseTest( "(a|b)c", concat(capture(alt("a", "b")), "c"), - captures: .atom()) + captures: [.cap]) parseTest( "(a)|b", alt(capture("a"), "b"), - captures: .optional(.atom())) + captures: [.opt]) parseTest( "(a)|(b)|c", alt(capture("a"), capture("b"), "c"), - captures: .tuple(.optional(.atom()), .optional(.atom()))) + captures: [.opt, .opt]) parseTest( "((a|b))c", concat(capture(capture(alt("a", "b"))), "c"), - captures: .tuple([.atom(), .atom()])) + captures: [.cap, .cap]) parseTest( "(?:((a|b)))*?c", concat(quant( .zeroOrMore, .reluctant, nonCapture(capture(capture(alt("a", "b"))))), "c"), - captures: .tuple(.optional(.atom()), .optional(.atom()))) + captures: [.opt, .opt]) parseTest( "(a)|b|(c)d", alt(capture("a"), "b", concat(capture("c"), "d")), - captures: .tuple([.optional(.atom()), .optional(.atom())])) + captures: [.opt, .opt]) // Alternations with empty branches are permitted. parseTest("|", alt(empty(), empty())) - parseTest("(|)", capture(alt(empty(), empty())), captures: .atom()) + parseTest("(|)", capture(alt(empty(), empty())), captures: [.cap]) parseTest("a|", alt("a", empty())) parseTest("|b", alt(empty(), "b")) parseTest("|b|", alt(empty(), "b", empty())) @@ -768,32 +769,32 @@ extension RegexTests { parseTest( #"a(?