From 7d03a1e6154d45d834037943138fdb2c90b2ff34 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Wed, 27 Apr 2022 20:12:48 +0100 Subject: [PATCH] Introduce new compiler interface Add the SPI interface `swiftCompilerLexRegexLiteral` and `swiftCompilerParseRegexLiteral` for the Swift compiler to call into. This allows us to avoid depending on other library API on the compiler side, while letting it keep the bridging gunk. While we're here, add an extra `String` return for the parsing function that could allow us to change the regex emission format in the future. This still needs to be plumbed through on the compiler side though. --- .../Regex/Parse/CompilerInterface.swift | 115 ++++++++++++++++ .../_RegexParser/Regex/Parse/Mocking.swift | 128 ------------------ Tests/RegexTests/ParseTests.swift | 34 ++--- 3 files changed, 130 insertions(+), 147 deletions(-) create mode 100644 Sources/_RegexParser/Regex/Parse/CompilerInterface.swift delete mode 100644 Sources/_RegexParser/Regex/Parse/Mocking.swift diff --git a/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift b/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift new file mode 100644 index 000000000..0856361d8 --- /dev/null +++ b/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift @@ -0,0 +1,115 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// The version number for the regex. This gets emitted as an argument to the +// Regex(_regexString:version:) initializer and should be bumped if the format +// of the regex string needs to be changed in a way that requires the runtime +// to be updated. 
+public let currentRegexLiteralFormatVersion = 1 + +@_spi(CompilerInterface) +public struct CompilerLexError: Error { + public var message: String + public var location: UnsafeRawPointer + public var completelyErroneous: Bool +} + +/// Interface for the Swift compiler. +/// +/// Attempt to lex a regex literal string. +/// +/// - Parameters: +/// - start: The pointer at which to start lexing the literal. +/// - bufferEnd: A pointer to the end of the buffer, which should not be lexed +/// past. +/// - mustBeRegex: Whether we expect a regex literal to be lexed here. If +/// `false`, a regex literal will only be lexed if it does not +/// produce an error. +/// +/// - Returns: If a regex literal was lexed, `resumePtr` specifies where to +/// resume lexing and `error` specifies a lexing error to emit. If +/// a regex literal was not lexed, `nil` is returned. +/// +@_spi(CompilerInterface) +public func swiftCompilerLexRegexLiteral( + start: UnsafeRawPointer, bufferEnd: UnsafeRawPointer, mustBeRegex: Bool +) -> (resumePtr: UnsafeRawPointer, error: CompilerLexError?)? { + do { + let (_, _, endPtr) = try lexRegex(start: start, end: bufferEnd) + return (resumePtr: endPtr, error: nil) + } catch let error as DelimiterLexError { + if !mustBeRegex { + // This token can be something else. Let the client fallback. + return nil + } + let completelyErroneous: Bool + switch error.kind { + case .unterminated, .multilineClosingNotOnNewline: + // These can be recovered from. + completelyErroneous = false + case .unprintableASCII, .invalidUTF8: + // We don't currently have good recovery behavior for these. + completelyErroneous = true + case .unknownDelimiter: + // An unknown delimiter should be recovered from, as we may want to try + // lex something else. + return nil + } + // For now every lexer error is emitted at the starting delimiter. 
+ let compilerError = CompilerLexError( + message: "\(error)", location: start, + completelyErroneous: completelyErroneous + ) + return (error.resumePtr, compilerError) + } catch { + fatalError("Should be a DelimiterLexError") + } +} + +@_spi(CompilerInterface) +public struct CompilerParseError: Error { + public var message: String + public var location: String.Index? +} + +/// Interface for the Swift compiler. +/// +/// Attempt to parse a regex literal string. +/// +/// - Parameters: +/// - input: The regex input string, including delimiters. +/// - captureBufferOut: A buffer into which the captures of the regex will +/// be encoded upon a successful parse. +/// +/// - Returns: The string to emit along with its version number. +/// - Throws: `CompilerParseError` if there was a parsing error. +@_spi(CompilerInterface) +public func swiftCompilerParseRegexLiteral( + _ input: String, captureBufferOut: UnsafeMutableRawBufferPointer +) throws -> (regexToEmit: String, version: Int) { + do { + let ast = try parseWithDelimiters(input) + // Serialize the capture structure for later type inference. + assert(captureBufferOut.count >= input.utf8.count) + ast.captureStructure.encode(to: captureBufferOut) + + // For now we just return the input as the regex to emit. This could be + // changed in the future if we need to back-deploy syntax to something already + // known to the matching engine, or otherwise change the format. Note + // however that it will need plumbing through on the compiler side. + return (regexToEmit: input, version: currentRegexLiteralFormatVersion) + } catch { + throw CompilerParseError( + message: "cannot parse regular expression: \(String(describing: error))", + location: (error as? 
LocatedErrorProtocol)?.location.start + ) + } +} diff --git a/Sources/_RegexParser/Regex/Parse/Mocking.swift b/Sources/_RegexParser/Regex/Parse/Mocking.swift deleted file mode 100644 index 56294e2d3..000000000 --- a/Sources/_RegexParser/Regex/Parse/Mocking.swift +++ /dev/null @@ -1,128 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -@available(*, deprecated, message: "moving to SwiftCompilerModules") -private func copyCString(_ str: String) -> UnsafePointer { - let count = str.utf8.count + 1 - return str.withCString { - assert($0[count-1] == 0) - let ptr = UnsafeMutablePointer.allocate(capacity: count) - ptr.initialize(from: $0, count: count) - return UnsafePointer(ptr) - } -} - -/// Interface for libswift. -/// -/// Attempt to lex a regex literal string. -/// -/// - Parameters: -/// - CurPtrPtr: A pointer to the current pointer of lexer, which should be -/// the start of the literal. This will be advanced to the point -/// at which the lexer should resume, or will remain the same if -/// this is not a regex literal. -/// - BufferEnd: A pointer to the end of the buffer, which should not be lexed -/// past. -/// - ErrorOut: If an error is encountered, this will be set to the error -/// string. -/// -/// - Returns: A bool indicating whether lexing was completely erroneous, and -/// cannot be recovered from, or false if there either was no error, -/// or there was a recoverable error. 
-@available(*, deprecated, message: "moving to SwiftCompilerModules") -func libswiftLexRegexLiteral( - _ curPtrPtr: UnsafeMutablePointer?>?, - _ bufferEndPtr: UnsafePointer?, - _ errOut: UnsafeMutablePointer?>? -) -> /*CompletelyErroneous*/ CBool { - guard let curPtrPtr = curPtrPtr, let inputPtr = curPtrPtr.pointee, - let bufferEndPtr = bufferEndPtr - else { - fatalError("Expected lexing pointers") - } - guard let errOut = errOut else { fatalError("Expected error out param") } - - do { - let (_, _, endPtr) = try lexRegex(start: inputPtr, end: bufferEndPtr) - curPtrPtr.pointee = endPtr.assumingMemoryBound(to: CChar.self) - return false - } catch let error as DelimiterLexError { - if error.kind == .unknownDelimiter { - // An unknown delimiter should be recovered from, as we may want to try - // lex something else. - return false - } - errOut.pointee = copyCString("\(error)") - curPtrPtr.pointee = error.resumePtr.assumingMemoryBound(to: CChar.self) - - switch error.kind { - case .unterminated, .multilineClosingNotOnNewline: - // These can be recovered from. - return false - case .unprintableASCII, .invalidUTF8: - // We don't currently have good recovery behavior for these. - return true - case .unknownDelimiter: - fatalError("Already handled") - } - } catch { - fatalError("Should be a DelimiterLexError") - } -} - -// The version number for the regex. This gets emitted as an argument to the -// Regex(_regexString:version:) initializer and should be bumped if the format -// of the regex string needs to be changed in such a that requires the runtime -// to updated. -public let currentRegexLiteralFormatVersion: CUnsignedInt = 1 - -/// Interface for libswift. -/// -/// - Parameters: -/// - inputPtr: A null-terminated C string. -/// - errOut: A buffer accepting an error string upon error. -/// - versionOut: A buffer accepting a regex literal format -/// version. -/// - captureStructureOut: A buffer accepting a byte sequence representing the -/// capture structure. 
-/// - captureStructureSize: The size of the capture structure buffer. Must be -/// greater than or equal to `strlen(inputPtr)`. -@available(*, deprecated, message: "moving to SwiftCompilerModules") -func libswiftParseRegexLiteral( - _ inputPtr: UnsafePointer?, - _ errOut: UnsafeMutablePointer?>?, - _ versionOut: UnsafeMutablePointer?, - _ captureStructureOut: UnsafeMutableRawPointer?, - _ captureStructureSize: CUnsignedInt -) { - guard let s = inputPtr else { fatalError("Expected input param") } - guard let errOut = errOut else { fatalError("Expected error out param") } - guard let versionOut = versionOut else { - fatalError("Expected version out param") - } - - versionOut.pointee = currentRegexLiteralFormatVersion - - let str = String(cString: s) - do { - let ast = try parseWithDelimiters(str) - // Serialize the capture structure for later type inference. - if let captureStructureOut = captureStructureOut { - assert(captureStructureSize >= str.utf8.count) - let buffer = UnsafeMutableRawBufferPointer( - start: captureStructureOut, count: Int(captureStructureSize)) - ast.captureStructure.encode(to: buffer) - } - } catch { - errOut.pointee = copyCString( - "cannot parse regular expression: \(String(describing: error))") - } -} diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 94c134853..e66078831 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -@testable import _RegexParser +@testable @_spi(CompilerInterface) import _RegexParser import XCTest @testable import _StringProcessing @@ -281,24 +281,20 @@ func delimiterLexingDiagnosticTest( } } -func libswiftDiagnosticMessageTest( - _ input: String, _ expectedErr: String, file: StaticString = #file, - line: UInt = #line +func compilerInterfaceDiagnosticMessageTest( + _ input: String, _ expectedErr: String, + file: StaticString = #file, line: UInt = 
#line ) { - var errPtr: UnsafePointer? - var version: CUnsignedInt = 0 - - libswiftParseRegexLiteral( - input, &errPtr, &version, /*captureStructure*/ nil, - /*captureStructureSize*/ 0 - ) - - guard let errPtr = errPtr else { - XCTFail("Unexpected test pass", file: file, line: line) - return + do { + let captureBuffer = UnsafeMutableRawBufferPointer(start: nil, count: 0) + _ = try swiftCompilerParseRegexLiteral( + input, captureBufferOut: captureBuffer) + XCTFail("Expected parse error", file: file, line: line) + } catch let error as CompilerParseError { + XCTAssertEqual(expectedErr, error.message, file: file, line: line) + } catch { + fatalError("Expected CompilerParseError") } - let err = String(cString: errPtr) - XCTAssertEqual(expectedErr, err, file: file, line: line) } extension RegexTests { @@ -2547,8 +2543,8 @@ extension RegexTests { delimiterLexingDiagnosticTest("#/\n#/#", .multilineClosingNotOnNewline) } - func testlibswiftDiagnostics() { - libswiftDiagnosticMessageTest( + func testCompilerInterfaceDiagnostics() { + compilerInterfaceDiagnosticMessageTest( "#/[x*/#", "cannot parse regular expression: expected ']'") } }