Skip to content
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([
"-Xfrontend",
"-define-availability",
"-Xfrontend",
"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
"SwiftStdlib 5.7:macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0",
"-Xfrontend",
"-define-availability",
"-Xfrontend",
Expand Down
18 changes: 13 additions & 5 deletions Sources/RegexBenchmark/BenchmarkRunner.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import Foundation
@_spi(RegexBenchmark) import _StringProcessing

/// The maximum number of times to re-run a benchmark whose results are too
/// variant. Re-running stops early as soon as a run is no longer too variant.
private var rerunCount: Int { 3 }

struct BenchmarkRunner {
let suiteName: String
var suite: [any RegexBenchmark] = []
Expand Down Expand Up @@ -82,11 +85,16 @@ struct BenchmarkRunner {
for b in suite {
var result = measure(benchmark: b, samples: samples)
if result.runtimeIsTooVariant {
print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)")
print(result.runtime)
print("Rerunning \(b.name)")
result = measure(benchmark: b, samples: result.runtime.samples*2)
print(result.runtime)
for _ in 0..<rerunCount {
print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)")
print(result.runtime)
print("Rerunning \(b.name)")
result = measure(benchmark: b, samples: result.runtime.samples*2)
print(result.runtime)
if !result.runtimeIsTooVariant {
break
}
}
if result.runtimeIsTooVariant {
fatalError("Benchmark \(b.name) is too variant")
}
Expand Down
12 changes: 10 additions & 2 deletions Sources/_StringProcessing/Engine/MEBuiltins.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,21 @@ extension Processor {
_ isStrictASCII: Bool,
_ isScalarSemantics: Bool
) -> Input.Index? {

// ASCII fast-path
if let (next, result) = input._quickMatch(
cc, at: currentPosition, isScalarSemantics: isScalarSemantics
) {
return result == isInverted ? nil : next
}

guard let char = load(), let scalar = loadScalar() else {
return nil
}

let asciiCheck = (char.isASCII && !isScalarSemantics)
let asciiCheck = !isStrictASCII
|| (scalar.isASCII && isScalarSemantics)
|| !isStrictASCII
|| char.isASCII

var matched: Bool
var next: Input.Index
Expand Down
65 changes: 62 additions & 3 deletions Sources/_StringProcessing/Engine/Metrics.swift
Original file line number Diff line number Diff line change
@@ -1,13 +1,71 @@
extension Processor {
#if PROCESSOR_MEASUREMENTS_ENABLED
/// Bookkeeping for a processor run. This full variant, with stored counters,
/// is compiled only when `PROCESSOR_MEASUREMENTS_ENABLED` is defined.
struct ProcessorMetrics {
/// Counts keyed by instruction opcode.
/// NOTE(review): presumably the number of times each opcode was executed;
/// confirm against `measure()`.
var instructionCounts: [Instruction.OpCode: Int] = [:]
/// Incremented by `addBacktrack()`.
var backtracks: Int = 0
/// Incremented by `addReset()`.
var resets: Int = 0
/// Incremented by `Processor.endCycleMetrics()`.
var cycleCount: Int = 0

/// Whether tracing was requested when the metrics were created.
var isTracingEnabled: Bool = false
/// Whether measuring was requested when the metrics were created; checked by
/// `Processor.measureMetrics()` before it calls `measure()`.
var shouldMeasureMetrics: Bool = false

/// Stores both flags; every counter starts at its default of zero/empty.
init(isTracingEnabled: Bool, shouldMeasureMetrics: Bool) {
self.isTracingEnabled = isTracingEnabled
self.shouldMeasureMetrics = shouldMeasureMetrics
}
}

#else
/// Stand-in used when measurements are compiled out. It has no stored
/// properties and exposes the same read-only names as constants, so call
/// sites compile unchanged in both configurations.
struct ProcessorMetrics {
/// Always `false` in this configuration.
var isTracingEnabled: Bool { false }
/// Always `false` in this configuration.
var shouldMeasureMetrics: Bool { false }
/// Always `0` in this configuration.
var cycleCount: Int { 0 }

/// Accepts the same arguments as the measuring variant and discards them.
init(isTracingEnabled: Bool, shouldMeasureMetrics: Bool) { }
}
#endif
}

extension Processor {
  /// Hook to call before a cycle executes.
  ///
  /// When `PROCESSOR_MEASUREMENTS_ENABLED` is defined, this traces and
  /// measures once, before the very first cycle (while the cycle count is
  /// still zero). In all other cases it does nothing.
  mutating func startCycleMetrics() {
    #if PROCESSOR_MEASUREMENTS_ENABLED
    // Only the initial state, before any cycle has completed, is reported here;
    // every later state is reported by `endCycleMetrics()`.
    guard metrics.cycleCount == 0 else { return }
    trace()
    measureMetrics()
    #endif
  }

  /// Hook to call after a cycle executes.
  ///
  /// When `PROCESSOR_MEASUREMENTS_ENABLED` is defined, this bumps the cycle
  /// count, then traces, measures, and re-checks the processor invariants.
  /// Otherwise it does nothing.
  mutating func endCycleMetrics() {
    #if PROCESSOR_MEASUREMENTS_ENABLED
    metrics.cycleCount += 1
    trace()
    measureMetrics()
    _checkInvariants()
    #endif
  }
}

extension Processor.ProcessorMetrics {
  /// Records one reset.
  ///
  /// Increments `resets` when `PROCESSOR_MEASUREMENTS_ENABLED` is defined;
  /// otherwise the body is empty. The increment is not gated on
  /// `shouldMeasureMetrics`.
  mutating func addReset() {
    #if PROCESSOR_MEASUREMENTS_ENABLED
    resets += 1
    #endif
  }

  /// Records one backtrack.
  ///
  /// Increments `backtracks` when `PROCESSOR_MEASUREMENTS_ENABLED` is defined;
  /// otherwise the body is empty. The increment is not gated on
  /// `shouldMeasureMetrics`.
  mutating func addBacktrack() {
    #if PROCESSOR_MEASUREMENTS_ENABLED
    backtracks += 1
    #endif
  }
}

extension Processor {
#if PROCESSOR_MEASUREMENTS_ENABLED
func printMetrics() {
print("===")
print("Total cycle count: \(cycleCount)")
print("Total cycle count: \(metrics.cycleCount)")
print("Backtracks: \(metrics.backtracks)")
print("Resets: \(metrics.resets)")
print("Instructions:")
Expand All @@ -30,8 +88,9 @@ extension Processor {
}

mutating func measureMetrics() {
if shouldMeasureMetrics {
if metrics.shouldMeasureMetrics {
measure()
}
}
#endif
}
101 changes: 68 additions & 33 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct Processor {
/// of the search. `input` can be a "supersequence" of the subject, while
/// `input[subjectBounds]` is the logical entity that is being searched.
let input: Input

/// The bounds of the logical subject in `input`.
///
/// `subjectBounds` represents the bounds of the string or substring that a
Expand All @@ -46,7 +46,7 @@ struct Processor {
/// `subjectBounds` is always equal to or a subrange of
/// `input.startIndex..<input.endIndex`.
let subjectBounds: Range<Position>

/// The bounds within the subject for an individual search.
///
/// `searchBounds` is equal to `subjectBounds` in some cases, but can be a
Expand All @@ -62,7 +62,7 @@ struct Processor {
let instructions: InstructionList<Instruction>

// MARK: Resettable state

/// The current search position while processing.
///
/// `currentPosition` must always be in the range `subjectBounds` or equal
Expand All @@ -81,16 +81,15 @@ struct Processor {

var wordIndexCache: Set<String.Index>? = nil
var wordIndexMaxIndex: String.Index? = nil

var state: State = .inProgress

var failureReason: Error? = nil

// MARK: Metrics, debugging, etc.
var cycleCount = 0
var isTracingEnabled: Bool
let shouldMeasureMetrics: Bool
var metrics: ProcessorMetrics = ProcessorMetrics()
var metrics: ProcessorMetrics

/// Set if the string has fast contiguous UTF-8 available
let fastUTF8: UnsafeRawPointer?
}

extension Processor {
Expand All @@ -116,15 +115,21 @@ extension Processor {
self.subjectBounds = subjectBounds
self.searchBounds = searchBounds
self.matchMode = matchMode
self.isTracingEnabled = isTracingEnabled
self.shouldMeasureMetrics = shouldMeasureMetrics

self.metrics = ProcessorMetrics(
isTracingEnabled: isTracingEnabled,
shouldMeasureMetrics: shouldMeasureMetrics)

self.currentPosition = searchBounds.lowerBound

// Initialize registers with end of search bounds
self.registers = Registers(program, searchBounds.upperBound)
self.storedCaptures = Array(
repeating: .init(), count: program.registerInfo.captures)

// print(MemoryLayout<Processor>.size)
self.fastUTF8 = input._unsafeFastUTF8?.baseAddress

_checkInvariants()
}

Expand All @@ -144,8 +149,8 @@ extension Processor {

self.state = .inProgress
self.failureReason = nil
if shouldMeasureMetrics { metrics.resets += 1 }

metrics.addReset()
_checkInvariants()
}

Expand All @@ -156,6 +161,16 @@ extension Processor {
assert(subjectBounds.upperBound <= input.endIndex)
assert(currentPosition >= searchBounds.lowerBound)
assert(currentPosition <= searchBounds.upperBound)

assert({
guard let utf8 = self.fastUTF8 else { return true }
var copy = input
return copy.withUTF8 {
let base = UnsafeRawPointer($0.baseAddress!)
return utf8 == base
}
}())

}
}

Expand Down Expand Up @@ -186,7 +201,7 @@ extension Processor {
currentPosition = idx
return true
}

// Advances in unicode scalar view
mutating func consumeScalar(_ n: Distance) -> Bool {
guard let idx = input.unicodeScalars.index(
Expand Down Expand Up @@ -265,11 +280,41 @@ extension Processor {
return true
}

@inline(never)
@_effects(releasenone)
func loadScalar() -> Unicode.Scalar? {
currentPosition < end ? input.unicodeScalars[currentPosition] : nil
guard currentPosition < end else { return nil }
// if let utf8 = self.fastUTF8 {
// let firstByte = utf8[currentPosition.encodedOffset]
// if firstByte < 0x80 {
// let returnValue = Unicode.Scalar(firstByte)
// // TODO: More comprehensive assertion framework to test before and after
// // TODO: unsafe-ish optimizations
// assert(returnValue == input.unicodeScalars[currentPosition])
//
// return returnValue
// }
//
// }
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: investigate re-enabling this code, or even consider looking at all potential callers of loadScalar.

return input.unicodeScalars[currentPosition]
}

func _doMatchScalar(_ s: Unicode.Scalar, _ boundaryCheck: Bool) -> Input.Index? {
guard currentPosition < end else { return nil }

if s.isASCII, let utf8 = self.fastUTF8 {
let nextByteIdx = input.utf8.index(after: currentPosition)
if utf8.loadByte(currentPosition) == s.value {
// TODO: comprehensive assertion framework
assert(s == input.unicodeScalars[currentPosition])
if (!boundaryCheck || input.isOnGraphemeClusterBoundary(nextByteIdx)) {
return nextByteIdx
}
}
return nil
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: add assertion to check fast-path produces same result as slow path

} // 13-22ms, after: 22-25ms ???
// Now down to 3ms
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: remove


if s == loadScalar(),
let idx = input.unicodeScalars.index(
currentPosition,
Expand All @@ -281,7 +326,7 @@ extension Processor {
return nil
}
}

mutating func matchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) -> Bool {
guard let next = _doMatchScalar(s, boundaryCheck) else {
signalFailure()
Expand Down Expand Up @@ -355,7 +400,7 @@ extension Processor {
_uncheckedForcedConsumeOne()
return true
}

// Matches the next scalar if it is not a newline
mutating func matchAnyNonNewlineScalar() -> Bool {
guard let s = loadScalar(), !s.isNewline else {
Expand Down Expand Up @@ -401,8 +446,8 @@ extension Processor {
storedCaptures = capEnds
registers.ints = intRegisters
registers.positions = posRegisters
if shouldMeasureMetrics { metrics.backtracks += 1 }

metrics.addBacktrack()
}

mutating func abort(_ e: Error? = nil) {
Expand Down Expand Up @@ -436,23 +481,13 @@ extension Processor {
// TODO: What should we do here?
fatalError("Invalid code: Tried to clear save points when empty")
}

mutating func cycle() {
_checkInvariants()
assert(state == .inProgress)

#if PROCESSOR_MEASUREMENTS_ENABLED
if cycleCount == 0 {
trace()
measureMetrics()
}
defer {
cycleCount += 1
trace()
measureMetrics()
_checkInvariants()
}
#endif
startCycleMetrics()
defer { endCycleMetrics() }

let (opcode, payload) = fetch().destructure
switch opcode {
Expand Down
5 changes: 5 additions & 0 deletions Sources/_StringProcessing/Engine/Tracing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
//
//===----------------------------------------------------------------------===//


// TODO: Remove this protocol (and/or reuse it for something like a FastProcessor)
extension Processor: TracedProcessor {
var cycleCount: Int { metrics.cycleCount }
var isTracingEnabled: Bool { metrics.isTracingEnabled }

var isFailState: Bool { state == .fail }
var isAcceptState: Bool { state == .accept }

Expand Down
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ struct Executor {
subjectBounds: subjectBounds,
searchBounds: searchBounds)
#if PROCESSOR_MEASUREMENTS_ENABLED
defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } }
defer { if cpu.metrics.shouldMeasureMetrics { cpu.printMetrics() } }
#endif
var low = searchBounds.lowerBound
let high = searchBounds.upperBound
Expand Down Expand Up @@ -60,7 +60,7 @@ struct Executor {
var cpu = engine.makeProcessor(
input: input, bounds: subjectBounds, matchMode: mode)
#if PROCESSOR_MEASUREMENTS_ENABLED
defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } }
defer { if cpu.metrics.shouldMeasureMetrics { cpu.printMetrics() } }
#endif
return try _match(input, from: subjectBounds.lowerBound, using: &cpu)
}
Expand Down
Loading