Skip to content

Commit 2db08b3

Browse files
authored
Merge pull request #405 from apple/accidentally-quadratic-BigString-init
[BigString] Fix accidentally quadratic `BigString.init`
2 parents 3d2dc41 + 5e1fe6e commit 2db08b3

File tree

3 files changed

+118
-3
lines changed

3 files changed

+118
-3
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift Collections open source project
4+
//
5+
// Copyright (c) 2024 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import CollectionsBenchmark
13+
import _RopeModule
14+
import Foundation
15+
16+
/// A fixed pool of Unicode scalars used to generate random benchmark text.
///
/// The pool is the concatenation, in order, of every scalar in the half-open
/// code point ranges listed below.
let someLatinSymbols: [UnicodeScalar] = {
  let codePointRanges: [Range<Int>] = [
    0x20 ..< 0x7f,
    0xa1 ..< 0xad,
    0xae ..< 0x2af,
    0x300 ..< 0x370,
    0x1e00 ..< 0x1eff,
  ]
  var pool: [UnicodeScalar] = []
  for codePoints in codePointRanges {
    for value in codePoints {
      // Every listed code point is a valid scalar (none fall in the
      // surrogate range), so the unwrap cannot fail.
      pool.append(UnicodeScalar(value)!)
    }
  }
  return pool
}()
25+
26+
extension UnicodeScalar {
  /// Picks one scalar from `someLatinSymbols`, drawing randomness from `rng`.
  ///
  /// - Parameter rng: The generator consulted for the selection.
  /// - Returns: An element of `someLatinSymbols`.
  static func randomLatin(
    using rng: inout some RandomNumberGenerator
  ) -> Self {
    let choice: Self? = someLatinSymbols.randomElement(using: &rng)
    // `randomElement` only returns nil for an empty collection, and the pool
    // is a non-empty constant.
    return choice!
  }
}
33+
34+
extension String.UnicodeScalarView {
  /// Builds a scalar view holding `runeCount` scalars, each obtained from
  /// `UnicodeScalar.randomLatin(using:)`.
  ///
  /// - Parameters:
  ///   - runeCount: The number of scalars to append.
  ///   - rng: The generator passed through to each scalar selection.
  /// - Returns: The accumulated scalar view.
  static func randomLatin(
    runeCount: Int, using rng: inout some RandomNumberGenerator
  ) -> Self {
    var scalars = Self()
    (0 ..< runeCount).forEach { _ in
      let next = UnicodeScalar.randomLatin(using: &rng)
      scalars.append(next)
    }
    return scalars
  }
}
45+
46+
extension String {
  /// Creates a string from `runeCount` scalars produced by
  /// `String.UnicodeScalarView.randomLatin(runeCount:using:)`.
  ///
  /// - Parameters:
  ///   - runeCount: The number of Unicode scalars in the result.
  ///   - rng: The generator used to pick the scalars.
  /// - Returns: A string wrapping the generated scalar view.
  static func randomLatin(
    runeCount: Int, using rng: inout some RandomNumberGenerator
  ) -> Self {
    Self(String.UnicodeScalarView.randomLatin(runeCount: runeCount, using: &rng))
  }
}
55+
56+
/// Benchmark input that wraps a string produced by
/// `String.randomLatin(runeCount:using:)`.
struct NativeStringInput {
  /// The generated text handed to the benchmark body.
  let value: String

  /// Generates the input text.
  ///
  /// - Parameters:
  ///   - runeCount: The number of Unicode scalars to generate.
  ///   - rng: The generator used to pick the scalars.
  init(runeCount: Int, using rng: inout some RandomNumberGenerator) {
    let generated: String = .randomLatin(runeCount: runeCount, using: &rng)
    value = generated
  }
}
63+
64+
/// Benchmark input whose string is obtained by casting an `NSString` that was
/// built from UTF-16 code units.
struct BridgedStringInput {
  /// The `NSString`-derived text handed to the benchmark body.
  let value: String

  /// Generates random text, copies its UTF-16 code units into an `NSString`,
  /// and stores that object cast to `String`.
  ///
  /// - Parameters:
  ///   - runeCount: The number of Unicode scalars to generate.
  ///   - rng: The generator used to pick the scalars.
  init(runeCount: Int, using rng: inout some RandomNumberGenerator) {
    let string = String.randomLatin(runeCount: runeCount, using: &rng)
    let utf16 = Array(string.utf16)
    let cocoa = utf16.withUnsafeBufferPointer { buffer -> NSString in
      // An empty buffer is allowed to have a nil base address, so do not
      // force-unwrap it; an empty input simply becomes an empty NSString.
      guard let base = buffer.baseAddress else { return NSString() }
      return NSString(characters: base, length: buffer.count)
    }
    self.value = cocoa as String
  }
}
76+
77+
78+
extension Benchmark {
  /// Registers the input generators and tasks that measure `BigString`
  /// initialization from `NativeStringInput` and `BridgedStringInput` values.
  ///
  /// Does nothing when the availability check below fails.
  public mutating func addBigStringBenchmarks() {
    guard #available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *) else {
      return
    }

    // Input generators: each call uses a fresh system random number generator.
    registerInputGenerator(for: NativeStringInput.self) { size in
      var generator = SystemRandomNumberGenerator()
      return NativeStringInput(runeCount: size, using: &generator)
    }

    registerInputGenerator(for: BridgedStringInput.self) { size in
      var generator = SystemRandomNumberGenerator()
      return BridgedStringInput(runeCount: size, using: &generator)
    }

    // Tasks: construct a BigString and hand it to `blackHole`.
    addSimple(
      title: "BigString init from native string",
      input: NativeStringInput.self
    ) { sample in
      let big = BigString(sample.value)
      blackHole(big)
    }

    addSimple(
      title: "BigString init from bridged string",
      input: BridgedStringInput.self
    ) { sample in
      let big = BigString(sample.value)
      blackHole(big)
    }
  }
}

Benchmarks/Sources/benchmark-tool/main.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ benchmark.addHeapBenchmarks()
3737
benchmark.addBitSetBenchmarks()
3838
benchmark.addTreeSetBenchmarks()
3939
benchmark.addCppBenchmarks()
40+
benchmark.addBigStringBenchmarks()
4041
#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS)
4142
benchmark.addFoundationBenchmarks()
4243
#endif

Sources/RopeModule/BigString/Basics/BigString+Ingester.swift

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,19 @@ extension BigString {
3939
var state: _CharacterRecognizer
4040

4141
init(_ input: Substring) {
42-
self.input = input
43-
self.start = input.startIndex
44-
self.state = _CharacterRecognizer()
42+
self.init(input, startState: _CharacterRecognizer())
4543
}
4644

4745
/// Sets up ingestion of `input`, seeding the character recognizer with
/// `startState`.
///
/// - Parameters:
///   - input: The text to ingest. It is stored after being converted to
///     contiguous UTF-8 storage.
///   - startState: The recognizer state to start from.
init(_ input: Substring, startState: __owned _CharacterRecognizer) {
  self.input = input
  // Prevent accidentally quadratic operation by ensuring that we have
  // a native UTF-8 string.
  // FIXME: This is wasteful: if `input` happens to be a bridged
  // FIXME: NSString instance, then it temporarily allocates a full
  // FIXME: copy of the (transcoded) input string, only to then copy
  // FIXME: its pieces into the tree later.
  // FIXME: We should have a direct ingester path for native UTF-16 data.
  self.input.makeContiguousUTF8()
  // `makeContiguousUTF8()` may have replaced the stored substring with a
  // fresh native copy, so take the start position from the stored value:
  // an index of the original `input` belongs to different storage and may
  // use a different encoding. When no conversion happened the two indices
  // are the same.
  self.start = self.input.startIndex
  self.state = startState
}

0 commit comments

Comments
 (0)