diff --git a/README.md b/README.md index 8469719..fa460f6 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # TOONEncoder -A Swift encoder for [TOON](https://github.com/johannschopplich/toon) (Token-Oriented Object Notation), -a compact format designed to reduce LLM token usage by 30–60% compared to JSON. +A Swift encoder for [TOON](https://github.com/toon-format/spec) (Token-Oriented Object Notation), +a compact format designed to reduce LLM token usage by 30–60% compared with JSON. -LLM tokens have a cost, and JSON is verbose. +LLM tokens are expensive, and JSON is verbose. TOON saves tokens while remaining human-readable by -using indentation for structure and tabular format for uniform data: +using indentation for structure and a tabular format for uniform data: **JSON**: ```json @@ -24,8 +24,26 @@ users[2]{id,name,role}: 2,Bob,user ``` -For full details on TOON's design, benchmarks, and specification, -see the [TOON project README](https://github.com/johannschopplich/toon). +For full details on TOON's design, benchmarks, and specification, +see the [TOON specification](https://github.com/toon-format/spec). 
+ +## Features + +`TOONEncoder` conforms to **TOON specification version 3.0** (2025-11-24) +and implements the following features: + +- [x] Canonical number formatting (no trailing zeros, no leading zeros except `0`; `-0` normalized to `0`) +- [x] Correct escape sequences for strings (`\\`, `\"`, `\n`, `\r`, `\t`) +- [x] Three delimiter types: comma (default), tab, pipe +- [x] Array length validation +- [x] Object key order preservation +- [x] Array order preservation +- [x] Tabular format for uniform object arrays +- [x] Inline format for primitive arrays +- [x] Expanded list format for nested structures +- [x] Key folding to collapse single-key object chains into dotted paths +- [x] Configurable flatten depth to limit the depth of key folding +- [x] Collision avoidance so folded keys never collide with existing sibling keys ## Requirements @@ -117,7 +135,7 @@ items[2|]{sku|name|qty|price}: ### Length Markers -Add a `#` prefix to array lengths for emphasis: +Add a `#` prefix to array lengths for emphasis and readability: ```swift let data = [ @@ -171,7 +189,7 @@ items[2]{sku,qty,price}: ### Arrays of Arrays -When you have arrays containing primitive inner arrays: +For arrays containing primitive inner arrays: ```swift let pairs = [[1, 2], [3, 4]] @@ -187,6 +205,110 @@ pairs[2]: - [2]: 3,4 ``` +### Key Folding + +Key folding collapses single-key nested objects into dotted paths, reducing indentation and token count: + +```swift +struct Config: Codable { + struct Database: Codable { + struct Connection: Codable { + let host: String + let port: Int + } + let connection: Connection + } + let database: Database +} + +let config = Config( + database: .init( + connection: .init(host: "localhost", port: 5432) + ) +) + +let encoder = TOONEncoder() +let data = try encoder.encode(config) +``` + +Without key folding: +``` +database: + connection: + host: localhost + port: 5432 +``` + +Output with key folding (`encoder.keyFolding = .safe`): + +``` +database.connection: + 
host: localhost + port: 5432 +``` + +When enabled, key folding applies only when +all path segments are valid identifiers +(start with a letter or underscore and contain only alphanumerics or underscores), +each level in the chain is a single-key object, +and the folded path does not collide with an existing sibling key +(collision avoidance). + +#### Flatten Depth + +To control how aggressively key folding collapses nested objects, +use `flattenDepth`: + +```swift +struct Metrics: Codable { + struct Service: Codable { + struct CPU: Codable { + let usage: Double + } + let cpu: CPU + } + let service: Service +} + +let value = Metrics( + service: .init( + cpu: .init(usage: 0.73) + ) +) + +let encoder = TOONEncoder() +encoder.keyFolding = .safe +let data = try encoder.encode(value) +``` + +Output with unlimited `flattenDepth` (default): + +``` +service.cpu.usage: 0.73 +``` + +Output with `flattenDepth = 2`, which limits folded paths to two segments: + +```swift +encoder.flattenDepth = 2 +``` + +``` +service.cpu: + usage: 0.73 +``` + +> [!TIP] +> A `flattenDepth` less than 2 disables key folding, because a folded path needs at least two segments. + +### Version Information + +Check the supported TOON specification version: + +```swift +print(TOONEncoder.specVersion) // "3.0" +``` + ## License This project is available under the MIT license. diff --git a/Sources/TOONEncoder/TOONEncoder.swift b/Sources/TOONEncoder/TOONEncoder.swift index cda3370..8120e5c 100644 --- a/Sources/TOONEncoder/TOONEncoder.swift +++ b/Sources/TOONEncoder/TOONEncoder.swift @@ -1,8 +1,14 @@ import Foundation /// An encoder that converts Swift values to TOON format +/// +/// This encoder conforms to the TOON (Token-Oriented Object Notation) specification version 3.0. 
+/// For more information, see: https://github.com/toon-format/spec public final class TOONEncoder { + /// The TOON specification version this encoder conforms to + public static let specVersion = "3.0" + /// Number of spaces per indentation level public var indent: Int = 2 @@ -12,6 +18,42 @@ public final class TOONEncoder { /// Optional marker to prefix array lengths in headers public var lengthMarker: LengthMarker = .none + /// Key folding mode for collapsing single-key object chains into dotted paths + /// + /// When enabled, single-key nested objects like `{ a: { b: { c: 1 } } }` + /// are collapsed into `a.b.c: 1`. Only applies when all segments are valid identifiers. + /// + /// Example with `.safe`: + /// ```toon + /// user.profile.name: John + /// user.profile.age: 30 + /// ``` + public var keyFolding: KeyFolding = .disabled + + /// Maximum number of segments to include in a folded path when `keyFolding` is `.safe`. + /// + /// Controls how many nested single-key objects are collapsed into a dotted path. 
+ /// - Default is `Int.max` (unlimited folding depth) + /// - Values less than 2 have no practical folding effect + /// + /// Example with `flattenDepth = 2`: + /// - Input: `{ a: { b: { c: { d: 1 } } } }` + /// - Output: `a.b:` followed by nested `c:` and `d: 1` + /// + /// Example with `flattenDepth = Int.max` (default): + /// - Input: `{ a: { b: { c: 1 } } }` + /// - Output: `a.b.c: 1` + public var flattenDepth: Int = .max + + /// Key folding mode + public enum KeyFolding: Hashable, Sendable { + /// No key folding + case disabled + + /// Safe key folding: only fold when all segments are valid identifiers + case safe + } + /// Delimiter character used to separate array values and tabular row cells /// /// The delimiter determines how multiple values are separated in inline arrays @@ -74,6 +116,8 @@ public final class TOONEncoder { /// - `indent`: 2 spaces /// - `delimiter`: `.comma` /// - `lengthMarker`: `.none` + /// - `keyFolding`: `.disabled` + /// - `flattenDepth`: `Int.max` public init() {} /// Encodes the given value to TOON format @@ -135,15 +179,120 @@ public final class TOONEncoder { _ values: [String: Value], keyOrder: [String], output: inout [String], - depth: Int + depth: Int, + allowFolding: Bool = true ) { for key in keyOrder { guard let value = values[key] else { continue } - encodeKeyValuePair(key: key, value: value, output: &output, depth: depth) + encodeKeyValuePair( + key: key, + value: value, + output: &output, + depth: depth, + siblingKeys: keyOrder, + allowFolding: allowFolding + ) + } + } + + /// Attempts to fold a key path by following single-key object chains + /// Returns the folded path, final value, and whether we hit the depth limit, or nil if folding is not safe + /// - Parameters: + /// - key: The starting key of the chain + /// - value: The value associated with the key + /// - siblingKeys: Other keys at the same object depth (for collision avoidance) + private func tryFoldKeyPath( + key: String, + value: Value, + siblingKeys: 
[String] = [] + ) -> (path: String, value: Value, hitDepthLimit: Bool)? { + guard keyFolding == .safe else { return nil } + + // Values less than 2 have no practical folding effect + guard flattenDepth >= 2 else { return nil } + + var pathComponents: [String] = [key] + var currentValue = value + var hitDepthLimit = false + + // Follow the chain of single-key objects, respecting flattenDepth limit + while case .object(let nestedValues, let nestedKeyOrder) = currentValue, + nestedKeyOrder.count == 1, + let singleKey = nestedKeyOrder.first, + let nextValue = nestedValues[singleKey] + { + // Stop if we've reached the flattenDepth limit + guard pathComponents.count < flattenDepth else { + hitDepthLimit = true + break + } + + // Validate that the key is a safe identifier + guard singleKey.isValidIdentifierSegment else { + break + } + + pathComponents.append(singleKey) + currentValue = nextValue + } + + // Only fold if we found at least one nested level + guard pathComponents.count > 1 else { return nil } + + // Validate all components are safe identifiers + guard pathComponents.allSatisfy({ $0.isValidIdentifierSegment }) else { + return nil } + + let foldedPath = pathComponents.joined(separator: ".") + + // Collision avoidance: folded key must not equal any existing sibling key + if siblingKeys.contains(foldedPath) { + return nil + } + + return (path: foldedPath, value: currentValue, hitDepthLimit: hitDepthLimit) } - private func encodeKeyValuePair(key: String, value: Value, output: inout [String], depth: Int) { + private func encodeKeyValuePair( + key: String, + value: Value, + output: inout [String], + depth: Int, + siblingKeys: [String] = [], + allowFolding: Bool = true + ) { + // Try key folding if enabled and allowed + if allowFolding, + case let (path, value, hitDepthLimit)? 
= tryFoldKeyPath(key: key, value: value, siblingKeys: siblingKeys) + { + let encodedKey = encodeKey(path) + + switch value { + case .null, .bool, .int, .double, .string, .date, .url, .data: + if let encodedValue = encodePrimitive(value, delimiter: delimiter.rawValue, inObject: true) { + write(depth: depth, content: "\(encodedKey): \(encodedValue)", to: &output) + } + + case .array(let array): + encodeArray(key: path, array: array, output: &output, depth: depth) + + case .object(let values, let keyOrder): + write(depth: depth, content: "\(encodedKey):", to: &output) + if !keyOrder.isEmpty { + encodeObject( + values, + keyOrder: keyOrder, + output: &output, + depth: depth + 1, + allowFolding: !hitDepthLimit + ) + } + } + return + } + + // Regular encoding without folding let encodedKey = encodeKey(key) switch value { @@ -284,7 +433,7 @@ public final class TOONEncoder { for i in 1 ..< keyOrder.count { let key = keyOrder[i] guard let value = values[key] else { continue } - encodeKeyValuePair(key: key, value: value, output: &output, depth: depth + 1) + encodeKeyValuePair(key: key, value: value, output: &output, depth: depth + 1, siblingKeys: keyOrder) } } @@ -1382,11 +1531,13 @@ private struct IndexedCodingKey: CodingKey { } // Shared number formatter that's used to avoid scientific notation +// and format numbers in canonical decimal form (no trailing zeros) private let numberFormatter: NumberFormatter = { let formatter = NumberFormatter() formatter.numberStyle = .decimal formatter.usesGroupingSeparator = false formatter.maximumFractionDigits = 15 + formatter.minimumFractionDigits = 0 // Prevents trailing zeros formatter.locale = Locale(identifier: "en_US_POSIX") return formatter }() @@ -1471,4 +1622,11 @@ private extension String { return range(of: #"^[A-Z_][\w.]*$"#, options: [.regularExpression, .caseInsensitive]) != nil } + + var isValidIdentifierSegment: Bool { + // Match pattern for a single identifier segment (no dots) + // Must start with letter or underscore, 
followed by word characters + return range(of: #"^[A-Z_]\w*$"#, options: [.regularExpression, .caseInsensitive]) + != nil + } } diff --git a/Tests/TOONEncoderTests/TOONEncoderTests.swift b/Tests/TOONEncoderTests/TOONEncoderTests.swift index 3c1b43c..8cb9bac 100644 --- a/Tests/TOONEncoderTests/TOONEncoderTests.swift +++ b/Tests/TOONEncoderTests/TOONEncoderTests.swift @@ -1358,4 +1358,341 @@ struct TOONEncoderTests { let simpleResult = String(data: try encoder.encode(simpleObj), encoding: .utf8)! #expect(!simpleResult.hasSuffix("\n")) } + + // MARK: - Key Folding Tests (TOON 2.1+) + + @Test func keyFoldingDisabled() async throws { + struct NestedObject: Codable { + struct User: Codable { + struct Profile: Codable { + let name: String + } + let profile: Profile + } + let user: User + } + + let encoder = TOONEncoder() + encoder.keyFolding = .disabled + + let obj = NestedObject(user: .init(profile: .init(name: "Ada"))) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + let expected = """ + user: + profile: + name: Ada + """ + #expect(result == expected) + } + + @Test func keyFoldingSafe() async throws { + struct NestedObject: Codable { + struct User: Codable { + struct Profile: Codable { + let name: String + } + let profile: Profile + } + let user: User + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + + let obj = NestedObject(user: .init(profile: .init(name: "Ada"))) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! 
+ + let expected = """ + user.profile.name: Ada + """ + #expect(result == expected) + } + + @Test func keyFoldingWithMultipleFields() async throws { + struct Config: Codable { + struct Database: Codable { + struct Connection: Codable { + let host: String + let port: Int + } + let connection: Connection + } + struct API: Codable { + let key: String + } + let database: Database + let api: API + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + + let obj = Config( + database: .init(connection: .init(host: "localhost", port: 5432)), + api: .init(key: "secret") + ) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + let expected = """ + database.connection: + host: localhost + port: 5432 + api.key: secret + """ + #expect(result == expected) + } + + @Test func keyFoldingStopsAtInvalidIdentifier() async throws { + // Keys with hyphens cannot be folded + struct ValidThenInvalid: Codable { + struct Data: Codable { + struct UserInfo: Codable { + let field1: String + + enum CodingKeys: String, CodingKey { + case field1 = "field-1" + } + } + let userInfo: UserInfo + + enum CodingKeys: String, CodingKey { + case userInfo = "user-info" + } + } + let data: Data + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + + let obj = ValidThenInvalid(data: .init(userInfo: .init(field1: "value"))) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + // Nothing is folded: the chain from "data" stops at "user-info", which contains a hyphen + let expected = """ + data: + "user-info": + "field-1": value + """ + #expect(result == expected) + } + + @Test func keyFoldingWithArray() async throws { + struct Container: Codable { + struct Wrapper: Codable { + let items: [Int] + } + let wrapper: Wrapper + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + + let obj = Container(wrapper: .init(items: [1, 2, 3])) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! 
+ + let expected = """ + wrapper.items[3]: 1,2,3 + """ + #expect(result == expected) + } + + @Test func versionDeclaration() async throws { + #expect(TOONEncoder.specVersion == "3.0") + } + + @Test func canonicalNumberFormat() async throws { + // TOON specification requires canonical decimal form: no trailing fractional zeros + struct Numbers: Codable { + let a: Double + let b: Double + let c: Double + let d: Double + } + + let obj = Numbers(a: 1.5, b: 2.0, c: 0.1, d: 123.456) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + let expected = """ + a: 1.5 + b: 2 + c: 0.1 + d: 123.456 + """ + #expect(result == expected) + } + + // MARK: - flattenDepth Tests (TOON 3.0) + + @Test func flattenDepthUnlimited() async throws { + struct DeepNested: Codable { + struct Level1: Codable { + struct Level2: Codable { + struct Level3: Codable { + let value: Int + } + let level3: Level3 + } + let level2: Level2 + } + let level1: Level1 + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + encoder.flattenDepth = .max // Unlimited (default) + + let obj = DeepNested(level1: .init(level2: .init(level3: .init(value: 42)))) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + // All levels should be folded into a single dotted path + let expected = """ + level1.level2.level3.value: 42 + """ + #expect(result == expected) + } + + @Test func flattenDepthLimited() async throws { + struct DeepNested: Codable { + struct Level1: Codable { + struct Level2: Codable { + struct Level3: Codable { + let value: Int + } + let level3: Level3 + } + let level2: Level2 + } + let level1: Level1 + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + encoder.flattenDepth = 2 // Only fold 2 segments + + let obj = DeepNested(level1: .init(level2: .init(level3: .init(value: 42)))) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! 
+ + // Only first 2 levels should be folded + let expected = """ + level1.level2: + level3: + value: 42 + """ + #expect(result == expected) + } + + @Test func flattenDepthThree() async throws { + struct DeepNested: Codable { + struct Level1: Codable { + struct Level2: Codable { + struct Level3: Codable { + let value: Int + } + let level3: Level3 + } + let level2: Level2 + } + let level1: Level1 + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + encoder.flattenDepth = 3 + + let obj = DeepNested(level1: .init(level2: .init(level3: .init(value: 42)))) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + // First 3 levels should be folded + let expected = """ + level1.level2.level3: + value: 42 + """ + #expect(result == expected) + } + + @Test func flattenDepthOne() async throws { + // flattenDepth < 2 has no practical folding effect + struct NestedObject: Codable { + struct User: Codable { + let name: String + } + let user: User + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + encoder.flattenDepth = 1 // No folding effect + + let obj = NestedObject(user: .init(name: "Ada")) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! 
+ + // Should not fold because flattenDepth < 2 + let expected = """ + user: + name: Ada + """ + #expect(result == expected) + } + + // MARK: - Collision Avoidance Tests (TOON 3.0) + + @Test func keyFoldingCollisionAvoidance() async throws { + // Test that folding doesn't create keys that collide with existing siblings + // The key "a.b" is a literal sibling key, and folding "a" -> {b: 1} would create "a.b" + // which would collide, so folding should NOT happen + let encoder = TOONEncoder() + encoder.keyFolding = .safe + + // Create a structure where "a.b" is a literal key at the same level as "a" + struct CollisionTest: Codable { + struct Nested: Codable { + let b: Int + } + let ab: Int // Will be encoded as "a.b" (literal dotted key) + let a: Nested // Would be folded to "a.b" if not for collision + + enum CodingKeys: String, CodingKey { + case ab = "a.b" + case a + } + } + + let obj = CollisionTest(ab: 1, a: .init(b: 2)) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + // "a" should NOT be folded to "a.b" because "a.b" exists as a sibling + // Note: "a.b" is a valid unquoted key per spec (pattern allows dots) + let expected = """ + a.b: 1 + a: + b: 2 + """ + #expect(result == expected) + } + + @Test func keyFoldingNoCollision() async throws { + // Test normal folding when there's no collision + struct NoCollision: Codable { + struct A: Codable { + let b: Int + } + let a: A + let c: Int + } + + let encoder = TOONEncoder() + encoder.keyFolding = .safe + + let obj = NoCollision(a: .init(b: 1), c: 2) + let result = String(data: try encoder.encode(obj), encoding: .utf8)! + + // "a" should be folded to "a.b" since there's no collision + let expected = """ + a.b: 1 + c: 2 + """ + #expect(result == expected) + } }