From 8b0e5f0533a2227d69a8b71814d4aaae277f43a0 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Mon, 16 May 2022 13:02:27 -0500
Subject: [PATCH] More unicode properties (#385)

Add validation testing for supported and unsupported Unicode properties,
along with support for the following properties:

- age
- numeric type
- numeric value
- lower/upper/titlecase mapping
- canonical combining class
---
 Sources/_RegexParser/Regex/AST/Atom.swift     |  21 +
 .../CharacterPropertyClassification.swift     |  87 +++-
 .../Regex/Parse/Diagnostics.swift             |  20 +-
 Sources/_RegexParser/Regex/Parse/Sema.swift   |   8 +-
 .../_StringProcessing/ConsumerInterface.swift |  31 +-
 Tests/RegexTests/ParseTests.swift             |  14 +
 Tests/RegexTests/UTS18Tests.swift             | 385 +++++++++++++++++-
 7 files changed, 526 insertions(+), 40 deletions(-)
diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift
index 075818bac..6721076fc 100644
--- a/Sources/_RegexParser/Regex/AST/Atom.swift
+++ b/Sources/_RegexParser/Regex/AST/Atom.swift
@@ -427,11 +427,32 @@ extension AST.Atom.CharacterProperty {
     /// Character name in the form `\p{name=...}`
     case named(String)
     
+    /// Numeric type.
+    case numericType(Unicode.NumericType)
+    
+    /// Numeric value.
+    case numericValue(Double)
+    
+    /// Case mapping.
+    case mapping(MapKind, String)
+    
+    /// Canonical Combining Class.
+    case ccc(Unicode.CanonicalCombiningClass)
+    
+    /// Character age, as per UnicodeScalar.Properties.age.
+    case age(major: Int, minor: Int)
+    
     case posix(Unicode.POSIXProperty)
 
     /// Some special properties implemented by PCRE and Oniguruma.
     case pcreSpecial(PCRESpecialCategory)
     case onigurumaSpecial(OnigurumaSpecialProperty)
+    
+    public enum MapKind: Hashable {
+      case lowercase
+      case uppercase
+      case titlecase
+    }
   }
 
   // TODO: erm, separate out or fold into something? splat it in?
diff --git a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift
index c0ece78ff..21b5ddc68 100644
--- a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift
+++ b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift
@@ -13,17 +13,17 @@ extension Source {
   typealias PropertyKind = AST.Atom.CharacterProperty.Kind
 
   static private func withNormalizedForms<T>(
-    _ str: String, match: (String) -> T?
-  ) -> T? {
+    _ str: String, match: (String) throws -> T?
+  ) rethrows -> T? {
     // This follows the rules provided by UAX44-LM3, including trying to drop an
     // "is" prefix, which isn't required by UTS#18 RL1.2, but is nice for
     // consistency with other engines and the Unicode.Scalar.Properties names.
     let str = str.filter { !$0.isPatternWhitespace && $0 != "_" && $0 != "-" }
                  .lowercased()
-    if let m = match(str) {
+    if let m = try match(str) {
       return m
     }
-    if str.hasPrefix("is"), let m = match(String(str.dropFirst(2))) {
+    if str.hasPrefix("is"), let m = try match(String(str.dropFirst(2))) {
       return m
     }
     return nil
@@ -79,6 +79,19 @@ extension Source {
     }
   }
 
+  static private func classifyNumericType(
+    _ str: String
+  ) -> Unicode.NumericType? {
+    withNormalizedForms(str) { str in
+      switch str {
+      case "decimal":   return .decimal
+      case "digit":     return .digit
+      case "numeric":   return .numeric
+      default:          return nil
+      }
+    }
+  }
+
   static private func classifyBoolProperty(
     _ str: String
   ) -> Unicode.BinaryProperty? {
@@ -361,6 +374,27 @@ extension Source {
       }
     }
   }
+  
+  static func parseAge(_ value: String) -> Unicode.Version? {
+    // Age can be specified in the form '3.0' or 'V3_0'.
+    // Other formats are not supported.
+    var str = value[...]
+    
+    let separator: Character
+    if str.first == "V" {
+      str.removeFirst()
+      separator = "_"
+    } else {
+      separator = "."
+    }
+    
+    guard let sepIndex = str.firstIndex(of: separator),
+          let major = Int(str[..<sepIndex]),
+          let minor = Int(str[sepIndex...].dropFirst())
+    else { return nil }
+    
+    return (major, minor)
+  }
 
   static func classifyCharacterPropertyValueOnly(
     _ value: String
@@ -414,22 +448,51 @@ extension Source {
 
     // This uses the aliases defined in
     // https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt.
-    let match = withNormalizedForms(key) { key -> PropertyKind? in
-      switch key {
+    let match = try withNormalizedForms(key) { normalizedKey -> PropertyKind? in
+      switch normalizedKey {
       case "script", "sc":
-        if let script = classifyScriptProperty(value) {
-          return .script(script)
+        guard let script = classifyScriptProperty(value) else {
+          throw ParseError.unrecognizedScript(value)
         }
+        return .script(script)
       case "scriptextensions", "scx":
-        if let script = classifyScriptProperty(value) {
-          return .scriptExtension(script)
+        guard let script = classifyScriptProperty(value) else {
+          throw ParseError.unrecognizedScript(value)
         }
+        return .scriptExtension(script)
       case "gc", "generalcategory":
-        if let cat = classifyGeneralCategory(value) {
-          return .generalCategory(cat)
+        guard let cat = classifyGeneralCategory(value) else {
+          throw ParseError.unrecognizedCategory(value)
+        }
+        return .generalCategory(cat)
+      case "age":
+        guard let (major, minor) = parseAge(value) else {
+          throw ParseError.invalidAge(value)
         }
+        return .age(major: major, minor: minor)
       case "name", "na":
         return .named(value)
+      case "numericvalue", "nv":
+        guard let numericValue = Double(value) else {
+          throw ParseError.invalidNumericValue(value)
+        }
+        return .numericValue(numericValue)
+      case "numerictype", "nt":
+        guard let type = classifyNumericType(value) else {
+          throw ParseError.unrecognizedNumericType(value)
+        }
+        return .numericType(type)
+      case "slc", "simplelowercasemapping":
+        return .mapping(.lowercase, value)
+      case "suc", "simpleuppercasemapping":
+        return .mapping(.uppercase, value)
+      case "stc", "simpletitlecasemapping":
+        return .mapping(.titlecase, value)
+      case "ccc", "canonicalcombiningclass":
+        guard let cccValue = UInt8(value), cccValue <= 254 else {
+          throw ParseError.invalidCCC(value)
+        }
+        return .ccc(.init(rawValue: cccValue))
       default:
         break
       }
diff --git a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift
index d87fba918..05bf4ba1a 100644
--- a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift
+++ b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift
@@ -59,7 +59,13 @@ enum ParseError: Error, Hashable {
 
   case emptyProperty
   case unknownProperty(key: String?, value: String)
-
+  case unrecognizedScript(String)
+  case unrecognizedCategory(String)
+  case invalidAge(String)
+  case invalidNumericValue(String)
+  case unrecognizedNumericType(String)
+  case invalidCCC(String)
+  
   case expectedGroupSpecifier
   case unbalancedEndOfGroup
 
@@ -181,6 +187,18 @@ extension ParseError: CustomStringConvertible {
       return "extended syntax may not be disabled in multi-line mode"
     case .expectedCalloutArgument:
       return "expected argument to callout"
+    case .unrecognizedScript(let value):
+      return "unrecognized script '\(value)'"
+    case .unrecognizedCategory(let value):
+      return "unrecognized category '\(value)'"
+    case .unrecognizedNumericType(let value):
+      return "unrecognized numeric type '\(value)'"
+    case .invalidAge(let value):
+      return "invalid age format for '\(value)' - use '3.0' or 'V3_0' formats"
+    case .invalidNumericValue(let value):
+      return "invalid numeric value '\(value)'"
+    case .invalidCCC(let value):
+      return "invalid canonical combining class '\(value)'"
 
     // MARK: Semantic Errors
 
diff --git a/Sources/_RegexParser/Regex/Parse/Sema.swift b/Sources/_RegexParser/Regex/Parse/Sema.swift
index 9d5ae4576..be28754b8 100644
--- a/Sources/_RegexParser/Regex/Parse/Sema.swift
+++ b/Sources/_RegexParser/Regex/Parse/Sema.swift
@@ -127,8 +127,8 @@ extension RegexValidator {
     _ prop: Unicode.BinaryProperty, at loc: SourceLocation
   ) throws {
     switch prop {
-    case .asciiHexDigit, .alphabetic, .bidiMirrored, .cased, .caseIgnorable,
-        .changesWhenCasefolded, .changesWhenCasemapped,
+    case .asciiHexDigit, .alphabetic, .bidiControl, .bidiMirrored, .cased,
+        .caseIgnorable, .changesWhenCasefolded, .changesWhenCasemapped,
         .changesWhenNFKCCasefolded, .changesWhenLowercased,
         .changesWhenTitlecased, .changesWhenUppercased, .dash, .deprecated,
         .defaultIgnorableCodePoint, .diacratic, .extender,
@@ -150,7 +150,7 @@ extension RegexValidator {
     case .expandsOnNFC, .expandsOnNFD, .expandsOnNFKD, .expandsOnNFKC:
       throw error(.deprecatedUnicode(prop.rawValue.quoted), at: loc)
 
-    case .bidiControl, .compositionExclusion, .emojiComponent,
+    case .compositionExclusion, .emojiComponent,
         .extendedPictographic, .graphemeLink, .hyphen, .otherAlphabetic,
         .otherDefaultIgnorableCodePoint, .otherGraphemeExtended,
         .otherIDContinue, .otherIDStart, .otherLowercase, .otherMath,
@@ -169,7 +169,7 @@ extension RegexValidator {
     case .binary(let b, _):
       try validateBinaryProperty(b, at: loc)
     case .any, .assigned, .ascii, .generalCategory, .posix, .named, .script,
-        .scriptExtension:
+        .scriptExtension, .age, .numericType, .numericValue, .mapping, .ccc:
       break
     case .pcreSpecial:
       throw error(.unsupported("PCRE property"), at: loc)
diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift
index 90e573824..b6bbfd83e 100644
--- a/Sources/_StringProcessing/ConsumerInterface.swift
+++ b/Sources/_StringProcessing/ConsumerInterface.swift
@@ -145,10 +145,7 @@ extension String {
 }
 
 func consumeName(_ name: String, opts: MatchingOptions) -> MEProgram<String>.ConsumeFunction {
-  let consume = opts.semanticLevel == .graphemeCluster
-    ? consumeCharacterWithSingleScalar
-    : consumeScalar
-  
+  let consume = consumeFunction(for: opts)
   return consume(propertyScalarPredicate {
     // FIXME: name aliases not covered by $0.nameAlias are missed
     // e.g. U+FEFF has both 'BYTE ORDER MARK' and 'BOM' as aliases
@@ -491,6 +488,30 @@ extension AST.Atom.CharacterProperty {
       case .named(let n):
         return consumeName(n, opts: opts)
 
+      case .age(let major, let minor):
+        return consume {
+          guard let age = $0.properties.age else { return false }
+          return age <= (major, minor)
+        }
+        
+      case .numericValue(let value):
+        return consume { $0.properties.numericValue == value }
+        
+      case .numericType(let type):
+        return consume { $0.properties.numericType == type }
+        
+      case .ccc(let ccc):
+        return consume { $0.properties.canonicalCombiningClass == ccc }
+        
+      case .mapping(.lowercase, let value):
+        return consume { $0.properties.lowercaseMapping == value }
+
+      case .mapping(.uppercase, let value):
+        return consume { $0.properties.uppercaseMapping == value }
+
+      case .mapping(.titlecase, let value):
+        return consume { $0.properties.titlecaseMapping == value }
+
       case .posix(let p):
         return p.generateConsumer(opts)
 
@@ -525,7 +546,7 @@ extension Unicode.BinaryProperty {
     case .alphabetic:
       return consume(propertyScalarPredicate(\.isAlphabetic))
     case .bidiControl:
-      break
+      return consume(propertyScalarPredicate(\.isBidiControl))
     case .bidiMirrored:
       return consume(propertyScalarPredicate(\.isBidiMirrored))
     case .cased:
diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift
index 2d22ad252..d5325268e 100644
--- a/Tests/RegexTests/ParseTests.swift
+++ b/Tests/RegexTests/ParseTests.swift
@@ -2447,6 +2447,20 @@ extension RegexTests {
     diagnosticTest(#"\p{a=b"#, .unknownProperty(key: "a", value: "b"))
     diagnosticTest(#"\p{aaa[b]}"#, .unknownProperty(key: nil, value: "aaa"))
     diagnosticTest(#"\p{a=b=c}"#, .unknownProperty(key: "a", value: "b"))
+    diagnosticTest(#"\p{script=Not_A_Script}"#, .unrecognizedScript("Not_A_Script"))
+    diagnosticTest(#"\p{scx=Not_A_Script}"#, .unrecognizedScript("Not_A_Script"))
+    diagnosticTest(#"\p{gc=Not_A_Category}"#, .unrecognizedCategory("Not_A_Category"))
+    diagnosticTest(#"\p{age=3}"#, .invalidAge("3"))
+    diagnosticTest(#"\p{age=V3}"#, .invalidAge("V3"))
+    diagnosticTest(#"\p{age=3.0.1}"#, .invalidAge("3.0.1"))
+    diagnosticTest(#"\p{nv=A}"#, .invalidNumericValue("A"))
+    diagnosticTest(#"\p{Numeric_Value=1.2.3.4}"#, .invalidNumericValue("1.2.3.4"))
+    diagnosticTest(#"\p{nt=Not_A_NumericType}"#, .unrecognizedNumericType("Not_A_NumericType"))
+    diagnosticTest(#"\p{Numeric_Type=Nuemric}"#, .unrecognizedNumericType("Nuemric"))
+    diagnosticTest(#"\p{Simple_Lowercase_Mapping}"#, .unknownProperty(key: nil, value: "Simple_Lowercase_Mapping"))
+    diagnosticTest(#"\p{Simple_Lowercase_Mapping=}"#, .emptyProperty)
+    diagnosticTest(#"\p{ccc=255}"#, .invalidCCC("255"))
+    diagnosticTest(#"\p{ccc=Nada}"#, .invalidCCC("Nada"))
     diagnosticTest(#"(?#"#, .expected(")"))
     diagnosticTest(#"(?x"#, .expected(")"))
 
diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift
index d13b47b8d..7306632da 100644
--- a/Tests/RegexTests/UTS18Tests.swift
+++ b/Tests/RegexTests/UTS18Tests.swift
@@ -78,6 +78,9 @@ extension UTS18Tests {
   func testHexNotation() {
     expectFirstMatch("ab", regex(#"\u{61}\u{62}"#), "ab")
     expectFirstMatch("𝄞", regex(#"\u{1D11E}"#), "𝄞")
+    expectFirstMatch("\n", regex(#"\u{0A}"#), "\n")
+    expectFirstMatch("\r", regex(#"\u{0D}"#), "\r")
+    expectFirstMatch("\r\n", regex(#"\u{0D}\u{0A}"#), "\r\n")
   }
   
   // 1.1.1 Hex Notation and Normalization
@@ -148,12 +151,8 @@ extension UTS18Tests {
   }
   
   func testProperties_XFail() {
-    XCTExpectFailure("Need to support 'age' and 'block' properties") {
-      // XCTAssertFalse("z".contains(#/\p{age=3.1}/#))
-      XCTFail(#"\(#/\p{age=3.1}/#)"#)
-      // XCTAssertTrue("\u{1F00}".contains(#/\p{Block=Greek}/#))
-      XCTFail(#"\(#/\p{Block=Greek}/#)"#)
-    }
+    // Certain properties are unsupported, see below.
+    XCTAssertThrowsError(try Regex(#"\p{Block=Greek}"#))
   }
   
   // RL1.2a	Compatibility Properties
@@ -171,11 +170,16 @@ extension UTS18Tests {
     expectFirstMatch(input, regex(#"[[:xdigit:]]+"#), input[pos: ..<6])
     expectFirstMatch(input, regex(#"[[:alnum:]]+"#), input[pos: ..<11])
     expectFirstMatch(input, regex(#"[[:space:]]+"#), input[pos: 12..<13])
-    // TODO: blank
-    // TODO: cntrl
     expectFirstMatch(input, regex(#"[[:graph:]]+"#), input[pos: ..<11])
     expectFirstMatch(input, regex(#"[[:print:]]+"#), input[...])
     expectFirstMatch(input, regex(#"[[:word:]]+"#), input[pos: ..<11])
+
+    let blankAndControl = """
+     \t\u{01}\u{19}
+    """
+    // \t - tab is in both [:blank:] and [:cntrl:]
+    expectFirstMatch(blankAndControl, regex(#"[[:blank:]]+"#), blankAndControl[pos: ..<2])
+    expectFirstMatch(blankAndControl, regex(#"[[:cntrl:]]+"#), blankAndControl[pos: 1...])
   }
   
   //RL1.3 Subtraction and Intersection
@@ -196,7 +200,7 @@ extension UTS18Tests {
     
     // Non-ASCII lowercase + non-lowercase ASCII
     expectFirstMatch(input, regex(#"[\p{lowercase}~~\p{ascii}]+"#), input[pos: ..<3])
-    XCTAssertTrue("123%&^ABC".contains(regex(#"^[\p{lowercase}~~\p{ascii}]+$"#)))
+    XCTAssertTrue("123%&^ABCDéîøü".contains(regex(#"^[\p{lowercase}~~\p{ascii}]+$"#)))
   }
   
   func testSubtractionAndIntersectionPrecedence() {
@@ -380,12 +384,15 @@ extension UTS18Tests {
     XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef\X$"#)))
     XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef\X$"#).matchingSemantics(.unicodeScalar)))
     XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef.+\y"#).matchingSemantics(.unicodeScalar)))
+    XCTAssertFalse("abcdef🇬🇭".contains(regex(#"abcdef.$"#).matchingSemantics(.unicodeScalar)))
   }
   
   func testCharacterClassesWithStrings() {
     let regex = regex(#"[a-z🧐🇧🇪🇧🇫🇧🇬]"#)
     XCTAssertTrue("🧐".contains(regex))
     XCTAssertTrue("🇧🇫".contains(regex))
+    XCTAssertTrue("🧐".contains(regex.matchingSemantics(.unicodeScalar)))
+    XCTAssertTrue("🇧🇫".contains(regex.matchingSemantics(.unicodeScalar)))
   }
   
   // RL2.3 Default Word Boundaries
@@ -468,7 +475,7 @@ extension UTS18Tests {
       // XCTAssertTrue("^\u{3B1}\u{3B2}$".contains(#/[\N{GREEK SMALL LETTER ALPHA}-\N{GREEK SMALL LETTER BETA}]+/#))
     }
     
-    XCTExpectFailure("Other named char failures -- investigate") {
+    XCTExpectFailure("Other named char failures -- name aliases") {
       XCTAssertTrue("\u{C}".contains(regex(#"\N{FORM FEED}"#)))
       XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{BYTE ORDER MARK}"#)))
       XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{BOM}"#)))
@@ -486,7 +493,8 @@ extension UTS18Tests {
   // To meet this requirement, an implementation shall support wildcards in
   // Unicode property values.
   func testWildcardsInPropertyValues() {
-    XCTExpectFailure { XCTFail("Implement tests") }
+    // Unsupported
+    XCTAssertThrowsError(try Regex(#"\p{name=/a/"#))
   }
   
   // RL2.7 Full Properties
@@ -498,121 +506,462 @@ extension UTS18Tests {
   func testFullProperties() {
     // MARK: General
     // Name (Name_Alias)
+    XCTAssertTrue("a".contains(regex(#"\p{name=latin small letter a}"#)))
+
     // Block
+    // Unsupported
+
     // Age
+    XCTAssertTrue("a".contains(regex(#"\p{age=1.1}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{age=V1_1}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{age=14.0}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{age=V99_99}"#)))
+    
+    XCTAssertTrue("🥱".contains(regex(#"\p{age=12.0}"#)))
+    XCTAssertFalse("🥱".contains(regex(#"\p{age=11.0}"#)))
+
+    XCTAssertTrue("⌁".contains(regex(#"\p{age=3.0}"#)))
+    XCTAssertFalse("⌁".contains(regex(#"\p{age=2.0}"#)))
+    XCTAssertTrue("⌁".contains(regex(#"[\p{age=3.0}--\p{age=2.0}]"#)))
+
     // General_Category
+    XCTAssertTrue("a".contains(regex(#"\p{Ll}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{gc=Ll}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{gc=Ll}"#)))
+    XCTAssertFalse("A".contains(regex(#"\p{gc=Ll}"#)))
+    XCTAssertTrue("A".contains(regex(#"\p{gc=L}"#)))
+
+    XCTAssertTrue("a".contains(regex(#"\p{Any}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{Assigned}"#)))
+    XCTAssertTrue("a".contains(regex(#"\p{ASCII}"#)))
+
     // Script (Script_Extensions)
+    XCTAssertTrue("a".contains(regex(#"\p{script=latin}"#)))
+    XCTAssertTrue("강".contains(regex(#"\p{script=hangul}"#)))
+    
     // White_Space
+    XCTAssertTrue(" ".contains(regex(#"\p{whitespace}"#)))
+    XCTAssertTrue("\n".contains(regex(#"\p{White_Space}"#)))
+    XCTAssertFalse("a".contains(regex(#"\p{whitespace}"#)))
+
     // Alphabetic
+    XCTAssertTrue("aéîøüƒ".contains(regex(#"^\p{Alphabetic}+$"#)))
+
     // Hangul_Syllable_Type
+    // Unsupported
+
     // Noncharacter_Code_Point
+    XCTAssertTrue("\u{10FFFF}".contains(regex(#"\p{Noncharacter_Code_Point}"#)))
+    
     // Default_Ignorable_Code_Point
+    XCTAssertTrue("\u{00AD}".contains(regex(#"\p{Default_Ignorable_Code_Point}"#)))
+
     // Deprecated
+    XCTAssertTrue("ŉ".contains(regex(#"\p{Deprecated}"#)))
     // Logical_Order_Exception
+    XCTAssertTrue("ແ".contains(regex(#"\p{Logical_Order_Exception}"#)))
     // Variation_Selector
+    XCTAssertTrue("\u{FE07}".contains(regex(#"\p{Variation_Selector}"#)))
 
     // MARK: Numeric
     // Numeric_Value
+    XCTAssertTrue("3".contains(regex(#"\p{Numeric_Value=3}"#)))
+    XCTAssertFalse("4".contains(regex(#"\p{Numeric_Value=3}"#)))
+    XCTAssertTrue("④".contains(regex(#"\p{Numeric_Value=4}"#)))
+    XCTAssertTrue("⅕".contains(regex(#"\p{Numeric_Value=0.2}"#)))
+
     // Numeric_Type
+    XCTAssertTrue("3".contains(regex(#"\p{Numeric_Type=Decimal}"#)))
+    XCTAssertFalse("4".contains(regex(#"\p{Numeric_Type=Digit}"#)))
+
     // Hex_Digit
+    XCTAssertTrue("0123456789abcdef０１２３４５６７８９ＡＢＣＤＥＦ"
+      .contains(regex(#"^\p{Hex_Digit}+$"#)))
+    XCTAssertFalse("0123456789abcdefg".contains(regex(#"^\p{Hex_Digit}+$"#)))
     // ASCII_Hex_Digit
+    XCTAssertTrue("0123456789abcdef".contains(regex(#"^\p{ASCII_Hex_Digit}+$"#)))
+    XCTAssertFalse("0123456789abcdef０１２３４５６７８９ＡＢＣＤＥＦ"
+      .contains(regex(#"^\p{ASCII_Hex_Digit}+$"#)))
 
     // MARK: Identifiers
-    // ID_Continue
     // ID_Start
-    // XID_Continue
+    XCTAssertTrue("ABcd".contains(regex(#"^\p{ID_Start}+$"#)))
+    XCTAssertFalse(" ':`-".contains(regex(#"\p{ID_Start}"#)))
+
+    // ID_Continue
+    XCTAssertTrue("ABcd_1234".contains(regex(#"^\p{ID_Continue}+$"#)))
+    XCTAssertFalse(" ':`-".contains(regex(#"\p{ID_Continue}"#)))
+    
     // XID_Start
+    XCTAssertTrue("ABcd".contains(regex(#"^\p{XID_Start}+$"#)))
+    XCTAssertFalse(" ':`-".contains(regex(#"\p{XID_Start}"#)))
+
+    // XID_Continue
+    XCTAssertTrue("ABcd_1234".contains(regex(#"^\p{XID_Continue}+$"#)))
+    XCTAssertFalse(" ':`-".contains(regex(#"\p{XID_Continue}"#)))
+    
     // Pattern_Syntax
+    XCTAssertTrue(".+-:".contains(regex(#"^\p{Pattern_Syntax}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Pattern_Syntax}"#)))
+    
     // Pattern_White_Space
+    XCTAssertTrue(" \t\n".contains(regex(#"^\p{Pattern_White_Space}+$"#)))
+    XCTAssertFalse("abc123".contains(regex(#"\p{Pattern_White_Space}"#)))
+    
     // Identifier_Status
+    // Unsupported
+
     // Identifier_Type
+    // Unsupported
 
     // MARK: CJK
     // Ideographic
+    XCTAssertTrue("微笑".contains(regex(#"^\p{IsIdeographic}+$"#)))
+    XCTAssertFalse("abc123".contains(regex(#"\p{Ideographic}"#)))
+    
     // Unified_Ideograph
+    XCTAssertTrue("微笑".contains(regex(#"^\p{Unified_Ideograph}+$"#)))
+    XCTAssertFalse("abc123".contains(regex(#"\p{Unified_Ideograph}"#)))
+    
     // Radical
+    XCTAssertTrue("⺁⺂⺆".contains(regex(#"^\p{Radical}+$"#)))
+    
     // IDS_Binary_Operator
+    XCTAssertTrue("⿰⿸⿻".contains(regex(#"^\p{IDS_Binary_Operator}+$"#)))
+    
     // IDS_Trinary_Operator
+    XCTAssertTrue("⿲⿳".contains(regex(#"^\p{IDS_Trinary_Operator}+$"#)))
+
     // Equivalent_Unified_Ideograph
-    XCTExpectFailure {
-      XCTFail(#"Unsupported: \(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)"#)
-      // XCTAssertTrue("⼚⺁厂".contains(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#))
-    }
+    // Unsupported
 
     // MARK: Case
     // Uppercase
+    XCTAssertTrue("AÉÎØÜ".contains(regex(#"^\p{isUppercase}+$"#)))
+    XCTAssertFalse("123abc".contains(regex(#"^\p{isUppercase}+$"#)))
+
     // Lowercase
+    XCTAssertTrue("aéîøü".contains(regex(#"^\p{Lowercase}+$"#)))
+    XCTAssertFalse("123ABC".contains(regex(#"\p{Lowercase}+$"#)))
+
     // Simple_Lowercase_Mapping
+    XCTAssertTrue("aAa".contains(regex(#"^\p{Simple_Lowercase_Mapping=a}+$"#)))
+    XCTAssertFalse("bBå".contains(regex(#"\p{Simple_Lowercase_Mapping=a}"#)))
+
     // Simple_Titlecase_Mapping
+    XCTAssertTrue("aAa".contains(regex(#"^\p{Simple_Titlecase_Mapping=A}+$"#)))
+    XCTAssertFalse("bBå".contains(regex(#"\p{Simple_Titlecase_Mapping=A}"#)))
+
     // Simple_Uppercase_Mapping
+    XCTAssertTrue("aAa".contains(regex(#"^\p{Simple_Uppercase_Mapping=A}+$"#)))
+    XCTAssertFalse("bBå".contains(regex(#"\p{Simple_Uppercase_Mapping=A}"#)))
+
     // Simple_Case_Folding
+    // Unsupported
+
     // Soft_Dotted
+    XCTAssertTrue("ijɨʝⅈⅉ".contains(regex(#"^\p{Soft_Dotted}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Soft_Dotted}"#)))
+
     // Cased
+    XCTAssertTrue("A".contains(regex(#"\p{Cased}"#)))
+    XCTAssertTrue("A".contains(regex(#"\p{Is_Cased}"#)))
+    XCTAssertFalse("0".contains(regex(#"\p{Cased}"#)))
+
     // Case_Ignorable
+    XCTAssertTrue(":".contains(regex(#"\p{Case_Ignorable}"#)))
+    XCTAssertFalse("a".contains(regex(#"\p{Case_Ignorable}"#)))
+
     // Changes_When_Lowercased
+    XCTAssertTrue("A".contains(regex(#"\p{Changes_When_Lowercased}"#)))
+    XCTAssertTrue("A".contains(regex(#"\p{Changes_When_Lowercased=true}"#)))
+    XCTAssertFalse("a".contains(regex(#"\p{Changes_When_Lowercased}"#)))
+
     // Changes_When_Uppercased
     XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Uppercased}"#)))
     XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Uppercased=true}"#)))
     XCTAssertFalse("A".contains(regex(#"\p{Changes_When_Uppercased}"#)))
+    
     // Changes_When_Titlecased
+    XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Titlecased=true}"#)))
+    XCTAssertFalse("A".contains(regex(#"\p{Changes_When_Titlecased}"#)))
+
     // Changes_When_Casefolded
-    // Changes_When_Casemapped
+    XCTAssertTrue("A".contains(regex(#"\p{Changes_When_Casefolded=true}"#)))
+    XCTAssertFalse("a".contains(regex(#"\p{Changes_When_Casefolded}"#)))
+    XCTAssertFalse(":".contains(regex(#"\p{Changes_When_Casefolded}"#)))
 
+    // Changes_When_Casemapped
+    XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Casemapped}"#)))
+    XCTAssertFalse(":".contains(regex(#"\p{Changes_When_Casemapped}"#)))
+    
     // MARK: Normalization
     // Canonical_Combining_Class
+    XCTAssertTrue("\u{0321}\u{0322}\u{1DD0}".contains(regex(#"^\p{Canonical_Combining_Class=202}+$"#)))
+    XCTAssertFalse("123".contains(regex(#"\p{Canonical_Combining_Class=202}"#)))
+    
     // Decomposition_Type
+    // Unsupported
+    
     // NFC_Quick_Check
+    // Unsupported
+    
     // NFKC_Quick_Check
+    // Unsupported
+    
     // NFD_Quick_Check
+    // Unsupported
+
     // NFKD_Quick_Check
+    // Unsupported
+
     // NFKC_Casefold
+    // Unsupported
+
     // Changes_When_NFKC_Casefolded
+    XCTAssertTrue("ABCÊÖ".contains(regex(#"^\p{Changes_When_NFKC_Casefolded}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Changes_When_NFKC_Casefolded}"#)))
 
     // MARK: Emoji
     // Emoji
+    XCTAssertTrue("🥰🥳🤩".contains(regex(#"^\p{Emoji}+$"#)))
+    XCTAssertFalse("abc ◎✩℥".contains(regex(#"\p{Emoji}"#)))
+
     // Emoji_Presentation
+    XCTAssertTrue("⌚☕☔".contains(regex(#"^\p{Emoji_Presentation}+$"#)))
+    XCTAssertFalse("abc ǽǮ".contains(regex(#"\p{Emoji_Presentation}"#)))
+
     // Emoji_Modifier
+    XCTAssertTrue("\u{1F3FB}\u{1F3FC}\u{1F3FD}".contains(regex(#"^\p{Emoji_Modifier}+$"#)))
+    XCTAssertFalse("🧒".contains(regex(#"\p{Emoji_Modifier}"#)))
+
     // Emoji_Modifier_Base
+    XCTAssertTrue("🧒".contains(regex(#"^\p{Emoji_Modifier_Base}+$"#)))
+    XCTAssertFalse("123 🧠".contains(regex(#"\p{Emoji_Modifier_Base}"#)))
+
     // Emoji_Component
+    // Unsupported
+
     // Extended_Pictographic
+    // Unsupported
+
     // Basic_Emoji*
+    // Unsupported
+
     // Emoji_Keycap_Sequence*
+    // Unsupported
+
     // RGI_Emoji_Modifier_Sequence*
+    // Unsupported
+
     // RGI_Emoji_Flag_Sequence*
+    // Unsupported
+
     // RGI_Emoji_Tag_Sequence*
+    // Unsupported
+
     // RGI_Emoji_ZWJ_Sequence*
+    // Unsupported
+
     // RGI_Emoji*
+    // Unsupported
 
     // MARK: Shaping and Rendering
     // Join_Control
+    XCTAssertTrue("\u{200C}\u{200D}".contains(regex(#"^\p{Join_Control}+$"#)))
+    XCTAssertFalse("123".contains(regex(#"\p{Join_Control}"#)))
+
     // Joining_Group
+    // Unsupported
+
     // Joining_Type
+    // Unsupported
+
     // Vertical_Orientation
+    // Unsupported
+
     // Line_Break
+    // Unsupported
+
     // Grapheme_Cluster_Break
+    // Unsupported
+
     // Sentence_Break
+    // Unsupported
+
     // Word_Break
+    // Unsupported
+
     // East_Asian_Width
+    // Unsupported
+
     // Prepended_Concatenation_Mark
+    // Unsupported
 
     // MARK: Bidirectional
     // Bidi_Class
+    // Unsupported
+
     // Bidi_Control
+    XCTAssertTrue("\u{200E}\u{200F}\u{2069}".contains(regex(#"^\p{Bidi_Control}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Bidi_Control}"#)))
+
     // Bidi_Mirrored
+    XCTAssertTrue("()<>{}❮❯«»".contains(regex(#"^\p{Bidi_Mirrored}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Bidi_Mirrored}"#)))
+
     // Bidi_Mirroring_Glyph
+    // Unsupported
+
     // Bidi_Paired_Bracket
+    // Unsupported
+
     // Bidi_Paired_Bracket_Type
+    // Unsupported
 
     // MARK: Miscellaneous
     // Math
+    XCTAssertTrue("𝒶𝖇𝕔𝖽𝗲𝘧𝙜𝚑𝛊𝜅𝝀𝝡𝞰𝟙𝟐𝟯𝟺".contains(regex(#"^\p{Math}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Math}"#)))
+
     // Quotation_Mark
+    XCTAssertTrue(#"“«‘"’»”"#.contains(regex(#"^\p{Quotation_Mark}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Quotation_Mark}"#)))
+
     // Dash
+    XCTAssertTrue("—-–".contains(regex(#"^\p{Dash}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Dash}"#)))
+
     // Sentence_Terminal
+    XCTAssertTrue(".!?".contains(regex(#"^\p{Sentence_Terminal}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Sentence_Terminal}"#)))
+
     // Terminal_Punctuation
+    XCTAssertTrue(":?!.".contains(regex(#"^\p{Terminal_Punctuation}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Terminal_Punctuation}"#)))
+
     // Diacritic
+    XCTAssertTrue("¨`^¯ʸ".contains(regex(#"^\p{Diacritic}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Diacritic}"#)))
+
     // Extender
+    XCTAssertTrue("ᪧː々".contains(regex(#"^\p{Extender}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Extender}"#)))
+
     // Grapheme_Base
+    XCTAssertTrue("abc".contains(regex(#"^\p{Grapheme_Base}+$"#)))
+    XCTAssertFalse("\u{301}\u{FE0F}".contains(regex(#"\p{Grapheme_Base}"#)))
+
     // Grapheme_Extend
+    XCTAssertTrue("\u{301}\u{302}\u{303}".contains(regex(#"^\p{Grapheme_Extend}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Grapheme_Extend}"#)))
+
     // Regional_Indicator
+    XCTAssertTrue("🇰🇷🇬🇭🇵🇪".contains(regex(#"^\p{Regional_Indicator}+$"#)))
+    XCTAssertFalse("abc 123".contains(regex(#"\p{Regional_Indicator}"#)))
+  }
+
+  func testFullProperties_Unsupported() {
+    // Block
+    XCTAssertThrowsError(try Regex(#"\p{block=Block_Elements}"#))
+
+    // Hangul_Syllable_Type
+    XCTAssertThrowsError(try Regex(#"\p{Hangul_Syllable_Type=L}/"#))
+
+    // Identifier_Status
+    XCTAssertThrowsError(try Regex(#"\p{Identifier_Status=Allowed}"#))
+    
+    // Identifier_Type
+    XCTAssertThrowsError(try Regex(#"\p{Identifier_Type=Inclusion}/"#))
+
+    // Equivalent_Unified_Ideograph
+    XCTAssertThrowsError(try Regex(#"\p{Equivalent_Unified_Ideograph=⼚}"#))
+
+    // Simple_Case_Folding
+    XCTAssertThrowsError(try Regex(#"\p{Simple_Case_Folding=a}/"#))
+    
+    // Decomposition_Type
+    XCTAssertThrowsError(try Regex(#"\p{Decomposition_Type}"#))
+    
+    // NFC_Quick_Check
+    XCTAssertThrowsError(try Regex(#"\p{NFC_Quick_Check}"#))
+    
+    // NFKC_Quick_Check
+    XCTAssertThrowsError(try Regex(#"\p{NFKC_Quick_Check}"#))
+    
+    // NFD_Quick_Check
+    XCTAssertThrowsError(try Regex(#"\p{NFD_Quick_Check}"#))
+    
+    // NFKD_Quick_Check
+    XCTAssertThrowsError(try Regex(#"\p{NFKD_Quick_Check}"#))
+    
+    // NFKC_Casefold
+    XCTAssertThrowsError(try Regex(#"\p{NFKC_Casefold}"#))
+
+    // Emoji_Component
+    XCTAssertThrowsError(try Regex(#"\p{Emoji_Component}"#))
+
+    // Extended_Pictographic
+    XCTAssertThrowsError(try Regex(#"\p{Extended_Pictographic}"#))
+
+    // Basic_Emoji*
+    XCTAssertThrowsError(try Regex(#"\p{Basic_Emoji*}"#))
+
+    // Emoji_Keycap_Sequence*
+    XCTAssertThrowsError(try Regex(#"\p{Emoji_Keycap_Sequence*}"#))
+
+    // RGI_Emoji_Modifier_Sequence*
+    XCTAssertThrowsError(try Regex(#"\p{RGI_Emoji_Modifier_Sequence*}"#))
+
+    // RGI_Emoji_Flag_Sequence*
+    XCTAssertThrowsError(try Regex(#"\p{RGI_Emoji_Flag_Sequence*}"#))
+
+    // RGI_Emoji_Tag_Sequence*
+    XCTAssertThrowsError(try Regex(#"\p{RGI_Emoji_Tag_Sequence*}"#))
+
+    // RGI_Emoji_ZWJ_Sequence*
+    XCTAssertThrowsError(try Regex(#"\p{RGI_Emoji_ZWJ_Sequence*}"#))
+
+    // RGI_Emoji*
+    XCTAssertThrowsError(try Regex(#"\p{RGI_Emoji*}"#))
+
+    // Joining_Group
+    XCTAssertThrowsError(try Regex(#"\p{Joining_Group}"#))
+
+    // Joining_Type
+    XCTAssertThrowsError(try Regex(#"\p{Joining_Type}"#))
+
+    // Vertical_Orientation
+    XCTAssertThrowsError(try Regex(#"\p{Vertical_Orientation}"#))
+
+    // Line_Break
+    XCTAssertThrowsError(try Regex(#"\p{Line_Break}"#))
+
+    // Grapheme_Cluster_Break
+    XCTAssertThrowsError(try Regex(#"\p{Grapheme_Cluster_Break}"#))
+
+    // Sentence_Break
+    XCTAssertThrowsError(try Regex(#"\p{Sentence_Break}"#))
+
+    // Word_Break
+    XCTAssertThrowsError(try Regex(#"\p{Word_Break}"#))
+
+    // East_Asian_Width
+    XCTAssertThrowsError(try Regex(#"\p{East_Asian_Width}"#))
+
+    // Prepended_Concatenation_Mark
+    XCTAssertThrowsError(try Regex(#"\p{Prepended_Concatenation_Mark}"#))
+
+    // Bidi_Class
+    XCTAssertThrowsError(try Regex(#"\p{Bidi_Class}"#))
+
+    // Bidi_Mirroring_Glyph
+    XCTAssertThrowsError(try Regex(#"\p{Bidi_Mirroring_Glyph}"#))
+
+    // Bidi_Paired_Bracket
+    XCTAssertThrowsError(try Regex(#"\p{Bidi_Paired_Bracket}"#))
+
+    // Bidi_Paired_Bracket_Type
+    XCTAssertThrowsError(try Regex(#"\p{Bidi_Paired_Bracket_Type}"#))
   }
 }