diff --git a/Package.swift b/Package.swift
index abc895813..c1e9bff37 100644
--- a/Package.swift
+++ b/Package.swift
@@ -75,15 +75,17 @@ let package = Package(
             name: "RegexBuilder",
             dependencies: ["_StringProcessing", "_RegexParser"],
             swiftSettings: publicStdlibSettings),
+        .target(name: "TestSupport",
+                swiftSettings: [availabilityDefinition]),
         .testTarget(
             name: "RegexTests",
-            dependencies: ["_StringProcessing"],
+            dependencies: ["_StringProcessing", "TestSupport"],
             swiftSettings: [
                 .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
             ]),
         .testTarget(
             name: "RegexBuilderTests",
-            dependencies: ["_StringProcessing", "RegexBuilder"],
+            dependencies: ["_StringProcessing", "RegexBuilder", "TestSupport"],
             swiftSettings: [
                 .unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
             ]),
diff --git a/Sources/TestSupport/TestSupport.swift b/Sources/TestSupport/TestSupport.swift
new file mode 100644
index 000000000..b60adb63f
--- /dev/null
+++ b/Sources/TestSupport/TestSupport.swift
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+import XCTest
+
+// We need to split this out of the test files, as it needs to be compiled
+// *without* `-disable-availability-checking` to ensure the #available check is
+// not compiled into a no-op.
+
+#if os(Linux)
+public func XCTExpectFailure(
+  _ message: String? = nil, body: () throws -> Void
+) rethrows {}
+#endif
+
+/// Guards certain tests to make sure we have a new stdlib available.
+public func ensureNewStdlib(
+  file: StaticString = #file, line: UInt = #line
+) -> Bool {
+  guard #available(SwiftStdlib 5.7, *) else {
+    XCTExpectFailure { XCTFail("Unsupported stdlib", file: file, line: line) }
+    return false
+  }
+  return true
+}
diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift
index b03ce8c39..8706327f7 100644
--- a/Sources/_RegexParser/Regex/AST/Atom.swift
+++ b/Sources/_RegexParser/Regex/AST/Atom.swift
@@ -755,8 +755,10 @@ extension AST.Atom {
   /// Whether this atom is valid as the operand of a custom character class
   /// range.
   public var isValidCharacterClassRangeBound: Bool {
-    // If we have a literal character value for this, it can be used as a bound.
-    if literalCharacterValue != nil { return true }
+    if let c = literalCharacterValue {
+      // We only match character range bounds that are single scalar NFC.
+      return c.hasExactlyOneScalar && c.isNFC
+    }
     switch kind {
     // \cx, \C-x, \M-x, \M-\C-x, \N{...}
     case .keyboardControl, .keyboardMeta, .keyboardMetaControl, .namedCharacter:
diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
index 4a4f5c05f..a830a18b7 100644
--- a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
+++ b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
@@ -480,35 +480,37 @@ extension Parser {
   ///
   mutating func lexQuantifier(
   ) -> (Located<Quant.Amount>, Located<Quant.Kind>, [AST.Trivia])? {
-    var trivia: [AST.Trivia] = []
+    tryEating { p in
+      var trivia: [AST.Trivia] = []
 
-    if let t = lexNonSemanticWhitespace() { trivia.append(t) }
+      if let t = p.lexNonSemanticWhitespace() { trivia.append(t) }
 
-    let amt: Located<Quant.Amount>? = recordLoc { p in
-      if p.tryEat("*") { return .zeroOrMore }
-      if p.tryEat("+") { return .oneOrMore }
-      if p.tryEat("?") { return .zeroOrOne }
+      let amt: Located<Quant.Amount>? = p.recordLoc { p in
+        if p.tryEat("*") { return .zeroOrMore }
+        if p.tryEat("+") { return .oneOrMore }
+        if p.tryEat("?") { return .zeroOrOne }
 
-      return p.tryEating { p in
-        guard p.tryEat("{"),
-              let range = p.lexRange(trivia: &trivia),
-              p.tryEat("}")
-        else { return nil }
-        return range.value
+        return p.tryEating { p in
+          guard p.tryEat("{"),
+                let range = p.lexRange(trivia: &trivia),
+                p.tryEat("}")
+          else { return nil }
+          return range.value
+        }
       }
-    }
-    guard let amt = amt else { return nil }
+      guard let amt = amt else { return nil }
 
-    // PCRE allows non-semantic whitespace here in extended syntax mode.
-    if let t = lexNonSemanticWhitespace() { trivia.append(t) }
+      // PCRE allows non-semantic whitespace here in extended syntax mode.
+      if let t = p.lexNonSemanticWhitespace() { trivia.append(t) }
 
-    let kind: Located<Quant.Kind> = recordLoc { p in
-      if p.tryEat("?") { return .reluctant  }
-      if p.tryEat("+") { return .possessive }
-      return .eager
-    }
+      let kind: Located<Quant.Kind> = p.recordLoc { p in
+        if p.tryEat("?") { return .reluctant  }
+        if p.tryEat("+") { return .possessive }
+        return .eager
+      }
 
-    return (amt, kind, trivia)
+      return (amt, kind, trivia)
+    }
   }
 
   /// Try to consume a range, returning `nil` if unsuccessful.
diff --git a/Sources/_RegexParser/Utility/Misc.swift b/Sources/_RegexParser/Utility/Misc.swift
index d37dfbd4a..70dc7a7d5 100644
--- a/Sources/_RegexParser/Utility/Misc.swift
+++ b/Sources/_RegexParser/Utility/Misc.swift
@@ -19,6 +19,21 @@ extension Substring {
   var string: String { String(self) }
 }
 
+extension Character {
+  /// Whether this character is made up of exactly one Unicode scalar value.
+  public var hasExactlyOneScalar: Bool {
+    let scalars = unicodeScalars
+    return scalars.index(after: scalars.startIndex) == scalars.endIndex
+  }
+
+  /// Whether this character is in NFC form.
+  internal var isNFC: Bool {
+    if isASCII { return true }
+    let str = String(self)
+    return str._nfcCodeUnits.elementsEqual(str.utf8)
+  }
+}
+
 extension CustomStringConvertible {
   @_alwaysEmitIntoClient
   public var halfWidthCornerQuoted: String {
diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index 477760ef8..e8c92f2b5 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -775,9 +775,131 @@ fileprivate extension Compiler.ByteCodeGen {
     builder.label(exit)
   }
 
+  /// Coalesce any adjacent scalar members in a custom character class together.
+  /// This is required in order to produce correct grapheme matching behavior.
+  func coalescingCustomCharacterClassMembers(
+    _ members: [DSLTree.CustomCharacterClass.Member]
+  ) -> [DSLTree.CustomCharacterClass.Member] {
+    struct Accumulator {
+      /// A series of range operands. For example, in `[ab-cde-fg]`, this will
+      /// contain the strings `["ab", "cde", "fg"]`. From there, the resulting
+      /// ranges will be created.
+      private var rangeOperands: [String] = [""]
+
+      /// The current range operand.
+      private var current: String {
+        _read { yield rangeOperands[rangeOperands.count - 1] }
+        _modify { yield &rangeOperands[rangeOperands.count - 1] }
+      }
+
+      /// Try to accumulate a character class member, returning `true` if
+      /// successful, `false` otherwise.
+      mutating func tryAccumulate(
+        _ member: DSLTree.CustomCharacterClass.Member
+      ) -> Bool {
+        switch member {
+        case .atom(let a):
+          guard let c = a.literalCharacterValue else { return false }
+          current.append(c)
+          return true
+        case .quotedLiteral(let str):
+          current += str
+          return true
+        case let .range(lhs, rhs):
+          guard let lhs = lhs.literalCharacterValue,
+                let rhs = rhs.literalCharacterValue
+          else { return false }
+          current.append(lhs)
+          rangeOperands.append(String(rhs))
+          return true
+        case .trivia:
+          // Trivia can be completely ignored if we've already coalesced
+          // something.
+          return !current.isEmpty
+        default:
+          return false
+        }
+      }
+
+      func finish() -> [DSLTree.CustomCharacterClass.Member] {
+        if rangeOperands.count == 1 {
+          // If we didn't have any additional range operands, this isn't a
+          // range, we can just form a standard quoted literal.
+          return [.quotedLiteral(current)]
+        }
+        var members = [DSLTree.CustomCharacterClass.Member]()
+
+        // We have other range operands, splice them together. For N operands
+        // we have N - 1 ranges.
+        for (i, lhs) in rangeOperands.dropLast().enumerated() {
+          let rhs = rangeOperands[i + 1]
+
+          // If this is the first operand we only need to drop the last
+          // character for its quoted members, otherwise this is both an LHS
+          // and RHS of a range, and as such needs both sides trimmed.
+          let leading = i == 0 ? lhs.dropLast() : lhs.dropFirst().dropLast()
+          if !leading.isEmpty {
+            members.append(.quotedLiteral(String(leading)))
+          }
+          members.append(.range(.char(lhs.last!), .char(rhs.first!)))
+        }
+        // We've handled everything except the quoted portion of the last
+        // operand, add it now.
+        let trailing = rangeOperands.last!.dropFirst()
+        if !trailing.isEmpty {
+          members.append(.quotedLiteral(String(trailing)))
+        }
+        return members
+      }
+    }
+    return members
+      .map { m -> DSLTree.CustomCharacterClass.Member in
+        // First we need to recursively coalesce any child character classes.
+        switch m {
+        case .custom(let ccc):
+          return .custom(coalescingCustomCharacterClass(ccc))
+        case .intersection(let lhs, let rhs):
+          return .intersection(
+            coalescingCustomCharacterClass(lhs),
+            coalescingCustomCharacterClass(rhs))
+        case .subtraction(let lhs, let rhs):
+          return .subtraction(
+            coalescingCustomCharacterClass(lhs),
+            coalescingCustomCharacterClass(rhs))
+        case .symmetricDifference(let lhs, let rhs):
+          return .symmetricDifference(
+            coalescingCustomCharacterClass(lhs),
+            coalescingCustomCharacterClass(rhs))
+        case .atom, .range, .quotedLiteral, .trivia:
+          return m
+        }
+      }
+      .coalescing(with: Accumulator(), into: { $0.finish() }) { accum, member in
+        accum.tryAccumulate(member)
+      }
+  }
+
+  func coalescingCustomCharacterClass(
+    _ ccc: DSLTree.CustomCharacterClass
+  ) -> DSLTree.CustomCharacterClass {
+    // This only needs to be done in grapheme semantic mode. In scalar semantic
+    // mode, we don't want to coalesce any scalars into a grapheme. This
+    // means that e.g. `[e\u{301}-\u{302}]` remains a range between U+301 and
+    // U+302.
+    guard options.semanticLevel == .graphemeCluster else { return ccc }
+
+    let members = coalescingCustomCharacterClassMembers(ccc.members)
+    return .init(members: members, isInverted: ccc.isInverted)
+  }
+
   mutating func emitCustomCharacterClass(
     _ ccc: DSLTree.CustomCharacterClass
   ) throws {
+    // Before emitting a custom character class in grapheme semantic mode, we
+    // need to coalesce together any adjacent characters and scalars, over which
+    // we can perform grapheme breaking. This includes e.g. range bounds for
+    // `[e\u{301}-\u{302}]`.
+    let ccc = coalescingCustomCharacterClass(ccc)
     if let asciiBitset = ccc.asAsciiBitset(options),
         optimizationsEnabled {
       if options.semanticLevel == .unicodeScalar {
@@ -791,6 +913,45 @@ fileprivate extension Compiler.ByteCodeGen {
     }
   }
 
+  mutating func emitConcatenation(_ children: [DSLTree.Node]) throws {
+    // Before emitting a concatenation, we need to flatten out any nested
+    // concatenations, and coalesce any adjacent characters and scalars, forming
+    // quoted literals of their contents, over which we can perform grapheme
+    // breaking.
+    func flatten(_ node: DSLTree.Node) -> [DSLTree.Node] {
+      switch node {
+      case .concatenation(let ch):
+        return ch.flatMap(flatten)
+      case .convertedRegexLiteral(let n, _):
+        return flatten(n)
+      default:
+        return [node]
+      }
+    }
+    let children = children
+      .flatMap(flatten)
+      .coalescing(with: "", into: DSLTree.Node.quotedLiteral) { str, node in
+        switch node {
+        case .atom(let a):
+          guard let c = a.literalCharacterValue else { return false }
+          str.append(c)
+          return true
+        case .quotedLiteral(let q):
+          str += q
+          return true
+        case .trivia:
+          // Trivia can be completely ignored if we've already coalesced
+          // something.
+          return !str.isEmpty
+        default:
+          return false
+        }
+      }
+    for child in children {
+      try emitConcatenationComponent(child)
+    }
+  }
+
   @discardableResult
   mutating func emitNode(_ node: DSLTree.Node) throws -> ValueRegister? {
     switch node {
@@ -799,9 +960,7 @@ fileprivate extension Compiler.ByteCodeGen {
       try emitAlternation(children)
 
     case let .concatenation(children):
-      for child in children {
-        try emitConcatenationComponent(child)
-      }
+      try emitConcatenation(children)
 
     case let .capture(name, refId, child, transform):
       options.beginScope()
diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift
index 530126a32..b8daa8b21 100644
--- a/Sources/_StringProcessing/Compiler.swift
+++ b/Sources/_StringProcessing/Compiler.swift
@@ -42,19 +42,43 @@ class Compiler {
   }
 }
 
+/// Hashable wrapper for `Any.Type`.
+struct AnyHashableType: CustomStringConvertible, Hashable {
+  var ty: Any.Type
+  init(_ ty: Any.Type) {
+    self.ty = ty
+  }
+  var description: String { "\(ty)" }
+
+  static func == (lhs: Self, rhs: Self) -> Bool {
+    lhs.ty == rhs.ty
+  }
+  func hash(into hasher: inout Hasher) {
+    hasher.combine(ObjectIdentifier(ty))
+  }
+}
+
 // An error produced when compiling a regular expression.
-enum RegexCompilationError: Error, CustomStringConvertible {
+enum RegexCompilationError: Error, Hashable, CustomStringConvertible {
   // TODO: Source location?
   case uncapturedReference
+  case incorrectOutputType(incorrect: AnyHashableType, correct: AnyHashableType)
+  case invalidCharacterClassRangeOperand(Character)
+
+  static func incorrectOutputType(
+    incorrect: Any.Type, correct: Any.Type
+  ) -> Self {
+    .incorrectOutputType(incorrect: .init(incorrect), correct: .init(correct))
+  }
 
-  case incorrectOutputType(incorrect: Any.Type, correct: Any.Type)
-  
   var description: String {
     switch self {
     case .uncapturedReference:
       return "Found a reference used before it captured any match."
     case .incorrectOutputType(let incorrect, let correct):
       return "Cast to incorrect type 'Regex<\(incorrect)>', expected 'Regex<\(correct)>'"
+    case .invalidCharacterClassRangeOperand(let c):
+      return "'\(c)' is an invalid bound for character class range"
     }
   }
 }
diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift
index 668d16eb6..083781120 100644
--- a/Sources/_StringProcessing/ConsumerInterface.swift
+++ b/Sources/_StringProcessing/ConsumerInterface.swift
@@ -63,7 +63,7 @@ extension DSLTree._AST.Atom {
 extension Character {
   func generateConsumer(
     _ opts: MatchingOptions
-  ) throws -> MEProgram.ConsumeFunction? {
+  ) throws -> MEProgram.ConsumeFunction {
     let isCaseInsensitive = opts.isCaseInsensitive
     switch opts.semanticLevel {
     case .graphemeCluster:
@@ -327,24 +327,25 @@ extension DSLTree.CustomCharacterClass.Member {
     _ opts: MatchingOptions,
     _ isInverted: Bool
   ) -> DSLTree.CustomCharacterClass.AsciiBitset? {
+    typealias Bitset = DSLTree.CustomCharacterClass.AsciiBitset
     switch self {
     case let .atom(a):
       if let val = a.singleScalarASCIIValue {
-        return DSLTree.CustomCharacterClass.AsciiBitset(
-          val,
-          isInverted,
-          opts.isCaseInsensitive
-        )
+        return Bitset(val, isInverted, opts.isCaseInsensitive)
       }
     case let .range(low, high):
-      if let lowVal = low.singleScalarASCIIValue, let highVal = high.singleScalarASCIIValue {
-        return DSLTree.CustomCharacterClass.AsciiBitset(
-          low: lowVal,
-          high: highVal,
-          isInverted: isInverted,
-          isCaseInsensitive: opts.isCaseInsensitive
-        )
+      if let lowVal = low.singleScalarASCIIValue,
+         let highVal = high.singleScalarASCIIValue {
+        return Bitset(low: lowVal, high: highVal, isInverted: isInverted,
+                      isCaseInsensitive: opts.isCaseInsensitive)
+      }
+    case .quotedLiteral(let str):
+      var bitset = Bitset(isInverted: isInverted)
+      for c in str {
+        guard let ascii = c._singleScalarAsciiValue else { return nil }
+        bitset = bitset.union(Bitset(ascii, isInverted, opts.isCaseInsensitive))
       }
+      return bitset
     default:
       return nil
     }
@@ -361,38 +362,68 @@ extension DSLTree.CustomCharacterClass.Member {
       }
       return c
     case let .range(low, high):
-      // TODO:
-      guard let lhs = low.literalCharacterValue else {
+      guard let lhsChar = low.literalCharacterValue else {
         throw Unsupported("\(low) in range")
       }
-      guard let rhs = high.literalCharacterValue else {
+      guard let rhsChar = high.literalCharacterValue else {
         throw Unsupported("\(high) in range")
       }
 
-      if opts.isCaseInsensitive {
-        let lhsLower = lhs.lowercased()
-        let rhsLower = rhs.lowercased()
-        guard lhsLower <= rhsLower else { throw Unsupported("Invalid range \(lhs)-\(rhs)") }
-        return { input, bounds in
-          // TODO: check for out of bounds?
-          let curIdx = bounds.lowerBound
-          if (lhsLower...rhsLower).contains(input[curIdx].lowercased()) {
-            // TODO: semantic level
-            return input.index(after: curIdx)
-          }
-          return nil
+      // We must have NFC single scalar bounds.
+      guard let lhs = lhsChar.singleScalar, lhs.isNFC else {
+        throw RegexCompilationError.invalidCharacterClassRangeOperand(lhsChar)
+      }
+      guard let rhs = rhsChar.singleScalar, rhs.isNFC else {
+        throw RegexCompilationError.invalidCharacterClassRangeOperand(rhsChar)
+      }
+      guard lhs <= rhs else {
+        throw Unsupported("Invalid range \(low)-\(high)")
+      }
+
+      let isCaseInsensitive = opts.isCaseInsensitive
+      let isCharacterSemantic = opts.semanticLevel == .graphemeCluster
+      
+      return { input, bounds in
+        let curIdx = bounds.lowerBound
+        let nextIndex = isCharacterSemantic
+          ? input.index(after: curIdx)
+          : input.unicodeScalars.index(after: curIdx)
+
+        // Under grapheme semantics, we compare based on single NFC scalars. If
+        // such a character is not single scalar under NFC, the match fails. In
+        // scalar semantics, we compare the exact scalar value to the NFC
+        // bounds.
+        let scalar = isCharacterSemantic ? input[curIdx].singleNFCScalar
+                                         : input.unicodeScalars[curIdx]
+        guard let scalar = scalar else { return nil }
+        let scalarRange = lhs ... rhs
+        if scalarRange.contains(scalar) {
+          return nextIndex
         }
-      } else {
-        guard lhs <= rhs else { throw Unsupported("Invalid range \(lhs)-\(rhs)") }
-        return { input, bounds in
-          // TODO: check for out of bounds?
-          let curIdx = bounds.lowerBound
-          if (lhs...rhs).contains(input[curIdx]) {
-            // TODO: semantic level
-            return input.index(after: curIdx)
+
+        // Check for case insensitive matches.
+        func matchesCased(
+          _ cased: (UnicodeScalar.Properties) -> String
+        ) -> Bool {
+          let casedStr = cased(scalar.properties)
+          // In character semantic mode, we need to map to NFC. In scalar
+          // semantics, we should have an exact scalar.
+          let mapped = isCharacterSemantic ? casedStr.singleNFCScalar
+                                           : casedStr.singleScalar
+          guard let mapped = mapped else { return false }
+          return scalarRange.contains(mapped)
+        }
+        if isCaseInsensitive {
+          if scalar.properties.changesWhenLowercased,
+              matchesCased(\.lowercaseMapping) {
+            return nextIndex
+          }
+          if scalar.properties.changesWhenUppercased,
+             matchesCased(\.uppercaseMapping) {
+            return nextIndex
           }
-          return nil
         }
+        return nil
       }
 
     case let .custom(ccc):
@@ -434,21 +465,17 @@ extension DSLTree.CustomCharacterClass.Member {
         }
         return rhs(input, bounds)
       }
-    case .quotedLiteral(let s):
-      if opts.isCaseInsensitive {
-        return { input, bounds in
-          guard s.lowercased()._contains(input[bounds.lowerBound].lowercased()) else {
-            return nil
-          }
-          return input.index(after: bounds.lowerBound)
-        }
-      } else {
-        return { input, bounds in
-          guard s.contains(input[bounds.lowerBound]) else {
-            return nil
+    case .quotedLiteral(let str):
+      let consumers = try str.map {
+        try $0.generateConsumer(opts)
+      }
+      return { input, bounds in
+        for fn in consumers {
+          if let idx = fn(input, bounds) {
+            return idx
           }
-          return input.index(after: bounds.lowerBound)
         }
+        return nil
       }
     case .trivia:
       // TODO: Should probably strip this earlier...
diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift
index 80f2e7697..c1753c49d 100644
--- a/Sources/_StringProcessing/PrintAsPattern.swift
+++ b/Sources/_StringProcessing/PrintAsPattern.swift
@@ -70,16 +70,9 @@ extension PrettyPrinter {
     for namedCapture in namedCaptures {
       print("let \(namedCapture) = Reference(Substring.self)")
     }
-    
-    switch node {
-    case .concatenation(_):
-      printAsPattern(convertedFromAST: node)
-    case .convertedRegexLiteral(.concatenation(_), _):
-      printAsPattern(convertedFromAST: node)
-    default:
-      printBlock("Regex") { printer in
-        printer.printAsPattern(convertedFromAST: node)
-      }
+
+    printBlock("Regex") { printer in
+      printer.printAsPattern(convertedFromAST: node, isTopLevel: true)
     }
   }
 
@@ -89,7 +82,7 @@ extension PrettyPrinter {
   // to have a non-backing-off pretty-printer that this
   // can defer to.
   private mutating func printAsPattern(
-    convertedFromAST node: DSLTree.Node
+    convertedFromAST node: DSLTree.Node, isTopLevel: Bool = false
   ) {
     if patternBackoff(DSLTree._Tree(node)) {
       printBackoff(node)
@@ -106,11 +99,7 @@ extension PrettyPrinter {
       }
 
     case let .concatenation(c):
-      printBlock("Regex") { printer in
-        c.forEach {
-          printer.printAsPattern(convertedFromAST: $0)
-        }
-      }
+      printConcatenationAsPattern(c, isTopLevel: isTopLevel)
 
     case let .nonCapturingGroup(kind, child):
       switch kind.ast {
@@ -263,7 +252,7 @@ extension PrettyPrinter {
       // check above, so it should work out. Need a
       // cleaner way to do this. This means the argument
       // label is a lie.
-      printAsPattern(convertedFromAST: n)
+      printAsPattern(convertedFromAST: n, isTopLevel: isTopLevel)
 
     case let .customCharacterClass(ccc):
       printAsPattern(ccc)
@@ -279,6 +268,64 @@ extension PrettyPrinter {
       print("/* TODO: absent function */")
     }
   }
+
+  enum NodeToPrint {
+    case dslNode(DSLTree.Node)
+    case stringLiteral(String)
+  }
+
+  mutating func printAsPattern(_ node: NodeToPrint) {
+    switch node {
+    case .dslNode(let n):
+      printAsPattern(convertedFromAST: n)
+    case .stringLiteral(let str):
+      print(str)
+    }
+  }
+
+  mutating func printConcatenationAsPattern(
+    _ nodes: [DSLTree.Node], isTopLevel: Bool
+  ) {
+    // We need to coalesce any adjacent character and scalar elements into a
+    // string literal, preserving scalar syntax.
+    let nodes = nodes
+      .map { NodeToPrint.dslNode($0.lookingThroughConvertedLiteral) }
+      .coalescing(
+        with: StringLiteralBuilder(), into: { .stringLiteral($0.result) }
+      ) { literal, node in
+        guard case .dslNode(let node) = node else { return false }
+        switch node {
+        case let .atom(.char(c)):
+          literal.append(c)
+          return true
+        case let .atom(.scalar(s)):
+          literal.append(unescaped: s._dslBase)
+          return true
+        case .quotedLiteral(let q):
+          literal.append(q)
+          return true
+        case .trivia:
+          // Trivia can be completely ignored if we've already coalesced
+          // something.
+          return !literal.isEmpty
+        default:
+          return false
+        }
+      }
+    if isTopLevel || nodes.count == 1 {
+      // If we're at the top level, or we coalesced everything into a single
+      // element, we don't need to print a surrounding Regex { ... }.
+      for n in nodes {
+        printAsPattern(n)
+      }
+      return
+    }
+    printBlock("Regex") { printer in
+      for n in nodes {
+        printer.printAsPattern(n)
+      }
+    }
+  }
   
   mutating func printAsPattern(
     _ ccc: DSLTree.CustomCharacterClass,
@@ -341,8 +388,7 @@ extension PrettyPrinter {
           charMembers.append(c)
           return false
         case let .scalar(s):
-          charMembers.append(
-            unescaped: "\\u{\(String(s.value, radix: 16, uppercase: true))}")
+          charMembers.append(unescaped: s._dslBase)
           return false
         case .unconverted(_):
           return true
@@ -449,9 +495,9 @@ extension PrettyPrinter {
       case let .scalar(s):
         
         if wrap {
-          output("One(.anyOf(\"\\u{\(String(s.value, radix: 16, uppercase: true))}\"))")
+          output("One(.anyOf(\(s._dslBase._bareQuoted)))")
         } else {
-          output(".anyOf(\"\\u{\(String(s.value, radix: 16, uppercase: true))}\")")
+          output(".anyOf(\(s._dslBase._bareQuoted))")
         }
         
       case let .unconverted(a):
@@ -625,6 +671,10 @@ extension String {
   }
 }
 
+extension UnicodeScalar {
+  var _dslBase: String { "\\u{\(String(value, radix: 16, uppercase: true))}" }
+}
+
 /// A helper for building string literals, which handles escaping the contents
 /// appended.
 fileprivate struct StringLiteralBuilder {
@@ -851,19 +901,15 @@ extension AST.Atom {
   }
   
   var _dslBase: (String, canBeWrapped: Bool) {
-    func scalarLiteral(_ s: UnicodeScalar) -> String {
-      let hex = String(s.value, radix: 16, uppercase: true)
-      return "\\u{\(hex)}"
-    }
     switch kind {
     case let .char(c):
       return (String(c), false)
 
     case let .scalar(s):
-      return (scalarLiteral(s.value), false)
+      return (s.value._dslBase, false)
 
     case let .scalarSequence(seq):
-      return (seq.scalarValues.map(scalarLiteral).joined(), false)
+      return (seq.scalarValues.map(\._dslBase).joined(), false)
 
     case let .property(p):
       return (p._dslBase, true)
diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift
index c4ac8e759..4eb7bc42c 100644
--- a/Sources/_StringProcessing/Regex/ASTConversion.swift
+++ b/Sources/_StringProcessing/Regex/ASTConversion.swift
@@ -43,61 +43,7 @@ extension AST.Node {
         return .orderedChoice(children)
 
       case let .concatenation(v):
-        // Coalesce adjacent children who can produce a
-        // string literal representation
-        let astChildren = v.children
-        func coalesce(
-          _ idx: Array<AST>.Index
-        ) -> (Array<AST>.Index, String)? {
-          var result = ""
-          var idx = idx
-          while idx < astChildren.endIndex {
-            guard let atom: AST.Atom = astChildren[idx].as() else { break }
-
-            // TODO: For printing, nice to coalesce
-            // scalars literals too. We likely need a different
-            // approach even before we have a better IR.
-            if let char = atom.singleCharacter  {
-              result.append(char)
-            } else if let scalar = atom.singleScalar {
-              result.append(Character(scalar))
-            } else if case .scalarSequence(let seq) = atom.kind {
-              result += seq.scalarValues.map(Character.init)
-            } else {
-              break
-            }
-            
-            astChildren.formIndex(after: &idx)
-          }
-          return result.isEmpty ? nil : (idx, result)
-        }
-
-        // No need to nest single children concatenations
-        if astChildren.count == 1 {
-          return astChildren.first!.dslTreeNode
-        }
-
-        // Check for a single child post-coalescing
-        if let (idx, str) = coalesce(astChildren.startIndex),
-           idx == astChildren.endIndex
-        {
-          return .quotedLiteral(str)
-        }
-
-        // Coalesce adjacent string children
-        var curIdx = astChildren.startIndex
-        var children = Array<DSLTree.Node>()
-        while curIdx < astChildren.endIndex {
-          if let (nextIdx, str) = coalesce(curIdx) {
-            // TODO: Track source info...
-            children.append(.quotedLiteral(str))
-            curIdx = nextIdx
-          } else {
-            children.append(astChildren[curIdx].dslTreeNode)
-            astChildren.formIndex(after: &curIdx)
-          }
-        }
-        return .concatenation(children)
+        return .concatenation(v.children.map(\.dslTreeNode))
 
       case let .group(v):
         let child = v.child.dslTreeNode
@@ -135,10 +81,9 @@ extension AST.Node {
       case let .atom(v):
         switch v.kind {
         case .scalarSequence(let seq):
-          // Scalar sequences are splatted into concatenated scalars, which
-          // becomes a quoted literal. Sequences nested in concatenations have
-          // already been coalesced, this just handles the lone atom case.
-          return .quotedLiteral(String(seq.scalarValues.map(Character.init)))
+          // The DSL doesn't have an equivalent node for scalar sequences. Splat
+          // them into a concatenation of scalars.
+          return .concatenation(seq.scalarValues.map { .atom(.scalar($0)) })
         default:
           return .atom(v.dslTreeAtom)
         }
diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift
index 4ea905fd5..520f4991a 100644
--- a/Sources/_StringProcessing/Regex/DSLTree.swift
+++ b/Sources/_StringProcessing/Regex/DSLTree.swift
@@ -334,6 +334,14 @@ extension DSLTree.Node {
     default: return nil
     }
   }
+
+  /// Looks through a converted regex literal.
+  ///
+  /// Yields the node wrapped by `.convertedRegexLiteral`, or `self` otherwise.
+  var lookingThroughConvertedLiteral: Self {
+    if case let .convertedRegexLiteral(wrapped, _) = self { return wrapped }
+    return self
+  }
 }
 
 extension DSLTree.Atom {
diff --git a/Sources/_StringProcessing/Unicode/CharacterProps.swift b/Sources/_StringProcessing/Unicode/CharacterProps.swift
index 80f6819a6..e0be4e386 100644
--- a/Sources/_StringProcessing/Unicode/CharacterProps.swift
+++ b/Sources/_StringProcessing/Unicode/CharacterProps.swift
@@ -11,10 +11,3 @@
 
 
 // TODO
-
-extension Character {
-  /// Whether this character is made up of exactly one Unicode scalar value.
-  var hasExactlyOneScalar: Bool {
-    unicodeScalars.index(after: unicodeScalars.startIndex) == unicodeScalars.endIndex
-  }
-}
diff --git a/Sources/_StringProcessing/Unicode/NFC.swift b/Sources/_StringProcessing/Unicode/NFC.swift
new file mode 100644
index 000000000..5c2c4aa48
--- /dev/null
+++ b/Sources/_StringProcessing/Unicode/NFC.swift
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+@_spi(_Unicode)
+import Swift
+
+extension UnicodeScalar {
+  /// Whether this scalar is the single NFC scalar of the character it forms.
+  var isNFC: Bool { self == Character(self).singleNFCScalar }
+}
+
+extension Character {
+  /// The sole scalar of this character's NFC form, or `nil` when that form
+  /// holds more than one scalar.
+  var singleNFCScalar: UnicodeScalar? {
+    // A shipped StringProcessing always runs against SwiftStdlib >= 5.7.
+    guard #available(SwiftStdlib 5.7, *) else { return nil }
+    var normalized = String(self)._nfc.makeIterator()
+    let leading = normalized.next()
+    return normalized.next() == nil ? leading : nil
+  }
+
+  /// The character's only scalar, or `nil` when it is made up of several
+  /// scalars.
+  var singleScalar: UnicodeScalar? {
+    hasExactlyOneScalar ? unicodeScalars.first : nil
+  }
+}
+
+extension String {
+  /// The single NFC scalar of this string's only character, or `nil` when the
+  /// string is not exactly one character long or that character has none.
+  var singleNFCScalar: UnicodeScalar? {
+    guard let only = first, dropFirst().isEmpty else { return nil }
+    return only.singleNFCScalar
+  }
+
+  /// The string's only scalar, or `nil` when it holds no scalars or more than
+  /// one.
+  var singleScalar: UnicodeScalar? {
+    let scalars = unicodeScalars
+    guard let only = scalars.first,
+          scalars.dropFirst().isEmpty
+    else { return nil }
+    return only
+  }
+}
diff --git a/Sources/_StringProcessing/Utility/Misc.swift b/Sources/_StringProcessing/Utility/Misc.swift
new file mode 100644
index 000000000..8a9cbe325
--- /dev/null
+++ b/Sources/_StringProcessing/Utility/Misc.swift
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+extension Array {
+  /// Merges runs of adjacent elements by folding them into an accumulator.
+  /// `accumulate` returns `true` once it has absorbed an element into the
+  /// accumulator and `false` to leave the element untouched. Each finished
+  /// run is replaced by the elements that `finish` builds from it.
+  func coalescing<T>(
+    with initialAccumulator: T, into finish: (T) -> Self,
+    accumulate: (inout T, Element) -> Bool
+  ) -> Self {
+    var output = Self()
+    var pending = initialAccumulator
+    var hasPending = false
+
+    for element in self {
+      guard !accumulate(&pending, element) else {
+        // Absorbed into the pending run; nothing is emitted for it yet.
+        hasPending = true
+        continue
+      }
+      if hasPending {
+        // The run ended just before this element: emit it, then start over
+        // from a fresh accumulator.
+        output += finish(pending)
+        pending = initialAccumulator
+        hasPending = false
+      }
+      output.append(element)
+    }
+    // A run that reaches the end of the array still has to be emitted.
+    if hasPending {
+      output += finish(pending)
+    }
+    return output
+  }
+
+  /// Merges runs of adjacent elements by folding them into an accumulator.
+  /// `accumulate` returns `true` once it has absorbed an element into the
+  /// accumulator and `false` to leave the element untouched. Each finished
+  /// run is replaced by the single element that `finish` builds from it.
+  func coalescing<T>(
+    with initialAccumulator: T, into finish: (T) -> Element,
+    accumulate: (inout T, Element) -> Bool
+  ) -> Self {
+    coalescing(
+      with: initialAccumulator, into: { run in [finish(run)] },
+      accumulate: accumulate)
+  }
+}
diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index 05375a1f7..e25f2df05 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -12,10 +12,7 @@
 import XCTest
 import _StringProcessing
 import RegexBuilder
-
-#if os(Linux)
-func XCTExpectFailure(_ message: String? = nil, body: () throws -> Void) rethrows {}
-#endif
+import TestSupport
 
 class RegexDSLTests: XCTestCase {
   func _testDSLCaptures<Content: RegexComponent, MatchType>(
@@ -77,6 +74,9 @@ class RegexDSLTests: XCTestCase {
   let asciiNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n"
 
   func testCharacterClasses() throws {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     try _testDSLCaptures(
       ("a c", ("a c", " ", "c")),
       matchType: (Substring, Substring, Substring).self, ==)
@@ -251,6 +251,9 @@ class RegexDSLTests: XCTestCase {
   }
 
   func testCharacterClassOperations() throws {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     try _testDSLCaptures(
       ("bcdefn1a", "bcdefn1a"),
       ("nbcdef1a", nil),        // fails symmetric difference lookahead
@@ -594,6 +597,9 @@ class RegexDSLTests: XCTestCase {
   }
   
   func testQuantificationBehavior() throws {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     // Eager by default
     try _testDSLCaptures(
       ("abc1def2", ("abc1def2", "2")),
@@ -1429,7 +1435,8 @@ class RegexDSLTests: XCTestCase {
       "\u{200D}" as UnicodeScalar
       "👦" as UnicodeScalar
     }
-    XCTAssertNil(try r3.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r3.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r3.wholeMatch(in: "👨‍👨‍👧‍👦"))
     XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
     XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
 
@@ -1441,18 +1448,72 @@ class RegexDSLTests: XCTestCase {
       try r4.firstMatch(in: "é")
     )
 
-    try XCTExpectFailure("Need stronger scalar coalescing logic") {
-      let r5 = Regex {
-        "e"
-        "\u{301}" as UnicodeScalar
+    let r5 = Regex {
+      "e"
+      "\u{301}" as UnicodeScalar
+    }
+    XCTAssertNotNil(try r5.firstMatch(in: "e\u{301}"))
+    XCTAssertNotNil(try r5.firstMatch(in: "é"))
+
+    let r6 = Regex {
+      "abcde"
+      "\u{301}"
+    }
+    XCTAssertNotNil(try r6.firstMatch(in: "abcde\u{301}"))
+    XCTAssertNotNil(try r6.firstMatch(in: "abcdé"))
+
+    let r7 = Regex {
+      "e" as Character
+      "\u{301}" as Character
+    }
+    XCTAssertNotNil(try r7.firstMatch(in: "e\u{301}"))
+    XCTAssertNotNil(try r7.firstMatch(in: "é"))
+
+    // You can't match a partial grapheme in grapheme semantic mode.
+    let r8 = Regex {
+      "👨" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👨" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👧" as UnicodeScalar
+    }
+    XCTAssertNil(try r8.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNil(try r8.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r8.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNil(try r8.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
+
+    // Scalar coalescing occurs across nested concatenations and literals.
+    let r9 = Regex {
+      Regex {
+        try! Regex(#"👨"#)
+        "\u{200D}" as UnicodeScalar
+        Regex {
+          "👨" as UnicodeScalar
+        }
       }
-      XCTAssertNotNil(
-        try r5.firstMatch(in: "e\u{301}")
-      )
-      XCTAssertNotNil(
-        try r5.firstMatch(in: "é")
-      )
+      Regex {
+        Regex {
+          "\u{200D}" as UnicodeScalar
+          "👧"
+        }
+        try! Regex(#"\u{200D}👦"#)
+      }
+    }
+    XCTAssertNotNil(try r9.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r9.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
+
+    let r10 = Regex {
+      "👨" as UnicodeScalar
+      try! Regex(#"\u{200D 1F468 200D 1F467}"#)
+      "\u{200D}" as UnicodeScalar
+      "👦" as UnicodeScalar
     }
+    XCTAssertNotNil(try r10.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r10.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
   }
 
   struct SemanticVersion: Equatable {
diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift
index 6c8f66e10..27f8d79cb 100644
--- a/Tests/RegexTests/CompileTests.swift
+++ b/Tests/RegexTests/CompileTests.swift
@@ -11,6 +11,7 @@
 
 @testable import _RegexParser
 @testable import _StringProcessing
+import TestSupport
 
 import XCTest
 
@@ -168,6 +169,45 @@ extension RegexTests {
     }
   }
 
+  /// Asserts that compiling `regex` throws exactly `error`.
+  private func testCompileError(
+    _ regex: String, _ error: RegexCompilationError,
+    file: StaticString = #filePath, line: UInt = #line) {
+    do {
+      _ = try _compileRegex(regex)
+      XCTFail("Expected compile error", file: file, line: line)
+    } catch let err as RegexCompilationError {
+      XCTAssertEqual(err, error, file: file, line: line)
+    } catch let unexpected {
+      XCTFail("Unknown compile error: \(unexpected)", file: file, line: line)
+    }
+  }
+
+  func testInvalidScalarCoalescing() throws {
+    guard ensureNewStdlib() else { return }
+    // Non-single-scalar bounds: each range's lower bound is spelled as several
+    // scalars, and the expected error carries exactly that operand.
+    testCompileError(
+      #"[a\u{302}-✅]"#, .invalidCharacterClassRangeOperand("a\u{302}"))
+    testCompileError(
+      #"[e\u{301}-\u{302}]"#, .invalidCharacterClassRangeOperand("e\u{301}"))
+    testCompileError(
+      #"[\u{73}\u{323}\u{307}-\u{1E00}]"#,
+      .invalidCharacterClassRangeOperand("\u{73}\u{323}\u{307}"))
+    testCompileError(
+      #"[a\u{315}\u{301}-\u{302}]"#,
+      .invalidCharacterClassRangeOperand("a\u{315}\u{301}")
+    )
+    testCompileError(
+      #"[a-z1e\u{301}-\u{302}\u{E1}3-59]"#,
+      .invalidCharacterClassRangeOperand("e\u{301}")
+    )
+    testCompileError(
+      #"[[e\u{301}-\u{302}]&&e\u{303}]"#,
+      .invalidCharacterClassRangeOperand("e\u{301}")
+    )
+  }
+
   func testCompileQuantification() throws {
 
     // NOTE: While we might change how we compile
@@ -317,6 +357,15 @@ extension RegexTests {
       semanticLevel: .unicodeScalar,
       contains: [.matchBitsetScalar],
       doesNotContain: [.matchBitset, .consumeBy])
+    expectProgram(
+      for: #"[\Qab\Ec]"#,
+      contains: [.matchBitset],
+      doesNotContain: [.consumeBy, .matchBitsetScalar])
+    expectProgram(
+      for: #"[\Qab\Ec]"#,
+      semanticLevel: .unicodeScalar,
+      contains: [.matchBitsetScalar],
+      doesNotContain: [.matchBitset, .consumeBy])
   }
 
   func testScalarOptimizeCompilation() {
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index a8f7977d6..8e01582a9 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -12,6 +12,7 @@
 import XCTest
 @testable import _RegexParser
 @testable import _StringProcessing
+import TestSupport
 
 struct MatchError: Error {
   var message: String
@@ -26,23 +27,33 @@ func _firstMatch(
   validateOptimizations: Bool,
   semanticLevel: RegexSemanticLevel = .graphemeCluster,
   syntax: SyntaxOptions = .traditional
-) throws -> (String, [String?]) {
+) throws -> (String, [String?])? {
   var regex = try Regex(regexStr, syntax: syntax).matchingSemantics(semanticLevel)
-  guard let result = try regex.firstMatch(in: input) else {
-    throw MatchError("match not found for \(regexStr) in \(input)")
-  }
-  let caps = result.output.slices(from: input)
-  
+  let result = try regex.firstMatch(in: input)
+
   if validateOptimizations {
     regex._setCompilerOptionsForTesting(.disableOptimizations)
-    guard let unoptResult = try regex.firstMatch(in: input) else {
+    let unoptResult = try regex.firstMatch(in: input)
+    if result != nil && unoptResult == nil {
       throw MatchError("match not found for unoptimized \(regexStr) in \(input)")
     }
-    XCTAssertEqual(
-      String(input[result.range]),
-      String(input[unoptResult.range]),
-      "Unoptimized regex returned a different result")
+    if result == nil && unoptResult != nil {
+      throw MatchError("match not found in optimized \(regexStr) in \(input)")
+    }
+    if let result = result, let unoptResult = unoptResult {
+      let optMatch = String(input[result.range])
+      let unoptMatch = String(input[unoptResult.range])
+      if optMatch != unoptMatch {
+        throw MatchError("""
+
+        Unoptimized regex returned: '\(unoptMatch)'
+        Optimized regex returned: '\(optMatch)'
+        """)
+      }
+    }
   }
+  guard let result = result else { return nil }
+  let caps = result.output.slices(from: input)
   return (String(input[result.range]), caps.map { $0.map(String.init) })
 }
 
@@ -153,12 +164,12 @@ func firstMatchTest(
   line: UInt = #line
 ) {
   do {
-    let (found, _) = try _firstMatch(
+    let found = try _firstMatch(
       regex,
       input: input,
       validateOptimizations: validateOptimizations,
       semanticLevel: semanticLevel,
-      syntax: syntax)
+      syntax: syntax)?.0
 
     if xfail {
       XCTAssertNotEqual(found, match, file: file, line: line)
@@ -166,9 +177,7 @@ func firstMatchTest(
       XCTAssertEqual(found, match, "Incorrect match", file: file, line: line)
     }
   } catch {
-    // FIXME: This allows non-matches to succeed even when xfail'd
-    // When xfail == true, this should report failure for match == nil
-    if !xfail && match != nil {
+    if !xfail {
       XCTFail("\(error)", file: file, line: line)
     }
     return
@@ -182,6 +191,7 @@ func firstMatchTests(
   enableTracing: Bool = false,
   dumpAST: Bool = false,
   xfail: Bool = false,
+  semanticLevel: RegexSemanticLevel = .graphemeCluster,
   file: StaticString = #filePath,
   line: UInt = #line
 ) {
@@ -194,6 +204,7 @@ func firstMatchTests(
       enableTracing: enableTracing,
       dumpAST: dumpAST,
       xfail: xfail,
+      semanticLevel: semanticLevel,
       file: file,
       line: line)
   }
@@ -303,6 +314,55 @@ extension RegexTests {
       match: "\u{006f}\u{031b}\u{0323}"
     )
 
+    // e + combining accents
+    firstMatchTest(
+      #"e\u{301 302 303}"#,
+      input: "e\u{301}\u{302}\u{303}",
+      match: "e\u{301}\u{302}\u{303}"
+    )
+    firstMatchTest(
+      #"e\u{315 35C 301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+    firstMatchTest(
+      #"e\u{301}\u{302 303}"#,
+      input: "e\u{301}\u{302}\u{303}",
+      match: "e\u{301}\u{302}\u{303}"
+    )
+    firstMatchTest(
+      #"e\u{35C}\u{315 301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+    firstMatchTest(
+      #"e\u{35C}\u{315 301}"#,
+      input: "e\u{315}\u{301}\u{35C}",
+      match: "e\u{315}\u{301}\u{35C}"
+    )
+    firstMatchTest(
+      #"e\u{301}\de\u{302}"#,
+      input: "e\u{301}0e\u{302}",
+      match: "e\u{301}0e\u{302}"
+    )
+    firstMatchTest(
+      #"(?x) e \u{35C} \u{315}(?#hello)\u{301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+    firstMatchTest(
+      #"(?x) e \u{35C} \u{315 301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+
+    // We don't coalesce across groups.
+    firstMatchTests(
+      #"e\u{301}(?:\u{315}\u{35C})?"#,
+      ("e\u{301}", "e\u{301}"),
+      ("e\u{301}\u{315}\u{35C}", nil)
+    )
+
     // Escape sequences that represent scalar values.
     firstMatchTest(#"\a[\b]\e\f\n\r\t"#,
                    input: "\u{7}\u{8}\u{1B}\u{C}\n\r\t",
@@ -311,8 +371,6 @@ extension RegexTests {
                    input: "\u{7}\u{8}\u{1B}\u{C}\n\r\t",
                    match: "\u{7}\u{8}\u{1B}\u{C}\n\r\t")
 
-    firstMatchTest(#"\r\n"#, input: "\r\n", match: "\r\n")
-
     // MARK: Quotes
 
     firstMatchTest(
@@ -428,8 +486,7 @@ extension RegexTests {
       "a++a",
       ("babc", nil),
       ("baaabc", nil),
-      ("bb", nil),
-      xfail: true)
+      ("bb", nil))
     firstMatchTests(
       "a+?a",
       ("babc", nil),
@@ -505,23 +562,19 @@ extension RegexTests {
       ("baabc", nil),
       ("bb", nil))
     
-    // XFAIL'd versions of the above
     firstMatchTests(
       "a{2,4}+a",
-      ("baaabc", nil),
-      xfail: true)
+      ("baaabc", nil))
     firstMatchTests(
       "a{,4}+a",
       ("babc", nil),
       ("baabc", nil),
-      ("baaabc", nil),
-      xfail: true)
+      ("baaabc", nil))
     firstMatchTests(
       "a{2,}+a",
       ("baaabc", nil),
       ("baaaaabc", nil),
-      ("baaaaaaaabc", nil),
-      xfail: true)
+      ("baaaaaaaabc", nil))
 
     // XFAIL'd possessive tests
     firstMatchTests(
@@ -568,6 +621,9 @@ extension RegexTests {
   }
 
   func testMatchCharacterClasses() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Character classes
 
     firstMatchTest(#"abc\d"#, input: "xyzabc123", match: "abc1")
@@ -691,6 +747,331 @@ extension RegexTests {
       ("a\u{301}", true),
       semanticLevel: .unicodeScalar)
 
+    // Scalar matching in quoted sequences.
+    firstMatchTests(
+      "[\\Qe\u{301}\\E]",
+      ("e", nil),
+      ("E", nil),
+      ("\u{301}", nil),
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("E\u{301}", nil),
+      ("\u{C9}", nil)
+    )
+    firstMatchTests(
+      "[\\Qe\u{301}\\E]",
+      ("e", "e"),
+      ("E", nil),
+      ("\u{301}", "\u{301}"),
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("E\u{301}", "\u{301}"),
+      ("\u{C9}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      "(?i)[\\Qe\u{301}\\E]",
+      ("e", nil),
+      ("E", nil),
+      ("\u{301}", nil),
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("E\u{301}", "E\u{301}"),
+      ("\u{C9}", "\u{C9}")
+    )
+    firstMatchTests(
+      "(?i)[\\Qe\u{301}\\E]",
+      ("e", "e"),
+      ("E", "E"),
+      ("\u{301}", "\u{301}"),
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("E\u{301}", "E"),
+      ("\u{C9}", nil),
+      semanticLevel: .unicodeScalar
+    )
+
+    // Scalar coalescing.
+    firstMatchTests(
+      #"[e\u{301}]"#,
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("e", nil),
+      ("\u{301}", nil)
+    )
+    firstMatchTests(
+      #"[e\u{301}]"#,
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("e", "e"),
+      ("\u{301}", "\u{301}"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[[[e\u{301}]]]"#,
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("e", nil),
+      ("\u{301}", nil)
+    )
+    firstMatchTests(
+      #"[[[e\u{301}]]]"#,
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("e", "e"),
+      ("\u{301}", "\u{301}"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[👨\u{200D}👩\u{200D}👧\u{200D}👦]"#,
+      ("👨", nil),
+      ("👩", nil),
+      ("👧", nil),
+      ("👦", nil),
+      ("\u{200D}", nil),
+      ("👨‍👩‍👧‍👦", "👨‍👩‍👧‍👦")
+    )
+    firstMatchTests(
+      #"[👨\u{200D}👩\u{200D}👧\u{200D}👦]"#,
+      ("👨", "👨"),
+      ("👩", "👩"),
+      ("👧", "👧"),
+      ("👦", "👦"),
+      ("\u{200D}", "\u{200D}"),
+      ("👨‍👩‍👧‍👦", "👨"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[e\u{315}\u{301}\u{35C}]"#,
+      ("e", nil),
+      ("e\u{315}", nil),
+      ("e\u{301}", nil),
+      ("e\u{315}\u{301}\u{35C}", "e\u{315}\u{301}\u{35C}"),
+      ("e\u{301}\u{315}\u{35C}", "e\u{301}\u{315}\u{35C}"),
+      ("e\u{35C}\u{301}\u{315}", "e\u{35C}\u{301}\u{315}")
+    )
+    firstMatchTests(
+      #"(?x) [ e \u{315} \u{301} \u{35C} ]"#,
+      ("e", nil),
+      ("e\u{315}", nil),
+      ("e\u{301}", nil),
+      ("e\u{315}\u{301}\u{35C}", "e\u{315}\u{301}\u{35C}"),
+      ("e\u{301}\u{315}\u{35C}", "e\u{301}\u{315}\u{35C}"),
+      ("e\u{35C}\u{301}\u{315}", "e\u{35C}\u{301}\u{315}")
+    )
+
+    // We don't coalesce across character classes.
+    firstMatchTests(
+      #"e[\u{315}\u{301}\u{35C}]"#,
+      ("e", nil),
+      ("e\u{315}", nil),
+      ("e\u{315}\u{301}", nil),
+      ("e\u{301}\u{315}\u{35C}", nil)
+    )
+    firstMatchTests(
+      #"[e[\u{301}]]"#,
+      ("e", "e"),
+      ("\u{301}", "\u{301}"),
+      ("e\u{301}", nil)
+    )
+
+    firstMatchTests(
+      #"[a-z1\u{E9}-\u{302}\u{E1}3-59]"#,
+      ("a", "a"),
+      ("a\u{301}", "a\u{301}"),
+      ("\u{E1}", "\u{E1}"),
+      ("\u{E2}", nil),
+      ("z", "z"),
+      ("e", "e"),
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("\u{302}", "\u{302}"),
+      ("1", "1"),
+      ("2", nil),
+      ("3", "3"),
+      ("4", "4"),
+      ("5", "5"),
+      ("6", nil),
+      ("7", nil),
+      ("8", nil),
+      ("9", "9")
+    )
+    firstMatchTests(
+      #"[ab-df-hik-lm]"#,
+      ("a", "a"),
+      ("b", "b"),
+      ("c", "c"),
+      ("d", "d"),
+      ("e", nil),
+      ("f", "f"),
+      ("g", "g"),
+      ("h", "h"),
+      ("i", "i"),
+      ("j", nil),
+      ("k", "k"),
+      ("l", "l"),
+      ("m", "m")
+    )
+    firstMatchTests(
+      #"[a-ce-fh-j]"#,
+      ("a", "a"),
+      ("b", "b"),
+      ("c", "c"),
+      ("d", nil),
+      ("e", "e"),
+      ("f", "f"),
+      ("g", nil),
+      ("h", "h"),
+      ("i", "i"),
+      ("j", "j")
+    )
+
+
+    // These can't compile in grapheme semantic mode, but make sure they work in
+    // scalar semantic mode.
+    firstMatchTests(
+      #"[a\u{315}\u{301}-\u{302}]"#,
+      ("a", "a"),
+      ("\u{315}", "\u{315}"),
+      ("\u{301}", "\u{301}"),
+      ("\u{302}", "\u{302}"),
+      ("\u{303}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[\u{73}\u{323}\u{307}-\u{1E00}]"#,
+      ("\u{73}", "\u{73}"),
+      ("\u{323}", "\u{323}"),
+      ("\u{307}", "\u{307}"),
+      ("\u{400}", "\u{400}"),
+      ("\u{500}", "\u{500}"),
+      ("\u{1E00}", "\u{1E00}"),
+      ("\u{1E01}", nil),
+      ("\u{1E69}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[a\u{302}-✅]"#,
+      ("a", "a"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "\u{302}"),
+      ("E\u{301}", nil),
+      ("a\u{301}", "a"),
+      ("\u{E1}", nil),
+      ("a\u{302}", "a"),
+      ("\u{E2}", nil),
+      ("\u{E3}", nil),
+      ("\u{EF}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", "\u{302}"),
+      ("\u{2705}", "\u{2705}"),
+      ("✅", "✅"),
+      ("\u{376}", "\u{376}"),
+      ("\u{850}", "\u{850}"),
+      ("a\u{302}\u{315}", "a"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"(?i)[a\u{302}-✅]"#,
+      ("a", "a"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "A"),
+      ("E\u{301}", nil),
+      ("a\u{301}", "a"),
+      ("\u{E1}", nil),
+      ("a\u{302}", "a"),
+      ("\u{E2}", nil),
+      ("\u{E3}", nil),
+      ("\u{EF}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", "\u{302}"),
+      ("\u{2705}", "\u{2705}"),
+      ("✅", "✅"),
+      ("\u{376}", "\u{376}"),
+      ("\u{850}", "\u{850}"),
+      ("a\u{302}\u{315}", "a"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[e\u{301}-\u{302}]"#,
+      ("a", nil),
+      ("e", "e"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "\u{302}"),
+      ("E\u{301}", "\u{301}"),
+      ("\u{C8}", nil),
+      ("\u{C9}", nil),
+      ("\u{CA}", nil),
+      ("\u{CB}", nil),
+      ("a\u{301}", "\u{301}"),
+      ("a\u{302}", "\u{302}"),
+      ("e\u{301}", "e"),
+      ("e\u{302}", "e"),
+      ("\u{E1}", nil),
+      ("\u{E2}", nil),
+      ("\u{E9}", nil),
+      ("\u{EA}", nil),
+      ("\u{EF}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"(?i)[e\u{301}-\u{302}]"#,
+      ("a", nil),
+      ("e", "e"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "\u{302}"),
+      ("E\u{301}", "E"),
+      ("\u{C8}", nil),
+      ("\u{C9}", nil),
+      ("\u{CA}", nil),
+      ("\u{CB}", nil),
+      ("a\u{301}", "\u{301}"),
+      ("a\u{302}", "\u{302}"),
+      ("e\u{301}", "e"),
+      ("e\u{302}", "e"),
+      ("\u{E1}", nil),
+      ("\u{E2}", nil),
+      ("\u{E9}", nil),
+      ("\u{EA}", nil),
+      ("\u{EF}", nil),
+      semanticLevel: .unicodeScalar
+    )
+
+    // Set operation scalar coalescing.
+    firstMatchTests(
+      #"[e\u{301}&&e\u{301}e\u{302}]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("e\u{301}", "e\u{301}"),
+      ("e\u{302}", nil))
+    firstMatchTests(
+      #"[e\u{301}~~[[e\u{301}]e\u{302}]]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", "e\u{302}"))
+    firstMatchTests(
+      #"[e\u{301}[e\u{303}]--[[e\u{301}]e\u{302}]]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("\u{303}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", nil),
+      ("e\u{303}", "e\u{303}"))
+
+    firstMatchTests(
+      #"(?x) [ e \u{301} [ e \u{303} ] -- [ [ e \u{301} ] e \u{302} ] ]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("\u{303}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", nil),
+      ("e\u{303}", "e\u{303}"))
+
     firstMatchTest("[-]", input: "123-abcxyz", match: "-")
 
     // These are metacharacters in certain contexts, but normal characters
@@ -773,6 +1154,15 @@ extension RegexTests {
     }
     firstMatchTest(#"[\t-\t]"#, input: "\u{8}\u{A}\u{9}", match: "\u{9}")
 
+    firstMatchTest(#"[12]"#, input: "1️⃣", match: nil)
+    firstMatchTest(#"[1-2]"#, input: "1️⃣", match: nil)
+    firstMatchTest(#"[\d]"#, input: "1️⃣", match: "1️⃣")
+    firstMatchTest(#"(?P)[\d]"#, input: "1️⃣", match: nil)
+    firstMatchTest("[0-2&&1-3]", input: "1️⃣", match: nil)
+    firstMatchTest("[1-2e\u{301}]", input: "1️⃣", match: nil)
+
+    firstMatchTest(#"[\u{3A9}-\u{3A9}]"#, input: "\u{3A9}", match: "\u{3A9}")
+
     // Currently not supported in the matching engine.
     for c: UnicodeScalar in ["a", "b", "c"] {
       firstMatchTest(#"[\c!-\C-#]"#, input: "def\(c)", match: "\(c)",
@@ -826,6 +1216,35 @@ extension RegexTests {
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: "abc",
                    syntax: .experimental)
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: #""abc""#)
+
+    for semantics in [RegexSemanticLevel.unicodeScalar, .graphemeCluster] {
+      // Case sensitivity and ranges. Every check passes `semantics` so that
+      // both semantic levels are actually exercised.
+      for input in "abcD".map(String.init) {
+        firstMatchTest(
+          "[a-cD]", input: input, match: input, semanticLevel: semantics)
+      }
+      for input in "ABCd".map(String.init) {
+        firstMatchTest(
+          "[a-cD]", input: input, match: nil, semanticLevel: semantics)
+      }
+      for input in "abcABCdD".map(String.init) {
+        firstMatchTest(
+          "(?i)[a-cd]", input: input, match: input, semanticLevel: semantics)
+        firstMatchTest(
+          "(?i)[A-CD]", input: input, match: input, semanticLevel: semantics)
+      }
+      for input in "XYZ[\\]^_`abcd".map(String.init) {
+        firstMatchTest(
+          "[X-cd]", input: input, match: input, semanticLevel: semantics)
+      }
+      for input in "XYZ[\\]^_`abcxyzABCdD".map(String.init) {
+        firstMatchTest(
+          "(?i)[X-cd]", input: input, match: input, semanticLevel: semantics)
+        firstMatchTest(
+          "(?i)[X-cD]", input: input, match: input, semanticLevel: semantics)
+      }
+    }
   }
 
   func testCharacterProperties() {
@@ -1038,6 +1457,9 @@ extension RegexTests {
   }
 
   func testMatchAnchors() throws {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Anchors
     firstMatchTests(
       #"^\d+"#,
@@ -1086,8 +1508,6 @@ extension RegexTests {
       (" 123\n456\n", nil),
       ("123 456", "456"))
 
-    // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
     firstMatchTests(
       #"\d+\b"#,
       ("123", "123"),
@@ -1105,7 +1525,6 @@ extension RegexTests {
       ("123", "23"),
       (" 123", "23"),
       ("123 456", "23"))
-#endif
 
     // TODO: \G and \K
     do {
@@ -1118,8 +1537,8 @@ extension RegexTests {
     // TODO: Oniguruma \y and \Y
     firstMatchTests(
       #"\u{65}"#,             // Scalar 'e' is present in both
-      ("Cafe\u{301}", nil),   // but scalar mode requires boundary at end of match
-      xfail: true)
+      ("Cafe\u{301}", nil))   // but scalar mode requires boundary at end of match
+
     firstMatchTests(
       #"\u{65}"#,             // Scalar 'e' is present in both
       ("Sol Cafe", "e"))      // standalone is okay
@@ -1136,9 +1555,10 @@ extension RegexTests {
       ("Sol Cafe", nil), xfail: true)
   }
 
-  // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
   func testLevel2WordBoundaries() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Level 2 Word Boundaries
     firstMatchTest(#"\b😊\b"#, input: "🔥😊👍", match: "😊")
     firstMatchTest(#"\b👨🏽\b"#, input: "👩🏻👶🏿👨🏽🧑🏾👩🏼", match: "👨🏽")
@@ -1154,9 +1574,11 @@ extension RegexTests {
     firstMatchTest(#"can\B\'\Bt"#, input: "I can't do that.", match: "can't")
     firstMatchTest(#"\b÷\b"#, input: "3 ÷ 3 = 1", match: "÷")
   }
-#endif
-  
+
   func testMatchGroups() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Groups
 
     // Named captures
@@ -1380,6 +1802,9 @@ extension RegexTests {
   }
   
   func testMatchExamples() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // Backreferences
     matchTest(
       #"(sens|respons)e and \1ibility"#,
@@ -1429,8 +1854,6 @@ extension RegexTests {
       xfail: true
     )
 
-    // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
     // HTML tags
     matchTest(
       #"<([a-zA-Z][a-zA-Z0-9]*)\b[^>]*>.*?</\1>"#,
@@ -1448,7 +1871,6 @@ extension RegexTests {
       ("pass me the the kettle", ["the"]),
       ("this doesn't have any", nil)
     )
-#endif
 
     // Floats
     flatCaptureTest(
@@ -1464,8 +1886,79 @@ extension RegexTests {
     firstMatchTest(#".+"#, input: "a\nb", match: "a")
     firstMatchTest(#"(?s:.+)"#, input: "a\nb", match: "a\nb")
   }
+
+  func testMatchNewlines() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
+    for semantics in [RegexSemanticLevel.unicodeScalar, .graphemeCluster] {
+      firstMatchTest(
+        #"\r\n"#, input: "\r\n", match: "\r\n",
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"\r\n"#, input: "\n", match: nil, semanticLevel: semantics)
+      firstMatchTest(
+        #"\r\n"#, input: "\r", match: nil, semanticLevel: semantics)
+
+      // \r\n is not treated as ASCII.
+      firstMatchTest(
+        #"^\p{ASCII}$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^\r$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^[\r]$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^\n$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^[\n]$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^[\u{0}-\u{7F}]$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+
+      let scalarSemantics = semantics == .unicodeScalar
+      firstMatchTest(
+        #"\p{ASCII}"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"\r"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"[\r]"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"\n"#, input: "\r\n", match:  scalarSemantics ? "\n" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"[\n]"#, input: "\r\n", match:  scalarSemantics ? "\n" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"[\u{0}-\u{7F}]"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+    }
+  }
   
   func testCaseSensitivity() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     matchTest(
       #"c..e"#,
       ("cafe", true),
@@ -1528,6 +2021,9 @@ extension RegexTests {
   }
   
   func testASCIIClasses() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // 'D' ASCII-only digits
     matchTest(
       #"\d+"#,
@@ -1556,8 +2052,6 @@ extension RegexTests {
       ("aeiou", true),
       ("åe\u{301}ïôú", false))
 
-    // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
     matchTest(
       #"abcd\b.+"#,
       ("abcd ef", true),
@@ -1573,7 +2067,6 @@ extension RegexTests {
       ("abcd ef", true),
       ("abcdef", false),
       ("abcdéf", false))
-#endif
 
     // 'S' ASCII-only spaces
     matchTest(
@@ -1699,6 +2192,9 @@ extension RegexTests {
   var eComposed: String { "é" }
   var eDecomposed: String { "e\u{301}" }
   
+  var eComposedUpper: String { "É" }
+  var eDecomposedUpper: String { "E\u{301}" }
+
   func testIndividualScalars() {
     // Expectation: A standalone Unicode scalar value in a regex literal
     // can match either that specific scalar value or participate in matching
@@ -1711,19 +2207,15 @@ extension RegexTests {
     firstMatchTest(#"\u{65 301}$"#, input: eComposed, match: eComposed)
 
     // FIXME: Implicit \y at end of match
-    firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil,
-      xfail: true)
+    firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil)
     firstMatchTest(#"\u{65}$"#, input: eDecomposed, match: nil)
-    // FIXME: \y is unsupported
-    firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil,
-      xfail: true)
+    firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil)
 
     // FIXME: Unicode scalars are only matched at the start of a grapheme cluster
     firstMatchTest(#"\u{301}"#, input: eDecomposed, match: "\u{301}",
       xfail: true)
-    // FIXME: \y is unsupported
-    firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil,
-      xfail: true)
+
+    firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil)
   }
 
   func testCanonicalEquivalence() throws {
@@ -1745,6 +2237,16 @@ extension RegexTests {
       #"e$"#,
       (eComposed, false),
       (eDecomposed, false))
+
+    matchTest(
+      #"\u{65 301}"#,
+      (eComposed, true),
+      (eDecomposed, true))
+
+    matchTest(
+      #"(?x) \u{65} \u{301}"#,
+      (eComposed, true),
+      (eDecomposed, true))
   }
 
   func testCanonicalEquivalenceCharacterClass() throws {
@@ -1781,41 +2283,70 @@ extension RegexTests {
     // \s
     firstMatchTest(#"\s"#, input: " ", match: " ")
     // FIXME: \s shouldn't match a number composed with a non-number character
-    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
-              xfail: true)
+    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
     // \p{Whitespace}
     firstMatchTest(#"\s"#, input: " ", match: " ")
-    // FIXME: \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
-    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
-              xfail: true)
+    // \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
+    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
   }
   
   func testCanonicalEquivalenceCustomCharacterClass() throws {
-    // Expectation: Concatenations with custom character classes should be able
-    // to match within a grapheme cluster. That is, a regex should be able to
-    // match the scalar values that comprise a grapheme cluster in separate,
-    // or repeated, custom character classes.
-    
+    // Expectation: Custom character class matches do not cross grapheme
+    // cluster boundaries by default. When matching with Unicode scalar
+    // semantics, grapheme cluster boundaries are ignored, so matching
+    // sequences of custom character classes can succeed.
+
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     matchTest(
       #"[áéíóú]$"#,
       (eComposed, true),
       (eDecomposed, true))
 
-    // FIXME: Custom char classes don't use canonical equivalence with composed characters
-    firstMatchTest(#"e[\u{301}]$"#, input: eComposed, match: eComposed,
-              xfail: true)
-    firstMatchTest(#"e[\u{300}-\u{320}]$"#, input: eComposed, match: eComposed,
-              xfail: true)
-    firstMatchTest(#"[a-z][\u{300}-\u{320}]$"#, input: eComposed, match: eComposed,
-              xfail: true)
+    for input in [eDecomposed, eComposed] {
+      // Unicode scalar semantics means that only the decomposed version can
+      // match here.
+      let match = input.unicodeScalars.count == 2 ? input : nil
+      firstMatchTest(
+        #"e[\u{301}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"e[\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"[e][\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"[e-e][\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"[a-z][\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+    }
+    for input in [eComposed, eDecomposed] {
+      // Grapheme cluster semantics means that we can't match the 'e' separately
+      // from the accent.
+      firstMatchTest(#"e[\u{301}]$"#, input: input, match: nil)
+      firstMatchTest(#"e[\u{300}-\u{320}]$"#, input: input, match: nil)
+      firstMatchTest(#"[e][\u{300}-\u{320}]$"#, input: input, match: nil)
+      firstMatchTest(#"[e-e][\u{300}-\u{320}]$"#, input: input, match: nil)
+      firstMatchTest(#"[a-z][\u{300}-\u{320}]$"#, input: input, match: nil)
+
+      // A range that covers é (U+E9). Inputs are mapped to NFC, so both forms match.
+      firstMatchTest(#"[\u{E8}-\u{EA}]"#, input: input, match: input)
+    }
 
-    // FIXME: Custom char classes don't match decomposed characters
-    firstMatchTest(#"e[\u{301}]$"#, input: eDecomposed, match: eDecomposed,
-              xfail: true)
-    firstMatchTest(#"e[\u{300}-\u{320}]$"#, input: eDecomposed, match: eDecomposed,
-              xfail: true)
-    firstMatchTest(#"[a-z][\u{300}-\u{320}]$"#, input: eDecomposed, match: eDecomposed,
-              xfail: true)
+    // A range that covers É (U+C9). Inputs are mapped to NFC, so both forms match.
+    for input in [eComposedUpper, eDecomposedUpper] {
+      firstMatchTest(#"[\u{C8}-\u{CA}]"#, input: input, match: input)
+      firstMatchTest(#"[\u{C9}-\u{C9}]"#, input: input, match: input)
+    }
+    // Case insensitive matching of É (U+C9).
+    for input in [eComposed, eDecomposed, eComposedUpper, eDecomposedUpper] {
+      firstMatchTest(#"(?i)[\u{C8}-\u{CA}]"#, input: input, match: input)
+      firstMatchTest(#"(?i)[\u{C9}-\u{C9}]"#, input: input, match: input)
+    }
 
     let flag = "🇰🇷"
     firstMatchTest(#"🇰🇷"#, input: flag, match: flag)
@@ -1824,27 +2355,33 @@ extension RegexTests {
     firstMatchTest(#"\u{1F1F0 1F1F7}"#, input: flag, match: flag)
 
     // First Unicode scalar followed by CCC of regional indicators
-    firstMatchTest(#"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: flag,
-              xfail: true)
-
-    // FIXME: CCC of Regional Indicator doesn't match with both parts of a flag character
+    firstMatchTest(
+      #"^\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
+    // A CCC of regional indicators followed by the second Unicode scalar
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]\u{1F1F7}$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
     // A CCC of regional indicators x 2
-    firstMatchTest(#"[\u{1F1E6}-\u{1F1FF}]{2}"#, input: flag, match: flag,
-              xfail: true)
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]{2}$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
+    // A CCC of N regional indicators
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]+$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
 
-    // FIXME: A single CCC of regional indicators matches the whole flag character
-    // A CCC of regional indicators followed by the second Unicode scalar
-    firstMatchTest(#"[\u{1F1E6}-\u{1F1FF}]\u{1F1F7}"#, input: flag, match: flag,
-              xfail: true)
     // A single CCC of regional indicators
-    firstMatchTest(#"[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: nil,
-              xfail: true)
-    
-    // A single CCC of actual flag emojis / combined regional indicators
-    firstMatchTest(#"[🇦🇫-🇿🇼]"#, input: flag, match: flag)
-    // This succeeds (correctly) because \u{1F1F0} is lexicographically
-    // within the CCC range
-    firstMatchTest(#"[🇦🇫-🇿🇼]"#, input: "\u{1F1F0}abc", match: "\u{1F1F0}")
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]$"#, input: flag, match: nil)
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]$"#, input: flag, match: nil,
+      semanticLevel: .unicodeScalar
+    )
   }
   
   func testAnyChar() throws {
diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift
index 52a272915..84ce361f3 100644
--- a/Tests/RegexTests/ParseTests.swift
+++ b/Tests/RegexTests/ParseTests.swift
@@ -374,10 +374,21 @@ extension RegexTests {
 
     // MARK: Allowed combining characters
 
-    parseTest("e\u{301}", "e\u{301}")
     parseTest("1\u{358}", "1\u{358}")
     parseTest(#"\ \#u{361}"#, " \u{361}")
 
+    parseTest("e\u{301}", "e\u{301}")
+    parseTest("[e\u{301}]", charClass("e\u{301}"))
+    parseTest("\u{E9}", "e\u{301}")
+    parseTest("[\u{E9}]", charClass("e\u{301}"))
+
+    parseTest(
+      "\\e\u{301}", "e\u{301}", throwsError: .invalidEscape("e\u{301}"))
+    parseTest(
+      "[\\e\u{301}]", charClass("e\u{301}"),
+      throwsError: .invalidEscape("e\u{301}")
+    )
+
     // MARK: Alternations
 
     parseTest(
@@ -2885,11 +2896,41 @@ extension RegexTests {
     diagnosticTest(#"[a-\Qbc\E]"#, .unsupported("range with quoted sequence"))
     diagnosticTest(#"[\Qbc\E-de]"#, .unsupported("range with quoted sequence"))
 
+    diagnosticTest(#"|([🇦🇫-🇿🇼])?"#, .invalidCharacterClassRangeOperand)
+    diagnosticTest(#"|([👨‍👩‍👦-👩‍👩‍👧‍👧])?"#, .invalidCharacterClassRangeOperand)
+
+    // Not single-scalar NFC.
+    diagnosticTest("[e\u{301}-e\u{302}]", .invalidCharacterClassRangeOperand)
+
+    // These scalar values expand under NFC.
+    let nfcExpandingScalars: [UInt32] = [
+      0x344, 0x958, 0x959, 0x95A, 0x95B, 0x95C, 0x95D, 0x95E, 0x95F, 0x9DC,
+      0x9DD, 0x9DF, 0xA33, 0xA36, 0xA59, 0xA5A, 0xA5B, 0xA5E, 0xB5C, 0xB5D,
+      0xF43, 0xF4D, 0xF52, 0xF57, 0xF5C, 0xF69, 0xF73, 0xF75, 0xF76, 0xF78,
+      0xF81, 0xF93, 0xF9D, 0xFA2, 0xFA7, 0xFAC, 0xFB9, 0x2ADC, 0xFB1D, 0xFB1F,
+      0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E, 0xFB2F, 0xFB30, 0xFB31, 0xFB32,
+      0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38, 0xFB39, 0xFB3A, 0xFB3B, 0xFB3C,
+      0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44, 0xFB46, 0xFB47, 0xFB48, 0xFB49,
+      0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E, 0x1D15E, 0x1D15F, 0x1D160,
+      0x1D161, 0x1D162, 0x1D163, 0x1D164, 0x1D1BB, 0x1D1BC, 0x1D1BD, 0x1D1BE,
+      0x1D1BF, 0x1D1C0
+    ]
+    for scalar in nfcExpandingScalars {
+      let hex = String(scalar, radix: 16)
+      diagnosticTest(
+        #"[\u{\#(hex)}-\u{\#(hex)}]"#, .invalidCharacterClassRangeOperand)
+    }
+
+    // The NFC form of U+2126 is U+3A9.
+    diagnosticTest(#"[\u{2126}-\u{2126}]"#, .invalidCharacterClassRangeOperand)
+
     diagnosticTest(#"[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
     diagnosticTest(#"(?i)[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
     diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b"))
     diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}"))
 
+    diagnosticTest(#"[e\u{301}-e\u{302}]"#, .invalidCharacterRange(from: "\u{301}", to: "e"))
+
     diagnosticTest("(?x)[(?#)]", .expected("]"))
     diagnosticTest("(?x)[(?#abc)]", .expected("]"))
 
diff --git a/Tests/RegexTests/RenderDSLTests.swift b/Tests/RegexTests/RenderDSLTests.swift
index 3b0a8d5b3..e925d255c 100644
--- a/Tests/RegexTests/RenderDSLTests.swift
+++ b/Tests/RegexTests/RenderDSLTests.swift
@@ -171,10 +171,71 @@ extension RenderDSLTests {
       }
       """#)
 
-    // TODO: We ought to try and preserve the scalar syntax here.
     try testConversion(#"a\u{301}"#, #"""
       Regex {
-        "á"
+        "a\u{301}"
+      }
+      """#)
+
+    try testConversion(#"(?x) a \u{301}"#, #"""
+      Regex {
+        "a\u{301}"
+      }
+      """#)
+
+    try testConversion(#"(?x) [ a b c \u{301} ] "#, #"""
+      Regex {
+        One(.anyOf("abc\u{301}"))
+      }
+      """#)
+
+    try testConversion(#"👨\u{200D}👨\u{200D}👧\u{200D}👦"#, #"""
+      Regex {
+        "👨\u{200D}👨\u{200D}👧\u{200D}👦"
+      }
+      """#)
+
+    try testConversion(#"(👨\u{200D}👨)\u{200D}👧\u{200D}👦"#, #"""
+      Regex {
+        Capture {
+          "👨\u{200D}👨"
+        }
+        "\u{200D}👧\u{200D}👦"
+      }
+      """#)
+
+    // We preserve the structure of non-capturing groups.
+    try testConversion(#"abcd(?:e\u{301}\d)"#, #"""
+      Regex {
+        "abcd"
+        Regex {
+          "e\u{301}"
+          One(.digit)
+        }
+      }
+      """#)
+
+    try testConversion(#"\u{A B C}"#, #"""
+      Regex {
+        "\u{A}\u{B}\u{C}"
+      }
+      """#)
+
+    // TODO: We might want to consider preserving scalar sequences in the DSL,
+    // and allowing them to merge with other concatenations.
+    try testConversion(#"\u{A B C}\u{d}efg"#, #"""
+      Regex {
+        "\u{A}\u{B}\u{C}"
+        "\u{D}efg"
+      }
+      """#)
+
+    // FIXME: We don't actually have a way of specifying in the DSL that we
+    // shouldn't join these together. Should we print them as regex instead?
+    try testConversion(#"a(?:\u{301})"#, #"""
+      Regex {
+        "a"
+        "\u{301}"
       }
       """#)
   }
diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift
index aa3639ea6..11479bfb6 100644
--- a/Tests/RegexTests/UTS18Tests.swift
+++ b/Tests/RegexTests/UTS18Tests.swift
@@ -21,6 +21,7 @@
 import XCTest
 @testable // for internal `matches(of:)`
 import _StringProcessing
+import TestSupport
 
 extension UnicodeScalar {
   var value4Digits: String {
@@ -316,6 +317,9 @@ extension UTS18Tests {
   // surrogate followed by a trailing surrogate shall be handled as a single
   // code point in matching.
   func testSupplementaryCodePoints() {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     XCTAssertTrue("👍".contains(regex(#"\u{1F44D}"#)))
     XCTAssertTrue("👍".contains(regex(#"[\u{1F440}-\u{1F44F}]"#)))
     XCTAssertTrue("👍👎".contains(regex(#"^[\u{1F440}-\u{1F44F}]+$"#)))
@@ -388,6 +392,9 @@ extension UTS18Tests {
   }
   
   func testCharacterClassesWithStrings() {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     let regex = regex(#"[a-z🧐🇧🇪🇧🇫🇧🇬]"#)
     XCTAssertEqual("🧐", "🧐".wholeMatch(of: regex)?.0)
     XCTAssertEqual("🇧🇫", "🇧🇫".wholeMatch(of: regex)?.0)