cometkim · cometkim · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/.changeset/silver-flowers-refuse.md b/.changeset/silver-flowers-refuse.md
@@ -0,0 +1,10 @@
+---
+"unicode-segmenter": minor
+---
+
+Removed deprecated APIs:
+
+- `searchGrapheme` in `unicode-segmenter/grapheme`
+- `takeChar` and `takeCodePoint` in `unicode-segmenter/utils`
+
+These were only used internally in the past, and never from outside the package.
diff --git a/README.md b/README.md
@@ -254,7 +254,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name                         | Unicode® | ESM? |   Size    | Size (min) | Size (min+gzip) | Size (min+br) |
 |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|
-| `unicode-segmenter/grapheme` |   16.0.0 |    ✔️ |    15,929 |     12,110 |           5,049 |         3,740 |
+| `unicode-segmenter/grapheme` |   16.0.0 |    ✔️ |    15,929 |     12,110 |           5,050 |         3,738 |
 | `graphemer`                  |   15.0.0 |    ✖️ ️|   410,435 |     95,104 |          15,752 |        10,660 |
 | `grapheme-splitter`          |   10.0.0 |    ✖️ |   122,252 |     23,680 |           7,852 |         4,841 |
 | `@formatjs/intl-segmenter`*  |   15.0.0 |    ✖️ |   491,043 |    318,721 |          54,248 |        34,380 |
@@ -270,7 +270,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name                         | Bytecode size | Bytecode size (gzip)* |
 |------------------------------|--------------:|----------------------:|
-| `unicode-segmenter/grapheme` |        23,037 |                12,058 |
+| `unicode-segmenter/grapheme` |        21,997 |                11,505 |
 | `graphemer`                  |       133,952 |                31,708 |
 | `grapheme-splitter`          |        63,813 |                19,123 |
 

diff --git a/src/grapheme.js b/src/grapheme.js
@@ -31,26 +31,7 @@ import { consonant_ranges } from './_incb_data.js';
  * @typedef {import('./core.js').Segmenter<GraphemeSegmentExtra>} GraphemeSegmenter
  */
 
-export {
-  /**
-   * @deprecated DO NOT USE directly, will be removed in v1
-   */
-  searchGraphemeCategory as searchGrapheme,
-  GraphemeCategory,
-};
-
-/**
- * @deprecated DO NOT USE directly, will be removed in v1
- * @param {number} cp
- * @return A {@link GraphemeCategoryRange} value if found, or garbage value with {@link GC_Any} category.
- */
-export function searchGraphemeCategory(cp) {
-  let index = findUnicodeRangeIndex(cp, grapheme_ranges);
-  if (index < 0) {
-    return [0, 0, 0 /* GC_Any */];
-  }
-  return grapheme_ranges[index];
-}
+export { GraphemeCategory };
 
 /**
  * Unicode segmentation by extended grapheme rules.

diff --git a/src/utils.js b/src/utils.js
@@ -1,55 +1,5 @@
 // @ts-check
 
-/**
- * Take a Unicode code point from the given input by cursor
- *
- * @deprecated
- * Use this only if `String.prototype.codePointAt()` isn't available on the host environment
- *
- * @param {string} input
- * @param {number} cursor
- * @param {number} [length] length of input
- * @return {number} a code point of the character
- */
-export function takeCodePoint(input, cursor, length = input.length) {
-  let hi = input.charCodeAt(cursor);
-  if (isHighSurrogate(hi)) {
-    if (cursor + 1 < length) {
-      let lo = input.charCodeAt(cursor + 1);
-      if (isLowSurrogate(lo)) {
-        return surrogatePairToCodePoint(hi, lo);
-      }
-    }
-  }
-  return hi;
-}
-
-/**
- * Take a UTF-8 char from the given input by cursor
- *
- * @deprecated
- * Use this only if `String.fromCodePoint()` isn't available on the host environment
- *
- * @param {string} input
- * @param {number} cursor
- * @param {number} [length] length of input
- * @return {string} a UTF-8 character (its `.length` will be 1 or 2)
- */
-export function takeChar(input, cursor, length = input.length) {
-  let hi = input.charCodeAt(cursor);
-  if (isHighSurrogate(hi)) {
-    if (cursor + 1 < length) {
-      let lo = input.charCodeAt(cursor + 1);
-      if (isLowSurrogate(lo)) {
-        // This seems to be much slower in V8
-        // return String.fromCharCode(hi, lo);
-        return String.fromCharCode(hi) + String.fromCharCode(lo);
-      }
-    }
-  }
-  return String.fromCharCode(hi);
-}
-
 /** 
  * @param {number} c UTF-16 code point
  */

diff --git a/test/utils.js b/test/utils.js
@@ -1,12 +1,9 @@
 // @ts-check
 
 import { test } from 'node:test';
-import * as assert from 'node:assert/strict';
 import fc from 'fast-check';
 
 import {
-  takeChar,
-  takeCodePoint,
   isBMP,
   isSMP,
   isSIP,
@@ -20,64 +17,6 @@ fc.configureGlobal({
   numRuns: 100_000,
 });
 
-test('takeChar', async t => {
-  await t.test('ascii', () => {
-    fc.assert(
-      fc.property(
-        fc.string({ unit: 'binary-ascii', minLength: 1, maxLength: 1 }),
-        // @ts-ignore
-        fc.string({ unit: 'grapheme' }),
-        (data, extra) => {
-          return takeChar(data + extra, 0).length === 1;
-        }
-      ),
-    );
-  });
-
-  await t.test('over BMP', () => {
-    fc.assert(
-      fc.property(
-        fc.integer({ min: 0xffff + 1, max: 0x10ffff }),
-        // @ts-ignore
-        fc.string({ unit: 'grapheme' }),
-        (data, extra) => {
-          let leading = String.fromCodePoint(data);
-          return takeChar(leading + extra, 0).length === 2;
-        },
-      ),
-    );
-  });
-});
-
-test('takeCodePoint', async t => {
-  await t.test('ascii', () => {
-    fc.assert(
-      fc.property(
-        fc.string({ unit: 'binary-ascii', minLength: 1, maxLength: 1 }),
-        // @ts-ignore
-        fc.string({ unit: 'grapheme' }),
-        (data, extra) => {
-          return takeCodePoint(data + extra, 0) === (data + extra).codePointAt(0);
-        },
-      ),
-    );
-  });
-
-  await t.test('over BMP', () => {
-    fc.assert(
-      fc.property(
-        fc.integer({ min: 0xffff + 1, max: 0x10ffff }),
-        // @ts-ignore
-        fc.string({ unit: 'grapheme' }),
-        (data, extra) => {
-          let leading = String.fromCodePoint(data);
-          return takeCodePoint(leading + extra, 0) === (leading + extra).codePointAt(0);
-        },
-      ),
-    );
-  });
-});
-
 test('isBMP', () => {
   fc.assert(
     fc.property(