Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .changeset/silver-flowers-refuse.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"unicode-segmenter": minor
---

Removed deprecated APIs

- `searchGrapheme` in `unicode-segmenter/grapheme`
- `takeChar` and `takeCodePoint` in `unicode-segmenter/utils`

These were previously used only internally, and never from outside the package.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb

| Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) |
|------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,929 | 12,110 | 5,049 | 3,740 |
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,929 | 12,110 | 5,050 | 3,738 |
| `graphemer` | 15.0.0 | ✖️ ️| 410,435 | 95,104 | 15,752 | 10,660 |
| `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 |
| `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 491,043 | 318,721 | 54,248 | 34,380 |
Expand All @@ -270,7 +270,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb

| Name | Bytecode size | Bytecode size (gzip)* |
|------------------------------|--------------:|----------------------:|
| `unicode-segmenter/grapheme` | 23,037 | 12,058 |
| `unicode-segmenter/grapheme` | 21,997 | 11,505 |
| `graphemer` | 133,952 | 31,708 |
| `grapheme-splitter` | 63,813 | 19,123 |

Expand Down
21 changes: 1 addition & 20 deletions src/grapheme.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,7 @@ import { consonant_ranges } from './_incb_data.js';
* @typedef {import('./core.js').Segmenter<GraphemeSegmentExtra>} GraphemeSegmenter
*/

export {
/**
* @deprecated DO NOT USE directly, will be removed in v1
*/
searchGraphemeCategory as searchGrapheme,
GraphemeCategory,
};

/**
 * Look up `cp` in `grapheme_ranges` using `findUnicodeRangeIndex`.
 *
 * @deprecated DO NOT USE directly, will be removed in v1
 * @param {number} cp
 * @return The matching {@link GraphemeCategoryRange} entry when the lookup succeeds,
 *   otherwise a placeholder `[0, 0, 0]` tuple carrying the {@link GC_Any} category.
 */
export function searchGraphemeCategory(cp) {
  const found = findUnicodeRangeIndex(cp, grapheme_ranges);
  // A negative index signals "no range matched"; hand back a throwaway GC_Any tuple.
  return found < 0 ? [0, 0, 0 /* GC_Any */] : grapheme_ranges[found];
}
export { GraphemeCategory };

/**
* Unicode segmentation by extended grapheme rules.
Expand Down
50 changes: 0 additions & 50 deletions src/utils.js
Original file line number Diff line number Diff line change
@@ -1,55 +1,5 @@
// @ts-check

/**
 * Read the code point that starts at `cursor` in `input`.
 *
 * When the code unit at `cursor` is a high surrogate and the next unit
 * (still inside `length`) is a low surrogate, the two are combined through
 * `surrogatePairToCodePoint`. In every other case the single code unit at
 * `cursor` is returned unchanged.
 *
 * @deprecated
 * Use this only if `String.prototype.codePointAt()` isn't available on the host environment
 *
 * @param {string} input
 * @param {number} cursor
 * @param {number} [length] length of input
 * @return {number} a code point of the character
 */
export function takeCodePoint(input, cursor, length = input.length) {
  const lead = input.charCodeAt(cursor);

  // Not the start of a pair, or there is no room for a trailing unit.
  if (!isHighSurrogate(lead) || !(cursor + 1 < length)) {
    return lead;
  }

  const trail = input.charCodeAt(cursor + 1);
  return isLowSurrogate(trail) ? surrogatePairToCodePoint(lead, trail) : lead;
}

/**
 * Read the character that starts at `cursor` in `input` as a string.
 *
 * The result is built from UTF-16 code units: two units when a high
 * surrogate at `cursor` is followed (inside `length`) by a low surrogate,
 * otherwise just the single unit at `cursor`.
 *
 * @deprecated
 * Use this only if `String.fromCodePoint()` isn't available on the host environment
 *
 * @param {string} input
 * @param {number} cursor
 * @param {number} [length] length of input
 * @return {string} a one-character string (its `.length` will be 1 or 2)
 */
export function takeChar(input, cursor, length = input.length) {
  const lead = input.charCodeAt(cursor);

  // Not the start of a pair, or there is no room for a trailing unit.
  if (!isHighSurrogate(lead) || !(cursor + 1 < length)) {
    return String.fromCharCode(lead);
  }

  const trail = input.charCodeAt(cursor + 1);
  if (!isLowSurrogate(trail)) {
    return String.fromCharCode(lead);
  }

  // Two single-unit calls are concatenated on purpose:
  // `String.fromCharCode(lead, trail)` seems to be much slower in V8.
  return String.fromCharCode(lead) + String.fromCharCode(trail);
}

/**
* @param {number} c UTF-16 code point
*/
Expand Down
61 changes: 0 additions & 61 deletions test/utils.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
// @ts-check

import { test } from 'node:test';
import * as assert from 'node:assert/strict';
import fc from 'fast-check';

import {
takeChar,
takeCodePoint,
isBMP,
isSMP,
isSIP,
Expand All @@ -20,64 +17,6 @@ fc.configureGlobal({
numRuns: 100_000,
});

// Property tests for `takeChar`: the returned string's `.length` must be 1 for
// an ASCII leading character and 2 for a leading code point above the BMP.
test('takeChar', async t => {
// Leading character is a single-unit 'binary-ascii' string; `extra` is
// arbitrary trailing text that must not affect what is taken at cursor 0.
await t.test('ascii', () => {
fc.assert(
fc.property(
fc.string({ unit: 'binary-ascii', minLength: 1, maxLength: 1 }),
// @ts-ignore
fc.string({ unit: 'grapheme' }),
(data, extra) => {
return takeChar(data + extra, 0).length === 1;
}
),
);
});

// Leading character is generated from an integer in 0x10000..0x10ffff, so
// `String.fromCodePoint` yields a surrogate pair (two UTF-16 code units).
await t.test('over BMP', () => {
fc.assert(
fc.property(
fc.integer({ min: 0xffff + 1, max: 0x10ffff }),
// @ts-ignore
fc.string({ unit: 'grapheme' }),
(data, extra) => {
let leading = String.fromCodePoint(data);
return takeChar(leading + extra, 0).length === 2;
},
),
);
});
});

// Property tests for `takeCodePoint`: its result at cursor 0 must equal the
// built-in `String.prototype.codePointAt(0)` on the same string.
test('takeCodePoint', async t => {
// Leading character is a single-unit 'binary-ascii' string followed by
// arbitrary trailing text.
await t.test('ascii', () => {
fc.assert(
fc.property(
fc.string({ unit: 'binary-ascii', minLength: 1, maxLength: 1 }),
// @ts-ignore
fc.string({ unit: 'grapheme' }),
(data, extra) => {
return takeCodePoint(data + extra, 0) === (data + extra).codePointAt(0);
},
),
);
});

// Leading character is generated from an integer in 0x10000..0x10ffff, so it
// is encoded as a surrogate pair that `takeCodePoint` has to recombine.
await t.test('over BMP', () => {
fc.assert(
fc.property(
fc.integer({ min: 0xffff + 1, max: 0x10ffff }),
// @ts-ignore
fc.string({ unit: 'grapheme' }),
(data, extra) => {
let leading = String.fromCodePoint(data);
return takeCodePoint(leading + extra, 0) === (leading + extra).codePointAt(0);
},
),
);
});
});

test('isBMP', () => {
fc.assert(
fc.property(
Expand Down