From bc5a67a32748c397fb2afb3d986ec4fbb5c60e94 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 29 Oct 2024 11:15:05 +0900 Subject: [PATCH 1/9] [PHP 8.4] Add manual for grapheme_str_split function --- .../intl/grapheme/grapheme-str-split.xml | 87 +++++++++++++++++++ reference/intl/versions.xml | 1 + reference/mbstring/functions/mb-str-split.xml | 1 + reference/strings/functions/str-split.xml | 2 + 4 files changed, 91 insertions(+) create mode 100644 reference/intl/grapheme/grapheme-str-split.xml diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml new file mode 100644 index 000000000000..0673c4a9a852 --- /dev/null +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -0,0 +1,87 @@ + + + + + grapheme_str_split + Given a grapheme cluster string, which must be encoded in UTF-8 + + + &reftitle.description; + &style.procedural; + + arrayfalsegrapheme_str_split + stringstring + intlength1 + + + This function will return an array of strings, it is a version of str_split with support for grapheme cluster byte characters. + If the length parameter is specified, the string is broken down into chunks of the specified length in grapheme clusters (not bytes). + + + + + &reftitle.parameters; + + + + string + + + The &string; to split into grapheme clusters or chunks. Must be valid UTF-8. + + + + + length + + + If specified, each element of the returned array will be composed of grapheme clusters instead of a single grapheme cluster. + + + + + + + + + &reftitle.returnvalues; + + grapheme_str_split returns an array of strings. 
+ + + + + &reftitle.seealso; + + + str_split + mb_str_split + + + Unicode Text Segmentation: Grapheme Cluster Boundaries + + + + + + + diff --git a/reference/intl/versions.xml b/reference/intl/versions.xml index 6b4b422cbe6f..ce66042ad19a 100644 --- a/reference/intl/versions.xml +++ b/reference/intl/versions.xml @@ -392,6 +392,7 @@ + diff --git a/reference/mbstring/functions/mb-str-split.xml b/reference/mbstring/functions/mb-str-split.xml index 0ab739e18b8d..ac60cd73c7b3 100644 --- a/reference/mbstring/functions/mb-str-split.xml +++ b/reference/mbstring/functions/mb-str-split.xml @@ -89,6 +89,7 @@ str_split + grapheme_str_split diff --git a/reference/strings/functions/str-split.xml b/reference/strings/functions/str-split.xml index 8430f128f397..80773c1e1891 100644 --- a/reference/strings/functions/str-split.xml +++ b/reference/strings/functions/str-split.xml @@ -151,6 +151,7 @@ Array str_split will split into bytes, rather than characters when dealing with a multi-byte encoded string. Use mb_str_split to split the string into code points. + Use grapheme_str_split to split the string into grapheme clusetrs. @@ -160,6 +161,7 @@ Array mb_str_split + grapheme_str_split chunk_split preg_split explode From 40433650bc26f6e397f2dcba19095374f01724f2 Mon Sep 17 00:00:00 2001 From: tekimen Date: Tue, 29 Oct 2024 03:44:25 -0700 Subject: [PATCH 2/9] Update reference/intl/grapheme/grapheme-str-split.xml Co-authored-by: Christoph M. 
Becker --- reference/intl/grapheme/grapheme-str-split.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml index 0673c4a9a852..665c22a67bf9 100644 --- a/reference/intl/grapheme/grapheme-str-split.xml +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -3,7 +3,7 @@ grapheme_str_split - Given a grapheme cluster string, which must be encoded in UTF-8 + Split a string into an array &reftitle.description; From cbf8550bc06e7080dc746c73a594e0379abd78a9 Mon Sep 17 00:00:00 2001 From: tekimen Date: Tue, 29 Oct 2024 03:44:57 -0700 Subject: [PATCH 3/9] Update reference/intl/grapheme/grapheme-str-split.xml Co-authored-by: Christoph M. Becker --- reference/intl/grapheme/grapheme-str-split.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml index 665c22a67bf9..dc749fa3d35f 100644 --- a/reference/intl/grapheme/grapheme-str-split.xml +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -35,7 +35,7 @@ length - If specified, each element of the returned array will be composed of grapheme clusters instead of a single grapheme cluster. + Each element of the returned array will be composed of length grapheme clusters. 
From b6dbd9da8b98020b6fcb954745d10ff15e9c72c4 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 29 Oct 2024 22:25:14 +0900 Subject: [PATCH 4/9] Fix from feedback --- reference/intl/grapheme/grapheme-str-split.xml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml index dc749fa3d35f..77aa2f9dcf77 100644 --- a/reference/intl/grapheme/grapheme-str-split.xml +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -7,15 +7,18 @@ &reftitle.description; - &style.procedural; arrayfalsegrapheme_str_split stringstring intlength1 - This function will return an array of strings, it is a version of str_split with support for grapheme cluster byte characters. - If the length parameter is specified, the string is broken down into chunks of the specified length in grapheme clusters (not bytes). + This function will return an array of strings. + It is a version of str_split + with support for grapheme clusters. + If the length parameter is specified, + the string is broken down into chunks of the specified length + in grapheme clusters (not bytes). @@ -27,7 +30,7 @@ string - The &string; to split into grapheme clusters or chunks. Must be valid UTF-8. + The string to split into grapheme clusters or chunks. Support only UTF-8. @@ -46,7 +49,7 @@ &reftitle.returnvalues; - grapheme_str_split returns an array of strings. + grapheme_str_split returns an array of strings. Returns false when opening text fails. 
From e738c40c431156198e0acd43380605f3988e70ca Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 29 Oct 2024 22:42:05 +0900 Subject: [PATCH 5/9] Add throw error of grapheme_str_split --- reference/intl/grapheme/grapheme-str-split.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml index 77aa2f9dcf77..bb269bf60680 100644 --- a/reference/intl/grapheme/grapheme-str-split.xml +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -53,6 +53,14 @@ + + &reftitle.errors; + + If length is less than 1, + a ValueError will be thrown. + + + &reftitle.seealso; From 1de18d2b619b4854cff97b888b513232adc58f2f Mon Sep 17 00:00:00 2001 From: tekimen Date: Wed, 30 Oct 2024 07:25:40 -0700 Subject: [PATCH 6/9] Update reference/strings/functions/str-split.xml Co-authored-by: Christoph M. Becker --- reference/strings/functions/str-split.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reference/strings/functions/str-split.xml b/reference/strings/functions/str-split.xml index 80773c1e1891..869f3e1a0cdd 100644 --- a/reference/strings/functions/str-split.xml +++ b/reference/strings/functions/str-split.xml @@ -150,8 +150,8 @@ Array str_split will split into bytes, rather than characters when dealing with a multi-byte encoded string. - Use mb_str_split to split the string into code points. - Use grapheme_str_split to split the string into grapheme clusetrs. + mb_str_split can be used to split the string into code points. + grapheme_str_split can be used to split the string into grapheme clusters. From f544b255305c68faca65784faf4e9d160c3be636 Mon Sep 17 00:00:00 2001 From: tekimen Date: Wed, 30 Oct 2024 07:25:56 -0700 Subject: [PATCH 7/9] Update reference/intl/grapheme/grapheme-str-split.xml Co-authored-by: Christoph M. 
Becker --- reference/intl/grapheme/grapheme-str-split.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml index bb269bf60680..d842eb63663b 100644 --- a/reference/intl/grapheme/grapheme-str-split.xml +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -30,7 +30,8 @@ string - The string to split into grapheme clusters or chunks. Support only UTF-8. + The string to split into grapheme clusters or chunks. + string must be valid UTF-8. From e84b9225a17fce5fdfd8a7c956c84aaf0946a043 Mon Sep 17 00:00:00 2001 From: tekimen Date: Wed, 30 Oct 2024 07:26:07 -0700 Subject: [PATCH 8/9] Update reference/intl/grapheme/grapheme-str-split.xml Co-authored-by: Christoph M. Becker --- reference/intl/grapheme/grapheme-str-split.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/intl/grapheme/grapheme-str-split.xml b/reference/intl/grapheme/grapheme-str-split.xml index d842eb63663b..9c15f9e5f62e 100644 --- a/reference/intl/grapheme/grapheme-str-split.xml +++ b/reference/intl/grapheme/grapheme-str-split.xml @@ -50,7 +50,7 @@ &reftitle.returnvalues; - grapheme_str_split returns an array of strings. Returns false when opening text fails. + grapheme_str_split returns an array of strings, &return.falseforfailure;. From 710bad822ba8eebaae12cd159805cce81a8b320b Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Wed, 30 Oct 2024 23:37:31 +0900 Subject: [PATCH 9/9] Fix trailing space --- reference/strings/functions/str-split.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/strings/functions/str-split.xml b/reference/strings/functions/str-split.xml index 869f3e1a0cdd..18483d026378 100644 --- a/reference/strings/functions/str-split.xml +++ b/reference/strings/functions/str-split.xml @@ -150,7 +150,7 @@ Array str_split will split into bytes, rather than characters when dealing with a multi-byte encoded string. 
- mb_str_split can be used to split the string into code points. + mb_str_split can be used to split the string into code points. grapheme_str_split can be used to split the string into grapheme clusters.