@@ -477,11 +477,17 @@ public static function split(
477
477
bool |int $ captureOffset = false ,
478
478
bool $ skipEmpty = false ,
479
479
int $ limit = -1 ,
480
+ bool $ utf8 = false ,
480
481
): array {
481
482
$ flags = is_int ($ captureOffset ) && $ captureOffset // back compatibility
482
483
? $ captureOffset
483
484
: ($ captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0 ) | ($ skipEmpty ? PREG_SPLIT_NO_EMPTY : 0 );
484
- return self ::pcre ('preg_split ' , [$ pattern , $ subject , $ limit , $ flags | PREG_SPLIT_DELIM_CAPTURE ]);
485
+ $ pattern .= $ utf8 ? 'u ' : '' ;
486
+ $ m = self ::pcre ('preg_split ' , [$ pattern , $ subject , $ limit , $ flags | PREG_SPLIT_DELIM_CAPTURE ]);
487
+ if ($ utf8 && ($ flags & PREG_SPLIT_OFFSET_CAPTURE )) {
488
+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
489
+ }
490
+ return $ m ;
485
491
}
486
492
487
493
@@ -494,16 +500,25 @@ public static function match(
494
500
bool |int $ captureOffset = false ,
495
501
int $ offset = 0 ,
496
502
bool $ unmatchedAsNull = false ,
503
+ bool $ utf8 = false ,
497
504
): ?array {
498
505
$ flags = is_int ($ captureOffset ) && $ captureOffset // back compatibility
499
506
? $ captureOffset
500
507
: ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
508
+ if ($ utf8 ) {
509
+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
510
+ $ pattern .= 'u ' ;
511
+ }
501
512
if ($ offset > strlen ($ subject )) {
502
513
return null ;
503
514
}
504
- return self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])
505
- ? $ m
506
- : null ;
515
+ if (!self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])) {
516
+ return null ;
517
+ }
518
+ if ($ utf8 && ($ flags & PREG_OFFSET_CAPTURE )) {
519
+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
520
+ }
521
+ return $ m ;
507
522
}
508
523
509
524
@@ -518,10 +533,15 @@ public static function matchAll(
518
533
int $ offset = 0 ,
519
534
bool $ unmatchedAsNull = false ,
520
535
bool $ patternOrder = false ,
536
+ bool $ utf8 = false ,
521
537
): array {
522
538
$ flags = is_int ($ captureOffset ) && $ captureOffset // back compatibility
523
539
? $ captureOffset
524
540
: ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 ) | ($ patternOrder ? PREG_PATTERN_ORDER : 0 );
541
+ if ($ utf8 ) {
542
+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
543
+ $ pattern .= 'u ' ;
544
+ }
525
545
if ($ offset > strlen ($ subject )) {
526
546
return [];
527
547
}
@@ -530,6 +550,9 @@ public static function matchAll(
530
550
($ flags & PREG_PATTERN_ORDER ) ? $ flags : ($ flags | PREG_SET_ORDER ),
531
551
$ offset ,
532
552
]);
553
+ if ($ utf8 && ($ flags & PREG_OFFSET_CAPTURE )) {
554
+ return self ::bytesToChars ($ subject , $ m );
555
+ }
533
556
return $ m ;
534
557
}
535
558
@@ -544,23 +567,52 @@ public static function replace(
544
567
int $ limit = -1 ,
545
568
bool $ captureOffset = false ,
546
569
bool $ unmatchedAsNull = false ,
570
+ bool $ utf8 = false ,
547
571
): string {
548
572
if (is_object ($ replacement ) || is_array ($ replacement )) {
549
573
if (!is_callable ($ replacement , false , $ textual )) {
550
574
throw new Nette \InvalidStateException ("Callback ' $ textual' is not callable. " );
551
575
}
552
576
$ flags = ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
577
+ if ($ utf8 ) {
578
+ $ pattern .= 'u ' ;
579
+ if ($ captureOffset ) {
580
+ $ replacement = fn ($ m ) => $ replacement (self ::bytesToChars ($ subject , [$ m ])[0 ]);
581
+ }
582
+ }
553
583
return self ::pcre ('preg_replace_callback ' , [$ pattern , $ replacement , $ subject , $ limit , 0 , $ flags ]);
554
584
555
585
} elseif (is_array ($ pattern ) && is_string (key ($ pattern ))) {
556
586
$ replacement = array_values ($ pattern );
557
587
$ pattern = array_keys ($ pattern );
558
588
}
559
589
590
+ if ($ utf8 ) {
591
+ $ pattern = array_map (fn ($ item ) => $ item . 'u ' , (array ) $ pattern );
592
+ }
593
+
560
594
return self ::pcre ('preg_replace ' , [$ pattern , $ replacement , $ subject , $ limit ]);
561
595
}
562
596
563
597
598
/**
 * Rewrites the offsets of offset-capturing PCRE results from byte positions
 * to positions counted by self::length() (presumably UTF-8 characters).
 *
 * The subject is measured incrementally: only the slice between the previously
 * converted offset and the current one is counted, in either direction.
 *
 * @param  string  $s       subject the offsets refer to
 * @param  array   $groups  list of match sets, each a list of [text, byteOffset] pairs
 * @return array   same structure with every non-negative offset converted
 */
private static function bytesToChars(string $s, array $groups): array
{
    $lastBytes = $lastChars = 0;
    foreach ($groups as &$matches) {
        foreach ($matches as &$match) {
            // PCRE reports a group that did not participate in the match with offset -1.
            // It is a sentinel, not a position: passing it to substr() would read from the
            // end of the subject and corrupt the running counters, so keep it untouched.
            if ($match[1] < 0) {
                continue;
            }

            if ($match[1] > $lastBytes) {
                // moved forward: add the length of the slice that was stepped over
                $lastChars += self::length(substr($s, $lastBytes, $match[1] - $lastBytes));
            } elseif ($match[1] < $lastBytes) {
                // moved backward (e.g. a group that starts before the previous one): subtract it
                $lastChars -= self::length(substr($s, $match[1], $lastBytes - $match[1]));
            }

            $lastBytes = $match[1];
            $match[1] = $lastChars;
        }
        unset($match); // drop the dangling reference before the next set is iterated
    }
    unset($matches);

    return $groups;
}
614
+
615
+
564
616
/** @internal */
565
617
public static function pcre (string $ func , array $ args )
566
618
{
0 commit comments