@@ -17,7 +17,9 @@ use crate::{
17
17
empty, iter,
18
18
prefilter:: Prefilter ,
19
19
primitives:: { NonMaxUsize , PatternID , SmallIndex , StateID } ,
20
- search:: { Anchored , Input , Match , MatchKind , PatternSet , Span } ,
20
+ search:: {
21
+ Anchored , HalfMatch , Input , Match , MatchKind , PatternSet , Span ,
22
+ } ,
21
23
sparse_set:: SparseSet ,
22
24
} ,
23
25
} ;
@@ -1094,7 +1096,8 @@ impl PikeVM {
1094
1096
) -> Option < PatternID > {
1095
1097
let utf8empty = self . get_nfa ( ) . has_empty ( ) && self . get_nfa ( ) . is_utf8 ( ) ;
1096
1098
if !utf8empty {
1097
- return self . search_slots_imp ( cache, input, slots) ;
1099
+ let hm = self . search_slots_imp ( cache, input, slots) ?;
1100
+ return Some ( hm. pattern ( ) ) ;
1098
1101
}
1099
1102
// There is an unfortunate special case where if the regex can
1100
1103
// match the empty string and UTF-8 mode is enabled, the search
@@ -1109,22 +1112,23 @@ impl PikeVM {
1109
1112
// this case.
1110
1113
let min = self . get_nfa ( ) . group_info ( ) . implicit_slot_len ( ) ;
1111
1114
if slots. len ( ) >= min {
1112
- return self . search_slots_imp ( cache, input, slots) ;
1115
+ let hm = self . search_slots_imp ( cache, input, slots) ?;
1116
+ return Some ( hm. pattern ( ) ) ;
1113
1117
}
1114
1118
if self . get_nfa ( ) . pattern_len ( ) == 1 {
1115
1119
let mut enough = [ None , None ] ;
1116
1120
let got = self . search_slots_imp ( cache, input, & mut enough) ;
1117
1121
// This is OK because we know `enough` is strictly bigger than
1118
1122
// `slots`, otherwise this special case isn't reached.
1119
1123
slots. copy_from_slice ( & enough[ ..slots. len ( ) ] ) ;
1120
- return got;
1124
+ return got. map ( |hm| hm . pattern ( ) ) ;
1121
1125
}
1122
1126
let mut enough = vec ! [ None ; min] ;
1123
1127
let got = self . search_slots_imp ( cache, input, & mut enough) ;
1124
1128
// This is OK because we know `enough` is strictly bigger than `slots`,
1125
1129
// otherwise this special case isn't reached.
1126
1130
slots. copy_from_slice ( & enough[ ..slots. len ( ) ] ) ;
1127
- got
1131
+ got. map ( |hm| hm . pattern ( ) )
1128
1132
}
1129
1133
1130
1134
/// This is the actual implementation of `search_slots_imp` that
@@ -1137,30 +1141,17 @@ impl PikeVM {
1137
1141
cache : & mut Cache ,
1138
1142
input : & Input < ' _ > ,
1139
1143
slots : & mut [ Option < NonMaxUsize > ] ,
1140
- ) -> Option < PatternID > {
1144
+ ) -> Option < HalfMatch > {
1141
1145
let utf8empty = self . get_nfa ( ) . has_empty ( ) && self . get_nfa ( ) . is_utf8 ( ) ;
1142
- let ( pid , end ) = match self . search_imp ( cache, input, slots) {
1146
+ let hm = match self . search_imp ( cache, input, slots) {
1143
1147
None => return None ,
1144
- Some ( pid) if !utf8empty => return Some ( pid) ,
1145
- Some ( pid) => {
1146
- let slot_start = pid. as_usize ( ) * 2 ;
1147
- let slot_end = slot_start + 1 ;
1148
- // OK because we know we have a match and we know our caller
1149
- // provided slots are big enough (which we make true above if
1150
- // the caller didn't). Namely, we're only here when 'utf8empty'
1151
- // is true, and when that's true, we require slots for every
1152
- // pattern.
1153
- ( pid, slots[ slot_end] . unwrap ( ) . get ( ) )
1154
- }
1148
+ Some ( hm) if !utf8empty => return Some ( hm) ,
1149
+ Some ( hm) => hm,
1155
1150
} ;
1156
- empty:: skip_splits_fwd ( input, pid, end, |input| {
1157
- let pid = match self . search_imp ( cache, input, slots) {
1158
- None => return Ok ( None ) ,
1159
- Some ( pid) => pid,
1160
- } ;
1161
- let slot_start = pid. as_usize ( ) * 2 ;
1162
- let slot_end = slot_start + 1 ;
1163
- Ok ( Some ( ( pid, slots[ slot_end] . unwrap ( ) . get ( ) ) ) )
1151
+ empty:: skip_splits_fwd ( input, hm, hm. offset ( ) , |input| {
1152
+ Ok ( self
1153
+ . search_imp ( cache, input, slots)
1154
+ . map ( |hm| ( hm, hm. offset ( ) ) ) )
1164
1155
} )
1165
1156
// OK because the PikeVM never errors.
1166
1157
. unwrap ( )
@@ -1235,7 +1226,7 @@ impl PikeVM {
1235
1226
cache : & mut Cache ,
1236
1227
input : & Input < ' _ > ,
1237
1228
slots : & mut [ Option < NonMaxUsize > ] ,
1238
- ) -> Option < PatternID > {
1229
+ ) -> Option < HalfMatch > {
1239
1230
cache. setup_search ( slots. len ( ) ) ;
1240
1231
if input. is_done ( ) {
1241
1232
return None ;
@@ -1264,7 +1255,7 @@ impl PikeVM {
1264
1255
let pre =
1265
1256
if anchored { None } else { self . get_config ( ) . get_prefilter ( ) } ;
1266
1257
let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
1267
- let mut pid = None ;
1258
+ let mut hm = None ;
1268
1259
// Yes, our search doesn't end at input.end(), but includes it. This
1269
1260
// is necessary because matches are delayed by one byte, just like
1270
1261
// how the DFA engines work. The delay is used to handle look-behind
@@ -1283,7 +1274,7 @@ impl PikeVM {
1283
1274
if curr. set . is_empty ( ) {
1284
1275
// We have a match and we haven't been instructed to continue
1285
1276
// on even after finding a match, so we can quit.
1286
- if pid . is_some ( ) && !allmatches {
1277
+ if hm . is_some ( ) && !allmatches {
1287
1278
break ;
1288
1279
}
1289
1280
// If we're running an anchored search and we've advanced
@@ -1353,7 +1344,7 @@ impl PikeVM {
1353
1344
// search. If we re-computed it at every position, we would be
1354
1345
// simulating an unanchored search when we were tasked to perform
1355
1346
// an anchored search.
1356
- if ( !pid . is_some ( ) || allmatches)
1347
+ if ( !hm . is_some ( ) || allmatches)
1357
1348
&& ( !anchored || at == input. start ( ) )
1358
1349
{
1359
1350
// Since we are adding to the 'curr' active states and since
@@ -1372,22 +1363,23 @@ impl PikeVM {
1372
1363
let slots = next. slot_table . all_absent ( ) ;
1373
1364
self . epsilon_closure ( stack, slots, curr, input, at, start_id) ;
1374
1365
}
1375
- if let Some ( x) = self . nexts ( stack, curr, next, input, at, slots) {
1376
- pid = Some ( x) ;
1366
+ if let Some ( pid) = self . nexts ( stack, curr, next, input, at, slots)
1367
+ {
1368
+ hm = Some ( HalfMatch :: new ( pid, at) ) ;
1377
1369
}
1378
1370
// Unless the caller asked us to return early, we need to mush on
1379
1371
// to see if we can extend our match. (But note that 'nexts' will
1380
1372
// quit right after seeing a match when match_kind==LeftmostFirst,
1381
1373
// as is consistent with leftmost-first match priority.)
1382
- if input. get_earliest ( ) && pid . is_some ( ) {
1374
+ if input. get_earliest ( ) && hm . is_some ( ) {
1383
1375
break ;
1384
1376
}
1385
1377
core:: mem:: swap ( curr, next) ;
1386
1378
next. set . clear ( ) ;
1387
1379
at += 1 ;
1388
1380
}
1389
1381
instrument ! ( |c| c. eprint( & self . nfa) ) ;
1390
- pid
1382
+ hm
1391
1383
}
1392
1384
1393
1385
/// The implementation for the 'which_overlapping_matches' API. Basically,
0 commit comments