Skip to content

Commit 4f11bb7

Browse files
committed
Remove unnecessary unsafe functions
Fundamentally, pest never does anything unsafe. All of the UTF-8 slicing uses indexing and is therefore checked at runtime. There's no need to maintain an internal guarantee that all pest positions lie on UTF-8 boundaries when that guarantee provides no performance benefit.
1 parent 9f9094e commit 4f11bb7

8 files changed

Lines changed: 43 additions & 94 deletions

File tree

pest/src/error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ impl<R: RuleType> Error<R> {
490490
};
491491
let error = Error::new_from_pos(
492492
ErrorVariant::CustomError { message },
493-
Position::new(input, error_position).unwrap(),
493+
Position::new_internal(input, error_position),
494494
);
495495
Some(error)
496496
}

pest/src/iterators/flat_pairs.rs

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,14 @@ use crate::RuleType;
2222
/// [`Pair`]: struct.Pair.html
2323
/// [`Pairs::flatten`]: struct.Pairs.html#method.flatten
2424
pub struct FlatPairs<'i, R> {
25-
/// # Safety
26-
///
27-
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
2825
queue: Rc<Vec<QueueableToken<'i, R>>>,
2926
input: &'i str,
3027
start: usize,
3128
end: usize,
3229
line_index: Rc<LineIndex>,
3330
}
3431

35-
/// # Safety
36-
///
37-
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
38-
pub unsafe fn new<'i, R: RuleType>(
32+
pub fn new<'i, R: RuleType>(
3933
queue: Rc<Vec<QueueableToken<'i, R>>>,
4034
input: &'i str,
4135
start: usize,
@@ -117,14 +111,12 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
117111
return None;
118112
}
119113

120-
let pair = unsafe {
121-
pair::new(
122-
Rc::clone(&self.queue),
123-
self.input,
124-
Rc::clone(&self.line_index),
125-
self.start,
126-
)
127-
};
114+
let pair = pair::new(
115+
Rc::clone(&self.queue),
116+
self.input,
117+
Rc::clone(&self.line_index),
118+
self.start,
119+
);
128120
self.next_start();
129121

130122
Some(pair)
@@ -144,14 +136,12 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {
144136

145137
self.next_start_from_end();
146138

147-
let pair = unsafe {
148-
pair::new(
149-
Rc::clone(&self.queue),
150-
self.input,
151-
Rc::clone(&self.line_index),
152-
self.end,
153-
)
154-
};
139+
let pair = pair::new(
140+
Rc::clone(&self.queue),
141+
self.input,
142+
Rc::clone(&self.line_index),
143+
self.end,
144+
);
155145

156146
Some(pair)
157147
}

pest/src/iterators/pair.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,14 @@ use crate::RuleType;
3838
/// [`Token`]: ../enum.Token.html
3939
#[derive(Clone)]
4040
pub struct Pair<'i, R> {
41-
/// # Safety
42-
///
43-
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
4441
queue: Rc<Vec<QueueableToken<'i, R>>>,
4542
input: &'i str,
4643
/// Token index into `queue`.
4744
start: usize,
4845
line_index: Rc<LineIndex>,
4946
}
5047

51-
/// # Safety
52-
///
53-
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
54-
pub unsafe fn new<'i, R: RuleType>(
48+
pub fn new<'i, R: RuleType>(
5549
queue: Rc<Vec<QueueableToken<'i, R>>>,
5650
input: &'i str,
5751
line_index: Rc<LineIndex>,
@@ -210,8 +204,7 @@ impl<'i, R: RuleType> Pair<'i, R> {
210204
let start = self.pos(self.start);
211205
let end = self.pos(self.pair());
212206

213-
// Generated positions always come from Positions and are UTF-8 borders.
214-
unsafe { span::Span::new_unchecked(self.input, start, end) }
207+
span::Span::new_internal(self.input, start, end)
215208
}
216209

217210
/// Get current node tag

pest/src/iterators/pairs.rs

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ impl<'i, R: RuleType> Pairs<'i, R> {
205205
/// ```
206206
#[inline]
207207
pub fn flatten(self) -> FlatPairs<'i, R> {
208-
unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
208+
flat_pairs::new(self.queue, self.input, self.start, self.end)
209209
}
210210

211211
/// Finds the first pair that has its node or branch tagged with the provided
@@ -347,14 +347,12 @@ impl<'i, R: RuleType> Pairs<'i, R> {
347347
#[inline]
348348
pub fn peek(&self) -> Option<Pair<'i, R>> {
349349
if self.start < self.end {
350-
Some(unsafe {
351-
pair::new(
352-
Rc::clone(&self.queue),
353-
self.input,
354-
Rc::clone(&self.line_index),
355-
self.start,
356-
)
357-
})
350+
Some(pair::new(
351+
Rc::clone(&self.queue),
352+
self.input,
353+
Rc::clone(&self.line_index),
354+
self.start,
355+
))
358356
} else {
359357
None
360358
}
@@ -427,14 +425,12 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
427425
self.end = self.pair_from_end();
428426
self.pairs_count -= 1;
429427

430-
let pair = unsafe {
431-
pair::new(
432-
Rc::clone(&self.queue),
433-
self.input,
434-
Rc::clone(&self.line_index),
435-
self.end,
436-
)
437-
};
428+
let pair = pair::new(
429+
Rc::clone(&self.queue),
430+
self.input,
431+
Rc::clone(&self.line_index),
432+
self.end,
433+
);
438434

439435
Some(pair)
440436
}

pest/src/iterators/tokens.rs

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,12 @@ use crate::RuleType;
2424
/// [`Pairs::tokens`]: struct.Pairs.html#method.tokens
2525
#[derive(Clone)]
2626
pub struct Tokens<'i, R> {
27-
/// # Safety:
28-
///
29-
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
3027
queue: Rc<Vec<QueueableToken<'i, R>>>,
3128
input: &'i str,
3229
start: usize,
3330
end: usize,
3431
}
3532

36-
// TODO(safety): QueueableTokens must be valid indices into input.
3733
pub fn new<'i, R: RuleType>(
3834
queue: Rc<Vec<QueueableToken<'i, R>>>,
3935
input: &'i str,
@@ -46,7 +42,7 @@ pub fn new<'i, R: RuleType>(
4642
QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
4743
assert!(
4844
input.get(input_pos..).is_some(),
49-
"💥 UNSAFE `Tokens` CREATED 💥"
45+
"💥 INVALID `Tokens` CREATED 💥"
5046
)
5147
}
5248
}
@@ -75,19 +71,15 @@ impl<'i, R: RuleType> Tokens<'i, R> {
7571

7672
Token::Start {
7773
rule,
78-
// QueueableTokens are safely created.
79-
pos: unsafe { position::Position::new_unchecked(self.input, input_pos) },
74+
pos: position::Position::new_internal(self.input, input_pos),
8075
}
8176
}
8277
QueueableToken::End {
8378
rule, input_pos, ..
84-
} => {
85-
Token::End {
86-
rule,
87-
// QueueableTokens are safely created.
88-
pos: unsafe { position::Position::new_unchecked(self.input, input_pos) },
89-
}
90-
}
79+
} => Token::End {
80+
rule,
81+
pos: position::Position::new_internal(self.input, input_pos),
82+
},
9183
}
9284
}
9385
}

pest/src/parser_state.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -463,8 +463,7 @@ where
463463

464464
Err(Error::new_from_pos_with_parsing_attempts(
465465
variant,
466-
// TODO(performance): Guarantee state.attempt_pos is a valid position
467-
Position::new(input, state.attempt_pos).unwrap(),
466+
Position::new_internal(input, state.attempt_pos),
468467
state.parse_attempts.clone(),
469468
))
470469
}

pest/src/position.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,12 @@ use crate::span;
2020
#[derive(Clone, Copy)]
2121
pub struct Position<'i> {
2222
input: &'i str,
23-
/// # Safety:
24-
///
25-
/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
2623
pos: usize,
2724
}
2825

2926
impl<'i> Position<'i> {
3027
/// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.)
31-
///
32-
/// # Safety:
33-
///
34-
/// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
35-
pub(crate) unsafe fn new_unchecked(input: &str, pos: usize) -> Position<'_> {
28+
pub(crate) fn new_internal(input: &str, pos: usize) -> Position<'_> {
3629
debug_assert!(input.get(pos..).is_some());
3730
Position { input, pos }
3831
}
@@ -106,8 +99,7 @@ impl<'i> Position<'i> {
10699
if ptr::eq(self.input, other.input)
107100
/* && self.input.get(self.pos..other.pos).is_some() */
108101
{
109-
// This is safe because the pos field of a Position should always be a valid str index.
110-
unsafe { span::Span::new_unchecked(self.input, self.pos, other.pos) }
102+
span::Span::new_internal(self.input, self.pos, other.pos)
111103
} else {
112104
// TODO: maybe a panic if self.pos < other.pos
113105
panic!("span created from positions from different inputs")

pest/src/span.rs

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,13 @@ use crate::position;
2222
#[derive(Clone, Copy)]
2323
pub struct Span<'i> {
2424
input: &'i str,
25-
/// # Safety
26-
///
27-
/// Must be a valid character boundary index into `input`.
2825
start: usize,
29-
/// # Safety
30-
///
31-
/// Must be a valid character boundary index into `input`.
3226
end: usize,
3327
}
3428

3529
impl<'i> Span<'i> {
3630
/// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
37-
///
38-
/// # Safety
39-
///
40-
/// `input[start..end]` must be a valid subslice; that is, said indexing should not panic.
41-
pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> {
31+
pub(crate) fn new_internal(input: &str, start: usize, end: usize) -> Span<'_> {
4232
debug_assert!(input.get(start..end).is_some());
4333
Span { input, start, end }
4434
}
@@ -144,8 +134,7 @@ impl<'i> Span<'i> {
144134
/// ```
145135
#[inline]
146136
pub fn start_pos(&self) -> position::Position<'i> {
147-
// Span's start position is always a UTF-8 border.
148-
unsafe { position::Position::new_unchecked(self.input, self.start) }
137+
position::Position::new_internal(self.input, self.start)
149138
}
150139

151140
/// Returns the `Span`'s end `Position`.
@@ -163,8 +152,7 @@ impl<'i> Span<'i> {
163152
/// ```
164153
#[inline]
165154
pub fn end_pos(&self) -> position::Position<'i> {
166-
// Span's end position is always a UTF-8 border.
167-
unsafe { position::Position::new_unchecked(self.input, self.end) }
155+
position::Position::new_internal(self.input, self.end)
168156
}
169157

170158
/// Splits the `Span` into a pair of `Position`s.
@@ -182,9 +170,8 @@ impl<'i> Span<'i> {
182170
/// ```
183171
#[inline]
184172
pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
185-
// Span's start and end positions are always a UTF-8 borders.
186-
let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) };
187-
let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) };
173+
let pos1 = position::Position::new_internal(self.input, self.start);
174+
let pos2 = position::Position::new_internal(self.input, self.end);
188175

189176
(pos1, pos2)
190177
}

0 commit comments

Comments
 (0)