@@ -4,17 +4,19 @@ use chumsky::input::{Input, ValueInput};
4
4
use chumsky:: prelude:: * ;
5
5
use chumsky:: span:: SimpleSpan ;
6
6
use chumsky:: text:: { ident, int} ;
7
+ use core:: f64;
7
8
use num_complex:: Complex64 ;
9
+ use std:: iter:: Peekable ;
8
10
use std:: ops:: Range ;
11
+ use std:: str:: Chars ;
9
12
10
13
pub type Span = SimpleSpan ;
11
14
12
15
#[ derive( Clone , Debug , PartialEq ) ]
13
16
pub enum Token < ' src > {
14
17
// literals ----------------------------------------------------------------
15
18
Const ( Literal ) , // numeric or complex constants recognised at lex‑time
16
- Var ( & ' src str ) , // #identifier (variables)
17
- Call ( & ' src str ) ,
19
+ Ident ( & ' src str ) ,
18
20
// punctuation -------------------------------------------------------------
19
21
LParen ,
20
22
RParen ,
@@ -34,124 +36,216 @@ pub enum Token<'src> {
34
36
If ,
35
37
}
36
38
37
- pub fn lexer < ' src > ( ) -> impl Parser < ' src , & ' src str , Vec < ( Token < ' src > , Span ) > , extra:: Err < Rich < ' src , char > > > {
38
- // ── numbers ────────────────────────────────────────────────────────────
39
- let num = int ( 10 )
40
- . then ( just ( '.' ) . then ( int ( 10 ) ) . or_not ( ) )
41
- . then ( just ( 'e' ) . or ( just ( 'E' ) ) . then ( one_of ( "+-" ) . or_not ( ) ) . then ( int ( 10 ) ) . or_not ( ) )
42
- . map ( |( ( int_part, frac) , exp) : ( ( & str , _ ) , _ ) | {
43
- let mut s = int_part. to_string ( ) ;
44
- if let Some ( ( _, frac) ) = frac {
45
- s. push ( '.' ) ;
46
- s. push_str ( frac) ;
39
+ fn const_lit ( name : & str ) -> Option < Literal > {
40
+ use std:: f64:: consts:: * ;
41
+
42
+ Some ( match name {
43
+ "pi" | "π" => Literal :: Float ( PI ) ,
44
+ "tau" | "τ" => Literal :: Float ( TAU ) ,
45
+ "e" => Literal :: Float ( E ) ,
46
+ "phi" | "φ" => Literal :: Float ( 1.618_033_988_75 ) ,
47
+ "inf" | "∞" => Literal :: Float ( f64:: INFINITY ) ,
48
+ "i" => Literal :: Complex ( Complex64 :: new ( 0.0 , 1.0 ) ) ,
49
+ "G" => Literal :: Float ( 9.80665 ) ,
50
+ _ => return None ,
51
+ } )
52
+ }
53
+
54
/// Hand-written lexer that walks a source string and produces tokens on demand.
pub struct Lexer<'a> {
    /// The complete source text being tokenized.
    input: &'a str,
    /// Byte offset into `input` of the next character that has not been consumed.
    pos: usize,
}
58
+
59
+ impl < ' a > Lexer < ' a > {
60
+ pub fn new ( input : & ' a str ) -> Self {
61
+ Self { input, pos : 0 }
62
+ }
63
+
64
+ fn peek ( & self ) -> Option < char > {
65
+ self . input [ self . pos ..] . chars ( ) . next ( )
66
+ }
67
+
68
+ fn bump ( & mut self ) -> Option < char > {
69
+ let c = self . peek ( ) ?;
70
+ self . pos += c. len_utf8 ( ) ;
71
+ Some ( c)
72
+ }
73
+
74
+ fn consume_while < F > ( & mut self , cond : F ) -> & ' a str
75
+ where
76
+ F : Fn ( char ) -> bool ,
77
+ {
78
+ let start = self . pos ;
79
+ while self . peek ( ) . is_some_and ( & cond) {
80
+ self . bump ( ) ;
81
+ }
82
+ & self . input [ start..self . pos ]
83
+ }
84
+
85
+ fn lex_ident ( & mut self ) -> & ' a str {
86
+ self . consume_while ( |c| c. is_alphanumeric ( ) || c == '_' )
87
+ }
88
+
89
+ fn lex_uint ( & mut self ) -> Option < ( u64 , usize ) > {
90
+ let mut v = 0u64 ;
91
+ let mut digits = 0 ;
92
+ while let Some ( d) = self . peek ( ) . and_then ( |c| c. to_digit ( 10 ) ) {
93
+ v = v * 10 + d as u64 ;
94
+ digits += 1 ;
95
+ self . bump ( ) ;
96
+ }
97
+ ( digits > 0 ) . then_some ( ( v, digits) )
98
+ }
99
+
100
+ fn lex_number ( & mut self ) -> Option < f64 > {
101
+ let start_pos = self . pos ;
102
+ let ( int_val, int_digits) = self . lex_uint ( ) . unwrap_or ( ( 0 , 0 ) ) ;
103
+ let mut got_digit = int_digits > 0 ;
104
+ let mut num = int_val as f64 ;
105
+
106
+ if self . peek ( ) == Some ( '.' ) {
107
+ self . bump ( ) ;
108
+ if let Some ( ( frac_val, frac_digits) ) = self . lex_uint ( ) {
109
+ num += ( frac_val as f64 ) / 10f64 . powi ( frac_digits as i32 ) ;
110
+ got_digit = true ;
47
111
}
48
- if let Some ( ( ( e, sign) , exp) ) = exp {
49
- s. push ( e) ;
50
- if let Some ( sign) = sign {
51
- s. push ( sign) ;
112
+ }
113
+
114
+ if matches ! ( self . peek( ) , Some ( 'e' | 'E' ) ) {
115
+ self . bump ( ) ;
116
+ let sign = match self . peek ( ) {
117
+ Some ( '+' ) => {
118
+ self . bump ( ) ;
119
+ 1
52
120
}
53
- s. push_str ( exp) ;
121
+ Some ( '-' ) => {
122
+ self . bump ( ) ;
123
+ -1
124
+ }
125
+ _ => 1 ,
126
+ } ;
127
+ if let Some ( ( exp_val, _) ) = self . lex_uint ( ) {
128
+ num *= 10f64 . powi ( sign * exp_val as i32 ) ;
129
+ } else {
130
+ self . pos = start_pos;
131
+ return None ;
54
132
}
55
- Token :: Const ( Literal :: Float ( s. parse :: < f64 > ( ) . unwrap ( ) ) )
56
- } ) ;
57
-
58
- // ── single‑char symbols ────────────────────────────────────────────────
59
- let sym = choice ( (
60
- just ( '(' ) . to ( Token :: LParen ) ,
61
- just ( ')' ) . to ( Token :: RParen ) ,
62
- just ( ',' ) . to ( Token :: Comma ) ,
63
- just ( '+' ) . to ( Token :: Plus ) ,
64
- just ( '-' ) . to ( Token :: Minus ) ,
65
- just ( '*' ) . to ( Token :: Star ) ,
66
- just ( '/' ) . to ( Token :: Slash ) ,
67
- just ( '^' ) . to ( Token :: Caret ) ,
68
- ) ) ;
69
-
70
- // ── comparison operators ───────────────────────────────────────────────
71
- let cmp = choice ( (
72
- just ( "<=" ) . to ( Token :: Le ) ,
73
- just ( ">=" ) . to ( Token :: Ge ) ,
74
- just ( "==" ) . to ( Token :: EqEq ) ,
75
- just ( '<' ) . to ( Token :: Lt ) ,
76
- just ( '>' ) . to ( Token :: Gt ) ,
77
- ) ) ;
78
-
79
- let kw_token = |w, t| just ( w) . padded ( ) . to ( t) ;
80
-
81
- let kw_lit = |w, lit : Literal | just ( w) . padded ( ) . to ( lit) ;
82
-
83
- let const_token = choice ( (
84
- kw_lit ( "pi" , Literal :: Float ( std:: f64:: consts:: PI ) ) ,
85
- kw_lit ( "π" , Literal :: Float ( std:: f64:: consts:: PI ) ) ,
86
- kw_lit ( "tau" , Literal :: Float ( std:: f64:: consts:: TAU ) ) ,
87
- kw_lit ( "τ" , Literal :: Float ( std:: f64:: consts:: TAU ) ) ,
88
- kw_lit ( "e" , Literal :: Float ( std:: f64:: consts:: E ) ) ,
89
- kw_lit ( "phi" , Literal :: Float ( 1.618_033_988_75 ) ) ,
90
- kw_lit ( "φ" , Literal :: Float ( 1.618_033_988_75 ) ) ,
91
- kw_lit ( "inf" , Literal :: Float ( f64:: INFINITY ) ) ,
92
- kw_lit ( "∞" , Literal :: Float ( f64:: INFINITY ) ) ,
93
- kw_lit ( "i" , Literal :: Complex ( Complex64 :: new ( 0.0 , 1.0 ) ) ) ,
94
- kw_lit ( "G" , Literal :: Float ( 9.80665 ) ) ,
95
- ) )
96
- . map ( Token :: Const ) ;
97
-
98
- let var_token = just ( '#' ) . ignore_then ( ident ( ) ) . map ( Token :: Var ) ;
99
- let call_token = just ( '@' ) . ignore_then ( ident ( ) ) . map ( Token :: Call ) ;
100
-
101
- choice ( ( num, kw_token ( "if" , Token :: If ) , const_token, cmp, sym, var_token, call_token) )
102
- . map_with ( |t, e| ( t, e. span ( ) ) )
103
- . padded ( )
104
- . repeated ( )
105
- . collect ( )
106
- }
133
+ }
107
134
108
- #[ derive( Debug ) ]
109
- pub struct TokenStream < ' src > {
110
- tokens : Vec < ( Token < ' src > , Span ) > ,
111
- }
135
+ got_digit. then_some ( num)
136
+ }
137
+
138
+ fn skip_ws ( & mut self ) {
139
+ self . consume_while ( char:: is_whitespace) ;
140
+ }
141
+
142
+ pub fn next_token ( & mut self ) -> Option < Token < ' a > > {
143
+ self . skip_ws ( ) ;
144
+ let start = self . pos ;
145
+ let ch = self . bump ( ) ?;
146
+
147
+ use Token :: * ;
148
+ let tok = match ch {
149
+ '(' => LParen ,
150
+ ')' => RParen ,
151
+ ',' => Comma ,
152
+ '+' => Plus ,
153
+ '-' => Minus ,
154
+ '*' => Star ,
155
+ '/' => Slash ,
156
+ '^' => Caret ,
157
+
158
+ '<' => {
159
+ if self . peek ( ) == Some ( '=' ) {
160
+ self . bump ( ) ;
161
+ Le
162
+ } else {
163
+ Lt
164
+ }
165
+ }
166
+ '>' => {
167
+ if self . peek ( ) == Some ( '=' ) {
168
+ self . bump ( ) ;
169
+ Ge
170
+ } else {
171
+ Gt
172
+ }
173
+ }
174
+ '=' => {
175
+ if self . peek ( ) == Some ( '=' ) {
176
+ self . bump ( ) ;
177
+ EqEq
178
+ } else {
179
+ return None ;
180
+ }
181
+ }
182
+
183
+ c if c. is_ascii_digit ( ) || ( c == '.' && self . peek ( ) . is_some_and ( |c| c. is_ascii_digit ( ) ) ) => {
184
+ self . pos = start;
185
+ Const ( Literal :: Float ( self . lex_number ( ) ?) )
186
+ }
187
+
188
+ _ => {
189
+ self . consume_while ( |c| c. is_alphanumeric ( ) || c == '_' ) ;
190
+ let ident = & self . input [ start..self . pos ] ;
191
+
192
+ if ident == "if" {
193
+ If
194
+ } else if let Some ( lit) = const_lit ( ident) {
195
+ Const ( lit)
196
+ } else if ch. is_alphanumeric ( ) {
197
+ Ident ( ident)
198
+ } else {
199
+ return None ;
200
+ }
201
+ }
202
+ } ;
112
203
113
- impl < ' src > TokenStream < ' src > {
114
- pub fn new ( tokens : Vec < ( Token < ' src > , Span ) > ) -> Self {
115
- TokenStream { tokens }
204
+ Some ( tok)
116
205
}
117
206
}
118
207
119
- impl < ' src > Input < ' src > for TokenStream < ' src > {
120
- type Token = ( Token < ' src > , Span ) ;
208
+ impl < ' src > Input < ' src > for Lexer < ' src > {
209
+ type Token = Token < ' src > ;
121
210
type Span = Span ;
122
- type Cursor = usize ;
123
- type MaybeToken = ( Token < ' src > , Span ) ;
211
+ type Cursor = usize ; // byte offset inside `input`
212
+ type MaybeToken = Token < ' src > ;
124
213
type Cache = Self ;
125
214
215
+ #[ inline]
126
216
fn begin ( self ) -> ( Self :: Cursor , Self :: Cache ) {
127
217
( 0 , self )
128
218
}
129
219
220
+ #[ inline]
130
221
fn cursor_location ( cursor : & Self :: Cursor ) -> usize {
131
222
* cursor
132
223
}
133
224
134
- #[ inline( always ) ]
225
+ #[ inline]
135
226
unsafe fn next_maybe ( this : & mut Self :: Cache , cursor : & mut Self :: Cursor ) -> Option < Self :: MaybeToken > {
136
- if let Some ( tok) = this. tokens . get ( * cursor) {
137
- * cursor += 1 ;
138
- Some ( tok. clone ( ) )
227
+ this. pos = * cursor;
228
+ if let Some ( tok) = this. next_token ( ) {
229
+ * cursor = this. pos ;
230
+ Some ( tok)
139
231
} else {
140
232
None
141
233
}
142
234
}
143
235
144
- #[ inline( always ) ]
236
+ #[ inline]
145
237
unsafe fn span ( _this : & mut Self :: Cache , range : Range < & Self :: Cursor > ) -> Self :: Span {
146
238
( * range. start ..* range. end ) . into ( )
147
239
}
148
240
}
149
241
150
- impl < ' src > ValueInput < ' src > for TokenStream < ' src > {
242
+ impl < ' src > ValueInput < ' src > for Lexer < ' src > {
243
+ #[ inline]
151
244
unsafe fn next ( this : & mut Self :: Cache , cursor : & mut Self :: Cursor ) -> Option < Self :: Token > {
152
- if let Some ( tok) = this. tokens . get ( * cursor) {
153
- * cursor += 1 ;
154
- Some ( tok. clone ( ) )
245
+ this. pos = * cursor;
246
+ if let Some ( tok) = this. next_token ( ) {
247
+ * cursor = this. pos ;
248
+ Some ( tok)
155
249
} else {
156
250
None
157
251
}
0 commit comments