Skip to content

Commit 6cc0745

Browse files
committed
add implicit multiplication and make lexer much faster
1 parent b38ffa9 commit 6cc0745

File tree

4 files changed

+242
-145
lines changed

4 files changed

+242
-145
lines changed

libraries/math-parser/benches/bench.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,17 @@ macro_rules! generate_benchmarks {
3333
}
3434

3535
generate_benchmarks! {
36-
"(3 * (4 + @sqrt(25)) - @cos(pi/3) * (2^3)) + 5 * e", // Mixed nested functions, constants, and operations
37-
"((5 + 2 * (3 - @sqrt(49)))^2) / (1 + @sqrt(16)) + tau / 2", // Complex nested expression with constants
38-
"@log(100, 10) + (5 * @sin(pi/4) + @sqrt(81)) / (2 * phi)", // Logarithmic and trigonometric functions
39-
"(@sqrt(144) * 2 + 5) / (3 * (4 - @sin(pi / 6))) + e^2", // Combined square root, trigonometric, and exponential operations
40-
"@cos(2 * pi) + @tan(pi / 3) * @log(32, 2) - @sqrt(256)", // Multiple trigonometric and logarithmic functions
41-
"(10 * (3 + 2) - 8 / 2)^2 + 7 * (2^4) - @sqrt(225) + phi", // Mixed arithmetic with constants
42-
"(5^2 + 3^3) * (@sqrt(81) + @sqrt(64)) - tau * @log(1000, 10)", // Power and square root with constants
43-
"((8 * @sqrt(49) - 2 * e) + @log(256, 2) / (2 + @cos(pi))) * 1.5", // Nested functions and constants
44-
"(tan(pi / 4) + 5) * (3 + @sqrt(36)) / (@log(1024, 2) - 4)", // Nested functions with trigonometry and logarithm
45-
"((3 * e + 2 * @sqrt(100)) - @cos(tau / 4)) * @log(27, 3) + phi", // Mixed constant usage and functions
46-
"(@sqrt(100) + 5 * @sin(pi / 6) - 8 / log(64, 2)) + e^(1.5)", // Complex mix of square root, division, and exponentiation
47-
"((@sin(pi/2) + @cos(0)) * (e^2 - 2 * @sqrt(16))) / (@log(100, 10) + pi)", // Nested trigonometric, exponential, and logarithmic functions
48-
"(5 * (7 + @sqrt(121)) - (@log(243, 3) * phi)) + 3^5 / tau", //
36+
"(3 * (4 + sqrt(25)) - cos(pi/3) * (2^3)) + 5 * e", // Mixed nested functions, constants, and operations
37+
"((5 + 2 * (3 - sqrt(49)))^2) / (1 + sqrt(16)) + tau / 2", // Complex nested expression with constants
38+
"log(100, 10) + (5 * sin(pi/4) + sqrt(81)) / (2 * phi)", // Logarithmic and trigonometric functions
39+
"(sqrt(144) * 2 + 5) / (3 * (4 - sin(pi / 6))) + e^2", // Combined square root, trigonometric, and exponential operations
40+
"cos(2 * pi) + tan(pi / 3) * log(32, 2) - sqrt(256)", // Multiple trigonometric and logarithmic functions
41+
"(10 * (3 + 2) - 8 / 2)^2 + 7 * (2^4) - sqrt(225) + phi", // Mixed arithmetic with constants
42+
"(5^2 + 3^3) * (sqrt(81) + sqrt(64)) - tau * log(1000, 10)", // Power and square root with constants
43+
"((8 * sqrt(49) - 2 * e) + log(256, 2) / (2 + cos(pi))) * 1.5", // Nested functions and constants
44+
"(tan(pi / 4) + 5) * (3 + sqrt(36)) / (log(1024, 2) - 4)", // Nested functions with trigonometry and logarithm
45+
"((3 * e + 2 * sqrt(100)) - cos(tau / 4)) * log(27, 3) + phi", // Mixed constant usage and functions
46+
"(sqrt(100) + 5 * sin(pi / 6) - 8 / log(64, 2)) + e^(1.5)", // Complex mix of square root, division, and exponentiation
47+
"((sin(pi/2) + cos(0)) * (e^2 - 2 * sqrt(16))) / (log(100, 10) + pi)", // Nested trigonometric, exponential, and logarithmic functions
48+
"(5 * (7 + sqrt(121)) - (log(243, 3) * phi)) + 3^5 / tau", // Square root, logarithm, power, and constants
4949
}

libraries/math-parser/src/lexer.rs

Lines changed: 183 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,19 @@ use chumsky::input::{Input, ValueInput};
44
use chumsky::prelude::*;
55
use chumsky::span::SimpleSpan;
66
use chumsky::text::{ident, int};
7+
use core::f64;
78
use num_complex::Complex64;
9+
use std::iter::Peekable;
810
use std::ops::Range;
11+
use std::str::Chars;
912

1013
pub type Span = SimpleSpan;
1114

1215
#[derive(Clone, Debug, PartialEq)]
1316
pub enum Token<'src> {
1417
// literals ----------------------------------------------------------------
1518
Const(Literal), // numeric or complex constants recognised at lex‑time
16-
Var(&'src str), // #identifier (variables)
17-
Call(&'src str),
19+
Ident(&'src str),
1820
// punctuation -------------------------------------------------------------
1921
LParen,
2022
RParen,
@@ -34,124 +36,216 @@ pub enum Token<'src> {
3436
If,
3537
}
3638

37-
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char>>> {
38-
// ── numbers ────────────────────────────────────────────────────────────
39-
let num = int(10)
40-
.then(just('.').then(int(10)).or_not())
41-
.then(just('e').or(just('E')).then(one_of("+-").or_not()).then(int(10)).or_not())
42-
.map(|((int_part, frac), exp): ((&str, _), _)| {
43-
let mut s = int_part.to_string();
44-
if let Some((_, frac)) = frac {
45-
s.push('.');
46-
s.push_str(frac);
39+
fn const_lit(name: &str) -> Option<Literal> {
40+
use std::f64::consts::*;
41+
42+
Some(match name {
43+
"pi" | "π" => Literal::Float(PI),
44+
"tau" | "τ" => Literal::Float(TAU),
45+
"e" => Literal::Float(E),
46+
"phi" | "φ" => Literal::Float(1.618_033_988_75),
47+
"inf" | "∞" => Literal::Float(f64::INFINITY),
48+
"i" => Literal::Complex(Complex64::new(0.0, 1.0)),
49+
"G" => Literal::Float(9.80665),
50+
_ => return None,
51+
})
52+
}
53+
54+
/// Hand-written lexer state over a borrowed source string.
///
/// Tokens produced from it borrow from `input`, so they live as long as `'a`.
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
    /// The complete source text being tokenised.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
}
58+
59+
impl<'a> Lexer<'a> {
60+
pub fn new(input: &'a str) -> Self {
61+
Self { input, pos: 0 }
62+
}
63+
64+
fn peek(&self) -> Option<char> {
65+
self.input[self.pos..].chars().next()
66+
}
67+
68+
fn bump(&mut self) -> Option<char> {
69+
let c = self.peek()?;
70+
self.pos += c.len_utf8();
71+
Some(c)
72+
}
73+
74+
fn consume_while<F>(&mut self, cond: F) -> &'a str
75+
where
76+
F: Fn(char) -> bool,
77+
{
78+
let start = self.pos;
79+
while self.peek().is_some_and(&cond) {
80+
self.bump();
81+
}
82+
&self.input[start..self.pos]
83+
}
84+
85+
fn lex_ident(&mut self) -> &'a str {
86+
self.consume_while(|c| c.is_alphanumeric() || c == '_')
87+
}
88+
89+
fn lex_uint(&mut self) -> Option<(u64, usize)> {
90+
let mut v = 0u64;
91+
let mut digits = 0;
92+
while let Some(d) = self.peek().and_then(|c| c.to_digit(10)) {
93+
v = v * 10 + d as u64;
94+
digits += 1;
95+
self.bump();
96+
}
97+
(digits > 0).then_some((v, digits))
98+
}
99+
100+
fn lex_number(&mut self) -> Option<f64> {
101+
let start_pos = self.pos;
102+
let (int_val, int_digits) = self.lex_uint().unwrap_or((0, 0));
103+
let mut got_digit = int_digits > 0;
104+
let mut num = int_val as f64;
105+
106+
if self.peek() == Some('.') {
107+
self.bump();
108+
if let Some((frac_val, frac_digits)) = self.lex_uint() {
109+
num += (frac_val as f64) / 10f64.powi(frac_digits as i32);
110+
got_digit = true;
47111
}
48-
if let Some(((e, sign), exp)) = exp {
49-
s.push(e);
50-
if let Some(sign) = sign {
51-
s.push(sign);
112+
}
113+
114+
if matches!(self.peek(), Some('e' | 'E')) {
115+
self.bump();
116+
let sign = match self.peek() {
117+
Some('+') => {
118+
self.bump();
119+
1
52120
}
53-
s.push_str(exp);
121+
Some('-') => {
122+
self.bump();
123+
-1
124+
}
125+
_ => 1,
126+
};
127+
if let Some((exp_val, _)) = self.lex_uint() {
128+
num *= 10f64.powi(sign * exp_val as i32);
129+
} else {
130+
self.pos = start_pos;
131+
return None;
54132
}
55-
Token::Const(Literal::Float(s.parse::<f64>().unwrap()))
56-
});
57-
58-
// ── single‑char symbols ────────────────────────────────────────────────
59-
let sym = choice((
60-
just('(').to(Token::LParen),
61-
just(')').to(Token::RParen),
62-
just(',').to(Token::Comma),
63-
just('+').to(Token::Plus),
64-
just('-').to(Token::Minus),
65-
just('*').to(Token::Star),
66-
just('/').to(Token::Slash),
67-
just('^').to(Token::Caret),
68-
));
69-
70-
// ── comparison operators ───────────────────────────────────────────────
71-
let cmp = choice((
72-
just("<=").to(Token::Le),
73-
just(">=").to(Token::Ge),
74-
just("==").to(Token::EqEq),
75-
just('<').to(Token::Lt),
76-
just('>').to(Token::Gt),
77-
));
78-
79-
let kw_token = |w, t| just(w).padded().to(t);
80-
81-
let kw_lit = |w, lit: Literal| just(w).padded().to(lit);
82-
83-
let const_token = choice((
84-
kw_lit("pi", Literal::Float(std::f64::consts::PI)),
85-
kw_lit("π", Literal::Float(std::f64::consts::PI)),
86-
kw_lit("tau", Literal::Float(std::f64::consts::TAU)),
87-
kw_lit("τ", Literal::Float(std::f64::consts::TAU)),
88-
kw_lit("e", Literal::Float(std::f64::consts::E)),
89-
kw_lit("phi", Literal::Float(1.618_033_988_75)),
90-
kw_lit("φ", Literal::Float(1.618_033_988_75)),
91-
kw_lit("inf", Literal::Float(f64::INFINITY)),
92-
kw_lit("∞", Literal::Float(f64::INFINITY)),
93-
kw_lit("i", Literal::Complex(Complex64::new(0.0, 1.0))),
94-
kw_lit("G", Literal::Float(9.80665)),
95-
))
96-
.map(Token::Const);
97-
98-
let var_token = just('#').ignore_then(ident()).map(Token::Var);
99-
let call_token = just('@').ignore_then(ident()).map(Token::Call);
100-
101-
choice((num, kw_token("if", Token::If), const_token, cmp, sym, var_token, call_token))
102-
.map_with(|t, e| (t, e.span()))
103-
.padded()
104-
.repeated()
105-
.collect()
106-
}
133+
}
107134

108-
#[derive(Debug)]
109-
pub struct TokenStream<'src> {
110-
tokens: Vec<(Token<'src>, Span)>,
111-
}
135+
got_digit.then_some(num)
136+
}
137+
138+
fn skip_ws(&mut self) {
139+
self.consume_while(char::is_whitespace);
140+
}
141+
142+
pub fn next_token(&mut self) -> Option<Token<'a>> {
143+
self.skip_ws();
144+
let start = self.pos;
145+
let ch = self.bump()?;
146+
147+
use Token::*;
148+
let tok = match ch {
149+
'(' => LParen,
150+
')' => RParen,
151+
',' => Comma,
152+
'+' => Plus,
153+
'-' => Minus,
154+
'*' => Star,
155+
'/' => Slash,
156+
'^' => Caret,
157+
158+
'<' => {
159+
if self.peek() == Some('=') {
160+
self.bump();
161+
Le
162+
} else {
163+
Lt
164+
}
165+
}
166+
'>' => {
167+
if self.peek() == Some('=') {
168+
self.bump();
169+
Ge
170+
} else {
171+
Gt
172+
}
173+
}
174+
'=' => {
175+
if self.peek() == Some('=') {
176+
self.bump();
177+
EqEq
178+
} else {
179+
return None;
180+
}
181+
}
182+
183+
c if c.is_ascii_digit() || (c == '.' && self.peek().is_some_and(|c| c.is_ascii_digit())) => {
184+
self.pos = start;
185+
Const(Literal::Float(self.lex_number()?))
186+
}
187+
188+
_ => {
189+
self.consume_while(|c| c.is_alphanumeric() || c == '_');
190+
let ident = &self.input[start..self.pos];
191+
192+
if ident == "if" {
193+
If
194+
} else if let Some(lit) = const_lit(ident) {
195+
Const(lit)
196+
} else if ch.is_alphanumeric() {
197+
Ident(ident)
198+
} else {
199+
return None;
200+
}
201+
}
202+
};
112203

113-
impl<'src> TokenStream<'src> {
114-
pub fn new(tokens: Vec<(Token<'src>, Span)>) -> Self {
115-
TokenStream { tokens }
204+
Some(tok)
116205
}
117206
}
118207

119-
impl<'src> Input<'src> for TokenStream<'src> {
120-
type Token = (Token<'src>, Span);
208+
impl<'src> Input<'src> for Lexer<'src> {
209+
type Token = Token<'src>;
121210
type Span = Span;
122-
type Cursor = usize;
123-
type MaybeToken = (Token<'src>, Span);
211+
type Cursor = usize; // byte offset inside `input`
212+
type MaybeToken = Token<'src>;
124213
type Cache = Self;
125214

215+
#[inline]
126216
fn begin(self) -> (Self::Cursor, Self::Cache) {
127217
(0, self)
128218
}
129219

220+
#[inline]
130221
fn cursor_location(cursor: &Self::Cursor) -> usize {
131222
*cursor
132223
}
133224

134-
#[inline(always)]
225+
#[inline]
135226
unsafe fn next_maybe(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option<Self::MaybeToken> {
136-
if let Some(tok) = this.tokens.get(*cursor) {
137-
*cursor += 1;
138-
Some(tok.clone())
227+
this.pos = *cursor;
228+
if let Some(tok) = this.next_token() {
229+
*cursor = this.pos;
230+
Some(tok)
139231
} else {
140232
None
141233
}
142234
}
143235

144-
#[inline(always)]
236+
#[inline]
145237
unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span {
146238
(*range.start..*range.end).into()
147239
}
148240
}
149241

150-
impl<'src> ValueInput<'src> for TokenStream<'src> {
242+
impl<'src> ValueInput<'src> for Lexer<'src> {
243+
#[inline]
151244
unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option<Self::Token> {
152-
if let Some(tok) = this.tokens.get(*cursor) {
153-
*cursor += 1;
154-
Some(tok.clone())
245+
this.pos = *cursor;
246+
if let Some(tok) = this.next_token() {
247+
*cursor = this.pos;
248+
Some(tok)
155249
} else {
156250
None
157251
}

0 commit comments

Comments
 (0)