Skip to content

Commit 0ce9852

Browse files
committed
Parser: improve error message handling
* use a single `on_error` handler with error level and message arguments * remove the `Warning` token type, which was never returned anyway * improve `#error` and `#warning` message parsing consistency * make `num_error` messages non-fatal * fix `#warning` behavior
1 parent 6f8bf69 commit 0ce9852

File tree

4 files changed

+44
-46
lines changed

4 files changed

+44
-46
lines changed

parser/c2_parser.c2

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
121121
p.kwinfo,
122122
p.features,
123123
on_tokenizer_error,
124-
on_tokenizer_warning,
125124
p,
126125
false);
127126
p.tok.init();
@@ -137,17 +136,16 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
137136
buf.free();
138137
}
139138

140-
fn void on_tokenizer_error(void* arg, SrcLoc loc) {
139+
fn void on_tokenizer_error(void* arg, c2_tokenizer.ErrorLevel level, SrcLoc loc, const char* message) {
141140
Parser* p = arg;
142-
// NOTE: cannot use p.tok.error_msg, because of possible lookahead (changes token)
143-
p.tok.loc = loc;
144-
// will longjmp
145-
p.error("%s", p.tokenizer.error_msg);
146-
}
147141

148-
fn void on_tokenizer_warning(void* arg, SrcLoc loc) {
149-
Parser* p = arg;
150-
p.diags.error(loc, "%s", p.tokenizer.error_msg);
142+
if (level) {
143+
p.diags.error(loc, "%s", message);
144+
} else {
145+
p.diags.warn(loc, "%s", message);
146+
}
147+
if (level == c2_tokenizer.ErrorLevel.FatalError)
148+
longjmp(&p.jmpbuf, 1);
151149
}
152150

153151
fn void Parser.consumeToken(Parser* p) {
@@ -822,10 +820,6 @@ fn void Parser.dump_token(Parser* p, const Token* tok) @(unused) {
822820
out.add(p.pool.idx2str(tok.text_idx));
823821
out.add("*/");
824822
break;
825-
case Warning:
826-
out.color(color.Yellow);
827-
out.add(tok.error_msg);
828-
break;
829823
case Error:
830824
out.color(color.Red);
831825
out.add(p.tokenizer.error_msg);

parser/c2_tokenizer.c2

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ public type Feature struct {
255255
bool is_else; // inside the #else block
256256
}
257257

258-
public type HandlerFn fn void (void* arg, SrcLoc loc);
258+
public type ErrorLevel enum u8 { Warning, Error, FatalError }
259+
public type ErrorFn fn void (void* arg, ErrorLevel level, SrcLoc loc, const char* msg);
259260

260261
public type Tokenizer struct {
261262
const char* cur;
@@ -270,9 +271,8 @@ public type Tokenizer struct {
270271

271272
string_pool.Pool* pool; // no ownership
272273
string_buffer.Buf* buf; // no ownership, used for strings and character constants
273-
HandlerFn on_error;
274-
HandlerFn on_warning;
275-
void* fn_arg;
274+
ErrorFn on_error;
275+
void* on_error_arg;
276276

277277
// Feature handling
278278
Feature[constants.MaxFeatureDepth+1] feature_stack;
@@ -283,7 +283,7 @@ public type Tokenizer struct {
283283

284284
char[256] error_msg;
285285
}
286-
static_assert(1448, sizeof(Tokenizer));
286+
static_assert(1440, sizeof(Tokenizer));
287287

288288
public fn void Tokenizer.init(Tokenizer* t,
289289
string_pool.Pool* pool,
@@ -292,9 +292,8 @@ public fn void Tokenizer.init(Tokenizer* t,
292292
SrcLoc loc_start,
293293
const keywords.Info* kwinfo,
294294
const string_list.List* features,
295-
HandlerFn on_error,
296-
HandlerFn on_warning,
297-
void* fn_arg,
295+
ErrorFn on_error,
296+
void* on_error_arg,
298297
bool raw_mode)
299298
{
300299
string.memset(t, 0, sizeof(Tokenizer));
@@ -307,8 +306,7 @@ public fn void Tokenizer.init(Tokenizer* t,
307306
t.pool = pool;
308307
t.buf = buf;
309308
t.on_error = on_error;
310-
t.on_warning = on_warning;
311-
t.fn_arg = fn_arg;
309+
t.on_error_arg = on_error_arg;
312310

313311
t.features = features;
314312
t.raw_mode = raw_mode;
@@ -708,7 +706,7 @@ fn void Tokenizer.error(Tokenizer* t, Token* result, const char* format @(printf
708706
result.kind = Kind.Error;
709707
result.error_msg = t.error_msg;
710708
result.done = true;
711-
if (t.on_error) t.on_error(t.fn_arg, result.loc);
709+
if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.FatalError, result.loc, t.error_msg);
712710
}
713711

714712
// generate an error but keep parsing
@@ -736,7 +734,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
736734
}
737735
t.cur = p;
738736
result.len = (u16)((p - t.input_start) - (result.loc - t.loc_start));
739-
if (t.on_warning) t.on_warning(t.fn_arg, result.loc);
737+
if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.Error, result.loc, t.error_msg);
740738
}
741739

742740
fn void Tokenizer.lex_identifier(Tokenizer* t, Token* result) {
@@ -1490,28 +1488,36 @@ fn bool Tokenizer.at_bol(Tokenizer* t) {
14901488

14911489
fn bool Tokenizer.parse_error_warn(Tokenizer* t, Token* result, Kind kind) {
14921490
const char* start = t.cur;
1493-
while (*t.cur != '\0' && *t.cur != '\r' && *t.cur != '\n')
1494-
t.cur++;
1495-
usize len = (usize)(t.cur - start);
1496-
if (len > constants.MaxErrorMsgLen) {
1497-
t.error(result, "error msg too long (max %d bytes)", constants.MaxErrorMsgLen);
1498-
return true;
1491+
1492+
// parse pptokens instead of raw text
1493+
string_buffer.Buf* msg = string_buffer.create_static(elemsof(t.error_msg), false, t.error_msg);
1494+
SrcLoc last_loc = 0;
1495+
while (t.lex_preproc(result) != Kind.Eof) {
1496+
// replace blanks with a single space
1497+
if (last_loc && last_loc < result.loc) msg.add1(' ');
1498+
// copy string text or token source
1499+
if (result.kind == Kind.StringLiteral) {
1500+
msg.add2(t.pool.idx2str(result.text_idx), result.text_len);
1501+
} else {
1502+
msg.add2(t.input_start + (result.loc - t.loc_start), result.len);
1503+
}
1504+
last_loc = result.loc + result.len;
14991505
}
1500-
char[constants.MaxErrorMsgLen+1] msg;
1501-
string.memcpy(msg, start, len);
1502-
msg[len] = 0;
1506+
msg.size(); // ensure null terminator
15031507

15041508
if (kind == Kind.Feat_error) {
1505-
t.cur = t.line_start;
1506-
t.error(result, "%s", msg);
1507-
} else {
1508-
// TODO: output diagnostic synchronously
1509-
string.strcpy(t.error_msg, msg);
1510-
result.kind = Kind.Warning;
1511-
result.len = (u16)((t.cur - t.input_start) - (result.loc - t.loc_start));
1509+
t.cur = t.line_start; // restart on the same line
1510+
result.kind = Kind.Error;
1511+
result.done = true;
1512+
result.loc = t.loc_start + (SrcLoc)(t.line_start - t.input_start);
1513+
result.len = (u16)(t.cur - start);
15121514
result.error_msg = t.error_msg;
1515+
if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.FatalError, result.loc, t.error_msg);
1516+
return true; // return error token with result.done set
1517+
} else {
1518+
if (t.on_error) t.on_error(t.on_error_arg, ErrorLevel.Warning, result.loc, t.error_msg);
1519+
return false; // continue reading tokens
15131520
}
1514-
return true;
15151521
}
15161522

15171523
fn bool Tokenizer.is_enabled(const Tokenizer* t) {

parser/token.c2

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,6 @@ public type Kind enum u8 {
146146
BlockComment,
147147
// Special Tokens
148148
Eof,
149-
Warning,
150149
Error,
151150
}
152151

@@ -285,7 +284,6 @@ const char*[] token_names = {
285284
[Kind.LineComment] = "l-comment",
286285
[Kind.BlockComment] = "b-comment",
287286
[Kind.Eof] = "eof",
288-
[Kind.Warning] = "warning",
289287
[Kind.Error] = "error",
290288
}
291289

tools/c2cat.c2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ public fn i32 c2cat(const char* filename)
304304
keywords.Info kwinfo;
305305
kwinfo.init(ctx.pool);
306306
c2_tokenizer.Tokenizer tokenizer;
307-
tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, nil, true);
307+
tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, true);
308308
ctx.tokenizer = &tokenizer;
309309

310310
Token tok;

0 commit comments

Comments
 (0)