Skip to content

Commit 4da5ae0

Browse files
committed
fix(mediawiki): some improvements and added comments
Gemini suggested some improvements during the review. Of course, I have thoroughly reviewed them and I accept complete responsibility for them. Signed-off-by: Matěj Cepl <[email protected]>
1 parent effcb1b commit 4da5ae0

File tree

1 file changed

+53
-15
lines changed

1 file changed

+53
-15
lines changed

lexers/mediawiki.lua

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,37 +8,75 @@ local P, S, B = lpeg.P, lpeg.S, lpeg.B
88
local lex = lexer.new(...)
99

1010
-- HTML-like tags
11-
local tag_start = lex:tag(lexer.TAG, '<' * P('/')^-1 * lexer.alnum^1 * lexer.space^0)
12-
local dq_str = '"' * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * '"'
11+
local dq_str = P('"') * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * P('"')
12+
-- Unquoted attributes (can't contain spaces, quotes, or angle brackets)
13+
-- Whitespace is excluded by subtracting the lexer.space pattern: S() takes a literal string of
-- characters, and lexer.space_set does not appear to be a Scintillua field (TODO confirm).
local unquoted_attr = (lexer.any - S('"\'<>=') - lexer.space)^1
1314
local tag_attr = lex:tag(lexer.ATTRIBUTE, lexer.alpha^1 * lexer.space^0 *
14-
('=' * lexer.space^0 * (dq_str + (lexer.any - lexer.space - '>')^0)^-1)^0 * lexer.space^0)
15+
('=' * lexer.space^0 * (dq_str + unquoted_attr))^-1 * lexer.space^0)
16+
local tag_start = lex:tag(lexer.TAG, '<' * P('/')^-1 * lexer.alnum^1 * lexer.space^0)
1517
local tag_end = lex:tag(lexer.TAG, P('/')^-1 * '>')
16-
lex:add_rule('tag', tag_start * tag_attr^0 * tag_end)
18+
-- The tag rule matches the tag opener, any attributes, and then optionally the closing delimiter ('>' or '/>').
19+
-- A more robust solution might distinguish self-closing tags
20+
-- (<br />) from paired tags (<div>...</div>)
21+
-- tag_end already accepts an optional '/' before '>', so no separate P('/')^-1 is needed here.
-- The closing delimiter stays optional so that an unterminated tag is still highlighted.
lex:add_rule('tag', tag_start * tag_attr^0 * tag_end^-1)
22+
23+
24+
-- Internal Link: [[Target]] or [[Target|Display Text]]
25+
-- The content can contain almost anything except unbalanced square brackets.
26+
-- We'll highlight the whole thing as LINK.
27+
local internal_link_content = (lexer.any - P(']]'))^1 -- Matches everything until ']]'
28+
lex:add_rule('internal_link', lex:tag(lexer.LINK, P('[[') * internal_link_content * P(']]')))
29+
30+
-- External Link: [http://example.com Link text] or [http://example.com]
31+
-- Content should start with a protocol (http/s, ftp, mailto etc.)
32+
local protocol = lexer.alpha^2 * P('://')
33+
-- Require a URL scheme (or mailto:) so that arbitrary bracketed text such as "[note]" is not
-- tagged as a link, and stop at end of line so an unclosed '[' cannot swallow following lines.
local external_link_content = (protocol + P('mailto:')) * (lexer.any - S(']\r\n'))^1
34+
lex:add_rule('external_link', lex:tag(lexer.LINK, P('[') * external_link_content * P(']')))
1735

18-
-- Link
19-
lex:add_rule('link', lex:tag(lexer.STRING, S('[]')))
20-
lex:add_rule('internal_link', B('[[') * lex:tag(lexer.LINK, (lexer.any - '|' - ']]')^1))
2136

22-
-- Templates and parser functions.
23-
lex:add_rule('template', lex:tag(lexer.OPERATOR, S('{}')))
37+
-- Parser Functions: {{#function:args}} or {{function:args}}
38+
-- This is a very complex area. This lexer assumes a simple "name:" pattern.
39+
-- Tag the function name and its arguments.
40+
local parser_function_name = P('#')^-1 * (lexer.alpha + S('_'))^1 * P(':')
41+
local parser_function_content = (lexer.any - S('{}'))^0
2442
lex:add_rule('parser_func',
25-
B('{{') * lex:tag(lexer.FUNCTION, '#' * lexer.alpha^1 + lexer.upper^1 * ':'))
26-
lex:add_rule('template_name', B('{{') * lex:tag(lexer.LINK, (lexer.any - S('{}|'))^1))
43+
lex:tag(lexer.FUNCTION, P('{{') * parser_function_name * parser_function_content * P('}}')))
44+
45+
46+
-- Templates and Variables: {{TemplateName|args}} or {{VARIABLENAME}}
47+
-- Tag the template/variable name.
48+
-- This rule needs to be placed *after* parser_func if there's any ambiguity in parsing.
49+
local template_or_variable_name = (lexer.alnum + S('_'))^1
50+
local template_content = (lexer.any - S('{}'))^0 -- Content up to closing braces
51+
lex:add_rule('template',
52+
lex:tag(lexer.VARIABLE, P('{{') * template_or_variable_name * template_content * P('}}')))
53+
54+
55+
-- Headings (e.g., == My Heading ==)
56+
-- Capture the heading text as lexer.HEADING
57+
local heading_level = S('=')^1
58+
lex:add_rule('heading',
59+
lex:tag(lexer.HEADING, lexer.starts_line(heading_level * lexer.space^0 *
60+
(lexer.any - S('=') - lexer.newline)^1 * lexer.space^0 * heading_level)))
61+
2762

2863
-- Operators.
64+
-- Consider adding more specific rules for bold/italic instead of general operators.
65+
-- For now, keep existing general operators.
2966
lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('-=|#~!')))
3067

31-
-- Behavior switches
68+
-- Behavior switches (e.g., __TOC__)
3269
local start_pat = P(function(_, pos) return pos == 1 end)
3370
lex:add_rule('behavior_switch',
34-
((B(lexer.space) + start_pat) * lex:word_match('behavior_switch') * #lexer.space))
71+
((B(lexer.space) + start_pat) * lex:word_match('behavior_switch') * #lexer.space))
3572

3673
-- Comments.
37-
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
74+
-- HTML-style block comments; the delimiters match the 'scintillua.comment' property set below.
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
3875

3976
-- Word lists
4077
lex:set_word_list('behavior_switch',
41-
{'__TOC__', '__FORCETOC__', '__NOTOC__', '__NOEDITSECTION__', '__NOCC__', '__NOINDEX__'})
78+
{'__TOC__', '__FORCETOC__', '__NOTOC__', '__NOEDITSECTION__', '__NOCC__',
79+
'__NOINDEX__', '__NOKEYWORDLINK__', '__NOCONTENTCONVERT__'})
4280

4381
lexer.property['scintillua.comment'] = '<!--|-->'
4482
lexer.property['scintillua.angle.braces'] = '1'

0 commit comments

Comments
 (0)