@@ -8,37 +8,75 @@ local P, S, B = lpeg.P, lpeg.S, lpeg.B
8
8
local lex = lexer.new(...)

-- HTML-like tags.
-- Double-quoted attribute value. A backslash escapes the character after it; the value may
-- not contain an unescaped '>'.
local dq_str = P('"') * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * P('"')
-- Unquoted attribute value: cannot contain whitespace, quotes, '=', or angle brackets.
-- Whitespace is excluded by subtracting the lexer.space pattern rather than by building a
-- character-set string.
local unquoted_attr = (lexer.any - S('"\'<>=') - lexer.space)^1
-- Attribute name, optionally followed by '=' and a quoted or unquoted value.
local tag_attr = lex:tag(lexer.ATTRIBUTE, lexer.alpha^1 * lexer.space^0 *
	('=' * lexer.space^0 * (dq_str + unquoted_attr))^-1 * lexer.space^0)
local tag_start = lex:tag(lexer.TAG, '<' * P('/')^-1 * lexer.alnum^1 * lexer.space^0)
-- Accepts both '>' and the '/>' of a self-closing tag.
local tag_end = lex:tag(lexer.TAG, P('/')^-1 * '>')
-- The closing bracket is required: without it, ordinary text such as "a <b and c" would be
-- styled as a tag followed by attributes.
lex:add_rule('tag', tag_start * tag_attr^0 * tag_end)
22
+
23
+
24
-- Internal link: [[Target]] or [[Target|Display text]].
-- Everything from '[[' through the first ']]' is tagged as a single LINK.
local internal_link_content = (lexer.any - P(']]'))^1
lex:add_rule('internal_link', lex:tag(lexer.LINK, P('[[') * internal_link_content * P(']]')))

-- External link: [http://example.com Link text] or [http://example.com].
-- The bracket content must begin with a URL scheme ("xx://") or "mailto:"; ordinary bracketed
-- text such as "[note]" is not treated as a link.
local protocol = lexer.alpha^2 * P('://') + P('mailto:')
local external_link_content = protocol * (lexer.any - P(']'))^1
lex:add_rule('external_link', lex:tag(lexer.LINK, P('[') * external_link_content * P(']')))
21
36
22
-- Parser functions: {{#function:args}} or {{function:args}}.
-- Only the simple "name:" form is recognized, and the arguments may not contain braces, so
-- constructs with nested templates are not matched by this rule. The whole construct (name
-- and arguments) is tagged as FUNCTION.
local func_name_char = lexer.alpha + '_'
local func_head = P('#')^-1 * func_name_char^1 * ':'
local func_args = (lexer.any - S('{}'))^0
local parser_func = P('{{') * func_head * func_args * '}}'
lex:add_rule('parser_func', lex:tag(lexer.FUNCTION, parser_func))
44
+
45
+
46
-- Templates and variables: {{TemplateName|args}} or {{VARIABLENAME}}.
-- The whole construct, from '{{' through '}}', is tagged as VARIABLE. The content may not
-- contain braces. This rule needs to stay *after* 'parser_func' if there is any ambiguity
-- between the two forms.
local template_name_char = lexer.alnum + '_'
local template_body = (lexer.any - S('{}'))^0
local template = P('{{') * template_name_char^1 * template_body * '}}'
lex:add_rule('template', lex:tag(lexer.VARIABLE, template))
53
+
54
+
55
-- Headings (e.g., == My Heading ==).
-- The whole construct, '=' markers included, is tagged as HEADING. It must begin at the start
-- of a line, and the heading text may not contain '='.
local heading_marker = P('=')^1
-- Only spaces and tabs may pad the text: lexer.space also matches line breaks, which would
-- let an opening marker pair up with text on a following line.
local heading_pad = S(' \t')^0
local heading_text = (lexer.any - '=' - lexer.newline)^1
local heading = heading_marker * heading_pad * heading_text * heading_pad * heading_marker
lex:add_rule('heading', lex:tag(lexer.HEADING, lexer.starts_line(heading)))
61
+
27
62
28
63
-- Operators.
-- TODO: bold/italic markup could get dedicated rules; until then these characters are simply
-- tagged as generic operators.
local operator_chars = S('-=|#~!')
lex:add_rule('operator', lex:tag(lexer.OPERATOR, operator_chars))

-- Behavior switches (e.g., __TOC__).
-- A switch is recognized only directly after whitespace or at position 1 of the text being
-- lexed, and only when it is followed by whitespace.
local function at_text_start(_, pos) return pos == 1 end
local switch_lead = B(lexer.space) + P(at_text_start)
local switch_word = lex:word_match('behavior_switch')
lex:add_rule('behavior_switch', switch_lead * switch_word * #lexer.space)
35
72
36
73
-- Comments.
-- HTML-style comments delimited by '<!--' and '-->'.
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
38
75
39
76
-- Word lists
-- Each switch is listed exactly once.
lex:set_word_list('behavior_switch', {
	'__TOC__', '__FORCETOC__', '__NOTOC__', '__NOEDITSECTION__', '__NOCC__', '__NOINDEX__',
	'__NOKEYWORDLINK__', '__NOCONTENTCONVERT__'
})

lexer.property['scintillua.comment'] = '<!--|-->'
lexer.property['scintillua.angle.braces'] = '1'
0 commit comments