Skip to content

Commit 9cc9bcc

Browse files
authored
Merge pull request #212 from VisLab/update-tokenizer
Update tokenizer -- removed extra delimiter tests in parser
2 parents 1f4764f + 08956d8 commit 9cc9bcc

File tree

6 files changed

+166
-152
lines changed

6 files changed

+166
-152
lines changed

parser/parser.js

Lines changed: 1 addition & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -34,116 +34,6 @@ class HedStringParser {
3434
this.hedSchemas = hedSchemas
3535
}
3636

37-
/**
38-
* Check if the parentheses in a tag group match.
39-
*
40-
* @returns {Issue[]} Any issues found related to unmatched parentheses.
41-
*/
42-
_countTagGroupParentheses() {
43-
const issues = []
44-
const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter)
45-
const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter)
46-
47-
if (numberOfOpeningParentheses !== numberOfClosingParentheses) {
48-
issues.push(
49-
generateIssue('parentheses', {
50-
opening: numberOfOpeningParentheses,
51-
closing: numberOfClosingParentheses,
52-
}),
53-
)
54-
}
55-
56-
return issues
57-
}
58-
59-
/**
60-
* Check if a comma is missing after an opening parenthesis.
61-
*
62-
* @param {string} lastNonEmptyCharacter The last non-empty character.
63-
* @param {string} currentCharacter The current character in the HED string.
64-
* @returns {boolean} Whether a comma is missing after a closing parenthesis.
65-
*/
66-
_isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) {
67-
return (
68-
lastNonEmptyCharacter === closingGroupCharacter &&
69-
!(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter)
70-
)
71-
}
72-
73-
/**
74-
* Find delimiter-related issues in a HED string.
75-
*
76-
* @returns {Issue[]} Any issues related to delimiters.
77-
*/
78-
_findDelimiterIssues() {
79-
const issues = []
80-
let lastNonEmptyValidCharacter = ''
81-
let lastNonEmptyValidIndex = 0
82-
let currentTag = ''
83-
84-
for (let i = 0; i < this.hedString.length; i++) {
85-
const currentCharacter = this.hedString.charAt(i)
86-
currentTag += currentCharacter
87-
88-
if (stringIsEmpty(currentCharacter)) {
89-
continue
90-
}
91-
92-
if (delimiters.has(currentCharacter)) {
93-
if (currentTag.trim() === currentCharacter) {
94-
issues.push(
95-
generateIssue('extraDelimiter', {
96-
character: currentCharacter,
97-
index: i,
98-
string: this.hedString,
99-
}),
100-
)
101-
currentTag = ''
102-
continue
103-
}
104-
currentTag = ''
105-
} else if (currentCharacter === openingGroupCharacter) {
106-
if (currentTag.trim() !== openingGroupCharacter) {
107-
issues.push(generateIssue('commaMissing', { tag: currentTag }))
108-
}
109-
currentTag = ''
110-
} else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) {
111-
issues.push(
112-
generateIssue('commaMissing', {
113-
tag: currentTag.slice(0, -1),
114-
}),
115-
)
116-
break
117-
}
118-
119-
lastNonEmptyValidCharacter = currentCharacter
120-
lastNonEmptyValidIndex = i
121-
}
122-
123-
if (delimiters.has(lastNonEmptyValidCharacter)) {
124-
issues.push(
125-
generateIssue('extraDelimiter', {
126-
character: lastNonEmptyValidCharacter,
127-
index: lastNonEmptyValidIndex,
128-
string: this.hedString,
129-
}),
130-
)
131-
}
132-
133-
return issues
134-
}
135-
136-
/**
137-
* Validate the full unparsed HED string.
138-
*
139-
* @returns {Object<string, Issue[]>} Any issues found during validation.
140-
*/
141-
_validateFullUnparsedHedString() {
142-
const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues())
143-
144-
return { delimiter: delimiterIssues }
145-
}
146-
14737
/**
14838
* Parse a full HED string.
14939
*
@@ -154,14 +44,7 @@ class HedStringParser {
15444
return [this.hedString, {}]
15545
}
15646

157-
const fullStringIssues = this._validateFullUnparsedHedString()
158-
if (fullStringIssues.delimiter.length > 0) {
159-
fullStringIssues.syntax = []
160-
return [null, fullStringIssues]
161-
}
162-
163-
const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
164-
const parsingIssues = Object.assign(fullStringIssues, splitIssues)
47+
const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
16548
if (parsedTags === null) {
16649
return [null, parsingIssues]
16750
}

parser/tokenizer.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,13 @@ export class HedStringTokenizer {
179179
this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma
180180
} else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) {
181181
this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash
182+
}
183+
if (
184+
this.state.currentToken.trim().length > 0 &&
185+
![undefined, CHARACTERS.COMMA].includes(this.state.lastDelimiter[0])
186+
) {
187+
// Missing comma
188+
this.pushIssue('commaMissing', this.state.lastDelimiter[1] + 1)
182189
} else {
183190
if (this.state.currentToken.trim().length > 0) {
184191
this.pushTag(this.hedString.length)
@@ -261,6 +268,10 @@ export class HedStringTokenizer {
261268
handleOpeningGroup(i) {
262269
if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) {
263270
this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1])
271+
} else if (this.state.lastDelimiter[0] === CHARACTERS.CLOSING_COLUMN) {
272+
this.pushIssue('commaMissing', this.state.lastDelimiter[1])
273+
} else if (this.state.currentToken.trim().length > 0) {
274+
this.pushInvalidTag('commaMissing', i, this.state.currentToken.trim())
264275
} else {
265276
this.state.currentGroupStack.push([])
266277
this.state.parenthesesStack.push(new GroupSpec(i, undefined, []))

tests/event.spec.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ describe('HED string and event validation', () => {
6161
describe('Full HED Strings', () => {
6262
const validatorSyntactic = validatorSyntacticBase
6363

64-
it('should not have mismatched parentheses', () => {
64+
//TODO: Remove this test -- the separate test for mismatched parentheses is no longer performed.
65+
it.skip('should not have mismatched parentheses', () => {
6566
const testStrings = {
6667
extraOpening:
6768
'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
@@ -84,7 +85,8 @@ describe('HED string and event validation', () => {
8485
validatorSyntactic(testStrings, expectedIssues, (validator) => {})
8586
})
8687

87-
it('should not have malformed delimiters', () => {
88+
// TODO: This test is replaced by tokenizer tests and should be removed
89+
it.skip('should not have malformed delimiters', () => {
8890
const testStrings = {
8991
missingOpeningComma:
9092
'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',

tests/testData/tokenizerTests.data.js

Lines changed: 148 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -268,36 +268,6 @@ export const tokenizerTests = [
268268
},
269269
],
270270
},
271-
{
272-
name: 'invalid-empty-tag-in-various-places',
273-
description: 'Empty tags in various places (empty groups are allowed).',
274-
tests: [
275-
{
276-
testname: 'end-in-comma',
277-
explanation: 'Cannot end in a comma',
278-
string: 'x,y,',
279-
tagSpecs: [],
280-
groupSpec: null,
281-
errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
282-
},
283-
{
284-
testname: 'double-in-comma',
285-
explanation: 'Cannot have double commas',
286-
string: 'x,,y,',
287-
tagSpecs: [],
288-
groupSpec: null,
289-
errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
290-
},
291-
{
292-
testname: 'leading-comma',
293-
string: ',x,y',
294-
explanation: 'Cannot have a leading comma',
295-
tagSpecs: [],
296-
groupSpec: null,
297-
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
298-
},
299-
],
300-
},
301271
{
302272
name: 'invalid-extra-slash-in-various-places',
303273
description: 'Tags cannot have leading or trailing, or extra slashes',
@@ -352,6 +322,124 @@ export const tokenizerTests = [
352322
},
353323
],
354324
},
325+
{
326+
name: 'invalid-commas',
327+
description: 'Commas must separate tags and groups',
328+
tests: [
329+
{
330+
testname: 'end-in-comma',
331+
explanation: 'Cannot end in a comma',
332+
string: 'x,y,',
333+
tagSpecs: [],
334+
groupSpec: null,
335+
errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
336+
},
337+
{
338+
testname: 'double-in-comma',
339+
explanation: 'Cannot have double commas',
340+
string: 'x,,y,',
341+
tagSpecs: [],
342+
groupSpec: null,
343+
errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
344+
},
345+
{
346+
testname: 'leading-comma',
347+
string: ',x,y',
348+
explanation: 'Cannot have a leading comma',
349+
tagSpecs: [],
350+
groupSpec: null,
351+
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
352+
},
353+
{
354+
testname: 'missing-comma-before-open',
355+
explanation: 'Must have a comma before open parentheses',
356+
string: 'x, y(z)',
357+
tagSpecs: [],
358+
groupSpec: null,
359+
errors: [generateIssue('commaMissing', { index: '4', string: 'x, y(z)', tag: 'y' })],
360+
},
361+
{
362+
testname: 'missing-comma-before-close',
363+
explanation: 'Must have a comma after closing parentheses',
364+
string: 'x, (y)zp, (w)',
365+
tagSpecs: [],
366+
groupSpec: null,
367+
errors: [generateIssue('invalidTag', { index: '8', string: 'x, (y)zp, (w)' })],
368+
},
369+
{
370+
testname: 'missing-comma-before-close-at-end',
371+
explanation: 'Must have a comma after closing parenthesis at end of string',
372+
string: 'x, (y)z',
373+
tagSpecs: [],
374+
groupSpec: null,
375+
errors: [generateIssue('commaMissing', { index: '6', string: 'x, (y)z' })],
376+
},
377+
{
378+
testname: 'missing-comma-before-open-column',
379+
explanation: 'Must have a comma before open brace',
380+
string: 'x, y, {(z)',
381+
tagSpecs: [],
382+
groupSpec: null,
383+
errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {(z)' })],
384+
},
385+
{
386+
testname: 'missing-close-brace-before-parentheses',
387+
explanation: 'Must have a closed-brace-after-open-brace',
388+
string: 'x, y, {w(z)',
389+
tagSpecs: [],
390+
groupSpec: null,
391+
errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {w(z)' })],
392+
},
393+
{
394+
testname: 'missing-comma-before-closing-column',
395+
explanation: 'Must have a comma before closing brace',
396+
string: 'x, {y}(z)',
397+
tagSpecs: [],
398+
groupSpec: null,
399+
errors: [generateIssue('commaMissing', { index: '5', string: 'x, {y}(z)' })],
400+
},
401+
{
402+
testname: 'extra-leading-comma',
403+
explanation: 'Leading comma not allowed',
404+
string: ',x, (y), z',
405+
tagSpecs: [],
406+
groupSpec: null,
407+
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x, (y), z' })],
408+
},
409+
{
410+
testname: 'extra-closing-comma',
411+
explanation: 'Closing comma not allowed',
412+
string: 'x, (y), z,',
413+
tagSpecs: [],
414+
groupSpec: null,
415+
errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,' })],
416+
},
417+
{
418+
testname: 'multiple-leading-commas',
419+
explanation: 'Multiple leading commas not allowed',
420+
string: ',,x, (y), z',
421+
tagSpecs: [],
422+
groupSpec: null,
423+
errors: [generateIssue('emptyTagFound', { index: '0', string: ',,x, (y), z' })],
424+
},
425+
{
426+
testname: 'multiple-closing-commas',
427+
explanation: 'Multiple closing comma not allowed',
428+
string: 'x, (y), z,,',
429+
tagSpecs: [],
430+
groupSpec: null,
431+
errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,,' })],
432+
},
433+
{
434+
testname: 'multiple-internal-commas',
435+
explanation: 'Multiple closing commas not allowed',
436+
string: 'x, (y),, z',
437+
tagSpecs: [],
438+
groupSpec: null,
439+
errors: [generateIssue('emptyTagFound', { index: '6', string: 'x, (y),, z' })],
440+
},
441+
],
442+
},
355443
{
356444
name: 'invalid-improper-curly-braces',
357445
description: 'Curly braces cannot have commas or parentheses or other curly braces',
@@ -398,4 +486,34 @@ export const tokenizerTests = [
398486
},
399487
],
400488
},
489+
{
490+
name: 'invalid-parenthesis',
491+
description: 'Various types of mismatched parentheses',
492+
tests: [
493+
{
494+
testname: 'extra-opening-parentheses',
495+
explanation: 'Parentheses must match',
496+
string: 'x, (((y, z), w), u',
497+
tagSpecs: [],
498+
groupSpec: null,
499+
errors: [generateIssue('unclosedParenthesis', { index: '3', string: 'x, (((y, z), w), u' })],
500+
},
501+
{
502+
testname: 'extra-closing-parentheses',
503+
explanation: 'Cannot parentheses inside curly braces',
504+
string: 'x, (y, z)), w',
505+
tagSpecs: [],
506+
groupSpec: null,
507+
errors: [generateIssue('unopenedParenthesis', { index: '9', string: 'x, (y, z)), w' })],
508+
},
509+
{
510+
testname: 'parentheses-in-wrong-order',
511+
explanation: 'Nesting must be proper',
512+
string: '((x),y))(z',
513+
tagSpecs: [],
514+
groupSpec: null,
515+
errors: [generateIssue('unopenedParenthesis', { index: '7', string: '((x),y))(z' })],
516+
},
517+
],
518+
},
401519
]

tests/tokenizerTests.spec.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import { tokenizerTests } from './testData/tokenizerTests.data'
1010
const runAll = true
1111
let onlyRun = new Map()
1212
if (!runAll) {
13-
onlyRun = new Map([['invalid-empty-tag-in-various-places', ['end-in-comma']]])
13+
onlyRun = new Map([['invalid-comma-missing-or-extra', ['missing-comma-before-close']]])
1414
}
1515

1616
describe('Tokenizer validation using JSON tests', () => {

validator/event/init.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ const initiallyValidateHedString = function (hedString, hedSchemas, options, def
3131
}
3232
if (parsedString === null) {
3333
return [null, [].concat(...Object.values(parsingIssues)), null]
34-
} else if (parsingIssues.syntax.length + parsingIssues.delimiter.length > 0) {
34+
} else if (parsingIssues.syntax.length > 0) {
3535
hedSchemas = new Schemas(null)
3636
}
3737
let hedValidator

0 commit comments

Comments
 (0)