Fix various tokenization issues

- Keywords and operators were previously matching too eagerly. For example, `ifcategory` would be matched as two tokens `if` `category` and result in a valid file. This is now a single error token.
- Strings previously allowed line breaks in them. This has been fixed; strings now only consume up to the end of the line.
- The error message for error tokens has been improved by JSON-escaping the token value.
This commit is contained in:
mschae23
2025-09-24 13:46:21 +02:00
parent 7c61c8b44e
commit d165e06d2b

View File

@@ -395,40 +395,55 @@ function nextToken(state: LexerState): Token {
"//", "//",
].concat(SKIP_RULE_ATTRIBUTES) ].concat(SKIP_RULE_ATTRIBUTES)
.concat(SKIP_RULE_OPERATORS), true); .concat(SKIP_RULE_OPERATORS), true);
let type: TokenType | null = null;
let kind: "word" | "operator" | null = null;
if (keyword !== null) { if (keyword !== null) {
if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword) || (SKIP_RULE_OPERATORS as string[]).includes(keyword)) { if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword)) {
return makeToken(keyword as TokenType); kind = "word";
type = keyword as TokenType;
} else if ((SKIP_RULE_OPERATORS as string[]).includes(keyword)) {
kind = "operator";
type = keyword as TokenType;
} else {
switch (keyword) {
case "if": // Fallthrough
case "and": // Fallthrough
case "or": kind = "word"; type = keyword as TokenType; break;
case "(": return makeToken("(");
case ")": return makeToken(")");
case "//":
resetToCurrent();
skipLine();
return makeToken("comment");
default:
}
} }
} else {
const keyword2 = expectKeyword(
[ "disabled", "show overlay", "manual skip", "auto skip" ], false);
switch (keyword) { if (keyword2 !== null) {
case "if": return makeToken("if"); kind = "word";
case "and": return makeToken("and"); type = keyword2 as TokenType;
case "or": return makeToken("or");
case "(": return makeToken("(");
case ")": return makeToken(")");
case "//":
resetToCurrent();
skipLine();
return makeToken("comment");
default:
} }
} }
const keyword2 = expectKeyword( if (type !== null) {
[ "disabled", "show overlay", "manual skip", "auto skip" ], false); const more = kind == "operator" ? /[<>=!~*&|-]/ : kind == "word" ? /[a-zA-Z0-9.]/ : /[a-zA-Z0-9<>=!~*&|.-]/;
if (keyword2 !== null) { let c = peek();
switch (keyword2) { let error = false;
case "disabled": return makeToken("disabled"); while (c !== null && more.test(c)) {
case "show overlay": return makeToken("show overlay"); error = true;
case "manual skip": return makeToken("manual skip"); consume();
case "auto skip": return makeToken("auto skip"); c = peek();
default:
} }
return makeToken(error ? "error" : type);
} }
let c = consume(); let c = consume();
@@ -491,6 +506,11 @@ function nextToken(state: LexerState): Token {
output = output.concat(`\\${c}`); output = output.concat(`\\${c}`);
break; break;
} }
} else if (c === '\n') {
// Unterminated / multi-line string, unsupported
error = true;
// Prevent unterminated strings from consuming the entire rest of the input
break;
} else { } else {
output = output.concat(c); output = output.concat(c);
} }
@@ -566,12 +586,11 @@ function nextToken(state: LexerState): Token {
} }
// Consume common characters up to a space for a more useful value in the error token // Consume common characters up to a space for a more useful value in the error token
const common = /[a-zA-Z0-9<>=!~*.-]/; const common = /[a-zA-Z0-9<>=!~*&|.-]/;
if (c !== null && common.test(c)) { c = peek();
do { while (c !== null && common.test(c)) {
consume(); consume();
c = peek(); c = peek();
} while (c !== null && common.test(c));
} }
return makeToken("error"); return makeToken("error");
@@ -683,7 +702,7 @@ export function parseConfig(config: string): { rules: AdvancedSkipRule[]; errors
*/ */
function expect(expected: readonly TokenType[], message: string, panic: boolean) { function expect(expected: readonly TokenType[], message: string, panic: boolean) {
if (!match(expected)) { if (!match(expected)) {
errorAtCurrent(message.concat(`, got: \`${current.type === "error" ? current.value : current.type}\``), panic); errorAtCurrent(message.concat(current.type === "error" ? `, got: ${JSON.stringify(current.value)}` : `, got: \`${current.type}\``), panic);
} }
} }