Fix various tokenization issues

- Keywords and operators were previously matched too eagerly. For example, `ifcategory` would be matched as two tokens, `if` and `category`, and result in a valid file. It is now a single error token.
- Strings previously allowed line breaks in them. This has been fixed: strings now only consume up to the end of the line.
- The error message for error tokens has been improved by JSON-escaping the token value.
2026-01-28 21:30:56 +03:00 · 2025-09-24 13:46:21 +02:00
parent 7c61c8b44e
commit d165e06d2b
1 changed files with 51 additions and 32 deletions
--- a/src/utils/skipRule.ts
+++ b/src/utils/skipRule.ts
@@ -395,40 +395,55 @@ function nextToken(state: LexerState): Token {
        "//",
    ].concat(SKIP_RULE_ATTRIBUTES)
        .concat(SKIP_RULE_OPERATORS), true);
    let type: TokenType | null = null;
    let kind: "word" | "operator" | null = null;
    if (keyword !== null) {
-        if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword) || (SKIP_RULE_OPERATORS as string[]).includes(keyword)) {
+        if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword)) {
-            return makeToken(keyword as TokenType);
+            kind = "word";
            type = keyword as TokenType;
        } else if ((SKIP_RULE_OPERATORS as string[]).includes(keyword)) {
            kind = "operator";
            type = keyword as TokenType;
        } else {
            switch (keyword) {
                case "if":  // Fallthrough
                case "and": // Fallthrough
                case "or": kind = "word"; type = keyword as TokenType; break;
                case "(": return makeToken("(");
                case ")": return makeToken(")");
                case "//":
                    resetToCurrent();
                    skipLine();
                    return makeToken("comment");
                default:
            }
        }
    } else {
        const keyword2 = expectKeyword(
            [ "disabled", "show overlay", "manual skip", "auto skip" ], false);
-        switch (keyword) {
+        if (keyword2 !== null) {
-            case "if": return makeToken("if");
+            kind = "word";
-            case "and": return makeToken("and");
+            type = keyword2 as TokenType;
            case "or": return makeToken("or");
            case "(": return makeToken("(");
            case ")": return makeToken(")");
            case "//":
                resetToCurrent();
                skipLine();
                return makeToken("comment");
            default:
        }
    }
-    const keyword2 = expectKeyword(
+    if (type !== null) {
-        [ "disabled", "show overlay", "manual skip", "auto skip" ], false);
+        const more = kind == "operator" ? /[<>=!~*&|-]/ : kind == "word" ? /[a-zA-Z0-9.]/ : /[a-zA-Z0-9<>=!~*&|.-]/;
-    if (keyword2 !== null) {
+        let c = peek();
-        switch (keyword2) {
+        let error = false;
-            case "disabled": return makeToken("disabled");
+        while (c !== null && more.test(c)) {
-            case "show overlay": return makeToken("show overlay");
+            error = true;
-            case "manual skip": return makeToken("manual skip");
+            consume();
-            case "auto skip": return makeToken("auto skip");
+            c = peek();
            default:
        }
        return makeToken(error ? "error" : type);
    }
    let c = consume();
@@ -491,6 +506,11 @@ function nextToken(state: LexerState): Token {
                        output = output.concat(`\\${c}`);
                        break;
                }
            } else if (c === '\n') {
                // Unterminated / multi-line string, unsupported
                error = true;
                // Prevent unterminated strings from consuming the entire rest of the input
                break;
            } else {
                output = output.concat(c);
            }
@@ -566,12 +586,11 @@ function nextToken(state: LexerState): Token {
    }
    // Consume common characters up to a space for a more useful value in the error token
-    const common = /[a-zA-Z0-9<>=!~*.-]/;
+    const common = /[a-zA-Z0-9<>=!~*&|.-]/;
-    if (c !== null && common.test(c)) {
+    c = peek();
-        do {
+    while (c !== null && common.test(c)) {
-            consume();
+        consume();
-            c = peek();
+        c = peek();
        } while (c !== null && common.test(c));
    }
    return makeToken("error");
@@ -683,7 +702,7 @@ export function parseConfig(config: string): { rules: AdvancedSkipRule[]; errors
     */
    function expect(expected: readonly TokenType[], message: string, panic: boolean) {
        if (!match(expected)) {
-            errorAtCurrent(message.concat(`, got: \`${current.type === "error" ? current.value : current.type}\``), panic);
+            errorAtCurrent(message.concat(current.type === "error" ?  `, got: ${JSON.stringify(current.value)}` : `, got: \`${current.type}\``), panic);
        }
    }