mirror of
https://github.com/ajayyy/SponsorBlock.git
synced 2026-01-28 21:30:56 +03:00
Fix various tokenization issues
- Keywords and operators previously matched too eagerly. For example, `ifcategory` was tokenized as the two tokens `if` and `category`, so the file was accepted as valid. Such input now produces a single error token.
- Strings previously allowed line breaks inside them. This has been fixed: a string now only consumes input up to the end of the line.
- The message reported for error tokens has been improved by JSON-escaping the token's value.
This commit is contained in:
@@ -395,40 +395,55 @@ function nextToken(state: LexerState): Token {
|
|||||||
"//",
|
"//",
|
||||||
].concat(SKIP_RULE_ATTRIBUTES)
|
].concat(SKIP_RULE_ATTRIBUTES)
|
||||||
.concat(SKIP_RULE_OPERATORS), true);
|
.concat(SKIP_RULE_OPERATORS), true);
|
||||||
|
let type: TokenType | null = null;
|
||||||
|
let kind: "word" | "operator" | null = null;
|
||||||
|
|
||||||
if (keyword !== null) {
|
if (keyword !== null) {
|
||||||
if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword) || (SKIP_RULE_OPERATORS as string[]).includes(keyword)) {
|
if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword)) {
|
||||||
return makeToken(keyword as TokenType);
|
kind = "word";
|
||||||
|
type = keyword as TokenType;
|
||||||
|
} else if ((SKIP_RULE_OPERATORS as string[]).includes(keyword)) {
|
||||||
|
kind = "operator";
|
||||||
|
type = keyword as TokenType;
|
||||||
|
} else {
|
||||||
|
switch (keyword) {
|
||||||
|
case "if": // Fallthrough
|
||||||
|
case "and": // Fallthrough
|
||||||
|
case "or": kind = "word"; type = keyword as TokenType; break;
|
||||||
|
|
||||||
|
case "(": return makeToken("(");
|
||||||
|
case ")": return makeToken(")");
|
||||||
|
|
||||||
|
case "//":
|
||||||
|
resetToCurrent();
|
||||||
|
skipLine();
|
||||||
|
return makeToken("comment");
|
||||||
|
|
||||||
|
default:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const keyword2 = expectKeyword(
|
||||||
|
[ "disabled", "show overlay", "manual skip", "auto skip" ], false);
|
||||||
|
|
||||||
switch (keyword) {
|
if (keyword2 !== null) {
|
||||||
case "if": return makeToken("if");
|
kind = "word";
|
||||||
case "and": return makeToken("and");
|
type = keyword2 as TokenType;
|
||||||
case "or": return makeToken("or");
|
|
||||||
|
|
||||||
case "(": return makeToken("(");
|
|
||||||
case ")": return makeToken(")");
|
|
||||||
|
|
||||||
case "//":
|
|
||||||
resetToCurrent();
|
|
||||||
skipLine();
|
|
||||||
return makeToken("comment");
|
|
||||||
|
|
||||||
default:
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const keyword2 = expectKeyword(
|
if (type !== null) {
|
||||||
[ "disabled", "show overlay", "manual skip", "auto skip" ], false);
|
const more = kind == "operator" ? /[<>=!~*&|-]/ : kind == "word" ? /[a-zA-Z0-9.]/ : /[a-zA-Z0-9<>=!~*&|.-]/;
|
||||||
|
|
||||||
if (keyword2 !== null) {
|
let c = peek();
|
||||||
switch (keyword2) {
|
let error = false;
|
||||||
case "disabled": return makeToken("disabled");
|
while (c !== null && more.test(c)) {
|
||||||
case "show overlay": return makeToken("show overlay");
|
error = true;
|
||||||
case "manual skip": return makeToken("manual skip");
|
consume();
|
||||||
case "auto skip": return makeToken("auto skip");
|
c = peek();
|
||||||
default:
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return makeToken(error ? "error" : type);
|
||||||
}
|
}
|
||||||
|
|
||||||
let c = consume();
|
let c = consume();
|
||||||
@@ -491,6 +506,11 @@ function nextToken(state: LexerState): Token {
|
|||||||
output = output.concat(`\\${c}`);
|
output = output.concat(`\\${c}`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
} else if (c === '\n') {
|
||||||
|
// Unterminated / multi-line string, unsupported
|
||||||
|
error = true;
|
||||||
|
// Prevent unterminated strings from consuming the entire rest of the input
|
||||||
|
break;
|
||||||
} else {
|
} else {
|
||||||
output = output.concat(c);
|
output = output.concat(c);
|
||||||
}
|
}
|
||||||
@@ -566,12 +586,11 @@ function nextToken(state: LexerState): Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Consume common characters up to a space for a more useful value in the error token
|
// Consume common characters up to a space for a more useful value in the error token
|
||||||
const common = /[a-zA-Z0-9<>=!~*.-]/;
|
const common = /[a-zA-Z0-9<>=!~*&|.-]/;
|
||||||
if (c !== null && common.test(c)) {
|
c = peek();
|
||||||
do {
|
while (c !== null && common.test(c)) {
|
||||||
consume();
|
consume();
|
||||||
c = peek();
|
c = peek();
|
||||||
} while (c !== null && common.test(c));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return makeToken("error");
|
return makeToken("error");
|
||||||
@@ -683,7 +702,7 @@ export function parseConfig(config: string): { rules: AdvancedSkipRule[]; errors
|
|||||||
*/
|
*/
|
||||||
function expect(expected: readonly TokenType[], message: string, panic: boolean) {
|
function expect(expected: readonly TokenType[], message: string, panic: boolean) {
|
||||||
if (!match(expected)) {
|
if (!match(expected)) {
|
||||||
errorAtCurrent(message.concat(`, got: \`${current.type === "error" ? current.value : current.type}\``), panic);
|
errorAtCurrent(message.concat(current.type === "error" ? `, got: ${JSON.stringify(current.value)}` : `, got: \`${current.type}\``), panic);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user