Tokenize keywords

This commit is contained in:
mschae23
2025-09-23 12:45:29 +02:00
parent 3d84152fa4
commit 2370adb8b2
2 changed files with 118 additions and 32 deletions

View File

@@ -2,7 +2,7 @@ import * as React from "react";
import * as CompileConfig from "../../../config.json"; import * as CompileConfig from "../../../config.json";
import Config from "../../config"; import Config from "../../config";
import { AdvancedSkipRuleSet, SkipRuleAttribute, SkipRuleOperator } from "../../utils/skipRule"; import {AdvancedSkipRuleSet, compileConfigNew, SkipRuleAttribute, SkipRuleOperator} from "../../utils/skipRule";
import { ActionType, ActionTypes, CategorySkipOption } from "../../types"; import { ActionType, ActionTypes, CategorySkipOption } from "../../types";
let configSaveTimeout: NodeJS.Timeout | null = null; let configSaveTimeout: NodeJS.Timeout | null = null;
@@ -65,6 +65,9 @@ export function AdvancedSkipOptionsComponent() {
} }
function compileConfig(config: string): AdvancedSkipRuleSet[] | null { function compileConfig(config: string): AdvancedSkipRuleSet[] | null {
// Debug
compileConfigNew(config);
const ruleSets: AdvancedSkipRuleSet[] = []; const ruleSets: AdvancedSkipRuleSet[] = [];
let ruleSet: AdvancedSkipRuleSet = { let ruleSet: AdvancedSkipRuleSet = {

View File

@@ -191,6 +191,7 @@ type TokenType =
| keyof typeof SkipRuleAttribute // Segment attributes | keyof typeof SkipRuleAttribute // Segment attributes
| keyof typeof SkipRuleOperator // Segment attribute operators | keyof typeof SkipRuleOperator // Segment attribute operators
| "and" | "or" // Expression operators | "and" | "or" // Expression operators
| "(" | ")" // Syntax
| "string" // Literal values | "string" // Literal values
| "eof" | "error"; // Sentinel and special tokens | "eof" | "error"; // Sentinel and special tokens
@@ -230,9 +231,9 @@ function nextToken(state: LexerState): Token {
/** /**
* Returns the UTF-16 value at the current position and advances it forward. * Returns the UTF-16 value at the current position and advances it forward.
* If the end of the source string has been reached, returns {@code null}. * If the end of the source string has been reached, returns <code>null</code>.
* *
* @return current UTF-16 value, or {@code null} on EOF * @return current UTF-16 value, or <code>null</code> on EOF
*/ */
function consume(): string | null { function consume(): string | null {
if (state.source.length > state.current) { if (state.source.length > state.current) {
@@ -258,9 +259,9 @@ function nextToken(state: LexerState): Token {
/** /**
* Returns the UTF-16 value at the current position without advancing it. * Returns the UTF-16 value at the current position without advancing it.
* If the end of the source string has been reached, returns {@code null}. * If the end of the source string has been reached, returns <code>null</code>.
* *
* @return current UTF-16 value, or {@code null} on EOF * @return current UTF-16 value, or <code>null</code> on EOF
*/ */
function peek(): string | null { function peek(): string | null {
if (state.source.length > state.current) { if (state.source.length > state.current) {
@@ -272,31 +273,28 @@ function nextToken(state: LexerState): Token {
} }
/** /**
* Checks the current position against expected UTF-16 values. * Checks the word at the current position against a list of
* If any of them matches, advances the current position and returns * expected keywords. The keyword can consist of multiple characters.
* {@code true}, otherwise {@code false}. * If a match is found, the current position is advanced by the length
* of the keyword found.
* *
* @param expected the expected set of UTF-16 values at the current position * @param keywords the expected set of keywords at the current position
* @return whether the actual value matches and whether the position was advanced * @param caseSensitive whether to do a case-sensitive comparison
* @return the matching keyword, or <code>null</code>
*/ */
function expect(expected: string | readonly string[]): boolean { function expectKeyword(keywords: readonly string[], caseSensitive: boolean): string | null {
const actual = peek(); for (const keyword of keywords) {
// slice() clamps to string length, so cannot cause out of bounds errors
const actual = state.source.slice(state.current, state.current + keyword.length);
if (actual === null) { if (caseSensitive && keyword === actual || !caseSensitive && keyword.toLowerCase() === actual.toLowerCase()) {
return false; // Does not handle keywords containing line feeds, which shouldn't happen anyway
state.current += keyword.length;
return keyword;
}
} }
if (typeof expected === "string") { return null;
if (expected === actual) {
consume();
return true;
}
} else if (expected.includes(actual)) {
consume();
return true;
}
return false;
} }
/** /**
@@ -306,7 +304,7 @@ function nextToken(state: LexerState): Token {
*/ */
function skipWhitespace() { function skipWhitespace() {
let c = peek(); let c = peek();
const whitespace = /s+/; const whitespace = /\s+/;
while (c != null) { while (c != null) {
if (!whitespace.test(c)) { if (!whitespace.test(c)) {
@@ -319,7 +317,7 @@ function nextToken(state: LexerState): Token {
} }
/** /**
* Skips all characters until the next {@code "\n"} (line feed) * Skips all characters until the next <code>"\n"</code> (line feed)
* character occurs (inclusive). Will always advance the current position * character occurs (inclusive). Will always advance the current position
* at least once. * at least once.
*/ */
@@ -341,23 +339,108 @@ function nextToken(state: LexerState): Token {
for (;;) { for (;;) {
skipWhitespace(); skipWhitespace();
state.start = state.current; state.start = state.current;
state.start_pos = state.current_pos;
if (isEof()) { if (isEof()) {
return makeToken("eof"); return makeToken("eof");
} }
const keyword = expectKeyword([
"if", "and", "or",
"(", ")",
"//",
].concat(Object.values(SkipRuleAttribute))
.concat(Object.values(SkipRuleOperator)), true);
if (keyword !== null) {
switch (keyword) {
case "if": return makeToken("if");
case "and": return makeToken("and");
case "or": return makeToken("or");
case "(": return makeToken("(");
case ")": return makeToken(")");
case "time.start": return makeToken("StartTime");
case "time.end": return makeToken("EndTime");
case "time.duration": return makeToken("Duration");
case "time.startPercent": return makeToken("StartTimePercent");
case "time.endPercent": return makeToken("EndTimePercent");
case "time.durationPercent": return makeToken("DurationPercent");
case "category": return makeToken("Category");
case "actionType": return makeToken("ActionType");
case "chapter.name": return makeToken("Description");
case "chapter.source": return makeToken("Source");
case "channel.id": return makeToken("ChannelID");
case "channel.name": return makeToken("ChannelName");
case "video.duration": return makeToken("VideoDuration");
case "video.title": return makeToken("Title");
case "<": return makeToken("Less");
case "<=": return makeToken("LessOrEqual");
case ">": return makeToken("Greater");
case ">=": return makeToken("GreaterOrEqual");
case "==": return makeToken("Equal");
case "!=": return makeToken("NotEqual");
case "*=": return makeToken("Contains");
case "!*=": return makeToken("NotContains");
case "~=": return makeToken("Regex");
case "~i=": return makeToken("RegexIgnoreCase");
case "!~=": return makeToken("NotRegex");
case "!~i=": return makeToken("NotRegexIgnoreCase");
case "//":
skipLine();
continue;
default:
}
}
const keyword2 = expectKeyword(
[ "disabled", "show overlay", "manual skip", "auto skip" ], false);
if (keyword2 !== null) {
switch (keyword2) {
case "disabled": return makeToken("disabled");
case "show overlay": return makeToken("show overlay");
case "manual skip": return makeToken("manual skip");
case "auto skip": return makeToken("auto skip");
default:
}
}
const c = consume(); const c = consume();
switch (c) { if (c === '"') {
// TODO
} else if (/[0-9.]/.test(c)) {
// TODO // TODO
default:
return makeToken("error");
} }
return makeToken("error");
} }
} }
export function compileConfig(config: string): AdvancedSkipRuleSet[] | null { export function compileConfigNew(config: string): AdvancedSkipRuleSet[] | null {
// Mutated by calls to nextToken()
const lexerState: LexerState = {
source: config,
start: 0,
current: 0,
start_pos: { line: 1 },
current_pos: { line: 1 },
};
let token = nextToken(lexerState);
while (token.type !== "eof") {
console.log(token);
token = nextToken(lexerState);
}
// TODO // TODO
const ruleSets: AdvancedSkipRuleSet[] = []; return null;
return ruleSets;
} }