Files
SponsorBlock/src/utils/skipRule.ts
2025-09-24 00:35:52 +02:00

854 lines
28 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { getCurrentPageTitle } from "../../maze-utils/src/elements";
import { getChannelIDInfo, getVideoDuration } from "../../maze-utils/src/video";
import Config from "../config";
import {ActionType, ActionTypes, CategorySelection, CategorySkipOption, SponsorSourceType, SponsorTime} from "../types";
import { getSkipProfile, getSkipProfileBool } from "./skipProfiles";
import { VideoLabelsCacheData } from "./videoLabels";
import * as CompileConfig from "../../config.json";
export interface Permission {
canSubmit: boolean;
}
export enum SkipRuleAttribute {
StartTime = "time.start",
EndTime = "time.end",
Duration = "time.duration",
StartTimePercent = "time.startPercent",
EndTimePercent = "time.endPercent",
DurationPercent = "time.durationPercent",
Category = "category",
ActionType = "actionType",
Description = "chapter.name",
Source = "chapter.source",
ChannelID = "channel.id",
ChannelName = "channel.name",
VideoDuration = "video.duration",
Title = "video.title"
}
export enum SkipRuleOperator {
LessOrEqual = "<=",
Less = "<",
GreaterOrEqual = ">=",
Greater = ">",
NotEqual = "!=",
Equal = "==",
NotContains = "!*=",
Contains = "*=",
NotRegex = "!~=",
Regex = "~=",
NotRegexIgnoreCase = "!~i=",
RegexIgnoreCase = "~i="
}
const SKIP_RULE_ATTRIBUTES = Object.values(SkipRuleAttribute);
const SKIP_RULE_OPERATORS = Object.values(SkipRuleOperator);
export interface AdvancedSkipCheck {
kind: "check";
attribute: SkipRuleAttribute;
operator: SkipRuleOperator;
value: string | number;
}
export enum PredicateOperator {
And = "and",
Or = "or",
}
export interface AdvancedSkipOperator {
kind: "operator";
operator: PredicateOperator;
left: AdvancedSkipPredicate;
right: AdvancedSkipPredicate;
}
export type AdvancedSkipPredicate = AdvancedSkipCheck | AdvancedSkipOperator;
export interface AdvancedSkipRule {
predicate: AdvancedSkipPredicate;
skipOption: CategorySkipOption;
comments: string[];
}
export function getCategorySelection(segment: SponsorTime | VideoLabelsCacheData): CategorySelection {
// First check skip rules
for (const rule of Config.local.skipRules) {
if (isSkipPredicatePassing(segment, rule.predicate)) {
return { name: segment.category, option: rule.skipOption } as CategorySelection;
}
}
// Action type filters
if ("actionType" in segment && (segment as SponsorTime).actionType === "mute" && !getSkipProfileBool("muteSegments")) {
return { name: segment.category, option: CategorySkipOption.Disabled } as CategorySelection;
}
// Then check skip profile
const profile = getSkipProfile();
if (profile) {
const profileSelection = profile.categorySelections.find(selection => selection.name === segment.category);
if (profileSelection) {
return profileSelection;
}
}
// Then fallback to default
for (const selection of Config.config.categorySelections) {
if (selection.name === segment.category) {
return selection;
}
}
return { name: segment.category, option: CategorySkipOption.Disabled} as CategorySelection;
}
function getSkipCheckValue(segment: SponsorTime | VideoLabelsCacheData, rule: AdvancedSkipCheck): string | number | undefined {
switch (rule.attribute) {
case SkipRuleAttribute.StartTime:
return (segment as SponsorTime).segment?.[0];
case SkipRuleAttribute.EndTime:
return (segment as SponsorTime).segment?.[1];
case SkipRuleAttribute.Duration:
return (segment as SponsorTime).segment?.[1] - (segment as SponsorTime).segment?.[0];
case SkipRuleAttribute.StartTimePercent: {
const startTime = (segment as SponsorTime).segment?.[0];
if (startTime === undefined) return undefined;
return startTime / getVideoDuration() * 100;
}
case SkipRuleAttribute.EndTimePercent: {
const endTime = (segment as SponsorTime).segment?.[1];
if (endTime === undefined) return undefined;
return endTime / getVideoDuration() * 100;
}
case SkipRuleAttribute.DurationPercent: {
const startTime = (segment as SponsorTime).segment?.[0];
const endTime = (segment as SponsorTime).segment?.[1];
if (startTime === undefined || endTime === undefined) return undefined;
return (endTime - startTime) / getVideoDuration() * 100;
}
case SkipRuleAttribute.Category:
return segment.category;
case SkipRuleAttribute.ActionType:
return (segment as SponsorTime).actionType;
case SkipRuleAttribute.Description:
return (segment as SponsorTime).description || "";
case SkipRuleAttribute.Source:
switch ((segment as SponsorTime).source) {
case SponsorSourceType.Local:
return "local";
case SponsorSourceType.YouTube:
return "youtube";
case SponsorSourceType.Autogenerated:
return "autogenerated";
case SponsorSourceType.Server:
return "server";
default:
return undefined;
}
case SkipRuleAttribute.ChannelID:
return getChannelIDInfo().id;
case SkipRuleAttribute.ChannelName:
return getChannelIDInfo().author;
case SkipRuleAttribute.VideoDuration:
return getVideoDuration();
case SkipRuleAttribute.Title:
return getCurrentPageTitle() || "";
default:
return undefined;
}
}
function isSkipCheckPassing(segment: SponsorTime | VideoLabelsCacheData, rule: AdvancedSkipCheck): boolean {
const value = getSkipCheckValue(segment, rule);
switch (rule.operator) {
case SkipRuleOperator.Less:
return typeof value === "number" && value < (rule.value as number);
case SkipRuleOperator.LessOrEqual:
return typeof value === "number" && value <= (rule.value as number);
case SkipRuleOperator.Greater:
return typeof value === "number" && value > (rule.value as number);
case SkipRuleOperator.GreaterOrEqual:
return typeof value === "number" && value >= (rule.value as number);
case SkipRuleOperator.Equal:
return value === rule.value;
case SkipRuleOperator.NotEqual:
return value !== rule.value;
case SkipRuleOperator.Contains:
return String(value).toLocaleLowerCase().includes(String(rule.value).toLocaleLowerCase());
case SkipRuleOperator.NotContains:
return !String(value).toLocaleLowerCase().includes(String(rule.value).toLocaleLowerCase());
case SkipRuleOperator.Regex:
return new RegExp(rule.value as string).test(String(value));
case SkipRuleOperator.RegexIgnoreCase:
return new RegExp(rule.value as string, "i").test(String(value));
case SkipRuleOperator.NotRegex:
return !new RegExp(rule.value as string).test(String(value));
case SkipRuleOperator.NotRegexIgnoreCase:
return !new RegExp(rule.value as string, "i").test(String(value));
default:
return false;
}
}
function isSkipPredicatePassing(segment: SponsorTime | VideoLabelsCacheData, predicate: AdvancedSkipPredicate): boolean {
if (predicate.kind === "check") {
return isSkipCheckPassing(segment, predicate as AdvancedSkipCheck);
} else { // predicate.kind === "operator"
// TODO Is recursion fine to use here?
if (predicate.operator == PredicateOperator.And) {
return isSkipPredicatePassing(segment, predicate.left) && isSkipPredicatePassing(segment, predicate.right);
} else { // predicate.operator === PredicateOperator.Or
return isSkipPredicatePassing(segment, predicate.left) || isSkipPredicatePassing(segment, predicate.right);
}
}
}
export function getCategoryDefaultSelection(category: string): CategorySelection {
for (const selection of Config.config.categorySelections) {
if (selection.name === category) {
return selection;
}
}
return { name: category, option: CategorySkipOption.Disabled} as CategorySelection;
}
type TokenType =
| "if" // Keywords
| "disabled" | "show overlay" | "manual skip" | "auto skip" // Skip option
| `${SkipRuleAttribute}` // Segment attributes
| `${SkipRuleOperator}` // Segment attribute operators
| "and" | "or" // Expression operators
| "(" | ")" | "comment" // Syntax
| "string" | "number" // Literal values
| "eof" | "error"; // Sentinel and special tokens
export interface SourcePos {
line: number;
// column: number;
}
export interface Span {
start: SourcePos;
end: SourcePos;
}
interface Token {
type: TokenType;
span: Span;
value: string;
}
interface LexerState {
source: string;
start: number;
current: number;
start_pos: SourcePos;
current_pos: SourcePos;
}
function nextToken(state: LexerState): Token {
function makeToken(type: TokenType): Token {
return {
type,
span: { start: state.start_pos, end: state.current_pos, },
value: state.source.slice(state.start, state.current),
};
}
/**
* Returns the UTF-16 value at the current position and advances it forward.
* If the end of the source string has been reached, returns <code>null</code>.
*
* @return current UTF-16 value, or <code>null</code> on EOF
*/
function consume(): string | null {
if (state.source.length > state.current) {
// The UTF-16 value at the current position, which could be either a Unicode code point or a lone surrogate.
// The check above this is also based on the UTF-16 value count, so this should not be able to fail on “weird” inputs.
const c = state.source[state.current];
state.current++;
if (c === "\n") {
state.current_pos = { line: state.current_pos.line + 1, /* column: 1 */ };
} else {
// // TODO This will be wrong on anything involving UTF-16 surrogate pairs or grapheme clusters with multiple code units
// // So just don't show column numbers on errors for now
// state.current_pos = { line: state.current_pos.line, /* column: state.current_pos.column + 1 */ };
}
return c;
} else {
return null;
}
}
/**
* Returns the UTF-16 value at the current position without advancing it.
* If the end of the source string has been reached, returns <code>null</code>.
*
* @return current UTF-16 value, or <code>null</code> on EOF
*/
function peek(): string | null {
if (state.source.length > state.current) {
// See comment in consume() for Unicode expectations here
return state.source[state.current];
} else {
return null;
}
}
/**
* Checks the word at the current position against a list of
* expected keywords. The keyword can consist of multiple characters.
* If a match is found, the current position is advanced by the length
* of the keyword found.
*
* @param keywords the expected set of keywords at the current position
* @param caseSensitive whether to do a case-sensitive comparison
* @return the matching keyword, or <code>null</code>
*/
function expectKeyword(keywords: readonly string[], caseSensitive: boolean): string | null {
for (const keyword of keywords) {
// slice() clamps to string length, so cannot cause out of bounds errors
const actual = state.source.slice(state.current, state.current + keyword.length);
if (caseSensitive && keyword === actual || !caseSensitive && keyword.toLowerCase() === actual.toLowerCase()) {
// Does not handle keywords containing line feeds, which shouldn't happen anyway
state.current += keyword.length;
return keyword;
}
}
return null;
}
/**
* Skips a series of whitespace characters starting at the current
* position. May advance the current position multiple times, once,
* or not at all.
*/
function skipWhitespace() {
let c = peek();
const whitespace = /\s+/;
while (c != null) {
if (!whitespace.test(c)) {
return;
}
consume();
c = peek();
}
}
/**
* Skips all characters until the next <code>"\n"</code> (line feed)
* character occurs (inclusive). Will always advance the current position
* at least once.
*/
function skipLine() {
let c = consume();
while (c != null) {
if (c == '\n') {
return;
}
c = consume();
}
}
/**
* @return whether the lexer has reached the end of input
*/
function isEof(): boolean {
return state.current >= state.source.length;
}
/**
* Sets the start position of the next token that will be emitted
* to the current position.
*
* More characters need to be consumed after calling this, as
* an empty token would be emitted otherwise.
*/
function resetToCurrent() {
state.start = state.current;
state.start_pos = state.current_pos;
}
skipWhitespace();
resetToCurrent();
if (isEof()) {
return makeToken("eof");
}
const keyword = expectKeyword([
"if", "and", "or",
"(", ")",
"//",
].concat(SKIP_RULE_ATTRIBUTES)
.concat(SKIP_RULE_OPERATORS), true);
if (keyword !== null) {
if ((SKIP_RULE_ATTRIBUTES as string[]).includes(keyword) || (SKIP_RULE_OPERATORS as string[]).includes(keyword)) {
return makeToken(keyword as TokenType);
}
switch (keyword) {
case "if": return makeToken("if");
case "and": return makeToken("and");
case "or": return makeToken("or");
case "(": return makeToken("(");
case ")": return makeToken(")");
case "//":
resetToCurrent();
skipLine();
return makeToken("comment");
default:
}
}
const keyword2 = expectKeyword(
[ "disabled", "show overlay", "manual skip", "auto skip" ], false);
if (keyword2 !== null) {
switch (keyword2) {
case "disabled": return makeToken("disabled");
case "show overlay": return makeToken("show overlay");
case "manual skip": return makeToken("manual skip");
case "auto skip": return makeToken("auto skip");
default:
}
}
let c = consume();
if (c === '"') {
// Parses string according to ECMA-404 2nd edition (JSON), section 9 “String”
let output = "";
let c = consume();
let error = false;
while (c !== null && c !== '"') {
if (c == '\\') {
c = consume();
switch (c) {
case '"':
output = output.concat('"');
break;
case '\\':
output = output.concat('\\');
break;
case '/':
output = output.concat('/');
break;
case 'b':
output = output.concat('\b');
break;
case 'f':
output = output.concat('\f');
break;
case 'n':
output = output.concat('\n');
break;
case 'r':
output = output.concat('\r');
break;
case 't':
output = output.concat('\t');
break;
case 'u': {
// UTF-16 value sequence
const digits = state.source.slice(state.current, state.current + 4);
if (digits.length < 4 || !/[0-9a-zA-Z]{4}/.test(digits)) {
error = true;
output = output.concat(`\\u`);
c = consume();
continue;
}
const value = parseInt(digits, 16);
// fromCharCode() takes a UTF-16 value without performing validity checks,
// which is exactly what is needed here in JSON, code units outside the
// BMP are represented by two Unicode escape sequences.
output = output.concat(String.fromCharCode(value));
break;
}
default:
error = true;
output = output.concat(`\\${c}`);
break;
}
} else {
output = output.concat(c);
}
c = consume();
}
return {
type: error || c !== '"' ? "error" : "string",
span: { start: state.start_pos, end: state.current_pos, },
value: output,
};
} else if (/[0-9-]/.test(c)) {
// Parses number according to ECMA-404 2nd edition (JSON), section 8 “Numbers”
if (c === '-') {
c = consume();
if (!/[0-9]/.test(c)) {
return makeToken("error");
}
}
const leadingZero = c === '0';
let next = peek();
let error = false;
while (next !== null && /[0-9]/.test(next)) {
consume();
next = peek();
if (leadingZero) {
error = true;
}
}
if (next !== null && next === '.') {
consume();
next = peek();
if (next === null || !/[0-9]/.test(next)) {
return makeToken("error");
}
do {
consume();
next = peek();
} while (next !== null && /[0-9]/.test(next));
}
next = peek();
if (next != null && (next === 'e' || next === 'E')) {
consume();
next = peek();
if (next === null) {
return makeToken("error");
}
if (next === '+' || next === '-') {
consume();
next = peek();
}
while (next !== null && /[0-9]/.test(next)) {
consume();
next = peek();
}
}
return makeToken(error ? "error" : "number");
}
// Consume common characters up to a space for a more useful value in the error token
const common = /[a-zA-Z0-9<>=!~*.-]/;
if (c !== null && common.test(c)) {
do {
consume();
c = peek();
} while (c !== null && common.test(c));
}
return makeToken("error");
}
export interface ParseError {
span: Span;
message: string;
}
export function parseConfig(config: string): { rules: AdvancedSkipRule[]; errors: ParseError[] } {
// Mutated by calls to nextToken()
const lexerState: LexerState = {
source: config,
start: 0,
current: 0,
start_pos: { line: 1 },
current_pos: { line: 1 },
};
let previous: Token = null;
let current: Token = nextToken(lexerState);
const rules: AdvancedSkipRule[] = [];
const errors: ParseError[] = [];
let erroring = false;
let panicMode = false;
/**
* Adds an error message. The current skip rule will be marked as erroring.
*
* @param span the range of the error
* @param message the message to report
* @param panic if <code>true</code>, all further errors will be silenced
* until panic mode is disabled again
*/
function errorAt(span: Span, message: string, panic: boolean) {
if (!panicMode) {
errors.push({span, message,});
}
panicMode ||= panic;
erroring = true;
}
/**
* Adds an error message for an error occurring at the previous token
* (which was just consumed).
*
* @param message the message to report
* @param panic if <code>true</code>, all further errors will be silenced
* until panic mode is disabled again
*/
function error(message: string, panic: boolean) {
errorAt(previous.span, message, panic);
}
/**
* Adds an error message for an error occurring at the current token
* (which has not been consumed yet).
*
* @param message the message to report
* @param panic if <code>true</code>, all further errors will be silenced
* until panic mode is disabled again
*/
function errorAtCurrent(message: string, panic: boolean) {
errorAt(current.span, message, panic);
}
/**
* Consumes the current token, which can then be accessed at <code>previous</code>.
* The next token will be at <code>current</code> after this call.
*
* If a token of type <code>error</code> is found, issues an error message.
*/
function consume() {
previous = current;
// Intentionally ignoring `error` tokens here;
// by handling those in later functions with more context (match(), expect(), ...),
// the user gets better errors
current = nextToken(lexerState);
}
/**
* Checks the current token (that has not been consumed yet) against a set of expected token types.
*
* @param expected the set of expected token types
* @return whether the actual current token matches any expected token type
*/
function match(expected: readonly TokenType[]): boolean {
if (expected.includes(current.type)) {
consume();
return true;
} else {
return false;
}
}
/**
* Checks the current token (that has not been consumed yet) against a set of expected token types.
*
* If there is no match, issues an error message which will be prepended to <code>, got: <token type></code>.
*
* @param expected the set of expected token types
* @param message the error message to report in case the actual token doesn't match
* @param panic if <code>true</code>, all further errors will be silenced
* until panic mode is disabled again
*/
function expect(expected: readonly TokenType[], message: string, panic: boolean) {
if (!match(expected)) {
errorAtCurrent(message.concat(`, got: \`${current.type === "error" ? current.value : current.type}\``), panic);
}
}
/**
* Synchronize with the next rule block and disable panic mode.
* Skips all tokens until the <code>if</code> keyword is found.
*/
function synchronize() {
panicMode = false;
while (!isEof()) {
if (current.type === "if") {
return;
}
consume();
}
}
/**
* @return whether the parser has reached the end of input
*/
function isEof(): boolean {
return current.type === "eof";
}
while (!isEof()) {
erroring = false;
const rule = parseRule();
if (!erroring) {
rules.push(rule);
}
if (panicMode) {
synchronize();
}
}
return { rules, errors, };
function parseRule(): AdvancedSkipRule {
const rule: AdvancedSkipRule = {
predicate: null,
skipOption: null,
comments: [],
};
while (match(["comment"])) {
rule.comments.push(previous.value.trim());
}
expect(["if"], rule.comments.length !== 0 ? "expected `if` after `comment`" : "expected `if`", true);
rule.predicate = parsePredicate();
expect(["disabled", "show overlay", "manual skip", "auto skip"], "expected skip option after condition", true);
switch (previous.type) {
case "disabled":
rule.skipOption = CategorySkipOption.Disabled;
break;
case "show overlay":
rule.skipOption = CategorySkipOption.ShowOverlay;
break;
case "manual skip":
rule.skipOption = CategorySkipOption.ManualSkip;
break;
case "auto skip":
rule.skipOption = CategorySkipOption.AutoSkip;
break;
default:
// Ignore, should have already errored
}
return rule;
}
function parsePredicate(): AdvancedSkipPredicate {
return parseOr();
}
function parseOr(): AdvancedSkipPredicate {
let left = parseAnd();
while (match(["or"])) {
const right = parseAnd();
left = {
kind: "operator",
operator: PredicateOperator.Or,
left, right,
};
}
return left;
}
function parseAnd(): AdvancedSkipPredicate {
let left = parsePrimary();
while (match(["and"])) {
const right = parsePrimary();
left = {
kind: "operator",
operator: PredicateOperator.And,
left, right,
};
}
return left;
}
function parsePrimary(): AdvancedSkipPredicate {
if (match(["("])) {
const predicate = parsePredicate();
expect([")"], "expected `)` after condition", true);
return predicate;
} else {
return parseCheck();
}
}
function parseCheck(): AdvancedSkipCheck {
expect(SKIP_RULE_ATTRIBUTES, `expected attribute after \`${previous.type}\``, true);
if (erroring) {
return null;
}
const attribute = previous.type as SkipRuleAttribute;
expect(SKIP_RULE_OPERATORS, `expected operator after \`${attribute}\``, true);
if (erroring) {
return null;
}
const operator = previous.type as SkipRuleOperator;
expect(["string", "number"], `expected string or number after \`${operator}\``, true);
if (erroring) {
return null;
}
const value = previous.type === "number" ? Number(previous.value) : previous.value;
if ([SkipRuleOperator.Equal, SkipRuleOperator.NotEqual].includes(operator)) {
if (attribute === SkipRuleAttribute.Category
&& !CompileConfig.categoryList.includes(value as string)) {
error(`unknown category: \`${value}\``, false);
return null;
} else if (attribute === SkipRuleAttribute.ActionType
&& !ActionTypes.includes(value as ActionType)) {
error(`unknown action type: \`${value}\``, false);
return null;
} else if (attribute === SkipRuleAttribute.Source
&& !["local", "youtube", "autogenerated", "server"].includes(value as string)) {
error(`unknown chapter source: \`${value}\``, false);
return null;
}
}
return {
kind: "check",
attribute, operator, value,
};
}
}