mirror of
https://github.com/ajayyy/SponsorBlockServer.git
synced 2025-12-09 21:17:15 +03:00
Merge pull request #188 from opl-/feat/faster-segments
Improve performance of segment querying endpoints
This commit is contained in:
@@ -1,26 +1,14 @@
|
|||||||
import {config} from '../config';
|
import { Request, Response } from 'express';
|
||||||
import {db, privateDB} from '../databases/databases';
|
import { config } from '../config';
|
||||||
import {Logger} from '../utils/logger';
|
import { db, privateDB } from '../databases/databases';
|
||||||
import {getHash} from '../utils/getHash';
|
import { Category, DBSegment, OverlappingSegmentGroup, Segment, SegmentCache, VideoData, VideoID, VideoIDHash, VotableObject } from "../types/segments.model";
|
||||||
import {getIP} from '../utils/getIP';
|
import { getHash } from '../utils/getHash';
|
||||||
import {Request, Response} from 'express';
|
import { getIP } from '../utils/getIP';
|
||||||
|
import { Logger } from '../utils/logger';
|
||||||
|
|
||||||
|
|
||||||
function cleanGetSegments(req: Request, videoID: string, categories: any[]) {
|
function prepareCategorySegments(req: Request, videoID: VideoID, category: Category, segments: DBSegment[], cache: SegmentCache = {shadowHiddenSegmentIPs: {}}): Segment[] {
|
||||||
let userHashedIP: any;
|
const filteredSegments = segments.filter((segment) => {
|
||||||
let shadowHiddenSegments: any[];
|
|
||||||
|
|
||||||
let segments: { category: any; segment: any[]; UUID: any; }[] = [];
|
|
||||||
|
|
||||||
try {
|
|
||||||
for (const category of categories) {
|
|
||||||
const categorySegments: any[] = db
|
|
||||||
.prepare(
|
|
||||||
'all',
|
|
||||||
'SELECT startTime, endTime, votes, UUID, shadowHidden FROM sponsorTimes WHERE videoID = ? and category = ? ORDER BY startTime',
|
|
||||||
[videoID, category],
|
|
||||||
)
|
|
||||||
.filter((segment: any) => {
|
|
||||||
if (segment.votes < -1) {
|
if (segment.votes < -1) {
|
||||||
return false; //too untrustworthy, just ignore it
|
return false; //too untrustworthy, just ignore it
|
||||||
}
|
}
|
||||||
@@ -31,27 +19,92 @@ function cleanGetSegments(req: Request, videoID: string, categories: any[]) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shadowHiddenSegments === undefined) {
|
if (cache.shadowHiddenSegmentIPs[videoID] === undefined) {
|
||||||
shadowHiddenSegments = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]);
|
cache.shadowHiddenSegmentIPs[videoID] = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]);
|
||||||
}
|
}
|
||||||
|
|
||||||
//if this isn't their ip, don't send it to them
|
//if this isn't their ip, don't send it to them
|
||||||
return shadowHiddenSegments.some(shadowHiddenSegment => {
|
return cache.shadowHiddenSegmentIPs[videoID].some((shadowHiddenSegment) => {
|
||||||
if (userHashedIP === undefined) {
|
if (cache.userHashedIP === undefined) {
|
||||||
//hash the IP only if it's strictly necessary
|
//hash the IP only if it's strictly necessary
|
||||||
userHashedIP = getHash(getIP(req) + config.globalSalt);
|
cache.userHashedIP = getHash(getIP(req) + config.globalSalt);
|
||||||
}
|
}
|
||||||
return shadowHiddenSegment.hashedIP === userHashedIP;
|
|
||||||
|
return shadowHiddenSegment.hashedIP === cache.userHashedIP;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
chooseSegments(categorySegments).forEach((chosenSegment: any) => {
|
return chooseSegments(filteredSegments).map((chosenSegment) => ({
|
||||||
segments.push({
|
|
||||||
category,
|
category,
|
||||||
segment: [chosenSegment.startTime, chosenSegment.endTime],
|
segment: [chosenSegment.startTime, chosenSegment.endTime],
|
||||||
UUID: chosenSegment.UUID,
|
UUID: chosenSegment.UUID
|
||||||
});
|
}));
|
||||||
});
|
}
|
||||||
|
|
||||||
|
function getSegmentsByVideoID(req: Request, videoID: string, categories: Category[]): Segment[] {
|
||||||
|
const cache: SegmentCache = {};
|
||||||
|
const segments: Segment[] = [];
|
||||||
|
|
||||||
|
try {
|
||||||
|
const segmentsByCategory: Record<Category, DBSegment[]> = db
|
||||||
|
.prepare(
|
||||||
|
'all',
|
||||||
|
`SELECT startTime, endTime, votes, UUID, category, shadowHidden FROM sponsorTimes WHERE videoID = ? AND category IN (${Array(categories.length).fill('?').join()}) ORDER BY startTime`,
|
||||||
|
[videoID, categories]
|
||||||
|
).reduce((acc: Record<Category, DBSegment[]>, segment: DBSegment) => {
|
||||||
|
acc[segment.category] = acc[segment.category] || [];
|
||||||
|
acc[segment.category].push(segment);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
for (const [category, categorySegments] of Object.entries(segmentsByCategory)) {
|
||||||
|
segments.push(...prepareCategorySegments(req, videoID, category, categorySegments, cache));
|
||||||
|
}
|
||||||
|
|
||||||
|
return segments;
|
||||||
|
} catch (err) {
|
||||||
|
if (err) {
|
||||||
|
Logger.error(err);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getSegmentsByHash(req: Request, hashedVideoIDPrefix: VideoIDHash, categories: Category[]): Record<VideoID, VideoData> {
|
||||||
|
const cache: SegmentCache = {};
|
||||||
|
const segments: Record<VideoID, VideoData> = {};
|
||||||
|
|
||||||
|
try {
|
||||||
|
type SegmentWithHashPerVideoID = Record<VideoID, {hash: VideoIDHash, segmentPerCategory: Record<Category, DBSegment[]>}>;
|
||||||
|
|
||||||
|
const segmentPerVideoID: SegmentWithHashPerVideoID = db
|
||||||
|
.prepare(
|
||||||
|
'all',
|
||||||
|
`SELECT videoID, startTime, endTime, votes, UUID, category, shadowHidden, hashedVideoID FROM sponsorTimes WHERE hashedVideoID LIKE ? AND category IN (${Array(categories.length).fill('?').join()}) ORDER BY startTime`,
|
||||||
|
[hashedVideoIDPrefix + '%', categories]
|
||||||
|
).reduce((acc: SegmentWithHashPerVideoID, segment: DBSegment) => {
|
||||||
|
acc[segment.videoID] = acc[segment.videoID] || {
|
||||||
|
hash: segment.hashedVideoID,
|
||||||
|
segmentPerCategory: {},
|
||||||
|
};
|
||||||
|
const videoCategories = acc[segment.videoID].segmentPerCategory;
|
||||||
|
|
||||||
|
videoCategories[segment.category] = videoCategories[segment.category] || [];
|
||||||
|
videoCategories[segment.category].push(segment);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
for (const [videoID, videoData] of Object.entries(segmentPerVideoID)) {
|
||||||
|
segments[videoID] = {
|
||||||
|
hash: videoData.hash,
|
||||||
|
segments: [],
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const [category, segmentPerCategory] of Object.entries(videoData.segmentPerCategory)) {
|
||||||
|
segments[videoID].segments.push(...prepareCategorySegments(req, videoID, category, segmentPerCategory, cache));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return segments;
|
return segments;
|
||||||
@@ -66,15 +119,19 @@ function cleanGetSegments(req: Request, videoID: string, categories: any[]) {
|
|||||||
//gets a weighted random choice from the choices array based on their `votes` property.
|
//gets a weighted random choice from the choices array based on their `votes` property.
|
||||||
//amountOfChoices specifies the maximum amount of choices to return, 1 or more.
|
//amountOfChoices specifies the maximum amount of choices to return, 1 or more.
|
||||||
//choices are unique
|
//choices are unique
|
||||||
function getWeightedRandomChoice(choices: any[], amountOfChoices: number) {
|
function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOfChoices: number): T[] {
|
||||||
//trivial case: no need to go through the whole process
|
//trivial case: no need to go through the whole process
|
||||||
if (amountOfChoices >= choices.length) {
|
if (amountOfChoices >= choices.length) {
|
||||||
return choices;
|
return choices;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TWithWeight = T & {
|
||||||
|
weight: number
|
||||||
|
}
|
||||||
|
|
||||||
//assign a weight to each choice
|
//assign a weight to each choice
|
||||||
let totalWeight = 0;
|
let totalWeight = 0;
|
||||||
choices = choices.map(choice => {
|
let choicesWithWeights: TWithWeight[] = choices.map(choice => {
|
||||||
//The 3 makes -2 the minimum votes before being ignored completely
|
//The 3 makes -2 the minimum votes before being ignored completely
|
||||||
//this can be changed if this system increases in popularity.
|
//this can be changed if this system increases in popularity.
|
||||||
const weight = Math.exp((choice.votes + 3));
|
const weight = Math.exp((choice.votes + 3));
|
||||||
@@ -88,16 +145,16 @@ function getWeightedRandomChoice(choices: any[], amountOfChoices: number) {
|
|||||||
while (amountOfChoices-- > 0) {
|
while (amountOfChoices-- > 0) {
|
||||||
//weighted random draw of one element of choices
|
//weighted random draw of one element of choices
|
||||||
const randomNumber = Math.random() * totalWeight;
|
const randomNumber = Math.random() * totalWeight;
|
||||||
let stackWeight = choices[0].weight;
|
let stackWeight = choicesWithWeights[0].weight;
|
||||||
let i = 0;
|
let i = 0;
|
||||||
while (stackWeight < randomNumber) {
|
while (stackWeight < randomNumber) {
|
||||||
stackWeight += choices[++i].weight;
|
stackWeight += choicesWithWeights[++i].weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
//add it to the chosen ones and remove it from the choices before the next iteration
|
//add it to the chosen ones and remove it from the choices before the next iteration
|
||||||
chosen.push(choices[i]);
|
chosen.push(choicesWithWeights[i]);
|
||||||
totalWeight -= choices[i].weight;
|
totalWeight -= choicesWithWeights[i].weight;
|
||||||
choices.splice(i, 1);
|
choicesWithWeights.splice(i, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return chosen;
|
return chosen;
|
||||||
@@ -107,19 +164,19 @@ function getWeightedRandomChoice(choices: any[], amountOfChoices: number) {
|
|||||||
//Only one similar time will be returned, randomly generated based on the sqrt of votes.
|
//Only one similar time will be returned, randomly generated based on the sqrt of votes.
|
||||||
//This allows new less voted items to still sometimes appear to give them a chance at getting votes.
|
//This allows new less voted items to still sometimes appear to give them a chance at getting votes.
|
||||||
//Segments with less than -1 votes are already ignored before this function is called
|
//Segments with less than -1 votes are already ignored before this function is called
|
||||||
function chooseSegments(segments: any[]) {
|
function chooseSegments(segments: DBSegment[]): DBSegment[] {
|
||||||
//Create groups of segments that are similar to eachother
|
//Create groups of segments that are similar to eachother
|
||||||
//Segments must be sorted by their startTime so that we can build groups chronologically:
|
//Segments must be sorted by their startTime so that we can build groups chronologically:
|
||||||
//1. As long as the segments' startTime fall inside the currentGroup, we keep adding them to that group
|
//1. As long as the segments' startTime fall inside the currentGroup, we keep adding them to that group
|
||||||
//2. If a segment starts after the end of the currentGroup (> cursor), no other segment will ever fall
|
//2. If a segment starts after the end of the currentGroup (> cursor), no other segment will ever fall
|
||||||
// inside that group (because they're sorted) so we can create a new one
|
// inside that group (because they're sorted) so we can create a new one
|
||||||
const similarSegmentsGroups: any[] = [];
|
const overlappingSegmentsGroups: OverlappingSegmentGroup[] = [];
|
||||||
let currentGroup: any;
|
let currentGroup: OverlappingSegmentGroup;
|
||||||
let cursor = -1; //-1 to make sure that, even if the 1st segment starts at 0, a new group is created
|
let cursor = -1; //-1 to make sure that, even if the 1st segment starts at 0, a new group is created
|
||||||
segments.forEach(segment => {
|
segments.forEach(segment => {
|
||||||
if (segment.startTime > cursor) {
|
if (segment.startTime > cursor) {
|
||||||
currentGroup = {segments: [], votes: 0};
|
currentGroup = {segments: [], votes: 0};
|
||||||
similarSegmentsGroups.push(currentGroup);
|
overlappingSegmentsGroups.push(currentGroup);
|
||||||
}
|
}
|
||||||
|
|
||||||
currentGroup.segments.push(segment);
|
currentGroup.segments.push(segment);
|
||||||
@@ -132,7 +189,7 @@ function chooseSegments(segments: any[]) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
//if there are too many groups, find the best 8
|
//if there are too many groups, find the best 8
|
||||||
return getWeightedRandomChoice(similarSegmentsGroups, 32).map(
|
return getWeightedRandomChoice(overlappingSegmentsGroups, 32).map(
|
||||||
//randomly choose 1 good segment per group and return them
|
//randomly choose 1 good segment per group and return them
|
||||||
group => getWeightedRandomChoice(group.segments, 1)[0],
|
group => getWeightedRandomChoice(group.segments, 1)[0],
|
||||||
);
|
);
|
||||||
@@ -158,14 +215,14 @@ function handleGetSegments(req: Request, res: Response) {
|
|||||||
? [req.query.category]
|
? [req.query.category]
|
||||||
: ['sponsor'];
|
: ['sponsor'];
|
||||||
|
|
||||||
let segments = cleanGetSegments(req, videoID, categories);
|
const segments = getSegmentsByVideoID(req, videoID, categories);
|
||||||
|
|
||||||
if (segments === null || segments === undefined) {
|
if (segments === null || segments === undefined) {
|
||||||
res.sendStatus(500);
|
res.sendStatus(500);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (segments.length == 0) {
|
if (segments.length === 0) {
|
||||||
res.sendStatus(404);
|
res.sendStatus(404);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -173,8 +230,8 @@ function handleGetSegments(req: Request, res: Response) {
|
|||||||
return segments;
|
return segments;
|
||||||
}
|
}
|
||||||
|
|
||||||
function endpoint(req: Request, res: Response) {
|
function endpoint(req: Request, res: Response): void {
|
||||||
const segments = handleGetSegments(req, res);
|
let segments = handleGetSegments(req, res);
|
||||||
|
|
||||||
if (segments) {
|
if (segments) {
|
||||||
//send result
|
//send result
|
||||||
@@ -183,7 +240,9 @@ function endpoint(req: Request, res: Response) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export {
|
export {
|
||||||
handleGetSegments,
|
getSegmentsByVideoID,
|
||||||
cleanGetSegments,
|
getSegmentsByHash,
|
||||||
endpoint,
|
endpoint,
|
||||||
|
handleGetSegments
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,31 +1,31 @@
|
|||||||
import {hashPrefixTester} from '../utils/hashPrefixTester';
|
import {hashPrefixTester} from '../utils/hashPrefixTester';
|
||||||
import {cleanGetSegments} from './getSkipSegments';
|
import {getSegmentsByHash} from './getSkipSegments';
|
||||||
import {db} from '../databases/databases';
|
|
||||||
import {Request, Response} from 'express';
|
import {Request, Response} from 'express';
|
||||||
|
import { Category, VideoIDHash } from '../types/segments.model';
|
||||||
|
|
||||||
export async function getSkipSegmentsByHash(req: Request, res: Response) {
|
export async function getSkipSegmentsByHash(req: Request, res: Response) {
|
||||||
let hashPrefix = req.params.prefix;
|
let hashPrefix: VideoIDHash = req.params.prefix;
|
||||||
if (!hashPrefixTester(req.params.prefix)) {
|
if (!hashPrefixTester(req.params.prefix)) {
|
||||||
res.status(400).send("Hash prefix does not match format requirements."); // Exit early on faulty prefix
|
res.status(400).send("Hash prefix does not match format requirements."); // Exit early on faulty prefix
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const categories = req.query.categories
|
const categories: Category[] = req.query.categories
|
||||||
? JSON.parse(req.query.categories as string)
|
? JSON.parse(req.query.categories as string)
|
||||||
: req.query.category
|
: req.query.category
|
||||||
? [req.query.category]
|
? [req.query.category]
|
||||||
: ['sponsor'];
|
: ['sponsor'];
|
||||||
|
|
||||||
// Get all video id's that match hash prefix
|
// Get all video id's that match hash prefix
|
||||||
const videoIds = db.prepare('all', 'SELECT DISTINCT videoId, hashedVideoID from sponsorTimes WHERE hashedVideoID LIKE ?', [hashPrefix + '%']);
|
const segments = getSegmentsByHash(req, hashPrefix, categories);
|
||||||
|
|
||||||
let segments = videoIds.map((video: any) => {
|
if (!segments) return res.status(404).json([]);
|
||||||
return {
|
|
||||||
videoID: video.videoID,
|
|
||||||
hash: video.hashedVideoID,
|
|
||||||
segments: cleanGetSegments(req, video.videoID, categories),
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
res.status((segments.length === 0) ? 404 : 200).json(segments);
|
const output = Object.entries(segments).map(([videoID, data]) => ({
|
||||||
|
videoID,
|
||||||
|
hash: data.hash,
|
||||||
|
segments: data.segments,
|
||||||
|
}));
|
||||||
|
|
||||||
|
res.status(output.length === 0 ? 404 : 200).json(output);
|
||||||
}
|
}
|
||||||
|
|||||||
45
src/types/segments.model.ts
Normal file
45
src/types/segments.model.ts
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
export type SegmentUUID = string;
|
||||||
|
export type VideoID = string;
|
||||||
|
export type Category = string;
|
||||||
|
export type VideoIDHash = string;
|
||||||
|
export type IPHash = string;
|
||||||
|
|
||||||
|
export interface Segment {
|
||||||
|
category: Category;
|
||||||
|
segment: number[];
|
||||||
|
UUID: SegmentUUID;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface DBSegment {
|
||||||
|
category: Category;
|
||||||
|
startTime: number;
|
||||||
|
endTime: number;
|
||||||
|
UUID: SegmentUUID;
|
||||||
|
votes: number;
|
||||||
|
shadowHidden: 0 | 1;
|
||||||
|
videoID: VideoID;
|
||||||
|
hashedVideoID: VideoIDHash;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface OverlappingSegmentGroup {
|
||||||
|
segments: DBSegment[],
|
||||||
|
votes: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VotableObject {
|
||||||
|
votes: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VotableObjectWithWeight extends VotableObject {
|
||||||
|
weight: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VideoData {
|
||||||
|
hash: VideoIDHash;
|
||||||
|
segments: Segment[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SegmentCache {
|
||||||
|
shadowHiddenSegmentIPs?: Record<VideoID, {hashedIP: IPHash}[]>,
|
||||||
|
userHashedIP?: IPHash
|
||||||
|
}
|
||||||
@@ -32,17 +32,17 @@ describe('getSegmentsByHash', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('Should be able to get a 200 with empty segments for video but no matching categories', (done: Done) => {
|
it('Should return 404 if no segments are found even if a video for the given hash is known', (done: Done) => {
|
||||||
request.get(getbaseURL()
|
request.get(getbaseURL()
|
||||||
+ '/api/skipSegments/3272f?categories=["shilling"]', null,
|
+ '/api/skipSegments/3272f?categories=["shilling"]', null,
|
||||||
(err, res, body) => {
|
(err, res, body) => {
|
||||||
if (err) done("Couldn't call endpoint");
|
if (err) done("Couldn't call endpoint");
|
||||||
else if (res.statusCode !== 200) done("non 200 status code, was " + res.statusCode);
|
else if (res.statusCode !== 404) done("non 404 status code, was " + res.statusCode);
|
||||||
else {
|
else {
|
||||||
if (JSON.parse(body) && JSON.parse(body).length > 0 && JSON.parse(body)[0].segments.length === 0) {
|
if (body === '[]') {
|
||||||
done(); // pass
|
done(); // pass
|
||||||
} else {
|
} else {
|
||||||
done("response had segments");
|
done("Response had videos");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user