Batch segment lookup for the skip-segment routes.

Replaces cleanGetSegments (one query per category, called once per video on
the hash-prefix route) with getSegmentsByVideoID and getSegmentsByHash, which
each issue a single `category IN (...)` query and group the rows in memory.
Shadow-hidden filtering and segment selection move to prepareCategorySegments,
and the new src/types/segments.model.ts supplies the types used in place of
`any`. The hash-prefix route now answers 404 with `[]` when nothing matches.

diff --git a/src/routes/getSkipSegments.ts b/src/routes/getSkipSegments.ts
index 3c70e6b..ad7a138 100644
--- a/src/routes/getSkipSegments.ts
+++ b/src/routes/getSkipSegments.ts
@@ -1,57 +1,110 @@
-import {config} from '../config';
-import {db, privateDB} from '../databases/databases';
-import {Logger} from '../utils/logger';
-import {getHash} from '../utils/getHash';
-import {getIP} from '../utils/getIP';
-import {Request, Response} from 'express';
+import { Request, Response } from 'express';
+import { config } from '../config';
+import { db, privateDB } from '../databases/databases';
+import { Category, DBSegment, OverlappingSegmentGroup, Segment, SegmentCache, VideoData, VideoID, VideoIDHash, VotableObject } from "../types/segments.model";
+import { getHash } from '../utils/getHash';
+import { getIP } from '../utils/getIP';
+import { Logger } from '../utils/logger';
 
 
-function cleanGetSegments(req: Request, videoID: string, categories: any[]) {
-    let userHashedIP: any;
-    let shadowHiddenSegments: any[];
+function prepareCategorySegments(req: Request, videoID: VideoID, category: Category, segments: DBSegment[], cache: SegmentCache = {shadowHiddenSegmentIPs: {}}): Segment[] {
+    const filteredSegments = segments.filter((segment) => {
+        if (segment.votes < -1) {
+            return false; //too untrustworthy, just ignore it
+        }
 
-    let segments: { category: any; segment: any[]; UUID: any; }[] = [];
+        //check if shadowHidden
+        //this means it is hidden to everyone but the original ip that submitted it
+        if (segment.shadowHidden != 1) {
+            return true;
+        }
+
+        if (cache.shadowHiddenSegmentIPs[videoID] === undefined) {
+            cache.shadowHiddenSegmentIPs[videoID] = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]);
+        }
+
+        //if this isn't their ip, don't send it to them
+        return cache.shadowHiddenSegmentIPs[videoID].some((shadowHiddenSegment) => {
+            if (cache.userHashedIP === undefined) {
+                //hash the IP only if it's strictly necessary
+                cache.userHashedIP = getHash(getIP(req) + config.globalSalt);
+            }
+
+            return shadowHiddenSegment.hashedIP === cache.userHashedIP;
+        });
+    });
+
+    return chooseSegments(filteredSegments).map((chosenSegment) => ({
+        category,
+        segment: [chosenSegment.startTime, chosenSegment.endTime],
+        UUID: chosenSegment.UUID
+    }));
+}
+
+function getSegmentsByVideoID(req: Request, videoID: string, categories: Category[]): Segment[] {
+    const cache: SegmentCache = {shadowHiddenSegmentIPs: {}}; //map must exist up front: prepareCategorySegments indexes into it
+    const segments: Segment[] = [];
 
     try {
-        for (const category of categories) {
-            const categorySegments: any[] = db
-                .prepare(
-                    'all',
-                    'SELECT startTime, endTime, votes, UUID, shadowHidden FROM sponsorTimes WHERE videoID = ? and category = ? ORDER BY startTime',
-                    [videoID, category],
-                )
-                .filter((segment: any) => {
-                    if (segment.votes < -1) {
-                        return false; //too untrustworthy, just ignore it
-                    }
+        const segmentsByCategory: Record<Category, DBSegment[]> = db
+            .prepare(
+                'all',
+                `SELECT startTime, endTime, votes, UUID, category, shadowHidden FROM sponsorTimes WHERE videoID = ? AND category IN (${Array(categories.length).fill('?').join()}) ORDER BY startTime`,
+                [videoID, categories]
+            ).reduce((acc: Record<Category, DBSegment[]>, segment: DBSegment) => {
+                acc[segment.category] = acc[segment.category] || [];
+                acc[segment.category].push(segment);
 
-                    //check if shadowHidden
-                    //this means it is hidden to everyone but the original ip that submitted it
-                    if (segment.shadowHidden != 1) {
-                        return true;
-                    }
+                return acc;
+            }, {});
 
-                    if (shadowHiddenSegments === undefined) {
-                        shadowHiddenSegments = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]);
-                    }
+        for (const [category, categorySegments] of Object.entries(segmentsByCategory)) {
+            segments.push(...prepareCategorySegments(req, videoID, category, categorySegments, cache));
+        }
 
-                    //if this isn't their ip, don't send it to them
-                    return shadowHiddenSegments.some(shadowHiddenSegment => {
-                        if (userHashedIP === undefined) {
-                            //hash the IP only if it's strictly necessary
-                            userHashedIP = getHash(getIP(req) + config.globalSalt);
-                        }
-                        return shadowHiddenSegment.hashedIP === userHashedIP;
-                    });
-                });
+        return segments;
+    } catch (err) {
+        if (err) {
+            Logger.error(err);
+            return null;
+        }
+    }
+}
 
-            chooseSegments(categorySegments).forEach((chosenSegment: any) => {
-                segments.push({
-                    category,
-                    segment: [chosenSegment.startTime, chosenSegment.endTime],
-                    UUID: chosenSegment.UUID,
-                });
-            });
+function getSegmentsByHash(req: Request, hashedVideoIDPrefix: VideoIDHash, categories: Category[]): Record<VideoID, VideoData> {
+    const cache: SegmentCache = {shadowHiddenSegmentIPs: {}}; //map must exist up front: prepareCategorySegments indexes into it
+    const segments: Record<VideoID, VideoData> = {};
+
+    try {
+        type SegmentWithHashPerVideoID = Record<VideoID, {hash: VideoIDHash, segmentPerCategory: Record<Category, DBSegment[]>}>;
+
+        const segmentPerVideoID: SegmentWithHashPerVideoID = db
+            .prepare(
+                'all',
+                `SELECT videoID, startTime, endTime, votes, UUID, category, shadowHidden, hashedVideoID FROM sponsorTimes WHERE hashedVideoID LIKE ? AND category IN (${Array(categories.length).fill('?').join()}) ORDER BY startTime`,
+                [hashedVideoIDPrefix + '%', categories]
+            ).reduce((acc: SegmentWithHashPerVideoID, segment: DBSegment) => {
+                acc[segment.videoID] = acc[segment.videoID] || {
+                    hash: segment.hashedVideoID,
+                    segmentPerCategory: {},
+                };
+                const videoCategories = acc[segment.videoID].segmentPerCategory;
+
+                videoCategories[segment.category] = videoCategories[segment.category] || [];
+                videoCategories[segment.category].push(segment);
+
+                return acc;
+            }, {});
+
+        for (const [videoID, videoData] of Object.entries(segmentPerVideoID)) {
+            segments[videoID] = {
+                hash: videoData.hash,
+                segments: [],
+            };
+
+            for (const [category, segmentPerCategory] of Object.entries(videoData.segmentPerCategory)) {
+                segments[videoID].segments.push(...prepareCategorySegments(req, videoID, category, segmentPerCategory, cache));
+            }
         }
 
         return segments;
@@ -66,15 +119,19 @@ function cleanGetSegments(req: Request, videoID: string, categories: any[]) {
 //gets a weighted random choice from the choices array based on their `votes` property.
 //amountOfChoices specifies the maximum amount of choices to return, 1 or more.
 //choices are unique
-function getWeightedRandomChoice(choices: any[], amountOfChoices: number) {
+function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOfChoices: number): T[] {
     //trivial case: no need to go through the whole process
     if (amountOfChoices >= choices.length) {
         return choices;
     }
 
+    type TWithWeight = T & {
+        weight: number
+    }
+
     //assign a weight to each choice
     let totalWeight = 0;
-    choices = choices.map(choice => {
+    let choicesWithWeights: TWithWeight[] = choices.map(choice => {
         //The 3 makes -2 the minimum votes before being ignored completely
         //this can be changed if this system increases in popularity.
         const weight = Math.exp((choice.votes + 3));
@@ -88,16 +145,16 @@ function getWeightedRandomChoice(choices: any[], amountOfChoices: number) {
     while (amountOfChoices-- > 0) {
         //weighted random draw of one element of choices
         const randomNumber = Math.random() * totalWeight;
-        let stackWeight = choices[0].weight;
+        let stackWeight = choicesWithWeights[0].weight;
         let i = 0;
         while (stackWeight < randomNumber) {
-            stackWeight += choices[++i].weight;
+            stackWeight += choicesWithWeights[++i].weight;
         }
 
         //add it to the chosen ones and remove it from the choices before the next iteration
-        chosen.push(choices[i]);
-        totalWeight -= choices[i].weight;
-        choices.splice(i, 1);
+        chosen.push(choicesWithWeights[i]);
+        totalWeight -= choicesWithWeights[i].weight;
+        choicesWithWeights.splice(i, 1);
     }
 
     return chosen;
@@ -107,19 +164,19 @@ function getWeightedRandomChoice(choices: any[], amountOfChoices: number) {
 //Only one similar time will be returned, randomly generated based on the sqrt of votes.
 //This allows new less voted items to still sometimes appear to give them a chance at getting votes.
 //Segments with less than -1 votes are already ignored before this function is called
-function chooseSegments(segments: any[]) {
+function chooseSegments(segments: DBSegment[]): DBSegment[] {
     //Create groups of segments that are similar to eachother
     //Segments must be sorted by their startTime so that we can build groups chronologically:
     //1. As long as the segments' startTime fall inside the currentGroup, we keep adding them to that group
     //2. If a segment starts after the end of the currentGroup (> cursor), no other segment will ever fall
     //   inside that group (because they're sorted) so we can create a new one
-    const similarSegmentsGroups: any[] = [];
-    let currentGroup: any;
+    const overlappingSegmentsGroups: OverlappingSegmentGroup[] = [];
+    let currentGroup: OverlappingSegmentGroup;
     let cursor = -1; //-1 to make sure that, even if the 1st segment starts at 0, a new group is created
     segments.forEach(segment => {
         if (segment.startTime > cursor) {
             currentGroup = {segments: [], votes: 0};
-            similarSegmentsGroups.push(currentGroup);
+            overlappingSegmentsGroups.push(currentGroup);
         }
 
         currentGroup.segments.push(segment);
@@ -132,7 +189,7 @@ function chooseSegments(segments: any[]) {
     });
 
     //if there are too many groups, find the best 8
-    return getWeightedRandomChoice(similarSegmentsGroups, 32).map(
+    return getWeightedRandomChoice(overlappingSegmentsGroups, 32).map(
         //randomly choose 1 good segment per group and return them
         group => getWeightedRandomChoice(group.segments, 1)[0],
     );
@@ -158,14 +215,14 @@ function handleGetSegments(req: Request, res: Response) {
             ? [req.query.category]
             : ['sponsor'];
 
-    let segments = cleanGetSegments(req, videoID, categories);
+    const segments = getSegmentsByVideoID(req, videoID, categories);
 
     if (segments === null || segments === undefined) {
         res.sendStatus(500);
         return false;
     }
 
-    if (segments.length == 0) {
+    if (segments.length === 0) {
         res.sendStatus(404);
         return false;
     }
@@ -173,8 +230,8 @@ function handleGetSegments(req: Request, res: Response) {
     return segments;
 }
 
-function endpoint(req: Request, res: Response) {
-    const segments = handleGetSegments(req, res);
+function endpoint(req: Request, res: Response): void {
+    const segments = handleGetSegments(req, res);
 
     if (segments) {
         //send result
@@ -183,7 +240,9 @@ function endpoint(req: Request, res: Response) {
 }
 
 export {
-    handleGetSegments,
-    cleanGetSegments,
+    getSegmentsByVideoID,
+    getSegmentsByHash,
     endpoint,
+    handleGetSegments
 };
+
diff --git a/src/routes/getSkipSegmentsByHash.ts b/src/routes/getSkipSegmentsByHash.ts
index 81f2daa..9273465 100644
--- a/src/routes/getSkipSegmentsByHash.ts
+++ b/src/routes/getSkipSegmentsByHash.ts
@@ -1,31 +1,31 @@
 import {hashPrefixTester} from '../utils/hashPrefixTester';
-import {cleanGetSegments} from './getSkipSegments';
-import {db} from '../databases/databases';
+import {getSegmentsByHash} from './getSkipSegments';
 import {Request, Response} from 'express';
+import { Category, VideoIDHash } from '../types/segments.model';
 
 export async function getSkipSegmentsByHash(req: Request, res: Response) {
-    let hashPrefix = req.params.prefix;
+    let hashPrefix: VideoIDHash = req.params.prefix;
     if (!hashPrefixTester(req.params.prefix)) {
         res.status(400).send("Hash prefix does not match format requirements."); // Exit early on faulty prefix
         return;
     }
 
-    const categories = req.query.categories
+    const categories: Category[] = req.query.categories
         ? JSON.parse(req.query.categories as string)
         : req.query.category
             ? [req.query.category]
             : ['sponsor'];
 
     // Get all video id's that match hash prefix
-    const videoIds = db.prepare('all', 'SELECT DISTINCT videoId, hashedVideoID from sponsorTimes WHERE hashedVideoID LIKE ?', [hashPrefix + '%']);
+    const segments = getSegmentsByHash(req, hashPrefix, categories);
 
-    let segments = videoIds.map((video: any) => {
-        return {
-            videoID: video.videoID,
-            hash: video.hashedVideoID,
-            segments: cleanGetSegments(req, video.videoID, categories),
-        };
-    });
+    if (!segments) return res.status(404).json([]);
 
-    res.status((segments.length === 0) ? 404 : 200).json(segments);
+    const output = Object.entries(segments).map(([videoID, data]) => ({
+        videoID,
+        hash: data.hash,
+        segments: data.segments,
+    }));
+
+    res.status(output.length === 0 ? 404 : 200).json(output);
 }
diff --git a/src/types/segments.model.ts b/src/types/segments.model.ts
new file mode 100644
index 0000000..3e436d5
--- /dev/null
+++ b/src/types/segments.model.ts
@@ -0,0 +1,45 @@
+export type SegmentUUID = string;
+export type VideoID = string;
+export type Category = string;
+export type VideoIDHash = string;
+export type IPHash = string;
+
+export interface Segment {
+    category: Category;
+    segment: number[];
+    UUID: SegmentUUID;
+}
+
+export interface DBSegment {
+    category: Category;
+    startTime: number;
+    endTime: number;
+    UUID: SegmentUUID;
+    votes: number;
+    shadowHidden: 0 | 1;
+    videoID: VideoID;
+    hashedVideoID: VideoIDHash;
+}
+
+export interface OverlappingSegmentGroup {
+    segments: DBSegment[],
+    votes: number;
+}
+
+export interface VotableObject {
+    votes: number;
+}
+
+export interface VotableObjectWithWeight extends VotableObject {
+    weight: number;
+}
+
+export interface VideoData {
+    hash: VideoIDHash;
+    segments: Segment[];
+}
+
+export interface SegmentCache {
+    shadowHiddenSegmentIPs?: Record<VideoID, {hashedIP: IPHash}[]>,
+    userHashedIP?: IPHash
+}
\ No newline at end of file
diff --git a/test/cases/getSegmentsByHash.ts b/test/cases/getSegmentsByHash.ts
index 9bf4c89..029c1a8 100644
--- a/test/cases/getSegmentsByHash.ts
+++ b/test/cases/getSegmentsByHash.ts
@@ -32,17 +32,17 @@ describe('getSegmentsByHash', () => {
             });
     });
 
-    it('Should be able to get a 200 with empty segments for video but no matching categories', (done: Done) => {
+    it('Should return 404 if no segments are found even if a video for the given hash is known', (done: Done) => {
         request.get(getbaseURL()
             + '/api/skipSegments/3272f?categories=["shilling"]', null,
             (err, res, body) => {
                 if (err) done("Couldn't call endpoint");
-                else if (res.statusCode !== 200) done("non 200 status code, was " + res.statusCode);
+                else if (res.statusCode !== 404) done("non 404 status code, was " + res.statusCode);
                 else {
-                    if (JSON.parse(body) && JSON.parse(body).length > 0 && JSON.parse(body)[0].segments.length === 0) {
+                    if (body === '[]') {
                         done(); // pass
                     } else {
-                        done("response had segments");
+                        done("Response had videos");
                     }
                 }
             });