From 5ebb6389253860986989d2b0c79ab31f0cbbc348 Mon Sep 17 00:00:00 2001
From: Ajay
Date: Mon, 20 Dec 2021 22:22:45 -0500
Subject: [PATCH] Add overlapping group caching

---
 package-lock.json                   |  14 ++++
 package.json                        |   2 +
 src/routes/getSkipSegments.ts       | 105 +++++++++++++++++-----------
 src/utils/queryCacher.ts            |   3 +-
 src/utils/redisKeys.ts              |   4 ++
 test/cases/getSkipSegments.ts       |   2 +-
 test/cases/getSkipSegmentsByHash.ts |   2 +-
 7 files changed, 90 insertions(+), 42 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index a6c71de..beb6b65 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -16,6 +16,7 @@
         "express": "^4.17.1",
         "express-promise-router": "^4.1.1",
         "express-rate-limit": "^5.5.1",
+        "lodash": "^4.17.21",
         "pg": "^8.7.1",
         "redis": "^3.1.2",
         "sync-mysql": "^3.0.1"
@@ -25,6 +26,7 @@
         "@types/cron": "^1.7.3",
         "@types/express": "^4.17.13",
         "@types/express-rate-limit": "^5.1.3",
+        "@types/lodash": "^4.14.178",
         "@types/mocha": "^9.0.0",
         "@types/node": "^16.11.11",
         "@types/pg": "^8.6.1",
@@ -317,6 +319,12 @@
       "integrity": "sha512-qcUXuemtEu+E5wZSJHNxUXeCZhAfXKQ41D+duX+VYPde7xyEVZci+/oXKJL13tnRs9lR2pr4fod59GT6/X1/yQ==",
       "dev": true
     },
+    "node_modules/@types/lodash": {
+      "version": "4.14.178",
+      "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.178.tgz",
+      "integrity": "sha512-0d5Wd09ItQWH1qFbEyQ7oTQ3GZrMfth5JkbN3EvTKLXcHLRDSXeLnlvlOn0wvxVIwK5o2M8JzP/OWz7T3NRsbw==",
+      "dev": true
+    },
     "node_modules/@types/mime": {
       "version": "1.3.2",
       "resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz",
@@ -5337,6 +5345,12 @@
       "integrity": "sha512-qcUXuemtEu+E5wZSJHNxUXeCZhAfXKQ41D+duX+VYPde7xyEVZci+/oXKJL13tnRs9lR2pr4fod59GT6/X1/yQ==",
       "dev": true
     },
+    "@types/lodash": {
+      "version": "4.14.178",
+      "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.178.tgz",
+      "integrity": "sha512-0d5Wd09ItQWH1qFbEyQ7oTQ3GZrMfth5JkbN3EvTKLXcHLRDSXeLnlvlOn0wvxVIwK5o2M8JzP/OWz7T3NRsbw==",
+      "dev": true
+    },
     "@types/mime": {
       "version": "1.3.2",
       "resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz",
diff --git a/package.json b/package.json
index 30d912f..efee6ac 100644
--- a/package.json
+++ b/package.json
@@ -24,6 +24,7 @@
         "express": "^4.17.1",
         "express-promise-router": "^4.1.1",
         "express-rate-limit": "^5.5.1",
+        "lodash": "^4.17.21",
         "pg": "^8.7.1",
         "redis": "^3.1.2",
         "sync-mysql": "^3.0.1"
@@ -33,6 +34,7 @@
         "@types/cron": "^1.7.3",
         "@types/express": "^4.17.13",
         "@types/express-rate-limit": "^5.1.3",
+        "@types/lodash": "^4.14.178",
         "@types/mocha": "^9.0.0",
         "@types/node": "^16.11.11",
         "@types/pg": "^8.6.1",
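The only new runtime dependency is lodash, pulled in below solely for its `partition` helper. As a refresher (a standalone sketch, not code from this patch), `partition` splits a collection into a matching half and a non-matching half in one pass:

```ts
import { partition } from "lodash";

// partition returns a pair: [items matching the predicate, everything else].
const numbers = [1, 2, 3, 4, 5];
const [evens, odds] = partition(numbers, (n) => n % 2 === 0);

console.log(evens); // [2, 4]
console.log(odds);  // [1, 3, 5]
```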
"../utils/reputation"; import { getService } from "../utils/getService"; -async function prepareCategorySegments(req: Request, videoID: VideoID, category: Category, segments: DBSegment[], cache: SegmentCache = { shadowHiddenSegmentIPs: {} }): Promise { +async function prepareCategorySegments(req: Request, videoID: VideoID, service: Service, segments: DBSegment[], cache: SegmentCache = { shadowHiddenSegmentIPs: {} }, useCache: boolean): Promise { const shouldFilter: boolean[] = await Promise.all(segments.map(async (segment) => { if (segment.votes < -1 && !segment.required) { return false; //too untrustworthy, just ignore it @@ -39,14 +40,16 @@ async function prepareCategorySegments(req: Request, videoID: VideoID, category: cache.userHashedIP = await getHashCache((getIP(req) + config.globalSalt) as IPAddress); } //if this isn't their ip, don't send it to them - return cache.shadowHiddenSegmentIPs[videoID][segment.timeSubmitted]?.some( + const shouldShadowHide = cache.shadowHiddenSegmentIPs[videoID][segment.timeSubmitted]?.some( (shadowHiddenSegment) => shadowHiddenSegment.hashedIP === cache.userHashedIP) ?? false; + + if (shouldShadowHide) useCache = false; + return shouldShadowHide; })); const filteredSegments = segments.filter((_, index) => shouldFilter[index]); - const maxSegments = getCategoryActionType(category) === CategoryActionType.Skippable ? Infinity : 1; - return (await chooseSegments(filteredSegments, maxSegments)).map((chosenSegment) => ({ + return (await chooseSegments(videoID, service, filteredSegments, useCache)).map((chosenSegment) => ({ category: chosenSegment.category, actionType: chosenSegment.actionType, segment: [chosenSegment.startTime, chosenSegment.endTime], @@ -62,28 +65,21 @@ async function prepareCategorySegments(req: Request, videoID: VideoID, category: async function getSegmentsByVideoID(req: Request, videoID: VideoID, categories: Category[], actionTypes: ActionType[], requiredSegments: SegmentUUID[], service: Service): Promise { const cache: SegmentCache = { shadowHiddenSegmentIPs: {} }; - const segments: Segment[] = []; try { categories = categories.filter((category) => !/[^a-z|_|-]/.test(category)); if (categories.length === 0) return null; - const segmentsByCategory: SBRecord = (await getSegmentsFromDBByVideoID(videoID, service)) - .filter((segment: DBSegment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType)) - .reduce((acc: SBRecord, segment: DBSegment) => { + const segments: DBSegment[] = (await getSegmentsFromDBByVideoID(videoID, service)) + .map((segment: DBSegment) => { if (filterRequiredSegments(segment.UUID, requiredSegments)) segment.required = true; - - acc[segment.category] ??= []; - acc[segment.category].push(segment); - - return acc; + return segment; }, {}); - for (const [category, categorySegments] of Object.entries(segmentsByCategory)) { - segments.push(...(await prepareCategorySegments(req, videoID, category as Category, categorySegments, cache))); - } + const canUseCache = requiredSegments.length === 0; + const processedSegments: Segment[] = await prepareCategorySegments(req, videoID, service, segments, cache, canUseCache); - return segments; + return processedSegments.filter((segment: Segment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType)); } catch (err) { if (err) { Logger.error(err as string); @@ -98,34 +94,37 @@ async function getSegmentsByHash(req: Request, hashedVideoIDPrefix: VideoIDHash, const segments: SBRecord = {}; try { - type SegmentWithHashPerVideoID 
@@ -98,34 +94,37 @@ async function getSegmentsByHash(req: Request, hashedVideoIDPrefix: VideoIDHash
     const segments: SBRecord<VideoID, VideoData> = {};

     try {
-        type SegmentWithHashPerVideoID = SBRecord<VideoID, {hash: VideoIDHash, segmentPerCategory: SBRecord<Category, DBSegment[]>}>;
+        type SegmentWithHashPerVideoID = SBRecord<VideoID, {hash: VideoIDHash, segments: DBSegment[]}>;

         categories = categories.filter((category) => !(/[^a-z|_|-]/.test(category)));
         if (categories.length === 0) return null;

         const segmentPerVideoID: SegmentWithHashPerVideoID = (await getSegmentsFromDBByHash(hashedVideoIDPrefix, service))
-            .filter((segment: DBSegment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType))
             .reduce((acc: SegmentWithHashPerVideoID, segment: DBSegment) => {
                 acc[segment.videoID] = acc[segment.videoID] || {
                     hash: segment.hashedVideoID,
-                    segmentPerCategory: {}
+                    segments: []
                 };
                 if (filterRequiredSegments(segment.UUID, requiredSegments)) segment.required = true;

-                acc[segment.videoID].segmentPerCategory[segment.category] ??= [];
-                acc[segment.videoID].segmentPerCategory[segment.category].push(segment);
+                acc[segment.videoID].segments ??= [];
+                acc[segment.videoID].segments.push(segment);

                 return acc;
             }, {});

         for (const [videoID, videoData] of Object.entries(segmentPerVideoID)) {
-            segments[videoID] = {
+            const data: VideoData = {
                 hash: videoData.hash,
                 segments: [],
             };

-            for (const [category, segmentPerCategory] of Object.entries(videoData.segmentPerCategory)) {
-                segments[videoID].segments.push(...(await prepareCategorySegments(req, videoID as VideoID, category as Category, segmentPerCategory, cache)));
+            const canUseCache = requiredSegments.length === 0;
+            data.segments = (await prepareCategorySegments(req, videoID as VideoID, service, videoData.segments, cache, canUseCache))
+                .filter((segment: Segment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType));
+
+            if (data.segments.length > 0) {
+                segments[videoID] = data;
             }
         }

         return segments;
@@ -164,10 +163,11 @@ async function getSegmentsFromDBByVideoID(videoID: VideoID, service: Service): P
     return await QueryCacher.get(fetchFromDB, skipSegmentsKey(videoID, service));
 }

-//gets a weighted random choice from the choices array based on their `votes` property.
-//amountOfChoices specifies the maximum amount of choices to return, 1 or more.
-//choices are unique
-function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOfChoices: number): T[] {
+// Gets a weighted random choice from the choices array based on their `votes` property.
+// amountOfChoices specifies the maximum amount of choices to return, 1 or more.
+// Choices are unique.
+// If a predicate is given, only the choices that match it go through the weighted draw; the rest are always included.
+function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOfChoices: number, predicate?: (choice: T) => boolean): T[] {
     //trivial case: no need to go through the whole process
     if (amountOfChoices >= choices.length) {
         return choices;
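The optional predicate changes the selection semantics: only choices matching it compete in the weighted draw, while everything else is returned unconditionally. At the call site added further down, that means at most one POI group survives and all other groups pass through. A standalone sketch of the mechanism (simplified types, with a plain "take one" standing in for the weighted draw):

```ts
import { partition } from "lodash";

interface Group { name: string; isPOI: boolean; }

// Simplified: POI groups compete for a single slot; all other groups are kept.
function selectGroups(groups: Group[]): Group[] {
    const [poiGroups, otherGroups] = partition(groups, (group) => group.isPOI);
    const chosenPOI = poiGroups.slice(0, 1); // stand-in for the weighted draw
    return [...otherGroups, ...chosenPOI];
}

console.log(selectGroups([
    { name: "sponsor 0:10-0:30", isPOI: false },
    { name: "highlight A", isPOI: true },
    { name: "highlight B", isPOI: true },
])); // the sponsor group plus exactly one highlight
```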
@@ -179,7 +179,7 @@ function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOf

     //assign a weight to each choice
     let totalWeight = 0;
-    const choicesWithWeights: TWithWeight[] = choices.map(choice => {
+    let choicesWithWeights: TWithWeight[] = choices.map(choice => {
         const boost = Math.min(choice.reputation, 4);

         //The 3 makes -2 the minimum votes before being ignored completely
@@ -190,8 +190,22 @@ function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOf
         return { ...choice, weight };
     });

+    let forceIncludedChoices: T[] = [];
+    if (predicate) {
+        const splitArray = partition(choicesWithWeights, predicate);
+        choicesWithWeights = splitArray[0];
+        forceIncludedChoices = splitArray[1];
+        // Recompute the total so the force-included weights don't skew the draw
+        totalWeight = choicesWithWeights.reduce((sum, choice) => sum + choice.weight, 0);
+    }
+
+    // Nothing left to draw from, return everything
+    if (amountOfChoices >= choicesWithWeights.length) {
+        return choices;
+    }
+
     //iterate and find amountOfChoices choices
-    const chosen = [];
+    const chosen = [...forceIncludedChoices];
     while (amountOfChoices-- > 0) {
         //weighted random draw of one element of choices
         const randomNumber = Math.random() * totalWeight;
@@ -210,11 +224,25 @@ function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOf
     return chosen;
 }

+async function chooseSegments(videoID: VideoID, service: Service, segments: DBSegment[], useCache: boolean): Promise<DBSegment[]> {
+    const fetchData = async () => await buildSegmentGroups(segments);
+
+    const groups = useCache
+        ? await QueryCacher.get(fetchData, skipSegmentGroupsKey(videoID, service))
+        : await fetchData();
+
+    // Filter for only 1 item for POI categories
+    return getWeightedRandomChoice(groups, 1, (choice) => getCategoryActionType(choice.segments[0].category) === CategoryActionType.POI)
+        .map(//randomly choose 1 good segment per group and return them
+            group => getWeightedRandomChoice(group.segments, 1)[0]
+        );
+}
+
 //This function will find segments that are contained inside of eachother, called similar segments
 //Only one similar time will be returned, randomly generated based on the sqrt of votes.
 //This allows new less voted items to still sometimes appear to give them a chance at getting votes.
 //Segments with less than -1 votes are already ignored before this function is called
-async function chooseSegments(segments: DBSegment[], max: number): Promise<DBSegment[]> {
+async function buildSegmentGroups(segments: DBSegment[]): Promise<OverlappingSegmentGroup[]> {
     //Create groups of segments that are similar to eachother
     //Segments must be sorted by their startTime so that we can build groups chronologically:
     //1. As long as the segments' startTime fall inside the currentGroup, we keep adding them to that group
     //2. If a segment starts after the end of the currentGroup, we create a new one and start adding to the new one
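The expensive work, grouping overlapping segments and weighting them by votes, is now cached per video, while the final random pick stays per request. `QueryCacher.get` is a cache-aside lookup: return the cached value when present, otherwise run the fetch function and store its result. A minimal sketch of those semantics with a `Map` standing in for Redis (assumed behavior, inferred from how the call sites here use it):

```ts
// Cache-aside in miniature; Redis plus JSON serialization replaced by a Map.
const fakeCache = new Map<string, unknown>();

async function getOrCompute<T>(key: string, fetch: () => Promise<T>): Promise<T> {
    if (fakeCache.has(key)) return fakeCache.get(key) as T;
    const value = await fetch();
    fakeCache.set(key, value);
    return value;
}
```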
@@ -265,10 +293,7 @@ async function chooseSegments(segments: DBSegment[], max: number): Promise<DBSe
-    //if there are too many groups, find the best ones
-    return getWeightedRandomChoice(overlappingSegmentsGroups, max).map(
-        //randomly choose 1 good segment per group and return them
-        (group) => getWeightedRandomChoice(group.segments, 1)[0],
-    );
+    return overlappingSegmentsGroups;
 }

 function splitPercentOverlap(groups: OverlappingSegmentGroup[]): OverlappingSegmentGroup[] {
@@ -277,12 +302,14 @@ function splitPercentOverlap(groups: OverlappingSegmentGroup[]): OverlappingSegm
         group.segments.forEach((segment) => {
             const bestGroup = result.find((group) => {
                 // At least one segment in the group must have high % overlap or the same action type
+                // Since POI segments will always have 0 overlap, they will always be in their own groups
                 return group.segments.some((compareSegment) => {
                     const overlap = Math.min(segment.endTime, compareSegment.endTime) - Math.max(segment.startTime, compareSegment.startTime);
                     const overallDuration = Math.max(segment.endTime, compareSegment.endTime) - Math.min(segment.startTime, compareSegment.startTime);
                     const overlapPercent = overlap / overallDuration;
-                    return (overlapPercent > 0 && segment.actionType === compareSegment.actionType && segment.actionType !== ActionType.Chapter)
-                        || overlapPercent >= 0.6
+                    return (overlapPercent > 0 && segment.actionType === compareSegment.actionType && segment.category === compareSegment.category && segment.actionType !== ActionType.Chapter)
+                        || (overlapPercent >= 0.6 && segment.actionType !== compareSegment.actionType && segment.category === compareSegment.category)
+                        || (overlapPercent >= 0.8 && segment.actionType === compareSegment.actionType && segment.category !== compareSegment.category)
                         || (overlapPercent >= 0.8 && segment.actionType === ActionType.Chapter && compareSegment.actionType === ActionType.Chapter);
                 });
             });
diff --git a/src/utils/queryCacher.ts b/src/utils/queryCacher.ts
index 128f3a2..57af472 100644
--- a/src/utils/queryCacher.ts
+++ b/src/utils/queryCacher.ts
@@ -1,6 +1,6 @@
 import redis from "../utils/redis";
 import { Logger } from "../utils/logger";
-import { skipSegmentsHashKey, skipSegmentsKey, reputationKey, ratingHashKey } from "./redisKeys";
+import { skipSegmentsHashKey, skipSegmentsKey, reputationKey, ratingHashKey, skipSegmentGroupsKey } from "./redisKeys";
 import { Service, VideoID, VideoIDHash } from "../types/segments.model";
 import { UserID } from "../types/user.model";

@@ -82,6 +82,7 @@ async function getAndSplit<T, U extends string>(fetchFromDB: (values: U[]) => Pr
 function clearSegmentCache(videoInfo: { videoID: VideoID; hashedVideoID: VideoIDHash; service: Service; userID?: UserID; }): void {
     if (videoInfo) {
         redis.delAsync(skipSegmentsKey(videoInfo.videoID, videoInfo.service));
+        redis.delAsync(skipSegmentGroupsKey(videoInfo.videoID, videoInfo.service));
         redis.delAsync(skipSegmentsHashKey(videoInfo.hashedVideoID, videoInfo.service));
         if (videoInfo.userID) redis.delAsync(reputationKey(videoInfo.userID));
     }
diff --git a/src/utils/redisKeys.ts b/src/utils/redisKeys.ts
index 2245d2e..65b12ff 100644
--- a/src/utils/redisKeys.ts
+++ b/src/utils/redisKeys.ts
@@ -7,6 +7,10 @@ export function skipSegmentsKey(videoID: VideoID, service: Service): string {
     return `segments.v2.${service}.videoID.${videoID}`;
 }

+export function skipSegmentGroupsKey(videoID: VideoID, service: Service): string {
+    return `segments.groups.${service}.videoID.${videoID}`;
+}
+
 export function skipSegmentsHashKey(hashedVideoIDPrefix: VideoIDHash, service: Service): string {
     hashedVideoIDPrefix = hashedVideoIDPrefix.substring(0, 4) as VideoIDHash;
     if (hashedVideoIDPrefix.length !== 4) Logger.warn(`Redis skip segment hash-prefix key is not length 4! ${hashedVideoIDPrefix}`);
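The overlap ratio above is the intersection over the union of the two time ranges, and it explains why the chapter fixtures in the tests below move from 71-76 to 71-75. Worked through with those numbers:

```ts
// "Chapter 2" spans [70, 75]; the adjusted "Chapter 3" spans [71, 75].
const overlap = Math.min(75, 75) - Math.max(70, 71);         // 4
const overallDuration = Math.max(75, 75) - Math.min(70, 71); // 5
const overlapPercent = overlap / overallDuration;            // 0.8
// 0.8 >= 0.8 satisfies the chapter rule, so the two chapters share a group.
// With the old end time of 76 the ratio was 4 / 6 ≈ 0.67, below the threshold.
```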
diff --git a/test/cases/getSkipSegments.ts b/test/cases/getSkipSegments.ts
index cf47098..780107f 100644
--- a/test/cases/getSkipSegments.ts
+++ b/test/cases/getSkipSegments.ts
@@ -25,7 +25,7 @@ describe("getSkipSegments", () => {
         await db.prepare("run", query, ["requiredSegmentVid", 80, 90, 2, 0, "requiredSegmentVid4", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, 0, ""]);
         await db.prepare("run", query, ["chapterVid", 60, 80, 2, 0, "chapterVid-1", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 1"]);
         await db.prepare("run", query, ["chapterVid", 70, 75, 2, 0, "chapterVid-2", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 2"]);
-        await db.prepare("run", query, ["chapterVid", 71, 76, 2, 0, "chapterVid-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 3"]);
+        await db.prepare("run", query, ["chapterVid", 71, 75, 2, 0, "chapterVid-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 3"]);
         await db.prepare("run", query, ["requiredSegmentHashVid", 10, 20, -2, 0, "1d04b98f48e8f8bcc15c6ae5ac050801cd6dcfd428fb5f9e65c4e16e7807340fa", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, 0, ""]);
         await db.prepare("run", query, ["requiredSegmentHashVid", 20, 30, -2, 0, "1ebde8e8ae03096b6c866aa2c8cc7ee1d720ca1fca27bea3f39a6a1b876577e71", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, 0, ""]);
         return;
diff --git a/test/cases/getSkipSegmentsByHash.ts b/test/cases/getSkipSegmentsByHash.ts
index 6763651..d526f2d 100644
--- a/test/cases/getSkipSegmentsByHash.ts
+++ b/test/cases/getSkipSegmentsByHash.ts
@@ -33,7 +33,7 @@ describe("getSkipSegmentsByHash", () => {
         await db.prepare("run", query, ["requiredSegmentVid", 80, 90, 2, "requiredSegmentVid-4", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, requiredSegmentVidHash, ""]);
         await db.prepare("run", query, ["chapterVid-hash", 60, 80, 2, "chapterVid-hash-1", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 1"]); //7258
         await db.prepare("run", query, ["chapterVid-hash", 70, 75, 2, "chapterVid-hash-2", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 2"]); //7258
-        await db.prepare("run", query, ["chapterVid-hash", 71, 76, 2, "chapterVid-hash-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 3"]); //7258
+        await db.prepare("run", query, ["chapterVid-hash", 71, 75, 2, "chapterVid-hash-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 3"]); //7258
         await db.prepare("run", query, ["longMuteVid-hash", 40, 45, 2, "longMuteVid-hash-1", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, getHash("longMuteVid-hash", 1), ""]); //6613
         await db.prepare("run", query, ["longMuteVid-hash", 30, 35, 2, "longMuteVid-hash-2", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, getHash("longMuteVid-hash", 1), ""]); //6613
         await db.prepare("run", query, ["longMuteVid-hash", 2, 80, 2, "longMuteVid-hash-3", "testman", 0, 50, "sponsor", "mute", "YouTube", 0, 0, getHash("longMuteVid-hash", 1), ""]); //6613
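For reference, these are the cache entries the patch leaves in Redis for a single video, using the key formats from src/utils/redisKeys.ts (the video ID is only an example):

```ts
const service = "YouTube";
const videoID = "dQw4w9WgXcQ"; // illustrative only

const segmentsKey = `segments.v2.${service}.videoID.${videoID}`;   // raw segment rows
const groupsKey = `segments.groups.${service}.videoID.${videoID}`; // overlapping groups (new)

// clearSegmentCache deletes both keys together, so any write that invalidates
// the segment cache also invalidates the cached groups.
console.log([segmentsKey, groupsKey]);
```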