From 0bac7e8d90554c4b191dfc1421bc78d001e9cab2 Mon Sep 17 00:00:00 2001 From: opl- Date: Wed, 2 Dec 2020 15:20:42 +0100 Subject: [PATCH 1/3] Improve performance of segment querying endpoints --- src/routes/getSkipSegments.js | 134 +++++++++++++++++++--------- src/routes/getSkipSegmentsByHash.js | 22 ++--- test/cases/getSegmentsByHash.js | 8 +- 3 files changed, 108 insertions(+), 56 deletions(-) diff --git a/src/routes/getSkipSegments.js b/src/routes/getSkipSegments.js index 69d6562..9d7aa14 100644 --- a/src/routes/getSkipSegments.js +++ b/src/routes/getSkipSegments.js @@ -8,51 +8,102 @@ var logger = require('../utils/logger.js'); var getHash = require('../utils/getHash.js'); var getIP = require('../utils/getIP.js'); -function cleanGetSegments(req, videoID, categories) { - let userHashedIP, shadowHiddenSegments; +function prepareCategorySegments(req, videoID, category, segments, cache = {shadowHiddenSegments: {}}) { + const filteredSegments = segments.filter((segment) => { + if (segment.votes < -1) { + return false; //too untrustworthy, just ignore it + } - let segments = []; + //check if shadowHidden + //this means it is hidden to everyone but the original ip that submitted it + if (segment.shadowHidden != 1) { + return true; + } + + if (cache.shadowHiddenSegments[videoID] === undefined) { + cache.shadowHiddenSegments[videoID] = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]); + } + + //if this isn't their ip, don't send it to them + return cache.shadowHiddenSegments[videoID].some((shadowHiddenSegment) => { + if (cache.userHashedIP === undefined) { + //hash the IP only if it's strictly necessary + cache.userHashedIP = getHash(getIP(req) + config.globalSalt); + } + + return shadowHiddenSegment.hashedIP === cache.userHashedIP; + }); + }); + + return chooseSegments(filteredSegments).map((chosenSegment) => ({ + category, + segment: [chosenSegment.startTime, chosenSegment.endTime], + UUID: chosenSegment.UUID, + })); +} + 
+function getSegmentsByVideoID(req, videoID, categories) { + const cache = {}; + const segments = []; try { - for (const category of categories) { - const categorySegments = db - .prepare( - 'all', - 'SELECT startTime, endTime, votes, UUID, shadowHidden FROM sponsorTimes WHERE videoID = ? and category = ? ORDER BY startTime', - [videoID, category] - ) - .filter(segment => { - if (segment.votes < -1) { - return false; //too untrustworthy, just ignore it - } + const segmentsByCategory = db + .prepare( + 'all', + `SELECT startTime, endTime, votes, UUID, category, shadowHidden FROM sponsorTimes WHERE videoID = ? AND category IN (${Array(categories.length).fill('?').join()}) ORDER BY startTime`, + [videoID, categories] + ).reduce((acc, segment) => { + acc[segment.category] = acc[segment.category] || []; + acc[segment.category].push(segment); - //check if shadowHidden - //this means it is hidden to everyone but the original ip that submitted it - if (segment.shadowHidden != 1) { - return true; - } + return acc; + }, {}); - if (shadowHiddenSegments === undefined) { - shadowHiddenSegments = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]); - } + for (const [category, categorySegments] of Object.entries(segmentsByCategory)) { + segments.push(...prepareCategorySegments(req, videoID, category, categorySegments, cache)); + } - //if this isn't their ip, don't send it to them - return shadowHiddenSegments.some(shadowHiddenSegment => { - if (userHashedIP === undefined) { - //hash the IP only if it's strictly necessary - userHashedIP = getHash(getIP(req) + config.globalSalt); - } - return shadowHiddenSegment.hashedIP === userHashedIP; - }); - }); + return segments; + } catch (err) { + if (err) { + logger.error(err); + return null; + } + } +} - chooseSegments(categorySegments).forEach(chosenSegment => { - segments.push({ - category, - segment: [chosenSegment.startTime, chosenSegment.endTime], - UUID: chosenSegment.UUID, - }); - }); 
+function getSegmentsByHash(req, hashedVideoIDPrefix, categories) { + const cache = {}; + const segments = {}; + + try { + const allSegments = db + .prepare( + 'all', + `SELECT videoID, startTime, endTime, votes, UUID, category, shadowHidden, hashedVideoID FROM sponsorTimes WHERE hashedVideoID LIKE ? AND category IN (${Array(categories.length).fill('?').join()}) ORDER BY startTime`, + [hashedVideoIDPrefix + '%', categories] + ).reduce((acc, segment) => { + acc[segment.videoID] = acc[segment.videoID] || { + hash: segment.hashedVideoID, + categories: {}, + }; + const videoCategories = acc[segment.videoID].categories; + + videoCategories[segment.category] = videoCategories[segment.category] || []; + videoCategories[segment.category].push(segment); + + return acc; + }, {}); + + for (const [videoID, videoData] of Object.entries(allSegments)) { + segments[videoID] = { + hash: videoData.hash, + segments: [], + }; + + for (const [category, categorySegments] of Object.entries(videoData.categories)) { + segments[videoID].segments.push(...prepareCategorySegments(req, videoID, category, categorySegments, cache)); + } } return segments; @@ -160,14 +211,14 @@ function handleGetSegments(req, res) { ? 
[req.query.category] : ['sponsor']; - let segments = cleanGetSegments(req, videoID, categories); + const segments = getSegmentsByVideoID(req, videoID, categories); if (segments === null || segments === undefined) { res.sendStatus(500); return false; } - if (segments.length == 0) { + if (segments.length === 0) { res.sendStatus(404); return false; } @@ -177,7 +228,8 @@ function handleGetSegments(req, res) { module.exports = { handleGetSegments, - cleanGetSegments, + getSegmentsByVideoID, + getSegmentsByHash, endpoint: function (req, res) { let segments = handleGetSegments(req, res); diff --git a/src/routes/getSkipSegmentsByHash.js b/src/routes/getSkipSegmentsByHash.js index 28131b6..9454eee 100644 --- a/src/routes/getSkipSegmentsByHash.js +++ b/src/routes/getSkipSegmentsByHash.js @@ -1,5 +1,5 @@ const hashPrefixTester = require('../utils/hashPrefixTester.js'); -const getSegments = require('./getSkipSegments.js').cleanGetSegments; +const getSegments = require('./getSkipSegments.js').getSegmentsByHash; const databases = require('../databases/databases.js'); const logger = require('../utils/logger.js'); @@ -19,15 +19,15 @@ module.exports = async function (req, res) { : ['sponsor']; // Get all video id's that match hash prefix - const videoIds = db.prepare('all', 'SELECT DISTINCT videoId, hashedVideoID from sponsorTimes WHERE hashedVideoID LIKE ?', [hashPrefix+'%']); + const segments = getSegments(req, hashPrefix, categories); - let segments = videoIds.map((video) => { - return { - videoID: video.videoID, - hash: video.hashedVideoID, - segments: getSegments(req, video.videoID, categories) - }; - }); + if (!segments) return res.status(404).json([]); - res.status((segments.length === 0) ? 404 : 200).json(segments); -} \ No newline at end of file + const output = Object.entries(segments).map(([videoID, data]) => ({ + videoID, + hash: data.hash, + segments: data.segments, + })); + + res.status(output.length === 0 ? 
404 : 200).json(output); +} diff --git a/test/cases/getSegmentsByHash.js b/test/cases/getSegmentsByHash.js index ed4f819..1d1c2f1 100644 --- a/test/cases/getSegmentsByHash.js +++ b/test/cases/getSegmentsByHash.js @@ -24,17 +24,17 @@ describe('getSegmentsByHash', () => { }); }); - it('Should be able to get a 200 with empty segments for video but no matching categories', (done) => { + it('Should return 404 if no segments are found even if a video for the given hash is known', (done) => { request.get(utils.getbaseURL() + '/api/skipSegments/3272f?categories=["shilling"]', null, (err, res, body) => { if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 200) done("non 200 status code, was " + res.statusCode); + else if (res.statusCode !== 404) done("non 404 status code, was " + res.statusCode); else { - if (JSON.parse(body) && JSON.parse(body).length > 0 && JSON.parse(body)[0].segments.length === 0) { + if (body === '[]') { done(); // pass } else { - done("response had segments"); + done("response had videos"); } } }); From 71aa7ec0ef20b3268926b8db6a4bea70e4d65d3b Mon Sep 17 00:00:00 2001 From: Ajay Ramachandran Date: Thu, 24 Dec 2020 21:29:45 -0500 Subject: [PATCH 2/3] Remove extra js test --- test/cases/getSegmentsByHash.js | 190 -------------------------------- 1 file changed, 190 deletions(-) delete mode 100644 test/cases/getSegmentsByHash.js diff --git a/test/cases/getSegmentsByHash.js b/test/cases/getSegmentsByHash.js deleted file mode 100644 index 1d1c2f1..0000000 --- a/test/cases/getSegmentsByHash.js +++ /dev/null @@ -1,190 +0,0 @@ -var request = require('request'); -var db = require('../../src/databases/databases.js').db; -var utils = require('../utils.js'); -var getHash = require('../../src/utils/getHash.js'); - -describe('getSegmentsByHash', () => { - before(() => { - let startOfQuery = "INSERT INTO sponsorTimes (videoID, startTime, endTime, votes, UUID, userID, timeSubmitted, views, category, shadowHidden, hashedVideoID) VALUES"; - 
db.exec(startOfQuery + "('getSegmentsByHash-0', 1, 10, 2, 'getSegmentsByHash-0-0', 'testman', 0, 50, 'sponsor', 0, '" + getHash('getSegmentsByHash-0', 1) + "')"); // hash = fdaff4dee1043451faa7398324fb63d8618ebcd11bddfe0491c488db12c6c910 - db.exec(startOfQuery + "('getSegmentsByHash-0', 20, 30, 2, 'getSegmentsByHash-0-1', 'testman', 100, 150, 'intro', 0, '" + getHash('getSegmentsByHash-0', 1) + "')"); // hash = fdaff4dee1043451faa7398324fb63d8618ebcd11bddfe0491c488db12c6c910 - db.exec(startOfQuery + "('getSegmentsByHash-noMatchHash', 40, 50, 2, 'getSegmentsByHash-noMatchHash', 'testman', 0, 50, 'sponsor', 0, 'fdaffnoMatchHash')"); // hash = fdaff4dee1043451faa7398324fb63d8618ebcd11bddfe0491c488db12c6c910 - db.exec(startOfQuery + "('getSegmentsByHash-1', 60, 70, 2, 'getSegmentsByHash-1', 'testman', 0, 50, 'sponsor', 0, '" + getHash('getSegmentsByHash-1', 1) + "')"); // hash = 3272fa85ee0927f6073ef6f07ad5f3146047c1abba794cfa364d65ab9921692b - }); - - it('Should be able to get a 200', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/3272f?categories=["sponsor", "intro"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 200) done("non 200 status code, was " + res.statusCode); - else { - done(); - } // pass - }); - }); - - it('Should return 404 if no segments are found even if a video for the given hash is known', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/3272f?categories=["shilling"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 404) done("non 404 status code, was " + res.statusCode); - else { - if (body === '[]') { - done(); // pass - } else { - done("response had videos"); - } - } - }); - }); - - it('Should be able to get an empty array if no videos', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/11111?categories=["shilling"]', null, - (err, res, body) => { - if (err) done("Couldn't call 
endpoint"); - else if (res.statusCode !== 404) done("non 404 status code, was " + res.statusCode); - else { - if (JSON.parse(body).length === 0 && body === '[]') done(); // pass - else done("non empty array returned"); - } - }); - }); - - it('Should return 400 prefix too short', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/11?categories=["shilling"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 400) done("non 400 status code, was " + res.statusCode); - else { - done(); // pass - } - }); - }); - - it('Should return 400 prefix too long', (done) => { - let prefix = new Array(50).join('1'); - if (prefix.length <= 32) { // default value, config can change this - done('failed to generate a long enough string for the test ' + prefix.length); - return; - } - - request.get(utils.getbaseURL() - + '/api/skipSegments/'+prefix+'?categories=["shilling"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 400) done("non 400 status code, was " + res.statusCode); - else { - done(); // pass - } - }); - }); - - it('Should not return 400 prefix in range', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/11111?categories=["shilling"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode === 400) done("prefix length 5 gave 400 " + res.statusCode); - else { - done(); // pass - } - }); - }); - - it('Should return 404 for no hash', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/?categories=["shilling"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 404) done("expected 404, got " + res.statusCode); - else { - done(); // pass - } - }); - }); - - it('Should return 500 for bad format categories', (done) => { // should probably be 400 - request.get(utils.getbaseURL() - + '/api/skipSegments/?categories=shilling', null, - 
(err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 500) done("expected 500 got " + res.statusCode); - else { - done(); // pass - } - }); - }); - - it('Should be able to get multiple videos', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/fdaf?categories=["sponsor","intro"]', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 200) done("non 200 status code, was " + res.statusCode); - else { - body = JSON.parse(body); - if (body.length !== 2) done("expected 2 video, got " + body.length); - else if (body[0].segments.length !== 2) done("expected 2 segments for first video, got " + body[0].segments.length); - else if (body[1].segments.length !== 1) done("expected 1 segment for second video, got " + body[1].segments.length); - else done(); - } - }); - }); - - it('Should be able to get 200 for no categories (default sponsor)', (done) => { - request.get(utils.getbaseURL() - + '/api/skipSegments/fdaf', null, - (err, res, body) => { - if (err) done("Couldn't call endpoint"); - else if (res.statusCode !== 200) done("non 200 status code, was " + res.statusCode); - else { - body = JSON.parse(body); - if (body.length !== 2) done("expected 2 videos, got " + body.length); - else if (body[0].segments.length !== 1) done("expected 1 segments for first video, got " + body[0].segments.length); - else if (body[1].segments.length !== 1) done("expected 1 segments for second video, got " + body[1].segments.length); - else if (body[0].segments[0].category !== 'sponsor' || body[1].segments[0].category !== 'sponsor') done("both segments are not sponsor"); - else done(); - } - }); - }); - - it('Should be able to post a segment and get it using endpoint', (done) => { - let testID = 'abc123goodVideo'; - request.post(utils.getbaseURL() - + "/api/postVideoSponsorTimes", { - json: { - userID: "test", - videoID: testID, - segments: [{ - segment: [13, 17], - category: "sponsor" - }] - 
} - }, - (err, res, body) => { - if (err) done('(post) ' + err); - else if (res.statusCode === 200) { - request.get(utils.getbaseURL() - + '/api/skipSegments/'+getHash(testID, 1).substring(0,3), null, - (err, res, body) => { - if (err) done("(get) Couldn't call endpoint"); - else if (res.statusCode !== 200) done("(get) non 200 status code, was " + res.statusCode); - else { - body = JSON.parse(body); - if (body.length !== 1) done("(get) expected 1 video, got " + body.length); - else if (body[0].segments.length !== 1) done("(get) expected 1 segments for first video, got " + body[0].segments.length); - else if (body[0].segments[0].category !== 'sponsor') done("(get) segment should be sponsor, was "+body[0].segments[0].category); - else done(); - } - }); - } else { - done("(post) non 200 status code, was " + res.statusCode); - } - } - ); - }); -}); \ No newline at end of file From e1a9004ed57865ebfe66e97243999196cb512e0f Mon Sep 17 00:00:00 2001 From: Ajay Ramachandran Date: Thu, 24 Dec 2020 21:38:15 -0500 Subject: [PATCH 3/3] Commit missing type file --- src/types/segments.model.ts | 45 +++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/types/segments.model.ts diff --git a/src/types/segments.model.ts b/src/types/segments.model.ts new file mode 100644 index 0000000..3e436d5 --- /dev/null +++ b/src/types/segments.model.ts @@ -0,0 +1,45 @@ +export type SegmentUUID = string; +export type VideoID = string; +export type Category = string; +export type VideoIDHash = string; +export type IPHash = string; + +export interface Segment { + category: Category; + segment: number[]; + UUID: SegmentUUID; +} + +export interface DBSegment { + category: Category; + startTime: number; + endTime: number; + UUID: SegmentUUID; + votes: number; + shadowHidden: 0 | 1; + videoID: VideoID; + hashedVideoID: VideoIDHash; +} + +export interface OverlappingSegmentGroup { + segments: DBSegment[], + votes: number; +} + +export interface VotableObject { + 
votes: number; +} + +export interface VotableObjectWithWeight extends VotableObject { + weight: number; +} + +export interface VideoData { + hash: VideoIDHash; + segments: Segment[]; +} + +export interface SegmentCache { + shadowHiddenSegmentIPs?: Record<VideoID, {hashedIP: IPHash}[]>, + userHashedIP?: IPHash +} \ No newline at end of file