added get segments by hash prefix

This commit is contained in:
Joe Dowd
2020-08-31 00:45:06 +01:00
parent 26c72b006c
commit 1a06502806
7 changed files with 206 additions and 198 deletions

View File

@@ -8,6 +8,62 @@ var logger = require('../utils/logger.js');
var getHash = require('../utils/getHash.js');
var getIP = require('../utils/getIP.js');
/**
 * Collects the skip segments of one video for the requested categories.
 *
 * For each category the rows are read from `sponsorTimes` ordered by start
 * time, rows with fewer than -1 votes are dropped, and the remaining rows are
 * handed to `chooseSegments`. Shadow-hidden rows are kept only when the
 * visitor's hashed IP matches one of the hashed IPs stored for the video in
 * the private database.
 *
 * @param {string} videoID Video whose segments are looked up.
 * @param {string[]} categories Categories to include; they are queried in order.
 * @param {Object} [req] Request of the current visitor, passed to `getIP` to
 *   derive the hashed IP. When omitted, shadow-hidden segments are never
 *   returned, because no visitor can be identified as their submitter.
 * @returns {Array<{category: string, segment: number[], UUID: string}>|undefined}
 *   The chosen segments, or `undefined` when a lookup failed.
 */
function cleanGetSegments(videoID, categories, req) {
  let userHashedIP;
  let shadowHiddenSegments;

  // Decides whether shadow-hidden rows of this video may be shown to the visitor.
  const isSubmittedByVisitor = () => {
    if (req == null) {
      // Without a request there is no IP to compare against.
      return false;
    }
    if (shadowHiddenSegments === undefined) {
      // Queried at most once per call, and only when a shadow-hidden row shows up.
      shadowHiddenSegments = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]);
    }
    return shadowHiddenSegments.some(shadowHiddenSegment => {
      if (userHashedIP === undefined) {
        //hash the IP only if it's strictly necessary
        userHashedIP = getHash(getIP(req) + config.globalSalt);
      }
      return shadowHiddenSegment.hashedIP === userHashedIP;
    });
  };

  const segments = [];
  try {
    for (const category of categories) {
      const categorySegments = db
        .prepare(
          'all',
          'SELECT startTime, endTime, votes, UUID, shadowHidden FROM sponsorTimes WHERE videoID = ? and category = ? ORDER BY startTime',
          [videoID, category]
        )
        .filter(segment => {
          if (segment.votes < -1) {
            return false; //too untrustworthy, just ignore it
          }
          //shadowHidden means it is hidden to everyone but the original ip that submitted it
          if (segment.shadowHidden != 1) {
            return true;
          }
          //if this isn't their ip, don't send it to them
          return isSubmittedByVisitor();
        });

      chooseSegments(categorySegments).forEach(chosenSegment => {
        segments.push({
          category,
          segment: [chosenSegment.startTime, chosenSegment.endTime],
          UUID: chosenSegment.UUID,
        });
      });
    }
    return segments;
  } catch (err) {
    // Keep the original marker message, but do not throw away the cause.
    logger.error('j 2 Query failed');
    logger.error(err);
    return undefined;
  }
}
//Gets weighted random choices from the choices array, weighted by each choice's `votes` property.
//amountOfChoices specifies the maximum number of choices to return (1 or more).
//The returned choices are unique.
@@ -104,58 +160,11 @@ function handleGetSegments(req, res) {
? [req.query.category]
: ['sponsor'];
/**
* @type {Array<{
* segment: number[],
* category: string,
* UUID: string
* }>
* }
*/
const segments = [];
let segments = cleanGetSegments(videoID, categories);
let userHashedIP, shadowHiddenSegments;
try {
for (const category of categories) {
const categorySegments = db
.prepare(
'all',
'SELECT startTime, endTime, votes, UUID, shadowHidden FROM sponsorTimes WHERE videoID = ? and category = ? ORDER BY startTime',
[videoID, category]
)
.filter(segment => {
if (segment.votes < -1) {
return false; //too untrustworthy, just ignore it
}
//check if shadowHidden
//this means it is hidden to everyone but the original ip that submitted it
if (segment.shadowHidden != 1) {
return true;
}
if (shadowHiddenSegments === undefined) {
shadowHiddenSegments = privateDB.prepare('all', 'SELECT hashedIP FROM sponsorTimes WHERE videoID = ?', [videoID]);
}
//if this isn't their ip, don't send it to them
return shadowHiddenSegments.some(shadowHiddenSegment => {
if (userHashedIP === undefined) {
//hash the IP only if it's strictly necessary
userHashedIP = getHash(getIP(req) + config.globalSalt);
}
return shadowHiddenSegment.hashedIP === userHashedIP;
});
});
chooseSegments(categorySegments).forEach(chosenSegment => {
segments.push({
category,
segment: [chosenSegment.startTime, chosenSegment.endTime],
UUID: chosenSegment.UUID,
});
});
if (segments === undefined) {
res.sendStatus(500);
return false;
}
if (segments.length == 0) {
@@ -164,16 +173,11 @@ function handleGetSegments(req, res) {
}
return segments;
} catch (error) {
logger.error(error);
res.sendStatus(500);
return false;
}
}
module.exports = {
handleGetSegments,
cleanGetSegments,
endpoint: function (req, res) {
let segments = handleGetSegments(req, res);

View File

@@ -1,125 +1,36 @@
const config = require('../config.js');
const { db, privateDB } = require('../databases/databases.js');
const hashPrefixTester = require('../utils/hashPrefixTester.js');
const getSegments = require('./getSkipSegments.js').cleanGetSegments;
const getHash = require('../utils/getHash.js');
const getIP = require('../utils/getIP.js');
/**
* @typedef {Object} Segment
* @property {string} videoID YouTube video ID the segment is meant for
* @property {number[]} segment Tuple of start and end times in seconds
* @property {string} category Category of content to skip
* @property {string} UUID Unique identifier for the specific segment
*/
/**
* @typedef {Object} Row
* @property {string} videoID
* @property {number} startTime
* @property {number} endTime
* @property {number} votes
* @property {string} UUID
* @property {string} category
* @property {number} shadowHidden
*/
/**
 * Input an array of database records and get only one back, weighed on votes.
 * The logic is taken from getWeightedRandomChoice, just simplified input and output to not work on indices only.
 *
 * Each row gets the weight sqrt((votes + 3) * 10). A single random number is
 * drawn and the first row whose cumulative weight share exceeds it is picked,
 * so every draw in [0, 1) maps to exactly one row, including draws that land
 * exactly on the border between two rows.
 *
 * @param {Row[]} rows
 * @returns {?Row} The picked row, or null when `rows` is empty.
 */
function pickWeightedRandomRow(rows) {
  if (rows.length === 0) {
    return null;
  }
  if (rows.length === 1) {
    return rows[0];
  }

  // Clamp before the square root so rows with fewer than -3 votes get weight 0 instead of NaN.
  const sqrtWeightsList = rows.map(row => Math.sqrt(Math.max(row.votes + 3, 0) * 10));
  const totalSqrtWeights = sqrtWeightsList.reduce((total, weight) => total + weight, 0);

  const randomNumber = Math.random();
  let cumulativeWeight = 0;
  for (let i = 0; i < sqrtWeightsList.length; i++) {
    cumulativeWeight += sqrtWeightsList[i];
    // The lower bound is implied: every earlier row has already been ruled out.
    if (randomNumber < cumulativeWeight / totalSqrtWeights) {
      return rows[i];
    }
  }

  // Only reachable through rounding or when no row carries a usable weight.
  return rows[rows.length - 1];
}
/**
 * Looks up the skip segments of every video whose hashed ID starts with `prefix`.
 *
 * Rows with fewer than -1 votes are excluded by the query. Shadow-hidden rows
 * are kept only when the private database lists their video among the videos
 * submitted under `hashedIP`. Per video, rows that overlap in time (regardless
 * of category) are grouped and one row per group is chosen with
 * `pickWeightedRandomRow`.
 *
 * @param {string} prefix Lowercased hexadecimal hash prefix
 * @param {string} hashedIP Custom hash of the visitors IP address
 * @returns {Object.<string, Segment[]>} Chosen segments keyed by video ID;
 *   videos without any visible row are absent.
 */
function getSkipSegmentsByHash(prefix, hashedIP) {
  /** @type {Row[]} */
  const rows = db.prepare('SELECT videoID, startTime, endTime, votes, UUID, category, shadowHidden FROM sponsorTimes WHERE votes >= -1 AND hashedVideoID LIKE ? ORDER BY videoID, startTime')
    .all(prefix + '%');

  // A Set keeps the per-row membership check constant-time.
  /** @type {Set<string>} */
  const onlyForCurrentUser = new Set(
    privateDB.prepare('SELECT videoID FROM sponsorTimes WHERE hashedIP = ?').all(hashedIP).map(row => row.videoID)
  );

  /** @type {Object.<string, Row[][]>} */
  const rowGroupsPerVideo = {};
  let previousVideoID = null;
  let previousEndTime = null;
  for (const row of rows) {
    /** @TODO check if this logic does what is expected. */
    if (row.shadowHidden === 1 && !onlyForCurrentUser.has(row.videoID)) {
      // The current visitors IP did not submit for the current video.
      // Do not send shadowHidden segments to them.
      continue;
    }

    // Split up the rows per video and group overlapping segments together.
    if (!(row.videoID in rowGroupsPerVideo)) {
      rowGroupsPerVideo[row.videoID] = [];
    }
    const groups = rowGroupsPerVideo[row.videoID];
    if (previousVideoID === row.videoID && row.startTime <= previousEndTime) {
      // Rows arrive ordered by start time, so an overlap can only be with the latest group.
      groups[groups.length - 1].push(row);
      previousEndTime = Math.max(previousEndTime, row.endTime);
    } else {
      groups.push([row]);
      previousVideoID = row.videoID;
      previousEndTime = row.endTime;
    }
  }

  /** @type {Object.<string, Segment[]>} */
  const output = {};
  for (const videoID of Object.keys(rowGroupsPerVideo)) {
    output[videoID] = rowGroupsPerVideo[videoID]
      .map(group => pickWeightedRandomRow(group))
      // Skip a group rather than crash the whole lookup if no row could be picked.
      .filter(row => row != null)
      .map(row => ({ videoID: row.videoID, segment: [row.startTime, row.endTime], category: row.category, UUID: row.UUID }));
  }
  return output;
}
const minimumPrefix = config.minimumPrefix || '3';
const maximumPrefix = config.maximumPrefix || '32'; // Half the hash.
const prefixChecker = new RegExp('^[\\dA-F]{' + minimumPrefix + ',' + maximumPrefix + '}$', 'i');
const databases = require('../databases/databases.js');
const db = databases.db;
module.exports = async function (req, res) {
if (!prefixChecker.test(req.params.prefix)) {
res.sendStatus(400).end(); // Exit early on faulty prefix
let hashPrefix = req.params.prefix;
if (!hashPrefixTester(req.params.prefix)) {
res.status(400).send("Hash prefix does not match format requirements."); // Exit early on faulty prefix
return;
}
const segments = getSkipSegmentsByHash(
req.params.prefix.toLowerCase(),
getHash(getIP(req) + config.globalSalt)
);
const categories = req.query.categories
? JSON.parse(req.query.categories)
: req.query.category
? [req.query.category]
: ['sponsor'];
if (Object.keys(segments).length > 0) {
res.send(segments);
} else {
res.sendStatus(404); // No skipable segments within this prefix
// Get all video id's that match hash prefix
const videoIds = db.prepare('all', 'SELECT DISTINCT videoId, hashedVideoID from sponsorTimes WHERE hashedVideoID LIKE ?', [hashPrefix+'%']);
if (videoIds.length === 0) {
res.sendStatus(404);
return;
}
let segments = videoIds.map((video) => {
return {
videoID: video.videoID,
hash: video.hashedVideoID,
segments: getSegments(video.videoID, categories)
};
});
res.status(200).json(segments);
}

View File

@@ -1,4 +1,3 @@
var fs = require('fs');
var config = require('../config.js');
var getHash = require('../utils/getHash.js');
@@ -370,8 +369,6 @@ async function voteOnSponsorTime(req, res) {
}
module.exports = {
voteOnSponsorTime,
endpoint: function (req, res) {
voteOnSponsorTime(req, res);
},
};
voteOnSponsorTime,
endpoint: voteOnSponsorTime
};

View File

@@ -0,0 +1,11 @@
const config = require('../config.js');
const logger = require('./logger.js');
// Permitted prefix lengths; the config may override either bound.
const shortestPrefix = config.minimumPrefix || '3';
const longestPrefix = config.maximumPrefix || '32'; // Half the hash.

// Matches a string made up only of hexadecimal digits (either case) whose
// length lies between the two bounds.
const hexPrefixPattern = new RegExp(`^[\\da-f]{${shortestPrefix},${longestPrefix}}$`, 'i');

/**
 * Checks a hash prefix against the format requirements.
 *
 * @param {string} prefix Candidate hash prefix
 * @returns {boolean} true when the prefix matches the pattern above
 */
module.exports = (prefix) => hexPrefixPattern.test(prefix);