[jsinterp] Fix bitwise operations (#15985)

Authored by: bashonly
2026-03-13 21:53:13 +03:00 · 2026-02-17 17:10:18 -06:00
parent abade83f8d
commit 62574f5763
3 changed files with 62 additions and 36 deletions
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -9,7 +9,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
-from yt_dlp.jsinterp import JS_Undefined, JSInterpreter, js_number_to_string
+from yt_dlp.jsinterp import (
    JS_Undefined,
    JSInterpreter,
    int_to_int32,
    js_number_to_string,
 )
 class NaN:
@@ -101,8 +106,8 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f(){return 5 ^ 9;}', 12)
        self._test('function f(){return 0.0 << NaN}', 0)
        self._test('function f(){return null << undefined}', 0)
-        # TODO: Does not work due to number too large
+        self._test('function f(){return -12616 ^ 5041}', -8951)
-        # self._test('function f(){return 21 << 4294967297}', 42)
+        self._test('function f(){return 21 << 4294967297}', 42)
    def test_array_access(self):
        self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
@@ -447,6 +452,22 @@ class TestJSInterpreter(unittest.TestCase):
    def test_splice(self):
        # `splice` is invoked through bracket notation; the JS function
        # returns the array it was called on, so the expected value reflects
        # the in-place replacement of the element at index 2
        js_code = 'function f(){var T = ["0", "1", "2"]; T["splice"](2, 1, "0")[0]; return T }'
        expected = ['0', '1', '0']
        self._test(js_code, expected)
    def test_int_to_int32(self):
        # Each pair is (input integer, value expected from int_to_int32).
        # The table covers small values, both edges of the signed 32-bit
        # range, the first values past each edge, and inputs that are
        # several multiples of 2**32 away from the range.
        cases = (
            (0, 0),
            (1, 1),
            (-1, -1),
            (-8951, -8951),
            (2147483647, 2147483647),
            (2147483648, -2147483648),
            (2147483649, -2147483647),
            (-2147483649, 2147483647),
            (-2147483648, -2147483648),
            (-16799986688, 379882496),
            (39570129568, 915423904),
        )
        for value, wrapped in cases:
            assert int_to_int32(value) == wrapped
    def test_js_number_to_string(self):
        for test, radix, expected in [
            (0, None, '0'),
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -3,6 +3,7 @@ import re
 import urllib.parse
 from .common import InfoExtractor
 from ..jsinterp import int_to_int32
 from ..utils import (
    ExtractorError,
    clean_html,
@@ -20,73 +21,69 @@ from ..utils import (
 )
 def to_signed_32(n):
    """Wrap an integer into the signed 32-bit range [-2**31, 2**31 - 1].

    The previous implementation only reduced `n` modulo +/-2**32, so values
    such as 2**31 or -2**31 - 1 were returned unchanged even though they do
    not fit in a signed 32-bit integer. The result of this version is always
    congruent to the old one modulo 2**32, so callers that subsequently mask
    with 0xFFFFFFFF or 0xFF observe the same bits.

    @param n    Integer of arbitrary magnitude
    @returns    The two's-complement signed 32-bit interpretation of the
                low 32 bits of `n`
    """
    # Python's `&` on negative ints behaves as infinite two's complement,
    # so this keeps exactly the low 32 bits as a non-negative value
    n &= 0xFFFFFFFF
    # Bit 31 is the sign bit: when set, shift the value into the negative half
    if n & 0x80000000:
        return n - 0x100000000
    return n
 class _ByteGenerator:
    def __init__(self, algo_id, seed):
        try:
            self._algorithm = getattr(self, f'_algo{algo_id}')
        except AttributeError:
            raise ExtractorError(f'Unknown algorithm ID "{algo_id}"')
-        self._s = to_signed_32(seed)
+        self._s = int_to_int32(seed)
    def _algo1(self, s):
        # LCG (a=1664525, c=1013904223, m=2^32)
        # Ref: https://en.wikipedia.org/wiki/Linear_congruential_generator
-        s = self._s = to_signed_32(s * 1664525 + 1013904223)
+        s = self._s = int_to_int32(s * 1664525 + 1013904223)
        return s
    def _algo2(self, s):
        # xorshift32
        # Ref: https://en.wikipedia.org/wiki/Xorshift
-        s = to_signed_32(s ^ (s << 13))
+        s = int_to_int32(s ^ (s << 13))
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 17))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 17))
-        s = self._s = to_signed_32(s ^ (s << 5))
+        s = self._s = int_to_int32(s ^ (s << 5))
        return s
    def _algo3(self, s):
        # Weyl Sequence (k≈2^32/φ, m=2^32) + MurmurHash3 (fmix32)
        # Ref: https://en.wikipedia.org/wiki/Weyl_sequence
        # https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html
-        s = self._s = to_signed_32(s + 0x9e3779b9)
+        s = self._s = int_to_int32(s + 0x9e3779b9)
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 16))
-        s = to_signed_32(s * to_signed_32(0x85ebca77))
+        s = int_to_int32(s * int_to_int32(0x85ebca77))
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 13))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 13))
-        s = to_signed_32(s * to_signed_32(0xc2b2ae3d))
+        s = int_to_int32(s * int_to_int32(0xc2b2ae3d))
-        return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
+        return int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 16))
    def _algo4(self, s):
        # Custom scrambling function involving a left rotation (ROL)
-        s = self._s = to_signed_32(s + 0x6d2b79f5)
+        s = self._s = int_to_int32(s + 0x6d2b79f5)
-        s = to_signed_32((s << 7) | ((s & 0xFFFFFFFF) >> 25))  # ROL 7
+        s = int_to_int32((s << 7) | ((s & 0xFFFFFFFF) >> 25))  # ROL 7
-        s = to_signed_32(s + 0x9e3779b9)
+        s = int_to_int32(s + 0x9e3779b9)
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 11))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 11))
-        return to_signed_32(s * 0x27d4eb2d)
+        return int_to_int32(s * 0x27d4eb2d)
    def _algo5(self, s):
        # xorshift variant with a final addition
-        s = to_signed_32(s ^ (s << 7))
+        s = int_to_int32(s ^ (s << 7))
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 9))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 9))
-        s = to_signed_32(s ^ (s << 8))
+        s = int_to_int32(s ^ (s << 8))
-        s = self._s = to_signed_32(s + 0xa5a5a5a5)
+        s = self._s = int_to_int32(s + 0xa5a5a5a5)
        return s
    def _algo6(self, s):
        # LCG (a=0x2c9277b5, c=0xac564b05) with a variable right shift scrambler
-        s = self._s = to_signed_32(s * to_signed_32(0x2c9277b5) + to_signed_32(0xac564b05))
+        s = self._s = int_to_int32(s * int_to_int32(0x2c9277b5) + int_to_int32(0xac564b05))
-        s2 = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 18))
+        s2 = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 18))
        shift = (s & 0xFFFFFFFF) >> 27 & 31
-        return to_signed_32((s2 & 0xFFFFFFFF) >> shift)
+        return int_to_int32((s2 & 0xFFFFFFFF) >> shift)
    def _algo7(self, s):
        # Weyl Sequence (k=0x9e3779b9) + custom multiply-xor-shift mixing function
-        s = self._s = to_signed_32(s + to_signed_32(0x9e3779b9))
+        s = self._s = int_to_int32(s + int_to_int32(0x9e3779b9))
-        e = to_signed_32(s ^ (s << 5))
+        e = int_to_int32(s ^ (s << 5))
-        e = to_signed_32(e * to_signed_32(0x7feb352d))
+        e = int_to_int32(e * int_to_int32(0x7feb352d))
-        e = to_signed_32(e ^ ((e & 0xFFFFFFFF) >> 15))
+        e = int_to_int32(e ^ ((e & 0xFFFFFFFF) >> 15))
-        return to_signed_32(e * to_signed_32(0x846ca68b))
+        return int_to_int32(e * int_to_int32(0x846ca68b))
    def __next__(self):
        return self._algorithm(self._s) & 0xFF
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -18,6 +18,14 @@ from .utils import (
 )
 def int_to_int32(n):
    """Converts an integer to a signed 32-bit integer

    Only the low 32 bits of `n` contribute to the result; values outside
    [-2**31, 2**31 - 1] wrap around.
    """
    # Bias by 2**31 so the signed range maps onto [0, 2**32), keep the low
    # 32 bits, then remove the bias again
    return ((n + 0x80000000) & 0xFFFFFFFF) - 0x80000000
 def _js_bit_op(op):
    def zeroise(x):
        if x in (None, JS_Undefined):
@@ -28,7 +36,7 @@ def _js_bit_op(op):
        return int(float(x))
    def wrapped(a, b):
-        return op(zeroise(a), zeroise(b)) & 0xffffffff
+        return int_to_int32(op(int_to_int32(zeroise(a)), int_to_int32(zeroise(b))))
    return wrapped