Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[server] Add support for proxies #33

Merged
merged 25 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c22d116
[plugin] pass proxy to the server
grqz Sep 17, 2024
cb7067e
sort import block
grqz Sep 17, 2024
40e4a2e
[debug] print proxies
grqz Sep 17, 2024
6142dea
stringify proxies
grqz Sep 17, 2024
1813218
Server side implementation WIP
Brainicism Sep 17, 2024
b5665f9
use ydl.urlopen again
Brainicism Sep 18, 2024
472e7e1
remove dundant object.values
Brainicism Sep 18, 2024
5928cad
remove ellipsis from the features tuple
grqz Sep 18, 2024
2e5bfc5
[plugin] remove redundant assignment to rh.proxies
grqx Sep 18, 2024
4ead3d2
Add error handling for bgutils functions
Brainicism Sep 18, 2024
32dd899
add support for proxies in process.env
grqx Sep 18, 2024
70f344e
variable naming
grqx Sep 18, 2024
a80e3cf
code formatting
grqx Sep 18, 2024
51a217a
process undefined proxy
grqx Sep 18, 2024
756c316
Error handling for bgConfig fetch
Brainicism Sep 18, 2024
9645c41
Merge branch 'server/proxy' of github.com:Brainicism/bgutil-ytdlp-pot…
Brainicism Sep 18, 2024
8385c3c
Add support for env ALL_PROXY
grqx Sep 18, 2024
6f7ecd2
code formatting
grqx Sep 18, 2024
7a0a9b0
Simplify retrieving proxy from env variables
Brainicism Sep 18, 2024
06728e7
Add trailing comma for _SUPPORTED_FEATURES
Brainicism Sep 18, 2024
74d2ba0
prioritise env HTTPS_PROXY over ALL_PROXY
grqx Sep 18, 2024
9d02474
Select proxy for youtube
Brainicism Sep 19, 2024
d97049e
Merge branch 'server/proxy' of github.com:Brainicism/bgutil-ytdlp-pot…
Brainicism Sep 19, 2024
231045e
switch to select_proxy with yt api hostname
grqx Sep 19, 2024
e1f7ea2
fix proxy type: bool->str
grqx Sep 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import json
import typing
import urllib.request

if typing.TYPE_CHECKING:
from yt_dlp import YoutubeDL

from yt_dlp.networking.common import Request
from yt_dlp.networking.common import Features
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest

try:
Expand All @@ -21,8 +22,11 @@
@register_provider
class BgUtilHTTPPotProviderRH(GetPOTProvider):
_PROVIDER_NAME = 'BgUtilHTTPPot'
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded',
'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
VERSION = __version__
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks5', ...)
_SUPPORTED_FEATURES = (Features.ALL_PROXY, ...)

def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
base_url = ydl.get_info_extractor('Youtube')._configuration_arg(
Expand All @@ -31,9 +35,10 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data
raise UnsupportedRequest(
'One of [data_sync_id, visitor_data] must be passed')
try:
response = ydl.urlopen(Request(f'{base_url}/ping', extensions={'timeout': 5.0}))
response = urllib.request.urlopen(f'{base_url}/ping', timeout=5)
except Exception as e:
raise UnsupportedRequest(f'Error reaching GET /ping (caused by {e!s})') from e
raise UnsupportedRequest(
f'Error reaching GET /ping (caused by {e!s})') from e
try:
response = json.load(response)
except json.JSONDecodeError as e:
Expand All @@ -51,15 +56,16 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data

def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
self._logger.info('Generating POT via HTTP server')
self._logger.debug(f'Proxies: {self.proxies!r}')

try:
response = ydl.urlopen(Request(
response = urllib.request.urlopen(urllib.request.Request(
f'{self.base_url}/get_pot', data=json.dumps({
'client': client,
'visitor_data': visitor_data,
'data_sync_id': data_sync_id,
}).encode(), headers={'Content-Type': 'application/json'},
extensions={'timeout': 12.5}))
'proxies': self.proxies.values(),
}).encode(), headers={'Content-Type': 'application/json'}), timeout=12.5)
except Exception as e:
raise RequestError(
f'Error reaching POST /get_pot (caused by {e!s})') from e
Expand All @@ -80,4 +86,5 @@ def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=

@register_preference(BgUtilHTTPPotProviderRH)
def bgutil_HTTP_getpot_preference(rh, request):
rh.proxies = rh._get_proxies(request)
grqz marked this conversation as resolved.
Show resolved Hide resolved
return 0
13 changes: 11 additions & 2 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

if typing.TYPE_CHECKING:
from yt_dlp import YoutubeDL
from yt_dlp.networking.common import Features
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest
from yt_dlp.utils import Popen, classproperty

Expand All @@ -23,8 +24,11 @@
@register_provider
class BgUtilScriptPotProviderRH(GetPOTProvider):
_PROVIDER_NAME = 'BgUtilScriptPot'
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded',
'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
VERSION = __version__
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks5', ...)
_SUPPORTED_FEATURES = (Features.ALL_PROXY, ...)

@classproperty(cache=True)
def _default_script_path(self):
Expand All @@ -51,8 +55,11 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data
def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
self._logger.info(
f'Generating POT via script: {self.script_path}')
self._logger.debug(f'Proxies: {self.proxies!r}')

command_args = ['node', self.script_path]
if proxy := self.proxies: # maybe?
command_args.extend(['-p', ','.join(proxy.values())])
grqz marked this conversation as resolved.
Show resolved Hide resolved
if data_sync_id:
command_args.extend(['-d', data_sync_id])
elif visitor_data:
Expand All @@ -75,7 +82,8 @@ def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=
msg += f'\nstderr:\n{stderr.strip()}'
self._logger.debug(msg)
if returncode:
raise RequestError(f'_get_pot_via_script failed with returncode {returncode}')
raise RequestError(
f'_get_pot_via_script failed with returncode {returncode}')

try:
# The JSON response is always the last line
Expand All @@ -94,4 +102,5 @@ def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=

# Preference hook registered for BgUtilScriptPotProviderRH.
# Stores the result of rh._get_proxies(request) on the handler as rh.proxies
# (read later by _get_pot to build the `-p` argument) and returns a constant
# preference value of 100.
# NOTE(review): presumably a higher value ranks this provider above the HTTP
# provider, whose hook returns 0 — confirm against register_preference.
@register_preference(BgUtilScriptPotProviderRH)
def bgutil_script_getpot_preference(rh, request):
rh.proxies = rh._get_proxies(request)
return 100
3 changes: 3 additions & 0 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@
},
"dependencies": {
"@commander-js/extra-typings": "commander-js/extra-typings",
"axios": "^1.7.7",
"bgutils-js": "^1.1.0",
"body-parser": "^1.20.2",
"commander": "^12.1.0",
"express": "^4.19.2",
"https-proxy-agent": "^7.0.5",
"jsdom": "^25.0.0",
"socks-proxy-agent": "^8.0.4",
"youtubei.js": "^10.4.0"
},
"devDependencies": {
Expand Down
8 changes: 7 additions & 1 deletion server/src/generate_once.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const CACHE_PATH = path.resolve(__dirname, "..", "cache.json");
// Command-line options for one-shot token generation.
// `--proxies` takes a single comma-separated string; it is split on "," and
// empty entries are dropped before being passed to generatePoToken.
const program = new Command()
    .option("-v, --visitor-data <visitordata>")
    .option("-d, --data-sync-id <data-sync-id>")
    .option("-p, --proxies <comma-separated-proxies>")
    .option("--verbose");

program.parse();
Expand Down Expand Up @@ -57,7 +58,12 @@ const options = program.opts();
visitIdentifier = generatedVisitorData;
}

const sessionData = await sessionManager.generatePoToken(visitIdentifier);
const proxies = (options.proxies || "").split(",").filter((x) => x);
const sessionData = await sessionManager.generatePoToken(
visitIdentifier,
proxies,
);

try {
fs.writeFileSync(
CACHE_PATH,
Expand Down
7 changes: 5 additions & 2 deletions server/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const sessionManager = new SessionManager(options.verbose || false);
httpServer.post("/get_pot", async (request, response) => {
const visitorData = request.body.visitor_data as string;
const dataSyncId = request.body.data_sync_id as string;

const proxies: string[] = Object.values(request.body.proxies);
Brainicism marked this conversation as resolved.
Show resolved Hide resolved
let visitIdentifier: string;

// prioritize data sync id for authenticated requests, if passed
Expand All @@ -51,7 +51,10 @@ httpServer.post("/get_pot", async (request, response) => {
visitIdentifier = generatedVisitorData;
}

const sessionData = await sessionManager.generatePoToken(visitIdentifier);
const sessionData = await sessionManager.generatePoToken(
visitIdentifier,
proxies,
);
response.send({
po_token: sessionData.poToken,
visit_identifier: sessionData.visitIdentifier,
Expand Down
65 changes: 60 additions & 5 deletions server/src/session_manager.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import { BG } from "bgutils-js";
import { BG, BgConfig } from "bgutils-js";
import { JSDOM } from "jsdom";
import { Innertube } from "youtubei.js";
import { HttpsProxyAgent } from "https-proxy-agent";
import axios from "axios";
import { Agent } from "https";
import { SocksProxyAgent } from "socks-proxy-agent";

interface YoutubeSessionData {
poToken: string;
Expand Down Expand Up @@ -63,6 +67,10 @@ export class SessionManager {
if (this.shouldLog) console.log(msg);
}

warn(msg: string) {
if (this.shouldLog) console.warn(msg);
grqz marked this conversation as resolved.
Show resolved Hide resolved
}

async generateVisitorData(): Promise<string | null> {
const innertube = await Innertube.create({ retrieve_player: false });
const visitorData = innertube.session.context.client.visitorData;
Expand All @@ -74,9 +82,41 @@ export class SessionManager {
return visitorData;
}

/**
 * Build the agent used to route requests through `proxy`.
 *
 * @param proxy Proxy URL, e.g. `http://host:port` or `socks5://host:port`.
 *   A value without a `scheme://` prefix is treated as a plain HTTP proxy.
 * @returns An `HttpsProxyAgent` for http/https proxies, a `SocksProxyAgent`
 *   for socks/socks4/socks5 proxies, or `undefined` (after a warning) when
 *   the URL cannot be parsed or its scheme is not supported.
 */
getProxyDispatcher(proxy: string): Agent | undefined {
    // `new URL("host:8080")` either throws or reads "host" as the scheme,
    // so give scheme-less proxies an explicit http:// prefix before parsing
    // and before handing the string to the agent.
    const proxyUrl = proxy.includes("://") ? proxy : `http://${proxy}`;

    let protocol: string;
    try {
        // URL.protocol carries a trailing colon ("http:"); strip it.
        protocol = new URL(proxyUrl).protocol.replace(":", "");
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
    } catch (e) {
        // Still unparseable after normalisation: report it the same way as
        // an unsupported scheme instead of failing inside the agent.
        this.warn(`Invalid proxy URL: ${proxy}`);
        return undefined;
    }

    switch (protocol) {
        case "http":
        case "https":
            this.log(`Using HTTPS proxy: ${proxy}`);
            return new HttpsProxyAgent(proxyUrl);
        case "socks":
        case "socks4":
        case "socks5":
            this.log(`Using Socks proxy: ${proxy}`);
            return new SocksProxyAgent(proxyUrl);
        default:
            this.warn(`Unsupported proxy protocol: ${proxy}`);
            return undefined;
    }
}
// mostly copied from https://github.com/LuanRT/BgUtils/tree/main/examples/node
async generatePoToken(
visitIdentifier: string,
proxies: string[] = [],
): Promise<YoutubeSessionData> {
this.cleanupCaches();
const sessionData = this.youtubeSessionDataCaches[visitIdentifier];
Expand All @@ -98,17 +138,32 @@ export class SessionManager {
globalThis.window = dom.window as any;
globalThis.document = dom.window.document;

const bgConfig = {
fetch: (url: any, options: any) => fetch(url, options),
let dispatcher: Agent | undefined;
if (proxies.length) {
dispatcher = this.getProxyDispatcher(proxies[0]!);
}

const bgConfig: BgConfig = {
fetch: async (url: any, options: any): Promise<any> => {
const response = await axios.post(url, options.body, {
headers: options.headers,
httpsAgent: dispatcher,
});

return {
ok: true,
json: async () => {
return response.data;
},
};
},
globalObj: globalThis,
identity: visitIdentifier,
requestKey,
};

const challenge = await BG.Challenge.create(bgConfig);

if (!challenge) throw new Error("Could not get Botguard challenge");

if (challenge.script) {
const script = challenge.script.find((sc) => sc !== null);
if (script) new Function(script)();
Expand Down
61 changes: 60 additions & 1 deletion server/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ acorn@^8.11.0, acorn@^8.12.0, acorn@^8.4.1, acorn@^8.8.0:
resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.12.1.tgz#71616bdccbe25e27a54439e0046e89ca76df2248"
integrity sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==

agent-base@^7.0.2, agent-base@^7.1.0:
agent-base@^7.0.2, agent-base@^7.1.0, agent-base@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.1.tgz#bdbded7dfb096b751a2a087eeeb9664725b2e317"
integrity sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==
Expand Down Expand Up @@ -393,6 +393,15 @@ asynckit@^0.4.0:
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==

axios@^1.7.7:
version "1.7.7"
resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f"
integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==
dependencies:
follow-redirects "^1.15.6"
form-data "^4.0.0"
proxy-from-env "^1.1.0"

balanced-match@^1.0.0:
version "1.0.2"
resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee"
Expand Down Expand Up @@ -886,6 +895,11 @@ flatted@^3.2.9:
resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.3.1.tgz#21db470729a6734d4997002f439cb308987f567a"
integrity sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==

follow-redirects@^1.15.6:
version "1.15.9"
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.9.tgz#a604fa10e443bf98ca94228d9eebcc2e8a2c8ee1"
integrity sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==

form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
Expand Down Expand Up @@ -1052,6 +1066,14 @@ inherits@2.0.4:
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==

ip-address@^9.0.5:
version "9.0.5"
resolved "https://registry.yarnpkg.com/ip-address/-/ip-address-9.0.5.tgz#117a960819b08780c3bd1f14ef3c1cc1d3f3ea5a"
integrity sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==
dependencies:
jsbn "1.1.0"
sprintf-js "^1.1.3"

ipaddr.js@1.9.1:
version "1.9.1"
resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz#bff38543eeb8984825079ff3a2a8e6cbd46781b3"
Expand Down Expand Up @@ -1103,6 +1125,11 @@ js-yaml@^4.1.0:
dependencies:
argparse "^2.0.1"

jsbn@1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-1.1.0.tgz#b01307cb29b618a1ed26ec79e911f803c4da0040"
integrity sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==

jsdom@^25.0.0:
version "25.0.0"
resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-25.0.0.tgz#d1612b4ddab85af56821b2f731e15faae135f4e1"
Expand Down Expand Up @@ -1368,6 +1395,11 @@ proxy-addr@~2.0.7:
forwarded "0.2.0"
ipaddr.js "1.9.1"

proxy-from-env@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==

psl@^1.1.33:
version "1.9.0"
resolved "https://registry.yarnpkg.com/psl/-/psl-1.9.0.tgz#d0df2a137f00794565fcaf3b2c00cd09f8d5a5a7"
Expand Down Expand Up @@ -1532,6 +1564,33 @@ side-channel@^1.0.4:
get-intrinsic "^1.2.4"
object-inspect "^1.13.1"

smart-buffer@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/smart-buffer/-/smart-buffer-4.2.0.tgz#6e1d71fa4f18c05f7d0ff216dd16a481d0e8d9ae"
integrity sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==

socks-proxy-agent@^8.0.4:
version "8.0.4"
resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.4.tgz#9071dca17af95f483300316f4b063578fa0db08c"
integrity sha512-GNAq/eg8Udq2x0eNiFkr9gRg5bA7PXEWagQdeRX4cPSG+X/8V38v637gim9bjFptMk1QWsCTr0ttrJEiXbNnRw==
dependencies:
agent-base "^7.1.1"
debug "^4.3.4"
socks "^2.8.3"

socks@^2.8.3:
version "2.8.3"
resolved "https://registry.yarnpkg.com/socks/-/socks-2.8.3.tgz#1ebd0f09c52ba95a09750afe3f3f9f724a800cb5"
integrity sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==
dependencies:
ip-address "^9.0.5"
smart-buffer "^4.2.0"

sprintf-js@^1.1.3:
version "1.1.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.1.3.tgz#4914b903a2f8b685d17fdf78a70e917e872e444a"
integrity sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==

statuses@2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/statuses/-/statuses-2.0.1.tgz#55cb000ccf1d48728bd23c685a063998cf1a1b63"
Expand Down
Loading