From 187e1ffdd20f6b712a892cdcb752ac27f73710f8 Mon Sep 17 00:00:00 2001 From: Commandcracker <49335821+Commandcracker@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:13:48 +0200 Subject: [PATCH] httpx http2 and headers WIP --- pyproject.toml | 4 +- src/gucken/aniskip.py | 4 +- src/gucken/gucken.py | 2 +- src/gucken/hoster/common.py | 5 +- src/gucken/hoster/doodstream.py | 3 +- src/gucken/hoster/streamtape.py | 5 +- src/gucken/hoster/veo.py | 5 +- src/gucken/hoster/vidoza.py | 4 +- src/gucken/networking.py | 115 ++++++++++++++ src/gucken/provider/aniworld.py | 10 +- src/gucken/provider/serienstream.py | 28 ++-- src/gucken/resources/default_settings.toml | 3 + .../gucken/resources}/user_agents.json | 0 src/gucken/tracker/anilist.py | 6 +- src/gucken/tracker/myanimelist.py | 4 +- src/gucken/update.py | 4 +- test/headers.jsonc | 150 ++++++++++++++++++ test/networking.py | 32 +++- test/test.py | 27 ++++ 19 files changed, 370 insertions(+), 41 deletions(-) create mode 100644 src/gucken/networking.py rename {test => src/gucken/resources}/user_agents.json (100%) create mode 100644 test/headers.jsonc create mode 100644 test/test.py diff --git a/pyproject.toml b/pyproject.toml index 70a727f..e61784c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,9 @@ maintainers = [{name="Commandcracker"}] license = {file = "LICENSE.txt"} readme = "README.md" dependencies = [ - "textual>=0.63.3", + "textual>=0.64.0", "beautifulsoup4>=4.12.3", - "httpx>=0.27.0", + "httpx[http2]>=0.27.0", "pypresence>=4.3.0", "packaging>=24.0", "platformdirs>=4.2.2", diff --git a/src/gucken/aniskip.py b/src/gucken/aniskip.py index 470dca9..1ccd286 100644 --- a/src/gucken/aniskip.py +++ b/src/gucken/aniskip.py @@ -3,8 +3,8 @@ from dataclasses import dataclass from fuzzywuzzy import process -from httpx import AsyncClient +from .networking import AsyncClient from .tracker.myanimelist import search from .rome import replace_roman_numerals @@ -20,7 +20,7 @@ class SkipTimes: async def 
get_timings_from_id( anime_id: int, episode_number: int ) -> Union[SkipTimes, None]: - async with (AsyncClient(verify=False) as client): + async with AsyncClient() as client: response = await client.get( f"https://api.aniskip.com/v1/skip-times/{anime_id}/{episode_number}?types=op&types=ed" ) diff --git a/src/gucken/gucken.py b/src/gucken/gucken.py index 8eb8874..181c3ff 100644 --- a/src/gucken/gucken.py +++ b/src/gucken/gucken.py @@ -509,7 +509,7 @@ async def disable_RPC(self): # TODO: https://textual.textualize.io/guide/workers/#thread-workers # TODO: Exit on error when debug = true - @work(exclusive=True, exit_on_error=False) + @work(exclusive=True) #exit_on_error=False async def lookup_anime(self, keyword: str) -> None: search_providers = [] if self.query_one("#aniworld_to", Checkbox).value: diff --git a/src/gucken/hoster/common.py b/src/gucken/hoster/common.py index fb81c18..790c6e8 100644 --- a/src/gucken/hoster/common.py +++ b/src/gucken/hoster/common.py @@ -1,7 +1,8 @@ from abc import abstractmethod from dataclasses import dataclass -from httpx import AsyncClient, HTTPError +from ..networking import AsyncClient +from httpx import HTTPError @dataclass @@ -13,7 +14,7 @@ async def check_is_working(self) -> bool: try: async with AsyncClient(verify=False) as client: response = await client.head( - self.url, follow_redirects=True, headers=self.headers + self.url, headers=self.headers ) return response.is_success except HTTPError: diff --git a/src/gucken/hoster/doodstream.py b/src/gucken/hoster/doodstream.py index 3043cfb..eae79c9 100644 --- a/src/gucken/hoster/doodstream.py +++ b/src/gucken/hoster/doodstream.py @@ -5,8 +5,7 @@ from time import time from urllib.parse import urlparse -from httpx import AsyncClient - +from ..networking import AsyncClient from .common import DirectLink, Hoster EXTRACT_DOODSTREAM_HLS_PATTERN = re_compile(r"/pass_md5/[\w-]+/[\w-]+") diff --git a/src/gucken/hoster/streamtape.py b/src/gucken/hoster/streamtape.py index 517d4e0..9f64ed8 
100644 --- a/src/gucken/hoster/streamtape.py +++ b/src/gucken/hoster/streamtape.py @@ -1,7 +1,6 @@ from re import compile as re_compile -from httpx import AsyncClient - +from ..networking import AsyncClient from .common import DirectLink, Hoster STREAMTAPE_PATTERN = re_compile(r"botlink(.*?)innerHTML(.*?)\);") @@ -13,7 +12,7 @@ class StreamtapeHoster(Hoster): async def get_direct_link(self) -> DirectLink: # TODO: Error checking async with AsyncClient(verify=False) as client: - response = await client.get(self.url, follow_redirects=True) + response = await client.get(self.url) # TODO: Save html and error in order to investigate # with open("out.txt", "wb") as f: # f.write(response.text.encode('utf-8')) diff --git a/src/gucken/hoster/veo.py b/src/gucken/hoster/veo.py index d4a7dea..7968eba 100644 --- a/src/gucken/hoster/veo.py +++ b/src/gucken/hoster/veo.py @@ -1,8 +1,7 @@ from base64 import b64decode from re import compile as re_compile -from httpx import AsyncClient - +from ..networking import AsyncClient from .common import DirectLink, Hoster EXTRACT_VEO_HLS_PATTERN = re_compile(r"'hls': '(.*?)'") @@ -11,7 +10,7 @@ class VOEHoster(Hoster): async def get_direct_link(self) -> DirectLink: async with AsyncClient(verify=False) as client: - response = await client.get(self.url, follow_redirects=True) + response = await client.get(self.url) match_hls = EXTRACT_VEO_HLS_PATTERN.search(response.text) hls_link = match_hls.group(1) return DirectLink(b64decode(hls_link).decode()) diff --git a/src/gucken/hoster/vidoza.py b/src/gucken/hoster/vidoza.py index 3582995..a6c9654 100644 --- a/src/gucken/hoster/vidoza.py +++ b/src/gucken/hoster/vidoza.py @@ -1,6 +1,6 @@ from re import compile as re_compile -from httpx import AsyncClient +from ..networking import AsyncClient from .common import DirectLink, Hoster @@ -13,6 +13,6 @@ class VidozaHoster(Hoster): async def get_direct_link(self) -> DirectLink: async with AsyncClient(verify=False) as client: - response = await 
"""Preconfigured ``httpx`` client used for all outgoing HTTP requests.

The :class:`AsyncClient` defined here subclasses ``httpx.AsyncClient`` and
adds browser-like default headers, HTTP/2, redirect following, an optional
http:// -> https:// upgrade and an automatic ``Referer`` header.
"""
from enum import Enum
from json import loads
from pathlib import Path
from random import choice
from urllib.parse import urlparse

from httpx import AsyncClient as HttpxAsyncClient, Response, AsyncBaseTransport, Headers

from rich import print
from asyncio import run


# https://www.useragents.me/
# https://github.com/microlinkhq/top-user-agents/blob/master/src/index.json
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/utils/networking.py
# TODO: generate and dict into ios android mac win etc
user_agents_path = Path(__file__).parent.joinpath("resources", "user_agents.json")
# Explicit encoding: the platform default is not UTF-8 everywhere.
with open(user_agents_path, "r", encoding="utf-8") as f:
    user_agents_raw = f.read()
user_agents = loads(user_agents_raw)


class AsyncHTTPSRedirectTransport(AsyncBaseTransport):
    """Transport that redirects every request to the same URL over https."""

    async def handle_async_request(self, request) -> Response:
        # No network I/O happens here; the synthetic 303 makes a client that
        # follows redirects retry the URL with the scheme swapped to https.
        url = request.url.copy_with(scheme="https")
        return Response(303, headers={"Location": str(url)})


class AcceptLanguage(Enum):
    """Selects which ``Accept-Language`` header :class:`AsyncClient` sends."""

    EN = 0
    DE = 1


class AsyncClient(HttpxAsyncClient):
    """``httpx.AsyncClient`` with browser-like defaults.

    Args:
        http2: Forwarded to httpx; enabled by default.
        follow_redirects: Forwarded to httpx; enabled by default.
        auto_referer: When true, :meth:`request` adds a ``Referer`` header
            holding the origin (``scheme://host[:port]``) of the target URL
            unless the caller already supplied one.
        https_only: When true, ``http://`` URLs are answered by
            :class:`AsyncHTTPSRedirectTransport`, unless the caller passed
            their own ``"http://"`` mount.
        accept_language: Which ``Accept-Language`` value to send.
        **kwargs: Passed through to ``httpx.AsyncClient``. Entries in
            ``headers`` take precedence over the defaults built here.

    Raises:
        ValueError: If ``accept_language`` is not a known
            :class:`AcceptLanguage` member.
    """

    def __init__(
        self,
        *args,
        http2: bool = True,
        follow_redirects: bool = True,
        auto_referer: bool = True,
        https_only: bool = True,
        accept_language: AcceptLanguage = AcceptLanguage.EN,
        **kwargs
    ) -> None:
        # verify=False
        self.auto_referer = auto_referer
        kwargs["http2"] = http2
        kwargs["follow_redirects"] = follow_redirects

        # aiodns / dnspython[doh]
        # socksio - SOCKS proxy support. (Optional, with httpx[socks])

        user_agent = choice(user_agents)
        default_headers = {
            # Add others
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            # "br" = "brotli" -> httpx[brotli]
            # "zstd" -> httpx[zstd] wait for next release https://github.com/encode/httpx/pull/3139
            # "Accept-Encoding": "gzip, deflate, br", httpx is covering this
            # "Accept-Language": "en-us,en;q=0.5", see below
            # "Host": "xxx", httpx is covering this
            # "Sec-Ch-Ua-Platform": "macOS",  # only on mac
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",  # Not on iphone
            "Upgrade-Insecure-Requests": "1",  # Not on iphone
            "User-Agent": user_agent
            # "X-Amzn-Trace-Id": "Root=1-xxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxx"
            # TODO: More variation
        }

        # Compare against the enum class, not attributes of the argument, so
        # a non-enum value reaches the ValueError below instead of raising
        # AttributeError.
        if accept_language is AcceptLanguage.EN:
            default_headers["Accept-Language"] = "en-us,en;q=0.5"  # "en-US,en;q=0.9", "en-US"
        elif accept_language is AcceptLanguage.DE:
            default_headers["Accept-Language"] = choice([
                "de-DE,de;q=0.9",
                "de",  # found on macos
                "de-DE,de;q=0.9",  # found on ios
                "de-DE,de",
                "de,en-US;q=0.7,en;q=0.3"
            ])
        else:
            raise ValueError(f"unsupported accept_language: {accept_language!r}")

        # Caller-supplied headers win over the defaults. ``Headers`` merges
        # case-insensitively and accepts dicts, ``Headers`` and tuple lists.
        merged_headers = Headers(default_headers)
        caller_headers = kwargs.get("headers")
        if caller_headers is not None:
            merged_headers.update(caller_headers)
        kwargs["headers"] = merged_headers

        if https_only:
            # Keep any mounts the caller configured; only add the upgrade
            # transport when "http://" is not already mounted.
            mounts = dict(kwargs.get("mounts") or {})
            mounts.setdefault("http://", AsyncHTTPSRedirectTransport())
            kwargs["mounts"] = mounts

        super().__init__(*args, **kwargs)

    async def request(self, method, url, **kwargs) -> Response:
        """Send a request, adding an origin ``Referer`` when enabled.

        ``method`` and ``url`` may be passed positionally or by keyword;
        ``url`` may be a ``str`` or an ``httpx.URL``. All other arguments
        are forwarded unchanged to ``httpx.AsyncClient.request``.
        """
        if self.auto_referer:
            target = urlparse(str(url))
            # A relative URL (resolved later against base_url) has no origin
            # to advertise, so no Referer is added for it.
            if target.scheme and target.netloc:
                # Drop "user:password@" so credentials never leak into the header.
                host = target.netloc.rpartition("@")[2]
                request_headers = Headers(kwargs.get("headers"))
                # setdefault keeps a Referer the caller set explicitly.
                request_headers.setdefault("Referer", f"{target.scheme}://{host}")
                kwargs["headers"] = request_headers
        return await super().request(method, url, **kwargs)


async def main():
    """Print the headers httpbin sees for this client and for plain httpx."""
    async with AsyncClient() as client:
        response = await client.get("https://httpbin.org/headers")
        print(response.json())
    async with HttpxAsyncClient() as client:
        response = await client.get("https://httpbin.org/headers")
        print(response.json())

if __name__ == "__main__":
    run(main())
AsyncClient(accept_language=AcceptLanguage.DE) as client: response = await client.get(search_result.url) soup = BeautifulSoup(response.text, "html.parser") @@ -235,7 +235,7 @@ async def get_series(search_result: AniWorldSearchResult) -> AniWorldSeries: async def get_episodes_from_url(staffel: int, url: str) -> list[Episode]: - async with AsyncClient(verify=False) as client: + async with AsyncClient(accept_language=AcceptLanguage.DE) as client: response = await client.get(f"{url}/staffel-{staffel}") return await get_episodes_from_page(staffel, url, response.text) diff --git a/src/gucken/provider/serienstream.py b/src/gucken/provider/serienstream.py index d0d4f82..bcb262c 100644 --- a/src/gucken/provider/serienstream.py +++ b/src/gucken/provider/serienstream.py @@ -4,14 +4,24 @@ from typing import Union from bs4 import BeautifulSoup -from httpx import AsyncClient +from ..networking import AcceptLanguage, AsyncClient from ..hoster.doodstream import DoodstreamHoster from ..hoster.streamtape import StreamtapeHoster from ..hoster.veo import VOEHoster from ..hoster.vidoza import VidozaHoster from .common import Episode, Hoster, Language, Provider, SearchResult, Series +# TODO: Timeouts +# TODO: use base_url +# TODO: faster json +# TODO: reuse same client +# TODO: do serienstream resolve using mounts (remove veryfy fale from hosts) + + +headers = {"Host": "serienstream.to"} +extensions = {"sni_hostname": "serienstream.to"} + def provider_to_hoster(provider: str, url: str) -> Hoster: if provider == "VOE": @@ -42,9 +52,9 @@ class SerienStreamEpisode(Episode): url: str async def process_hoster(self) -> dict[Language, list[Hoster]]: - async with AsyncClient(verify=False) as client: + async with AsyncClient(accept_language=AcceptLanguage.DE) as client: response = await client.get( - f"{self.url}/staffel-{self.season}/episode-{self.episode_number}" + f"{self.url}/staffel-{self.season}/episode-{self.episode_number}", headers=headers, extensions=extensions ) soup = 
BeautifulSoup(response.text, "html.parser") watch_episode = soup.find_all( @@ -132,9 +142,9 @@ class SerienStreamProvider(Provider): async def search(keyword: str) -> Union[list[SerienStreamSearchResult], None]: if keyword.strip() == "": return None - async with AsyncClient(verify=False) as client: + async with AsyncClient(accept_language=AcceptLanguage.DE) as client: response = await client.get( - f"https://{SerienStreamProvider.host}/ajax/seriesSearch?keyword={keyword}" + f"https://{SerienStreamProvider.host}/ajax/seriesSearch?keyword={keyword}", headers=headers, extensions=extensions ) results = response.json() search_results = [] @@ -154,8 +164,8 @@ async def search(keyword: str) -> Union[list[SerienStreamSearchResult], None]: @staticmethod async def get_series(search_result: SerienStreamSearchResult) -> SerienStreamSeries: - async with AsyncClient(verify=False) as client: - response = await client.get(search_result.url) + async with AsyncClient(accept_language=AcceptLanguage.DE) as client: + response = await client.get(search_result.url, headers=headers, extensions=extensions) soup = BeautifulSoup(response.text, "html.parser") tags = [] @@ -236,8 +246,8 @@ async def get_series(search_result: SerienStreamSearchResult) -> SerienStreamSer async def get_episodes_from_url(staffel: int, url: str) -> list[Episode]: - async with AsyncClient(verify=False) as client: - response = await client.get(f"{url}/staffel-{staffel}") + async with AsyncClient(accept_language=AcceptLanguage.DE) as client: + response = await client.get(f"{url}/staffel-{staffel}", headers=headers, extensions=extensions) return await get_episodes_from_page(staffel, url, response.text) diff --git a/src/gucken/resources/default_settings.toml b/src/gucken/resources/default_settings.toml index f000590..b0296e3 100644 --- a/src/gucken/resources/default_settings.toml +++ b/src/gucken/resources/default_settings.toml @@ -39,5 +39,8 @@ serienstream_to = false [settings.ui] dark = true +#[settings.networking] 
+#doh = true + [meta] version = "0.0.0" diff --git a/test/user_agents.json b/src/gucken/resources/user_agents.json similarity index 100% rename from test/user_agents.json rename to src/gucken/resources/user_agents.json diff --git a/src/gucken/tracker/anilist.py b/src/gucken/tracker/anilist.py index 2bcc625..3432c5c 100644 --- a/src/gucken/tracker/anilist.py +++ b/src/gucken/tracker/anilist.py @@ -1,4 +1,4 @@ -from httpx import AsyncClient +from ..networking import AsyncClient SEARCH_QUERY = """ query ($id: Int, $page: Int, $perPage: Int, $search: String) { @@ -24,9 +24,9 @@ async def search(keyword: str) -> dict: - async with AsyncClient(verify=False) as client: + async with AsyncClient() as client: response = await client.post( - f"https://graphql.anilist.co", + "https://graphql.anilist.co", headers={"Content-Type": "application/json"}, json={"query": SEARCH_QUERY, "variables": {"search": keyword}}, ) diff --git a/src/gucken/tracker/myanimelist.py b/src/gucken/tracker/myanimelist.py index 2e4ae29..02039c8 100644 --- a/src/gucken/tracker/myanimelist.py +++ b/src/gucken/tracker/myanimelist.py @@ -1,8 +1,8 @@ -from httpx import AsyncClient +from ..networking import AsyncClient async def search(keyword: str) -> dict: - async with AsyncClient(verify=False) as client: + async with AsyncClient() as client: response = await client.get( f"https://myanimelist.net/search/prefix.json?type=anime&keyword={keyword}" ) diff --git a/src/gucken/update.py b/src/gucken/update.py index b521564..754f9a3 100644 --- a/src/gucken/update.py +++ b/src/gucken/update.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Union -from httpx import AsyncClient +from .networking import AsyncClient from packaging.version import Version from . 
import __version__ as current_version @@ -16,7 +16,7 @@ class UpdateResult: async def get_latest_version(): - async with AsyncClient(verify=False) as client: + async with AsyncClient() as client: response = await client.get(f"https://pypi.org/pypi/{PACKAGE_NAME}/json") return response.json().get("info").get("version") diff --git a/test/headers.jsonc b/test/headers.jsonc new file mode 100644 index 0000000..681ad09 --- /dev/null +++ b/test/headers.jsonc @@ -0,0 +1,150 @@ +[ + //macOS + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,/;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Accept-Language": "de", + "Dnt": "1", + "Host": "httpbin.org", + "Priority": "u=0, i", + "Sec-Ch-Ua": "Chromium\";v=\"125\", \"Not.A/Brand\";v=\"24\"", + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "macOS", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36", + "X-Amzn-Trace-Id": "Root=1-665df75c-2a24536a0b636caa36cb3a57" + } + }, + + //Tor + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US,en;q=0.5", + "Host": "httpbin.org", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0", + "X-Amzn-Trace-Id": "Root=1-665e1681-5e4a006c7334a06c760a2380" + } + }, + + //Mullvad + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Encoding": "gzip, 
deflate, br", + "Accept-Language": "en-US,en;q=0.5", + "Host": "httpbin.org", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0", + "X-Amzn-Trace-Id": "Root=1-665e16ba-75d40e1c0e03729a28850f31" + } + }, + + //Brave + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Accept-Language": "de-DE,de", + "Host": "httpbin.org", + "Priority": "u=0, i", + "Sec-Ch-Ua": "\"Brave\";v=\"125\", \"Chromium\";v=\"125\", \"Not.A/Brand\";v=\"24\"", + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "\"Windows\"", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Sec-Gpc": "1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36", + "X-Amzn-Trace-Id": "Root=1-665e16d5-06d8ed0e43e359bf4bd2da73" + } + }, + + //Firefox + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Accept-Language": "en-US,en;q=0.5", + "Host": "httpbin.org", + "Priority": "u=1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0", + "X-Amzn-Trace-Id": "Root=1-665e16e7-5b28d0dc1bde17235fe17e2e" + } + }, + + //IOS + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "de-DE,de;q=0.9", + "Host": 
"httpbin.org", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17 5 1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1", + "X-Amzn-Trace-Id": "Root=1-665df74e-5809d2b45a2982bf332b5fe7" + } + }, + + //Android + //Mull + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,/;q=0.8", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US", + "Dnt": "1", + "Host": "httpbin.org", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "cross-site", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Android 10; Mobile; rv:125.0) Gecko/125.0 Firefox/125.0", + "X-Amzn-Trace-Id": "Root=1-665e1eee-06ebbe017945d83e0c8b77b0" + } + }, + //Chrome + { + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,/;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Accept-Language": "en-US,en;q=0.9", + "Host": "httpbin.org", + "Priority": "u=0, i", + "Sec-Ch-Ua": "Google Chrome\";v=\"125\", \"Chromium\";v=\"125\", \"Not.A/Brand\";v=\"24\"", + "Sec-Ch-Ua-Mobile": "?1", + "Sec-Ch-Ua-Platform": "Android", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Mobile Safari/537.36", + "X-Amzn-Trace-Id": "Root=1-665e1f3e-30270d5344f061280d5f95ab" + } + } +] \ No newline at end of file diff --git a/test/networking.py b/test/networking.py index a88443a..a008610 100644 --- a/test/networking.py +++ b/test/networking.py @@ -38,6 +38,31 @@ def random_user_agent(): # TODO: More variation } +import requests +import base64 +import struct + + 
def create_dns_query(domain, *, qtype=1, transaction_id=0x1234):
    """Build a DNS query message in wire format for ``domain``.

    The message has a 12-byte header (standard query, recursion desired,
    one question) followed by a single question of class IN.

    Args:
        domain: Host name to look up. A trailing dot is accepted; ``""`` and
            ``"."`` address the root. Non-ASCII labels are IDNA-encoded.
        qtype: Numeric record type of the question; defaults to 1 (A).
        transaction_id: 16-bit message ID placed in the header.

    Returns:
        The encoded query as ``bytes``.

    Raises:
        ValueError: If a label is empty, a label exceeds 63 bytes, or the
            encoded name exceeds 255 bytes.
        struct.error: If ``qtype`` or ``transaction_id`` does not fit in an
            unsigned 16-bit field.
    """
    flags = 0x0100  # standard query
    questions = 1
    header = struct.pack(">HHHHHH", transaction_id, flags, questions, 0, 0, 0)

    # "example.com." and "example.com" name the same host; without this the
    # empty label after the final dot would emit a premature terminator.
    name = domain[:-1] if domain.endswith(".") else domain

    encoded_labels = []
    for label in (name.split(".") if name else []):
        if not label:
            raise ValueError(f"empty label in domain name: {domain!r}")
        try:
            raw = label.encode("ascii")
        except UnicodeEncodeError:
            raw = label.encode("idna")
        # Length bytes above 63 are reserved on the wire (their top bits
        # mark compression pointers), so longer labels cannot be encoded.
        if len(raw) > 63:
            raise ValueError(f"label longer than 63 bytes in domain name: {domain!r}")
        encoded_labels.append(struct.pack("B", len(raw)) + raw)

    qname = b"".join(encoded_labels) + b"\x00"  # zero byte ends the name
    if len(qname) > 255:
        raise ValueError(f"domain name longer than 255 bytes: {domain!r}")

    return header + qname + struct.pack(">HH", qtype, 1)  # QTYPE, QCLASS=IN
import httpx

import dns.message
import dns.query
import dns.rdatatype

# pip install httpx[http2]
# pip install dnspython[doh]

def main():
    """Query the A records of youtube.com over DoH and print each answer."""
    resolver_url = "https://dns.mullvad.net/dns-query"
    hostname = "youtube.com"
    # One HTTP/2 client is opened for the lookup and closed on exit.
    with httpx.Client(http2=True) as http_client:
        question = dns.message.make_query(hostname, dns.rdatatype.A)
        reply = dns.query.https(question, resolver_url, session=http_client)
        for entry in reply.answer:
            print(entry)

        # ... do more lookups


if __name__ == "__main__":
    main()