"""HTTP GET helper with an on-disk, xz-compressed response cache."""

import datetime
import lzma
import pathlib
import time
import typing
import json as stdlib_json
import urllib.parse

import httpx

if typing.TYPE_CHECKING:
    import httpx._types

ONE_DAY = datetime.timedelta(days=1)
CACHE_DIR = pathlib.Path(__file__).parent / 'cache'

# Total number of tries get_with_retries() makes when the server is slow to
# answer (httpx.ReadTimeout) before letting the exception propagate.
READ_TIMEOUT_ATTEMPTS = 5

client = httpx.Client(transport=httpx.HTTPTransport(http2=True, retries=3), timeout=5)  # retry on ConnectError and ConnectTimeout


@typing.overload
def get(url: str, *, json: typing.Literal[True]=True, headers: httpx._types.HeaderTypes|None=None, expiry: datetime.timedelta=datetime.timedelta(minutes=10)) -> typing.Any: ...
@typing.overload
def get(url: str, *, json: typing.Literal[False], headers: httpx._types.HeaderTypes|None=None, expiry: datetime.timedelta=datetime.timedelta(minutes=10)) -> str: ...
def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=10)) -> typing.Any:
    """Fetch *url*, serving the result from the local cache when it is fresh.

    The cache entry lives at ``CACHE_DIR / <hostname> / <quoted path>[.json].xz``
    and is considered fresh while its mtime is younger than *expiry*.

    Args:
        url: Absolute URL to fetch; it must contain a hostname.
        json: When true, the body is parsed as JSON and the parsed value is
            returned; otherwise the body is returned as text.
        headers: Optional request headers, passed through to httpx.
        expiry: Maximum age of a cache entry before it is re-fetched.

    Returns:
        The parsed JSON value (``json=True``) or the response text.

    Raises:
        httpx.HTTPStatusError: The server answered with an error status.
        httpx.ReadTimeout: Every attempt made by get_with_retries() timed out.
    """
    parsed = urllib.parse.urlparse(url)
    assert parsed.hostname is not None
    # NOTE: only the hostname and the path take part in the cache key; the
    # query string is not included, so URLs that differ only in their query
    # share one cache entry.
    cache_filename = urllib.parse.quote(parsed.path.removeprefix('/'), safe='')
    if json:
        cache_filename += '.json'
    cache_filename += '.xz'
    cache_path = CACHE_DIR / parsed.hostname / cache_filename

    try:
        if cache_path.stat().st_mtime > time.time() - expiry.total_seconds():  # younger than `expiry`
            with lzma.open(cache_path, 'rb') as f:
                cached = f.read()
            if json:
                return stdlib_json.loads(cached)
            return cached.decode('utf-8')
    except FileNotFoundError:
        pass  # no cache entry yet; fall through and fetch
    except Exception as e:
        # Deliberately broad: any failure to read or decode the cache entry
        # (truncated xz stream, invalid JSON, bad UTF-8, ...) is treated as a
        # corrupted cache, and fresh data is fetched instead of crashing.
        # Historical note carried over from the previous comment: the cache
        # used to be encoded with cbor2, where corrupted data could not be
        # recovered from this way; the stdlib json module raises ordinary
        # exceptions that end up here.
        print(f"Warning: Corrupted cache detected for {url} ({type(e).__name__}). \nFetching fresh data...")

    r = get_with_retries(url, headers)
    # Decode and serialise the body *before* opening the cache file: opening
    # it for writing truncates it, so a response that fails to parse must not
    # leave an empty entry with a fresh mtime behind.
    if json:
        data = r.json()
        payload = stdlib_json.dumps(data).encode('utf-8')
    else:
        data = r.text
        payload = data.encode('utf-8')
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    with lzma.open(cache_path, 'wb') as f:
        f.write(payload)
    return data


def get_with_retries(url: str, headers: httpx._types.HeaderTypes|None=None) -> httpx.Response:
    """GET *url* with the shared client, retrying when reading the response times out.

    Up to READ_TIMEOUT_ATTEMPTS requests are made. Only httpx.ReadTimeout
    triggers a retry; every other exception propagates immediately.

    Args:
        url: URL to request.
        headers: Optional request headers, passed through to httpx.

    Returns:
        The response, after ``raise_for_status()`` has accepted its status.

    Raises:
        httpx.ReadTimeout: The final attempt timed out as well.
        httpx.HTTPStatusError: The server answered with an error status.
    """
    for attempt in range(1, READ_TIMEOUT_ATTEMPTS + 1):
        try:
            return client.get(url, headers=headers).raise_for_status()
        except httpx.ReadTimeout:
            if attempt == READ_TIMEOUT_ATTEMPTS:
                raise
            print(url, 'attempt', attempt, 'timed out; retrying...')
    # Unreachable: the loop either returns or re-raises on its last pass.
    raise AssertionError