|
@@ -3,8 +3,8 @@ import lzma
|
|
|
import pathlib
|
|
import pathlib
|
|
|
import time
|
|
import time
|
|
|
import typing
|
|
import typing
|
|
|
-import json as stdlib_json
|
|
|
|
|
|
|
|
|
|
|
|
+import cbor2
|
|
|
import httpx
|
|
import httpx
|
|
|
import urllib.parse
|
|
import urllib.parse
|
|
|
|
|
|
|
@@ -29,7 +29,7 @@ def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=
|
|
|
assert parsed.hostname is not None
|
|
assert parsed.hostname is not None
|
|
|
cache_filename = urllib.parse.quote(parsed.path.removeprefix('/'), safe='')
|
|
cache_filename = urllib.parse.quote(parsed.path.removeprefix('/'), safe='')
|
|
|
if json:
|
|
if json:
|
|
|
- cache_filename += '.json'
|
|
|
|
|
|
|
+ cache_filename += '.cbor'
|
|
|
cache_filename += '.xz'
|
|
cache_filename += '.xz'
|
|
|
cache_path = CACHE_DIR / parsed.hostname / cache_filename
|
|
cache_path = CACHE_DIR / parsed.hostname / cache_filename
|
|
|
|
|
|
|
@@ -37,16 +37,18 @@ def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=
|
|
|
if cache_path.stat().st_mtime > time.time() - expiry.total_seconds(): # less than 10 minutes old
|
|
if cache_path.stat().st_mtime > time.time() - expiry.total_seconds(): # less than 10 minutes old
|
|
|
with lzma.open(cache_path, 'rb') as f:
|
|
with lzma.open(cache_path, 'rb') as f:
|
|
|
if json:
|
|
if json:
|
|
|
- return stdlib_json.loads(f.read())
|
|
|
|
|
|
|
+ return cbor2.load(f)
|
|
|
else:
|
|
else:
|
|
|
return f.read().decode('utf-8')
|
|
return f.read().decode('utf-8')
|
|
|
except FileNotFoundError:
|
|
except FileNotFoundError:
|
|
|
pass # fall through
|
|
pass # fall through
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
- # EXTREME DETAIL: We catch exceptions here to gracefully recover from corrupted local files.
|
|
|
|
|
- # Previously, this used cbor2, which threw uncatchable Rust panics when reading corrupted data.
|
|
|
|
|
- # Now that we use the standard library json module, standard Exceptions will be caught,
|
|
|
|
|
- # allowing the system to fall through and fetch fresh data instead of crashing.
|
|
|
|
|
|
|
+ # NOTE: cbor2 was previously removed because corrupted cache files were reported to
|
|
|
|
|
+ # trigger Rust panics (pyo3_runtime.PanicException) in older iterations.
|
|
|
|
|
+ # Now that we are reverting to cbor2 at collaborators' request, keep this broad
|
|
|
|
|
+ # Exception catch: if cbor2 raises an ordinary decoding error on a malformed binary
|
|
|
|
|
+ # payload, we print a warning and fall through to re-download the data. Caveat:
|
|
|
|
|
+ # PanicException derives from BaseException, so `except Exception` will NOT catch it.
|
|
|
print(f"Warning: Corrupted cache detected for {url} ({type(e).__name__}). Fetching fresh data...")
|
|
print(f"Warning: Corrupted cache detected for {url} ({type(e).__name__}). Fetching fresh data...")
|
|
|
pass # fall through
|
|
pass # fall through
|
|
|
|
|
|
|
@@ -55,7 +57,7 @@ def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=
|
|
|
with lzma.open(cache_path, 'wb') as f:
|
|
with lzma.open(cache_path, 'wb') as f:
|
|
|
if json:
|
|
if json:
|
|
|
data = r.json()
|
|
data = r.json()
|
|
|
- f.write(stdlib_json.dumps(data).encode('utf-8'))
|
|
|
|
|
|
|
+ cbor2.dump(data, f)
|
|
|
else:
|
|
else:
|
|
|
data = r.text
|
|
data = r.text
|
|
|
f.write(data.encode('utf-8'))
|
|
f.write(data.encode('utf-8'))
|