|
|
@@ -3,8 +3,8 @@ import lzma
|
|
|
import pathlib
|
|
|
import time
|
|
|
import typing
|
|
|
+import json as stdlib_json
|
|
|
|
|
|
-import cbor2
|
|
|
import httpx
|
|
|
import urllib.parse
|
|
|
|
|
|
@@ -29,7 +29,7 @@ def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=
|
|
|
assert parsed.hostname is not None
|
|
|
cache_filename = urllib.parse.quote(parsed.path.removeprefix('/'), safe='')
|
|
|
if json:
|
|
|
- cache_filename += '.cbor'
|
|
|
+ cache_filename += '.json'
|
|
|
cache_filename += '.xz'
|
|
|
cache_path = CACHE_DIR / parsed.hostname / cache_filename
|
|
|
|
|
|
@@ -37,18 +37,16 @@ def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=
|
|
|
if cache_path.stat().st_mtime > time.time() - expiry.total_seconds(): # less than 10 minutes old
|
|
|
with lzma.open(cache_path, 'rb') as f:
|
|
|
if json:
|
|
|
- return cbor2.load(f)
|
|
|
+ return stdlib_json.loads(f.read())
|
|
|
else:
|
|
|
return f.read().decode('utf-8')
|
|
|
except FileNotFoundError:
|
|
|
pass # fall through
|
|
|
except Exception as e:
|
|
|
- # EXTREME DETAIL: When a user aborts the script mid-execution (or dependencies fail), an incomplete
|
|
|
- # .xz/.cbor file can be left in the local 'cache' directory.
|
|
|
- # The cbor2 library recently updated to use a strict Rust backend. When it encounters a malformed file,
|
|
|
- # it triggers a pyo3_runtime.PanicException ("buffer size mismatch: [63, 0]") instead of a standard python exception.
|
|
|
- # We explicitly catch all generic Exceptions here. This allows the script to gracefully ignore the corrupted cache,
|
|
|
- # print a warning, and fall through to re-download the fresh data from the internet.
|
|
|
+ # EXTREME DETAIL: We catch exceptions here to gracefully recover from corrupted local files.
|
|
|
+ # Previously, this used cbor2, whose Rust backend raised pyo3 PanicException (a BaseException subclass that `except Exception` does not catch) when reading corrupted data.
|
|
|
+ # Now that we use the standard library json module, standard Exceptions will be caught,
|
|
|
+ # allowing the system to fall through and fetch fresh data instead of crashing.
|
|
|
print(f"Warning: Corrupted cache detected for {url} ({type(e).__name__}). Fetching fresh data...")
|
|
|
pass # fall through
|
|
|
|
|
|
@@ -57,7 +55,7 @@ def get(url: str, *, json=True, headers=None, expiry=datetime.timedelta(minutes=
|
|
|
with lzma.open(cache_path, 'wb') as f:
|
|
|
if json:
|
|
|
data = r.json()
|
|
|
- cbor2.dump(data, f)
|
|
|
+ f.write(stdlib_json.dumps(data).encode('utf-8'))
|
|
|
else:
|
|
|
data = r.text
|
|
|
f.write(data.encode('utf-8'))
|