b5bf689e72
- Add yfinance.org and defeatbeta-api.org reference docs - Fix defeatbeta_mapping.org: deprecated yfinance property names (quarterly_financials→quarterly_income_stmt, financials→income_stmt), longName vs longBusinessSummary conceptual mismatch, cashflow note typo - Add Mapping Limitations section with live verification results (AAPL): DuckDB 1.4.3 incompatibility, format differences, coverage gaps - Add docs/test_mapping.py as runnable mapping verification script - Add offline.py, persistent_cache.py, download_data.py, warmup_cache.py for offline/cached defeatbeta usage - Add aapl_yfinance.py exploration script and quant.py scaffold - Add .envrc (uv layout) and update pyproject.toml + uv.lock Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
68 lines
2.7 KiB
Python
68 lines
2.7 KiB
Python
"""
|
|
Patch defeatbeta_api to read from local parquet files instead of the network.
|
|
|
|
Usage:
|
|
from offline import enable_offline
|
|
enable_offline("data/parquet") # call once before first Ticker()
|
|
|
|
from defeatbeta_api.data.ticker import Ticker
|
|
t = Ticker("AAPL")
|
|
t.price() # reads local file, no HTTP
|
|
|
|
Note: the one-time welcome banner on first import calls get_data_update_time()
|
|
once. After that, nothing touches the network.
|
|
"""
|
|
from pathlib import Path
|
|
|
|
|
|
def enable_offline(parquet_dir: str = "data/parquet") -> None:
    """Monkey-patch defeatbeta_api so it reads local files instead of the network.

    Call this once, before the first ``Ticker()`` is created.  The patches are
    assigned onto the library's classes and module, so they apply to every
    later use of those classes in the current process.

    Args:
        parquet_dir: Directory containing one ``<table>.parquet`` file per
            table plus ``company_tickers.json``.  A leading ``~`` is expanded
            to the user's home directory, and the result is made absolute at
            call time.

    Note:
        The directory itself is not checked here.  A missing table file is
        reported later, as ``FileNotFoundError``, when the patched
        ``HuggingFaceClient.get_url_path`` is asked for that table.
    """
    # expanduser() must run before resolve(): resolve() alone would treat
    # "~" as a literal directory name relative to the working directory.
    local_dir = Path(parquet_dir).expanduser().resolve()

    # Importing these submodules triggers defeatbeta_api/__init__.py on first
    # run (prints the welcome banner — one network call). After that it's a
    # no-op because _welcome_printed is True.
    from defeatbeta_api.client.hugging_face_client import HuggingFaceClient
    import defeatbeta_api.client.duckdb_client as _duckdb_mod
    from defeatbeta_api.client.duckdb_client import DuckDBClient
    from defeatbeta_api.client.duckdb_conf import Configuration
    from defeatbeta_api.data.company_meta import CompanyMeta
    from defeatbeta_api.utils.util import validate_memory_limit

    # 1. Redirect every table URL to a local parquet file
    def _local_url(self, table: str) -> str:
        path = local_dir / f"{table}.parquet"
        if not path.exists():
            raise FileNotFoundError(
                f"Local parquet not found: {path}\n"
                "Run download_data.py first."
            )
        return str(path)

    HuggingFaceClient.get_url_path = _local_url

    # 2. Return a fixed update time (used by beta() and the welcome banner)
    HuggingFaceClient.get_data_update_time = lambda self: "offline"

    # 3. Skip the startup cache-validation (hits HuggingFace spec.json)
    DuckDBClient._validate_httpfs_cache = lambda self: None

    # 4. Skip "INSTALL cache_httpfs FROM community" (hits DuckDB extension
    #    registry) and all the cache_httpfs SET GLOBAL lines that follow —
    #    not needed for local files. Keep only memory and thread settings.
    def _minimal_settings(self):
        return [
            f"SET GLOBAL memory_limit = '{validate_memory_limit(self.memory_limit)}'",
            f"SET GLOBAL threads = {self.threads}",
        ]

    Configuration.get_duckdb_settings = _minimal_settings

    # 5. Redirect company_tickers.json to local file
    CompanyMeta.COMPANY_TICKERS_URL = str(local_dir / "company_tickers.json")

    # 6. Reset the DuckDB singleton so the next Ticker() call reinitialises
    #    using the patched Configuration (no cache_httpfs install/load)
    _duckdb_mod._instance = None

    print(f"[offline] defeatbeta_api patched → reading from {local_dir}")