docs: add API references, mapping corrections, and verification script

- Add yfinance.org and defeatbeta-api.org reference docs
- Fix defeatbeta_mapping.org: deprecated yfinance property names
  (quarterly_financials→quarterly_income_stmt, financials→income_stmt),
  longName vs longBusinessSummary conceptual mismatch, cashflow note typo
- Add Mapping Limitations section with live verification results (AAPL):
  DuckDB 1.4.3 incompatibility, format differences, coverage gaps
- Add docs/test_mapping.py as runnable mapping verification script
- Add offline.py, persistent_cache.py, download_data.py, warmup_cache.py
  for offline/cached defeatbeta usage
- Add aapl_yfinance.py exploration script and quant.py scaffold
- Add .envrc (uv layout) and update pyproject.toml + uv.lock

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 15:33:21 +08:00
parent b71a8e77b0
commit b5bf689e72
16 changed files with 3650 additions and 141 deletions
+1
View File
@@ -0,0 +1 @@
layout uv
+119
View File
@@ -0,0 +1,119 @@
# AAPL - All yfinance Data

```python
# Shared setup: every cell below reads `ticker` (and `mo` for rendering).
import yfinance as yf
import marimo as mo
ticker = yf.Ticker("AAPL")
```

## Price History

```python
# One year of daily bars; only the most recent 30 rows are shown.
history = ticker.history(period="1y", interval="1d")
mo.ui.table(history.tail(30))
```

## Dividends

```python
mo.ui.table(ticker.dividends.tail(20))
```

## Splits

```python
mo.ui.table(ticker.splits)
```

## Income Statement

```python
mo.ui.table(ticker.get_income_stmt())
```

## Balance Sheet

```python
mo.ui.table(ticker.get_balance_sheet())
```

## Cash Flow

```python
mo.ui.table(ticker.get_cashflow())
```

## Analyst Recommendations

```python
mo.ui.table(ticker.recommendations.tail(20))
```

## Analyst Target Prices

```python
# yfinance exposes analyst targets as `analyst_price_targets`;
# `Ticker` has no `target_prices` attribute.
mo.ui.table([ticker.analyst_price_targets])
```

## Earnings Dates

```python
mo.ui.table(ticker.earnings_dates)
```

## Options Expirations

```python
# Expirations come from the `options` property; `Ticker` has no `get_options()` method.
expirations = ticker.options
f"Available: {expirations}"
```

```python
# A cell renders only its last top-level expression, so both tables are stacked
# into one output instead of being called inside an `if` body (which shows nothing).
chain = ticker.option_chain(expirations[0]) if expirations else None
mo.vstack([mo.ui.table(chain.calls), mo.ui.table(chain.puts)]) if chain is not None else None
```

## Major Holders

```python
mo.ui.table(ticker.major_holders)
```

## Institutional Holders

```python
mo.ui.table(ticker.institutional_holders)
```

## Sustainability / ESG

```python
# NOTE(review): assumes `sustainability` is tabular (DataFrame) or None when Yahoo
# has no ESG data for the ticker — confirm against the installed yfinance version.
esg = ticker.sustainability
mo.ui.table(esg) if esg is not None else mo.md("No ESG data available")
```

## Calendar / Events

```python
mo.ui.table(ticker.calendar)
```

## Company Info Summary

```python
# `.get()` keeps the summary renderable when Yahoo omits a field (value becomes None).
info = ticker.info
{
    "name": info.get("longName"),
    "sector": info.get("sector"),
    "industry": info.get("industry"),
    "marketCap": info.get("marketCap"),
    "peRatio": info.get("trailingPE"),
    "fwdDividend": info.get("dividendYield"),
    "52wkHigh": info.get("fiftyTwoWeekHigh"),
    "52wkLow": info.get("fiftyTwoWeekLow"),
    "avgTarget": info.get("targetMeanPrice"),
    "recommendation": info.get("recommendationKey"),
}
```
+279
View File
@@ -0,0 +1,279 @@
# marimo notebook: renders the datasets yfinance exposes for a single ticker (AAPL).
# Each section is a hidden markdown heading cell followed by a data cell; a cell's
# last bare expression is what marimo displays.
import marimo

__generated_with = "0.23.2"
app = marimo.App(width="full")


@app.cell
def _():
    # Shared setup: every other cell receives `mo` and/or `ticker` from here.
    import yfinance as yf
    import marimo as mo

    ticker = yf.Ticker("AAPL")
    return mo, ticker


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    # AAPL — All Data via yfinance
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Price History
    """)
    return


@app.cell
def _(ticker):
    # Full available daily history (period="max").
    history = ticker.history(period="max", interval="1d")
    history
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Info
    """)
    return


@app.cell
def _(mo, ticker):
    # Wrapped in a list so the whole info mapping is shown as a single table row.
    info = ticker.info
    mo.ui.table([info])
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Dividends
    """)
    return


@app.cell
def _(ticker):
    dividends = ticker.dividends
    dividends
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Splits
    """)
    return


@app.cell
def _(ticker):
    splits = ticker.splits
    splits
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Income Statement
    """)
    return


@app.cell
def _(mo, ticker):
    inc = ticker.get_income_stmt()
    mo.ui.table(inc)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Balance Sheet
    """)
    return


@app.cell
def _(mo, ticker):
    bal = ticker.get_balance_sheet()
    mo.ui.table(bal)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Cash Flow
    """)
    return


@app.cell
def _(mo, ticker):
    cf = ticker.get_cashflow()
    mo.ui.table(cf)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Recommendations
    """)
    return


@app.cell
def _(mo, ticker):
    recs = ticker.recommendations
    mo.ui.table(recs)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Analyst Target Prices
    """)
    return


@app.cell
def _(mo, ticker):
    # yfinance exposes analyst targets as `analyst_price_targets`;
    # `Ticker` has no `target_prices` attribute, so the old lookup raised AttributeError.
    target = ticker.analyst_price_targets
    # A dict is shown as one row; anything already tabular is passed through.
    mo.ui.table([target] if isinstance(target, dict) else target)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Earnings Dates
    """)
    return


@app.cell
def _(mo, ticker):
    earn_dates = ticker.earnings_dates
    mo.ui.table(earn_dates)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Options
    """)
    return


@app.cell
def _(mo, ticker):
    # `expirations` is returned so the option-chain cell below can depend on it.
    expirations = ticker.options
    mo.md(f"Available expirations: {expirations}")
    return (expirations,)


@app.cell
def _(expirations, mo, ticker):
    # One option_chain() call per expiration; calls and puts are rendered as
    # separate tables under their own sub-headings.
    sections = []
    for exp in expirations:
        chain = ticker.option_chain(exp)
        sections += [
            mo.md(f"### {exp} — Calls"),
            mo.ui.table(chain.calls),
            mo.md(f"### {exp} — Puts"),
            mo.ui.table(chain.puts),
        ]
    # Render nothing at all when the ticker has no expirations.
    mo.vstack(sections) if sections else None
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Major Holders
    """)
    return


@app.cell
def _(mo, ticker):
    major = ticker.major_holders
    mo.ui.table(major)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Institutional Holders
    """)
    return


@app.cell
def _(mo, ticker):
    inst = ticker.institutional_holders
    mo.ui.table(inst)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Mutual Fund Holders
    """)
    return


@app.cell
def _(mo, ticker):
    mf = ticker.mutualfund_holders
    mo.ui.table(mf)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Sustainability / ESG
    """)
    return


@app.cell
def _(mo, ticker):
    sus = ticker.sustainability
    # Mirror the target/calendar cells: only a dict is wrapped into a one-row list.
    # NOTE(review): assumes a non-dict, non-None result is already tabular
    # (e.g. a DataFrame) — confirm against the installed yfinance version.
    if sus is None:
        esg_rows = [{"data": "No ESG data available"}]
    elif isinstance(sus, dict):
        esg_rows = [sus]
    else:
        esg_rows = sus
    mo.ui.table(esg_rows)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md("""
    ## Calendar / Events
    """)
    return


@app.cell
def _(mo, ticker):
    cal = ticker.calendar
    # A dict is shown as one row; anything already tabular is passed through.
    mo.ui.table([cal] if isinstance(cal, dict) else cal)
    return


if __name__ == "__main__":
    app.run()
+1147 -9
View File
File diff suppressed because one or more lines are too long
+203
View File
@@ -0,0 +1,203 @@
#+TITLE: defeatbeta-api Reference
#+AUTHOR: Wong Ding Feng
#+DATE: 2026-04-25
* How Data Retrieval Works
** NOT a full download
Uses *DuckDB + ~cache_httpfs~ extension* querying *remote Parquet files on HuggingFace*
(~defeatbeta/yahoo-finance-data~). Every query runs SQL directly against remote files:
#+begin_src sql
SELECT * FROM 'https://huggingface.co/.../stock_prices.parquet' WHERE symbol = 'AAPL'
#+end_src
Because Parquet is columnar and DuckDB applies *predicate pushdown*, only the row groups
matching your ticker are fetched over HTTP range requests — not the full 3-4 GB file.
** On-disk cache
- Default 1 GB cache at ~~/.defeatbeta/cache/~
- Stores fetched blocks so repeated queries are fast
- On startup: checks ~spec.json~ on HuggingFace, clears stale cache if dataset was updated
* Getting All Available Tickers
#+begin_src python
from defeatbeta_api.data.company_meta import CompanyMeta
meta = CompanyMeta()
all_tickers = meta.get_all_tickers() # List[str]
all_companies = meta.get_all_companies_info() # List[dict]: symbol, name, cik, currency
#+end_src
Reads ~company_tickers.json~ from HuggingFace — a small JSON, not the big Parquet files.
* Single Ticker API — ~Ticker("AAPL")~
#+begin_src python
from defeatbeta_api.data.ticker import Ticker
t = Ticker("AAPL")
#+end_src
** Company Info
| Method | Returns | What it gives |
|----------------------------+-------------+---------------------------------------------------------|
| ~info()~ | DataFrame | Profile: name, sector, industry, description, headcount |
| ~officers()~ | DataFrame | Executive officers |
| ~sec_filing()~ | DataFrame | SEC filings list |
| ~news()~ | ~News~ object | Latest news articles |
| ~earning_call_transcripts()~ | ~Transcripts~ | Earnings call transcripts |
| ~calendar()~ | DataFrame | Upcoming earnings dates |
** Prices & Basic Finance
| Method | Returns | What it gives |
|------------------------------------+-----------+------------------------------|
| ~price()~ | DataFrame | Historical OHLCV prices |
| ~splits()~ | DataFrame | Stock split events |
| ~dividends()~ | DataFrame | Dividend payment history |
| ~shares()~ | DataFrame | Shares outstanding over time |
| ~beta(period="5y", benchmark="SPY")~ | DataFrame | Calculated beta vs benchmark |
| ~currency(symbol)~ | DataFrame | Exchange rate history |
| ~ttm_eps()~ | DataFrame | Trailing 12-month EPS |
** Financial Statements
| Method | Returns | What it gives |
|------------------------------+-----------+-------------------------|
| ~quarterly_income_statement()~ | ~Statement~ | Quarterly P&L |
| ~annual_income_statement()~ | ~Statement~ | Annual P&L |
| ~quarterly_balance_sheet()~ | ~Statement~ | Quarterly balance sheet |
| ~annual_balance_sheet()~ | ~Statement~ | Annual balance sheet |
| ~quarterly_cash_flow()~ | ~Statement~ | Quarterly cash flow |
| ~annual_cash_flow()~ | ~Statement~ | Annual cash flow |
** TTM Aggregates
| Method | Returns | What it gives |
|----------------------------------------+-----------+----------------------------------|
| ~ttm_revenue()~ | DataFrame | Trailing 12-month revenue |
| ~ttm_fcf()~ | DataFrame | Trailing 12-month free cash flow |
| ~ttm_ebitda()~ | DataFrame | Trailing 12-month EBITDA |
| ~ttm_net_income_common_stockholders()~ | DataFrame | Trailing 12-month net income |
| ~ttm_pe()~ | DataFrame | Trailing P/E (price / ttm_eps) |
** Revenue Breakdown
| Method | Returns | What it gives |
|------------------------+-----------+-----------------------------|
| ~revenue_by_segment()~ | DataFrame | Revenue by business segment |
| ~revenue_by_geography()~ | DataFrame | Revenue by region |
| ~revenue_by_product()~ | DataFrame | Revenue by product line |
** Valuation Multiples
| Method | Returns | What it gives |
|-------------------------+-----------+------------------------------|
| ~market_capitalization()~ | DataFrame | Historical market cap |
| ~ps_ratio()~ | DataFrame | Price/Sales ratio |
| ~pb_ratio()~ | DataFrame | Price/Book ratio |
| ~peg_ratio()~ | DataFrame | PEG ratio |
| ~enterprise_value()~ | DataFrame | Enterprise value |
| ~enterprise_to_revenue()~ | DataFrame | EV/Revenue |
| ~enterprise_to_ebitda()~ | DataFrame | EV/EBITDA |
| ~debt_to_equity()~ | DataFrame | D/E ratio |
| ~net_debt_ttm()~ | DataFrame | Net debt (TTM) |
| ~wacc()~ | DataFrame | Weighted avg cost of capital |
** Profitability Returns
| Method | Returns | What it gives |
|---------------------+-----------+------------------------------------|
| ~roe()~ | DataFrame | Return on equity |
| ~roa()~ | DataFrame | Return on assets |
| ~roic()~ | DataFrame | Return on invested capital |
| ~roce()~ | DataFrame | Return on capital employed |
| ~equity_multiplier()~ | DataFrame | Financial leverage (assets/equity) |
| ~asset_turnover()~ | DataFrame | Revenue/assets efficiency |
** Margins
| Method | Returns | What it gives |
|------------------------------+-----------+--------------------|
| ~quarterly_gross_margin()~ | DataFrame | Gross margin % |
| ~annual_gross_margin()~ | DataFrame | Gross margin % |
| ~quarterly_operating_margin()~ | DataFrame | Operating margin % |
| ~annual_operating_margin()~ | DataFrame | Operating margin % |
| ~quarterly_net_margin()~ | DataFrame | Net margin % |
| ~annual_net_margin()~ | DataFrame | Net margin % |
| ~quarterly_ebitda_margin()~ | DataFrame | EBITDA margin % |
| ~annual_ebitda_margin()~ | DataFrame | EBITDA margin % |
| ~quarterly_fcf_margin()~ | DataFrame | FCF margin % |
| ~annual_fcf_margin()~ | DataFrame | FCF margin % |
** YoY Growth
| Method | Returns | What it gives |
|-----------------------------------------+-----------+---------------------|
| ~quarterly_revenue_yoy_growth()~ | DataFrame | Revenue growth % |
| ~annual_revenue_yoy_growth()~ | DataFrame | Revenue growth % |
| ~quarterly_operating_income_yoy_growth()~ | DataFrame | Op. income growth % |
| ~annual_operating_income_yoy_growth()~ | DataFrame | Op. income growth % |
| ~quarterly_ebitda_yoy_growth()~ | DataFrame | EBITDA growth % |
| ~annual_ebitda_yoy_growth()~ | DataFrame | EBITDA growth % |
| ~quarterly_net_income_yoy_growth()~ | DataFrame | Net income growth % |
| ~annual_net_income_yoy_growth()~ | DataFrame | Net income growth % |
| ~quarterly_fcf_yoy_growth()~ | DataFrame | FCF growth % |
| ~annual_fcf_yoy_growth()~ | DataFrame | FCF growth % |
| ~quarterly_eps_yoy_growth()~ | DataFrame | EPS growth % |
| ~quarterly_ttm_eps_yoy_growth()~ | DataFrame | TTM EPS growth % |
** Industry Comparisons
Uses the ticker's own industry to benchmark against peers.
| Method | Returns | What it gives |
|------------------------------------+-----------+--------------------------|
| ~industry_ttm_pe()~ | DataFrame | Avg P/E across industry |
| ~industry_ps_ratio()~ | DataFrame | Industry P/S |
| ~industry_pb_ratio()~ | DataFrame | Industry P/B |
| ~industry_roe()~ | DataFrame | Industry ROE |
| ~industry_roa()~ | DataFrame | Industry ROA |
| ~industry_roic()~ | DataFrame | Industry ROIC |
| ~industry_equity_multiplier()~ | DataFrame | Industry leverage |
| ~industry_asset_turnover()~ | DataFrame | Industry efficiency |
| ~industry_quarterly_gross_margin()~ | DataFrame | Industry gross margin % |
| ~industry_quarterly_ebitda_margin()~ | DataFrame | Industry EBITDA margin % |
| ~industry_quarterly_net_margin()~ | DataFrame | Industry net margin % |
** DCF / Advanced
| Method | Returns | What it gives |
|-----------------------------+---------+----------------------------------------|
| ~dcf_data()~ | dict | All raw inputs for a DCF model |
| ~dcf()~ | dict | Full DCF valuation + exports ~.xlsx~ |
| ~download_data_performance()~ | str | Timing summary of data fetch durations |
* Multi-Ticker API — ~Tickers(["AAPL", "NVDA"])~
#+begin_src python
from defeatbeta_api.data.tickers import Tickers
t = Tickers(["AAPL", "NVDA"])
t = Tickers(["AAPL", "NVDA"], max_workers=2) # limit parallelism
#+end_src
Wraps all ~Ticker~ methods, running them in *parallel threads*.
- Methods returning simple data → *combined DataFrame* (all tickers in one table)
- Methods returning complex objects (statements, news, transcripts) → ~{symbol: result}~ dict
Method names are the same as on ~Ticker~. Industry comparison methods run once per unique
industry represented in the list rather than once per ticker.
#+begin_src python
t.info() # → DataFrame (combined)
t.price() # → DataFrame (combined)
t.annual_income_statement() # → {'AAPL': Statement(...), 'NVDA': Statement(...)}
t.news() # → {'AAPL': News(...), 'NVDA': News(...)}
t.earning_call_transcripts() # → {'AAPL': Transcripts(...), 'NVDA': Transcripts(...)}
t.industry_roe() # → DataFrame (one row per unique industry)
#+end_src
+185 -127
View File
@@ -7,20 +7,20 @@
# Overview
| Category | Yahoo Finance | DefeatBeta-API | Notes |
|----------|--------------|----------------|-------|
| **Data Source** | Yahoo Finance API | HuggingFace + DuckDB | No rate limits |
| **Query Engine** | Direct API | DuckDB OLAP | Sub-second queries |
| **Update Frequency** | Real-time (15min delayed) | Daily batch | DefeatBeta is historical only |
| **Historical Depth** | Full history | Full history | Comparable coverage |
| **Special Features** | Limited | Earnings transcripts, DCF, AI | DefeatBeta has unique capabilities |
| Category | Yahoo Finance | DefeatBeta-API | Notes |
|--------------------+---------------------------+-------------------------------+------------------------------------|
| **Data Source** | Yahoo Finance API | HuggingFace + DuckDB | No rate limits |
| **Query Engine** | Direct API | DuckDB OLAP | Sub-second queries |
| **Update Frequency** | Near real-time (quotes delayed up to 15 min) | Daily batch | DefeatBeta is historical only |
| **Historical Depth** | Full history | Full history | Comparable coverage |
| **Special Features** | Limited | Earnings transcripts, DCF, AI | DefeatBeta has unique capabilities |
* Price & Volume Data
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.history(period='max')~ | ~ticker.price()~ | pandas.DataFrame | OHLCV data |
| ~ticker.history(period='1d')~ | N/A | - | Real-time not available |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|----------------------------------------+-------------------------+------------------+--------------------------|
| ~ticker.history(period='max')~ | ~ticker.price()~ | pandas.DataFrame | OHLCV data |
| ~ticker.history(period='1d')~ | N/A | - | Real-time not available |
| ~ticker.history(start='...', end='...')~ | ~ticker.price()~ (filter) | pandas.DataFrame | Date filtering available |
* DefeatBeta Price Data Structure
@@ -33,14 +33,14 @@
* Financial Statements
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.quarterly_financials~ | ~ticker.quarterly_income_statement()~ | Statement object | Different format |
| ~ticker.financials~ | ~ticker.annual_income_statement()~ | Statement object | Annual version |
| ~ticker.quarterly_balance_sheet~ | ~ticker.quarterly_balance_sheet()~ | Statement object | Same structure |
| ~ticker.balance_sheet~ | ~ticker.annual_balance_sheet()~ | Statement object | Annual version |
| ~ticker.quarterly_cashflow~ | ~ticker.quarterly_cash_flow()~ | Statement object | Note: 'flow' vs 'flow' |
| ~ticker.cashflow~ | ~ticker.annual_cash_flow()~ | Statement object | Annual version |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------------------------+-------------------------------------+------------------+------------------------|
| ~ticker.quarterly_income_stmt~ | ~ticker.quarterly_income_statement()~ | Statement object | Different format |
| ~ticker.income_stmt~ | ~ticker.annual_income_statement()~ | Statement object | Annual version |
| ~ticker.quarterly_balance_sheet~ | ~ticker.quarterly_balance_sheet()~ | Statement object | Same structure |
| ~ticker.balance_sheet~ | ~ticker.annual_balance_sheet()~ | Statement object | Annual version |
| ~ticker.quarterly_cashflow~ | ~ticker.quarterly_cash_flow()~ | Statement object | 'cashflow' vs 'cash_flow' |
| ~ticker.cashflow~ | ~ticker.annual_cash_flow()~ | Statement object | Annual version |
** Statement Object Methods
#+BEGIN_SRC python
@@ -53,16 +53,16 @@ income_stmt.print_pretty_table() # Formatted output
* Valuation Metrics
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.info['trailingPE']~ | ~ticker.ttm_pe()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['forwardPE']~ | N/A | - | Not available |
| ~ticker.info['trailingEps']~ | ~ticker.ttm_eps()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['forwardEps']~ | N/A | - | Not available |
| ~ticker.info['marketCap']~ | ~ticker.market_capitalization()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['priceToBook']~ | ~ticker.pb_ratio()~ | pandas.DataFrame | Price/Book ratio |
| ~ticker.info['priceToSalesTrailing12Months']~ | ~ticker.ps_ratio()~ | pandas.DataFrame | Price/Sales ratio |
| N/A | ~ticker.peg_ratio()~ | pandas.DataFrame | PEG ratio (unique) |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|---------------------------------------------+--------------------------------+------------------+---------------------------|
| ~ticker.info['trailingPE']~ | ~ticker.ttm_pe()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['forwardPE']~ | N/A | - | Not available |
| ~ticker.info['trailingEps']~ | ~ticker.ttm_eps()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['forwardEps']~ | N/A | - | Not available |
| ~ticker.info['marketCap']~ | ~ticker.market_capitalization()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['priceToBook']~ | ~ticker.pb_ratio()~ | pandas.DataFrame | Price/Book ratio |
| ~ticker.info['priceToSalesTrailing12Months']~ | ~ticker.ps_ratio()~ | pandas.DataFrame | Price/Sales ratio |
| N/A | ~ticker.peg_ratio()~ | pandas.DataFrame | PEG ratio (unique) |
** Key Advantage: Historical Valuation Data
DefeatBeta provides **full historical time series** for:
@@ -75,13 +75,13 @@ Yahoo Finance only provides **current values** in ~.info~
* Financial Ratios
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.info['returnOnEquity']~ | ~ticker.roe()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['returnOnAssets']~ | ~ticker.roa()~ | pandas.DataFrame | **Historical** time series! |
| N/A | ~ticker.roic()~ | pandas.DataFrame | Return on Invested Capital |
| N/A | ~ticker.wacc()~ | pandas.DataFrame | Weighted Avg Cost of Capital |
| ~ticker.info['beta']~ | ~ticker.beta()~ | pandas.DataFrame | 5Y monthly beta |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|-------------------------------+----------------+------------------+------------------------------|
| ~ticker.info['returnOnEquity']~ | ~ticker.roe()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['returnOnAssets']~ | ~ticker.roa()~ | pandas.DataFrame | **Historical** time series! |
| N/A | ~ticker.roic()~ | pandas.DataFrame | Return on Invested Capital |
| N/A | ~ticker.wacc()~ | pandas.DataFrame | Weighted Avg Cost of Capital |
| ~ticker.info['beta']~ | ~ticker.beta()~ | pandas.DataFrame | 5Y monthly beta |
** WACC Components Available in DefeatBeta
#+BEGIN_SRC python
@@ -93,66 +93,67 @@ wacc = ticker.wacc()
* Growth Metrics
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.info['revenueGrowth']~ | ~ticker.quarterly_revenue_yoy_growth()~ | pandas.DataFrame | YoY growth |
| ~ticker.info['earningsGrowth']~ | ~ticker.quarterly_eps_yoy_growth()~ | pandas.DataFrame | EPS YoY growth |
| N/A | ~ticker.quarterly_net_income_yoy_growth()~ | pandas.DataFrame | Net income growth |
| N/A | ~ticker.quarterly_operating_income_yoy_growth()~ | pandas.DataFrame | Operating income growth |
| N/A | ~ticker.quarterly_ebitda_yoy_growth()~ | pandas.DataFrame | EBITDA growth |
| N/A | ~ticker.quarterly_fcf_yoy_growth()~ | pandas.DataFrame | Free cash flow growth |
| N/A | ~ticker.annual_revenue_yoy_growth()~ | pandas.DataFrame | Annual revenue growth |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|-------------------------------+------------------------------------------------+------------------+-------------------------|
| ~ticker.info['revenueGrowth']~ | ~ticker.quarterly_revenue_yoy_growth()~ | pandas.DataFrame | YoY growth |
| ~ticker.info['earningsGrowth']~ | ~ticker.quarterly_eps_yoy_growth()~ | pandas.DataFrame | EPS YoY growth |
| N/A | ~ticker.quarterly_net_income_yoy_growth()~ | pandas.DataFrame | Net income growth |
| N/A | ~ticker.quarterly_operating_income_yoy_growth()~ | pandas.DataFrame | Operating income growth |
| N/A | ~ticker.quarterly_ebitda_yoy_growth()~ | pandas.DataFrame | EBITDA growth |
| N/A | ~ticker.quarterly_fcf_yoy_growth()~ | pandas.DataFrame | Free cash flow growth |
| N/A | ~ticker.annual_revenue_yoy_growth()~ | pandas.DataFrame | Annual revenue growth |
* Margin Metrics
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.info['profitMargins']~ | ~ticker.quarterly_net_margin()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['grossMargins']~ | ~ticker.quarterly_gross_margin()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['operatingMargins']~ | ~ticker.quarterly_operating_margin()~ | pandas.DataFrame | **Historical** time series! |
| N/A | ~ticker.quarterly_ebitda_margin()~ | pandas.DataFrame | EBITDA margin |
| N/A | ~ticker.quarterly_fcf_margin()~ | pandas.DataFrame | Free cash flow margin |
| N/A | ~ticker.industry_quarterly_gross_margin()~ | pandas.DataFrame | Industry comparison |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|---------------------------------+------------------------------------------+------------------+---------------------------|
| ~ticker.info['profitMargins']~ | ~ticker.quarterly_net_margin()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['grossMargins']~ | ~ticker.quarterly_gross_margin()~ | pandas.DataFrame | **Historical** time series! |
| ~ticker.info['operatingMargins']~ | ~ticker.quarterly_operating_margin()~ | pandas.DataFrame | **Historical** time series! |
| N/A | ~ticker.quarterly_ebitda_margin()~ | pandas.DataFrame | EBITDA margin |
| N/A | ~ticker.quarterly_fcf_margin()~ | pandas.DataFrame | Free cash flow margin |
| N/A | ~ticker.industry_quarterly_gross_margin()~ | pandas.DataFrame | Industry comparison |
* Dividends & Stock Splits
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.dividends~ | ~ticker.dividends()~ | pandas.DataFrame | Dividend history |
| ~ticker.splits~ | ~ticker.splits()~ | pandas.DataFrame | Stock split history |
| ~ticker.info['dividendYield']~ | N/A | - | Not in separate field |
| ~ticker.info['trailingAnnualDividendYield']~ | N/A | - | Not available |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------------------------------------+--------------------+------------------+-----------------------|
| ~ticker.dividends~ | ~ticker.dividends()~ | pandas.DataFrame | Dividend history |
| ~ticker.splits~ | ~ticker.splits()~ | pandas.DataFrame | Stock split history |
| ~ticker.info['dividendYield']~ | N/A | - | Not in separate field |
| ~ticker.info['trailingAnnualDividendYield']~ | N/A | - | Not available |
* Company Info & Metadata
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|--------------|----------------|-------------|-------|
| ~ticker.info~ | ~ticker.info()~ | pandas.DataFrame | One row, many columns |
| ~ticker.info['longName']~ | ~ticker.info()['long_business_summary']~ | string | Business summary |
| ~ticker.info['sector']~ | ~ticker.info()['sector']~ | string | Sector classification |
| ~ticker.info['industry']~ | ~ticker.info()['industry']~ | string | Industry classification |
| ~ticker.info['website']~ | ~ticker.info()['web_site']~ | string | Company website |
| ~ticker.info['fullTimeEmployees']~ | ~ticker.info()['full_time_employees']~ | int | Employee count |
| N/A | ~ticker.officers()~ | pandas.DataFrame | Company officers |
| N/A | ~ticker.calendar()~ | pandas.DataFrame | Earnings calendar |
| Yahoo Finance | DefeatBeta-API | Return Type | Notes |
|----------------------------------+----------------------------------------+------------------+-------------------------|
| ~ticker.info~ | ~ticker.info()~ | pandas.DataFrame | One row, many columns |
| ~ticker.info['longName']~ | N/A (check name/short_name column) | string | Company trading name |
| ~ticker.info['longBusinessSummary']~ | ~ticker.info()['long_business_summary']~ | string | Business description |
| ~ticker.info['sector']~ | ~ticker.info()['sector']~ | string | Sector classification |
| ~ticker.info['industry']~ | ~ticker.info()['industry']~ | string | Industry classification |
| ~ticker.info['website']~ | ~ticker.info()['web_site']~ | string | Company website |
| ~ticker.info['fullTimeEmployees']~ | ~ticker.info()['full_time_employees']~ | int | Employee count |
| N/A | ~ticker.officers()~ | pandas.DataFrame | Company officers |
| N/A | ~ticker.calendar()~ | pandas.DataFrame | Earnings calendar |
* Unique DefeatBeta Features (Not in Yahoo Finance)
| Feature | Method | Description |
|---------|--------|-------------|
| **Earnings Transcripts** | ~ticker.earning_call_transcripts()~ | Full earnings call transcripts |
| | ~transcripts.get_transcripts_list()~ | List all available transcripts |
| | ~transcripts.get_transcript(year, quarter)~ | Get specific quarter's transcript |
| | ~transcripts.summarize_key_financial_data_with_ai()~ | AI-powered summary |
| **Revenue Breakdown** | ~ticker.revenue_by_segment()~ | Revenue by product segment |
| | ~ticker.revenue_by_product()~ | Detailed product breakdown |
| | ~ticker.revenue_by_geography()~ | Revenue by geographic region |
| **Automated DCF** | ~ticker.dcf()~ | Generates Excel DCF valuation |
| **AI Analysis** | ~transcripts.analyze_financial_metrics_change...~ | LLM analysis of quarter changes |
| | ~transcripts.analyze_financial_metrics_forecast...~ | LLM forecast analysis |
| **Industry Metrics** | ~ticker.industry_ttm_pe()~ | Industry P/E comparison |
| | ~ticker.industry_roe()~ | Industry ROE comparison |
| | ~ticker.industry_quarterly_gross_margin()~ | Industry margin comparison |
| Feature | Method | Description |
|------------------------+----------------------------------------------------+-----------------------------------|
| **Earnings Transcripts** | ~ticker.earning_call_transcripts()~ | Full earnings call transcripts |
| | ~transcripts.get_transcripts_list()~ | List all available transcripts |
| | ~transcripts.get_transcript(year, quarter)~ | Get specific quarter's transcript |
| | ~transcripts.summarize_key_financial_data_with_ai()~ | AI-powered summary |
| **Revenue Breakdown** | ~ticker.revenue_by_segment()~ | Revenue by product segment |
| | ~ticker.revenue_by_product()~ | Detailed product breakdown |
| | ~ticker.revenue_by_geography()~ | Revenue by geographic region |
| **Automated DCF** | ~ticker.dcf()~ | Generates Excel DCF valuation |
| **AI Analysis** | ~transcripts.analyze_financial_metrics_change...~ | LLM analysis of quarter changes |
| | ~transcripts.analyze_financial_metrics_forecast...~ | LLM forecast analysis |
| **Industry Metrics** | ~ticker.industry_ttm_pe()~ | Industry P/E comparison |
| | ~ticker.industry_roe()~ | Industry ROE comparison |
| | ~ticker.industry_quarterly_gross_margin()~ | Industry margin comparison |
* Example: yfinance → DefeatBeta Migration
@@ -209,26 +210,26 @@ current_roe = ticker.roe().iloc[-1]['roe']
* Quick Reference: Common Operations
| Operation | Yahoo Finance | DefeatBeta-API |
|-----------|--------------|----------------|
| Get current price | ~ticker.info['currentPrice']~ | ~ticker.price().iloc[-1]['close']~ |
| Get current P/E | ~ticker.info['trailingPE']~ | ~ticker.ttm_pe().iloc[-1]['ttm_pe']~ |
| Get current EPS | ~ticker.info['trailingEps']~ | ~ticker.ttm_eps().iloc[-1]['tailing_eps']~ |
| Get market cap | ~ticker.info['marketCap']~ | ~ticker.market_capitalization().iloc[-1]['market_capitalization']~ |
| Get ROE | ~ticker.info['returnOnEquity']~ | ~ticker.roe().iloc[-1]['roe']~ |
| Get revenue (TTM) | ~ticker.info['totalRevenue']~ | ~ticker.quarterly_income_statement().df().iloc[0]['TTM']~ |
| Get 5Y beta | ~ticker.info['beta']~ | ~ticker.beta().iloc[-1]['beta']~ |
| Operation | Yahoo Finance | DefeatBeta-API |
|-------------------+-------------------------------+------------------------------------------------------------------|
| Get current price | ~ticker.info['currentPrice']~ | ~ticker.price().iloc[-1]['close']~ |
| Get current P/E | ~ticker.info['trailingPE']~ | ~ticker.ttm_pe().iloc[-1]['ttm_pe']~ |
| Get current EPS | ~ticker.info['trailingEps']~ | ~ticker.ttm_eps().iloc[-1]['tailing_eps']~ |
| Get market cap | ~ticker.info['marketCap']~ | ~ticker.market_capitalization().iloc[-1]['market_capitalization']~ |
| Get ROE | ~ticker.info['returnOnEquity']~ | ~ticker.roe().iloc[-1]['roe']~ |
| Get revenue (TTM) | ~ticker.info['totalRevenue']~ | ~ticker.quarterly_income_statement().df().iloc[0]['TTM']~ |
| Get 5Y beta | ~ticker.info['beta']~ | ~ticker.beta().iloc[-1]['beta']~ |
* Data Type Differences
| Aspect | Yahoo Finance | DefeatBeta-API |
|--------|--------------|----------------|
| Dates in DataFrame | DatetimeIndex | 'report_date' column |
| Column naming | Title case (Open, Close) | Snake case (open, close) |
| Dividends/Splits | Separate columns in history | Separate DataFrames |
| Quarterly data | Transposed (dates as columns) | Wide format (TTM + quarters as columns) |
| Missing data | NaN | NaN |
| Numeric types | float | Decimal (convert with ~float()~) |
| Aspect | Yahoo Finance | DefeatBeta-API |
|--------------------+-------------------------------+-----------------------------------------|
| Dates in DataFrame | DatetimeIndex | 'report_date' column |
| Column naming | Title case (Open, Close) | Snake case (open, close) |
| Dividends/Splits | Separate columns in history | Separate DataFrames |
| Quarterly data | Transposed (dates as columns) | Wide format (TTM + quarters as columns) |
| Missing data | NaN | NaN |
| Numeric types | float | Decimal (convert with ~float()~) |
* Converting Decimal to Float
#+BEGIN_SRC python
@@ -244,37 +245,94 @@ market_cap_billions = float(market_cap.iloc[-1]['market_capitalization']) / 1e9
* When to Use Each
| Use Case | Recommendation | Reason |
|----------|---------------|--------|
| Backtesting trading strategies | **DefeatBeta** | No rate limits, consistent historical data |
| DCF Valuation modeling | **DefeatBeta** | Automated Excel output |
| Revenue segment analysis | **DefeatBeta** | Unique revenue breakdown |
| Earnings call research | **DefeatBeta** | Full transcripts available |
| Real-time price alerts | **Yahoo Finance** | Near-real-time quotes (may be delayed up to ~15 min) |
| Analyst recommendations | **Yahoo Finance** | Price targets, ratings |
| Institutional ownership | **Yahoo Finance** | Major holders data |
| Options/derivatives data | **Yahoo Finance** | Not available in DefeatBeta |
| Quick stock lookup | **Either** | Both work well |
| Multi-year historical analysis | **DefeatBeta** | Faster queries, no rate limits |
| Use Case | Recommendation | Reason |
|--------------------------------+-----------------+--------------------------------------------|
| Backtesting trading strategies | **DefeatBeta** | No rate limits, consistent historical data |
| DCF Valuation modeling | **DefeatBeta** | Automated Excel output |
| Revenue segment analysis | **DefeatBeta** | Unique revenue breakdown |
| Earnings call research | **DefeatBeta** | Full transcripts available |
| Real-time price alerts         | **Yahoo Finance** | Near-real-time quotes (may be delayed up to ~15 min) |
| Analyst recommendations | **Yahoo Finance** | Price targets, ratings |
| Institutional ownership | **Yahoo Finance** | Major holders data |
| Options/derivatives data | **Yahoo Finance** | Not available in DefeatBeta |
| Quick stock lookup | **Either** | Both work well |
| Multi-year historical analysis | **DefeatBeta** | Faster queries, no rate limits |
* Environment Setup
| Task | Command |
|------|---------|
| Install DefeatBeta | ~uv add defeatbeta-api~ |
| Install yfinance | ~uv add yfinance~ |
| Run notebook | ~uv run jupyter notebook~ |
| Check version | ~ticker.price()~ shows data update date |
| Task | Command |
|--------------------+---------------------------------------|
| Install DefeatBeta | ~uv add defeatbeta-api~ |
| Install yfinance | ~uv add yfinance~ |
| Run notebook | ~uv run jupyter notebook~ |
| Check version | ~ticker.price()~ shows data update date |
* Mapping Limitations & Verification Notes
/Verified 2026-04-26 via test_mapping.py on AAPL. defeatbeta-api 0.0.45, yfinance 1.3.0, DuckDB 1.4.3./
** What was confirmed correct (live data matched)
- ~dividends()~: both APIs return $0.26 for Nov-2025 and Feb-2026 payments — values match exactly
- ~splits()~: both return the same ratios (7:1 in 2014, 4:1 in 2020)
** Incorrect property names for yfinance (deprecated in v1.3)
| Used in mapping | Correct name in yfinance 1.3+ |
|----------------------------+-------------------------------|
| ~ticker.quarterly_financials~ | ~ticker.quarterly_income_stmt~ |
| ~ticker.financials~ | ~ticker.income_stmt~ |
The old names are deprecated; the mapping should use the new names so it keeps working once the old ones are dropped.
** Conceptual mismatch: longName vs long_business_summary (Company Info)
The mapping equates ~ticker.info['longName']~ with ~ticker.info()['long_business_summary']~,
labelling it "Business summary". This is wrong:
- ~longName~ = company trading name, e.g. "Apple Inc."
- ~long_business_summary~ = multi-sentence business description paragraph
- The yfinance equivalent of the description is ~ticker.info['longBusinessSummary']~
** Typo in Financial Statements note (cashflow row)
The "Notes" column reads ~Note: 'flow' vs 'flow'~. Should read: ~'cashflow' vs 'cash_flow'~
** Data format differences found in verified pairs
| Field | yfinance | defeatbeta |
|---------------+--------------------------------------+-------------------------------|
| dividends | Series, DatetimeIndex, float amount | DataFrame, report_date col |
| split_factor | float (e.g. ~7.0~) | string ratio (e.g. ~"7:1"~) |
** Historical coverage gap
- Dividends: yfinance returns 90 records, defeatbeta returns 61 (truncated history)
- Splits: yfinance returns 5 events, defeatbeta returns 4 (one historical split missing)
** DuckDB compatibility issue (defeatbeta-api 0.0.45 + DuckDB 1.4.3)
All Parquet-backed queries except ~dividends()~ and ~splits()~ failed during verification
with either ~"don't know what type:"~ or ~"TProtocolException: Invalid data"~. Affected:
~price()~, all financial statements, all valuation/ratio/margin/growth metrics, ~info()~, ~beta()~.
This is an incompatibility between DuckDB 1.4.3 and the httpfs extension / remote Parquet
format used by defeatbeta-api. Downgrading DuckDB or waiting for a defeatbeta-api update
may resolve it. The logical mapping is still valid — the API shape is correct, only the
DuckDB query layer is broken.
** Data freshness
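The DefeatBeta dataset was last updated 2026-04-17, 9 days before the verification date (2026-04-26).
yfinance serves near-real-time quotes (delayed by up to ~15 minutes). Because defeatbeta is a
periodically refreshed snapshot, some lag of this kind will always exist.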
* Common Issues & Solutions
| Issue | Solution |
|-------|----------|
| ~TypeError: unsupported operand type(s) for /: 'Decimal' and 'float'~ | Wrap values in ~float()~ |
| ~Rate limit exceeded~ | Switch to DefeatBeta |
| ~Real-time data needed~ | Use Yahoo Finance |
| ~Missing revenue breakdown~ | Use DefeatBeta ~revenue_by_segment()~ |
| ~Slow queries~ | Use DefeatBeta (DuckDB engine) |
| Issue | Solution |
|---------------------------------------------------------------------+-------------------------------------|
| ~TypeError: unsupported operand type(s) for /: 'Decimal' and 'float'~ | Wrap values in ~float()~ |
| ~Rate limit exceeded~ | Switch to DefeatBeta |
| ~Real-time data needed~ | Use Yahoo Finance |
| ~Missing revenue breakdown~ | Use DefeatBeta ~revenue_by_segment()~ |
| ~Slow queries~ | Use DefeatBeta (DuckDB engine) |
* Additional Resources
@@ -285,7 +343,7 @@ market_cap_billions = float(market_cap.iloc[-1]['market_capitalization']) / 1e9
* Footer
#+BEGIN_COMMENT
Last updated: 2026-04-25
Last updated: 2026-04-26 (verified via test_mapping.py on AAPL)
Author: Documentation
Version: 1.0
#+END_COMMENT
+228
View File
@@ -0,0 +1,228 @@
"""
Mapping verification: yfinance vs defeatbeta-api for AAPL.
Prints type + 2 representative rows/values for each mapped pair.
Methods that fail are reported inline (not crashed).
"""
import warnings
warnings.filterwarnings("ignore")
import yfinance as yf
from defeatbeta_api.data.ticker import Ticker
import pandas as pd
# Ticker symbol exercised by every check in this script.
SYMBOL = "AAPL"
# One handle per library for the same symbol: yfinance values are shown first
# in each comparison, defeatbeta-api values second.
yf_t = yf.Ticker(SYMBOL)
db_t = Ticker(SYMBOL)
# Status markers prefixed to each check's headline. They must be non-empty and
# distinct; with two empty strings a passing and a failing check print the same.
PASS = "[PASS]"
FAIL = "[FAIL]"
def try_db(fn, *args, **kwargs):
    """Call a defeatbeta method without letting its failure abort the script.

    Args:
        fn: Callable to invoke (typically a bound ``Ticker`` method).
        *args: Positional arguments forwarded to ``fn``.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        ``(result, None)`` on success, or ``(None, error_str)`` on failure.
        ``error_str`` is at most 80 characters and is never empty.
    """
    try:
        return fn(*args, **kwargs), None
    except Exception as e:  # deliberate catch-all: report inline, keep going
        # Some exceptions stringify to "" (e.g. StopIteration()). Callers test
        # the error for truthiness, so fall back to the exception class name
        # rather than returning an error that looks like success.
        return None, str(e)[:80] or type(e).__name__
def show_df(label, df, n=2):
    """Print a short preview of a pandas object under ``label``.

    A Series prints its length and last ``n`` entries. A DataFrame prints its
    shape and the last ``n`` rows of its first six columns, without the index.
    ``None`` and any other type print nothing.
    """
    if isinstance(df, pd.Series):
        print(f" {label} (Series len={len(df)}), last {n}:")
        print(df.tail(n).to_string())
        return
    if not isinstance(df, pd.DataFrame):
        return
    print(f" {label} ({df.shape[0]}r×{df.shape[1]}c), last {n} rows:")
    # Cap the preview width at six columns to keep the output readable.
    leading = list(df.columns)[:6]
    preview = df[leading].tail(n)
    print(preview.to_string(index=False))
def row(label, yf_val, yf_type_label, db_result, db_err):
    """Print one yfinance-vs-defeatbeta comparison for a single metric.

    The headline carries PASS when ``db_result`` is not ``None`` and FAIL
    otherwise. When ``db_err`` is truthy the yfinance value and the error are
    printed; otherwise both values are printed, with a column list and a short
    preview when the defeatbeta result is a DataFrame.
    """
    if db_result is None:
        ok = FAIL
    else:
        ok = PASS
    print(f"\n{ok} {label}")

    # Failure path: show what yfinance had and why defeatbeta failed.
    if db_err:
        print(f" yfinance → {yf_type_label}: {yf_val!r}")
        print(f" defeatbeta→ ERROR: {db_err}")
        return

    print(f" yfinance type={yf_type_label} value={yf_val!r}")
    # end="" so the cols=/value= suffix lands on the same output line.
    print(f" defeatbeta type={type(db_result).__name__}", end="")
    if not isinstance(db_result, pd.DataFrame):
        print(f" value={db_result!r}")
        return
    print(f" cols={list(db_result.columns)}")
    show_df("defeatbeta", db_result)
def section(title):
    """Print a section banner: a blank line, then ``title`` between two 68-character '=' rules."""
    rule = "=" * 68
    print("\n".join(["", rule, f" {title}", rule]))
# ── 1. PRICE DATA ─────────────────────────────────────────────────────────────
section("1. PRICE DATA — ticker.history() vs ticker.price()")

# yfinance side: last five sessions, restricted to the OHLCV columns.
yf_price = yf_t.history(period="5d")[["Open", "Close", "High", "Low", "Volume"]]
# defeatbeta side: guarded so a failing query is reported instead of raised.
db_price, db_price_err = try_db(db_t.price)

print(f"\n{PASS if not db_price_err else FAIL} ticker.history() vs ticker.price()")
print(f" yfinance type=DataFrame cols={list(yf_price.columns)}")
print(yf_price.tail(2).to_string())
if not db_price_err:
    print(f" defeatbeta type=DataFrame cols={list(db_price.columns)}")
    print(db_price.tail(2).to_string(index=False))
else:
    print(f" defeatbeta→ ERROR: {db_price_err}")
# ── 2. FINANCIAL STATEMENTS ───────────────────────────────────────────────────
section("2. FINANCIAL STATEMENTS")


def _statement_df(fetch):
    """Fetch a defeatbeta statement and convert it with ``.df()``, both guarded.

    Returns ``(statement, frame, error)``. ``error`` is ``None`` only when
    ``frame`` is usable, so a failing fetch or a failing ``.df()`` conversion
    is reported inline instead of crashing the script.
    """
    stmt, err = try_db(fetch)
    if err is not None:
        return None, None, err or "unknown error"
    if stmt is None:
        return None, None, "no statement returned"
    frame, err = try_db(stmt.df)
    if err is not None:
        return stmt, None, err or "unknown error"
    if frame is None:
        return stmt, None, "statement.df() returned None"
    return stmt, frame, None


# yfinance 1.3 exposes .quarterly_income_stmt; the older .quarterly_financials
# name is deprecated.
yf_inc_q = yf_t.quarterly_income_stmt
db_inc_q, db_inc_q_df, db_inc_q_err = _statement_df(db_t.quarterly_income_statement)
print(f"\n{PASS if not db_inc_q_err else FAIL} quarterly_income_stmt vs quarterly_income_statement()")
print(f" yfinance type={type(yf_inc_q).__name__} shape={yf_inc_q.shape} (rows=line items, cols=quarters)")
print(yf_inc_q.iloc[:2, :2].to_string())
if db_inc_q_err:
    print(f" defeatbeta→ ERROR: {db_inc_q_err}")
else:
    print(f" defeatbeta type={type(db_inc_q).__name__} → .df() shape={db_inc_q_df.shape}")
    print(db_inc_q_df.iloc[:2, :5].to_string(index=False))

yf_bs = yf_t.balance_sheet
db_bs, db_bs_df, db_bs_err = _statement_df(db_t.annual_balance_sheet)
print(f"\n{PASS if not db_bs_err else FAIL} balance_sheet vs annual_balance_sheet()")
print(f" yfinance type={type(yf_bs).__name__} shape={yf_bs.shape}")
print(yf_bs.iloc[:2, :2].to_string())
if db_bs_err:
    print(f" defeatbeta→ ERROR: {db_bs_err}")
else:
    print(f" defeatbeta type={type(db_bs).__name__} → .df() shape={db_bs_df.shape}")
    print(db_bs_df.iloc[:2, :5].to_string(index=False))

yf_cf = yf_t.cashflow
db_cf, db_cf_df, db_cf_err = _statement_df(db_t.annual_cash_flow)
print(f"\n{PASS if not db_cf_err else FAIL} cashflow vs annual_cash_flow()")
print(f" yfinance type={type(yf_cf).__name__} shape={yf_cf.shape}")
print(yf_cf.iloc[:2, :2].to_string())
if db_cf_err:
    print(f" defeatbeta→ ERROR: {db_cf_err}")
else:
    print(f" defeatbeta → .df() shape={db_cf_df.shape}")
    print(db_cf_df.iloc[:2, :5].to_string(index=False))
# ── 3. VALUATION METRICS ──────────────────────────────────────────────────────
section("3. VALUATION METRICS")

# Run every defeatbeta query first (each guarded), then print the comparisons.
db_pe, db_pe_err = try_db(db_t.ttm_pe)
db_eps, db_eps_err = try_db(db_t.ttm_eps)
db_mc, db_mc_err = try_db(db_t.market_capitalization)
db_pb, db_pb_err = try_db(db_t.pb_ratio)
db_ps, db_ps_err = try_db(db_t.ps_ratio)

# Each label names the yfinance info key and the defeatbeta method actually
# called above, so the printed mapping can be copied into code as-is.
row("trailingPE → ttm_pe()", yf_t.info.get("trailingPE"), "float", db_pe, db_pe_err)
row("trailingEps → ttm_eps()", yf_t.info.get("trailingEps"), "float", db_eps, db_eps_err)
row("marketCap → market_capitalization()", yf_t.info.get("marketCap"), "int", db_mc, db_mc_err)
row("priceToBook → pb_ratio()", yf_t.info.get("priceToBook"), "float", db_pb, db_pb_err)
row("priceToSales → ps_ratio()", yf_t.info.get("priceToSalesTrailing12Months"), "float", db_ps, db_ps_err)
# ── 4. FINANCIAL RATIOS ───────────────────────────────────────────────────────
section("4. FINANCIAL RATIOS")
# Guarded defeatbeta queries; each yields (result, error).
db_roe, db_roe_err = try_db(db_t.roe)
db_roa, db_roa_err = try_db(db_t.roa)
db_beta, db_beta_err = try_db(db_t.beta)
db_wacc, db_wacc_err = try_db(db_t.wacc)
# One comparison per metric against the matching yfinance info key.
row("returnOnEquity → roe()", yf_t.info.get("returnOnEquity"), "float", db_roe, db_roe_err)
row("returnOnAssets → roa()", yf_t.info.get("returnOnAssets"), "float", db_roa, db_roa_err)
row("beta → beta()", yf_t.info.get("beta"), "float", db_beta, db_beta_err)
# wacc() is shown with no yfinance counterpart (None / "N/A").
row("N/A → wacc()", None, "N/A", db_wacc, db_wacc_err)
# ── 5. GROWTH METRICS ─────────────────────────────────────────────────────────
section("5. GROWTH METRICS")
# Guarded defeatbeta queries for the quarterly year-over-year growth series.
db_rg, db_rg_err = try_db(db_t.quarterly_revenue_yoy_growth)
db_eg, db_eg_err = try_db(db_t.quarterly_eps_yoy_growth)
db_nig, db_nig_err = try_db(db_t.quarterly_net_income_yoy_growth)
# yfinance supplies a single info value per metric; defeatbeta returns whatever
# the method yields (row() prints its type and a preview).
row("revenueGrowth → quarterly_revenue_yoy_growth()", yf_t.info.get("revenueGrowth"), "float", db_rg, db_rg_err)
row("earningsGrowth → quarterly_eps_yoy_growth()", yf_t.info.get("earningsGrowth"), "float", db_eg, db_eg_err)
# Net-income growth is shown with no yfinance counterpart (None / "N/A").
row("N/A → quarterly_net_income_yoy_growth()", None, "N/A", db_nig, db_nig_err)
# ── 6. MARGIN METRICS ─────────────────────────────────────────────────────────
section("6. MARGIN METRICS")

# Guarded defeatbeta queries for the quarterly margin series.
db_nm, db_nm_err = try_db(db_t.quarterly_net_margin)
db_gm, db_gm_err = try_db(db_t.quarterly_gross_margin)
db_om, db_om_err = try_db(db_t.quarterly_operating_margin)

# Labels spell out the defeatbeta method actually called above.
row("profitMargins → quarterly_net_margin()", yf_t.info.get("profitMargins"), "float", db_nm, db_nm_err)
row("grossMargins → quarterly_gross_margin()", yf_t.info.get("grossMargins"), "float", db_gm, db_gm_err)
row("operatingMargins → quarterly_operating_margin()", yf_t.info.get("operatingMargins"), "float", db_om, db_om_err)
# ── 7. DIVIDENDS & SPLITS ─────────────────────────────────────────────────────
section("7. DIVIDENDS & SPLITS")

# Dividends: yfinance attribute vs guarded defeatbeta method call.
yf_div = yf_t.dividends
db_div, db_div_err = try_db(db_t.dividends)
print(f"\n{PASS if not db_div_err else FAIL} .dividends vs .dividends()")
print(f" yfinance type={type(yf_div).__name__} len={len(yf_div)}")
print(yf_div.tail(2).to_string())
if not db_div_err:
    print(f" defeatbeta type={type(db_div).__name__} shape={db_div.shape}")
    print(db_div.tail(2).to_string(index=False))
else:
    print(f" defeatbeta→ ERROR: {db_div_err}")

# Splits: same comparison shape as dividends.
yf_sp = yf_t.splits
db_sp, db_sp_err = try_db(db_t.splits)
print(f"\n{PASS if not db_sp_err else FAIL} .splits vs .splits()")
print(f" yfinance type={type(yf_sp).__name__} len={len(yf_sp)}")
print(yf_sp.tail(2).to_string())
if not db_sp_err:
    print(f" defeatbeta type={type(db_sp).__name__} shape={db_sp.shape}")
    print(db_sp.tail(2).to_string(index=False))
else:
    print(f" defeatbeta→ ERROR: {db_sp_err}")
# ── 8. COMPANY INFO ───────────────────────────────────────────────────────────
section("8. COMPANY INFO — .info vs .info()")
yf_info = yf_t.info
db_info, db_info_err = try_db(db_t.info)
if db_info_err or db_info is None:
    # A call that returned nothing is reported like a failure, not dereferenced.
    print(f" defeatbeta→ ERROR: {db_info_err or 'no data returned'}")
else:
    # (display label, defeatbeta column or None, yfinance value)
    fields = [
        ("sector", "sector", yf_info.get("sector")),
        ("industry", "industry", yf_info.get("industry")),
        ("employees", "full_time_employees", yf_info.get("fullTimeEmployees")),
        ("website", "web_site", yf_info.get("website")),
        # Note: yf longName = company name; longBusinessSummary = description
        ("longName", None, yf_info.get("longName")),
        # `or ""` also covers a key that is present but holds None.
        ("longBusinessSummary", "long_business_summary", (yf_info.get("longBusinessSummary") or "")[:60]),
    ]
    print(f"\n {'Field':<30} {'yfinance':<35} {'defeatbeta'}")
    print(f" {'-'*30} {'-'*35} {'-'*30}")
    # An empty frame has no first row to read; show a blank cell instead.
    db_has_rows = len(db_info) > 0
    for label, db_col, yf_val in fields:
        if db_col and db_has_rows and db_col in db_info.columns:
            db_val = db_info[db_col].iloc[0]
        else:
            db_val = ""
        # Mark truncated values with an ellipsis so cut-off text is visible.
        if isinstance(db_val, str) and len(db_val) > 40:
            db_val = db_val[:40] + "…"
        if isinstance(yf_val, str) and len(yf_val) > 34:
            yf_val = yf_val[:34] + "…"
        print(f" {label:<30} {str(yf_val):<35} {str(db_val)}")
# ── SUMMARY ───────────────────────────────────────────────────────────────────
# Closing banner, emitted as one write: rule, message, rule, trailing blank line.
print(f"\n{'='*68}\n ALL CHECKS COMPLETE\n{'='*68}\n")
+302
View File
@@ -0,0 +1,302 @@
#+title: yfinance API Reference
#+subtitle: Version 1.3.0
#+author: Ran Aroussi
#+date: 2026-04-25
* Overview
yfinance offers a Pythonic way to fetch financial & market data from Yahoo! Finance.
** Installation
#+begin_src sh
pip install yfinance
#+end_src
** Quick Start
#+begin_src python
import yfinance as yf
# Single ticker
dat = yf.Ticker("MSFT")
dat.info
dat.history(period='1mo')
dat.option_chain(dat.options[0]).calls
# Multiple tickers
tickers = yf.Tickers('MSFT AAPL GOOG')
tickers.tickers['MSFT'].info
yf.download(['MSFT', 'AAPL', 'GOOG'], period='1mo')
# Funds
spy = yf.Ticker('SPY').funds_data
spy.description
spy.top_holdings
#+end_src
* Top-Level Functions
| Function | Description |
|--------------------------------+-------------------------------------------|
| ~yf.download(tickers, ...)~ | Download market data for multiple tickers |
| ~yf.enable_debug_mode()~ | Enable verbose debug logging |
| ~yf.set_tz_cache_location(path)~ | Set timezone cache directory |
| ~yf.screen(query)~ | Run equity/fund/ETF screener queries |
* Config
| Setting | Description |
|---------------------------------+----------------------|
| ~yf.config.debug.logging = True~ | Enable debug logging |
| ~yf.config.user_agent = 'custom'~ | Custom user agent |
* Ticker(symbol)
** Price History
| Property/Method | Returns |
|---------------------------------------------+--------------------------------------|
| ~.history(period, start, end, interval, ...)~ | OHLCV DataFrame |
| ~.get_history_metadata()~ | Dict with currency, exchange, etc. |
| ~.get_dividends()~ | Dividend history (Series) |
| ~.dividends~ | Cached dividend history |
| ~.get_splits()~ | Stock split history (Series) |
| ~.splits~ | Cached split history |
| ~.get_actions()~ | Dividends + splits combined (DataFrame) |
| ~.actions~ | Cached actions history |
| ~.get_capital_gains()~ | Capital gains distributions (Series) |
| ~.capital_gains~ | Cached capital gains |
| ~.get_shares_full()~ | Full shares outstanding history |
** Info & News
| Property/Method | Returns |
|-------------------------------+----------------------------------|
| ~.get_info()~ / ~.info~ | Full company info dict |
| ~.get_fast_info()~ / ~.fast_info~ | Quick-access key metrics |
| ~.get_news()~ / ~.news~ | Recent news articles (list of dicts) |
| ~.isin~ | ISIN identifier |
| ~.get_isin()~ | ISIN as string |
** Financial Statements
| Property/Method | Returns |
|------------------------+-----------------------------------------|
| ~.income_stmt~ | Annual income statement |
| ~.quarterly_income_stmt~ | Quarterly income statement |
| ~.ttm_income_stmt~ | Trailing-twelve-months income statement |
| ~.balance_sheet~ | Annual balance sheet |
| ~.cashflow~ | Annual cash flow statement |
| ~.quarterly_cashflow~ | Quarterly cash flow |
| ~.ttm_cashflow~ | TTM cash flow |
| ~.get_income_stmt()~ | Alias for income_stmt |
| ~.get_balance_sheet()~ | Alias for balance_sheet |
| ~.get_cashflow()~ | Alias for cashflow |
** Earnings & Calendar
| Property/Method | Returns |
|-----------------------+--------------------------------------|
| ~.earnings~ | Annual earnings summary |
| ~.calendar~ | Upcoming earnings/dividends dates |
| ~.get_earnings_dates()~ | Historical & upcoming earnings dates |
| ~.earnings_dates~ | Cached earnings dates |
| ~.get_sec_filings()~ | SEC filing history |
| ~.sec_filings~ | Cached SEC filings |
** Analysis & Estimates
| Property/Method | Returns |
|--------------------------------+------------------------------------|
| ~.get_recommendations()~ | Historical analyst recommendations |
| ~.recommendations~ | Cached recommendations |
| ~.get_recommendations_summary()~ | Summary of buy/hold/sell |
| ~.recommendations_summary~ | Cached summary |
| ~.get_upgrades_downgrades()~ | Analyst upgrades/downgrades |
| ~.upgrades_downgrades~ | Cached upgrades/downgrades |
| ~.get_sustainability()~ | ESG scores |
| ~.sustainability~ | Cached ESG |
| ~.get_analyst_price_targets()~ | Price target estimates |
| ~.analyst_price_targets~ | Cached price targets |
| ~.get_earnings_estimate()~ | Earnings estimates |
| ~.earnings_estimate~ | Cached earnings estimates |
| ~.get_revenue_estimate()~ | Revenue estimates |
| ~.revenue_estimate~ | Cached revenue estimates |
| ~.get_earnings_history()~ | Earnings surprise history |
| ~.earnings_history~ | Cached earnings history |
| ~.get_eps_trend()~ | EPS trend revisions |
| ~.eps_trend~ | Cached EPS trend |
| ~.get_eps_revisions()~ | EPS revision counts |
| ~.eps_revisions~ | Cached EPS revisions |
| ~.get_growth_estimates()~ | Growth rate estimates |
| ~.growth_estimates~ | Cached growth estimates |
** Holdings & Insider Activity
| Property/Method | Returns |
|---------------------------------------------------------+-----------------------------------|
| ~.get_funds_data()~ / ~.funds_data~ | Fund info (for ETFs/mutual funds) |
| ~.get_insider_purchases()~ / ~.insider_purchases~ | Insider purchase records |
| ~.get_insider_transactions()~ / ~.insider_transactions~ | Insider trade history |
| ~.get_insider_roster_holders()~ / ~.insider_roster_holders~ | Insider roster |
| ~.get_major_holders()~ / ~.major_holders~ | Ownership breakdown |
| ~.get_institutional_holders()~ / ~.institutional_holders~ | Institutional holders |
| ~.get_mutualfund_holders()~ / ~.mutualfund_holders~ | Mutual fund holders |
** Options
| Method | Returns |
|---------------------+-------------------------------------|
| ~.options~ | Tuple of available expiration dates |
| ~.option_chain(date)~ | Named tuple with ~.calls~ and ~.puts~ DataFrames for a date |
* Tickers('SYM1 SYM2 ...')
Multiple tickers class.
| Property | Returns |
|---------------------------+----------------------------------|
| ~.tickers['SYM'].info~ | Access individual Ticker objects |
| ~.tickers['SYM'].history()~ | Get history per ticker |
* Market(market_code)
Market summary class.
| Method/Property | Returns |
|-----------------+---------------------------|
| ~.status~ | Market open/closed status |
| ~.summary()~ | Market summary data |
* Calendars
Calendar events class.
| Method | Returns |
|--------------+-------------------|
| ~.earnings()~ | Earnings calendar |
| ~.dividends()~ | Dividend calendar |
| ~.splits()~ | Splits calendar |
| ~.ipo()~ | IPO calendar |
* Search(query)
Search class for quotes and news.
| Property/Method | Returns |
|-----------------+----------------------|
| ~.quotes~ | Search result quotes |
| ~.news~ | Search result news |
| ~.lists~ | Watchlists/lists |
| ~.nav~ | Navigation results |
| ~.research~ | Research reports |
| ~.get_quotes()~ | Fetch quotes |
| ~.get_news()~ | Fetch news |
* Lookup(query)
Ticker lookup class.
| Method | Returns |
|---------------+------------------------|
| ~.get_quotes()~ | Lookup matching quotes |
* WebSocket(symbols, ...)
Live streaming data (synchronous).
| Method | Description |
|-----------------------+---------------------------|
| ~.connect()~ | Open WebSocket connection |
| ~.subscribe(symbols)~ | Subscribe to symbols |
| ~.unsubscribe(symbols)~ | Unsubscribe from symbols |
| ~.close()~ | Close connection |
* AsyncWebSocket(symbols, ...)
Live streaming data (asynchronous).
Same methods as ~WebSocket~, but async.
* Sector(sector_key)
Sector information class.
| Property/Method | Returns |
|-----------------+--------------------------|
| ~.key~ | Sector identifier |
| ~.name~ | Sector name |
| ~.symbol~ | Associated ETF symbol |
| ~.top_etfs~ | Top ETFs in sector |
| ~.top_companies~ | Top companies in sector |
| ~.industries~ | Industries within sector |
* Industry(industry_key)
Industry information class.
| Property/Method | Returns |
|-----------------+---------------------------|
| ~.key~ | Industry identifier |
| ~.name~ | Industry name |
| ~.sector_key~ | Parent sector |
| ~.top_companies~ | Top companies in industry |
* Query Builders (Screener)
| Class | Description |
|------------------+------------------------------------|
| ~EquityQuery~ | Build equity filter queries |
| ~FundQuery~ | Build mutual fund filter queries |
| ~ETFQuery~ | Build ETF filter queries |
| ~yf.screen(query)~ | Execute a query and return results |
* download() Parameters
#+begin_src python
yf.download(
tickers, # str or list of tickers
period="1mo", # 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max
start=None, # datetime or str "YYYY-MM-DD"
end=None,
interval="1d", # 1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo
group_by="column", # "column" or "ticker"
auto_adjust=True, # effective default in recent yfinance releases (older releases used False)
back_adjust=False,
repair=False, # Enable price repair
keepna=False,
proxy=None,
timeout=10,
threads=True,
progress=True,
)
#+end_src
* Advanced Features
** Logging
Enable debug logging:
#+begin_src python
yf.config.debug.logging = True
#+end_src
** Caching
- Persistent cache is enabled by default
- Set custom cache location: ~yf.set_tz_cache_location(path)~
** Price Repair
- Enable with ~repair=True~ in ~download()~ or ~history()~
- Fixes NaN values, bad splits, and dividend adjustments
- Dividend repair also available
** Multi-Level Column Index
- When downloading multiple tickers, columns are MultiIndex
- ~group_by="column"~ or ~group_by="ticker"~
* Legal Disclaimer
Yahoo!, Y!Finance, and Yahoo! finance are registered trademarks of Yahoo, Inc.
yfinance is /not/ affiliated, endorsed, or vetted by Yahoo, Inc. It's an open-source tool that uses Yahoo's publicly available APIs, and is intended for research and educational purposes.
You should refer to Yahoo!'s terms of use for details on your rights to use the actual data downloaded. The Yahoo! finance API is intended for personal use only.
+75
View File
@@ -0,0 +1,75 @@
"""
One-time download of all defeatbeta parquet files + company_tickers.json.
Run this once; after that use offline.py for zero-network Ticker() calls.
uv run python download_data.py
uv run python download_data.py --out data/parquet # custom directory
"""
import argparse
import sys
from pathlib import Path
import requests
# Root URL that every remote file path below is appended to.
BASE = "https://huggingface.co/datasets/defeatbeta/yahoo-finance-data/resolve/main"
# Table names; main() fetches each one from "{BASE}/data/<name>.parquet" and
# stores it locally as "<name>.parquet".
PARQUET_TABLES = [
    "stock_profile",
    "stock_officers",
    "stock_tailing_eps",
    "stock_earning_calendar",
    "stock_statement",
    "stock_prices",
    "stock_dividend_events",
    "stock_split_events",
    "exchange_rate",
    "daily_treasury_yield",
    "stock_earning_call_transcripts",
    "stock_news",
    "stock_revenue_breakdown",
    "stock_shares_outstanding",
    "stock_sec_filing",
]
# Non-parquet files as (path relative to BASE, local file name) pairs.
EXTRA_FILES = [
    ("data/company_tickers.json", "company_tickers.json"),
]
def download(url: str, dest: Path, label: str) -> None:
    """Stream ``url`` into ``dest`` unless ``dest`` already exists.

    The payload is written to a sibling ``.tmp`` file and moved into place
    only after the whole stream has been written, so ``dest`` never holds a
    partial download. If anything fails, the temporary file is removed and
    the exception is re-raised.

    Args:
        url: Remote file to fetch.
        dest: Final local path; an existing file is left untouched.
        label: Short name used in the progress output.

    Raises:
        Exception: Whatever ``requests`` or the filesystem raises; nothing is
            swallowed.
    """
    if dest.exists():
        print(f" skip {label} ({dest.stat().st_size / 1e6:.1f} MB on disk)")
        return
    print(f" fetch {label} ...", end="", flush=True)
    tmp = dest.with_suffix(".tmp")
    try:
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp, "wb") as f:
                # 8 MiB chunks: few iterations without holding the file in memory.
                for chunk in r.iter_content(chunk_size=8 * 1024 * 1024):
                    f.write(chunk)
        tmp.replace(dest)
    except BaseException:
        # Finish the progress line and drop the partial file before propagating
        # (BaseException so Ctrl-C also cleans up).
        print(" failed")
        tmp.unlink(missing_ok=True)
        raise
    print(f" {dest.stat().st_size / 1e6:.1f} MB")
def main() -> None:
    """Parse ``--out``, create that directory, and download every listed file into it.

    Parquet tables are fetched first, then the extra files, in list order;
    ``download`` skips anything already on disk.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", default="data/parquet", help="local output directory")
    out = Path(parser.parse_args().out)
    out.mkdir(parents=True, exist_ok=True)
    print(f"Saving to: {out.resolve()}\n")

    # Build the full (url, destination, label) work list up front.
    jobs = [
        (f"{BASE}/data/{table}.parquet", out / f"{table}.parquet", table)
        for table in PARQUET_TABLES
    ]
    jobs += [
        (f"{BASE}/{remote_path}", out / local_name, local_name)
        for remote_path, local_name in EXTRA_FILES
    ]
    for url, dest, label in jobs:
        download(url, dest, label)

    print(f"\nDone. {sum(1 for _ in out.iterdir())} files in {out.resolve()}")


if __name__ == "__main__":
    main()
+67
View File
@@ -0,0 +1,67 @@
"""
Patch defeatbeta_api to read from local parquet files with zero network.
Usage:
from offline import enable_offline
enable_offline("data/parquet") # call once before first Ticker()
from defeatbeta_api.data.ticker import Ticker
t = Ticker("AAPL")
t.price() # reads local file, no HTTP
Note: the one-time welcome banner on first import calls get_data_update_time()
once. After that, nothing touches the network.
"""
from pathlib import Path
def enable_offline(parquet_dir: str = "data/parquet") -> None:
    """Monkey-patch defeatbeta_api so table lookups resolve to local parquet files.

    Call once before creating the first ``Ticker``. The patches replace class
    attributes on the library's client, configuration and company-meta
    classes, then clear the module-level DuckDB instance.

    Args:
        parquet_dir: Directory holding ``<table>.parquet`` files and
            ``company_tickers.json``; resolved to an absolute path.
    """
    local_dir = Path(parquet_dir).resolve()

    # Imported here rather than at module top so importing this module does not
    # itself import defeatbeta_api (the original note says the first import
    # prints a welcome banner and makes one network call).
    from defeatbeta_api.client.hugging_face_client import HuggingFaceClient
    import defeatbeta_api.client.duckdb_client as _duckdb_mod
    from defeatbeta_api.client.duckdb_client import DuckDBClient
    from defeatbeta_api.client.duckdb_conf import Configuration
    from defeatbeta_api.data.company_meta import CompanyMeta
    from defeatbeta_api.utils.util import validate_memory_limit

    def _local_url(self, table: str) -> str:
        """Return the local parquet path for ``table``; raise if it is missing."""
        path = local_dir / f"{table}.parquet"
        if path.exists():
            return str(path)
        raise FileNotFoundError(
            f"Local parquet not found: {path}\n"
            f"Run download_data.py first."
        )

    def _offline_update_time(self):
        """Stand-in for the remote data-update timestamp."""
        return "offline"

    def _skip_cache_validation(self):
        """No-op replacement for the startup cache validation."""
        return None

    def _minimal_settings(self):
        """Return only the memory and thread settings; no cache_httpfs statements."""
        memory = f"SET GLOBAL memory_limit = '{validate_memory_limit(self.memory_limit)}'"
        threads = f"SET GLOBAL threads = {self.threads}"
        return [memory, threads]

    # 1. Every table URL now points at a local parquet file.
    HuggingFaceClient.get_url_path = _local_url
    # 2. Fixed update time (per the original note: used by beta() and the banner).
    HuggingFaceClient.get_data_update_time = _offline_update_time
    # 3. No startup cache validation (per the original note: it hits HuggingFace).
    DuckDBClient._validate_httpfs_cache = _skip_cache_validation
    # 4. No cache_httpfs install or settings; not needed for local files.
    Configuration.get_duckdb_settings = _minimal_settings
    # 5. company_tickers.json is read from the local directory as well.
    CompanyMeta.COMPANY_TICKERS_URL = str(local_dir / "company_tickers.json")
    # 6. Clear the DuckDB instance so the next Ticker() reinitialises with the
    #    patched Configuration.
    _duckdb_mod._instance = None

    print(f"[offline] defeatbeta_api patched → reading from {local_dir}")
+34
View File
@@ -0,0 +1,34 @@
"""
Use the persistent defeatbeta httpfs cache at ~/.cache/defeatbeta/.
Import this at the top of any notebook or script before using Ticker().
Run warmup_cache.py once first to populate the cache.
from persistent_cache import enable_persistent_cache
enable_persistent_cache()
from defeatbeta_api.data.ticker import Ticker
t = Ticker("AAPL")
"""
from pathlib import Path
# Persistent httpfs cache location shared by every notebook and script.
CACHE_DIR = Path.home().joinpath(".cache", "defeatbeta")


def enable_persistent_cache() -> None:
    """Point defeatbeta_api's httpfs cache at ``CACHE_DIR``.

    Replaces ``validate_httpfs_cache_directory`` on the library's util module
    with a function returning ``CACHE_DIR``, then clears the module-level
    DuckDB instance.

    Raises:
        RuntimeError: If ``CACHE_DIR`` does not exist yet.
    """
    import defeatbeta_api.utils.util as _util
    import defeatbeta_api.client.duckdb_client as _duckdb_mod
    from defeatbeta_api.client.duckdb_conf import Configuration

    if not CACHE_DIR.exists():
        message = (
            f"Cache directory not found: {CACHE_DIR}\n"
            "Run warmup_cache.py first to populate it."
        )
        raise RuntimeError(message)

    def _persistent_cache_dir():
        """Return the persistent cache directory as a string."""
        return str(CACHE_DIR)

    # NOTE(review): this rebinds the name on the util module only. If the
    # library imported the function by name elsewhere, that copy is unaffected
    # — confirm the patch takes effect.
    _util.validate_httpfs_cache_directory = _persistent_cache_dir
    # Clear the DuckDB instance so it reinitialises with the new cache dir.
    _duckdb_mod._instance = None
    print(f"[persistent_cache] cache → {CACHE_DIR}")
+1 -1
View File
@@ -37,7 +37,7 @@ dependencies = [
"torch>=2.0.0",
"transformers>=4.0.0",
"tscv>=0.1.3",
"vectorbt>=1.0.0",
"vectorbt[full,rust]>=1.0.0",
"xgboost>=3.2.0",
"yfinance>=1.3.0",
"defeatbeta-api>=0.0.45",
+599
View File
@@ -0,0 +1,599 @@
import marimo
# Presumably the marimo version that wrote this notebook file.
__generated_with = "0.23.2"
# Notebook application object; every cell below registers itself via @app.cell.
app = marimo.App()
@app.cell
def _():
    # Imports marimo and returns it as `mo` for the cells that take `mo` as a parameter.
    import marimo as mo
    return (mo,)
@app.cell(hide_code=True)
def _(mo):
    # Title cell: passes the notebook heading and pipeline overview to mo.md.
    mo.md(r"""
# Quant Trading Scaffold
## Data Ingestion → Indicators → Walk-Forward ML → Backtesting → Tearsheet
Pipeline:
1. **Ingest** OHLCV data via yfinance
2. **Engineer features** — momentum, trend, volatility, volume indicators
3. **Label** — binary classification (next-N-day return > 0)
4. **Walk-forward split** with purging (no leakage)
5. **Train** XGBoost classifier per fold
6. **Evaluate** with quantstats tearsheet
""")
    return
@app.cell(hide_code=True)
def _(mo):
    # Section heading for the configuration and imports cell.
    mo.md(r"""
## 1. Config & Imports
""")
    return
@app.cell
def _():
    # Shared imports and run configuration for the whole notebook.
    # `from __future__ import annotations` is intentionally absent: a
    # __future__ import is only legal at the very top of a module, so inside
    # this function it is a SyntaxError, and nothing here uses annotations.
    import warnings
    warnings.filterwarnings("ignore", category=FutureWarning)
    import numpy as np
    import pandas as pd
    import pandas_ta as ta
    import yfinance as yf
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    from sklearn.model_selection import TimeSeriesSplit
    from sklearn.metrics import accuracy_score, classification_report
    from xgboost import XGBClassifier
    import quantstats as qs

    # ── Config ──────────────────────────────────────────────────────
    TICKER = "AAPL"
    START = "2015-01-01"
    END = "2025-12-31"
    HORIZON = 5  # predict N-day forward return
    PURGE_GAP = 5  # gap between train/test to prevent leakage
    N_SPLITS = 5  # walk-forward folds
    TRAIN_MIN = 504  # ~2 years minimum training window

    # Separator between the two dates so they do not run together.
    print(f"Config: {TICKER} | {START} → {END} | horizon={HORIZON}d | {N_SPLITS} folds")
    return (
        END,
        HORIZON,
        N_SPLITS,
        PURGE_GAP,
        START,
        TICKER,
        TRAIN_MIN,
        XGBClassifier,
        accuracy_score,
        classification_report,
        go,
        make_subplots,
        np,
        pd,
        qs,
        ta,
        yf,
    )
@app.cell(hide_code=True)
def _(mo):
    # Section heading for the data ingestion cell.
    mo.md(r"""
## 2. Data Ingestion
""")
    return
@app.cell
def _(END, START, TICKER, pd, yf):
    # Download adjusted OHLCV bars for TICKER and return them as `df`.
    raw = yf.download(TICKER, start=START, end=END, auto_adjust=True)
    # Fail with a clear message instead of an IndexError on df.index[0] below
    # when the download comes back empty.
    if raw.empty:
        raise RuntimeError(f"No price data returned for {TICKER} between {START} and {END}")

    # yfinance may return MultiIndex columns for single ticker — flatten
    if isinstance(raw.columns, pd.MultiIndex):
        raw.columns = raw.columns.droplevel("Ticker")

    raw.index = pd.DatetimeIndex(raw.index)
    df = raw.copy()

    # Separator between first and last bar dates so they do not run together.
    print(f"Downloaded {len(df)} bars: {df.index[0].date()} → {df.index[-1].date()}")
    df.tail(3)
    return (df,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 3. Feature Engineering — Technical Indicators
We compute features across 4 categories:
- **Momentum**: RSI, MACD, Stochastic, Williams %R, ROC
- **Trend**: SMA/EMA crossovers, ADX, Ichimoku
- **Volatility**: Bollinger Bands, ATR, Keltner Channels
- **Volume**: OBV, MFI, Accumulation/Distribution
""")
return
@app.cell
def _(df, ta):
    # Adds technical-indicator columns to `df` in place.
    #
    # Multi-column pandas_ta results are selected by column-name prefix rather
    # than by position: pandas_ta orders MACD as (line, histogram, signal),
    # Bollinger Bands as (lower, mid, upper, bandwidth, %B) and Keltner
    # Channels as (lower, basis, upper), so positional picks mislabel them.
    def _pick(frame, prefix):
        """Return the first column of *frame* whose name starts with *prefix*.

        Raises KeyError when no column matches, so a change in the indicator
        library's naming surfaces immediately instead of silently mislabelling.
        """
        matches = [c for c in frame.columns if str(c).startswith(prefix)]
        if not matches:
            raise KeyError(f"No column starting with {prefix!r} in {list(frame.columns)}")
        return frame[matches[0]]

    # ── Momentum ────────────────────────────────────────────────────
    df["rsi_14"] = ta.rsi(df["Close"], length=14)
    df["rsi_7"] = ta.rsi(df["Close"], length=7)
    macd = ta.macd(df["Close"], fast=12, slow=26, signal=9)
    df["macd"] = _pick(macd, "MACD_")  # MACD line
    df["macd_signal"] = _pick(macd, "MACDs_")  # signal line
    df["macd_hist"] = _pick(macd, "MACDh_")  # histogram
    stoch = ta.stoch(df["High"], df["Low"], df["Close"])
    df["stoch_k"] = _pick(stoch, "STOCHk_")
    df["stoch_d"] = _pick(stoch, "STOCHd_")
    df["willr_14"] = ta.willr(df["High"], df["Low"], df["Close"], length=14)
    df["roc_10"] = ta.roc(df["Close"], length=10)
    df["roc_21"] = ta.roc(df["Close"], length=21)
    df["mom_10"] = ta.mom(df["Close"], length=10)

    # ── Trend ───────────────────────────────────────────────────────
    df["sma_20"] = ta.sma(df["Close"], length=20)
    df["sma_50"] = ta.sma(df["Close"], length=50)
    df["sma_200"] = ta.sma(df["Close"], length=200)
    df["ema_12"] = ta.ema(df["Close"], length=12)
    df["ema_26"] = ta.ema(df["Close"], length=26)
    # crossover features (price relative to MAs)
    df["close_over_sma20"] = (df["Close"] / df["sma_20"]) - 1
    df["close_over_sma50"] = (df["Close"] / df["sma_50"]) - 1
    df["close_over_sma200"] = (df["Close"] / df["sma_200"]) - 1
    df["sma20_over_sma50"] = (df["sma_20"] / df["sma_50"]) - 1
    df["sma50_over_sma200"] = (df["sma_50"] / df["sma_200"]) - 1
    adx = ta.adx(df["High"], df["Low"], df["Close"], length=14)
    # "ADX_" deliberately includes the underscore so an "ADXR_..." column,
    # if the library emits one, is not picked by mistake.
    df["adx"] = _pick(adx, "ADX_")
    df["di_plus"] = _pick(adx, "DMP_")
    df["di_minus"] = _pick(adx, "DMN_")

    # ── Volatility ──────────────────────────────────────────────────
    bbands = ta.bbands(df["Close"], length=20, std=2)
    df["bb_upper"] = _pick(bbands, "BBU_")
    df["bb_mid"] = _pick(bbands, "BBM_")
    df["bb_lower"] = _pick(bbands, "BBL_")
    df["bb_width"] = _pick(bbands, "BBB_")
    df["bb_pctb"] = _pick(bbands, "BBP_")  # %B: where price is within bands
    df["atr_14"] = ta.atr(df["High"], df["Low"], df["Close"], length=14)
    df["atr_pct"] = df["atr_14"] / df["Close"]  # normalized ATR
    kc = ta.kc(df["High"], df["Low"], df["Close"], length=20)
    df["kc_upper"] = _pick(kc, "KCU")
    df["kc_lower"] = _pick(kc, "KCL")
    # volatility: rolling std of returns
    df["vol_10"] = df["Close"].pct_change().rolling(10).std()
    df["vol_21"] = df["Close"].pct_change().rolling(21).std()

    # ── Volume ──────────────────────────────────────────────────────
    df["obv"] = ta.obv(df["Close"], df["Volume"])
    df["obv_sma20"] = ta.sma(df["obv"], length=20)
    df["mfi_14"] = ta.mfi(df["High"], df["Low"], df["Close"], df["Volume"], length=14)
    ad = ta.ad(df["High"], df["Low"], df["Close"], df["Volume"])
    df["ad_line"] = ad
    # volume relative to average
    df["vol_ratio_20"] = df["Volume"] / df["Volume"].rolling(20).mean()

    # ── Returns features ────────────────────────────────────────────
    df["ret_1d"] = df["Close"].pct_change(1)
    df["ret_5d"] = df["Close"].pct_change(5)
    df["ret_10d"] = df["Close"].pct_change(10)
    df["ret_21d"] = df["Close"].pct_change(21)

    print(f"Total columns after feature engineering: {len(df.columns)}")
    df.tail(3)
    return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 4. Labeling — Forward Return Classification
Target: is the N-day forward return positive? (buy signal = 1, sell/hold signal = 0)
""")
return
@app.cell
def _(HORIZON, df):
    # Builds the classification target and the modelling table.
    # forward return (what we're predicting): close[t + HORIZON] / close[t] - 1
    df["fwd_ret"] = df["Close"].pct_change(HORIZON).shift(-HORIZON)
    # Rows whose forward return is NaN get label 0 here, but they are removed
    # by the dropna() below because `fwd_ret` is part of the selected columns.
    df["label"] = (df["fwd_ret"] > 0).astype(int)

    # ── Define feature columns (exclude raw OHLCV, target, and non-stationary cols)
    EXCLUDE = {
        "Open", "High", "Low", "Close", "Volume",
        "fwd_ret", "label",
        "sma_20", "sma_50", "sma_200", "ema_12", "ema_26",  # non-stationary
        "bb_upper", "bb_mid", "bb_lower",  # non-stationary
        "kc_upper", "kc_lower",  # non-stationary
        "obv", "obv_sma20", "ad_line",  # non-stationary
    }
    FEATURES = [c for c in df.columns if c not in EXCLUDE]

    # drop rows with NaN (from indicator warm-up + forward label)
    model_df = df[FEATURES + ["label", "fwd_ret"]].dropna()

    # Without this guard an empty table would surface as an opaque IndexError
    # on `model_df.index[0]` in the summary print below.
    if model_df.empty:
        raise ValueError("No usable rows remain after dropping NaNs; not enough history for the indicators")

    print(f"Features: {len(FEATURES)}")
    print(f"Usable rows: {len(model_df)} ({model_df.index[0].date()} → {model_df.index[-1].date()})")
    print(f"Label balance: {model_df['label'].value_counts(normalize=True).to_dict()}")
    print(f"\nFeature list:\n{FEATURES}")
    return FEATURES, model_df
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 5. Walk-Forward Split with Purge Gap
Time series data **cannot** use random k-fold — future data would leak into training.
We use **expanding-window walk-forward** with a **purge gap** between train/test:
```
Fold 1: [====TRAIN====]--gap--[TEST]
Fold 2: [========TRAIN========]--gap--[TEST]
Fold 3: [============TRAIN============]--gap--[TEST]
```
The gap prevents label leakage from overlapping forward-return windows.
""")
return
@app.cell
def _(FEATURES, N_SPLITS, PURGE_GAP, TRAIN_MIN, go, model_df, np):
    def walk_forward_splits(n_samples: int, n_splits: int, test_size: int=126, purge_gap: int=5, min_train: int=504):
        """
        Expanding-window walk-forward with purge gap.

        Yields (train_idx, test_idx) positional index arrays. Test windows are
        contiguous and end at the last sample; every training window starts
        at 0 and stops `purge_gap` samples before its test window.

        test_size: samples per test window (~6 months of trading days)
        min_train: minimum size of the first training window (~2 years)
        purge_gap: samples skipped between train end and test start

        Raises ValueError when the arguments are invalid or when the first
        fold's training window would be shorter than `min_train`.
        """
        if n_splits < 1 or test_size < 1 or purge_gap < 0:
            raise ValueError('n_splits and test_size must be >= 1 and purge_gap must be >= 0')
        # Only the first (shortest) training window constrains the data:
        # it spans [0, n_samples - n_splits * test_size - purge_gap).
        # The purge gap is therefore counted once, not once per fold.
        required = min_train + n_splits * test_size + purge_gap
        if required > n_samples:
            raise ValueError(f'Not enough data for {n_splits} splits. Need {required}, have {n_samples}')
        for i in range(n_splits):
            test_end = n_samples - (n_splits - 1 - i) * test_size
            test_start = test_end - test_size
            train_end = test_start - purge_gap
            # expanding window (use max(0, train_end - fixed_window) for sliding)
            train_start = 0
            train_idx = np.arange(train_start, train_end)
            test_idx = np.arange(test_start, test_end)
            yield (train_idx, test_idx)

    X = model_df[FEATURES].values
    y = model_df['label'].values
    dates = model_df.index

    # ── Visualize the splits ────────────────────────────────────────
    _fig = go.Figure()
    for _fold, (_tr_idx, _te_idx) in enumerate(walk_forward_splits(len(X), N_SPLITS, purge_gap=PURGE_GAP, min_train=TRAIN_MIN)):
        # One horizontal bar per fold for the train span and one for the test
        # span; only fold 0 contributes legend entries.
        _fig.add_trace(go.Scatter(x=[dates[_tr_idx[0]], dates[_tr_idx[-1]]], y=[_fold, _fold], mode='lines', line=dict(color='steelblue', width=8), name=f'Train {_fold}' if _fold == 0 else None, showlegend=_fold == 0))
        _fig.add_trace(go.Scatter(x=[dates[_te_idx[0]], dates[_te_idx[-1]]], y=[_fold, _fold], mode='lines', line=dict(color='coral', width=8), name=f'Test {_fold}' if _fold == 0 else None, showlegend=_fold == 0))
        print(f'Fold {_fold}: train {dates[_tr_idx[0]].date()} → {dates[_tr_idx[-1]].date()} ({len(_tr_idx)}d) | test {dates[_te_idx[0]].date()} → {dates[_te_idx[-1]].date()} ({len(_te_idx)}d)')
    _fig.update_layout(title='Walk-Forward Splits', yaxis_title='Fold', height=300)
    _fig.show()
    return X, dates, walk_forward_splits, y
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 6. Train XGBoost per Fold — Walk-Forward
Train on expanding window, predict test fold, collect out-of-sample predictions.
""")
return
@app.cell
def _(
    N_SPLITS,
    PURGE_GAP,
    TRAIN_MIN,
    X,
    XGBClassifier,
    accuracy_score,
    classification_report,
    dates,
    model_df,
    walk_forward_splits,
    y,
):
    # Trains one XGBoost classifier per walk-forward fold and collects the
    # out-of-sample predictions of every test window.
    oos_preds = []  # out-of-sample predictions
    oos_proba = []  # predicted probabilities
    oos_labels = []
    oos_dates = []
    oos_fwd_ret = []
    fold_metrics = []

    # Share of each training window held back (at its end) for early stopping.
    _VAL_FRACTION = 0.15

    for _fold, (_tr_idx, _te_idx) in enumerate(walk_forward_splits(len(X), N_SPLITS, purge_gap=PURGE_GAP, min_train=TRAIN_MIN)):
        # Early stopping must not see the test fold: choosing the number of
        # trees on the data the model is later scored on leaks test
        # information into model selection. Validate instead on the tail of
        # the training window, separated from the fitted part by the same
        # purge gap used between train and test.
        _n_val = max(1, int(len(_tr_idx) * _VAL_FRACTION))
        _fit_end = len(_tr_idx) - _n_val - PURGE_GAP
        if _fit_end <= 0:
            raise ValueError(f'Fold {_fold}: training window of {len(_tr_idx)} rows is too small to carve out a validation set')
        _fit_idx = _tr_idx[:_fit_end]
        _val_idx = _tr_idx[len(_tr_idx) - _n_val:]

        X_train, y_train = (X[_fit_idx], y[_fit_idx])
        _X_val, _y_val = (X[_val_idx], y[_val_idx])
        X_test, y_test = (X[_te_idx], y[_te_idx])

        model = XGBClassifier(n_estimators=300, max_depth=4, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=1.0, random_state=42, eval_metric='logloss', early_stopping_rounds=30)
        model.fit(X_train, y_train, eval_set=[(_X_val, _y_val)], verbose=False)

        preds = model.predict(X_test)
        proba = model.predict_proba(X_test)[:, 1]
        acc = accuracy_score(y_test, preds)

        oos_preds.extend(preds)
        oos_proba.extend(proba)
        oos_labels.extend(y_test)
        oos_dates.extend(dates[_te_idx])
        oos_fwd_ret.extend(model_df['fwd_ret'].values[_te_idx])
        # 'train_size' reports the whole training window (fit + gap + validation).
        fold_metrics.append({'fold': _fold, 'accuracy': acc, 'train_size': len(_tr_idx), 'test_size': len(_te_idx)})
        print(f'Fold {_fold}: acc={acc:.3f} | train={len(_tr_idx)} | test={len(_te_idx)}')

    print(f'\nOverall OOS accuracy: {accuracy_score(oos_labels, oos_preds):.3f}')
    print(classification_report(oos_labels, oos_preds, target_names=['SELL/HOLD', 'BUY']))
    # `model` is the classifier of the last fold; the importance cell uses it.
    return model, oos_dates, oos_fwd_ret, oos_preds, oos_proba
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 7. Feature Importance (Last Fold)
""")
return
@app.cell
def _(FEATURES, go, model, pd):
    # Rank the received model's importances in ascending order, so the 20
    # largest sit at the tail and are drawn as a horizontal bar chart.
    imp = pd.Series(model.feature_importances_, index=FEATURES).sort_values(ascending=True)
    _top = imp.tail(20)
    _fig = go.Figure(go.Bar(x=_top, y=_top.index, orientation='h'))
    _fig.update_layout(
        title='Top 20 Feature Importances (last fold)',
        height=500,
        margin=dict(l=150),
    )
    _fig.show()
    return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 8. Strategy Simulation — Signal → Returns
Convert model predictions to a strategy equity curve:
- **Signal = 1 (BUY)**: go long (earn the market return)
- **Signal = 0 (SELL/HOLD)**: stay in cash (earn 0)
Compare against buy-and-hold benchmark.
""")
return
@app.cell
def _(df, go, oos_dates, oos_fwd_ret, oos_preds, oos_proba, pd):
    # Build strategy returns series from OOS predictions
    strat = pd.DataFrame({'date': oos_dates, 'signal': oos_preds, 'proba': oos_proba, 'fwd_ret': oos_fwd_ret}).set_index('date')

    # daily returns: we use daily close-to-close returns, masked by position
    # align with actual daily returns (not forward returns) for proper equity curve
    daily_ret = df['Close'].pct_change().reindex(strat.index)

    # The signal for date t is computed from data up to and including the
    # close of t, so it can only be traded from that close onwards. Lag it by
    # one bar: the position held over day t is the signal of day t-1.
    # Multiplying the same-day return by the same-day signal would use
    # information that was not available when the return was earned.
    strat['position'] = strat['signal'].shift(1).fillna(0)

    # strategy return: market return when in position, 0 when in cash
    strat['strat_ret'] = daily_ret * strat['position']
    strat['bench_ret'] = daily_ret

    # cumulative
    strat['strat_equity'] = (1 + strat['strat_ret']).cumprod()
    strat['bench_equity'] = (1 + strat['bench_ret']).cumprod()

    # plot
    _fig = go.Figure()
    _fig.add_trace(go.Scatter(x=strat.index, y=strat['strat_equity'], name='Strategy', line=dict(color='steelblue')))
    _fig.add_trace(go.Scatter(x=strat.index, y=strat['bench_equity'], name='Buy & Hold', line=dict(color='gray', dash='dot')))

    # shade the periods in which the strategy is actually invested
    in_market = strat['position'] == 1
    _changes = in_market.astype(int).diff().fillna(0)
    entries = strat.index[_changes == 1]
    exits = strat.index[_changes == -1]
    # align: if already invested on the first bar, start from the beginning
    if in_market.iloc[0]:
        entries = entries.insert(0, strat.index[0])
    # likewise close a still-open period at the last bar
    if in_market.iloc[-1]:
        exits = exits.append(pd.DatetimeIndex([strat.index[-1]]))
    for ent, ext in zip(entries, exits):
        _fig.add_vrect(x0=ent, x1=ext, fillcolor='green', opacity=0.07, line_width=0)

    _fig.update_layout(title='Strategy vs Buy & Hold (OOS)', yaxis_title='Equity ($1 start)', height=450)
    _fig.show()
    print(f"Strategy final: ${strat['strat_equity'].iloc[-1]:.2f}")
    print(f"Benchmark final: ${strat['bench_equity'].iloc[-1]:.2f}")
    return (strat,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 9. QuantStats Tearsheet
Full performance report: Sharpe, Sortino, max drawdown, rolling metrics, monthly heatmap.
""")
return
@app.cell
def _(pd, qs, strat):
    # quantstats works on return series indexed by datetimes, so both series
    # are copied out of `strat` and given an explicit DatetimeIndex.
    strategy_returns = strat["strat_ret"].copy()
    benchmark_returns = strat["bench_ret"].copy()
    strategy_returns.index = pd.DatetimeIndex(strategy_returns.index)
    benchmark_returns.index = pd.DatetimeIndex(benchmark_returns.index)
    qs.extend_pandas()

    # key metrics: (label, statistic, format spec), printed in this order
    _rule = "=" * 50
    _metrics = (
        ("Sharpe: ", qs.stats.sharpe, ".2f"),
        ("Sortino: ", qs.stats.sortino, ".2f"),
        ("Max Drawdown: ", qs.stats.max_drawdown, ".2%"),
        ("CAGR: ", qs.stats.cagr, ".2%"),
        ("Calmar: ", qs.stats.calmar, ".2f"),
        ("Win Rate: ", qs.stats.win_rate, ".2%"),
        ("Volatility: ", qs.stats.volatility, ".2%"),
        ("Avg Win: ", qs.stats.avg_win, ".4f"),
        ("Avg Loss: ", qs.stats.avg_loss, ".4f"),
        ("Profit Factor:", qs.stats.profit_factor, ".2f"),
    )
    print(_rule)
    print("STRATEGY METRICS (out-of-sample)")
    print(_rule)
    for _label, _stat, _spec in _metrics:
        print(f"{_label}{format(_stat(strategy_returns), _spec)}")
    print(_rule)
    return benchmark_returns, strategy_returns
@app.cell
def _(TICKER, benchmark_returns, qs, strategy_returns):
    # Write the full quantstats HTML tearsheet (strategy vs. benchmark) to disk.
    _output_path = "tearsheet.html"
    _report_title = f"{TICKER} ML Signal Strategy (OOS Walk-Forward)"
    qs.reports.html(
        strategy_returns,
        benchmark=benchmark_returns,
        title=_report_title,
        output=_output_path,
    )
    print(f"Tearsheet saved to {_output_path}")
    return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## 10. Signal Dashboard — Price + Indicators + Buy/Sell Signals
""")
return
@app.cell
def _(TICKER, df, go, make_subplots, strat):
    # Four-row dashboard (price, RSI, MACD, volume) over the most recent
    # out-of-sample bars, with markers where the model signal flips.
    # show last fold's test period with signals overlaid on price
    last_test_dates = strat.index[-126:] # last ~6 months
    viz = df.loc[last_test_dates].copy()
    sig = strat.loc[last_test_dates]
    _fig = make_subplots(rows=4, cols=1, shared_xaxes=True, row_heights=[0.4, 0.2, 0.2, 0.2], vertical_spacing=0.03, subplot_titles=['Price + Bollinger Bands + Signals', 'RSI(14)', 'MACD', 'Volume'])
    # Row 1: Candlestick + BB + signals
    _fig.add_trace(go.Candlestick(x=viz.index, open=viz['Open'], high=viz['High'], low=viz['Low'], close=viz['Close'], name='OHLC', increasing_line_color='steelblue', decreasing_line_color='salmon'), row=1, col=1)
    # Trace order matters here: 'BB Lower' uses fill='tonexty', which fills
    # towards the trace added just before it ('BB Upper').
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['bb_upper'], line=dict(color='gray', width=1, dash='dot'), name='BB Upper'), row=1, col=1)
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['bb_lower'], line=dict(color='gray', width=1, dash='dot'), name='BB Lower', fill='tonexty', fillcolor='rgba(128,128,128,0.05)'), row=1, col=1)
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['sma_50'], line=dict(color='orange', width=1), name='SMA 50'), row=1, col=1)
    # buy/sell markers: +1 in the diff marks a 0→1 flip (buy), -1 a 1→0 flip (sell)
    buy_mask = sig['signal'] == 1
    _changes = buy_mask.astype(int).diff()
    buy_entries = sig.index[_changes == 1]
    sell_entries = sig.index[_changes == -1]
    if len(buy_entries):
        # markers sit slightly below the bar low so they do not cover the candle
        _fig.add_trace(go.Scatter(x=buy_entries, y=viz.loc[buy_entries, 'Low'] * 0.995, mode='markers', marker=dict(symbol='triangle-up', size=10, color='green'), name='BUY'), row=1, col=1)
    if len(sell_entries):
        # markers sit slightly above the bar high
        _fig.add_trace(go.Scatter(x=sell_entries, y=viz.loc[sell_entries, 'High'] * 1.005, mode='markers', marker=dict(symbol='triangle-down', size=10, color='red'), name='SELL'), row=1, col=1)
    # Row 2: RSI with reference lines at 70 and 30
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['rsi_14'], line=dict(color='purple', width=1.5), name='RSI 14'), row=2, col=1)
    _fig.add_hline(y=70, line_dash='dash', line_color='red', opacity=0.5, row=2, col=1)
    _fig.add_hline(y=30, line_dash='dash', line_color='green', opacity=0.5, row=2, col=1)
    # Row 3: MACD line, signal line and histogram (green when >= 0, red otherwise)
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['macd'], line=dict(color='blue', width=1.5), name='MACD'), row=3, col=1)
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['macd_signal'], line=dict(color='orange', width=1), name='Signal'), row=3, col=1)
    colors = ['green' if v >= 0 else 'red' for v in viz['macd_hist']]
    _fig.add_trace(go.Bar(x=viz.index, y=viz['macd_hist'], marker_color=colors, name='Hist', opacity=0.5), row=3, col=1)
    # Row 4: Volume with its 20-bar rolling mean (computed on the displayed window only)
    _fig.add_trace(go.Bar(x=viz.index, y=viz['Volume'], marker_color='steelblue', name='Volume', opacity=0.5), row=4, col=1)
    _fig.add_trace(go.Scatter(x=viz.index, y=viz['Volume'].rolling(20).mean(), line=dict(color='orange', width=1), name='Vol SMA20'), row=4, col=1)
    _fig.update_layout(height=900, title=f'{TICKER} — Last Test Fold Signal Dashboard', xaxis_rangeslider_visible=False, showlegend=False)
    _fig.update_xaxes(rangeslider_visible=False)
    _fig.show()
    return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## Next Steps
Things to iterate on from here:
1. **Multi-asset**: swap `TICKER` to BTC-USD, QQQ, GLD, etc. or loop over a universe
2. **Probability threshold**: instead of binary 0/1, use `proba > 0.6` for higher-conviction signals
3. **Position sizing**: Kelly criterion via `PyPortfolioOpt` based on predicted probability
4. **Regime filter**: add ADX/volatility regime detection — only trade in trending regimes
5. **Transaction costs**: subtract realistic slippage (e.g., 5bps per trade) from returns
6. **Alternative splitters you have installed**:
- `from tscv import GapRollForward` (formerly `GapWalkForward`) — sklearn-compatible, handles gap + purge natively
- `from sktime.split import ExpandingWindowSplitter, SlidingWindowSplitter`
- `from sklearn.model_selection import TimeSeriesSplit` — basic but solid
7. **LightGBM**: drop-in replacement for XGBoost, often faster on large feature sets
8. **Meta-labeling** (Lopez de Prado): train a secondary model on whether the primary model's signals are correct
""")
return
# The fourteen empty placeholder cells that preceded this guard defined
# nothing and produced no output, so they have been removed.
if __name__ == "__main__":
    # Run the notebook as an app when the file is executed as a script.
    app.run()
+1 -1
View File
@@ -628,7 +628,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.12"
"version": "3.12.12"
}
},
"nbformat": 4,
Generated
+236 -2
View File
@@ -131,6 +131,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
]
[[package]]
name = "alpaca-py"
version = "0.43.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "msgpack" },
{ name = "pandas" },
{ name = "pydantic" },
{ name = "pytz" },
{ name = "requests" },
{ name = "sseclient-py" },
{ name = "websockets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/31/40/e0b3470786de2dd917276759d669b3adc6577432323e8535724e71e53c2e/alpaca_py-0.43.3.tar.gz", hash = "sha256:d97e815eb503fe084113b139ed94227913dc4da8025f28460a14d489371bd07d", size = 97985, upload-time = "2026-04-24T18:28:37.068Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/b4/062dbf87910c4a5c0accde3bd073c2fa3f7ac9e66571489ef1a7c0f44531/alpaca_py-0.43.3-py3-none-any.whl", hash = "sha256:0eb36921696bbfc5ea839f083e7822c9ccab062d8d523a888bc4470985f1b3ef", size = 122528, upload-time = "2026-04-24T18:28:38.25Z" },
]
[[package]]
name = "annotated-doc"
version = "0.0.4"
@@ -202,6 +220,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/dd/e3/03dc0f97eab839f72061342d69bd34424e89876ce4026509aab3d74d4f23/appscript-1.4.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5efce3302c00674b769b79938cc5f66f7791ef45c6419e850a5f1c8f9fcefcc1", size = 85610, upload-time = "2025-10-08T07:56:38.103Z" },
]
[[package]]
name = "apscheduler"
version = "3.6.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytz" },
{ name = "setuptools" },
{ name = "six" },
{ name = "tzlocal" },
]
sdist = { url = "https://files.pythonhosted.org/packages/89/3d/f65972547c5aa533276ada2bea3c2ef51bb4c4de55b67a66129c111b89ad/APScheduler-3.6.3.tar.gz", hash = "sha256:3bb5229eed6fbbdafc13ce962712ae66e175aa214c69bed35a06bffcf0c5e244", size = 96309, upload-time = "2019-11-05T07:51:50.394Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f3/34/9ef20ed473c4fd2c3df54ef77a27ae3fc7500b16b192add4720cab8b2c09/APScheduler-3.6.3-py2.py3-none-any.whl", hash = "sha256:e8b1ecdb4c7cb2818913f766d5898183c7cb8936680710a4d3a966e02262e526", size = 58881, upload-time = "2019-11-05T07:51:48.621Z" },
]
[[package]]
name = "arch"
version = "8.0.0"
@@ -491,6 +524,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fa/88/6764e7a109dd84294850741501145da90d13cdeac9d4e614929464a37420/build-1.4.4-py3-none-any.whl", hash = "sha256:8c3f48a6090b39edec1a273d2d57949aaf13723b01e02f9d518396887519f64d", size = 25921, upload-time = "2026-04-22T20:53:43.251Z" },
]
[[package]]
name = "cachetools"
version = "4.2.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/52/ba/619250fa6bc11ce6aa4de0604d45843090a53cd7d10d7253b89669313370/cachetools-4.2.2.tar.gz", hash = "sha256:61b5ed1e22a0924aed1d23b478f37e8d52549ff8a961de2909c69bf950020cff", size = 23682, upload-time = "2021-04-27T21:19:57.252Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bf/28/c4f5796c67ad06bb91d98d543a5e01805c1ff065e08871f78e52d2a331ad/cachetools-4.2.2-py3-none-any.whl", hash = "sha256:2cc0b89715337ab6dbba85b5b50effe2b0c74e035d83ee8ed637cf52f12ae001", size = 11998, upload-time = "2021-04-27T21:19:55.559Z" },
]
[[package]]
name = "ccxt"
version = "4.5.50"
@@ -2039,7 +2081,7 @@ dependencies = [
{ name = "torch", version = "2.11.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
{ name = "transformers" },
{ name = "tscv" },
{ name = "vectorbt" },
{ name = "vectorbt", extra = ["full", "rust"] },
{ name = "xgboost" },
{ name = "yfinance" },
]
@@ -2079,7 +2121,7 @@ requires-dist = [
{ name = "torch", specifier = ">=2.0.0", index = "https://download.pytorch.org/whl/cpu" },
{ name = "transformers", specifier = ">=4.0.0" },
{ name = "tscv", specifier = ">=0.1.3" },
{ name = "vectorbt", specifier = ">=1.0.0" },
{ name = "vectorbt", extras = ["full", "rust"], specifier = ">=1.0.0" },
{ name = "xgboost", specifier = ">=3.2.0" },
{ name = "yfinance", specifier = ">=1.3.0" },
]
@@ -2509,6 +2551,50 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
]
[[package]]
name = "msgpack"
version = "1.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ad/bd/8b0d01c756203fbab65d265859749860682ccd2a59594609aeec3a144efa/msgpack-1.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:70a0dff9d1f8da25179ffcf880e10cf1aad55fdb63cd59c9a49a1b82290062aa", size = 81939, upload-time = "2025-10-08T09:15:01.472Z" },
{ url = "https://files.pythonhosted.org/packages/34/68/ba4f155f793a74c1483d4bdef136e1023f7bcba557f0db4ef3db3c665cf1/msgpack-1.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:446abdd8b94b55c800ac34b102dffd2f6aa0ce643c55dfc017ad89347db3dbdb", size = 85064, upload-time = "2025-10-08T09:15:03.764Z" },
{ url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" },
{ url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" },
{ url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" },
{ url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" },
{ url = "https://files.pythonhosted.org/packages/41/0d/2ddfaa8b7e1cee6c490d46cb0a39742b19e2481600a7a0e96537e9c22f43/msgpack-1.1.2-cp312-cp312-win32.whl", hash = "sha256:1fff3d825d7859ac888b0fbda39a42d59193543920eda9d9bea44d958a878029", size = 65096, upload-time = "2025-10-08T09:15:11.11Z" },
{ url = "https://files.pythonhosted.org/packages/8c/ec/d431eb7941fb55a31dd6ca3404d41fbb52d99172df2e7707754488390910/msgpack-1.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1de460f0403172cff81169a30b9a92b260cb809c4cb7e2fc79ae8d0510c78b6b", size = 72708, upload-time = "2025-10-08T09:15:12.554Z" },
{ url = "https://files.pythonhosted.org/packages/c5/31/5b1a1f70eb0e87d1678e9624908f86317787b536060641d6798e3cf70ace/msgpack-1.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:be5980f3ee0e6bd44f3a9e9dea01054f175b50c3e6cdb692bc9424c0bbb8bf69", size = 64119, upload-time = "2025-10-08T09:15:13.589Z" },
{ url = "https://files.pythonhosted.org/packages/6b/31/b46518ecc604d7edf3a4f94cb3bf021fc62aa301f0cb849936968164ef23/msgpack-1.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4efd7b5979ccb539c221a4c4e16aac1a533efc97f3b759bb5a5ac9f6d10383bf", size = 81212, upload-time = "2025-10-08T09:15:14.552Z" },
{ url = "https://files.pythonhosted.org/packages/92/dc/c385f38f2c2433333345a82926c6bfa5ecfff3ef787201614317b58dd8be/msgpack-1.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:42eefe2c3e2af97ed470eec850facbe1b5ad1d6eacdbadc42ec98e7dcf68b4b7", size = 84315, upload-time = "2025-10-08T09:15:15.543Z" },
{ url = "https://files.pythonhosted.org/packages/d3/68/93180dce57f684a61a88a45ed13047558ded2be46f03acb8dec6d7c513af/msgpack-1.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fdf7d83102bf09e7ce3357de96c59b627395352a4024f6e2458501f158bf999", size = 412721, upload-time = "2025-10-08T09:15:16.567Z" },
{ url = "https://files.pythonhosted.org/packages/5d/ba/459f18c16f2b3fc1a1ca871f72f07d70c07bf768ad0a507a698b8052ac58/msgpack-1.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fac4be746328f90caa3cd4bc67e6fe36ca2bf61d5c6eb6d895b6527e3f05071e", size = 424657, upload-time = "2025-10-08T09:15:17.825Z" },
{ url = "https://files.pythonhosted.org/packages/38/f8/4398c46863b093252fe67368b44edc6c13b17f4e6b0e4929dbf0bdb13f23/msgpack-1.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fffee09044073e69f2bad787071aeec727183e7580443dfeb8556cbf1978d162", size = 402668, upload-time = "2025-10-08T09:15:19.003Z" },
{ url = "https://files.pythonhosted.org/packages/28/ce/698c1eff75626e4124b4d78e21cca0b4cc90043afb80a507626ea354ab52/msgpack-1.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5928604de9b032bc17f5099496417f113c45bc6bc21b5c6920caf34b3c428794", size = 419040, upload-time = "2025-10-08T09:15:20.183Z" },
{ url = "https://files.pythonhosted.org/packages/67/32/f3cd1667028424fa7001d82e10ee35386eea1408b93d399b09fb0aa7875f/msgpack-1.1.2-cp313-cp313-win32.whl", hash = "sha256:a7787d353595c7c7e145e2331abf8b7ff1e6673a6b974ded96e6d4ec09f00c8c", size = 65037, upload-time = "2025-10-08T09:15:21.416Z" },
{ url = "https://files.pythonhosted.org/packages/74/07/1ed8277f8653c40ebc65985180b007879f6a836c525b3885dcc6448ae6cb/msgpack-1.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:a465f0dceb8e13a487e54c07d04ae3ba131c7c5b95e2612596eafde1dccf64a9", size = 72631, upload-time = "2025-10-08T09:15:22.431Z" },
{ url = "https://files.pythonhosted.org/packages/e5/db/0314e4e2db56ebcf450f277904ffd84a7988b9e5da8d0d61ab2d057df2b6/msgpack-1.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:e69b39f8c0aa5ec24b57737ebee40be647035158f14ed4b40e6f150077e21a84", size = 64118, upload-time = "2025-10-08T09:15:23.402Z" },
{ url = "https://files.pythonhosted.org/packages/22/71/201105712d0a2ff07b7873ed3c220292fb2ea5120603c00c4b634bcdafb3/msgpack-1.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e23ce8d5f7aa6ea6d2a2b326b4ba46c985dbb204523759984430db7114f8aa00", size = 81127, upload-time = "2025-10-08T09:15:24.408Z" },
{ url = "https://files.pythonhosted.org/packages/1b/9f/38ff9e57a2eade7bf9dfee5eae17f39fc0e998658050279cbb14d97d36d9/msgpack-1.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6c15b7d74c939ebe620dd8e559384be806204d73b4f9356320632d783d1f7939", size = 84981, upload-time = "2025-10-08T09:15:25.812Z" },
{ url = "https://files.pythonhosted.org/packages/8e/a9/3536e385167b88c2cc8f4424c49e28d49a6fc35206d4a8060f136e71f94c/msgpack-1.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99e2cb7b9031568a2a5c73aa077180f93dd2e95b4f8d3b8e14a73ae94a9e667e", size = 411885, upload-time = "2025-10-08T09:15:27.22Z" },
{ url = "https://files.pythonhosted.org/packages/2f/40/dc34d1a8d5f1e51fc64640b62b191684da52ca469da9cd74e84936ffa4a6/msgpack-1.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:180759d89a057eab503cf62eeec0aa61c4ea1200dee709f3a8e9397dbb3b6931", size = 419658, upload-time = "2025-10-08T09:15:28.4Z" },
{ url = "https://files.pythonhosted.org/packages/3b/ef/2b92e286366500a09a67e03496ee8b8ba00562797a52f3c117aa2b29514b/msgpack-1.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:04fb995247a6e83830b62f0b07bf36540c213f6eac8e851166d8d86d83cbd014", size = 403290, upload-time = "2025-10-08T09:15:29.764Z" },
{ url = "https://files.pythonhosted.org/packages/78/90/e0ea7990abea5764e4655b8177aa7c63cdfa89945b6e7641055800f6c16b/msgpack-1.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8e22ab046fa7ede9e36eeb4cfad44d46450f37bb05d5ec482b02868f451c95e2", size = 415234, upload-time = "2025-10-08T09:15:31.022Z" },
{ url = "https://files.pythonhosted.org/packages/72/4e/9390aed5db983a2310818cd7d3ec0aecad45e1f7007e0cda79c79507bb0d/msgpack-1.1.2-cp314-cp314-win32.whl", hash = "sha256:80a0ff7d4abf5fecb995fcf235d4064b9a9a8a40a3ab80999e6ac1e30b702717", size = 66391, upload-time = "2025-10-08T09:15:32.265Z" },
{ url = "https://files.pythonhosted.org/packages/6e/f1/abd09c2ae91228c5f3998dbd7f41353def9eac64253de3c8105efa2082f7/msgpack-1.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:9ade919fac6a3e7260b7f64cea89df6bec59104987cbea34d34a2fa15d74310b", size = 73787, upload-time = "2025-10-08T09:15:33.219Z" },
{ url = "https://files.pythonhosted.org/packages/6a/b0/9d9f667ab48b16ad4115c1935d94023b82b3198064cb84a123e97f7466c1/msgpack-1.1.2-cp314-cp314-win_arm64.whl", hash = "sha256:59415c6076b1e30e563eb732e23b994a61c159cec44deaf584e5cc1dd662f2af", size = 66453, upload-time = "2025-10-08T09:15:34.225Z" },
{ url = "https://files.pythonhosted.org/packages/16/67/93f80545eb1792b61a217fa7f06d5e5cb9e0055bed867f43e2b8e012e137/msgpack-1.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:897c478140877e5307760b0ea66e0932738879e7aa68144d9b78ea4c8302a84a", size = 85264, upload-time = "2025-10-08T09:15:35.61Z" },
{ url = "https://files.pythonhosted.org/packages/87/1c/33c8a24959cf193966ef11a6f6a2995a65eb066bd681fd085afd519a57ce/msgpack-1.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a668204fa43e6d02f89dbe79a30b0d67238d9ec4c5bd8a940fc3a004a47b721b", size = 89076, upload-time = "2025-10-08T09:15:36.619Z" },
{ url = "https://files.pythonhosted.org/packages/fc/6b/62e85ff7193663fbea5c0254ef32f0c77134b4059f8da89b958beb7696f3/msgpack-1.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5559d03930d3aa0f3aacb4c42c776af1a2ace2611871c84a75afe436695e6245", size = 435242, upload-time = "2025-10-08T09:15:37.647Z" },
{ url = "https://files.pythonhosted.org/packages/c1/47/5c74ecb4cc277cf09f64e913947871682ffa82b3b93c8dad68083112f412/msgpack-1.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70c5a7a9fea7f036b716191c29047374c10721c389c21e9ffafad04df8c52c90", size = 432509, upload-time = "2025-10-08T09:15:38.794Z" },
{ url = "https://files.pythonhosted.org/packages/24/a4/e98ccdb56dc4e98c929a3f150de1799831c0a800583cde9fa022fa90602d/msgpack-1.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f2cb069d8b981abc72b41aea1c580ce92d57c673ec61af4c500153a626cb9e20", size = 415957, upload-time = "2025-10-08T09:15:40.238Z" },
{ url = "https://files.pythonhosted.org/packages/da/28/6951f7fb67bc0a4e184a6b38ab71a92d9ba58080b27a77d3e2fb0be5998f/msgpack-1.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d62ce1f483f355f61adb5433ebfd8868c5f078d1a52d042b0a998682b4fa8c27", size = 422910, upload-time = "2025-10-08T09:15:41.505Z" },
{ url = "https://files.pythonhosted.org/packages/f0/03/42106dcded51f0a0b5284d3ce30a671e7bd3f7318d122b2ead66ad289fed/msgpack-1.1.2-cp314-cp314t-win32.whl", hash = "sha256:1d1418482b1ee984625d88aa9585db570180c286d942da463533b238b98b812b", size = 75197, upload-time = "2025-10-08T09:15:42.954Z" },
{ url = "https://files.pythonhosted.org/packages/15/86/d0071e94987f8db59d4eeb386ddc64d0bb9b10820a8d82bcd3e53eeb2da6/msgpack-1.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:5a46bf7e831d09470ad92dff02b8b1ac92175ca36b087f904a0519857c6be3ff", size = 85772, upload-time = "2025-10-08T09:15:43.954Z" },
{ url = "https://files.pythonhosted.org/packages/81/f2/08ace4142eb281c12701fc3b93a10795e4d4dc7f753911d836675050f886/msgpack-1.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d99ef64f349d5ec3293688e91486c5fdb925ed03807f64d98d205d2713c60b46", size = 70868, upload-time = "2025-10-08T09:15:44.959Z" },
]
[[package]]
name = "msgspec"
version = "0.21.1"
@@ -2993,6 +3079,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/be/2f/c67d49afd31c3b02a02ecb5dd07399ed35298042e1b50d166efe2068bb0e/pandas_ta-0.4.71b0-py3-none-any.whl", hash = "sha256:b1f37831811462685be3ef456cfebc0615ce9c8a4eb31bbaa6b341e1a7767a84", size = 240265, upload-time = "2025-09-14T19:08:34.83Z" },
]
[[package]]
name = "pandas-ta-classic"
version = "0.4.47"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "pandas" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/83/6789ac41666a71d995a3d00d66067cae42f76f0996917e4560bb5eae5aa8/pandas_ta_classic-0.4.47-py3-none-any.whl", hash = "sha256:5dcb3b61c5affe9a7625079ccbab40e647d4ba0d49932e287ba43c3c3a28b0ec", size = 266591, upload-time = "2026-03-17T12:22:35.248Z" },
]
[[package]]
name = "pandocfilters"
version = "1.5.1"
@@ -3470,6 +3568,36 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
]
[[package]]
name = "pycryptodome"
version = "3.23.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8e/a6/8452177684d5e906854776276ddd34eca30d1b1e15aa1ee9cefc289a33f5/pycryptodome-3.23.0.tar.gz", hash = "sha256:447700a657182d60338bab09fdb27518f8856aecd80ae4c6bdddb67ff5da44ef", size = 4921276, upload-time = "2025-05-17T17:21:45.242Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/5d/bdb09489b63cd34a976cc9e2a8d938114f7a53a74d3dd4f125ffa49dce82/pycryptodome-3.23.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:0011f7f00cdb74879142011f95133274741778abba114ceca229adbf8e62c3e4", size = 2495152, upload-time = "2025-05-17T17:20:20.833Z" },
{ url = "https://files.pythonhosted.org/packages/a7/ce/7840250ed4cc0039c433cd41715536f926d6e86ce84e904068eb3244b6a6/pycryptodome-3.23.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:90460fc9e088ce095f9ee8356722d4f10f86e5be06e2354230a9880b9c549aae", size = 1639348, upload-time = "2025-05-17T17:20:23.171Z" },
{ url = "https://files.pythonhosted.org/packages/ee/f0/991da24c55c1f688d6a3b5a11940567353f74590734ee4a64294834ae472/pycryptodome-3.23.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4764e64b269fc83b00f682c47443c2e6e85b18273712b98aa43bcb77f8570477", size = 2184033, upload-time = "2025-05-17T17:20:25.424Z" },
{ url = "https://files.pythonhosted.org/packages/54/16/0e11882deddf00f68b68dd4e8e442ddc30641f31afeb2bc25588124ac8de/pycryptodome-3.23.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb8f24adb74984aa0e5d07a2368ad95276cf38051fe2dc6605cbcf482e04f2a7", size = 2270142, upload-time = "2025-05-17T17:20:27.808Z" },
{ url = "https://files.pythonhosted.org/packages/d5/fc/4347fea23a3f95ffb931f383ff28b3f7b1fe868739182cb76718c0da86a1/pycryptodome-3.23.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d97618c9c6684a97ef7637ba43bdf6663a2e2e77efe0f863cce97a76af396446", size = 2309384, upload-time = "2025-05-17T17:20:30.765Z" },
{ url = "https://files.pythonhosted.org/packages/6e/d9/c5261780b69ce66d8cfab25d2797bd6e82ba0241804694cd48be41add5eb/pycryptodome-3.23.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9a53a4fe5cb075075d515797d6ce2f56772ea7e6a1e5e4b96cf78a14bac3d265", size = 2183237, upload-time = "2025-05-17T17:20:33.736Z" },
{ url = "https://files.pythonhosted.org/packages/5a/6f/3af2ffedd5cfa08c631f89452c6648c4d779e7772dfc388c77c920ca6bbf/pycryptodome-3.23.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:763d1d74f56f031788e5d307029caef067febf890cd1f8bf61183ae142f1a77b", size = 2343898, upload-time = "2025-05-17T17:20:36.086Z" },
{ url = "https://files.pythonhosted.org/packages/9a/dc/9060d807039ee5de6e2f260f72f3d70ac213993a804f5e67e0a73a56dd2f/pycryptodome-3.23.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:954af0e2bd7cea83ce72243b14e4fb518b18f0c1649b576d114973e2073b273d", size = 2269197, upload-time = "2025-05-17T17:20:38.414Z" },
{ url = "https://files.pythonhosted.org/packages/f9/34/e6c8ca177cb29dcc4967fef73f5de445912f93bd0343c9c33c8e5bf8cde8/pycryptodome-3.23.0-cp313-cp313t-win32.whl", hash = "sha256:257bb3572c63ad8ba40b89f6fc9d63a2a628e9f9708d31ee26560925ebe0210a", size = 1768600, upload-time = "2025-05-17T17:20:40.688Z" },
{ url = "https://files.pythonhosted.org/packages/e4/1d/89756b8d7ff623ad0160f4539da571d1f594d21ee6d68be130a6eccb39a4/pycryptodome-3.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6501790c5b62a29fcb227bd6b62012181d886a767ce9ed03b303d1f22eb5c625", size = 1799740, upload-time = "2025-05-17T17:20:42.413Z" },
{ url = "https://files.pythonhosted.org/packages/5d/61/35a64f0feaea9fd07f0d91209e7be91726eb48c0f1bfc6720647194071e4/pycryptodome-3.23.0-cp313-cp313t-win_arm64.whl", hash = "sha256:9a77627a330ab23ca43b48b130e202582e91cc69619947840ea4d2d1be21eb39", size = 1703685, upload-time = "2025-05-17T17:20:44.388Z" },
{ url = "https://files.pythonhosted.org/packages/db/6c/a1f71542c969912bb0e106f64f60a56cc1f0fabecf9396f45accbe63fa68/pycryptodome-3.23.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:187058ab80b3281b1de11c2e6842a357a1f71b42cb1e15bce373f3d238135c27", size = 2495627, upload-time = "2025-05-17T17:20:47.139Z" },
{ url = "https://files.pythonhosted.org/packages/6e/4e/a066527e079fc5002390c8acdd3aca431e6ea0a50ffd7201551175b47323/pycryptodome-3.23.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cfb5cd445280c5b0a4e6187a7ce8de5a07b5f3f897f235caa11f1f435f182843", size = 1640362, upload-time = "2025-05-17T17:20:50.392Z" },
{ url = "https://files.pythonhosted.org/packages/50/52/adaf4c8c100a8c49d2bd058e5b551f73dfd8cb89eb4911e25a0c469b6b4e/pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67bd81fcbe34f43ad9422ee8fd4843c8e7198dd88dd3d40e6de42ee65fbe1490", size = 2182625, upload-time = "2025-05-17T17:20:52.866Z" },
{ url = "https://files.pythonhosted.org/packages/5f/e9/a09476d436d0ff1402ac3867d933c61805ec2326c6ea557aeeac3825604e/pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8987bd3307a39bc03df5c8e0e3d8be0c4c3518b7f044b0f4c15d1aa78f52575", size = 2268954, upload-time = "2025-05-17T17:20:55.027Z" },
{ url = "https://files.pythonhosted.org/packages/f9/c5/ffe6474e0c551d54cab931918127c46d70cab8f114e0c2b5a3c071c2f484/pycryptodome-3.23.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa0698f65e5b570426fc31b8162ed4603b0c2841cbb9088e2b01641e3065915b", size = 2308534, upload-time = "2025-05-17T17:20:57.279Z" },
{ url = "https://files.pythonhosted.org/packages/18/28/e199677fc15ecf43010f2463fde4c1a53015d1fe95fb03bca2890836603a/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:53ecbafc2b55353edcebd64bf5da94a2a2cdf5090a6915bcca6eca6cc452585a", size = 2181853, upload-time = "2025-05-17T17:20:59.322Z" },
{ url = "https://files.pythonhosted.org/packages/ce/ea/4fdb09f2165ce1365c9eaefef36625583371ee514db58dc9b65d3a255c4c/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_i686.whl", hash = "sha256:156df9667ad9f2ad26255926524e1c136d6664b741547deb0a86a9acf5ea631f", size = 2342465, upload-time = "2025-05-17T17:21:03.83Z" },
{ url = "https://files.pythonhosted.org/packages/22/82/6edc3fc42fe9284aead511394bac167693fb2b0e0395b28b8bedaa07ef04/pycryptodome-3.23.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:dea827b4d55ee390dc89b2afe5927d4308a8b538ae91d9c6f7a5090f397af1aa", size = 2267414, upload-time = "2025-05-17T17:21:06.72Z" },
{ url = "https://files.pythonhosted.org/packages/59/fe/aae679b64363eb78326c7fdc9d06ec3de18bac68be4b612fc1fe8902693c/pycryptodome-3.23.0-cp37-abi3-win32.whl", hash = "sha256:507dbead45474b62b2bbe318eb1c4c8ee641077532067fec9c1aa82c31f84886", size = 1768484, upload-time = "2025-05-17T17:21:08.535Z" },
{ url = "https://files.pythonhosted.org/packages/54/2f/e97a1b8294db0daaa87012c24a7bb714147c7ade7656973fd6c736b484ff/pycryptodome-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:c75b52aacc6c0c260f204cbdd834f76edc9fb0d8e0da9fbf8352ef58202564e2", size = 1799636, upload-time = "2025-05-17T17:21:10.393Z" },
{ url = "https://files.pythonhosted.org/packages/18/3d/f9441a0d798bf2b1e645adc3265e55706aead1255ccdad3856dbdcffec14/pycryptodome-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:11eeeb6917903876f134b56ba11abe95c0b0fd5e3330def218083c7d98bbcb3c", size = 1703675, upload-time = "2025-05-17T17:21:13.146Z" },
]
[[package]]
name = "pydantic"
version = "2.13.3"
@@ -3672,6 +3800,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216, upload-time = "2024-09-29T09:24:11.978Z" },
]
[[package]]
name = "python-binance"
version = "1.0.36"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiohttp" },
{ name = "dateparser" },
{ name = "pycryptodome" },
{ name = "requests" },
{ name = "six" },
{ name = "websockets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8d/49/20a473a228fc78e6cad0a1d0198d7b8dbbb20211aefd77b9ff3327c87308/python_binance-1.0.36.tar.gz", hash = "sha256:d7b0668ff1b620b30d95b52f9f61b557748eac2d6c4d5f17fe21c356e5e0c541", size = 191487, upload-time = "2026-03-24T11:23:15.936Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/a3/1501f902fc71d6e03081844b7af16acd69f5a476d33441287f2de4af6bd6/python_binance-1.0.36-py2.py3-none-any.whl", hash = "sha256:4a7ffdf9c0e43cdc8136a5169993f6f6520bb8b3334e2fbc9ed127ab4fb3c8b6", size = 148503, upload-time = "2026-03-24T11:23:13.969Z" },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -3711,6 +3856,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" },
]
[[package]]
name = "python-telegram-bot"
version = "13.13"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "apscheduler" },
{ name = "cachetools" },
{ name = "certifi" },
{ name = "pytz" },
{ name = "tornado" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0f/9a/52e8bfc2981cee700ee83b6d7c6f781c4f21e441898bf60834936c0e2452/python-telegram-bot-13.13.tar.gz", hash = "sha256:4296d81a38b7e5ef1f9795651128e58fb354678b8dc4db93ca166c96828c57b2", size = 351895, upload-time = "2022-06-28T17:56:05.405Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/59/39/30c73b9c049875870f46dae148528ad8671a5182263448b9c824c512e032/python_telegram_bot-13.13-py3-none-any.whl", hash = "sha256:13f83ec4433f24a67e7a2df0206dfde83c1627c92880a2fcf95a83585b3cc589", size = 513361, upload-time = "2022-06-28T17:55:57.953Z" },
]
[[package]]
name = "pytz"
version = "2026.1.post1"
@@ -3871,6 +4032,33 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1a/d4/484041d5c5a5d3ec8df5c74fef3054fec004dab554f6c3c00187888f8cc1/quantstats-0.0.81-py3-none-any.whl", hash = "sha256:6af2b501f61917c8c960faaf8007eb858d970ab02a3cf0d7dc19f048953e15f3", size = 90067, upload-time = "2026-01-13T18:18:18.451Z" },
]
[[package]]
name = "ray"
version = "2.55.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "filelock" },
{ name = "jsonschema" },
{ name = "msgpack" },
{ name = "packaging" },
{ name = "protobuf" },
{ name = "pyyaml" },
{ name = "requests" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ac/3a/4d34f471a68b958b7f94c974c19ad6836a61a2dc16393df4294169a2e4b0/ray-2.55.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:137f9006eee28caab8260803cca314f37bbda3fc94fdfa31c770b5d019626ad8", size = 65822379, upload-time = "2026-04-22T20:09:58.064Z" },
{ url = "https://files.pythonhosted.org/packages/f1/13/0db535102d0256b350ca116d8987588aca1a1f9ebb4638e1e1ff88bbcef8/ray-2.55.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:26541f69bb55607ef8335baac75b2ed12ff2ce02d56313219b29eda003039221", size = 72910802, upload-time = "2026-04-22T20:10:04.382Z" },
{ url = "https://files.pythonhosted.org/packages/4c/f8/fffadf3f4285eebd460e4d7f2ed1c0cd641ed89613c3f49eb881ee9fa7e2/ray-2.55.1-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:263705f6bab29e7622a94f82da25fd7f9cead76cdf89a07aab28f79cdf8f9d95", size = 73765203, upload-time = "2026-04-22T20:10:10.495Z" },
{ url = "https://files.pythonhosted.org/packages/10/f7/5acb86fc9625a0e6bbc40e1c7d42c60770e78585439a921c32738b6d675a/ray-2.55.1-cp312-cp312-win_amd64.whl", hash = "sha256:9ad56704c8bd7e92130162f9c58e4ef473609515637673d5a36e761f95335206", size = 27865547, upload-time = "2026-04-22T20:10:15.364Z" },
{ url = "https://files.pythonhosted.org/packages/d5/95/898699cc1a6a5f304ea95376d079843b5c05f4c8c1ec7e55a5cc7ffcea50/ray-2.55.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:f9844a9272ef2e6eb5771025866072cf4234cf4c7cc1a31e235b7de7111864be", size = 65766823, upload-time = "2026-04-22T20:10:20.786Z" },
{ url = "https://files.pythonhosted.org/packages/c9/13/87deecc090c672e45a0cf6f5eef511de448b93f37ef18fd10eb8e8557a0d/ray-2.55.1-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:b415d590e062f248907e0fe42994943f11726b7178fcf4b1cf5546721fb1a5f8", size = 72818676, upload-time = "2026-04-22T20:10:26.705Z" },
{ url = "https://files.pythonhosted.org/packages/71/d7/fc95d3b8824c62105c64aa1b59c59600b581f608d78a2af753e010936dc9/ray-2.55.1-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:1380e043eb57cde69b7e9199c6f2558ceeb8f0fc41c97d1d5e50ea042115f302", size = 73678908, upload-time = "2026-04-22T20:10:32.795Z" },
{ url = "https://files.pythonhosted.org/packages/a9/03/7e552325572e067b23a4584bda8dc6a67af8bd7e03c424d2610bfa93112d/ray-2.55.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:b062045c64c2bce39a51661624f7292c7bbf30f2a9d878627aae31d46da5712d", size = 65774106, upload-time = "2026-04-22T20:10:39.885Z" },
{ url = "https://files.pythonhosted.org/packages/94/62/607a8859520ce350861425f11f8e15d66c15ee33e6aac812f9e2889b5df4/ray-2.55.1-cp314-cp314-manylinux2014_aarch64.whl", hash = "sha256:4e618d61e1b14b6fde9a586151f3fd9d435b0b85048b997bcaa7f4a533747b2b", size = 72814044, upload-time = "2026-04-22T20:10:46.985Z" },
{ url = "https://files.pythonhosted.org/packages/04/5a/0699bef04a72d7dc54462960d07ef7a19cd8b1e09979880aba2b6d13cca2/ray-2.55.1-cp314-cp314-manylinux2014_x86_64.whl", hash = "sha256:156ed3e72ad95b645d2006cd71a8dddbcc89b56bfc00027f6225adf78bd9cb74", size = 73644244, upload-time = "2026-04-22T20:10:52.973Z" },
]
[[package]]
name = "referencing"
version = "0.37.0"
@@ -4421,6 +4609,14 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/7f/3de5402f39890ac5660b86bcf5c03f9d855dad5c4ed764866d7b592b46fd/sse_starlette-3.3.4-py3-none-any.whl", hash = "sha256:84bb06e58939a8b38d8341f1bc9792f06c2b53f48c608dd207582b664fc8f3c1", size = 14330, upload-time = "2026-03-29T09:00:21.846Z" },
]
[[package]]
name = "sseclient-py"
version = "1.9.0"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4d/2e/59920f7d66b7f9932a3d83dd0ec53fab001be1e058bf582606fe414a5198/sseclient_py-1.9.0-py3-none-any.whl", hash = "sha256:340062b1587fc2880892811e2ab5b176d98ef3eee98b3672ff3a3ba1e8ed0f6f", size = 8351, upload-time = "2026-01-02T23:39:30.995Z" },
]
[[package]]
name = "stack-data"
version = "0.6.3"
@@ -4877,6 +5073,44 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4c/08/c15c7ade057b0633ac39fe6f5fffa37c306304745538c4f9187a05e9aa69/vectorbt-1.0.0-py3-none-any.whl", hash = "sha256:c596e11bdad985181150f3b3c8db0a7322c738fbb64c7a919a0418e99326cc13", size = 451657, upload-time = "2026-04-22T13:29:59.982Z" },
]
[package.optional-dependencies]
full = [
{ name = "alpaca-py" },
{ name = "ccxt" },
{ name = "pandas-ta-classic" },
{ name = "python-binance" },
{ name = "python-telegram-bot" },
{ name = "quantstats" },
{ name = "ray" },
{ name = "ta" },
{ name = "ta-lib" },
{ name = "yfinance" },
]
rust = [
{ name = "vectorbt-rust" },
]
[[package]]
name = "vectorbt-rust"
version = "1.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/02/32/a5b7193cfaaa4485e6b3c0156e15701b21bb505b66e035f01594675d1242/vectorbt_rust-1.0.0.tar.gz", hash = "sha256:d9ef94a24f970be4d566d5c41d20c436cb1593ae2e960ee2ae9851ce5042b9aa", size = 74476, upload-time = "2026-04-22T13:29:36.715Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fd/62/3df4be3717f3c2df07f51d380f9beae7100e78656b1879710e62c2d36f28/vectorbt_rust-1.0.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ceceb76bb1f485dc66eb3aa4ee2e4934b704c8753884a4c2eebf156427ecf374", size = 845019, upload-time = "2026-04-22T13:29:22.458Z" },
{ url = "https://files.pythonhosted.org/packages/3d/5f/b96d327002901177fc05c64d9a99bbff5673a20081bb1a3a2e35e02a8707/vectorbt_rust-1.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:689794e3f3a91c0b8cfc3565076b01864a51a95a8a003410c972942a424a8014", size = 766156, upload-time = "2026-04-22T13:29:24.034Z" },
{ url = "https://files.pythonhosted.org/packages/35/fc/2ef2905a752f17329c768ddbfdff3cd14deab932e1010ca140bde9ab32b4/vectorbt_rust-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a204323805b5cc721ce4706f8d117886d0e860a2c74758aad965c65912aabc8f", size = 781441, upload-time = "2026-04-22T13:29:25.418Z" },
{ url = "https://files.pythonhosted.org/packages/8d/c6/708752ae698161575fe949ba659341e24ebd6341ed683e1f2b66b7a84a35/vectorbt_rust-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a379f7ae66ef5733d5de6ae1f23f8434d4411918d04b9b2baf7e8db105ca759", size = 873255, upload-time = "2026-04-22T13:29:27.081Z" },
{ url = "https://files.pythonhosted.org/packages/ec/fe/fe1b7b07520fbb1559187ea5a962db0246b5c3f6e24ace2f7cd6a5346fab/vectorbt_rust-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:f6adbd831d1615eca9e6499296345d092f670d668440d2b5043523f3753c62fc", size = 885340, upload-time = "2026-04-22T13:29:28.381Z" },
{ url = "https://files.pythonhosted.org/packages/5c/f5/02dae5b57efab7e8974f5d9753e69cbc4cff23f8432b0d6c15a86beea7b2/vectorbt_rust-1.0.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:65305a46fd522978208bb860c4f6a7a01f84a2105801661cf48ee24d5d8f1b20", size = 844571, upload-time = "2026-04-22T13:29:29.672Z" },
{ url = "https://files.pythonhosted.org/packages/98/fb/efd9ec9a448c87c069e1e82bf5362ddf58034fccad0c3ca71463e3ffffab/vectorbt_rust-1.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:29caf153a3e0ca9fb285970ae7086c6da667b309535e665f4e78d4f5797f10b0", size = 765910, upload-time = "2026-04-22T13:29:31.11Z" },
{ url = "https://files.pythonhosted.org/packages/8c/91/6749335b37b4b88468c89c8d30e5723a821e870b5eed62a9f2c3fca6808c/vectorbt_rust-1.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc623f8a824b65a15b049461d07eeb545928ef9e0524389d549dba3060a786da", size = 781120, upload-time = "2026-04-22T13:29:32.689Z" },
{ url = "https://files.pythonhosted.org/packages/ed/3d/523288d04f77660698a6db2d64ec0c7f72299993e711d09c00ae98993952/vectorbt_rust-1.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf8ae04a2dbd6ead5f4f36dace37c211e31a9b354d2daed20fcdc55a3dc5aefe", size = 872877, upload-time = "2026-04-22T13:29:34.086Z" },
{ url = "https://files.pythonhosted.org/packages/7c/9b/7f4138ff4309b6aa7f4cc3accde9bf8260d959c303dcfe3fc59271725679/vectorbt_rust-1.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8702515db6f46ef9a80cdd1d2c8c8617a148bf1861721d308cc299c8cc408e35", size = 884993, upload-time = "2026-04-22T13:29:35.331Z" },
]
[[package]]
name = "wcwidth"
version = "0.6.0"
+172
View File
@@ -0,0 +1,172 @@
"""
Pre-warm the defeatbeta httpfs cache to a persistent directory.
By default the library caches to /tmp/defeatbeta/cache/ which is wiped on
reboot. This script redirects the cache to ~/.cache/defeatbeta/ and then
does a full SELECT * on every table in parallel, forcing all parquet blocks
to be fetched and stored. Re-running is safe: already-warmed tables are
skipped.
Usage:
uv run python warmup_cache.py
After this runs once (~3-4 GB download), use persistent_cache.py in your
notebooks/scripts to read from the cache with no network:
from persistent_cache import enable_persistent_cache
enable_persistent_cache()
from defeatbeta_api.data.ticker import Ticker
t = Ticker("AAPL")
"""
import json
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
# ── Persistent cache location ────────────────────────────────────────────────
# Everything this script persists lives under ~/.cache/defeatbeta. The
# directory is created up front, including any missing parents.
CACHE_DIR = Path.home().joinpath(".cache", "defeatbeta")
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# JSON file that records which tables have already been fetched.
STATE_FILE = CACHE_DIR.joinpath("warmup_done.json")
# ── Redirect cache dir before the library touches it ────────────────────────
import defeatbeta_api.utils.util as _util
# Swap the library's cache-directory helper for one that always returns
# CACHE_DIR. NOTE(review): presumably the client modules imported below look
# this helper up on the util module at call time; a module that binds it with
# `from ... import` before this line would keep the original — confirm.
_util.validate_httpfs_cache_directory = lambda: str(CACHE_DIR)
from defeatbeta_api.client.duckdb_conf import Configuration
from defeatbeta_api.client.duckdb_client import get_duckdb_client
from defeatbeta_api.client.hugging_face_client import HuggingFaceClient
from defeatbeta_api.utils.const import tables
WORKERS = 2  # keep concurrent connections low to avoid HuggingFace 429s
# Client settings used for the whole warm-up run.
config = Configuration(
    # 500 MiB. NOTE(review): the module docstring mentions a ~3-4 GB download;
    # confirm what this setting actually limits.
    cache_httpfs_disk_size=500 * 1024 * 1024,
    http_retries=10,
    http_retry_wait_ms=10_000,  # 10s base wait on 429/5xx
    http_retry_backoff=2.0,  # doubles each retry: 10s, 20s, 40s …
    http_timeout=180,  # presumably seconds — TODO confirm
)
# Both objects are shared by every fetch_table() call, which run on the pool's
# worker threads. NOTE(review): confirm the client is safe to share that way.
client = get_duckdb_client(config=config)
hf = HuggingFaceClient()
# ── Resume state ─────────────────────────────────────────────────────────────
def _load_done() -> set[str]:
    """Return the names of tables recorded as fully fetched by earlier runs.

    The names are read from the ``"done"`` list in ``STATE_FILE``. A missing,
    unreadable or malformed state file is treated as "nothing done yet", so
    the worst case is re-fetching tables rather than aborting the warm-up.

    Returns:
        The recorded table names, or an empty set when no usable state exists.
    """
    try:
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
        return set(state["done"])
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: file missing or unreadable. ValueError: invalid JSON or
        # undecodable bytes. KeyError/TypeError: JSON that is not shaped like
        # {"done": [...]}. Any other exception is a programming error and is
        # allowed to surface instead of silently restarting the download.
        return set()
def _save_done(done: set[str]) -> None:
    """Persist the names of completed tables to ``STATE_FILE``.

    The JSON is written to a sibling ``.tmp`` file first and then moved over
    the real file, so the state file is swapped in as a whole rather than
    being rewritten in place.

    Args:
        done: Names of the tables that have been fully fetched.
    """
    tmp = STATE_FILE.with_suffix(".tmp")
    tmp.write_text(json.dumps({"done": sorted(done)}))
    # replace() overwrites an existing target on every platform, whereas
    # rename() raises FileExistsError on Windows once the state file exists,
    # which would break every save after the first one.
    tmp.replace(STATE_FILE)
# Work out what is left to do: every entry of `tables` that the state file
# does not already list as completed, in the order `tables` yields them.
already_done = _load_done()
todo = list(filter(lambda name: name not in already_done, tables))
# ── Helpers ───────────────────────────────────────────────────────────────────
def cache_mb() -> float:
    """Return the combined size of the files directly inside ``CACHE_DIR``.

    Only regular files at the top level are counted (sub-directories are not
    descended into). The result is in units of 10**6 bytes. Any error while
    listing or stat-ing yields ``0.0`` instead of propagating.
    """
    try:
        total_bytes = 0
        for entry in CACHE_DIR.iterdir():
            if entry.is_file():
                total_bytes += entry.stat().st_size
        return total_bytes / 1e6
    except Exception:
        return 0.0
# ── Shared state for the live reporter ───────────────────────────────────────
# Held while reading or updating _in_flight and _done.
_lock = threading.Lock()
# Tables whose fetch is currently running.
_in_flight: set[str] = set()
# Tables finished so far, starting with the ones completed by earlier runs.
_done: list[str] = [*already_done]
# Set by the main thread to tell the reporter thread to exit.
_stop_reporter = threading.Event()
def _reporter() -> None:
    """Redraw a one-line progress summary until ``_stop_reporter`` is set.

    Each pass rewrites the current terminal line (leading carriage return, no
    newline) with the cache size, the number of finished tables and the names
    of the tables being fetched, then waits up to two seconds. Once stopped,
    the line is blanked so later output starts on a clean line.
    """
    while not _stop_reporter.is_set():
        # Snapshot the shared state under the lock; format and print after.
        with _lock:
            finished = len(_done)
            active = sorted(_in_flight)
        names = ", ".join(active) or ""
        size_mb = cache_mb()
        status = (
            f"\r [live] cache={size_mb:>6.0f} MB | "
            f"done={finished}/{len(tables)} | "
            f"in-flight({len(active)}): {names} "
        )
        print(status, end="", flush=True)
        # wait() doubles as the sleep and returns early once stop is requested.
        _stop_reporter.wait(timeout=2)

    blank_line = "\r" + " " * 80 + "\r"
    print(blank_line, end="", flush=True)
def fetch_table(table: str) -> tuple[str, int, float]:
    """Fetch one table in full and record it as done.

    Runs ``SELECT *`` against the table's remote URL, making up to three
    attempts with a 60 s pause after the first failure and 120 s after the
    second. While the fetch is running the table is listed in ``_in_flight``;
    on success it is appended to ``_done`` and the resume state is saved.

    Args:
        table: Table name, as accepted by ``hf.get_url_path``.

    Returns:
        ``(table, row_count, elapsed_seconds)``. The elapsed time includes
        any pauses between attempts.

    Raises:
        Exception: Whatever the final attempt raised, once every attempt
            has failed.
    """
    max_attempts = 3
    base_wait_s = 60

    url = hf.get_url_path(table)
    with _lock:
        _in_flight.add(table)
    t0 = time.perf_counter()
    try:
        for attempt in range(1, max_attempts + 1):
            try:
                df = client.query(f"SELECT * FROM '{url}'")
                break
            except Exception as exc:
                if attempt == max_attempts:
                    raise
                wait = base_wait_s * attempt
                print(f"\n ! {table} failed (attempt {attempt}): {exc} — retrying in {wait}s")
                time.sleep(wait)
    except BaseException:
        # Clear the in-flight marker on every failure path, not only when the
        # last attempt fails: an error raised while reporting or pausing after
        # an earlier attempt must not leave the table listed as in flight.
        with _lock:
            _in_flight.discard(table)
        raise
    elapsed = time.perf_counter() - t0
    # One critical section, so the table moves from "in flight" to "done"
    # atomically and concurrent state-file writes are serialized.
    with _lock:
        _in_flight.discard(table)
        _done.append(table)
        _save_done(set(_done))
    return table, len(df), elapsed
# ── Run ───────────────────────────────────────────────────────────────────────
# Print the plan, then fetch every pending table on a small thread pool while
# a background thread keeps a live status line updated.
print(f"Cache dir : {CACHE_DIR}")
print(f"Tables : {len(todo)} to fetch ({len(already_done)} already done, skipping)")
print(f"Workers : {WORKERS}")
print()

if not todo:
    print("All tables already warmed. Nothing to do.")
    raise SystemExit(0)

for t in sorted(already_done):
    print(f" - {t:<40} (skip)")
print()

reporter = threading.Thread(target=_reporter, daemon=True)
reporter.start()

t_start = time.perf_counter()
results: list[tuple[str, int, float]] = []
# Tables that still failed after fetch_table() exhausted its own retries.
failures: list[tuple[str, Exception]] = []

try:
    with ThreadPoolExecutor(max_workers=WORKERS) as pool:
        futures = {pool.submit(fetch_table, t): t for t in todo}
        for future in as_completed(futures):
            try:
                table, rows, elapsed = future.result()
            except Exception as exc:
                # Keep reporting the remaining tables: the pool runs them
                # regardless, and successful ones are already saved in the
                # resume state, so aborting here would only hide the results.
                failures.append((futures[future], exc))
                print(f"\n ! {futures[future]} failed, giving up: {exc}")
                continue
            results.append((table, rows, elapsed))
            with _lock:
                n_done = len(_done)
            mb = cache_mb()
            print(
                f"{table:<40} {rows:>9,} rows {elapsed:5.1f}s "
                f"[{n_done}/{len(tables)} cache={mb:.0f} MB]"
            )
finally:
    # Stop the live status line on every exit path so it cannot overwrite the
    # summary or a traceback.
    _stop_reporter.set()
    reporter.join()

total_elapsed = time.perf_counter() - t_start
total_rows = sum(row_count for _, row_count, _ in results)
print()
print(f"Finished in {total_elapsed:.0f}s")
print(f"Fetched : {total_rows:,} rows across {len(results)} tables")
print(f"Cache size : {cache_mb():.0f} MB → {CACHE_DIR}")

if failures:
    print(f"Failed : {len(failures)} table(s); re-run to retry them")
    for failed_table, error in failures:
        print(f" ! {failed_table}: {error}")
    # Keep a non-zero exit status, as the uncaught exception used to produce.
    raise SystemExit(1)