chore: initial project scaffold for quant trading learning
Sets up project structure with yfinance-based OHLCV fetcher for top 100 S&P companies, Jupyter notebook scaffold, and uv-managed deps. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
import pandas as pd
|
||||
import yfinance as yf
|
||||
import pyarrow.parquet as pq
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
|
||||
# Ticker symbols to download, in fetch order.
# Each symbol must appear exactly once: a repeated symbol is fetched twice and
# its rows end up duplicated in the combined output.
# NOTE(review): "ADI" used to be listed twice; the repeat was removed, so this
# list currently holds 99 symbols. Confirm which symbol was meant to be the
# 100th entry and add it to the REGN row.
TOP100_TICKERS = [
    "AAPL", "MSFT", "NVDA", "AMZN", "META", "GOOGL", "GOOG", "TSLA", "BRK-B", "JPM",
    "LLY", "V", "UNH", "XOM", "MA", "AVGO", "PG", "HD", "COST", "JNJ",
    "ABBV", "MRK", "PEP", "KO", "CVX", "NFLX", "ADBE", "CRM", "AMD", "ACN",
    "MCD", "WMT", "BAC", "CSCO", "TMO", "ABT", "DHR", "CMCSA", "UPS", "LIN",
    "NEE", "PM", "TXN", "HON", "ORCL", "QCOM", "RTX", "INTU", "AMGN", "LOW",
    "AMAT", "BMY", "UNP", "AXP", "CAT", "BKNG", "SPGI", "VRTX", "DE", "GILD",
    "ELV", "ISRG", "ADP", "MDT", "LMT", "CVS", "CI", "ADI", "SYK", "SBUX",
    "MMC", "C", "TJX", "GS", "BLK", "SCHW", "ZTS", "ETN", "PLD", "BDX",
    "REGN", "CL", "CME", "CB", "SO", "DUK", "NOC", "FIS", "ITW",
    "APD", "NSC", "BSX", "GD", "FI", "MMM", "PGR", "WM", "AFL", "SLB",
]
|
||||
|
||||
|
||||
def fetch_top100_ohlcv(output_path: str, period: str = "max") -> None:
    """Download price history for every symbol in TOP100_TICKERS into one Parquet file.

    Each symbol is fetched with ``yf.Ticker(...).history(period=period,
    auto_adjust=False)``. The per-symbol frames are tagged with a ``ticker``
    column, concatenated, and written to
    ``<output_path>/top100_companies.parquet``. Symbols that return no rows or
    raise during download are skipped and listed in the final summary; a
    failure for one symbol never aborts the whole run.

    Args:
        output_path: Directory to write into. Created (with parents) if it
            does not exist.
        period: Passed through unchanged as the ``period`` argument of
            ``Ticker.history``.

    Returns:
        None. Nothing is written when no symbol produced any data.
    """
    out_dir = Path(output_path)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Drop repeated symbols (keeping first-seen order) so that a duplicate in
    # the ticker list cannot produce duplicate rows in the output file.
    tickers = list(dict.fromkeys(TOP100_TICKERS))
    total = len(tickers)

    print(f"Fetching OHLCV data for top 100 companies (period={period})...")
    frames = []
    errors = []

    for i, ticker in enumerate(tickers, 1):
        try:
            # flush=True: the line has no trailing newline, so without a flush
            # it may stay buffered until the (slow) download has finished.
            print(f"[{i}/{total}] Downloading {ticker}...", end=" ", flush=True)
            hist = yf.Ticker(ticker).history(period=period, auto_adjust=False)
            if hist.empty:
                print("no data, skipping")
                errors.append((ticker, "no data"))
                continue
            hist = hist.reset_index()
            # Dates are stored as strings; the min/max reported below are
            # therefore string comparisons.
            hist["Date"] = hist["Date"].astype(str)
            hist["ticker"] = ticker
            frames.append(hist)
            print(f"{len(hist)} rows")
            # Short pause after each successful download (presumably to go
            # easy on the data provider).
            time.sleep(0.1)
        except Exception as e:
            # Best-effort per symbol: record the failure and move on.
            print(f"error: {e}")
            errors.append((ticker, str(e)))

    if not frames:
        print("No data fetched.")
        return

    df = pd.concat(frames, ignore_index=True)
    # Keep a fixed column order, tolerating columns the download did not return.
    wanted = [
        "ticker", "Date", "Open", "High", "Low", "Close",
        "Volume", "Dividends", "Stock Splits",
    ]
    df = df[[c for c in wanted if c in df.columns]]

    parquet_path = out_dir / "top100_companies.parquet"
    df.to_parquet(parquet_path, index=False)

    print(f"\nSaved {len(df)} rows to {parquet_path}")
    print(f"Date range: {df['Date'].min()} -> {df['Date'].max()}")
    print(f"Tickers fetched: {df['ticker'].nunique()}")
    if errors:
        # Only the first ten failures are shown to keep the summary short.
        print(f"Errors ({len(errors)}): {errors[:10]}")
|
||||
|
||||
|
||||
# Script entry point: when run directly, fetch everything into data/top100_ohlcv.
if __name__ == "__main__":
    fetch_top100_ohlcv(output_path="data/top100_ohlcv")
|
||||
Reference in New Issue
Block a user