Files
learn-trading/explore_data.py
T
tomatocream 4e5af95272 feat: add defeatbeta-api integration with comprehensive comparison tools
- Add defeatbeta-api as primary financial data source (replaces yfinance for analysis)
- Add comprehensive Jupyter notebook tutorial (defeatbeta_tutorial.ipynb)
- Add API comparison script (compare_apis.py)
- Add data exploration script (explore_data.py)
- Add basic test script (test_defeatbeta.py)
- Add notebook runner script (run_notebook.sh)
- Add org-mode mapping documentation (docs/defeatbeta_mapping.org)
- Update pyproject.toml with defeatbeta-api dependency
- Add defeatbeta-api as git submodule for reference

DefeatBeta Advantages:
- No rate limits (HuggingFace hosted)
- Historical financial ratios (ROE, ROIC, WACC time series)
- Earnings call transcripts access
- Revenue segmentation by product/geography
- Automated DCF valuation with Excel output
- DuckDB-powered fast queries

Note: .envrc, .jupyter_checkpoints/, __marimo__/, AAPL.xlsx, tearsheet.html
and other generated files intentionally excluded
2026-04-25 17:56:10 +08:00

147 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Explore defeatbeta-api data types and structure
"""
from defeatbeta_api.data.ticker import Ticker
def _describe_object(obj):
    """Print the type of *obj* and every attribute name without a leading underscore."""
    print(f"Type: {type(obj)}")
    print(f"Available methods: {[m for m in dir(obj) if not m.startswith('_')]}")


def _explore_statement(ticker):
    """Section 1: probe the quarterly income statement through data() and df()."""
    print("\n1. STATEMENT OBJECT EXPLORATION:")
    print("-" * 40)
    income_stmt = ticker.quarterly_income_statement()
    _describe_object(income_stmt)

    # The two accessors are probed in separate try blocks so that a missing or
    # failing one is reported without preventing the other from being tried.
    try:
        print("\nTrying income_stmt.data():")
        data_result = income_stmt.data()
        print(f"Result type: {type(data_result)}")
        if hasattr(data_result, 'shape'):
            print(f"Shape: {data_result.shape}")
    except Exception as e:
        print(f"data() method error: {type(e).__name__}: {e}")

    try:
        print("\nTrying income_stmt.df():")
        df_result = income_stmt.df()
        print(f"Result type: {type(df_result)}")
        if hasattr(df_result, 'shape'):
            print(f"Shape: {df_result.shape}")
            print(f"Columns: {list(df_result.columns)}")
            print("\nFirst few breakdown items:")
            print(df_result['Breakdown'].head(10).tolist())
    except Exception as e:
        print(f"df() method error: {type(e).__name__}: {e}")


def _explore_news(ticker):
    """Section 2: show the type and public attributes of the news object."""
    print("\n\n2. NEWS OBJECT EXPLORATION:")
    print("-" * 40)
    _describe_object(ticker.news())


def _explore_transcripts(ticker):
    """Section 3: show the transcripts object and its most recent entry."""
    print("\n\n3. TRANSCRIPTS OBJECT EXPLORATION:")
    print("-" * 40)
    transcripts = ticker.earning_call_transcripts()
    _describe_object(transcripts)

    try:
        transcript_list = transcripts.get_transcripts_list()
        print(f"\nget_transcripts_list() type: {type(transcript_list)}")
        if hasattr(transcript_list, 'shape'):
            print(f"Shape: {transcript_list.shape}")
            print(f"Columns: {list(transcript_list.columns)}")
            # Last row is treated as the most recent transcript; an empty list
            # raises here and is reported by the handler below.
            latest = transcript_list.iloc[-1]
            print(f"\nMost recent transcript: FY{latest['fiscal_year']} Q{latest['fiscal_quarter']}")
    except Exception as e:
        print(f"get_transcripts_list() error: {type(e).__name__}: {e}")


def _show_latest_metrics(ticker, symbol):
    """Section 4: print the latest price range, TTM EPS, growth, ROE and beta.

    Every lookup is skipped (or reported as zero rows, for price) when the
    returned frame is empty, so one missing series cannot abort the section.
    """
    print("\n\n4. PRACTICAL DATA RETRIEVAL:")
    print("-" * 40)
    print(f"Recent quarterly metrics for {symbol}:")

    # Price
    price = ticker.price()
    if price.empty:
        # Indexing iloc[0] / iloc[-1] on an empty frame raises IndexError.
        print("\nPrice data: 0 rows")
    else:
        first_date = price.iloc[0]['report_date']
        last_date = price.iloc[-1]['report_date']
        print(f"\nPrice data: {len(price)} rows, from {first_date} to {last_date}")

    # TTM metrics ('tailing_eps' is the column name the script reads; kept as-is)
    ttm_eps = ticker.ttm_eps()
    if not ttm_eps.empty:
        print(f"TTM EPS: Latest = ${ttm_eps.iloc[-1]['tailing_eps']:.2f}")

    # Growth
    growth = ticker.quarterly_revenue_yoy_growth()
    if not growth.empty:
        print(f"Revenue YoY Growth: {growth.iloc[-1]['yoy_growth']:.2%}")

    # Ratios
    ratios = ticker.roe()
    if not ratios.empty:
        print(f"ROE: {ratios.iloc[-1]['roe']:.2%}")

    # Beta
    beta = ticker.beta()
    if not beta.empty:
        print(f"Beta: {beta.iloc[-1]['beta']:.2f}")


def explore_data_types(symbol='MSFT', ticker=None):
    """Explore the different data types returned by the API.

    Prints four sections to stdout: the income-statement object, the news
    object, the earnings-call transcripts object, and a handful of latest
    metric values.

    Args:
        symbol: Ticker symbol used to build the ``Ticker`` when *ticker* is
            not supplied, and shown in the section 4 heading.
        ticker: Optional pre-built ticker-like object. When given, no
            ``Ticker`` is constructed and this object is queried instead.

    Returns:
        None. All results are printed.

    Raises:
        Whatever the ticker's fetch methods raise outside the guarded
        accessor probes (those calls are intentionally not wrapped).
    """
    print("EXPLORING DEFEATBETA-API DATA TYPES")
    print("="*70)

    subject = Ticker(symbol) if ticker is None else ticker

    _explore_statement(subject)
    _explore_news(subject)
    _explore_transcripts(subject)
    _show_latest_metrics(subject, symbol)
def _print_latest_metric(ticker, label, method_name, column, render):
    """Print the last row's *column* from ``ticker.<method_name>()``.

    The value is formatted by *render*. An empty frame prints nothing. Any
    exception is reported on its own line so that one failing metric does not
    suppress the ones that follow it.
    """
    try:
        frame = getattr(ticker, method_name)()
        if not frame.empty:
            print(f" {label}: {render(frame.iloc[-1][column])}")
    except Exception as e:
        # Best-effort exploration: report and move on to the next metric.
        print(f" {label} error: {type(e).__name__}: {e}")


def test_different_companies(companies=None, ticker_factory=None):
    """Test API with different types of companies.

    For each company prints the latest price, trailing P/E, market
    capitalisation (in billions) and ROE.

    Args:
        companies: Optional iterable of ``(symbol, name, sector)`` tuples.
            Defaults to one company each from Tech, Financial, Healthcare
            and Energy.
        ticker_factory: Optional callable taking a symbol and returning a
            ticker-like object. Defaults to ``Ticker``.

    Returns:
        None. All results are printed; failures are printed rather than raised.
    """
    print("\n\n5. TESTING DIFFERENT COMPANIES:")
    print("="*70)

    if companies is None:
        companies = [
            ('GOOGL', 'Alphabet', 'Tech'),
            ('JPM', 'JPMorgan Chase', 'Financial'),
            ('JNJ', 'Johnson & Johnson', 'Healthcare'),
            ('XOM', 'Exxon Mobil', 'Energy'),
        ]

    # (label, Ticker method, column holding the value, formatter)
    metrics = (
        ('Price', 'price', 'close', lambda v: f"${v:.2f}"),
        ('P/E', 'ttm_pe', 'ttm_pe', lambda v: f"{v:.2f}"),
        ('Market Cap', 'market_capitalization', 'market_capitalization',
         lambda v: f"${v/1e9:.1f}B"),
        ('ROE', 'roe', 'roe', lambda v: f"{v:.2%}"),
    )

    for symbol, name, sector in companies:
        print(f"\n{symbol} - {name} ({sector}):")
        print("-" * 30)
        try:
            # Built inside the try so a failure for one company is reported
            # and the loop continues with the next one.
            ticker = Ticker(symbol) if ticker_factory is None else ticker_factory(symbol)
        except Exception as e:
            print(f" Error: {type(e).__name__}: {e}")
            continue

        for label, method_name, column, render in metrics:
            _print_latest_metric(ticker, label, method_name, column, render)
def main():
    """Run the single-ticker walkthrough, then the multi-company spot check."""
    explore_data_types()
    test_different_companies()


if __name__ == "__main__":
    main()