Files
learn-trading/compare_apis.py
T
tomatocream 4e5af95272 feat: add defeatbeta-api integration with comprehensive comparison tools
- Add defeatbeta-api as primary financial data source (replaces yfinance for analysis)
- Add comprehensive Jupyter notebook tutorial (defeatbeta_tutorial.ipynb)
- Add API comparison script (compare_apis.py)
- Add data exploration script (explore_data.py)
- Add basic test script (test_defeatbeta.py)
- Add notebook runner script (run_notebook.sh)
- Add org-mode mapping documentation (docs/defeatbeta_mapping.org)
- Update pyproject.toml with defeatbeta-api dependency
- Add defeatbeta-api as git submodule for reference

DefeatBeta Advantages:
- No rate limits (HuggingFace hosted)
- Historical financial ratios (ROE, ROIC, WACC time series)
- Earnings call transcripts access
- Revenue segmentation by product/geography
- Automated DCF valuation with Excel output
- DuckDB-powered fast queries

Note: .envrc, .jupyter_checkpoints/, __marimo__/, AAPL.xlsx, tearsheet.html
and other generated files intentionally excluded
2026-04-25 17:56:10 +08:00

359 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive comparison of defeatbeta-api vs yfinance
"""
import pandas as pd
import time
from defeatbeta_api.data.ticker import Ticker
# yfinance is optional: when the import fails the script still runs and
# only the defeatbeta side of each comparison is shown.
try:
    import yfinance as yf
except ImportError:
    YFINANCE_AVAILABLE = False
    print("yfinance not installed - will show defeatbeta only")
    print("Install with: uv add yfinance\n")
else:
    YFINANCE_AVAILABLE = True
def compare_data_structures():
    """Print the shape of the price data returned by both APIs for AAPL.

    For defeatbeta-api, and for yfinance when it is installed, this prints
    the result's type, shape, column names, the last three rows and the
    elapsed time of the request.

    The elapsed time covers ticker construction plus the fetch for BOTH
    libraries, so the two figures can be compared directly.

    Returns:
        None. Everything is written to stdout.
    """
    print("=" * 80)
    print("DEFEATBETA-API vs YFINANCE: DATA STRUCTURE COMPARISON")
    print("=" * 80)

    # Test with Apple (AAPL)
    symbol = 'AAPL'
    print(f"\n{'='*80}")
    print(f"1. BASIC PRICE DATA - {symbol}")
    print("=" * 80)

    # DefeatBeta
    print("\n[DEFEATBETA-API]")
    print("-" * 40)
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    # The ticker is built inside the timed region to mirror the yfinance
    # branch below.
    start = time.perf_counter()
    db_ticker = Ticker(symbol)
    db_price = db_ticker.price()
    db_time = time.perf_counter() - start
    print("Method: ticker.price()")
    print(f"Type: {type(db_price).__name__}")
    print(f"Shape: {db_price.shape}")
    print(f"Columns: {list(db_price.columns)}")
    print("Sample data (latest 3 rows):")
    print(db_price.tail(3).to_string(index=False))
    print(f"Time taken: {db_time:.3f}s")

    # Yahoo Finance
    if YFINANCE_AVAILABLE:
        print("\n[YFINANCE]")
        print("-" * 40)
        start = time.perf_counter()
        yf_ticker = yf.Ticker(symbol)
        yf_price = yf_ticker.history(period="max")
        yf_time = time.perf_counter() - start
        print("Method: ticker.history(period='max')")
        print(f"Type: {type(yf_price).__name__}")
        print(f"Shape: {yf_price.shape}")
        print(f"Columns: {list(yf_price.columns)}")
        print("Sample data (latest 3 rows):")
        # Keep the index in the output: yfinance stores the trading date
        # there, so dropping it would print rows with no dates.
        print(yf_price.tail(3).to_string())
        print(f"Time taken: {yf_time:.3f}s")
def _latest_period_column(df):
    """Return the label of the most recent period column in *df*.

    Every column other than 'Breakdown' and 'TTM' is treated as a period
    column. Labels are parsed as dates and the newest one wins. When no
    label parses as a date the last column is returned, and when there
    are no period columns at all the result is None.
    """
    candidates = [col for col in df.columns if col not in ('Breakdown', 'TTM')]
    if not candidates:
        return None
    parsed = pd.to_datetime(pd.Series(candidates), errors='coerce')
    if parsed.isna().all():
        return candidates[-1]
    return candidates[parsed.idxmax()]


def compare_financial_data():
    """Print the structure of quarterly financial statements for MSFT.

    Shows defeatbeta-api's quarterly income statement (type, shape,
    columns, the first 15 breakdown items and a few key metrics) and,
    when yfinance is installed, the layout of yfinance's
    ``quarterly_financials`` frame.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\n{'='*80}")
    print("2. FINANCIAL STATEMENTS")
    print("=" * 80)

    symbol = 'MSFT'
    db_ticker = Ticker(symbol)

    print("\n[DEFEATBETA-API - Quarterly Income Statement]")
    print("-" * 40)
    income_stmt = db_ticker.quarterly_income_statement()
    df = income_stmt.df()
    print(f"Type: {type(income_stmt).__name__}")
    print(f"DataFrame shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print("\nBreakdown items available:")
    print(df['Breakdown'].head(15).tolist())

    # Pick the latest quarter by its date label rather than by position,
    # so the "Latest Q" value is right whichever way the columns are sorted.
    latest_period = _latest_period_column(df)

    print("\nKey metrics sample:")
    for metric in ['Total Revenue', 'Gross Profit', 'Net Income Common Stockholders']:
        matches = df[df['Breakdown'] == metric]
        if matches.empty:
            continue
        row = matches.iloc[0]
        latest_value = row[latest_period] if latest_period is not None else row.iloc[-1]
        print(f" {metric}: TTM={row['TTM']}, Latest Q={latest_value}")

    if YFINANCE_AVAILABLE:
        print("\n[YFINANCE - Quarterly Financials]")
        print("-" * 40)
        yf_ticker = yf.Ticker(symbol)
        yf_financials = yf_ticker.quarterly_financials
        print(f"Type: {type(yf_financials).__name__}")
        print(f"Shape: {yf_financials.shape}")
        # yfinance lays statements out with line items as rows and
        # period-end dates as columns, so dates come from .columns.
        print(f"Index (line items, sample): {list(yf_financials.index[:5])}")
        print(f"Columns (dates): {list(yf_financials.columns[:3])}")
def compare_valuation_metrics():
    """Print the structure of defeatbeta-api's valuation series for NVDA.

    For TTM EPS, TTM P/E and market capitalization in turn, prints the
    returned object's type, shape, columns and last three rows.
    """
    print("\n" + "=" * 80)
    print("3. VALUATION METRICS")
    print("=" * 80)

    ticker = Ticker('NVDA')

    # (section title, name of the Ticker method to call), in display order.
    sections = (
        ("TTM EPS", "ttm_eps"),
        ("TTM P/E", "ttm_pe"),
        ("Market Cap", "market_capitalization"),
    )
    for title, method_name in sections:
        print(f"\n[DEFEATBETA-API - {title}]")
        print("-" * 40)
        # Look the method up only now so each fetch happens after its
        # header has been printed.
        frame = getattr(ticker, method_name)()
        print(f"Type: {type(frame).__name__}")
        print(f"Shape: {frame.shape}")
        print(f"Columns: {list(frame.columns)}")
        print("\nLatest values:")
        print(frame.tail(3).to_string(index=False))
def compare_financial_ratios():
    """Print the structure of defeatbeta-api's ratio series for TSLA.

    ROE and ROIC are printed in full; WACC is limited to its last three
    rows. Each section also reports the result's type, shape and columns.
    """
    print("\n" + "=" * 80)
    print("4. FINANCIAL RATIOS")
    print("=" * 80)

    ticker = Ticker('TSLA')

    # (section title, Ticker method name, show only the last three rows?)
    sections = (
        ("ROE", "roe", False),
        ("ROIC", "roic", False),
        ("WACC", "wacc", True),
    )
    for title, method_name, latest_only in sections:
        print(f"\n[DEFEATBETA-API - {title}]")
        print("-" * 40)
        frame = getattr(ticker, method_name)()
        print(f"Type: {type(frame).__name__}")
        print(f"Shape: {frame.shape}")
        print(f"Columns: {list(frame.columns)}")
        if latest_only:
            print("\nLatest values:")
            print(frame.tail(3).to_string(index=False))
        else:
            print("\nData:")
            print(frame.to_string(index=False))
def _print_revenue_breakdown(breakdown):
    """Print type, shape, columns and last three rows of a revenue breakdown.

    Falls back to printing the raw value when *breakdown* has no ``shape``
    attribute, i.e. when it is not DataFrame-like.
    """
    print(f"Type: {type(breakdown).__name__}")
    if hasattr(breakdown, 'shape'):
        print(f"Shape: {breakdown.shape}")
        print(f"Columns: {list(breakdown.columns)}")
        print("\nLatest data:")
        print(breakdown.tail(3).to_string(index=False))
    else:
        print(f"Data: {breakdown}")


def compare_special_features():
    """Print defeatbeta-api's transcript and revenue-breakdown data for AAPL.

    Covers three sections: the earnings call transcript list (plus a short
    excerpt of the FY2025 Q4 transcript), revenue by segment, and revenue
    by geography.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\n{'='*80}")
    print("5. SPECIAL FEATURES - EARNINGS TRANSCRIPTS")
    print("=" * 80)

    symbol = 'AAPL'
    db_ticker = Ticker(symbol)

    print("\n[DEFEATBETA-API - Earnings Call Transcripts]")
    print("-" * 40)
    transcripts = db_ticker.earning_call_transcripts()
    transcript_list = transcripts.get_transcripts_list()
    print(f"Transcripts object type: {type(transcripts).__name__}")
    print(f"get_transcripts_list() type: {type(transcript_list).__name__}")
    print(f"Shape: {transcript_list.shape}")
    print(f"Columns: {list(transcript_list.columns)}")

    print("\nMost recent transcript:")
    # iloc[-1] raises IndexError on an empty frame, so guard it.
    if len(transcript_list) > 0:
        # NOTE(review): treats the last row as the most recent transcript;
        # confirm the list is ordered oldest-to-newest.
        recent = transcript_list.iloc[-1]
        print(f" FY {recent['fiscal_year']} Q{recent['fiscal_quarter']} ({recent['report_date']})")
        print(f" Transcript ID: {recent['transcripts_id']}")
        print(f" Paragraphs in transcript: {len(recent['transcripts'])}")
    else:
        print(" (no transcripts available)")

    # Get specific transcript
    print("\n Sample from Q4 2025 transcript:")
    q4_2025 = transcripts.get_transcript(2025, 4)
    if q4_2025 is not None and len(q4_2025) > 0:
        print(f" Type: {type(q4_2025).__name__}")
        print(f" Shape: {q4_2025.shape}")
        print(f" Columns: {list(q4_2025.columns)}")
        print(" First 2 paragraphs:")
        for _, row in q4_2025.head(2).iterrows():
            speaker = row['speaker']
            text = row['content']
            # Cap each excerpt at 100 characters to keep the output short.
            content = (text[:100] + "...") if len(text) > 100 else text
            print(f" {speaker}: {content}")
    else:
        # Say so explicitly instead of leaving a bare header.
        print(" (transcript not available)")

    print("\n[DEFEATBETA-API - Revenue by Segment]")
    print("-" * 40)
    # Same DataFrame-or-raw-value handling as the geography section below.
    _print_revenue_breakdown(db_ticker.revenue_by_segment())

    print("\n[DEFEATBETA-API - Revenue by Geography]")
    print("-" * 40)
    _print_revenue_breakdown(db_ticker.revenue_by_geography())
def show_all_available_methods():
    """Print the public attributes of an AAPL Ticker, grouped by category.

    Only names actually present on the Ticker instance are listed, and a
    category with no matching names is skipped entirely.
    """
    print("\n" + "=" * 80)
    print("6. COMPLETE API METHOD REFERENCE")
    print("=" * 80)

    ticker = Ticker('AAPL')

    print("\n[DEFEATBETA-API - All Ticker Methods]")
    print("-" * 40)

    public_names = [name for name in dir(ticker) if not name.startswith('_')]

    def names_containing(fragment):
        """Return the public names whose lowercase form contains *fragment*."""
        return [name for name in public_names if fragment in name.lower()]

    categories = {
        'Price & Volume': ['price'],
        'Valuation': ['ttm_eps', 'ttm_pe', 'market_capitalization', 'ps_ratio', 'pb_ratio', 'peg_ratio'],
        'Financial Ratios': ['roe', 'roic', 'roa', 'wacc', 'beta', 'equity_multiplier', 'asset_turnover'],
        'Income Statement': ['quarterly_income_statement', 'annual_income_statement'],
        'Balance Sheet': ['quarterly_balance_sheet', 'annual_balance_sheet'],
        'Cash Flow': ['quarterly_cash_flow', 'annual_cash_flow'],
        'Growth Metrics': names_containing('yoy_growth'),
        'Margin Metrics': names_containing('margin'),
        'Special Data': ['earning_call_transcripts', 'news', 'sec_filing', 'dividends', 'splits'],
        'Revenue Breakdown': ['revenue_by_segment', 'revenue_by_product', 'revenue_by_geography'],
        'Industry Metrics': names_containing('industry'),
        'Info & Calendar': ['info', 'calendar', 'currency', 'shares', 'officers'],
    }

    for category, candidates in categories.items():
        present = sorted(name for name in candidates if name in public_names)
        if not present:
            continue
        print(f"\n {category}:")
        for name in present:
            print(f" • ticker.{name}()")
def compare_data_reliability():
    """Print a static list of each library's advantages.

    No data is fetched; the text is fixed and written to stdout.
    """
    print("\n" + "=" * 80)
    print("7. DATA RELIABILITY COMPARISON")
    print("=" * 80)

    # (heading, bullet points) for each library, in display order.
    advantages = (
        ("[DEFEATBETA-API Advantages]", (
            "Data hosted on Hugging Face - no rate limits",
            "DuckDB engine for fast queries",
            "Sub-second query performance",
            "Consistent data format across all endpoints",
            "Historical data from 1990s for most stocks",
            "SEC filings, earnings transcripts available",
            "Revenue segmentation (by product/geography)",
            "Automated DCF valuation with Excel output",
            "LLM-powered analysis capabilities",
        )),
        ("[YFINANCE Advantages]", (
            "Real-time data (delayed by 15min)",
            "Larger ecosystem and community support",
            "More extensive options/derivatives data",
            "Institutional ownership data",
            "Analyst recommendations and price targets",
            "Splits and dividends detailed history",
        )),
    )
    for heading, points in advantages:
        print("\n" + heading)
        print("-" * 40)
        for point in points:
            print("✓ " + point)
def demonstrate_unique_features():
    """Show the DCF valuation call and list AI-related transcript methods.

    The DCF call is best-effort: any exception it raises is reported by
    type name only and the function carries on to the transcript section.
    """
    print("\n" + "=" * 80)
    print("8. UNIQUE DEFEATBETA-API FEATURES")
    print("=" * 80)

    print("\n[DCF Valuation]")
    print("-" * 40)
    ticker = Ticker('AAPL')
    try:
        valuation = ticker.dcf()
        print("DCF method returns: " + type(valuation).__name__)
        if isinstance(valuation, dict):
            print(f"Keys: {list(valuation.keys())}")
        print("(Note: DCF generates professional Excel output with WACC, cash flow projections, fair price)")
    except Exception as exc:
        print("DCF error: " + type(exc).__name__)

    print("\n[AI-Powered Analysis on Earnings Transcripts]")
    print("-" * 40)
    call_transcripts = ticker.earning_call_transcripts()

    # Collect every attribute whose lowercase name mentions 'ai' or 'analyze'.
    ai_names = []
    for name in dir(call_transcripts):
        lowered = name.lower()
        if 'ai' in lowered or 'analyze' in lowered:
            ai_names.append(name)

    print("Available AI methods:")
    for name in ai_names:
        print(f" • transcripts.{name}()")
def main():
    """Run every comparison section in order, then print the summary."""
    print("DEFEATBETA-API vs YFINANCE - COMPREHENSIVE COMPARISON")
    print("=" * 80)

    # Sections run in the order of their printed numbering (1 to 8).
    sections = (
        compare_data_structures,
        compare_financial_data,
        compare_valuation_metrics,
        compare_financial_ratios,
        compare_special_features,
        show_all_available_methods,
        compare_data_reliability,
        demonstrate_unique_features,
    )
    for run_section in sections:
        run_section()

    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print("""
DEFEATBETA-API:
• Best for: Historical analysis, financial modeling, backtesting
• Strengths: No rate limits, comprehensive fundamentals, fast queries
• Data: Time-series heavy, quarterly/annual financial statements
• Unique: Revenue segmentation, WACC calculations, DCF automation
YFINANCE:
• Best for: Real-time data, market scanning, quick lookups
• Strengths: Real-time prices, analyst data, institutional ownership
• Data: Mixed frequency, real-time + historical
• Unique: Analyst recommendations, options data, recommendations
""")


# Run the comparison only when executed as a script, not on import.
if __name__ == "__main__":
    main()