4e5af95272
- Add defeatbeta-api as primary financial data source (replaces yfinance for analysis) - Add comprehensive Jupyter notebook tutorial (defeatbeta_tutorial.ipynb) - Add API comparison script (compare_apis.py) - Add data exploration script (explore_data.py) - Add basic test script (test_defeatbeta.py) - Add notebook runner script (run_notebook.sh) - Add org-mode mapping documentation (docs/defeatbeta_mapping.org) - Update pyproject.toml with defeatbeta-api dependency - Add defeatbeta-api as git submodule for reference DefeatBeta Advantages: - No rate limits (HuggingFace hosted) - Historical financial ratios (ROE, ROIC, WACC time series) - Earnings call transcripts access - Revenue segmentation by product/geography - Automated DCF valuation with Excel output - DuckDB-powered fast queries Note: .envrc, .jupyter_checkpoints/, __marimo__/, AAPL.xlsx, tearsheet.html and other generated files intentionally excluded
359 lines
12 KiB
Python
359 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Comprehensive comparison of defeatbeta-api vs yfinance
|
|
"""
|
|
|
|
import pandas as pd
|
|
import time
|
|
from defeatbeta_api.data.ticker import Ticker
|
|
|
|
# Check if yfinance is available
|
|
try:
|
|
import yfinance as yf
|
|
YFINANCE_AVAILABLE = True
|
|
except ImportError:
|
|
YFINANCE_AVAILABLE = False
|
|
print("yfinance not installed - will show defeatbeta only")
|
|
print("Install with: uv add yfinance\n")
|
|
|
|
def compare_data_structures():
    """Print the structure of the price data each library returns for AAPL.

    The price history is fetched through defeatbeta-api and, when yfinance
    could be imported, through yfinance as well. For each library the result
    type, shape, column names, last three rows and fetch time are printed.
    """
    banner = "=" * 80
    print(banner)
    print("DEFEATBETA-API vs YFINANCE: DATA STRUCTURE COMPARISON")
    print(banner)

    # Test with Apple (AAPL)
    symbol = 'AAPL'

    print(f"\n{banner}")
    print(f"1. BASIC PRICE DATA - {symbol}")
    print(banner)

    # DefeatBeta
    print("\n[DEFEATBETA-API]")
    print("-" * 40)
    db_ticker = Ticker(symbol)
    # perf_counter() is the clock intended for measuring short durations;
    # time.time() follows the wall clock and can be adjusted mid-measurement.
    start = time.perf_counter()
    db_price = db_ticker.price()
    db_time = time.perf_counter() - start

    print("Method: ticker.price()")
    print(f"Type: {type(db_price).__name__}")
    print(f"Shape: {db_price.shape}")
    print(f"Columns: {list(db_price.columns)}")
    print("Sample data (latest 3 rows):")
    print(db_price.tail(3).to_string(index=False))
    print(f"Time taken: {db_time:.3f}s")

    # Yahoo Finance
    if not YFINANCE_AVAILABLE:
        return

    print("\n[YFINANCE]")
    print("-" * 40)
    # Build the ticker object before starting the clock so that, exactly as
    # for defeatbeta above, only the data-fetch call is timed.
    yf_ticker = yf.Ticker(symbol)
    start = time.perf_counter()
    yf_price = yf_ticker.history(period="max")
    yf_time = time.perf_counter() - start

    print("Method: ticker.history(period='max')")
    print(f"Type: {type(yf_price).__name__}")
    print(f"Shape: {yf_price.shape}")
    print(f"Columns: {list(yf_price.columns)}")
    print("Sample data (latest 3 rows):")
    # yfinance returns the dates as the DataFrame index rather than as a
    # column, so the index has to be printed or the rows carry no dates.
    print(yf_price.tail(3).to_string())
    print(f"Time taken: {yf_time:.3f}s")
|
|
|
|
def compare_financial_data():
    """Print the structure of quarterly income-statement data for MSFT.

    Shows the defeatbeta-api statement object and its DataFrame (shape,
    columns, the first 15 ``Breakdown`` entries and a few headline metrics).
    When yfinance could be imported, the shape and axis labels of
    ``quarterly_financials`` are printed as well.
    """
    print(f"\n{'='*80}")
    print("2. FINANCIAL STATEMENTS")
    print("="*80)

    symbol = 'MSFT'
    db_ticker = Ticker(symbol)

    print("\n[DEFEATBETA-API - Quarterly Income Statement]")
    print("-" * 40)
    income_stmt = db_ticker.quarterly_income_statement()
    df = income_stmt.df()

    print(f"Type: {type(income_stmt).__name__}")
    print(f"DataFrame shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print("\nBreakdown items available:")
    print(df['Breakdown'].head(15).tolist())
    print("\nKey metrics sample:")
    for metric in ['Total Revenue', 'Gross Profit', 'Net Income Common Stockholders']:
        rows = df[df['Breakdown'] == metric]
        if rows.empty:
            # Metric is not reported for this ticker; skip it.
            continue
        row = rows.iloc[0]
        # NOTE(review): iloc[-1] is simply the last column; it is the latest
        # quarter only if the period columns are ordered oldest-first - confirm
        # against the column list printed above.
        print(f" {metric}: TTM={row['TTM']}, Latest Q={row.iloc[-1]}")

    if YFINANCE_AVAILABLE:
        print("\n[YFINANCE - Quarterly Financials]")
        print("-" * 40)
        yf_ticker = yf.Ticker(symbol)
        yf_financials = yf_ticker.quarterly_financials

        print(f"Type: {type(yf_financials).__name__}")
        print(f"Shape: {yf_financials.shape}")
        # yfinance lays this frame out with the line items down the index and
        # the period end dates across the columns, so the labels must say so.
        print(f"Index (line items, sample): {list(yf_financials.index[:5])}")
        print(f"Columns (dates): {list(yf_financials.columns[:3])}")
|
|
|
|
def compare_valuation_metrics():
    """Print defeatbeta-api valuation series for NVDA.

    For TTM EPS, TTM P/E and market capitalization (in that order) the result
    type, shape, column names and last three rows are printed.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("3. VALUATION METRICS")
    print(rule)

    symbol = 'NVDA'
    db_ticker = Ticker(symbol)

    # (section title, Ticker method name); each method is looked up and called
    # only after its section header has been printed.
    sections = (
        ("TTM EPS", "ttm_eps"),
        ("TTM P/E", "ttm_pe"),
        ("Market Cap", "market_capitalization"),
    )
    for title, method_name in sections:
        print(f"\n[DEFEATBETA-API - {title}]")
        print("-" * 40)
        frame = getattr(db_ticker, method_name)()
        print(f"Type: {type(frame).__name__}")
        print(f"Shape: {frame.shape}")
        print(f"Columns: {list(frame.columns)}")
        print("\nLatest values:")
        print(frame.tail(3).to_string(index=False))
|
|
|
|
def compare_financial_ratios():
    """Print defeatbeta-api financial-ratio series for TSLA.

    ROE and ROIC are printed in full; for WACC only the last three rows are
    printed. Each section also shows the result type, shape and column names.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("4. FINANCIAL RATIOS")
    print(rule)

    symbol = 'TSLA'
    db_ticker = Ticker(symbol)

    # (section title, Ticker method name, rows to show); None means all rows.
    sections = (
        ("ROE", "roe", None),
        ("ROIC", "roic", None),
        ("WACC", "wacc", 3),
    )
    for title, method_name, last_rows in sections:
        print(f"\n[DEFEATBETA-API - {title}]")
        print("-" * 40)
        frame = getattr(db_ticker, method_name)()
        print(f"Type: {type(frame).__name__}")
        print(f"Shape: {frame.shape}")
        print(f"Columns: {list(frame.columns)}")
        if last_rows is None:
            print("\nData:")
            print(frame.to_string(index=False))
        else:
            print("\nLatest values:")
            print(frame.tail(last_rows).to_string(index=False))
|
|
|
|
def compare_special_features():
    """Print samples of defeatbeta-api transcript and revenue data for AAPL.

    Sections, in order: the earnings-call transcript list and its last entry,
    the first two paragraphs of the FY2025 Q4 transcript, revenue by segment
    and revenue by geography. Missing data is reported with an explicit line
    instead of an empty section or an exception.
    """
    print(f"\n{'='*80}")
    print("5. SPECIAL FEATURES - EARNINGS TRANSCRIPTS")
    print("="*80)

    symbol = 'AAPL'
    db_ticker = Ticker(symbol)

    print("\n[DEFEATBETA-API - Earnings Call Transcripts]")
    print("-" * 40)
    transcripts = db_ticker.earning_call_transcripts()
    transcript_list = transcripts.get_transcripts_list()

    print(f"Transcripts object type: {type(transcripts).__name__}")
    print(f"get_transcripts_list() type: {type(transcript_list).__name__}")
    print(f"Shape: {transcript_list.shape}")
    print(f"Columns: {list(transcript_list.columns)}")
    print("\nMost recent transcript:")
    if len(transcript_list) > 0:
        # NOTE(review): treats the last row as the newest transcript, which
        # assumes the list is ordered oldest-first - confirm.
        recent = transcript_list.iloc[-1]
        print(f" FY {recent['fiscal_year']} Q{recent['fiscal_quarter']} ({recent['report_date']})")
        print(f" Transcript ID: {recent['transcripts_id']}")
        print(f" Paragraphs in transcript: {len(recent['transcripts'])}")
    else:
        # iloc[-1] on an empty list would raise IndexError.
        print(" (no transcripts available)")

    # Get specific transcript. The period is named once so the label and the
    # lookup cannot drift apart.
    fiscal_year, fiscal_quarter = 2025, 4
    print(f"\n Sample from Q{fiscal_quarter} {fiscal_year} transcript:")
    sample = transcripts.get_transcript(fiscal_year, fiscal_quarter)
    if sample is not None and len(sample) > 0:
        print(f" Type: {type(sample).__name__}")
        print(f" Shape: {sample.shape}")
        print(f" Columns: {list(sample.columns)}")
        print(" First 2 paragraphs:")
        for _, row in sample.head(2).iterrows():
            speaker = row['speaker']
            text = row['content']
            # Keep the preview to 100 characters.
            content = text[:100] + "..." if len(text) > 100 else text
            print(f" {speaker}: {content}")
    else:
        # Say so explicitly rather than leaving a header with nothing under it.
        print(f" (no transcript found for FY {fiscal_year} Q{fiscal_quarter})")

    _print_revenue_breakdown("Revenue by Segment", db_ticker.revenue_by_segment)
    _print_revenue_breakdown("Revenue by Geography", db_ticker.revenue_by_geography)


def _print_revenue_breakdown(title, fetch):
    """Print one revenue-breakdown section.

    ``fetch`` is a zero-argument callable; it is called after the section
    header has been printed. A result that has a ``shape`` attribute is
    summarized (shape, columns, last three rows); anything else is printed
    as is.
    """
    print(f"\n[DEFEATBETA-API - {title}]")
    print("-" * 40)
    data = fetch()
    print(f"Type: {type(data).__name__}")
    if hasattr(data, 'shape'):
        print(f"Shape: {data.shape}")
        print(f"Columns: {list(data.columns)}")
        print("\nLatest data:")
        print(data.tail(3).to_string(index=False))
    else:
        print(f"Data: {data}")
|
|
|
|
def show_all_available_methods():
    """Print the public attribute names of an AAPL Ticker, grouped by category.

    Some categories are fixed name lists, others are built by substring match
    on the public names. Only names that actually exist on the Ticker object
    are printed, sorted within each category; empty categories are omitted.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("6. COMPLETE API METHOD REFERENCE")
    print(rule)

    db_ticker = Ticker('AAPL')

    print("\n[DEFEATBETA-API - All Ticker Methods]")
    print("-" * 40)

    public_names = [name for name in dir(db_ticker) if not name.startswith('_')]

    def names_containing(fragment):
        # Public names whose lower-cased form contains the given fragment.
        return [name for name in public_names if fragment in name.lower()]

    categories = {
        'Price & Volume': ['price'],
        'Valuation': ['ttm_eps', 'ttm_pe', 'market_capitalization', 'ps_ratio', 'pb_ratio', 'peg_ratio'],
        'Financial Ratios': ['roe', 'roic', 'roa', 'wacc', 'beta', 'equity_multiplier', 'asset_turnover'],
        'Income Statement': ['quarterly_income_statement', 'annual_income_statement'],
        'Balance Sheet': ['quarterly_balance_sheet', 'annual_balance_sheet'],
        'Cash Flow': ['quarterly_cash_flow', 'annual_cash_flow'],
        'Growth Metrics': names_containing('yoy_growth'),
        'Margin Metrics': names_containing('margin'),
        'Special Data': ['earning_call_transcripts', 'news', 'sec_filing', 'dividends', 'splits'],
        'Revenue Breakdown': ['revenue_by_segment', 'revenue_by_product', 'revenue_by_geography'],
        'Industry Metrics': names_containing('industry'),
        'Info & Calendar': ['info', 'calendar', 'currency', 'shares', 'officers'],
    }

    for category, candidates in categories.items():
        present = sorted(name for name in candidates if name in public_names)
        if not present:
            continue
        print(f"\n {category}:")
        for name in present:
            print(f" • ticker.{name}()")
|
|
|
|
def compare_data_reliability():
    """Print the fixed lists of advantages for defeatbeta-api and yfinance.

    No data is fetched; the function only writes the section banner and two
    bulleted lists to standard output.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("7. DATA RELIABILITY COMPARISON")
    print(rule)

    # (section header, bullet lines), printed in this order.
    advantages = (
        (
            "\n[DEFEATBETA-API Advantages]",
            (
                "✓ Data hosted on Hugging Face - no rate limits",
                "✓ DuckDB engine for fast queries",
                "✓ Sub-second query performance",
                "✓ Consistent data format across all endpoints",
                "✓ Historical data from 1990s for most stocks",
                "✓ SEC filings, earnings transcripts available",
                "✓ Revenue segmentation (by product/geography)",
                "✓ Automated DCF valuation with Excel output",
                "✓ LLM-powered analysis capabilities",
            ),
        ),
        (
            "\n[YFINANCE Advantages]",
            (
                "✓ Real-time data (delayed by 15min)",
                "✓ Larger ecosystem and community support",
                "✓ More extensive options/derivatives data",
                "✓ Institutional ownership data",
                "✓ Analyst recommendations and price targets",
                "✓ Splits and dividends detailed history",
            ),
        ),
    )
    for header, bullets in advantages:
        print(header)
        print("-" * 40)
        for bullet in bullets:
            print(bullet)
|
|
|
|
def demonstrate_unique_features():
    """Print what defeatbeta-api's DCF and transcript-AI features expose for AAPL.

    The DCF call is attempted and its return type (and keys, if it is a dict)
    reported; a failure is reported with its type and message and does not
    stop the function. The public attribute names of the transcripts object
    that look AI-related are then listed.
    """
    print(f"\n{'='*80}")
    print("8. UNIQUE DEFEATBETA-API FEATURES")
    print("="*80)

    print("\n[DCF Valuation]")
    print("-" * 40)
    db_ticker = Ticker('AAPL')
    try:
        dcf_result = db_ticker.dcf()
    except Exception as e:
        # Demo boundary: report the failure (with its message, not just the
        # exception type) and carry on with the remaining sections.
        print(f"DCF error: {type(e).__name__}: {e}")
    else:
        print(f"DCF method returns: {type(dcf_result).__name__}")
        if isinstance(dcf_result, dict):
            print(f"Keys: {list(dcf_result.keys())}")
        print("(Note: DCF generates professional Excel output with WACC, cash flow projections, fair price)")

    print("\n[AI-Powered Analysis on Earnings Transcripts]")
    print("-" * 40)
    transcripts = db_ticker.earning_call_transcripts()
    # Match "ai" only as a whole underscore-separated word; a plain substring
    # test would also pick up names such as "details" or "available".
    # Underscore-prefixed names are skipped because they are not public API.
    ai_methods = [
        name for name in dir(transcripts)
        if not name.startswith('_')
        and ('ai' in name.lower().split('_') or 'analyze' in name.lower())
    ]
    print("Available AI methods:")
    if not ai_methods:
        print(" (none found)")
    for method in ai_methods:
        print(f" • transcripts.{method}()")
|
|
|
|
def main():
    """Run every comparison section in order, then print a closing summary."""
    print("DEFEATBETA-API vs YFINANCE - COMPREHENSIVE COMPARISON")
    print("="*80)

    compare_data_structures()
    compare_financial_data()
    compare_valuation_metrics()
    compare_financial_ratios()
    compare_special_features()
    show_all_available_methods()
    compare_data_reliability()
    demonstrate_unique_features()

    print(f"\n{'='*80}")
    print("SUMMARY")
    print("="*80)
    # The last yfinance bullet used to list "recommendations" twice; the
    # duplicate is replaced with "price targets", which the yfinance
    # advantages list in compare_data_reliability() already names.
    print("""
DEFEATBETA-API:
• Best for: Historical analysis, financial modeling, backtesting
• Strengths: No rate limits, comprehensive fundamentals, fast queries
• Data: Time-series heavy, quarterly/annual financial statements
• Unique: Revenue segmentation, WACC calculations, DCF automation

YFINANCE:
• Best for: Real-time data, market scanning, quick lookups
• Strengths: Real-time prices, analyst data, institutional ownership
• Data: Mixed frequency, real-time + historical
• Unique: Analyst recommendations, options data, price targets
""")
|
|
|
|
# Run the full comparison only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()