Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Value investing analysis tool for Chinese A-share stocks with screening, financial analysis, industry comparison, and DCF valuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/data_fetcher.py
#!/usr/bin/env python3
"""
A-share data fetching module.

Uses akshare to fetch stock fundamentals, financial statements, valuation,
price, shareholder and dividend data, and exposes a small CLI that prints
(or saves) the result as JSON.

Progress and diagnostic messages are written to stderr so that stdout
carries only the JSON result.

Dependencies: pip install akshare pandas
"""

import argparse
import json
import sys
import time
import os
from datetime import datetime, timedelta
from typing import Optional, Callable
from functools import wraps

try:
    import akshare as ak
    import pandas as pd
except ImportError:
    print("错误: 请先安装依赖库")
    print("pip install akshare pandas")
    sys.exit(1)


def _log(message: str) -> None:
    """Write a progress/diagnostic message to stderr, keeping stdout for JSON."""
    print(message, file=sys.stderr)


def retry_on_failure(max_retries: int = 3, delay: float = 1.0):
    """Decorator factory that retries a network call on failure.

    An attempt counts as failed when the wrapped function raises, or when it
    returns a dict containing an ``"error"`` key (several fetchers in this
    module catch their own exceptions and report them that way, so retrying
    only on raised exceptions would never trigger for them).

    Args:
        max_retries: Maximum number of attempts.
        delay: Base wait in seconds; the wait grows linearly with the
            attempt number (``delay * 1``, ``delay * 2``, ...).

    Returns:
        A decorator. The wrapped function returns the first successful
        result; after exhausting all attempts it returns the last error
        dict produced by the function, or ``{"error": ...}`` describing the
        last raised exception.
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            last_result = None
            for attempt in range(max_retries):
                try:
                    result = func(*args, **kwargs)
                except Exception as e:
                    last_error, last_result = e, None
                else:
                    if not (isinstance(result, dict) and "error" in result):
                        return result
                    # Keep the function's own error dict so its shape
                    # (e.g. an accompanying "code" key) is preserved.
                    last_result = result
                if attempt < max_retries - 1:
                    time.sleep(delay * (attempt + 1))  # linear back-off
            if last_result is not None:
                return last_result
            return {"error": f"重试{max_retries}次后失败: {str(last_error)}"}
        return wrapper
    return decorator


def safe_float(value) -> Optional[float]:
    """Convert ``value`` to ``float``, returning ``None`` when not possible.

    ``None``, ``''``, ``'--'`` and NaN-like values yield ``None``. For
    strings, ``%``, ``,`` and the ``亿`` suffix are stripped before
    conversion; note the ``亿`` unit is removed, not scaled.

    The sentinel comparisons are inside the ``try`` because values such as
    ``pd.NA`` or array-likes raise ``TypeError``/``ValueError`` when their
    comparison result is used in a boolean context.
    """
    try:
        if value is None or value == '' or value == '--':
            return None
        if pd.isna(value):
            return None
        if isinstance(value, str):
            value = value.replace('%', '').replace(',', '').replace('亿', '')
        return float(value)
    except (ValueError, TypeError):
        return None


def get_cache_path(code: str, data_type: str) -> str:
    """Return the cache file path for ``code``/``data_type`` for today.

    The cache lives in a ``.cache`` directory next to this file (created on
    demand) and the file name embeds today's date, so entries are only
    reused within the same day.

    Raises:
        OSError: If the cache directory cannot be created.
    """
    cache_dir = os.path.join(os.path.dirname(__file__), '.cache')
    os.makedirs(cache_dir, exist_ok=True)
    today = datetime.now().strftime('%Y%m%d')
    return os.path.join(cache_dir, f"{code}_{data_type}_{today}.json")


def load_cache(code: str, data_type: str) -> Optional[dict]:
    """Load today's cached data, or ``None`` if missing or unreadable."""
    try:
        # get_cache_path may raise OSError (e.g. read-only install dir);
        # a broken cache must never break fetching.
        cache_path = get_cache_path(code, data_type)
        if not os.path.exists(cache_path):
            return None
        with open(cache_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError):
        return None


def save_cache(code: str, data_type: str, data: dict):
    """Persist ``data`` to today's cache file (best effort).

    The payload is written to a temporary file and moved into place with
    ``os.replace`` so a failed or interrupted write never leaves a truncated
    cache file. Any I/O or serialization failure is ignored on purpose:
    caching is an optimization and must not fail the fetch.
    """
    tmp_path = None
    try:
        cache_path = get_cache_path(code, data_type)
        tmp_path = f"{cache_path}.{os.getpid()}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, default=str)
        os.replace(tmp_path, cache_path)
    except (OSError, TypeError, ValueError):
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass


@retry_on_failure(max_retries=2, delay=1.0)
def get_stock_info(code: str) -> dict:
    """Fetch basic information for one stock.

    Returns:
        A dict with ``code``, ``name``, ``industry``, market-cap, share
        count, PE/PB and listing-date fields; or ``{"code", "error"}`` if
        the lookup failed.
    """
    try:
        df = ak.stock_individual_info_em(symbol=code)
        # The frame is read as item/value pairs and turned into a lookup.
        info = dict(zip(df['item'], df['value']))
        return {
            "code": code,
            "name": info.get("股票简称", ""),
            "industry": info.get("行业", ""),
            "market_cap": safe_float(info.get("总市值")),
            "float_cap": safe_float(info.get("流通市值")),
            "total_shares": safe_float(info.get("总股本")),
            "float_shares": safe_float(info.get("流通股")),
            "pe_ttm": safe_float(info.get("市盈率(动态)")),
            "pb": safe_float(info.get("市净率")),
            "listing_date": info.get("上市时间", "")
        }
    except Exception as e:
        return {"code": code, "error": str(e)}


@retry_on_failure(max_retries=2, delay=1.0)
def get_financial_data(code: str, years: int = 3) -> dict:
    """Fetch balance sheet, income statement and cash-flow statement.

    Args:
        code: Stock code.
        years: Years of history requested. The number of records kept per
            statement is ``min(years * 4, 12)``, i.e. it is capped at 12.

    Returns:
        A dict with ``balance_sheet``, ``income_statement`` and
        ``cash_flow`` record lists. A statement that fails to load stays
        empty and its message is stored under ``"<key>_error"``.
    """
    max_records = min(years * 4, 12)
    result = {
        "balance_sheet": [],
        "income_statement": [],
        "cash_flow": []
    }

    fetch_configs = [
        ("balance_sheet", ak.stock_balance_sheet_by_report_em),
        ("income_statement", ak.stock_profit_sheet_by_report_em),
        ("cash_flow", ak.stock_cash_flow_sheet_by_report_em),
    ]

    for key, fetch_func in fetch_configs:
        try:
            df = fetch_func(symbol=code)
            if df is not None and not df.empty:
                result[key] = df.head(max_records).to_dict(orient='records')
        except Exception as e:
            result[f"{key}_error"] = str(e)

    return result


def get_financial_indicators(code: str, limit: int = 8) -> list:
    """Fetch financial indicators, falling back to a second API on failure.

    Returns:
        Up to ``limit`` records from the first API that yields a non-empty
        frame, or an empty list if both fail.
    """
    apis = [ak.stock_financial_abstract, ak.stock_financial_analysis_indicator]

    for api in apis:
        try:
            df = api(symbol=code)
            if df is not None and not df.empty:
                return df.head(limit).to_dict(orient='records')
        except Exception:
            continue

    return []


def get_valuation_data(code: str) -> dict:
    """Fetch valuation history and compute PE/PB percentiles.

    Returns:
        A dict with ``latest`` (last row), ``history_count`` and, where
        available, ``pe_ttm_percentile`` / ``pb_percentile`` (share of
        historical values strictly below the latest, in percent). On
        failure the dict carries ``error`` and ``note`` keys instead.
    """
    result = {}

    try:
        # NOTE(review): confirm the installed akshare version accepts
        # ``symbol`` here and returns ``pe_ttm``/``pb`` columns; any
        # mismatch is reported through the "error" key below.
        df = ak.stock_a_ttm_lyr(symbol=code)
        if df is None or df.empty:
            return result

        latest = df.iloc[-1].to_dict()
        result["latest"] = latest
        result["history_count"] = len(df)

        for col in ['pe_ttm', 'pb']:
            val = latest.get(col)
            if val and not pd.isna(val):
                result[f"{col}_percentile"] = (df[col].dropna() < val).mean() * 100

    except Exception as e:
        result["error"] = str(e)
        result["note"] = "估值历史数据获取失败,将使用基本信息中的估值"

    return result


@retry_on_failure(max_retries=2, delay=1.0)
def get_holder_data(code: str) -> dict:
    """Fetch top-10 shareholders and shareholder-count history.

    Each part is fetched independently; a failure is recorded under
    ``top_10_holders_error`` / ``holder_count_error`` without affecting
    the other part.
    """
    result = {}

    try:
        df_top10 = ak.stock_gdfx_top_10_em(symbol=code)
        if df_top10 is not None and not df_top10.empty:
            result["top_10_holders"] = df_top10.head(10).to_dict(orient='records')
    except Exception as e:
        result["top_10_holders_error"] = str(e)

    try:
        df_holder_num = ak.stock_zh_a_gdhs(symbol=code)
        if df_holder_num is not None and not df_holder_num.empty:
            result["holder_count_history"] = df_holder_num.head(10).to_dict(orient='records')
    except Exception as e:
        result["holder_count_error"] = str(e)

    return result


@retry_on_failure(max_retries=2, delay=1.0)
def get_dividend_data(code: str) -> dict:
    """Fetch dividend history, falling back to a second API on failure.

    Returns:
        ``{"dividend_history": [...], "dividend_count": n}``; both are
        empty/zero when neither API returns data.
    """
    apis = [
        lambda c: ak.stock_dividend_cninfo(symbol=c),
        lambda c: ak.stock_history_dividend_detail(symbol=c, indicator="分红"),
    ]

    for api in apis:
        try:
            df = api(code)
            if df is not None and not df.empty:
                return {
                    "dividend_history": df.to_dict(orient='records'),
                    "dividend_count": len(df)
                }
        except Exception:
            continue

    return {"dividend_history": [], "dividend_count": 0}


@retry_on_failure(max_retries=2, delay=1.0)
def get_price_data(code: str, days: int = 60) -> dict:
    """Fetch recent daily price data (forward-adjusted).

    Args:
        code: Stock code.
        days: Look-back window in calendar days. The ``high_60d`` /
            ``low_60d`` keys keep their names regardless of this value.

    Returns:
        Latest quote fields, window high/low, average volume over the last
        20 rows and the last 30 rows of raw data; ``{}`` when no data is
        returned; ``{"error": ...}`` on failure.
    """
    try:
        end_date = datetime.now().strftime('%Y%m%d')
        start_date = (datetime.now() - timedelta(days=days)).strftime('%Y%m%d')

        df = ak.stock_zh_a_hist(symbol=code, period="daily",
                                start_date=start_date, end_date=end_date, adjust="qfq")
        if df is not None and not df.empty:
            latest = df.iloc[-1]
            return {
                "latest_price": safe_float(latest['收盘']),
                "latest_date": str(latest['日期']),
                "price_change_pct": safe_float(latest['涨跌幅']),
                "volume": safe_float(latest['成交量']),
                "turnover": safe_float(latest['成交额']),
                "high_60d": safe_float(df['最高'].max()),
                "low_60d": safe_float(df['最低'].min()),
                "avg_volume_20d": safe_float(df.tail(20)['成交量'].mean()),
                "price_data": df.tail(30).to_dict(orient='records')  # keep only 30 rows
            }
        return {}
    except Exception as e:
        return {"error": str(e)}


@retry_on_failure(max_retries=2, delay=1.0)
def get_index_constituents(index_name: str) -> list:
    """Return the constituent stock codes of a supported index.

    Args:
        index_name: One of ``hs300``, ``zz500``, ``zz1000``, ``cyb``,
            ``kcb``.

    Returns:
        A list of stock codes; empty for an unknown index name or when the
        lookup fails (the failure is logged to stderr).
    """
    index_map = {
        "hs300": "000300",
        "zz500": "000905",
        "zz1000": "000852",
        "cyb": "399006",
        "kcb": "000688"
    }

    index_code = index_map.get(index_name)
    if not index_code:
        return []

    try:
        df = ak.index_stock_cons(symbol=index_code)
        if df is not None and not df.empty:
            return df['品种代码'].tolist()
        return []
    except Exception as e:
        _log(f"获取指数成分股失败: {e}")
        return []


def get_all_a_stocks() -> list:
    """Return the codes of all A-share stocks, or ``[]`` on failure."""
    try:
        df = ak.stock_zh_a_spot_em()
        if df is not None and not df.empty:
            return df['代码'].tolist()
        return []
    except Exception as e:
        _log(f"获取全部A股失败: {e}")
        return []


def fetch_stock_data(code: str, data_type: str = "all", years: int = 3, use_cache: bool = True) -> dict:
    """Fetch the requested data sections for a single stock.

    Args:
        code: Stock code.
        data_type: ``all``, ``basic``, ``financial``, ``valuation`` or
            ``holder``; selects which sections are fetched.
        years: Years of financial history (used by the financial section).
        use_cache: Reuse/store today's cached result for this
            ``code``/``data_type``.

    Returns:
        A dict with ``code``, ``fetch_time``, ``data_type`` and one key per
        fetched section. When the result depends on ``years`` it also
        records ``years`` so a cached entry fetched with a different value
        is not reused.
    """
    years_matter = data_type in ("all", "financial")

    # Try the cache first.
    if use_cache:
        cached = load_cache(code, data_type)
        # Entries written before "years" was recorded are still accepted.
        stale = (
            years_matter
            and isinstance(cached, dict)
            and cached.get("years", years) != years
        )
        if cached and not stale:
            _log(f"使用缓存数据: {code}")
            return cached

    result = {
        "code": code,
        "fetch_time": datetime.now().isoformat(),
        "data_type": data_type
    }
    if years_matter:
        result["years"] = years

    _log(f"正在获取 {code} 的数据...")

    if data_type in ["all", "basic"]:
        _log(" - 获取基本信息...")
        result["basic_info"] = get_stock_info(code)

    if data_type in ["all", "financial"]:
        _log(" - 获取财务数据...")
        result["financial_data"] = get_financial_data(code, years)
        _log(" - 获取财务指标...")
        result["financial_indicators"] = get_financial_indicators(code)

    if data_type in ["all", "valuation"]:
        _log(" - 获取估值数据...")
        result["valuation"] = get_valuation_data(code)
        _log(" - 获取价格数据...")
        result["price"] = get_price_data(code)

    if data_type in ["all", "holder"]:
        _log(" - 获取股东数据...")
        result["holder"] = get_holder_data(code)
        _log(" - 获取分红数据...")
        result["dividend"] = get_dividend_data(code)

    # Store in the cache.
    if use_cache:
        save_cache(code, data_type, result)

    _log(f"数据获取完成: {code}")
    return result


def fetch_multiple_stocks(codes: list, data_type: str = "basic",
                          years: int = 3, use_cache: bool = True) -> dict:
    """Fetch data for several stocks sequentially.

    Args:
        codes: Stock codes to fetch.
        data_type: Section selector passed to ``fetch_stock_data``.
        years: Years of financial history passed to ``fetch_stock_data``.
        use_cache: Whether ``fetch_stock_data`` may use the daily cache.

    Returns:
        A dict with ``fetch_time``, the list of successfully fetched
        ``stocks`` and ``success_count`` / ``fail_count``. A stock counts
        as failed when fetching raises or its ``basic_info`` section
        carries an ``error`` key.
    """
    result = {
        "fetch_time": datetime.now().isoformat(),
        "stocks": [],
        "success_count": 0,
        "fail_count": 0
    }

    total = len(codes)
    for i, code in enumerate(codes):
        _log(f"[{i+1}/{total}] 获取 {code}...")
        try:
            stock_data = fetch_stock_data(code, data_type, years, use_cache=use_cache)
            if "error" not in stock_data.get("basic_info", {}):
                result["stocks"].append(stock_data)
                result["success_count"] += 1
            else:
                result["fail_count"] += 1
        except Exception as e:
            _log(f" 获取失败: {e}")
            result["fail_count"] += 1

        # Throttle so the upstream service is not hit too quickly.
        if i < total - 1:
            time.sleep(0.5)

    return result


def main():
    """CLI entry point: fetch data per the arguments and emit JSON.

    Exactly one of ``--code``, ``--codes`` or ``--scope`` selects the mode.
    The JSON result is written to ``--output`` if given, otherwise printed
    to stdout. Exits with status 1 when no mode is selected.
    """
    parser = argparse.ArgumentParser(description="A股数据获取工具")
    parser.add_argument("--code", type=str, help="股票代码 (如: 600519)")
    parser.add_argument("--codes", type=str, help="多个股票代码,逗号分隔 (如: 600519,000858)")
    parser.add_argument("--data-type", type=str, default="basic",
                        choices=["all", "basic", "financial", "valuation", "holder"],
                        help="数据类型 (默认: basic)")
    parser.add_argument("--years", type=int, default=3, help="获取多少年的历史数据 (默认: 3)")
    parser.add_argument("--scope", type=str, help="筛选范围: hs300/zz500/zz1000/cyb/kcb/all")
    parser.add_argument("--no-cache", action="store_true", help="不使用缓存")
    parser.add_argument("--output", type=str, help="输出文件路径 (JSON)")

    args = parser.parse_args()
    use_cache = not args.no_cache

    result = {}

    if args.code:
        result = fetch_stock_data(args.code, args.data_type, args.years,
                                  use_cache=use_cache)
    elif args.codes:
        # Drop blank entries produced by stray commas ("a,,b" or "a,").
        codes = [c.strip() for c in args.codes.split(",") if c.strip()]
        result = fetch_multiple_stocks(codes, args.data_type, args.years,
                                       use_cache=use_cache)
    elif args.scope:
        if args.scope == "all":
            codes = get_all_a_stocks()
        else:
            codes = get_index_constituents(args.scope)
        result = {"scope": args.scope, "stocks": codes, "count": len(codes)}
    else:
        _log("请提供 --code, --codes 或 --scope 参数")
        sys.exit(1)

    # Emit the result.
    output = json.dumps(result, ensure_ascii=False, indent=2, default=str)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"\n数据已保存到: {args.output}")
    else:
        print(output)


if __name__ == "__main__":
    main()