diff --git a/.claude/plans/20250423-project-restructure-design.md b/.claude/plans/20250423-project-restructure-design.md
new file mode 100644
index 00000000..40670e46
--- /dev/null
+++ b/.claude/plans/20250423-project-restructure-design.md
@@ -0,0 +1,191 @@
+# Project Restructure Design
+
+**Date:** 2025-04-23
+**Scope:** One-shot restructure — directory migration, pyproject.toml rewrite, import fixup, cleanup
+
+---
+
+## 1. Goal
+
+Restructure the project from the Poetry-based layout to a standard `src/` layout with PEP 621, rename `data_provider` → `processor`, and normalize the third-party dependencies of `provider/` into `pyproject.toml`.
+
+## 2. Target Directory Structure
+
+```
+massive-com/
+├── src/
+│   ├── massive/              # SDK client (REST + WebSocket)
+│   │   ├── __init__.py
+│   │   ├── rest/
+│   │   ├── websocket/
+│   │   ├── exceptions.py
+│   │   ├── logging.py
+│   │   └── modelclass.py
+│   ├── provider/             # multi-source data fetching layer
+│   │   ├── __init__.py
+│   │   ├── base.py
+│   │   ├── realtime_types.py
+│   │   ├── us_index_mapping.py
+│   │   ├── fundamental_adapter.py
+│   │   ├── efinance_fetcher.py
+│   │   ├── akshare_fetcher.py
+│   │   ├── tushare_fetcher.py
+│   │   ├── pytdx_fetcher.py
+│   │   ├── baostock_fetcher.py
+│   │   ├── yfinance_fetcher.py
+│   │   ├── longbridge_fetcher.py
+│   │   └── tickflow_fetcher.py
+│   └── processor/            # formerly data_provider, renamed
+│       ├── __init__.py
+│       └── us_daily/
+│           ├── __init__.py
+│           ├── __main__.py
+│           ├── config.py
+│           ├── config.json
+│           ├── storage.py
+│           ├── ticker_filter.py
+│           └── agg_fetcher.py
+├── tests/
+│   ├── test_rest/            # formerly top-level test_rest/
+│   ├── test_websocket/       # formerly top-level test_websocket/
+│   └── test_us_daily/        # originally tests/test_us_daily/
+├── examples/                 # unchanged
+├── docs/                     # unchanged
+├── data/                     # unchanged
+├── logs/                     # unchanged
+├── pyproject.toml            # rewritten
+└── README.md
+```
+
+## 3. pyproject.toml
+
+Migrate from `[tool.poetry]` to PEP 621 + setuptools:
+
+```toml
+[project]
+name = "massive"
+version = "0.0.0"
+description = "Official Massive (formerly Polygon.io) REST and Websocket client."
+requires-python = ">=3.9"
+license = {text = "MIT"}
+
+dependencies = [
+    "urllib3>=1.26.9",
+    "websockets>=14.0",
+    "certifi>=2022.5.18,<2027.0.0",
+    "pandas",
+]
+
+[project.optional-dependencies]
+efinance = ["efinance"]
+akshare = ["akshare"]
+tushare = ["tushare"]
+pytdx = ["pytdx"]
+baostock = ["baostock"]
+yfinance = ["yfinance"]
+longbridge = ["longbridge-openapi"]
+all = [
+    "efinance",
+    "akshare",
+    "tushare",
+    "pytdx",
+    "baostock",
+    "yfinance",
+    "longbridge-openapi",
+]
+dev = [
+    "black>=24.8.0",
+    "mypy>=1.19",
+    "types-urllib3>=1.26.25",
+    "types-certifi>=2021.10.8",
+    "types-setuptools>=81.0.0",
+    "pook>=2.1.4",
+    "orjson>=3.11.5",
+    "pytest",
+]
+docs = [
+    "Sphinx>=7.4.7",
+    "sphinx-rtd-theme>=3.1.0",
+    "sphinx-autodoc-typehints>=2.3.0",
+]
+
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["src"]
+
+[tool.black]
+line-length = 88
+
+[tool.mypy]
+python_version = "3.9"
+```
+
+## 4. Import Path Changes
+
+### 4.1 `data_provider` → `processor` (~36 occurrences)
+
+**Source files (3):**
+- `src/processor/us_daily/__main__.py`
+- `src/processor/us_daily/ticker_filter.py`
+- `src/processor/us_daily/agg_fetcher.py`
+
+**Test files (4):**
+- `tests/test_us_daily/test_agg_fetcher.py` (including `patch()` paths)
+- `tests/test_us_daily/test_config.py`
+- `tests/test_us_daily/test_storage.py`
+- `tests/test_us_daily/test_ticker_filter.py` (including `patch()` paths)
+
+Apply a global `data_provider` → `processor` replacement across all of them.
+
+### 4.2 `massive` package (0 changes)
+
+The package uses relative imports internally, so paths keep working once it moves into `src/massive/`. The two absolute imports (`indicators.py`, `summaries.py`) remain valid with `pythonpath = ["src"]`.
+
+### 4.3 `provider` package (0 changes)
+
+No file currently imports `provider`.
+
+### 4.4 Test files (0 changes)
+
+The `from massive import ...` paths in `test_rest/` and `test_websocket/` stay unchanged and resolve via `pythonpath = ["src"]`.
+
+## 5. config.py Path Fix
+
+Change the default argument of `load_config()` in `processor/us_daily/config.py` from a hard-coded path to a `__file__`-relative lookup:
+
+```python
+def load_config(config_path: str = None) -> Config:
+    if config_path is None:
+        config_path = os.path.join(os.path.dirname(__file__), "config.json")
+    ...
+```
+
+`data_dir = "data/us_daily"` stays unchanged (relative to the project root).
+
+## 6. CLAUDE.md Updates
+
+- Remove the Poetry/Makefile commands
+- New commands: `pip install -e ".[all,dev]"`, `pytest`, `black src/ tests/`, `mypy src/`
+- Add architecture notes for the `src/` layout, `provider/`, and `processor/`
+- Document `python -m processor.us_daily` as the processor entry point
+
+## 7. Delete & Cleanup
+
+| Action | Target |
+|------|------|
+| Delete | `Makefile`, `poetry.lock` |
+| Keep as-is | `.massive/`, `docs/`, `examples/`, `data/`, `logs/`, `README.md` |
+
+## 8. Not In Scope
+
+- Internal reorganization of `provider/` (the flat structure stays as-is)
+- Feature expansion of `processor/` (this pass only renames it)
+- Path updates in examples (`from massive import` is unchanged)
+- Test framework migration (keep unittest, add pytest support)
diff --git a/.claude/plans/20250423-project-restructure-plan.md b/.claude/plans/20250423-project-restructure-plan.md
new file mode 100644
index 00000000..98a309a4
--- /dev/null
+++ b/.claude/plans/20250423-project-restructure-plan.md
@@ -0,0 +1,534 @@
+# Project Restructure Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Restructure the project from Poetry-based layout to standard `src/` layout with PEP 621, rename `data_provider` to `processor`, and normalize dependencies in `pyproject.toml`.
+
+**Architecture:** Move `massive/`, `provider/`, `data_provider/` into `src/` (renaming `data_provider` to `processor`). Move `test_rest/` and `test_websocket/` into `tests/`. Rewrite `pyproject.toml` from `[tool.poetry]` to PEP 621 + setuptools. Fix all `data_provider` → `processor` imports. Delete `Makefile` and `poetry.lock`.
+ +**Tech Stack:** Python 3.9+, setuptools, pytest + +**Design Doc:** `.claude/plans/20250423-project-restructure-design.md` + +--- + +### Task 1: Create `src/` directory and move packages + +**Files:** +- Create: `src/` directory +- Move: `massive/` → `src/massive/` +- Move: `provider/` → `src/provider/` +- Move: `data_provider/` → `src/processor/` (rename) + +- [ ] **Step 1: Create src directory** + +```bash +mkdir -p src +``` + +- [ ] **Step 2: Move massive/ into src/** + +```bash +git mv massive src/massive +``` + +- [ ] **Step 3: Move provider/ into src/** + +```bash +git mv provider src/provider +``` + +- [ ] **Step 4: Move data_provider/ to src/processor/ (rename)** + +```bash +git mv data_provider src/processor +``` + +- [ ] **Step 5: Verify directory structure** + +```bash +ls src/ +``` + +Expected: `massive processor provider` + +```bash +ls src/processor/us_daily/ +``` + +Expected: `__init__.py __main__.py agg_fetcher.py config.json config.py storage.py ticker_filter.py` + +- [ ] **Step 6: Commit** + +```bash +git add -A +git commit -m "refactor: move massive, provider, data_provider into src/ layout + +Rename data_provider to processor." +``` + +--- + +### Task 2: Move test directories into tests/ + +**Files:** +- Move: `test_rest/` → `tests/test_rest/` +- Move: `test_websocket/` → `tests/test_websocket/` +- Keep: `tests/test_us_daily/` (already in place) + +- [ ] **Step 1: Move test_rest/ into tests/** + +```bash +git mv test_rest tests/test_rest +``` + +- [ ] **Step 2: Move test_websocket/ into tests/** + +```bash +git mv test_websocket tests/test_websocket +``` + +- [ ] **Step 3: Verify structure** + +```bash +ls tests/ +``` + +Expected: `__init__.py test_rest test_us_daily test_websocket` + +- [ ] **Step 4: Commit** + +```bash +git add -A +git commit -m "refactor: move test_rest and test_websocket into tests/" +``` + +--- + +### Task 3: Fix `data_provider` → `processor` imports in source files + +**Files:** +- Modify: `src/processor/us_daily/__main__.py` (lines 8-16) +- Modify: `src/processor/us_daily/ticker_filter.py` (line 5) +- Modify: `src/processor/us_daily/agg_fetcher.py` (lines 7-12) + +- [ ] **Step 1: Fix imports in `__main__.py`** + +In `src/processor/us_daily/__main__.py`, replace all `data_provider` with `processor`: + +```python +# Line 8-16: change from +from data_provider.us_daily.config import load_config +from data_provider.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from data_provider.us_daily.ticker_filter import filter_top_tickers +from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + +# to +from processor.us_daily.config import load_config +from processor.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from processor.us_daily.ticker_filter import filter_top_tickers +from processor.us_daily.agg_fetcher import fetch_ticker_aggs +``` + +- [ ] **Step 2: Fix imports in `ticker_filter.py`** + +In `src/processor/us_daily/ticker_filter.py`, line 5: + +```python +# change from +from data_provider.us_daily.config import Config + +# to +from processor.us_daily.config import Config +``` + +- [ ] **Step 3: Fix imports in `agg_fetcher.py`** + +In `src/processor/us_daily/agg_fetcher.py`, lines 7-12: + +```python +# change from +from data_provider.us_daily.config import Config +from data_provider.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +# to +from processor.us_daily.config import Config +from 
processor.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) +``` + +- [ ] **Step 4: Commit** + +```bash +git add src/processor/ +git commit -m "refactor: update data_provider imports to processor in source files" +``` + +--- + +### Task 4: Fix `data_provider` → `processor` imports in test files + +**Files:** +- Modify: `tests/test_us_daily/test_agg_fetcher.py` (all `data_provider` refs including `patch()` paths) +- Modify: `tests/test_us_daily/test_config.py` (all `data_provider` refs) +- Modify: `tests/test_us_daily/test_storage.py` (all `data_provider` refs) +- Modify: `tests/test_us_daily/test_ticker_filter.py` (all `data_provider` refs including `patch()` paths) + +- [ ] **Step 1: Fix `test_agg_fetcher.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_agg_fetcher.py`. This covers: +- `from data_provider.us_daily.agg_fetcher import ...` (lines 12, 18, 25, 32, 39, 46, 56, 63, 77, 107, 151, 193) +- `from data_provider.us_daily.config import Config` (lines 78, 108, 152, 194) +- `patch("data_provider.us_daily.agg_fetcher....)` (lines 54, 61, 95, 98, 100, 131, 134, 136, 181, 184, 186, 208, 211, 213) + +All become `processor.us_daily.*`. + +- [ ] **Step 2: Fix `test_config.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_config.py`. This covers: +- `from data_provider.us_daily.config import Config` (line 9) +- `from data_provider.us_daily.config import load_config` (lines 20, 37) + +- [ ] **Step 3: Fix `test_storage.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_storage.py`. This covers: +- `from data_provider.us_daily.storage import ...` (lines 16, 22, 28, 37, 44) + +- [ ] **Step 4: Fix `test_ticker_filter.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_ticker_filter.py`. This covers: +- `from data_provider.us_daily.ticker_filter import filter_top_tickers` (lines 22, 54) +- `from data_provider.us_daily.config import Config` (lines 23, 56) +- `patch("data_provider.us_daily.ticker_filter.EXCHANGES", ...)` (lines 45, 69, 101) +- `patch("data_provider.us_daily.ticker_filter.time.sleep")` (lines 46, 70, 102) + +- [ ] **Step 5: Commit** + +```bash +git add tests/test_us_daily/ +git commit -m "refactor: update data_provider imports to processor in test files" +``` + +--- + +### Task 5: Fix `config.py` path to use `__file__`-relative lookup + +**Files:** +- Modify: `src/processor/us_daily/config.py` (line 16) + +- [ ] **Step 1: Update `load_config` default path** + +In `src/processor/us_daily/config.py`, change: + +```python +# from +def load_config(config_path: str = "data_provider/us_daily/config.json") -> Config: + config = Config() + if os.path.exists(config_path): + +# to +def load_config(config_path: str = None) -> Config: + config = Config() + if config_path is None: + config_path = os.path.join(os.path.dirname(__file__), "config.json") + if os.path.exists(config_path): +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/processor/us_daily/config.py +git commit -m "fix: use __file__-relative path for config.json lookup" +``` + +--- + +### Task 6: Rewrite `pyproject.toml` + +**Files:** +- Modify: `pyproject.toml` (full rewrite) + +- [ ] **Step 1: Replace pyproject.toml content** + +Replace the entire `pyproject.toml` with: + +```toml +[project] +name = "massive" +version = "0.0.0" +description = "Official Massive (formerly Polygon.io) REST and Websocket client." 
+requires-python = ">=3.9" +license = {text = "MIT"} + +dependencies = [ + "urllib3>=1.26.9", + "websockets>=14.0", + "certifi>=2022.5.18,<2027.0.0", + "pandas", +] + +[project.optional-dependencies] +efinance = ["efinance"] +akshare = ["akshare"] +tushare = ["tushare"] +pytdx = ["pytdx"] +baostock = ["baostock"] +yfinance = ["yfinance"] +longbridge = ["longbridge-openapi"] +all = [ + "efinance", + "akshare", + "tushare", + "pytdx", + "baostock", + "yfinance", + "longbridge-openapi", +] +dev = [ + "black>=24.8.0", + "mypy>=1.19", + "types-urllib3>=1.26.25", + "types-certifi>=2021.10.8", + "types-setuptools>=81.0.0", + "pook>=2.1.4", + "orjson>=3.11.5", + "pytest", +] +docs = [ + "Sphinx>=7.4.7", + "sphinx-rtd-theme>=3.1.0", + "sphinx-autodoc-typehints>=2.3.0", +] + +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] + +[tool.black] +line-length = 88 + +[tool.mypy] +python_version = "3.9" +``` + +- [ ] **Step 2: Commit** + +```bash +git add pyproject.toml +git commit -m "refactor: rewrite pyproject.toml from Poetry to PEP 621 + setuptools" +``` + +--- + +### Task 7: Delete Makefile and poetry.lock + +**Files:** +- Delete: `Makefile` +- Delete: `poetry.lock` + +- [ ] **Step 1: Delete Makefile** + +```bash +git rm Makefile +``` + +- [ ] **Step 2: Delete poetry.lock** + +```bash +git rm poetry.lock +``` + +- [ ] **Step 3: Commit** + +```bash +git add -A +git commit -m "chore: remove Makefile and poetry.lock" +``` + +--- + +### Task 8: Update CLAUDE.md + +**Files:** +- Modify: `CLAUDE.md` + +- [ ] **Step 1: Update Development Commands section** + +Replace the Development Commands section with: + +````markdown +## Development Commands + +```bash +# Install dependencies (core + all data sources + dev tools) +pip install -e ".[all,dev]" + +# Run all tests +pytest + +# Run specific test directory +pytest tests/test_rest/ +pytest tests/test_websocket/ +pytest tests/test_us_daily/ + +# Run a single test file +pytest tests/test_rest/test_aggs.py + +# Run a single test method +pytest tests/test_rest/test_aggs.py::TestAggs::test_list_aggs + +# Code formatting (auto-fix) +black src/ tests/ examples/ + +# Static type checking +mypy src/ + +# Run US daily data processor +python -m processor.us_daily + +# Regenerate REST API spec from OpenAPI +python .massive/rest.py + +# Update WebSocket API spec +curl https://api.massive.com/specs/websocket.json > .massive/websocket.json +``` +```` + +- [ ] **Step 2: Update Architecture section** + +Replace the Architecture section with: + +````markdown +## Architecture + +### Project Layout + +Standard `src/` layout with three top-level packages: + +- `src/massive/` — REST and WebSocket SDK client library +- `src/provider/` — Multi-source data fetcher layer with automatic failover +- `src/processor/` — Data collection and processing pipelines + +### Client Structure + +`RESTClient` (in `massive/rest/__init__.py`) uses multiple inheritance to compose domain-specific client mixins (AggsClient, TradesClient, QuotesClient, etc.) on top of `BaseClient` (`massive/rest/base.py`). Each mixin lives in its own file under `massive/rest/` and handles one API domain. + +`WebSocketClient` (`massive/websocket/__init__.py`) is a standalone async client using the `websockets` library with auto-reconnect support. 
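+
+A minimal sketch of the mixin composition pattern (hypothetical method and endpoint names, not the real signatures):
+
+```python
+class BaseClient:
+    def _get(self, path: str) -> dict: ...       # shared HTTP plumbing lives here
+
+class AggsClient(BaseClient):
+    def get_aggs(self, ticker: str) -> dict:     # one API domain per mixin file
+        return self._get(f"/v2/aggs/{ticker}")
+
+class RESTClient(AggsClient):                    # the real client lists many such mixins
+    pass
+```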
+
+### Provider Layer
+
+`DataFetcherManager` (in `provider/base.py`) orchestrates multiple data source fetchers (efinance, akshare, tushare, pytdx, baostock, yfinance, longbridge) with automatic priority-based failover. Each fetcher extends `BaseFetcher` and implements source-specific data retrieval.
+
+### Processor
+
+`processor/us_daily/` fetches US stock daily OHLCV data via the Massive REST API. Run with `python -m processor.us_daily`.
+
+### Models
+
+- REST models: `massive/rest/models/` — one file per domain, using the custom `@modelclass` decorator (from `massive/modelclass.py`) which wraps `@dataclass` with flexible init that accepts positional or keyword args.
+- WebSocket models: `massive/websocket/models/`
+
+### API Spec Codegen
+
+`.massive/rest.py` generates REST client code from `.massive/rest.json` (OpenAPI spec). `.massive/websocket.json` is the WebSocket spec.
+
+### Tests
+
+- `tests/test_rest/` — uses `pook` for HTTP mocking, with mock responses in `tests/test_rest/mocks/`
+- `tests/test_websocket/` — has its own mock WebSocket server in `mock_server.py`
+- `tests/test_us_daily/` — unit tests for the US daily processor
+- Test base classes: `tests/test_rest/base.py` and `tests/test_websocket/base_ws.py`
+
+### Key Conventions
+
+- API key via `MASSIVE_API_KEY` env var or constructor parameter
+- Base URL: `https://api.massive.com`
+- Auth header: `Authorization: Bearer `
+- Python 3.9+ required
+- Formatting: `black`; type checking: `mypy`
+````
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add CLAUDE.md
+git commit -m "docs: update CLAUDE.md for new project structure"
+```
+
+---
+
+### Task 9: Verify everything works
+
+- [ ] **Step 1: Install in editable mode**
+
+```bash
+pip install -e ".[dev]"
+```
+
+Expected: Installs successfully with no errors.
+
+- [ ] **Step 2: Run us_daily tests**
+
+```bash
+pytest tests/test_us_daily/ -v
+```
+
+Expected: All tests pass. Specifically:
+- `test_config.py` — 3 tests pass
+- `test_storage.py` — 5 tests pass
+- `test_agg_fetcher.py` — 8 tests pass (4 classes)
+- `test_ticker_filter.py` — 3 tests pass
+
+- [ ] **Step 3: Run REST tests**
+
+```bash
+pytest tests/test_rest/ -v
+```
+
+Expected: All tests pass.
+
+- [ ] **Step 4: Run WebSocket tests**
+
+```bash
+pytest tests/test_websocket/ -v
+```
+
+Expected: All tests pass.
+
+- [ ] **Step 5: Verify import works**
+
+```bash
+python -c "from massive import RESTClient; print('massive OK')"
+python -c "from processor.us_daily.config import Config; print('processor OK')"
+python -c "from provider.base import DataFetcherManager; print('provider OK')"
+```
+
+Expected: All three print their "OK" message.
+
+- [ ] **Step 6: If any failures, fix and commit**
+
+Address any import errors or test failures discovered in steps 1-5, then commit fixes.
diff --git a/.claude/plans/20250423-provider-deps-design.md b/.claude/plans/20250423-provider-deps-design.md
new file mode 100644
index 00000000..b23aeffb
--- /dev/null
+++ b/.claude/plans/20250423-provider-deps-design.md
@@ -0,0 +1,137 @@
+# Provider Dependencies Resolution Design
+
+**Date:** 2025-04-23
+**Scope:** Resolve `from src.*` imports in `provider/` by creating private internal modules
+
+---
+
+## 1. Goal
+
+The `provider/` module depends on three modules from an external project (daily_stock_analysis): `src.config`, `src.data`, and `src.report_language`. Internalize these dependencies as private modules of `provider/` so that provider becomes fully self-contained.
+
+## 2. Reference Repository
+
+https://github.com/ZhuLinsen/daily_stock_analysis — the original project from which the provider module was extracted.
+
+## 3. New Files
+
+### 3.1 `src/provider/_config.py` (~60 lines)
+
+A slim Config singleton reading from environment variables, containing only the 16 attributes that provider actually uses:
+
+```python
+import os
+from dataclasses import dataclass
+from threading import Lock
+
+SUPPORTED_REPORT_LANGUAGES = ("zh", "en")
+_REPORT_LANGUAGE_ALIASES = {
+    "zh-cn": "zh", "zh_cn": "zh", "zh-hans": "zh", "zh_hans": "zh",
+    "zh-tw": "zh", "zh_tw": "zh", "cn": "zh", "chinese": "zh",
+    "english": "en", "en-us": "en", "en_us": "en", "en-gb": "en", "en_gb": "en",
+}
+
+def normalize_report_language(value, default="zh"):
+    candidate = (value or default).strip().lower().replace(" ", "_")
+    candidate = _REPORT_LANGUAGE_ALIASES.get(candidate, candidate)
+    return candidate if candidate in SUPPORTED_REPORT_LANGUAGES else default
+
+@dataclass
+class Config:
+    tushare_token: str = ""
+    longbridge_app_key: str = ""
+    longbridge_app_secret: str = ""
+    longbridge_access_token: str = ""
+    tickflow_api_key: str = ""
+    enable_eastmoney_patch: bool = True
+    enable_realtime_quote: bool = True
+    enable_chip_distribution: bool = True
+    enable_fundamental_pipeline: bool = True
+    prefetch_realtime_quotes: bool = True
+    realtime_source_priority: str = "tencent,akshare,efinance"
+    fundamental_fetch_timeout_seconds: float = 30.0
+    fundamental_stage_timeout_seconds: float = 60.0
+    fundamental_cache_ttl_seconds: int = 3600
+    fundamental_cache_max_entries: int = 256
+    fundamental_retry_max: int = 2
+
+_instance = None
+_lock = Lock()
+
+def get_config() -> Config:
+    global _instance
+    if _instance is not None:
+        return _instance
+    with _lock:
+        if _instance is not None:
+            return _instance
+        def _env_bool(key, default="true"):
+            return os.environ.get(key, default).lower() != "false"
+        _instance = Config(
+            tushare_token=os.environ.get("TUSHARE_TOKEN", ""),
+            longbridge_app_key=os.environ.get("LONGBRIDGE_APP_KEY", ""),
+            longbridge_app_secret=os.environ.get("LONGBRIDGE_APP_SECRET", ""),
+            longbridge_access_token=os.environ.get("LONGBRIDGE_ACCESS_TOKEN", ""),
+            tickflow_api_key=os.environ.get("TICKFLOW_API_KEY", ""),
+            enable_eastmoney_patch=_env_bool("ENABLE_EASTMONEY_PATCH"),
+            enable_realtime_quote=_env_bool("ENABLE_REALTIME_QUOTE"),
+            enable_chip_distribution=_env_bool("ENABLE_CHIP_DISTRIBUTION"),
+            enable_fundamental_pipeline=_env_bool("ENABLE_FUNDAMENTAL_PIPELINE"),
+            prefetch_realtime_quotes=_env_bool("PREFETCH_REALTIME_QUOTES"),
+            realtime_source_priority=os.environ.get("REALTIME_SOURCE_PRIORITY", "tencent,akshare,efinance"),
+            fundamental_fetch_timeout_seconds=float(os.environ.get("FUNDAMENTAL_FETCH_TIMEOUT_SECONDS", "30")),
+            fundamental_stage_timeout_seconds=float(os.environ.get("FUNDAMENTAL_STAGE_TIMEOUT_SECONDS", "60")),
+            fundamental_cache_ttl_seconds=int(os.environ.get("FUNDAMENTAL_CACHE_TTL_SECONDS", "3600")),
+            fundamental_cache_max_entries=int(os.environ.get("FUNDAMENTAL_CACHE_MAX_ENTRIES", "256")),
+            fundamental_retry_max=int(os.environ.get("FUNDAMENTAL_RETRY_MAX", "2")),
+        )
+    return _instance
+```
+
+### 3.2 `src/provider/_data/stock_mapping.py`
+
+Copied verbatim from `src/data/stock_mapping.py` in the reference repo. Contains:
+- `STOCK_NAME_MAP` — stock code → name mapping dict (A-shares, US stocks, HK stocks)
+- `is_meaningful_stock_name(name, stock_code)` — checks whether a stock name is meaningful
+
+### 3.3 `src/provider/_data/stock_index_loader.py`
+
+Copied verbatim from `src/data/stock_index_loader.py` in the reference repo, with a single import change:
+```python
+# from
+from src.data.stock_mapping import is_meaningful_stock_name
+# to
+from provider._data.stock_mapping import is_meaningful_stock_name
+```
+
+### 3.4 `src/provider/_data/__init__.py`
+
+```python
+from provider._data.stock_mapping import STOCK_NAME_MAP
+
+__all__ = ["STOCK_NAME_MAP"]
["STOCK_NAME_MAP"] +``` + +## 4. Import Path Changes (~20 处) + +所有变更均为 `from src.*` → `from provider._*` 的机械替换: + +| 文件 | 原 import | 新 import | +|------|-----------|-----------| +| `base.py` (line 27) | `from src.data.stock_index_loader import get_index_stock_name` | `from provider._data.stock_index_loader import get_index_stock_name` | +| `base.py` (line 28) | `from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name` | `from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name` | +| `base.py` (9 处 lazy) | `from src.config import get_config` | `from provider._config import get_config` | +| `efinance_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | +| `akshare_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | +| `tushare_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | +| `yfinance_fetcher.py` | `from src.data.stock_mapping import ...` | `from provider._data.stock_mapping import ...` | +| `longbridge_fetcher.py` | `from src.report_language import normalize_report_language` | `from provider._config import normalize_report_language` | +| `longbridge_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | + +## 5. Not In Scope + +- `.env` 文件加载(只读 `os.environ`) +- Config 验证逻辑(`ConfigIssue` 系统) +- `report_language.py` 的其他函数 +- provider 功能性测试(当前无测试,不新增) +- `__init__.py` 的 import 清理 diff --git a/.claude/plans/20250423-provider-deps-plan.md b/.claude/plans/20250423-provider-deps-plan.md new file mode 100644 index 00000000..13f23e8c --- /dev/null +++ b/.claude/plans/20250423-provider-deps-plan.md @@ -0,0 +1,702 @@ +# Provider Dependencies Resolution Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Resolve all `from src.*` imports in `provider/` by creating private internal modules (`_config.py`, `_data/`), making provider fully self-contained. + +**Architecture:** Create `src/provider/_config.py` (slim Config singleton + `normalize_report_language`), `src/provider/_data/` (stock mapping copied from reference repo). Then replace all `from src.*` imports with `from provider._*` imports across 6 fetcher files. + +**Tech Stack:** Python 3.9+, dataclasses, os.environ + +**Design Doc:** `.claude/plans/20250423-provider-deps-design.md` + +--- + +### Task 1: Create `_data/stock_mapping.py` + +**Files:** +- Create: `src/provider/_data/__init__.py` +- Create: `src/provider/_data/stock_mapping.py` + +- [ ] **Step 1: Create `_data/` directory** + +```bash +mkdir -p src/provider/_data +``` + +- [ ] **Step 2: Create `_data/__init__.py`** + +Write `src/provider/_data/__init__.py`: + +```python +# -*- coding: utf-8 -*- +from provider._data.stock_mapping import STOCK_NAME_MAP + +__all__ = ["STOCK_NAME_MAP"] +``` + +- [ ] **Step 3: Create `_data/stock_mapping.py`** + +Write `src/provider/_data/stock_mapping.py` — copy the complete file from reference repo (https://github.com/ZhuLinsen/daily_stock_analysis/blob/main/src/data/stock_mapping.py). This file has no external imports. 
It contains: +- `STOCK_NAME_MAP` dict (~90 entries: A-shares, US stocks, HK stocks) +- `is_meaningful_stock_name(name, stock_code)` function + +```python +# -*- coding: utf-8 -*- +from __future__ import annotations + +""" +=================================== +股票代码与名称映射 +=================================== + +Shared stock code -> name mapping, used by analyzer, data_provider, and name_to_code_resolver. +""" + +# Stock code -> name mapping (common stocks) +STOCK_NAME_MAP = { + # === A-shares === + "600519": "贵州茅台", + "000001": "平安银行", + "300750": "宁德时代", + "002594": "比亚迪", + "600036": "招商银行", + "601318": "中国平安", + "000858": "五粮液", + "600276": "恒瑞医药", + "601012": "隆基绿能", + "002475": "立讯精密", + "300059": "东方财富", + "002415": "海康威视", + "600900": "长江电力", + "601166": "兴业银行", + "600028": "中国石化", + "600030": "中信证券", + "600031": "三一重工", + "600050": "中国联通", + "600104": "上汽集团", + "600111": "北方稀土", + "600150": "中国船舶", + "600309": "万华化学", + "600406": "国电南瑞", + "600690": "海尔智家", + "600760": "中航沈飞", + "600809": "山西汾酒", + "600887": "伊利股份", + "600930": "华电新能", + "601088": "中国神华", + "601127": "赛力斯", + "601211": "国泰海通", + "601225": "陕西煤业", + "601288": "农业银行", + "601328": "交通银行", + "601398": "工商银行", + "601601": "中国太保", + "601628": "中国人寿", + "601658": "邮储银行", + "601668": "中国建筑", + "601728": "中国电信", + "601816": "京沪高铁", + "601857": "中国石油", + "601888": "中国中免", + "601899": "紫金矿业", + "601919": "中远海控", + "601985": "中国核电", + "601988": "中国银行", + "603019": "中科曙光", + "603259": "药明康德", + "603501": "豪威集团", + "603993": "洛阳钼业", + "688008": "澜起科技", + "688012": "中微公司", + "688041": "海光信息", + "688111": "金山办公", + "688256": "寒武纪", + "688981": "中芯国际", + # === US stocks === + "AAPL": "苹果", + "TSLA": "特斯拉", + "MSFT": "微软", + "GOOGL": "谷歌A", + "GOOG": "谷歌C", + "AMZN": "亚马逊", + "NVDA": "英伟达", + "META": "Meta", + "AMD": "AMD", + "INTC": "英特尔", + "BABA": "阿里巴巴", + "PDD": "拼多多", + "JD": "京东", + "BIDU": "百度", + "NIO": "蔚来", + "XPEV": "小鹏汽车", + "LI": "理想汽车", + "COIN": "Coinbase", + "MSTR": "MicroStrategy", + # === HK stocks (5-digit) === + "00700": "腾讯控股", + "03690": "美团", + "01810": "小米集团", + "09988": "阿里巴巴", + "09618": "京东集团", + "09888": "百度集团", + "01024": "快手", + "00981": "中芯国际", + "02015": "理想汽车", + "09868": "小鹏汽车", + "00005": "汇丰控股", + "01299": "友邦保险", + "00941": "中国移动", + "00883": "中国海洋石油", +} + + +def is_meaningful_stock_name(name: str | None, stock_code: str) -> bool: + """Return whether a stock name is useful for display or caching.""" + if not name: + return False + + normalized_name = str(name).strip() + if not normalized_name: + return False + + normalized_code = (stock_code or "").strip().upper() + if normalized_name.upper() == normalized_code: + return False + + if normalized_name.startswith("股票"): + return False + + placeholder_values = { + "N/A", + "NA", + "NONE", + "NULL", + "--", + "-", + "UNKNOWN", + "TICKER", + } + if normalized_name.upper() in placeholder_values: + return False + + return True +``` + +- [ ] **Step 4: Commit** + +```bash +git add src/provider/_data/ +git commit -m "feat: add provider/_data/stock_mapping module" +``` + +--- + +### Task 2: Create `_data/stock_index_loader.py` + +**Files:** +- Create: `src/provider/_data/stock_index_loader.py` + +- [ ] **Step 1: Create `_data/stock_index_loader.py`** + +Write `src/provider/_data/stock_index_loader.py` — copy from reference repo (https://github.com/ZhuLinsen/daily_stock_analysis/blob/main/src/data/stock_index_loader.py) with ONE import change on line 10: + +```python +# Change from: +from src.data.stock_mapping import is_meaningful_stock_name +# To: +from 
provider._data.stock_mapping import is_meaningful_stock_name +``` + +Full file content: + +```python +# -*- coding: utf-8 -*- +from __future__ import annotations + +import json +import logging +from pathlib import Path +from threading import RLock +from typing import Dict, Iterable + +from provider._data.stock_mapping import is_meaningful_stock_name + +logger = logging.getLogger(__name__) + +_STOCK_INDEX_FILENAME = "stocks.index.json" +_STOCK_INDEX_CACHE: Dict[str, str] | None = None +_STOCK_INDEX_CACHE_LOCK = RLock() + + +def get_stock_index_candidate_paths() -> tuple[Path, ...]: + """Return the supported locations for the generated stock index.""" + repo_root = Path(__file__).resolve().parents[2] + return ( + repo_root / "apps" / "dsa-web" / "public" / _STOCK_INDEX_FILENAME, + repo_root / "static" / _STOCK_INDEX_FILENAME, + ) + + +def _add_lookup_key(keys: set[str], value: str) -> None: + candidate = str(value or "").strip() + if not candidate: + return + keys.add(candidate) + keys.add(candidate.upper()) + + +def _build_lookup_keys(canonical_code: str, display_code: str) -> Iterable[str]: + keys: set[str] = set() + _add_lookup_key(keys, canonical_code) + _add_lookup_key(keys, display_code) + + canonical_upper = str(canonical_code or "").strip().upper() + display_upper = str(display_code or "").strip().upper() + + if "." in canonical_upper: + base, suffix = canonical_upper.rsplit(".", 1) + if suffix in {"SH", "SZ", "SS", "BJ"} and base.isdigit(): + _add_lookup_key(keys, base) + elif suffix == "HK" and base.isdigit() and 1 <= len(base) <= 5: + digits = base.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + for candidate in (canonical_upper, display_upper): + if candidate.startswith("HK"): + digits = candidate[2:] + if digits.isdigit() and 1 <= len(digits) <= 5: + digits = digits.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + return keys + + +def _load_stock_index_file(index_path: Path) -> Dict[str, str]: + with index_path.open("r", encoding="utf-8") as fh: + raw_items = json.load(fh) + + if not isinstance(raw_items, list): + raise ValueError( + f"Unexpected {_STOCK_INDEX_FILENAME} payload type: {type(raw_items).__name__}" + ) + + stock_name_map: Dict[str, str] = {} + for item in raw_items: + if not isinstance(item, list) or len(item) < 3: + continue + + canonical_code, display_code, name_zh = item[0], item[1], item[2] + if not is_meaningful_stock_name(name_zh, str(display_code or canonical_code or "")): + continue + + for key in _build_lookup_keys(str(canonical_code or ""), str(display_code or "")): + stock_name_map[key] = str(name_zh).strip() + + return stock_name_map + + +def get_stock_name_index_map() -> Dict[str, str]: + """Lazily load and cache the generated stock-name index.""" + global _STOCK_INDEX_CACHE + + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + with _STOCK_INDEX_CACHE_LOCK: + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + for candidate_path in get_stock_index_candidate_paths(): + if not candidate_path.is_file(): + continue + + try: + _STOCK_INDEX_CACHE = _load_stock_index_file(candidate_path) + logger.debug( + "[股票名称] 已加载前端股票索引映射: %s (%d 条)", + candidate_path, + len(_STOCK_INDEX_CACHE), + ) + return _STOCK_INDEX_CACHE + except (OSError, TypeError, ValueError) as exc: + logger.debug("[股票名称] 读取股票索引失败 %s: %s", candidate_path, exc) + + _STOCK_INDEX_CACHE = {} + return _STOCK_INDEX_CACHE + + +def get_index_stock_name(stock_code: str) -> str | None: + """Resolve a stock 
name from the generated frontend stock index.""" + code = str(stock_code or "").strip() + if not code: + return None + + stock_name_map = get_stock_name_index_map() + for key in _build_lookup_keys(code, code): + name = stock_name_map.get(key) + if is_meaningful_stock_name(name, code): + return name + + return None + + +def _clear_stock_index_cache_for_tests() -> None: + global _STOCK_INDEX_CACHE + with _STOCK_INDEX_CACHE_LOCK: + _STOCK_INDEX_CACHE = None +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/provider/_data/stock_index_loader.py +git commit -m "feat: add provider/_data/stock_index_loader module" +``` + +--- + +### Task 3: Create `_config.py` + +**Files:** +- Create: `src/provider/_config.py` + +- [ ] **Step 1: Create `_config.py`** + +Write `src/provider/_config.py`: + +```python +# -*- coding: utf-8 -*- +""" +Slim configuration singleton for provider module. + +Reads configuration from environment variables. Only includes attributes +actually used by provider fetchers. +""" + +import os +from dataclasses import dataclass +from threading import Lock +from typing import Optional + + +# --------------------------------------------------------------------------- +# normalize_report_language (extracted from src/report_language.py) +# --------------------------------------------------------------------------- + +SUPPORTED_REPORT_LANGUAGES = ("zh", "en") + +_REPORT_LANGUAGE_ALIASES = { + "zh-cn": "zh", "zh_cn": "zh", "zh-hans": "zh", "zh_hans": "zh", + "zh-tw": "zh", "zh_tw": "zh", "cn": "zh", "chinese": "zh", + "english": "en", "en-us": "en", "en_us": "en", "en-gb": "en", "en_gb": "en", +} + + +def normalize_report_language(value: Optional[str], default: str = "zh") -> str: + """Normalize report language to a supported short code.""" + candidate = (value or default).strip().lower().replace(" ", "_") + candidate = _REPORT_LANGUAGE_ALIASES.get(candidate, candidate) + return candidate if candidate in SUPPORTED_REPORT_LANGUAGES else default + + +# --------------------------------------------------------------------------- +# Config singleton +# --------------------------------------------------------------------------- + +@dataclass +class Config: + # Tushare + tushare_token: str = "" + # Longbridge + longbridge_app_key: str = "" + longbridge_app_secret: str = "" + longbridge_access_token: str = "" + # TickFlow + tickflow_api_key: str = "" + # Feature toggles + enable_eastmoney_patch: bool = True + enable_realtime_quote: bool = True + enable_chip_distribution: bool = True + enable_fundamental_pipeline: bool = True + prefetch_realtime_quotes: bool = True + # Realtime source priority + realtime_source_priority: str = "tencent,akshare,efinance" + # Fundamental pipeline + fundamental_fetch_timeout_seconds: float = 30.0 + fundamental_stage_timeout_seconds: float = 60.0 + fundamental_cache_ttl_seconds: int = 3600 + fundamental_cache_max_entries: int = 256 + fundamental_retry_max: int = 2 + + +_instance: Optional[Config] = None +_lock = Lock() + + +def _env_bool(key: str, default: str = "true") -> bool: + return os.environ.get(key, default).lower() != "false" + + +def get_config() -> Config: + """Return the global Config singleton, creating it on first call.""" + global _instance + if _instance is not None: + return _instance + with _lock: + if _instance is not None: + return _instance + _instance = Config( + tushare_token=os.environ.get("TUSHARE_TOKEN", ""), + longbridge_app_key=os.environ.get("LONGBRIDGE_APP_KEY", ""), + longbridge_app_secret=os.environ.get("LONGBRIDGE_APP_SECRET", 
""), + longbridge_access_token=os.environ.get("LONGBRIDGE_ACCESS_TOKEN", ""), + tickflow_api_key=os.environ.get("TICKFLOW_API_KEY", ""), + enable_eastmoney_patch=_env_bool("ENABLE_EASTMONEY_PATCH"), + enable_realtime_quote=_env_bool("ENABLE_REALTIME_QUOTE"), + enable_chip_distribution=_env_bool("ENABLE_CHIP_DISTRIBUTION"), + enable_fundamental_pipeline=_env_bool("ENABLE_FUNDAMENTAL_PIPELINE"), + prefetch_realtime_quotes=_env_bool("PREFETCH_REALTIME_QUOTES"), + realtime_source_priority=os.environ.get( + "REALTIME_SOURCE_PRIORITY", "tencent,akshare,efinance" + ), + fundamental_fetch_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_FETCH_TIMEOUT_SECONDS", "30") + ), + fundamental_stage_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_STAGE_TIMEOUT_SECONDS", "60") + ), + fundamental_cache_ttl_seconds=int( + os.environ.get("FUNDAMENTAL_CACHE_TTL_SECONDS", "3600") + ), + fundamental_cache_max_entries=int( + os.environ.get("FUNDAMENTAL_CACHE_MAX_ENTRIES", "256") + ), + fundamental_retry_max=int( + os.environ.get("FUNDAMENTAL_RETRY_MAX", "2") + ), + ) + return _instance +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/provider/_config.py +git commit -m "feat: add provider/_config module with slim Config singleton" +``` + +--- + +### Task 4: Update imports in `base.py` + +**Files:** +- Modify: `src/provider/base.py` + +- [ ] **Step 1: Replace top-level imports (lines 27-28)** + +In `src/provider/base.py`, replace: + +```python +from src.data.stock_index_loader import get_index_stock_name +from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +``` + +with: + +```python +from provider._data.stock_index_loader import get_index_stock_name +from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +``` + +- [ ] **Step 2: Replace all lazy `from src.config import get_config` (9 occurrences)** + +Global replace in `src/provider/base.py`: + +```python +# from +from src.config import get_config +# to +from provider._config import get_config +``` + +This appears at lines 564, 1066, 1151, 1400, 1751, 1973, 2270, 2334, 2384. + +- [ ] **Step 3: Verify no remaining `from src.` in base.py** + +```bash +grep "from src\." src/provider/base.py +``` + +Expected: no output. 
+ +- [ ] **Step 4: Commit** + +```bash +git add src/provider/base.py +git commit -m "refactor: update base.py imports from src.* to provider._*" +``` + +--- + +### Task 5: Update imports in fetcher files + +**Files:** +- Modify: `src/provider/efinance_fetcher.py` (line 55) +- Modify: `src/provider/akshare_fetcher.py` (line 45) +- Modify: `src/provider/tushare_fetcher.py` (line 36) +- Modify: `src/provider/yfinance_fetcher.py` (lines 40-42) +- Modify: `src/provider/longbridge_fetcher.py` (lines 165, 293, 326) + +- [ ] **Step 1: Fix `efinance_fetcher.py`** + +In `src/provider/efinance_fetcher.py`, line 55, replace: + +```python +from src.config import get_config +``` + +with: + +```python +from provider._config import get_config +``` + +- [ ] **Step 2: Fix `akshare_fetcher.py`** + +In `src/provider/akshare_fetcher.py`, line 45, replace: + +```python +from src.config import get_config +``` + +with: + +```python +from provider._config import get_config +``` + +- [ ] **Step 3: Fix `tushare_fetcher.py`** + +In `src/provider/tushare_fetcher.py`, line 36, replace: + +```python +from src.config import get_config +``` + +with: + +```python +from provider._config import get_config +``` + +- [ ] **Step 4: Fix `yfinance_fetcher.py`** + +In `src/provider/yfinance_fetcher.py`, lines 39-42, replace: + +```python +# 可选导入本地股票映射补丁,若缺失则使用空字典兜底 +try: + from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +except (ImportError, ModuleNotFoundError): +``` + +with: + +```python +# 可选导入本地股票映射补丁,若缺失则使用空字典兜底 +try: + from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +except (ImportError, ModuleNotFoundError): +``` + +- [ ] **Step 5: Fix `longbridge_fetcher.py`** + +Three lazy imports inside try blocks. Replace each occurrence: + +Line 165: +```python +# from +from src.report_language import normalize_report_language +# to +from provider._config import normalize_report_language +``` + +Line 293: +```python +# from +from src.config import get_config +# to +from provider._config import get_config +``` + +Line 326: +```python +# from +from src.config import get_config +# to +from provider._config import get_config +``` + +- [ ] **Step 6: Verify no remaining `from src.` in any provider file** + +```bash +grep -r "from src\." src/provider/ +``` + +Expected: no output. 
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add src/provider/efinance_fetcher.py src/provider/akshare_fetcher.py src/provider/tushare_fetcher.py src/provider/yfinance_fetcher.py src/provider/longbridge_fetcher.py
+git commit -m "refactor: update fetcher imports from src.* to provider._*"
+```
+
+---
+
+### Task 6: Verify provider imports work
+
+- [ ] **Step 1: Test that _config imports cleanly**
+
+```bash
+python -c "from provider._config import get_config, normalize_report_language; c = get_config(); print(f'Config OK: tushare_token={c.tushare_token!r}'); print(f'Lang: {normalize_report_language(\"chinese\")}')"
+```
+
+Expected:
+```
+Config OK: tushare_token=''
+Lang: zh
+```
+
+- [ ] **Step 2: Test that _data imports cleanly**
+
+```bash
+python -c "from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name; print(f'Mapping OK: {len(STOCK_NAME_MAP)} entries'); print(f'茅台: {is_meaningful_stock_name(\"贵州茅台\", \"600519\")}')"
+```
+
+Expected:
+```
+Mapping OK: 90 entries
+茅台: True
+```
+
+```bash
+python -c "from provider._data.stock_index_loader import get_index_stock_name; print(f'Index loader OK: {get_index_stock_name(\"600519\")}')"
+```
+
+Expected: `Index loader OK: None` (no index file present, graceful fallback)
+
+- [ ] **Step 3: Test that provider.__init__ imports without error**
+
+```bash
+python -c "from provider import DataFetcherManager; print('provider OK')"
+```
+
+Expected: `provider OK` (may show warnings about missing optional deps like efinance/akshare, but no ImportError)
+
+- [ ] **Step 4: If any failures, fix and commit**
+
+Address any remaining import errors discovered in steps 1-3.
diff --git a/.claude/plans/2026-04-22-top-usstock-daily-data-design.md b/.claude/plans/2026-04-22-top-usstock-daily-data-design.md
new file mode 100644
index 00000000..ac48e12c
--- /dev/null
+++ b/.claude/plans/2026-04-22-top-usstock-daily-data-design.md
@@ -0,0 +1,223 @@
+# Top US Stocks Daily OHLCV Collection — Design Doc
+
+## Overview
+
+Build a data collection module that selects top companies with market cap >= $5B listed on Nasdaq, NYSE, and NYSE Arca, collects their daily OHLCV data since 2020 month by month, and supports incremental updates.
+
+## Module Structure
+
+```
+project/us_daily/
+├── __init__.py
+├── __main__.py        # entry point: load config → filter tickers → fetch one by one
+├── config.py          # Config dataclass + defaults + config file loading
+├── ticker_filter.py   # calls list_tickers + get_ticker_details to select top companies
+├── agg_fetcher.py     # calls list_aggs per month, with incremental checks and retry logic
+└── storage.py         # JSON file I/O, path management
+
+data/us_daily/
+├── top_tickers.json   # list of selected top companies
+└── {TICKER}/          # one directory per ticker
+    ├── 2020-01.json
+    ├── 2020-02.json
+    └── ...
+
+logs/
+└── us_daily.log       # run log
+```
+
+## Execution Flow
+
+```
+1. Load config (project/us_daily/config.json)
+2. Initialize RESTClient
+3. Refresh the ticker list?
+   ├── refresh_tickers=true or top_tickers.json missing → filter via API → write top_tickers.json
+   └── refresh_tickers=false and the file exists → read top_tickers.json
+4. For each ticker:
+   4.1 Create the ticker directory (if missing)
+   4.2 Build the month list from start_date to the current month (see the sketch below)
+   4.3 For each month:
+       - file exists and not the current month → skip
+       - file exists and is the current month → re-fetch and overwrite
+       - file missing → fetch and write
+       - sleep request_interval seconds after every API request
+5. Print a summary (completed count, failure count, and details)
+```
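+
+A minimal sketch of the month-list helper referenced in step 4.2 (the `generate_months` name comes from the agg_fetcher section below; this body is an assumed implementation, not final code):
+
+```python
+def generate_months(start: str, end: str) -> list:
+    """Expand "YYYY-MM" bounds into an inclusive list of months."""
+    year, month = map(int, start.split("-"))
+    end_year, end_month = map(int, end.split("-"))
+    months = []
+    while (year, month) <= (end_year, end_month):
+        months.append(f"{year:04d}-{month:02d}")
+        month += 1
+        if month > 12:  # roll over into January of the next year
+            year, month = year + 1, 1
+    return months
+```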
+
+## Configuration Module
+
+### Config dataclass
+
+```python
+@dataclass
+class Config:
+    refresh_tickers: bool = False    # whether to refresh the top-company list
+    market_cap_min: float = 5e9      # market cap threshold (USD)
+    start_date: str = "2020-01"      # first year-month of data
+    request_interval: int = 20       # seconds between API requests
+    data_dir: str = "data/us_daily"  # data storage path
+    max_retries: int = 3             # retry count for failed requests
+```
+
+### Config File
+
+Path: `project/us_daily/config.json`; defaults are used if the file does not exist.
+
+```json
+{
+  "refresh_tickers": true,
+  "market_cap_min": 5000000000,
+  "start_date": "2020-01",
+  "request_interval": 20,
+  "data_dir": "data/us_daily",
+  "max_retries": 3
+}
+```
+
+## Ticker Filter Module
+
+### ticker_filter.py
+
+**Flow:**
+
+1. Call `client.list_tickers(market="stocks", exchange=exchange, active=True, limit=1000)` for each of the three exchanges:
+   - `XNAS` (Nasdaq)
+   - `XNYS` (New York Stock Exchange)
+   - `ARCX` (NYSE Arca)
+2. Call `client.get_ticker_details(ticker)` for each ticker to obtain `market_cap`
+3. Keep tickers with `market_cap >= config.market_cap_min`
+4. Sleep `config.request_interval` seconds after every API request
+5. Write the result to `data/us_daily/top_tickers.json`
+
+### top_tickers.json format
+
+```json
+{
+  "updated_at": "2026-04-22",
+  "market_cap_min": 5000000000,
+  "tickers": [
+    {"ticker": "AAPL", "name": "Apple Inc.", "market_cap": 3200000000000, "exchange": "XNAS"},
+    {"ticker": "MSFT", "name": "Microsoft Corporation", "market_cap": 2800000000000, "exchange": "XNAS"}
+  ]
+}
+```
+
+## Data Fetching Module
+
+### agg_fetcher.py
+
+**Core logic:**
+
+```python
+def fetch_ticker_aggs(client, ticker, config):
+    months = generate_months(config.start_date, current_month())
+    for month in months:
+        file_path = get_month_file_path(config.data_dir, ticker, month)
+
+        # incremental check
+        if file_exists(file_path) and not is_current_month(month):
+            continue  # historical month already stored, skip
+
+        # fetch data (with retry)
+        aggs = fetch_with_retry(client, ticker, month, config.max_retries)
+
+        # write to file
+        save_month_data(file_path, aggs)
+
+        sleep(config.request_interval)
+```
+
+**Month range:** `generate_months("2020-01", "2026-04")` → `["2020-01", "2020-02", ..., "2026-04"]`
+
+**API call:** `client.list_aggs(ticker, 1, "day", from_=first day of month, to=last day of month, adjusted=True, sort="asc")`
+
+**Retry logic:** up to `max_retries` attempts, sleeping `request_interval` before each retry. If a month still fails, log it, skip that month, and continue.
+
+### Monthly data file format
+
+`data/us_daily/{TICKER}/{YYYY-MM}.json`:
+
+```json
+{
+  "ticker": "AAPL",
+  "month": "2020-01",
+  "fetched_at": "2026-04-22T10:30:00",
+  "data": [
+    {
+      "open": 74.06,
+      "high": 75.15,
+      "low": 73.80,
+      "close": 74.36,
+      "volume": 108872000,
+      "vwap": 74.53,
+      "timestamp": 1577854800000,
+      "transactions": 480012
+    }
+  ]
+}
+```
+
+## Storage Module
+
+### storage.py
+
+**Core functions:**
+
+- `get_tickers_file_path(data_dir)` → `data/us_daily/top_tickers.json`
+- `get_month_file_path(data_dir, ticker, month)` → `data/us_daily/AAPL/2020-01.json`
+- `save_json(path, data)` — write JSON, creating parent directories automatically
+- `load_json(path)` — read JSON
+- `file_exists(path)` — check whether a file exists
+
+## Entry Point Module
+
+### __main__.py
+
+```python
+def main():
+    # 1. Load config
+    config = load_config()
+
+    # 2. Set up logging (to logs/us_daily.log + stdout)
+    setup_logging()
+
+    # 3. Initialize the REST client
+    client = RESTClient()
+
+    # 4. Get the ticker list
+    if config.refresh_tickers or not tickers_file_exists(config):
+        tickers = filter_top_tickers(client, config)
+        save_tickers(config, tickers)
+    else:
+        tickers = load_tickers(config)
+
+    # 5. Fetch daily OHLCV data ticker by ticker
+    failed = []
+    for i, ticker_info in enumerate(tickers):
+        logger.info(f"[{i+1}/{len(tickers)}] Processing {ticker_info['ticker']}")
+        result = fetch_ticker_aggs(client, ticker_info["ticker"], config)
+        if result.failures:
+            failed.extend(result.failures)
+
+    # 6. Print summary
+    logger.info(f"Done: {len(tickers)} tickers")
+    if failed:
+        logger.warning(f"Failed: {len(failed)} months")
+        for f in failed:
+            logger.warning(f"  - {f['ticker']} {f['month']}: {f['error']}")
+```
+
+**How to run:** `python -m project.us_daily`
+
+## Logging
+
+- Uses the Python `logging` module
+- Writes to both `logs/us_daily.log` and stdout
+- Format: `2026-04-22 10:30:00 [INFO] [3/150] AAPL - 2020-01 fetched`
+- Includes progress info for monitoring long runs
+
+## Rate Limiting
+
+- Sleep `config.request_interval` seconds (default 20s) after every API request
+- Applies to all requests: list_tickers pagination, get_ticker_details, and list_aggs
diff --git a/.claude/plans/2026-04-22-top-usstock-daily-data-plan.md b/.claude/plans/2026-04-22-top-usstock-daily-data-plan.md
new file mode 100644
index 00000000..ed0c1b51
--- /dev/null
+++ b/.claude/plans/2026-04-22-top-usstock-daily-data-plan.md
@@ -0,0 +1,997 @@
+# Top US Stocks Daily OHLCV Collection Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Build the `project/us_daily` module to collect daily OHLCV data (2020 to present) for companies with market cap >= $5B on the major US exchanges, stored as monthly JSON files with incremental-update support.
+
+**Architecture:** A single-process pipeline: load config → filter tickers → fetch daily bars month by month → store as JSON. Incremental updates work by checking whether a month's file already exists. Sleep 20s after every API request to satisfy rate limits.
+
+**Tech Stack:** Python 3.9+, the `massive` SDK (RESTClient, list_tickers, get_ticker_details, list_aggs), standard library json/logging/dataclasses/calendar/datetime
+
+---
+
+## File Structure
+
+| File | Responsibility |
+|------|---------------|
+| `project/__init__.py` | Empty; makes project a package |
+| `project/us_daily/__init__.py` | Empty; makes us_daily a package |
+| `project/us_daily/config.py` | Config dataclass + load_config() |
+| `project/us_daily/storage.py` | JSON I/O, path computation, file-existence checks |
+| `project/us_daily/ticker_filter.py` | Iterates exchanges for tickers, fetches details, filters by market cap |
+| `project/us_daily/agg_fetcher.py` | Fetches daily bars per month, incremental checks, retry logic |
+| `project/us_daily/__main__.py` | Entry point: config loading → ticker filtering → data fetching → summary |
+| `project/us_daily/config.json` | Default config file |
+| `tests/test_us_daily/test_config.py` | Tests for the config module |
+| `tests/test_us_daily/test_storage.py` | Tests for the storage module |
+| `tests/test_us_daily/test_ticker_filter.py` | Tests for the ticker_filter module |
+| `tests/test_us_daily/test_agg_fetcher.py` | Tests for the agg_fetcher module |
+
+---
+
+### Task 1: Config Module
+
+**Files:**
+- Create: `project/__init__.py`
+- Create: `project/us_daily/__init__.py`
+- Create: `project/us_daily/config.py`
+- Create: `project/us_daily/config.json`
+- Create: `tests/test_us_daily/__init__.py`
+- Create: `tests/test_us_daily/test_config.py`
+
+- [ ] **Step 1: Write the failing test for Config defaults and load_config**
+
+Create `tests/test_us_daily/__init__.py` (empty) and `tests/test_us_daily/test_config.py`:
+
+```python
+import unittest
+import json
+import os
+import tempfile
+
+
+class TestConfig(unittest.TestCase):
+    def test_default_config(self):
+        from project.us_daily.config import Config
+
+        config = Config()
+        self.assertEqual(config.refresh_tickers, False)
+        self.assertEqual(config.market_cap_min, 5e9)
+        self.assertEqual(config.start_date, "2020-01")
+        self.assertEqual(config.request_interval, 20)
+        self.assertEqual(config.data_dir, "data/us_daily")
+        self.assertEqual(config.max_retries, 3)
+
+    def test_load_config_from_file(self):
+        from project.us_daily.config import load_config
+
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".json", delete=False
+        ) as f:
+            json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f)
+            tmp_path = f.name
+
+        try:
+            config = load_config(tmp_path)
+            self.assertEqual(config.refresh_tickers, True)
+            self.assertEqual(config.market_cap_min, 1e10)
+            # defaults preserved for unspecified fields
+            self.assertEqual(config.start_date, "2020-01")
+            self.assertEqual(config.request_interval, 20)
+        finally:
+            os.unlink(tmp_path)
+
+    def test_load_config_missing_file_uses_defaults(self):
+        from project.us_daily.config import load_config
+
+        config = load_config("/nonexistent/path/config.json")
+        self.assertEqual(config.refresh_tickers, False)
+        self.assertEqual(config.market_cap_min, 5e9)
+
+
+if __name__ == "__main__":
+    unittest.main()
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_config.py -v`
+Expected: FAIL — ModuleNotFoundError for `project.us_daily.config`
+
+- [ ] **Step 3: Create package files and implement Config**
+
+Create `project/__init__.py` (empty):
+
+```python
+```
+
+Create `project/us_daily/__init__.py` (empty):
+
+```python
+```
+
+Create `project/us_daily/config.py`:
+
+```python
+import json
+import os
+from dataclasses import dataclass
+
+
+@dataclass
+class Config:
+    refresh_tickers: bool = False
+    market_cap_min: float = 5e9
+    start_date: str = "2020-01"
+    request_interval: int = 20
+    data_dir: str = "data/us_daily"
+    max_retries: int = 3
+
+
+def load_config(config_path: str = "project/us_daily/config.json") -> Config:
+    config = Config()
+    if os.path.exists(config_path):
+        with open(config_path, "r") as f:
+            data = json.load(f)
+        for key, value in data.items():
+            if hasattr(config, key):
+                setattr(config, key, value)
+    return config
+```
+
+Create `project/us_daily/config.json`:
+
+```json
+{
+  "refresh_tickers": false,
+  "market_cap_min": 5000000000,
+  "start_date": "2020-01",
+  "request_interval": 20,
+  "data_dir": "data/us_daily",
+  "max_retries": 3
+}
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_config.py -v`
+Expected: 3 tests PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add project/__init__.py project/us_daily/__init__.py project/us_daily/config.py project/us_daily/config.json tests/test_us_daily/__init__.py tests/test_us_daily/test_config.py
+git commit -m "feat: add config module for us_daily data fetcher"
+```
+
+---
+
+### Task 2: Storage Module
+
+**Files:**
+- Create: `project/us_daily/storage.py`
+- Create: `tests/test_us_daily/test_storage.py`
+
+- [ ] **Step 1: Write the failing tests for storage functions**
+
+Create `tests/test_us_daily/test_storage.py`:
+
+```python
+import unittest
+import json
+import os
+import tempfile
+import shutil
+
+
+class TestStorage(unittest.TestCase):
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir)
+
+    def test_get_tickers_file_path(self):
+        from project.us_daily.storage import get_tickers_file_path
+
+        result = get_tickers_file_path("data/us_daily")
+        self.assertEqual(result, "data/us_daily/top_tickers.json")
+
+    def test_get_month_file_path(self):
+        from project.us_daily.storage import get_month_file_path
+
+        result = get_month_file_path("data/us_daily", "AAPL", "2020-01")
+        self.assertEqual(result, "data/us_daily/AAPL/2020-01.json")
+
+    def test_save_and_load_json(self):
+        from project.us_daily.storage import save_json, load_json
+
+        file_path = os.path.join(self.test_dir, "sub", "test.json")
+        data = {"key": "value", "num": 42}
+        save_json(file_path, data)
+        loaded = load_json(file_path)
+        self.assertEqual(loaded, data)
+
+    def test_save_json_creates_parent_dirs(self):
+        from project.us_daily.storage import save_json
+
+        file_path = os.path.join(self.test_dir, "a", "b", "c", "test.json")
+        save_json(file_path, {"x": 1})
+        self.assertTrue(os.path.exists(file_path))
+
+    def test_file_exists(self):
+        from project.us_daily.storage import file_exists
+
+        existing = os.path.join(self.test_dir, "exists.json")
+        with open(existing, "w") as f:
+            f.write("{}")
+
+        self.assertTrue(file_exists(existing))
+        self.assertFalse(file_exists(os.path.join(self.test_dir, "nope.json")))
+
+
+if __name__ == "__main__":
+    unittest.main()
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_storage.py -v`
+Expected: FAIL — ModuleNotFoundError for `project.us_daily.storage`
+
+- [ ] **Step 3: Implement storage module**
+
+Create `project/us_daily/storage.py`:
+
+```python
+import json
+import os
+
+
+def get_tickers_file_path(data_dir: str) -> str:
+    return os.path.join(data_dir, "top_tickers.json")
+
+
+def get_month_file_path(data_dir: str, ticker: str, month: str) -> str:
+    return os.path.join(data_dir, ticker, f"{month}.json")
+
+
+def save_json(path: str, data: dict) -> None:
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+
+
+def load_json(path: str) -> dict:
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def file_exists(path: str) -> bool:
+    return os.path.isfile(path)
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_storage.py -v`
+Expected: 5 tests PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add project/us_daily/storage.py tests/test_us_daily/test_storage.py
+git commit -m "feat: add storage module for JSON file I/O and path management"
+```
+
+---
+
+### Task 3: Ticker Filter Module
+
+**Files:**
+- Create: `project/us_daily/ticker_filter.py`
+- Create: `tests/test_us_daily/test_ticker_filter.py`
+
+- [ ] **Step 1: Write the failing tests**
+
+Create `tests/test_us_daily/test_ticker_filter.py`:
+
+```python
+import unittest
+from unittest.mock import MagicMock, patch, call
+from dataclasses import dataclass
+
+
+class TestTickerFilter(unittest.TestCase):
+    def _make_ticker(self, ticker_str, exchange):
+        t = MagicMock()
+        t.ticker = ticker_str
+        t.primary_exchange = exchange
+        return t
+
+    def _make_details(self, ticker_str, name, market_cap, exchange):
+        d = MagicMock()
+        d.ticker = ticker_str
+        d.name = name
+        d.market_cap = market_cap
+        d.primary_exchange = exchange
+        return d
+
+    def test_filter_top_tickers_filters_by_market_cap(self):
+        from project.us_daily.ticker_filter import filter_top_tickers
+        from project.us_daily.config import Config
+
+        config = Config(market_cap_min=5e9, request_interval=0)
+
+        client = MagicMock()
+        # list_tickers returns different tickers per exchange
+        client.list_tickers.return_value = iter([
+            self._make_ticker("AAPL", "XNAS"),
+            self._make_ticker("TINY", "XNAS"),
+        ])
+
+        # get_ticker_details: AAPL has large cap, TINY does not
+        def mock_details(ticker):
+            if ticker == "AAPL":
+                return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS")
+            elif ticker == "TINY":
+                return self._make_details("TINY", "Tiny Corp", 1e9, "XNAS")
+
+        client.get_ticker_details.side_effect = mock_details
+
+        with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]):
+            with patch("project.us_daily.ticker_filter.time.sleep"):
+                result = filter_top_tickers(client, config)
+
+        tickers = [t["ticker"] for t in result]
+        self.assertIn("AAPL",
tickers) + self.assertNotIn("TINY", tickers) + + def test_filter_top_tickers_includes_required_fields(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("MSFT", "XNYS"), + ]) + client.get_ticker_details.return_value = self._make_details( + "MSFT", "Microsoft Corporation", 2.8e12, "XNYS" + ) + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + self.assertEqual(len(result), 1) + entry = result[0] + self.assertEqual(entry["ticker"], "MSFT") + self.assertEqual(entry["name"], "Microsoft Corporation") + self.assertEqual(entry["market_cap"], 2.8e12) + self.assertEqual(entry["exchange"], "XNYS") + + def test_filter_skips_ticker_on_details_error(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL", "XNAS"), + self._make_ticker("AAPL", "XNAS"), + ]) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") + + client.get_ticker_details.side_effect = mock_details + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + tickers = [t["ticker"] for t in result] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `poetry run python -m pytest tests/test_us_daily/test_ticker_filter.py -v` +Expected: FAIL — ModuleNotFoundError for `project.us_daily.ticker_filter` + +- [ ] **Step 3: Implement ticker_filter module** + +Create `project/us_daily/ticker_filter.py`: + +```python +import logging +import time +from typing import List + +from project.us_daily.config import Config + +logger = logging.getLogger("us_daily") + +EXCHANGES = ["XNAS", "XNYS", "ARCX"] + + +def filter_top_tickers(client, config: Config) -> List[dict]: + result = [] + for exchange in EXCHANGES: + logger.info(f"Fetching tickers for exchange: {exchange}") + try: + tickers = client.list_tickers( + market="stocks", + exchange=exchange, + active=True, + limit=1000, + ) + except Exception as e: + logger.error(f"Failed to list tickers for {exchange}: {e}") + continue + + time.sleep(config.request_interval) + + for ticker_obj in tickers: + ticker_str = ticker_obj.ticker + try: + details = client.get_ticker_details(ticker_str) + time.sleep(config.request_interval) + except Exception as e: + logger.warning( + f"Failed to get details for {ticker_str}: {e}" + ) + continue + + if details.market_cap is None: + logger.debug(f"{ticker_str}: no market_cap data, skipping") + continue + + if details.market_cap >= config.market_cap_min: + entry = { + "ticker": details.ticker, + "name": details.name, + "market_cap": details.market_cap, + "exchange": details.primary_exchange, + } + result.append(entry) + logger.info( + f" {details.ticker}: market_cap={details.market_cap:.0f} ✓" + ) + else: + logger.debug( + f" {ticker_str}: market_cap={details.market_cap:.0f} < 
{config.market_cap_min:.0f}, skipping"
+                )
+
+    logger.info(f"Total top tickers found: {len(result)}")
+    return result
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_ticker_filter.py -v`
+Expected: 3 tests PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add project/us_daily/ticker_filter.py tests/test_us_daily/test_ticker_filter.py
+git commit -m "feat: add ticker_filter module to select top US stocks by market cap"
+```
+
+---
+
+### Task 4: Agg Fetcher Module
+
+**Files:**
+- Create: `project/us_daily/agg_fetcher.py`
+- Create: `tests/test_us_daily/test_agg_fetcher.py`
+
+- [ ] **Step 1: Write the failing tests**
+
+Create `tests/test_us_daily/test_agg_fetcher.py`:
+
+```python
+import unittest
+from unittest.mock import MagicMock, patch, call
+import os
+import tempfile
+import shutil
+import json
+from datetime import date
+
+
+class TestGenerateMonths(unittest.TestCase):
+    def test_generate_months_basic(self):
+        from project.us_daily.agg_fetcher import generate_months
+
+        result = generate_months("2020-01", "2020-04")
+        self.assertEqual(result, ["2020-01", "2020-02", "2020-03", "2020-04"])
+
+    def test_generate_months_cross_year(self):
+        from project.us_daily.agg_fetcher import generate_months
+
+        result = generate_months("2023-11", "2024-02")
+        self.assertEqual(result, ["2023-11", "2023-12", "2024-01", "2024-02"])
+
+    def test_generate_months_single(self):
+        from project.us_daily.agg_fetcher import generate_months
+
+        result = generate_months("2024-06", "2024-06")
+        self.assertEqual(result, ["2024-06"])
+
+
+class TestMonthBounds(unittest.TestCase):
+    def test_month_bounds_january(self):
+        from project.us_daily.agg_fetcher import get_month_bounds
+
+        start, end = get_month_bounds("2020-01")
+        self.assertEqual(start, "2020-01-01")
+        self.assertEqual(end, "2020-01-31")
+
+    def test_month_bounds_february_leap(self):
+        from project.us_daily.agg_fetcher import get_month_bounds
+
+        start, end = get_month_bounds("2024-02")
+        self.assertEqual(start, "2024-02-01")
+        self.assertEqual(end, "2024-02-29")
+
+    def test_month_bounds_february_non_leap(self):
+        from project.us_daily.agg_fetcher import get_month_bounds
+
+        start, end = get_month_bounds("2023-02")
+        self.assertEqual(start, "2023-02-01")
+        self.assertEqual(end, "2023-02-28")
+
+
+class TestIsCurrentMonth(unittest.TestCase):
+    @patch("project.us_daily.agg_fetcher.date")
+    def test_is_current_month_true(self, mock_date):
+        from project.us_daily.agg_fetcher import is_current_month
+
+        mock_date.today.return_value = date(2026, 4, 22)
+        self.assertTrue(is_current_month("2026-04"))
+
+    @patch("project.us_daily.agg_fetcher.date")
+    def test_is_current_month_false(self, mock_date):
+        from project.us_daily.agg_fetcher import is_current_month
+
+        mock_date.today.return_value = date(2026, 4, 22)
+        self.assertFalse(is_current_month("2026-03"))
+
+
+class TestFetchTickerAggs(unittest.TestCase):
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir)
+
+    def test_skips_existing_historical_month(self):
+        from project.us_daily.agg_fetcher import fetch_ticker_aggs
+        from project.us_daily.config import Config
+
+        config = Config(
+            start_date="2020-01",
+            data_dir=self.test_dir,
+            request_interval=0,
+        )
+
+        # Create existing file for 2020-01
+        ticker_dir = os.path.join(self.test_dir, "AAPL")
+        os.makedirs(ticker_dir)
+        with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f:
+            json.dump({"ticker": "AAPL", "month": "2020-01",
"data": []}, f) + + client = MagicMock() + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Should not have called list_aggs since file exists and not current month + client.list_aggs.assert_not_called() + self.assertEqual(result["failures"], []) + + def test_fetches_missing_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + ) + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000.0 + agg1.vwap = 74.53 + agg1.timestamp = 1577854800000 + agg1.transactions = 480012 + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Verify file was created + file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["ticker"], "AAPL") + self.assertEqual(data["month"], "2020-01") + self.assertEqual(len(data["data"]), 1) + self.assertEqual(data["data"][0]["open"], 74.06) + self.assertEqual(result["failures"], []) + + def test_refreshes_current_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2026-04", + data_dir=self.test_dir, + request_interval=0, + ) + + # Create existing file for current month + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) + + agg1 = MagicMock() + agg1.open = 200.0 + agg1.high = 210.0 + agg1.low = 195.0 + agg1.close = 205.0 + agg1.volume = 50000000.0 + agg1.vwap = 203.0 + agg1.timestamp = 1714348800000 + agg1.transactions = 300000 + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2026-04"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=True): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Should have called list_aggs even though file exists + client.list_aggs.assert_called_once() + self.assertEqual(result["failures"], []) + + def test_records_failure_after_retries(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + max_retries=2, + ) + + client = MagicMock() + client.list_aggs.side_effect = Exception("API timeout") + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", 
config)
+
+        self.assertEqual(len(result["failures"]), 1)
+        self.assertEqual(result["failures"][0]["ticker"], "AAPL")
+        self.assertEqual(result["failures"][0]["month"], "2020-01")
+        self.assertIn("API timeout", result["failures"][0]["error"])
+
+
+if __name__ == "__main__":
+    unittest.main()
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_agg_fetcher.py -v`
+Expected: FAIL — ModuleNotFoundError for `project.us_daily.agg_fetcher`
+
+- [ ] **Step 3: Implement agg_fetcher module**
+
+Create `project/us_daily/agg_fetcher.py`:
+
+```python
+import calendar
+import logging
+import time
+from datetime import date, datetime
+from typing import List, Tuple
+
+from project.us_daily.config import Config
+from project.us_daily.storage import (
+    get_month_file_path,
+    file_exists,
+    save_json,
+)
+
+logger = logging.getLogger("us_daily")
+
+
+def generate_months(start: str, end: str) -> List[str]:
+    start_year, start_month = int(start[:4]), int(start[5:7])
+    end_year, end_month = int(end[:4]), int(end[5:7])
+
+    months = []
+    year, month = start_year, start_month
+    while (year, month) <= (end_year, end_month):
+        months.append(f"{year:04d}-{month:02d}")
+        month += 1
+        if month > 12:
+            month = 1
+            year += 1
+    return months
+
+
+def get_month_bounds(month: str) -> Tuple[str, str]:
+    year, mon = int(month[:4]), int(month[5:7])
+    last_day = calendar.monthrange(year, mon)[1]
+    return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}"
+
+
+def is_current_month(month: str) -> bool:
+    today = date.today()
+    return month == f"{today.year:04d}-{today.month:02d}"
+
+
+def current_month() -> str:
+    today = date.today()
+    return f"{today.year:04d}-{today.month:02d}"
+
+
+def fetch_ticker_aggs(client, ticker: str, config: Config) -> dict:
+    months = generate_months(config.start_date, current_month())
+    failures = []
+
+    for month in months:
+        file_path = get_month_file_path(config.data_dir, ticker, month)
+
+        if file_exists(file_path) and not is_current_month(month):
+            logger.debug(f"  {ticker} {month}: exists, skipping")
+            continue
+
+        start_date, end_date = get_month_bounds(month)
+        aggs = None
+        last_error = None
+
+        for attempt in range(1, config.max_retries + 1):
+            try:
+                aggs_iter = client.list_aggs(
+                    ticker,
+                    1,
+                    "day",
+                    from_=start_date,
+                    to=end_date,
+                    adjusted=True,
+                    sort="asc",
+                )
+                aggs = list(aggs_iter)
+                break
+            except Exception as e:
+                # Capture the message here: `e` is unbound once the except
+                # block exits, so it cannot be referenced in the failure
+                # record below.
+                last_error = str(e)
+                logger.warning(
+                    f"  {ticker} {month}: attempt {attempt}/{config.max_retries} failed: {e}"
+                )
+                if attempt < config.max_retries:
+                    time.sleep(config.request_interval)
+
+        if aggs is None:
+            failures.append({
+                "ticker": ticker,
+                "month": month,
+                "error": last_error,
+            })
+            logger.error(f"  {ticker} {month}: all retries failed, skipping")
+            continue
+
+        data = {
+            "ticker": ticker,
+            "month": month,
+            "fetched_at": datetime.now().isoformat(timespec="seconds"),
+            "data": [
+                {
+                    "open": a.open,
+                    "high": a.high,
+                    "low": a.low,
+                    "close": a.close,
+                    "volume": a.volume,
+                    "vwap": a.vwap,
+                    "timestamp": a.timestamp,
+                    "transactions": a.transactions,
+                }
+                for a in aggs
+            ],
+        }
+        save_json(file_path, data)
+        logger.info(f"  {ticker} {month}: fetched {len(aggs)} bars")
+        time.sleep(config.request_interval)
+
+    return {"failures": failures}
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `poetry run python -m pytest tests/test_us_daily/test_agg_fetcher.py -v`
+Expected: 12 tests PASS (8 utility + 4 integration)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add project/us_daily/agg_fetcher.py tests/test_us_daily/test_agg_fetcher.py
+git commit -m "feat: add agg_fetcher module for incremental daily bar data collection"
+```
+
+---
+
+### Task 5: Entry Module (__main__.py)
+
+**Files:**
+- Create: `project/us_daily/__main__.py`
+
+- [ ] **Step 1: Implement __main__.py**
+
+Create `project/us_daily/__main__.py`:
+
+```python
+import logging
+import os
+import sys
+from datetime import datetime
+
+from massive import RESTClient
+
+from project.us_daily.config import load_config
+from project.us_daily.storage import (
+    get_tickers_file_path,
+    file_exists,
+    save_json,
+    load_json,
+)
+from project.us_daily.ticker_filter import filter_top_tickers
+from project.us_daily.agg_fetcher import fetch_ticker_aggs
+
+
+def setup_logging():
+    os.makedirs("logs", exist_ok=True)
+    logger = logging.getLogger("us_daily")
+    logger.setLevel(logging.DEBUG)
+
+    formatter = logging.Formatter(
+        "%(asctime)s [%(levelname)s] %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+    file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8")
+    file_handler.setLevel(logging.DEBUG)
+    file_handler.setFormatter(formatter)
+
+    stream_handler = logging.StreamHandler(sys.stdout)
+    stream_handler.setLevel(logging.INFO)
+    stream_handler.setFormatter(formatter)
+
+    logger.addHandler(file_handler)
+    logger.addHandler(stream_handler)
+
+    return logger
+
+
+def main():
+    logger = setup_logging()
+    config = load_config()
+
+    logger.info("=== US Daily Data Fetcher Started ===")
+    logger.info(f"Config: {config}")
+
+    client = RESTClient()
+
+    # Step 1: Get ticker list
+    tickers_path = get_tickers_file_path(config.data_dir)
+    if config.refresh_tickers or not file_exists(tickers_path):
+        logger.info("Filtering top tickers from API...")
+        tickers = filter_top_tickers(client, config)
+        save_json(tickers_path, {
+            "updated_at": datetime.now().strftime("%Y-%m-%d"),
+            "market_cap_min": config.market_cap_min,
+            "tickers": tickers,
+        })
+        logger.info(f"Saved {len(tickers)} tickers to {tickers_path}")
+    else:
+        data = load_json(tickers_path)
+        tickers = data["tickers"]
+        logger.info(
+            f"Loaded {len(tickers)} tickers from {tickers_path} "
+            f"(updated: {data.get('updated_at', 'unknown')})"
+        )
+
+    # Step 2: Fetch agg data for each ticker
+    all_failures = []
+    total = len(tickers)
+    for i, ticker_info in enumerate(tickers):
+        ticker = ticker_info["ticker"]
+        logger.info(f"[{i + 1}/{total}] Processing {ticker}")
+        result = fetch_ticker_aggs(client, ticker, config)
+        if result["failures"]:
+            all_failures.extend(result["failures"])
+
+    # Step 3: Summary
+    logger.info("=== Summary ===")
+    logger.info(f"Total tickers: {total}")
+    if all_failures:
+        logger.warning(f"Failed months: {len(all_failures)}")
+        for f in all_failures:
+            logger.warning(f"  - {f['ticker']} {f['month']}: {f['error']}")
+    else:
+        logger.info("All data fetched successfully")
+    logger.info("=== Done ===")
+
+
+if __name__ == "__main__":
+    main()
+```
+
+- [ ] **Step 2: Verify it can be imported (dry run)**
+
+Run: `poetry run python -c "import project.us_daily.__main__" && echo "Module imports OK"`
+
+This only checks that the module imports without errors. Do not use `python -m project.us_daily` for this check: under `-m`, `__name__` is set to `"__main__"`, so the guard at the bottom of the file would call `main()`, which requires a valid API key and would hit the real API.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add project/us_daily/__main__.py
+git commit -m "feat: add __main__.py entry point for us_daily data fetcher"
+```
+
+---
+
+### Task 6: Run all tests and final verification
+
+**Files:** None (verification only)
+
+- [ ] **Step 1: Run the full test suite**
+
+Run: `poetry run python -m pytest tests/test_us_daily/ -v`
+Expected: All tests PASS (3 config + 5 storage + 3 ticker_filter + 12 agg_fetcher = 23 tests)
+
+- [ ] **Step 2: Run type check**
+
+Run: `poetry run mypy project/`
+Expected: No errors (or only notes about the massive library types)
+
+- [ ] **Step 3: Run style check**
+
+Run: `make style`
+Expected: Files formatted
+
+- [ ] **Step 4: Final commit if style changes**
+
+```bash
+git add -A project/ tests/test_us_daily/
+git commit -m "style: format us_daily module with black"
+```
diff --git a/.claude/plans/2026-04-23-opt-data-provider-design.md b/.claude/plans/2026-04-23-opt-data-provider-design.md
new file mode 100644
index 00000000..7a11b2f3
--- /dev/null
+++ b/.claude/plans/2026-04-23-opt-data-provider-design.md
@@ -0,0 +1,216 @@
+# US Data Provider Optimization Design
+
+**Date:** 2026-04-23
+**Scope:** Rework `processor/us_daily`; upgrade the stock-listing and daily-data fetching logic
+
+---
+
+## 1. Goal
+
+Upgrade `processor/us_daily` into two parts:
+
+1. **Stock listing** — fetch every listed stock per exchange (NASDAQ, NYSE, ARCA) together with all TickerDetails fields, with no market-cap filtering, always through the massive API
+2. **Daily data fetching** — support priority failover across the three sources akshare > yfinance > massive, normalize to a single column schema for storage, and give each source its own configurable request interval
+
+## 2. Architecture
+
+### 2.1 Directory Layout
+
+```
+processor/us_daily/
+├── __init__.py
+├── __main__.py          # entry point; orchestrates the two-step flow
+├── config.py            # config (source priority, per-source intervals, etc.)
+├── storage.py           # file I/O
+├── ticker_lister.py     # new file, replaces ticker_filter.py
+├── sources/             # new package: source abstraction + implementations
+│   ├── __init__.py      # exports SourceManager
+│   ├── base.py          # BaseSource interface
+│   ├── manager.py       # SourceManager (failover orchestration)
+│   ├── akshare_source.py
+│   ├── yfinance_source.py
+│   └── massive_source.py
+└── agg_fetcher.py       # reworked: calls SourceManager
+```
+
+### 2.2 Data Flow
+
+1. `__main__.py` loads the config → initializes `RESTClient` + `SourceManager`
+2. Step 1: `ticker_lister.py` uses `RESTClient` to fetch the full stock list + TickerDetails per exchange from the massive API and stores them under `./data/us_list/`
+3. Step 2: `agg_fetcher.py` walks the stock list, calls `SourceManager.fetch_daily()` month by month, normalizes the data, and stores it as `./data/us_daily/<ticker>/<month>.json`
+
+## 3. Source Abstraction and Failover
+
+### 3.1 BaseSource Interface
+
+```python
+class BaseSource(ABC):
+    name: str                # "akshare" / "yfinance" / "massive"
+    request_interval: float  # read from config; sleep this long after each request
+
+    @abstractmethod
+    def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
+        """Return a normalized DataFrame whose columns are STANDARD_COLUMNS"""
+        ...
+```
+
+### 3.2 STANDARD_COLUMNS
+
+```python
+STANDARD_COLUMNS = ["date", "open", "high", "low", "close", "volume"]
+```
+
+Keep only the six columns that every source can provide.
+
+### 3.3 SourceManager
+
+```python
+class SourceManager:
+    def __init__(self, sources: List[BaseSource]):
+        self.sources = sources  # already sorted by priority
+
+    def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> Tuple[pd.DataFrame, str]:
+        """Try each source in turn; return (df, source_name); raise if all fail"""
+        for source in self.sources:
+            try:
+                df = source.fetch_daily(ticker, start_date, end_date)
+                if not df.empty:
+                    time.sleep(source.request_interval)
+                    return df, source.name
+            except Exception as e:
+                logger.warning(f"{source.name} failed for {ticker}: {e}")
+                continue
+        raise FetchError(f"All sources failed for {ticker}")
+```
+
+### 3.4 The Three Implementations
+
+| Source | Library call | Ticker mapping | Default request interval |
+|--------|--------|------------|--------------|
+| AkshareSource | `ak.stock_us_daily(symbol=ticker)` | ticker used as-is | 2s |
+| YfinanceSource | `yf.download(ticker, start, end)` | ticker used as-is | 1s |
+| MassiveSource | `client.list_aggs(ticker, ...)` | ticker used as-is | 12s |
+
+## 4. Stock Listing (ticker_lister.py)
+
+### 4.1 Exchange Mapping
+
+```python
+EXCHANGES = {
+    "nasdaq": "XNAS",
+    "nyse": "XNYS",
+    "arca": "ARCX",
+}
+```
+
+### 4.2 Flow
+
+1. Iterate over the configured exchange list (all three by default)
+2. For each exchange, call `client.list_tickers(market="stocks", exchange=ex, active=True, limit=1000)` to get every ticker
+3. For each ticker, call `client.get_ticker_details(ticker)` to get the full details
+4. Sleep `config.massive_interval` (12s) after every request
+5. Store results per exchange
+6. Resume support: if the exchange file already exists, load its tickers as the completed set, call `get_ticker_details` only for tickers missing from that set, and overwrite the whole file once done
+
+### 4.3 Storage Layout
+
+```
+data/us_list/
+├── nasdaq.json
+├── nyse.json
+└── arca.json
+```
+
+File format:
+
+```json
+{
+    "updated_at": "2026-04-23",
+    "exchange": "XNAS",
+    "count": 3500,
+    "tickers": [
+        {
+            "ticker": "AAPL",
+            "name": "Apple Inc",
+            "market_cap": 3.2e12,
+            "description": "...",
+            "sic_code": "3571",
+            "total_employees": 164000,
+            "list_date": "1980-12-12",
+            "share_class_shares_outstanding": 15500000000
+        }
+    ]
+}
+```
+
+## 5. Daily Data Fetching and Storage (agg_fetcher.py)
+
+### 5.1 Flow
+
+1. Load the stock list from `./data/us_list/` (all exchanges merged)
+2. For each ticker, generate the month list (`config.start_date` through the current month)
+3. For each month:
+   - File exists and is not the current month → skip
+   - File exists and is the current month → refetch
+   - File does not exist → fetch
+4. Call `source_manager.fetch_daily(ticker, month_start, month_end)`
+5. Normalize, then store
+
+### 5.2 Storage Format
+
+```json
+{
+    "ticker": "AAPL",
+    "month": "2026-04",
+    "source": "akshare",
+    "fetched_at": "2026-04-23T10:30:45",
+    "data": [
+        {
+            "date": "2026-04-01",
+            "open": 150.5,
+            "high": 152.1,
+            "low": 150.0,
+            "close": 151.8,
+            "volume": 45000000
+        }
+    ]
+}
+```
+
+### 5.3 Error Handling
+
+When every source fails, record the month in the failures list, continue with the next ticker, and emit a consolidated failure report at the end.
+
+## 6. Configuration (config.py)
+
+```python
+@dataclass
+class Config:
+    # --- stock listing ---
+    refresh_tickers: bool = False
+    exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"])
+
+    # --- daily data ---
+    start_date: str = "2026-01"
+    data_source_priority: List[str] = field(default_factory=lambda: ["akshare", "yfinance", "massive"])
+
+    # --- per-source request interval (seconds) ---
+    akshare_interval: float = 2.0
+    yfinance_interval: float = 1.0
+    massive_interval: float = 12.0
+
+    # --- paths ---
+    list_dir: str = "data/us_list"
+    daily_dir: str = "data/us_daily"
+
+    # --- retries ---
+    max_retries: int = 3
+```
+
+Removed config key: `market_cap_min` (market-cap filtering is gone).
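+
+For clarity, this is how `load_config` is expected to merge a partial `config.json` over these defaults (a usage sketch; the file contents shown in the comment are hypothetical):
+
+```python
+from processor.us_daily.config import load_config
+
+# Hypothetical config.json content: {"exchanges": ["nasdaq"], "akshare_interval": 3.0}
+config = load_config("config.json")
+
+assert config.exchanges == ["nasdaq"]       # overridden by the file
+assert config.akshare_interval == 3.0       # overridden by the file
+# keys absent from the file keep their dataclass defaults
+assert config.data_source_priority == ["akshare", "yfinance", "massive"]
+```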
+
+## 7. Legacy Files
+
+- **Delete:** `ticker_filter.py` — replaced by `ticker_lister.py`
+- **Rework:** `__main__.py`, `agg_fetcher.py`, `config.py`, `storage.py`
+- **Add:** `ticker_lister.py`, plus the `sources/` package and its 6 files
diff --git a/.claude/plans/2026-04-23-opt-data-provider-plan.md b/.claude/plans/2026-04-23-opt-data-provider-plan.md
new file mode 100644
index 00000000..7dc617bd
--- /dev/null
+++ b/.claude/plans/2026-04-23-opt-data-provider-plan.md
@@ -0,0 +1,1589 @@
+# US Data Provider Optimization Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Refactor `processor/us_daily` to support full stock listing by exchange (no market-cap filter) and multi-source daily data fetching with akshare > yfinance > massive failover.
+
+**Architecture:** New `sources/` sub-package with `BaseSource` abstract class, three implementations (AkshareSource, YfinanceSource, MassiveSource), and a `SourceManager` for priority-based failover. `ticker_lister.py` replaces `ticker_filter.py` for full-exchange listing. Config updated with per-source intervals and source priority.
+
+**Tech Stack:** Python 3.9+, dataclasses, akshare, yfinance, massive REST client, pandas
+
+**Design Doc:** `.claude/plans/2026-04-23-opt-data-provider-design.md`
+
+---
+
+### Task 1: Update Config
+
+**Files:**
+- Modify: `src/processor/us_daily/config.py`
+- Modify: `tests/test_us_daily/test_config.py`
+
+- [ ] **Step 1: Write failing tests for new Config fields**
+
+In `tests/test_us_daily/test_config.py`, replace the `TestConfig` class with:
+
+```python
+class TestConfig(unittest.TestCase):
+    def test_default_config(self):
+        from processor.us_daily.config import Config
+
+        config = Config()
+        self.assertEqual(config.refresh_tickers, False)
+        self.assertEqual(config.start_date, "2026-01")
+        self.assertEqual(config.max_retries, 3)
+        self.assertEqual(config.exchanges, ["nasdaq", "nyse", "arca"])
+        self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"])
+        self.assertEqual(config.akshare_interval, 2.0)
+        self.assertEqual(config.yfinance_interval, 1.0)
+        self.assertEqual(config.massive_interval, 12.0)
+        self.assertEqual(config.list_dir, "data/us_list")
+        self.assertEqual(config.daily_dir, "data/us_daily")
+
+    def test_load_config_from_file(self):
+        from processor.us_daily.config import load_config
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+            json.dump({
+                "refresh_tickers": True,
+                "akshare_interval": 3.0,
+                "exchanges": ["nasdaq"],
+            }, f)
+            tmp_path = f.name
+
+        try:
+            config = load_config(tmp_path)
+            self.assertEqual(config.refresh_tickers, True)
+            self.assertEqual(config.akshare_interval, 3.0)
+            self.assertEqual(config.exchanges, ["nasdaq"])
+            # defaults preserved for unspecified fields
+            self.assertEqual(config.start_date, "2026-01")
+            self.assertEqual(config.massive_interval, 12.0)
+        finally:
+            os.unlink(tmp_path)
+
+    def test_load_config_missing_file_uses_defaults(self):
+        from processor.us_daily.config import load_config
+
+        config = load_config("/nonexistent/path/config.json")
+        self.assertEqual(config.refresh_tickers, False)
+        self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"])
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `pytest tests/test_us_daily/test_config.py -v`
+Expected: FAIL — `Config` does not have `exchanges`, `data_source_priority`,
etc.
+
+- [ ] **Step 3: Update Config dataclass**
+
+Replace `src/processor/us_daily/config.py` with:
+
+```python
+import json
+import os
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+
+@dataclass
+class Config:
+    refresh_tickers: bool = False
+    exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"])
+    start_date: str = "2026-01"
+    data_source_priority: List[str] = field(
+        default_factory=lambda: ["akshare", "yfinance", "massive"]
+    )
+    akshare_interval: float = 2.0
+    yfinance_interval: float = 1.0
+    massive_interval: float = 12.0
+    list_dir: str = "data/us_list"
+    daily_dir: str = "data/us_daily"
+    max_retries: int = 3
+
+
+# Optional[str] rather than `str = None`: mypy (strict-optional, the default)
+# rejects an implicit-Optional parameter.
+def load_config(config_path: Optional[str] = None) -> Config:
+    config = Config()
+    if config_path is None:
+        config_path = os.path.join(os.path.dirname(__file__), "config.json")
+    if os.path.exists(config_path):
+        with open(config_path, "r") as f:
+            data = json.load(f)
+        for key, value in data.items():
+            if hasattr(config, key):
+                setattr(config, key, value)
+    return config
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `pytest tests/test_us_daily/test_config.py -v`
+Expected: All 3 tests PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/processor/us_daily/config.py tests/test_us_daily/test_config.py
+git commit -m "refactor: update us_daily Config with multi-source fields"
+```
+
+---
+
+### Task 2: Update Storage helpers
+
+**Files:**
+- Modify: `src/processor/us_daily/storage.py`
+- Modify: `tests/test_us_daily/test_storage.py`
+
+- [ ] **Step 1: Write failing tests for new storage helpers**
+
+Add new test methods to `TestStorage` in `tests/test_us_daily/test_storage.py`:
+
+```python
+    def test_get_list_file_path(self):
+        from processor.us_daily.storage import get_list_file_path
+
+        result = get_list_file_path("data/us_list", "nasdaq")
+        self.assertEqual(result, "data/us_list/nasdaq.json")
+
+    def test_get_month_file_path_daily_dir(self):
+        from processor.us_daily.storage import get_month_file_path
+
+        result = get_month_file_path("data/us_daily", "AAPL", "2020-01")
+        self.assertEqual(result, "data/us_daily/AAPL/2020-01.json")
+```
+
+- [ ] **Step 2: Run tests to verify new test fails**
+
+Run: `pytest tests/test_us_daily/test_storage.py::TestStorage::test_get_list_file_path -v`
+Expected: FAIL — `get_list_file_path` does not exist.
+
+- [ ] **Step 3: Add get_list_file_path to storage.py**
+
+In `src/processor/us_daily/storage.py`, add after the `get_tickers_file_path` function:
+
+```python
+def get_list_file_path(list_dir: str, exchange: str) -> str:
+    return os.path.join(list_dir, f"{exchange}.json")
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `pytest tests/test_us_daily/test_storage.py -v`
+Expected: All tests PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/processor/us_daily/storage.py tests/test_us_daily/test_storage.py
+git commit -m "feat: add get_list_file_path to storage helpers"
+```
+
+---
+
+### Task 3: Create BaseSource and SourceManager
+
+**Files:**
+- Create: `src/processor/us_daily/sources/__init__.py`
+- Create: `src/processor/us_daily/sources/base.py`
+- Create: `src/processor/us_daily/sources/manager.py`
+- Create: `tests/test_us_daily/test_sources/__init__.py`
+- Create: `tests/test_us_daily/test_sources/test_manager.py`
+
+- [ ] **Step 1: Write failing tests for SourceManager**
+
+Create `tests/test_us_daily/test_sources/__init__.py` (empty file).
+
+Create `tests/test_us_daily/test_sources/test_manager.py`:
+
+```python
+import unittest
+from unittest.mock import MagicMock, patch
+import pandas as pd
+
+
+class TestSourceManager(unittest.TestCase):
+    def _make_source(self, name, data=None, error=None):
+        """Create a mock source that returns data or raises error.
+
+        Uses a plain MagicMock rather than MagicMock(spec=BaseSource): on a
+        spec'd mock, assigning `name`/`request_interval` raises AttributeError
+        because those are only type annotations on BaseSource, not attributes
+        visible to dir().
+        """
+        source = MagicMock()
+        source.name = name
+        source.request_interval = 0.0
+        if error:
+            source.fetch_daily.side_effect = error
+        elif data is not None:
+            source.fetch_daily.return_value = data
+        else:
+            source.fetch_daily.return_value = pd.DataFrame()
+        return source
+
+    def test_returns_first_successful_source(self):
+        from processor.us_daily.sources.manager import SourceManager
+
+        df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]})
+        s1 = self._make_source("source1", data=df)
+        s2 = self._make_source("source2", data=df)
+
+        manager = SourceManager([s1, s2])
+        with patch("processor.us_daily.sources.manager.time.sleep"):
+            result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+
+        self.assertEqual(source_name, "source1")
+        s1.fetch_daily.assert_called_once_with("AAPL", "2020-01-01", "2020-01-31")
+        s2.fetch_daily.assert_not_called()
+
+    def test_falls_back_on_failure(self):
+        from processor.us_daily.sources.manager import SourceManager
+
+        df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]})
+        s1 = self._make_source("source1", error=Exception("API down"))
+        s2 = self._make_source("source2", data=df)
+
+        manager = SourceManager([s1, s2])
+        with patch("processor.us_daily.sources.manager.time.sleep"):
+            result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+
+        self.assertEqual(source_name, "source2")
+
+    def test_falls_back_on_empty_dataframe(self):
+        from processor.us_daily.sources.manager import SourceManager
+
+        empty_df = pd.DataFrame()
+        good_df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]})
+        s1 = self._make_source("source1", data=empty_df)
+        s2 = self._make_source("source2", data=good_df)
+
+        manager = SourceManager([s1, s2])
+        with patch("processor.us_daily.sources.manager.time.sleep"):
+            result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+
+        self.assertEqual(source_name, "source2")
+
+    def test_raises_when_all_fail(self):
+        from processor.us_daily.sources.manager import SourceManager, FetchError
+
+        s1 = self._make_source("source1", error=Exception("fail1"))
+        s2 = self._make_source("source2", error=Exception("fail2"))
+
+        manager = SourceManager([s1, s2])
+        with patch("processor.us_daily.sources.manager.time.sleep"):
+            with self.assertRaises(FetchError):
+                manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+
+    def test_sleeps_after_successful_fetch(self):
+        from processor.us_daily.sources.manager import SourceManager
+
+        df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]})
+        s1 = self._make_source("source1", data=df)
+        s1.request_interval = 5.0
+
+        manager = SourceManager([s1])
+        with patch("processor.us_daily.sources.manager.time.sleep") as mock_sleep:
+            manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+
+        mock_sleep.assert_called_once_with(5.0)
+
+
+if __name__ == "__main__":
+    unittest.main()
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `pytest tests/test_us_daily/test_sources/test_manager.py -v`
+Expected: FAIL — modules do not exist.
+ +- [ ] **Step 3: Create sources package with BaseSource** + +Create `src/processor/us_daily/sources/__init__.py`: + +```python +from processor.us_daily.sources.manager import SourceManager, FetchError + +__all__ = ["SourceManager", "FetchError"] +``` + +Create `src/processor/us_daily/sources/base.py`: + +```python +from abc import ABC, abstractmethod + +import pandas as pd + +STANDARD_COLUMNS = ["date", "open", "high", "low", "close", "volume"] + + +class BaseSource(ABC): + name: str + request_interval: float + + @abstractmethod + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch daily OHLCV data for a US stock ticker. + + Returns a DataFrame with columns matching STANDARD_COLUMNS. + Raises on unrecoverable errors. Returns empty DataFrame if no data. + """ + ... +``` + +- [ ] **Step 4: Create SourceManager** + +Create `src/processor/us_daily/sources/manager.py`: + +```python +import logging +import time +from typing import List, Tuple + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource + +logger = logging.getLogger("us_daily") + + +class FetchError(Exception): + """Raised when all data sources fail.""" + pass + + +class SourceManager: + def __init__(self, sources: List[BaseSource]): + self.sources = sources + + def fetch_daily( + self, ticker: str, start_date: str, end_date: str + ) -> Tuple[pd.DataFrame, str]: + """Try each source in priority order. Return (df, source_name). + + Raises FetchError if all sources fail or return empty data. + """ + errors = [] + for source in self.sources: + try: + df = source.fetch_daily(ticker, start_date, end_date) + if df is not None and not df.empty: + time.sleep(source.request_interval) + return df, source.name + else: + logger.debug( + f"{source.name} returned empty data for {ticker}" + ) + except Exception as e: + logger.warning(f"{source.name} failed for {ticker}: {e}") + errors.append(f"{source.name}: {e}") + continue + raise FetchError( + f"All sources failed for {ticker}: {'; '.join(errors)}" + ) +``` + +- [ ] **Step 5: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_sources/test_manager.py -v` +Expected: All 5 tests PASS. 
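+
+Before committing, the failover contract can be pictured with a short usage sketch (illustration only: it assumes the concrete sources from Tasks 4–6 below, and `client` is a placeholder that needs a valid API key in the environment):
+
+```python
+from massive import RESTClient
+
+from processor.us_daily.sources.manager import SourceManager, FetchError
+from processor.us_daily.sources.akshare_source import AkshareSource
+from processor.us_daily.sources.yfinance_source import YfinanceSource
+from processor.us_daily.sources.massive_source import MassiveSource
+
+client = RESTClient()  # placeholder; requires an API key
+manager = SourceManager([
+    AkshareSource(request_interval=2.0),                  # tried first
+    YfinanceSource(request_interval=1.0),                 # on failure or empty result
+    MassiveSource(client=client, request_interval=12.0),  # last resort
+])
+
+try:
+    df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+    print(f"got {len(df)} rows from {source_name}")
+except FetchError as e:
+    print(f"all sources failed: {e}")
+```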
+ +- [ ] **Step 6: Commit** + +```bash +git add src/processor/us_daily/sources/ tests/test_us_daily/test_sources/ +git commit -m "feat: add BaseSource interface and SourceManager with failover" +``` + +--- + +### Task 4: Implement AkshareSource + +**Files:** +- Create: `src/processor/us_daily/sources/akshare_source.py` +- Create: `tests/test_us_daily/test_sources/test_akshare_source.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_sources/test_akshare_source.py`: + +```python +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd + + +class TestAkshareSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.akshare_source import AkshareSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2020-01-02", "2020-01-03"]), + "open": [74.06, 75.0], + "high": [75.15, 76.0], + "low": [73.80, 74.5], + "close": [74.36, 75.5], + "volume": [108872000, 98000000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_filters_by_date(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2019-12-31", "2020-01-02", "2020-02-01"]), + "open": [70.0, 74.06, 80.0], + "high": [71.0, 75.15, 81.0], + "low": [69.0, 73.80, 79.0], + "close": [70.5, 74.36, 80.5], + "volume": [100000, 108872000, 90000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_with_correct_symbol(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + mock_ak.stock_us_daily.assert_called_once_with(symbol="AAPL", adjust="qfq") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_akshare_source.py -v` +Expected: FAIL — module does not exist. 
+
+- [ ] **Step 3: Implement AkshareSource**
+
+Create `src/processor/us_daily/sources/akshare_source.py`:
+
+```python
+import logging
+
+import pandas as pd
+
+from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS
+
+# akshare is imported at module level (guarded, since it is an optional extra)
+# so the tests can patch `processor.us_daily.sources.akshare_source.ak`; a
+# function-local import would leave no module attribute for patch() to replace.
+try:
+    import akshare as ak
+except ImportError:  # pragma: no cover - optional dependency
+    ak = None
+
+logger = logging.getLogger("us_daily")
+
+
+class AkshareSource(BaseSource):
+    name = "akshare"
+
+    def __init__(self, request_interval: float = 2.0):
+        self.request_interval = request_interval
+
+    def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
+        if ak is None:
+            raise RuntimeError("akshare is not installed")
+
+        symbol = ticker.strip().upper()
+        logger.debug(f"[akshare] fetching {symbol} {start_date}~{end_date}")
+
+        df = ak.stock_us_daily(symbol=symbol, adjust="qfq")
+
+        if df is None or df.empty:
+            return pd.DataFrame(columns=STANDARD_COLUMNS)
+
+        df["date"] = pd.to_datetime(df["date"])
+        start_dt = pd.to_datetime(start_date)
+        end_dt = pd.to_datetime(end_date)
+        df = df[(df["date"] >= start_dt) & (df["date"] <= end_dt)]
+
+        if df.empty:
+            return pd.DataFrame(columns=STANDARD_COLUMNS)
+
+        df = df.copy()  # avoid SettingWithCopyWarning after the boolean filter
+        df["date"] = df["date"].dt.strftime("%Y-%m-%d")
+        df = df[STANDARD_COLUMNS].reset_index(drop=True)
+        return df
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `pytest tests/test_us_daily/test_sources/test_akshare_source.py -v`
+Expected: All 4 tests PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/processor/us_daily/sources/akshare_source.py tests/test_us_daily/test_sources/test_akshare_source.py
+git commit -m "feat: add AkshareSource for US daily data"
+```
+
+---
+
+### Task 5: Implement YfinanceSource
+
+**Files:**
+- Create: `src/processor/us_daily/sources/yfinance_source.py`
+- Create: `tests/test_us_daily/test_sources/test_yfinance_source.py`
+
+- [ ] **Step 1: Write failing tests**
+
+Create `tests/test_us_daily/test_sources/test_yfinance_source.py`:
+
+```python
+import unittest
+from unittest.mock import patch, MagicMock
+import pandas as pd
+
+
+class TestYfinanceSource(unittest.TestCase):
+    def test_fetch_daily_returns_standard_columns(self):
+        from processor.us_daily.sources.yfinance_source import YfinanceSource
+        from processor.us_daily.sources.base import STANDARD_COLUMNS
+
+        raw_df = pd.DataFrame(
+            {
+                "Open": [74.06, 75.0],
+                "High": [75.15, 76.0],
+                "Low": [73.80, 74.5],
+                "Close": [74.36, 75.5],
+                "Volume": [108872000, 98000000],
+            },
+            # real yf.Ticker().history() returns a DatetimeIndex named "Date";
+            # the name matters because the source calls reset_index()
+            index=pd.DatetimeIndex(["2020-01-02", "2020-01-03"], name="Date"),
+        )
+
+        with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf:
+            mock_ticker = MagicMock()
+            mock_ticker.history.return_value = raw_df
+            mock_yf.Ticker.return_value = mock_ticker
+            source = YfinanceSource(request_interval=0.0)
+            result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31")
+
+        self.assertListEqual(list(result.columns), STANDARD_COLUMNS)
+        self.assertEqual(len(result), 2)
+        self.assertEqual(result.iloc[0]["close"], 74.36)
+
+    def test_fetch_daily_passes_correct_params(self):
+        from processor.us_daily.sources.yfinance_source import YfinanceSource
+
+        with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf:
+            mock_ticker = MagicMock()
+            mock_ticker.history.return_value = pd.DataFrame()
+            mock_yf.Ticker.return_value = mock_ticker
+            source = YfinanceSource(request_interval=0.0)
+            source.fetch_daily("aapl", "2020-01-01", "2020-01-31")
+
+        mock_yf.Ticker.assert_called_once_with("AAPL")
+        # yfinance treats `end` as exclusive, so the source passes end_date + 1 day
+        mock_ticker.history.assert_called_once_with(start="2020-01-01", end="2020-02-01")
+
+    def test_fetch_daily_returns_empty_on_no_data(self):
+        from processor.us_daily.sources.yfinance_source import YfinanceSource
+
patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_yfinance_source.py -v` +Expected: FAIL — module does not exist. + +- [ ] **Step 3: Implement YfinanceSource** + +Create `src/processor/us_daily/sources/yfinance_source.py`: + +```python +import logging + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class YfinanceSource(BaseSource): + name = "yfinance" + + def __init__(self, request_interval: float = 1.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + import yfinance as yf + + symbol = ticker.strip().upper() + logger.debug(f"[yfinance] fetching {symbol} {start_date}~{end_date}") + + t = yf.Ticker(symbol) + df = t.history(start=start_date, end=end_date) + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df = df.reset_index() + df = df.rename(columns={ + "Date": "date", + "Open": "open", + "High": "high", + "Low": "low", + "Close": "close", + "Volume": "volume", + }) + + df["date"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_sources/test_yfinance_source.py -v` +Expected: All 3 tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/sources/yfinance_source.py tests/test_us_daily/test_sources/test_yfinance_source.py +git commit -m "feat: add YfinanceSource for US daily data" +``` + +--- + +### Task 6: Implement MassiveSource + +**Files:** +- Create: `src/processor/us_daily/sources/massive_source.py` +- Create: `tests/test_us_daily/test_sources/test_massive_source.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_sources/test_massive_source.py`: + +```python +import unittest +from unittest.mock import MagicMock +import pandas as pd + + +class TestMassiveSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.massive_source import MassiveSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000 + agg1.timestamp = 1577944800000 # 2020-01-02 UTC + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["close"], 74.36) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_client_correctly(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + client.list_aggs.assert_called_once_with( + "AAPL", 1, "day", + from_="2020-01-01", to="2020-01-31", + adjusted=True, sort="asc", + ) + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_massive_source.py -v` +Expected: FAIL — module does not exist. 
+ +- [ ] **Step 3: Implement MassiveSource** + +Create `src/processor/us_daily/sources/massive_source.py`: + +```python +import logging +from datetime import datetime, timezone + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class MassiveSource(BaseSource): + name = "massive" + + def __init__(self, client, request_interval: float = 12.0): + self.client = client + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + logger.debug(f"[massive] fetching {ticker} {start_date}~{end_date}") + + aggs = list( + self.client.list_aggs( + ticker, 1, "day", + from_=start_date, to=end_date, + adjusted=True, sort="asc", + ) + ) + + if not aggs: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + rows = [] + for a in aggs: + dt = datetime.fromtimestamp(a.timestamp / 1000, tz=timezone.utc) + rows.append({ + "date": dt.strftime("%Y-%m-%d"), + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + }) + + df = pd.DataFrame(rows, columns=STANDARD_COLUMNS) + return df +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_sources/test_massive_source.py -v` +Expected: All 3 tests PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/sources/massive_source.py tests/test_us_daily/test_sources/test_massive_source.py +git commit -m "feat: add MassiveSource for US daily data" +``` + +--- + +### Task 7: Create ticker_lister.py + +**Files:** +- Create: `src/processor/us_daily/ticker_lister.py` +- Create: `tests/test_us_daily/test_ticker_lister.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_ticker_lister.py`: + +```python +import unittest +from unittest.mock import MagicMock, patch, call +import os +import tempfile +import shutil +import json + + +class TestTickerLister(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_ticker(self, ticker_str): + t = MagicMock() + t.ticker = ticker_str + return t + + def _make_details(self, **kwargs): + """Create a mock TickerDetails with all fields as attributes.""" + d = MagicMock() + for k, v in kwargs.items(): + setattr(d, k, v) + # Simulate __dict__ for serialization + d.__dict__ = kwargs + return d + + def test_list_tickers_for_exchange(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + return {"AAPL": details_aapl, "MSFT": details_msft}[ticker] + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + file_path = os.path.join(self.test_dir, "nasdaq.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + + self.assertEqual(data["exchange"], "XNAS") + 
self.assertEqual(data["count"], 2) + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertIn("MSFT", tickers) + + def test_resume_skips_existing_tickers(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + # Pre-populate file with AAPL already fetched + file_path = os.path.join(self.test_dir, "nasdaq.json") + existing_data = { + "updated_at": "2026-04-22", + "exchange": "XNAS", + "count": 1, + "tickers": [ + {"ticker": "AAPL", "name": "Apple Inc", "market_cap": 3e12}, + ], + } + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, "w") as f: + json.dump(existing_data, f) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + client.get_ticker_details.return_value = details_msft + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + # Should only call get_ticker_details for MSFT (AAPL already exists) + client.get_ticker_details.assert_called_once_with("MSFT") + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["count"], 2) + + def test_skips_ticker_on_details_error(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL"), + self._make_ticker("AAPL"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return details_aapl + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + file_path = os.path.join(self.test_dir, "nasdaq.json") + with open(file_path) as f: + data = json.load(f) + + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_ticker_lister.py -v` +Expected: FAIL — module does not exist. 
+ +- [ ] **Step 3: Implement ticker_lister.py** + +Create `src/processor/us_daily/ticker_lister.py`: + +```python +import logging +import time +from datetime import date +from typing import Dict, List + +from processor.us_daily.config import Config +from processor.us_daily.storage import get_list_file_path, save_json, load_json, file_exists + +logger = logging.getLogger("us_daily") + +EXCHANGES: Dict[str, str] = { + "nasdaq": "XNAS", + "nyse": "XNYS", + "arca": "ARCX", +} + + +def _details_to_dict(details) -> dict: + """Convert a TickerDetails object to a plain dict, dropping None values.""" + result = {} + for key, value in vars(details).items(): + if key.startswith("_"): + continue + if value is None: + continue + # Handle nested objects with their own __dict__ + if hasattr(value, "__dict__") and not isinstance(value, (str, int, float, bool)): + value = {k: v for k, v in vars(value).items() if not k.startswith("_") and v is not None} + result[key] = value + return result + + +def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> List[dict]: + """Fetch all tickers for an exchange and save to file. + + Supports resume: if the output file already exists, previously fetched + tickers are kept and only missing ones are fetched. + """ + exchange_code = EXCHANGES[exchange_name] + file_path = get_list_file_path(config.list_dir, exchange_name) + + # Load existing tickers for resume + existing_tickers: Dict[str, dict] = {} + if file_exists(file_path): + data = load_json(file_path) + for t in data.get("tickers", []): + existing_tickers[t["ticker"]] = t + logger.info( + f"[{exchange_name}] Resuming: {len(existing_tickers)} tickers already fetched" + ) + + # Get full ticker list from API + logger.info(f"[{exchange_name}] Listing tickers for {exchange_code}") + try: + ticker_objs = list( + client.list_tickers( + market="stocks", exchange=exchange_code, active=True, limit=1000 + ) + ) + except Exception as e: + logger.error(f"[{exchange_name}] Failed to list tickers: {e}") + return list(existing_tickers.values()) + + time.sleep(config.massive_interval) + logger.info(f"[{exchange_name}] Found {len(ticker_objs)} tickers") + + # Fetch details for new tickers only + for i, ticker_obj in enumerate(ticker_objs): + ticker_str = ticker_obj.ticker + if ticker_str in existing_tickers: + continue + + try: + details = client.get_ticker_details(ticker_str) + entry = _details_to_dict(details) + existing_tickers[ticker_str] = entry + logger.info( + f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" + ) + except Exception as e: + logger.warning( + f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: {e}" + ) + + time.sleep(config.massive_interval) + + # Save result + tickers_list = list(existing_tickers.values()) + save_json(file_path, { + "updated_at": date.today().strftime("%Y-%m-%d"), + "exchange": exchange_code, + "count": len(tickers_list), + "tickers": tickers_list, + }) + + logger.info(f"[{exchange_name}] Saved {len(tickers_list)} tickers to {file_path}") + return tickers_list +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_ticker_lister.py -v` +Expected: All 3 tests PASS. 
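+
+To make the serialization behavior concrete, here is a small illustration of `_details_to_dict` (the classes below are hypothetical stand-ins; real TickerDetails objects come from the massive client):
+
+```python
+from processor.us_daily.ticker_lister import _details_to_dict
+
+
+class Branding:
+    def __init__(self):
+        self.logo_url = "https://example.com/logo.svg"
+        self.icon_url = None
+
+
+class FakeDetails:
+    def __init__(self):
+        self.ticker = "AAPL"
+        self.market_cap = 3e12
+        self.cik = None              # dropped: None values are skipped
+        self.branding = Branding()   # nested object flattened to a dict
+        self._private = "hidden"     # dropped: underscore-prefixed
+
+
+print(_details_to_dict(FakeDetails()))
+# {'ticker': 'AAPL', 'market_cap': 3e12,
+#  'branding': {'logo_url': 'https://example.com/logo.svg'}}
+```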
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/ticker_lister.py tests/test_us_daily/test_ticker_lister.py +git commit -m "feat: add ticker_lister with full exchange listing and resume support" +``` + +--- + +### Task 8: Refactor agg_fetcher.py to use SourceManager + +**Files:** +- Modify: `src/processor/us_daily/agg_fetcher.py` +- Modify: `tests/test_us_daily/test_agg_fetcher.py` + +- [ ] **Step 1: Update tests for new agg_fetcher interface** + +Replace `TestFetchTickerAggs` class in `tests/test_us_daily/test_agg_fetcher.py` (keep `TestGenerateMonths`, `TestMonthBounds`, `TestIsCurrentMonth` unchanged): + +```python +class TestFetchTickerAggs(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_manager(self, df=None, source_name="akshare", error=None): + from processor.us_daily.sources.manager import SourceManager + + manager = MagicMock(spec=SourceManager) + if error: + manager.fetch_daily.side_effect = error + else: + manager.fetch_daily.return_value = (df, source_name) + return manager + + def test_skips_existing_historical_month(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + + config = Config(start_date="2020-01", daily_dir=self.test_dir) + + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2020-01", "data": []}, f) + + manager = self._make_manager() + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + manager.fetch_daily.assert_not_called() + self.assertEqual(result["failures"], []) + + def test_fetches_missing_month(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + import pandas as pd + + config = Config(start_date="2020-01", daily_dir=self.test_dir) + + df = pd.DataFrame({ + "date": ["2020-01-02"], + "open": [74.06], + "high": [75.15], + "low": [73.80], + "close": [74.36], + "volume": [108872000], + }) + manager = self._make_manager(df=df, source_name="akshare") + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["ticker"], "AAPL") + self.assertEqual(data["month"], "2020-01") + self.assertEqual(data["source"], "akshare") + self.assertEqual(len(data["data"]), 1) + self.assertEqual(data["data"][0]["close"], 74.36) + self.assertEqual(result["failures"], []) + + def test_refreshes_current_month(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + import pandas as pd + + config = Config(start_date="2026-04", daily_dir=self.test_dir) + + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) + + df = pd.DataFrame({ + "date": 
["2026-04-01"], + "open": [200.0], + "high": [210.0], + "low": [195.0], + "close": [205.0], + "volume": [50000000], + }) + manager = self._make_manager(df=df, source_name="yfinance") + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=True + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + manager.fetch_daily.assert_called_once() + self.assertEqual(result["failures"], []) + + def test_records_failure_when_all_sources_fail(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + from processor.us_daily.sources.manager import FetchError + + config = Config(start_date="2020-01", daily_dir=self.test_dir, max_retries=2) + + manager = self._make_manager( + error=FetchError("All sources failed for AAPL") + ) + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + self.assertEqual(len(result["failures"]), 1) + self.assertEqual(result["failures"][0]["ticker"], "AAPL") + self.assertEqual(result["failures"][0]["month"], "2020-01") +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_agg_fetcher.py::TestFetchTickerAggs -v` +Expected: FAIL — `fetch_ticker_aggs` still expects `client` as first arg, not `manager`. + +- [ ] **Step 3: Rewrite agg_fetcher.py to use SourceManager** + +Replace `src/processor/us_daily/agg_fetcher.py` with: + +```python +import calendar +import logging +from datetime import date, datetime +from typing import List, Tuple + +from processor.us_daily.config import Config +from processor.us_daily.sources.manager import FetchError +from processor.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +logger = logging.getLogger("us_daily") + + +def generate_months(start: str, end: str) -> List[str]: + start_year, start_month = int(start[:4]), int(start[5:7]) + end_year, end_month = int(end[:4]), int(end[5:7]) + + months = [] + year, month = start_year, start_month + while (year, month) <= (end_year, end_month): + months.append(f"{year:04d}-{month:02d}") + month += 1 + if month > 12: + month = 1 + year += 1 + return months + + +def get_month_bounds(month: str) -> Tuple[str, str]: + year, mon = int(month[:4]), int(month[5:7]) + last_day = calendar.monthrange(year, mon)[1] + return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" + + +def is_current_month(month: str) -> bool: + today = date.today() + return month == f"{today.year:04d}-{today.month:02d}" + + +def current_month() -> str: + today = date.today() + return f"{today.year:04d}-{today.month:02d}" + + +def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: + """Fetch monthly OHLCV data for a ticker using SourceManager. + + Args: + source_manager: SourceManager instance with failover sources. + ticker: Stock ticker symbol (e.g. "AAPL"). + config: Config with daily_dir, start_date, max_retries. + + Returns: + Dict with "failures" list of failed months. 
+ """ + months = generate_months(config.start_date, current_month()) + failures = [] + + for month in months: + file_path = get_month_file_path(config.daily_dir, ticker, month) + + if file_exists(file_path) and not is_current_month(month): + logger.debug(f" {ticker} {month}: exists, skipping") + continue + + start_date, end_date = get_month_bounds(month) + + try: + df, source_name = source_manager.fetch_daily(ticker, start_date, end_date) + except FetchError as e: + failures.append({ + "ticker": ticker, + "month": month, + "error": str(e), + }) + logger.error(f" {ticker} {month}: {e}") + continue + + data = { + "ticker": ticker, + "month": month, + "source": source_name, + "fetched_at": datetime.now().isoformat(timespec="seconds"), + "data": df.to_dict(orient="records"), + } + save_json(file_path, data) + logger.info(f" {ticker} {month}: fetched {len(df)} bars from {source_name}") + + return {"failures": failures} +``` + +- [ ] **Step 4: Run all agg_fetcher tests** + +Run: `pytest tests/test_us_daily/test_agg_fetcher.py -v` +Expected: All tests PASS (including `TestGenerateMonths`, `TestMonthBounds`, `TestIsCurrentMonth`, and the updated `TestFetchTickerAggs`). + +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/agg_fetcher.py tests/test_us_daily/test_agg_fetcher.py +git commit -m "refactor: update agg_fetcher to use SourceManager with failover" +``` + +--- + +### Task 9: Update __main__.py and delete ticker_filter.py + +**Files:** +- Modify: `src/processor/us_daily/__main__.py` +- Delete: `src/processor/us_daily/ticker_filter.py` +- Delete: `tests/test_us_daily/test_ticker_filter.py` + +- [ ] **Step 1: Rewrite __main__.py** + +Replace `src/processor/us_daily/__main__.py` with: + +```python +import logging +import os +import sys + +from massive import RESTClient + +from processor.us_daily.config import load_config +from processor.us_daily.ticker_lister import list_tickers_for_exchange, EXCHANGES +from processor.us_daily.agg_fetcher import fetch_ticker_aggs +from processor.us_daily.sources.akshare_source import AkshareSource +from processor.us_daily.sources.yfinance_source import YfinanceSource +from processor.us_daily.sources.massive_source import MassiveSource +from processor.us_daily.sources.manager import SourceManager +from processor.us_daily.storage import get_list_file_path, load_json, file_exists + + +SOURCE_CLASSES = { + "akshare": AkshareSource, + "yfinance": YfinanceSource, + "massive": MassiveSource, +} + + +def setup_logging(): + os.makedirs("logs", exist_ok=True) + logger = logging.getLogger("us_daily") + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def build_source_manager(config, client) -> SourceManager: + """Build SourceManager from config priority list.""" + interval_map = { + "akshare": config.akshare_interval, + "yfinance": config.yfinance_interval, + "massive": config.massive_interval, + } + sources = [] + for name in config.data_source_priority: + cls = SOURCE_CLASSES.get(name) + if cls is None: + continue + if name == "massive": + sources.append(cls(client=client, 
request_interval=interval_map[name])) + else: + sources.append(cls(request_interval=interval_map[name])) + return SourceManager(sources) + + +def load_all_tickers(config) -> list: + """Load tickers from all exchange files in list_dir.""" + all_tickers = [] + seen = set() + for exchange_name in config.exchanges: + file_path = get_list_file_path(config.list_dir, exchange_name) + if not file_exists(file_path): + continue + data = load_json(file_path) + for t in data.get("tickers", []): + ticker = t["ticker"] + if ticker not in seen: + seen.add(ticker) + all_tickers.append(t) + return all_tickers + + +def main(): + logger = setup_logging() + config = load_config() + + logger.info("=== US Daily Data Fetcher Started ===") + logger.info(f"Config: {config}") + + client = RESTClient() + + # Step 1: Fetch ticker lists per exchange + if config.refresh_tickers or any( + not file_exists(get_list_file_path(config.list_dir, ex)) + for ex in config.exchanges + ): + for exchange_name in config.exchanges: + if exchange_name not in EXCHANGES: + logger.warning(f"Unknown exchange: {exchange_name}, skipping") + continue + logger.info(f"Fetching ticker list for {exchange_name}...") + list_tickers_for_exchange(client, exchange_name, config) + + # Load all tickers + tickers = load_all_tickers(config) + logger.info(f"Total tickers loaded: {len(tickers)}") + + # Step 2: Fetch daily data + source_manager = build_source_manager(config, client) + + all_failures = [] + total = len(tickers) + for i, ticker_info in enumerate(tickers): + ticker = ticker_info["ticker"] + logger.info(f"[{i + 1}/{total}] Processing {ticker}") + result = fetch_ticker_aggs(source_manager, ticker, config) + if result["failures"]: + all_failures.extend(result["failures"]) + + # Step 3: Summary + logger.info("=== Summary ===") + logger.info(f"Total tickers: {total}") + if all_failures: + logger.warning(f"Failed months: {len(all_failures)}") + for f in all_failures: + logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") + else: + logger.info("All data fetched successfully") + logger.info("=== Done ===") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Delete old ticker_filter.py and its tests** + +```bash +git rm src/processor/us_daily/ticker_filter.py +git rm tests/test_us_daily/test_ticker_filter.py +``` + +- [ ] **Step 3: Run full test suite to verify nothing is broken** + +Run: `pytest tests/test_us_daily/ -v` +Expected: All tests PASS. No imports reference `ticker_filter`. + +- [ ] **Step 4: Commit** + +```bash +git add src/processor/us_daily/__main__.py +git commit -m "refactor: update __main__.py with SourceManager and remove ticker_filter" +``` + +--- + +### Task 10: Run full test suite and verify + +- [ ] **Step 1: Run all us_daily tests** + +```bash +pytest tests/test_us_daily/ -v +``` + +Expected: All tests PASS. 
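
The smoke test in the next step instantiates `Config()` directly. For orientation, a `config.json` consistent with the fields the refactored code reads would look roughly like this (a sketch: field names are taken from the code above; the values are illustrative assumptions, not prescribed defaults):

```json
{
  "exchanges": ["nasdaq", "nyse", "arca"],
  "refresh_tickers": false,
  "data_source_priority": ["akshare", "yfinance", "massive"],
  "akshare_interval": 1.0,
  "yfinance_interval": 1.0,
  "massive_interval": 12.0,
  "list_dir": "data/us_list",
  "daily_dir": "data/us_daily",
  "start_date": "2020-01",
  "max_retries": 3
}
```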

- [ ] **Step 2: Run import smoke test**

```bash
python -c "
from processor.us_daily.config import Config, load_config
from processor.us_daily.sources import SourceManager, FetchError
from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS
from processor.us_daily.sources.akshare_source import AkshareSource
from processor.us_daily.sources.yfinance_source import YfinanceSource
from processor.us_daily.sources.massive_source import MassiveSource
from processor.us_daily.ticker_lister import list_tickers_for_exchange, EXCHANGES
from processor.us_daily.agg_fetcher import fetch_ticker_aggs
print('All imports OK')
print(f'STANDARD_COLUMNS: {STANDARD_COLUMNS}')
print(f'EXCHANGES: {EXCHANGES}')
print(f'Default config: {Config()}')
"
```

Expected: `All imports OK` with correct values printed.

- [ ] **Step 3: Verify no remaining references to deleted code**

```bash
grep -r "ticker_filter\|market_cap_min\|top_tickers\|data_dir\|request_interval" src/processor/us_daily/ --include="*.py"
```

Expected: no hits for `ticker_filter`, `market_cap_min`, `top_tickers`, or the old `data_dir` config field. Note that `request_interval` will still legitimately match the source-constructor keyword argument (`request_interval=` in `build_source_manager` in `__main__.py`, and likely in the `sources/` modules); only a hit on the old `Config` field counts as a failure.

- [ ] **Step 4: Commit if any fixups needed**

If any issues were found in steps 1-3, fix them and commit:

```bash
git add -A src/processor/us_daily/ tests/test_us_daily/
git commit -m "fix: resolve remaining issues from us_daily refactor"
```
diff --git a/.claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md b/.claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md
new file mode 100644
index 00000000..0f370bc8
--- /dev/null
+++ b/.claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md
@@ -0,0 +1,18 @@
+# Requirement Description
+
+Collect daily-level trading data for top companies (massive/rest/models/aggs.py:Agg)
+1. Fetch the list of Nasdaq, Dow Jones, and S&P 500 companies with market cap >= US$5 billion and store it under the data/us_daily_data directory
+2. For each stock, fetch daily-level data month by month starting from 2020; one folder per stock, one file per month (stored under data/us_daily_data)
+   - If a given month's data is already stored, skip it unless it is the current month, which should be refreshed
+
+# Constraints
+Sleep 20s after each request
+
+
+# Related files:
+- rest module: ./massive/rest
+- data storage: ./data/us_daily
+- code directory: ./project
+
+# Requirements
+Write the design and plan documents to the .claude/plans directory
diff --git a/.claude/prds/20250423-OPT_CODE.md b/.claude/prds/20250423-OPT_CODE.md
new file mode 100644
index 00000000..c4fa9816
--- /dev/null
+++ b/.claude/prds/20250423-OPT_CODE.md
@@ -0,0 +1,14 @@
+# Requirement Description
+
+## 1. Directory change
+Rename the data_provider directory to processor
+
+## 2. Introduce more stock data sources
+Read ./provider and adjust its package structure
+
+## 3. Change the project structure
+1. Stop using Poetry; test-related content may be dropped
+2. Drop the Makefile approach
+
+# Requirements
+Write the design and plan documents to the .claude/plans directory
diff --git a/.claude/prds/20250423-OPT_DATA_PROVIDER.md b/.claude/prds/20250423-OPT_DATA_PROVIDER.md
new file mode 100644
index 00000000..66e063fc
--- /dev/null
+++ b/.claude/prds/20250423-OPT_DATA_PROVIDER.md
@@ -0,0 +1,26 @@
+# Requirement Description
+
+Logic optimization
+Upgrade data_provider into two parts:
+
+1. Fetch stock lists
+Function: given an exchange (Nasdaq, Dow Jones, S&P 500), fetch all of that exchange's stocks along with each stock's detail info; no more market-cap filtering; store all TickerDetails fields
+File location: ./data/us_list
+
+2. Fetch daily stock data
+* Function: given a stock-list file and a time range, fetch the daily-level data for the stocks in that file
+  - one folder per stock, under ./data/us_daily
+  - each stock's data is stored by month
+  - if a month already exists in the directory, skip it unless it is the current month, which is refreshed
+* Besides the original massive fetcher, add akshare and yfinance fetchers, selected via a config option
+
+
+Constraints:
+Due to rate limiting, massive must sleep 12s after each request
+
+## Related files:
+- data storage: ./data
+- code directory: ./src/
+
+# Requirements
+Write the design and plan documents to the .claude/plans directory
diff --git a/.gitignore b/.gitignore
index 300a17c1..971634ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,3 +62,6 @@ target/
 
 #Ipython Notebook
 .ipynb_checkpoints
+
+# Data files
+data/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 00000000..10b731c5
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,5 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 00000000..105ce2da
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/massive-com.iml b/.idea/massive-com.iml
new file mode 100644
index 00000000..07abf202
--- /dev/null
+++ b/.idea/massive-com.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 00000000..db8786c0
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..e2e44839
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..35eb1ddf
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 00000000..0a8d8793
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,91 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Official Python client library for the Massive (formerly Polygon.io) REST and WebSocket APIs. Provides market data access for stocks, options, forex, crypto, and more. Published as the `massive` package on PyPI.
+
+## Development Commands
+
+```bash
+# Install dependencies (core + all data sources + dev tools)
+pip install -e ".[all,dev]"
+
+# Run all tests
+pytest
+
+# Run specific test directory
+pytest tests/test_rest/
+pytest tests/test_websocket/
+pytest tests/test_us_daily/
+
+# Run a single test file
+pytest tests/test_rest/test_aggs.py
+
+# Run a single test method
+pytest tests/test_rest/test_aggs.py::TestAggs::test_list_aggs
+
+# Code formatting (auto-fix)
+black src/ tests/ examples/
+
+# Static type checking
+mypy src/
+
+# Run US daily data processor
+python -m processor.us_daily
+
+# Regenerate REST API spec from OpenAPI
+python .massive/rest.py
+
+# Update WebSocket API spec
+curl https://api.massive.com/specs/websocket.json > .massive/websocket.json
+```
+
+## Architecture
+
+### Project Layout
+
+Standard `src/` layout with three top-level packages:
+
+- `src/massive/` — REST and WebSocket SDK client library
+- `src/provider/` — Multi-source data fetcher layer with automatic failover
+- `src/processor/` — Data collection and processing pipelines
+
+### Client Structure
+
+`RESTClient` (in `massive/rest/__init__.py`) uses multiple inheritance to compose domain-specific client mixins (AggsClient, TradesClient, QuotesClient, etc.) on top of `BaseClient` (`massive/rest/base.py`). Each mixin lives in its own file under `massive/rest/` and handles one API domain.
+
+`WebSocketClient` (`massive/websocket/__init__.py`) is a standalone async client using the `websockets` library with auto-reconnect support.
+
+### Provider Layer
+
+`DataFetcherManager` (in `provider/base.py`) orchestrates multiple data source fetchers (efinance, akshare, tushare, pytdx, baostock, yfinance, longbridge) with automatic priority-based failover. Each fetcher extends `BaseFetcher` and implements source-specific data retrieval.
+
+### Processor
+
+`processor/us_daily/` fetches US stock daily OHLCV data via the Massive REST API. Run with `python -m processor.us_daily`.
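+
+Both the provider layer and the us_daily processor use the same priority-failover shape. Below is a minimal sketch of that pattern, modeled on the `SourceManager` interface described in the restructure plan (`fetch_daily` returns `(df, source_name)` and raises `FetchError` when every source fails); the `BaseSource` stand-in is illustrative, not the actual class:
+
+```python
+from typing import List, Tuple
+
+import pandas as pd
+
+
+class FetchError(Exception):
+    """Raised when every configured source fails for a request."""
+
+
+class BaseSource:
+    # Illustrative stand-in: concrete fetchers implement fetch_daily().
+    name = "base"
+
+    def fetch_daily(self, ticker: str, start: str, end: str) -> pd.DataFrame:
+        raise NotImplementedError
+
+
+class SourceManager:
+    def __init__(self, sources: List[BaseSource]):
+        self.sources = sources  # ordered by priority
+
+    def fetch_daily(self, ticker: str, start: str, end: str) -> Tuple[pd.DataFrame, str]:
+        errors = []
+        for source in self.sources:  # try each source in priority order
+            try:
+                return source.fetch_daily(ticker, start, end), source.name
+            except Exception as e:  # record and fall through to the next source
+                errors.append(f"{source.name}: {e}")
+        raise FetchError(f"all sources failed for {ticker}: {'; '.join(errors)}")
+```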
+
+### Models
+
+- REST models: `massive/rest/models/` — one file per domain, using the custom `@modelclass` decorator (from `massive/modelclass.py`), which wraps `@dataclass` with a flexible init that accepts positional or keyword args.
+- WebSocket models: `massive/websocket/models/`
+
+### API Spec Codegen
+
+`.massive/rest.py` generates REST client code from `.massive/rest.json` (OpenAPI spec). `.massive/websocket.json` is the WebSocket spec.
+
+### Tests
+
+- `tests/test_rest/` — uses `pook` for HTTP mocking, with mock responses in `tests/test_rest/mocks/`
+- `tests/test_websocket/` — has its own mock WebSocket server in `mock_server.py`
+- `tests/test_us_daily/` — unit tests for the US daily processor
+- Test base classes: `tests/test_rest/base.py` and `tests/test_websocket/base_ws.py`
+
+### Key Conventions
+
+- API key via `MASSIVE_API_KEY` env var or constructor parameter
+- Base URL: `https://api.massive.com`
+- Auth header: `Authorization: Bearer <API_KEY>`
+- Python 3.9+ required
+- Formatting: `black`; type checking: `mypy`
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 96f1555d..00000000
--- a/LICENSE
+++ /dev/null
@@ -1,19 +0,0 @@
-Copyright (c) 2018 The Python Packaging Authority
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/Makefile b/Makefile
deleted file mode 100644
index b3732930..00000000
--- a/Makefile
+++ /dev/null
@@ -1,49 +0,0 @@
-.DEFAULT_GOAL := help
-TARGET_MAX_CHAR_NUM := 20
-
-GREEN  := $(shell tput -Txterm setaf 2)
-YELLOW := $(shell tput -Txterm setaf 3)
-WHITE  := $(shell tput -Txterm setaf 7)
-RESET  := $(shell tput -Txterm sgr0)
-
-.PHONY: help lint style static test test_rest test_websocket
-
-## Show help
-help:
-	@awk '/^[a-zA-Z\-_0-9]+:/ { \
-		helpMessage = match(lastLine, /^## (.*)/); \
-		if (helpMessage) { \
-			helpCommand = substr($$1, 0, index($$1, ":")-1); \
-			helpMessage = substr(lastLine, RSTART + 3, RLENGTH); \
-			printf "  ${YELLOW}%-$(TARGET_MAX_CHAR_NUM)s${RESET} ${GREEN}%s${RESET}\n", helpCommand, helpMessage; \
-		} \
-	} \
-	{ lastLine = $$0 }' $(MAKEFILE_LIST)
-
-## Check code style
-style:
-	poetry run black $(if $(CI),--check,) massive test_* examples
-
-## Check static types
-static:
-	poetry run mypy massive test_* examples
-
-## Check code style and static types
-lint: style static
-
-## Update the REST API spec
-rest-spec:
-	poetry run python .massive/rest.py
-
-## Update the WebSocket API spec
-ws-spec:
-	curl https://api.massive.com/specs/websocket.json > .massive/websocket.json
-
-test_rest:
-	poetry run python -m unittest discover -s test_rest
-
-test_websocket:
-	poetry run python -m unittest discover -s test_websocket
-
-## Run the unit tests
-test: test_rest test_websocket
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..ee704b09
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,570 @@
+================================================================================
+Massive Python Client — Code Architecture and Logic
+================================================================================
+
+Project Overview
+----------------
+Official Python client library for Massive (formerly Polygon.io), providing
+REST and WebSocket access to market data for stocks, options, forex, crypto,
+futures, indices, and more. Published on PyPI as the `massive` package;
+requires Python 3.9+.
+
+
+================================================================================
+1. Top-Level Directory Structure
+================================================================================
+
+massive-com/
+├── massive/                 # library source
+│   ├── __init__.py          # public API exports: RESTClient, WebSocketClient, exceptions
+│   ├── modelclass.py        # @modelclass decorator (custom dataclass wrapper)
+│   ├── exceptions.py        # AuthError, BadResponse
+│   ├── logging.py           # unified logging helper get_logger()
+│   ├── rest/                # REST client
+│   │   ├── __init__.py      # RESTClient (composes all domain mixins via multiple inheritance)
+│   │   ├── base.py          # BaseClient: HTTP requests, pagination, retries, param conversion
+│   │   ├── aggs.py          # AggsClient — aggregate bars
+│   │   ├── trades.py        # TradesClient — tick-by-tick trades
+│   │   ├── quotes.py        # QuotesClient — quotes/NBBO
+│   │   ├── snapshot.py      # SnapshotClient — snapshots
+│   │   ├── reference.py     # reference data: Markets/Tickers/Splits/Dividends/...
+│   │   ├── indicators.py    # IndicatorsClient — technical indicators SMA/EMA/RSI/MACD
+│   │   ├── financials.py    # FinancialsClient — financial statements
+│   │   ├── benzinga.py      # BenzingaClient — research reports/ratings
+│   │   ├── economy.py       # EconomyClient — macroeconomic data
+│   │   ├── etf_global.py    # EtfGlobalClient — ETF analytics
+│   │   ├── futures.py       # FuturesClient — futures
+│   │   ├── tmx.py           # TmxClient — Toronto Stock Exchange
+│   │   ├── summaries.py     # SummariesClient — summaries
+│   │   ├── vX.py            # VXClient — legacy vX endpoints
+│   │   └── models/          # REST data models
+│   │       ├── __init__.py  # re-exports all models
+│   │       ├── common.py    # shared enums: Sort, Market, Timeframe, etc.
+│   │       ├── request.py   # RequestOptionBuilder (Launchpad edge-header builder)
+│   │       ├── aggs.py      # Agg, GroupedDailyAgg, DailyOpenCloseAgg, PreviousCloseAgg
+│   │       ├── trades.py    # Trade, LastTrade, CryptoTrade
+│   │       └── ...          # one model file per domain (mirrors the mixins under rest/)
+│   └── websocket/           # WebSocket client
+│       ├── __init__.py      # WebSocketClient: async connect, auth, subscribe, reconnect
+│       └── models/
+│           ├── __init__.py  # MARKET_EVENT_MAP registry + parse() parser
+│           ├── common.py    # Feed, Market, EventType enums
+│           └── models.py    # message models: EquityTrade, CryptoQuote, Level2Book, etc.
+├── test_rest/               # REST unit tests (pook HTTP mocks)
+│   ├── base.py              # BaseTest: auto-loads JSON files under mocks/ and registers them as mocks
+│   ├── mocks/               # mock response JSON files (directory layout mirrors URL paths)
+│   └── test_*.py            # per-domain tests
+├── test_websocket/          # WebSocket unit tests
+│   ├── base_ws.py           # BaseTest: IsolatedAsyncioTestCase + mock server
+│   ├── mock_server.py       # built-in mock WebSocket server
+│   └── test_conn.py         # connect/auth/subscribe tests
+├── examples/                # usage examples
+│   ├── rest/                # REST example scripts
+│   └── websocket/           # WebSocket example scripts
+├── .massive/                # OpenAPI specs and code generation
+│   ├── rest.json            # REST OpenAPI spec file
+│   ├── rest.py              # script that pulls the spec from api.massive.com/openapi
+│   └── websocket.json       # WebSocket spec file
+├── docs/                    # Sphinx docs
+├── pyproject.toml           # Poetry project config
+├── Makefile                 # development command entry point
+└── poetry.lock              # dependency lock file
+
+
+================================================================================
+2. Core Architecture: REST Client
+================================================================================
+
+2.1 Multiple-Inheritance Mixin Composition
+------------------------------------------
+
+RESTClient composes 19 domain mixins into one unified client via multiple
+inheritance:
+
+    class RESTClient(
+        AggsClient,        # aggregate bars       /v2/aggs
+        FuturesClient,     # futures              /v1/futures
+        FinancialsClient,  # financials           /vX/reference/financials
+        BenzingaClient,    # research             /v1/meta/symbols
+        EconomyClient,     # macro                /v1/economy
+        EtfGlobalClient,   # ETF                  /v1/etf
+        TmxClient,         # TMX                  /v1/tmx
+        TradesClient,      # trades               /v3/trades
+        QuotesClient,      # quotes               /v3/quotes
+        SnapshotClient,    # snapshots            /v3/snapshot
+        MarketsClient,     # markets              /v3/reference/markets
+        TickersClient,     # tickers              /v3/reference/tickers
+        SplitsClient,      # splits               /v3/reference/splits
+        DividendsClient,   # dividends            /v3/reference/dividends
+        ConditionsClient,  # condition codes      /v3/reference/conditions
+        ExchangesClient,   # exchanges            /v3/reference/exchanges
+        ContractsClient,   # contracts            /v3/reference/options/contracts
+        IndicatorsClient,  # technical indicators /v1/indicators
+        SummariesClient,   # summaries            /v3/summaries
+    )
+
+    The constructor forwards all parameters to BaseClient.__init__() and
+    additionally instantiates self.vx = VXClient(...) for legacy endpoints.
+
+Benefits:
+    - one file per domain, single responsibility
+    - adding an API domain only requires a new mixin + models, registered in
+      the RESTClient inheritance chain
+    - each mixin can be tested independently
+
+
+2.2 BaseClient — HTTP Infrastructure
+------------------------------------
+
+File: massive/rest/base.py
+
+BaseClient is the common base class of all REST mixins and encapsulates all of
+the HTTP communication logic.
+
+Initialization flow:
+    1. Validate the API key (raise AuthError if missing)
+    2. Build default request headers: Authorization: Bearer <API_KEY>,
+       Accept-Encoding: gzip, User-Agent
+    3. Create a urllib3.PoolManager:
+       - SSL certificate verification (certifi)
+       - retry policy: 3 attempts by default, exponential backoff (factor 0.1),
+         for statuses [413, 429, 499, 500, 502, 503, 504]
+       - configurable pool size and timeouts
+    4. Initialize the optional custom JSON codec
+
+Core methods:
+
+    _get(path, params, result_key, deserializer, raw, options)
+    │   performs a GET request against BASE + path
+    │   params become query parameters
+    │   raw=True returns the raw HTTPResponse
+    │   otherwise parses the JSON and extracts the field named by result_key
+    └→  converts each record into a model object via the deserializer function
+
+    _get_params(fn, caller_locals, datetime_res="nanos")
+    │   parameter-conversion engine: automatically maps Python function
+    │   arguments to API query parameters
+    │   rules:
+    │   - Enum → take .value
+    │   - bool → lowercase string "true"/"false"
+    │   - datetime → Unix timestamp at the given resolution
+    │   - underscore suffix → dot (e.g. timestamp_lt → timestamp.lt)
+    │   - any_of suffix → comma-joined list
+    └→  returns a dict ready to be sent with the request
+
+    _paginate(path, params, raw, deserializer, result_key, options)
+    │   pagination entry point
+    │   raw=True → returns a single raw page
+    └→  raw=False → returns the _paginate_iter() generator
+
+    _paginate_iter(path, params, deserializer, result_key, options)
+    │   pagination generator
+    │   while loop:
+    │   1. send a request and fetch one page
+    │   2. call the deserializer on each record under result_key → yield model objects
+    │   3. check next_url in the response
+    4. next_url present and pagination=True → parse the URL and keep requesting
+    └→  no next_url → stop
+
+
+2.3 Domain Mixin Method Pattern
+-------------------------------
+
+All mixin methods follow a uniform pattern:
+
+    def list_xxx(self, ticker, param1, ..., params=None, raw=False, options=None):
+        url = f"/v3/some/endpoint/{ticker}"
+        return self._paginate(
+            path=url,
+            params=self._get_params(self.list_xxx, locals()),
+            raw=raw,
+            deserializer=SomeModel.from_dict,
+            result_key="results",
+            options=options,
+        )
+
+    def get_xxx(self, ticker, ..., params=None, raw=False, options=None):
+        url = f"/v2/some/endpoint/{ticker}"
+        return self._get(
+            path=url,
+            params=self._get_params(self.get_xxx, locals()),
+            result_key="results",
+            deserializer=SomeModel.from_dict,
+            raw=raw,
+            options=options,
+        )
+
+Method naming conventions:
+    - list_xxx() → paginated endpoint, returns Iterator[Model]
+    - get_xxx()  → single request, returns Model or List[Model]
+
+Parameter naming conventions:
+    - params: Optional[dict] — extra query parameters
+    - raw: bool — when True, skip deserialization and return the raw HTTP response
+    - options: RequestOptionBuilder — custom request headers (Launchpad edge scenarios)
+
+
+================================================================================
+3. Core Architecture: WebSocket Client
+================================================================================
+
+File: massive/websocket/__init__.py
+
+3.1 Connection and Authentication Flow
+--------------------------------------
+
+    client instantiation
+        ↓
+    WebSocketClient(api_key, feed, market, subscriptions=["T.*"])
+        ↓ stores scheduled_subs = {"T.*"}
+        ↓
+    client.run(callback) — synchronous entry point, internally calls asyncio.run(connect())
+        ↓
+    connect() — async main loop
+        ↓
+    open WebSocket connection → wss://socket.massive.com/{market}
+        ↓
+    receive welcome message
+        ↓
+    send auth: {"action": "auth", "params": "<API_KEY>"}
+        ↓
+    receive auth response (raise AuthError on failure)
+        ↓
+    enter main message loop
+
+3.2 Subscription Management
+---------------------------
+
+WebSocketClient maintains two sets:
+    - subs: subscriptions already sent to the server
+    - scheduled_subs: the subscription set the user wants
+
+Each loop iteration checks the schedule_resub flag:
+    if True → compute the set differences:
+        added   = scheduled_subs - subs → send {"action": "subscribe", "params": "T.*,..."}
+        removed = subs - scheduled_subs → send {"action": "unsubscribe", "params": "..."}
+    then set subs = scheduled_subs.copy()
+
+Wildcard handling:
+    subscribing to "T.*" automatically removes existing specific subscriptions
+    such as "T.AAPL", "T.MSFT"
+
+Users can adjust subscriptions at runtime:
+    client.subscribe("Q.AAPL")    # add a subscription
+    client.unsubscribe("T.*")     # remove a subscription
+    client.unsubscribe_all()      # clear all subscriptions
+
+3.3 Message Handling
+--------------------
+
+    server pushes a message (JSON array)
+        ↓
+    raw=False path:
+        ↓
+    parse(msg_list, logger, market)
+        ↓ for each message
+    look up MARKET_EVENT_MAP[(market, event_type)]
+        ↓ get the corresponding model class
+    Model.from_dict(msg) → model instance
+        ↓
+    return List[Model] to the user callback
+
+    raw=True path:
+        ↓
+    pass the raw str/bytes straight to the user callback
+
+3.4 Reconnection
+----------------
+
+    - up to 5 reconnects by default (configurable via max_reconnects)
+    - ConnectionClosedError triggers a reconnect: bump the counter → reset
+      schedule_resub → re-establish the connection
+    - exceeding the maximum → re-raise the last exception
+    - ConnectionClosedOK → clean exit, no reconnect
+
+3.5 WebSocket Message Model Registry
+------------------------------------
+
+File: massive/websocket/models/__init__.py
+
+MARKET_EVENT_MAP is a nested dict keyed by (Market, EventType) with model
+classes as values:
+
+    MARKET_EVENT_MAP = {
+        Market.Stocks: {
+            "T": EquityTrade,        # tick-by-tick trades
+            "Q": EquityQuote,        # NBBO quotes
+            "A": EquityAgg,          # second aggregates
+            "AM": EquityAgg,         # minute aggregates
+            "LULD": LimitUpLimitDown,
+            "NOI": Imbalance,
+            ...
+        },
+        Market.Crypto: {
+            "XT": CryptoTrade,
+            "XQ": CryptoQuote,
+            "XA": CurrencyAgg,
+            "XL2": Level2Book,
+            ...
+        },
+        ...
+    }
+
+Adding an event type only requires defining the model class and registering it
+in MARKET_EVENT_MAP.
+
+
+================================================================================
+4. The Model System
+================================================================================
+
+4.1 The @modelclass Decorator
+-----------------------------
+
+File: massive/modelclass.py
+
+    @modelclass
+    class Agg:
+        open: Optional[float] = None
+        high: Optional[float] = None
+        ...
+
+@modelclass builds on the standard @dataclass:
+    - overrides __init__ to support both positional and keyword arguments
+    - positional arguments map to class attributes in declaration order
+    - mixing is allowed: Agg(1.0, 2.0, close=3.0)
+
+4.2 from_dict() Deserialization
+-------------------------------
+
+Each model class defines a @staticmethod from_dict(d):
+
+    @staticmethod
+    def from_dict(d):
+        return Agg(
+            d.get("o", None),  # API shorthand "o" → open
+            d.get("h", None),  # "h" → high
+            d.get("l", None),  # "l" → low
+            d.get("c", None),  # "c" → close
+            d.get("v", None),  # "v" → volume
+            d.get("t", None),  # "t" → timestamp
+            ...
+        )
+
+This design decouples the API's abbreviated key names from Python's readable
+attribute names.
+
+4.3 Shared Enums
+----------------
+
+File: massive/rest/models/common.py
+
+    Sort / Order         — sort direction (ASC, DESC)
+    Market / AssetClass  — market/asset type
+    Locale               — locale (US, GLOBAL)
+    Timeframe            — timeframe (ANNUAL, QUARTERLY)
+    SeriesType           — series type (OPEN, CLOSE, HIGH, LOW)
+    Direction            — movers direction (GAINERS, LOSERS)
+    DividendType         — dividend type
+    DataType / SIP       — data source type
+    and so on
+
+4.4 RequestOptionBuilder
+------------------------
+
+File: massive/rest/models/request.py
+
+Builds edge request headers for Launchpad multi-tenant scenarios:
+
+    options = RequestOptionBuilder(
+        edge_id="user123",
+        edge_ip_address="192.168.1.1",
+        edge_user="agent-string"
+    )
+    client.list_trades("AAPL", options=options)
+
+Resulting headers:
+    X-Massive-Edge-ID: user123
+    X-Massive-Edge-IP-Address: 192.168.1.1
+    X-Massive-Edge-User-Agent: agent-string
+
+
+================================================================================
+5. Exceptions and Logging
+================================================================================
+
+5.1 Exception Hierarchy
+-----------------------
+
+File: massive/exceptions.py
+
+    AuthError   — empty API key or failed authentication
+    BadResponse — non-200 status code from the API
+
+5.2 Logging
+-----------
+
+File: massive/logging.py
+
+    get_logger(name) → logging.Logger
+        - writes to stdout
+        - format: "%(asctime)s %(name)s %(levelname)s: %(message)s"
+
+    verbose=True → sets DEBUG level
+    trace=True   → additionally prints request URLs and response headers
+                   (API key redacted)
+
+
+================================================================================
+6. OpenAPI Specs and Code Generation
+================================================================================
+
+Location: .massive/
+
+    rest.json      — REST API OpenAPI spec (pulled from api.massive.com/openapi)
+    rest.py        — fetch script: make rest-spec
+    websocket.json — WebSocket API spec: make ws-spec
+
+The REST client code (mixins + models) must stay in sync with the rest.json
+spec. When adding or changing API endpoints:
+    1. make rest-spec to update the spec
+    2. add or modify mixin methods and model classes to match the spec
+
+
+================================================================================
+7. The Test Suite
+================================================================================
+
+7.1 REST Tests
+--------------
+
+Directory: test_rest/
+
+Base class BaseTest (test_rest/base.py):
+    - extends unittest.TestCase
+    - uses the pook library to intercept HTTP requests
+    - auto-scans the test_rest/mocks/ directory and registers the JSON files
+      as mock responses
+    - mock file paths mirror URL paths (special characters replaced: ? → &, : → ;)
+    - setUpClass() creates a shared RESTClient instance
+
+Run with:
+    make test_rest
+    poetry run python -m unittest test_rest.test_aggs
+
+7.2 WebSocket Tests
+-------------------
+
+Directory: test_websocket/
+
+Base class BaseTest (test_websocket/base_ws.py):
+    - extends unittest.IsolatedAsyncioTestCase (async test support)
+    - built-in mock WebSocket server (mock_server.py)
+    - expectResponse() queues expected messages
+    - expectProcessor() asserts that received messages match expectations
+
+Run with:
+    make test_websocket
+    poetry run python -m unittest test_websocket.test_conn
+
+
+================================================================================
+8. End-to-End Data Flow Examples
+================================================================================
+
+8.1 REST Pagination Flow
+------------------------
+
+User code:
+    for trade in client.list_trades("AAPL", limit=100):
+        process(trade)
+
+Internal flow:
+
+    TradesClient.list_trades("AAPL", limit=100)
+    │
+    ├→ url = "/v3/trades/AAPL"
+    ├→ params = _get_params() → {"limit": 100}
+    └→ _paginate(url, params, deserializer=Trade.from_dict, result_key="results")
+       │
+       └→ _paginate_iter()  [generator]
+          │
+          ├→ _get(url, params, raw=True) → HTTPResponse
+          │  │
+          │  ├→ urllib3.PoolManager.request("GET", BASE+url, fields=params)
+          │  ├→ automatic retries (exponential backoff, up to 3 attempts)
+          │  └→ returns HTTPResponse
+          │
+          ├→ parse JSON → {"results": [...], "next_url": "..."}
+          │
+          ├→ for item in results:
+          │      Trade.from_dict(item) → yield Trade object
+          │
+          ├→ check next_url
+          │     present → parse new URL and params → continue the loop
+          │     absent  → generator ends
+          │
+          └→ the user receives Trade objects one at a time (lazy loading, pages fetched on demand)
+
+8.2 WebSocket Real-Time Flow
+----------------------------
+
+User code:
+    def handle(msgs):
+        for m in msgs:
+            print(m)
+    client = WebSocketClient(subscriptions=["T.*"])
+    client.run(handle)
+
+Internal flow:
+
+    asyncio.run(connect(handle))
+    │
+    ├→ open wss://socket.massive.com/stocks connection
+    │
+    ├→ receive welcome → send auth → receive auth confirmation
+    │
+    ├→ check schedule_resub=True
+    │  └→ send {"action": "subscribe", "params": "T.*"}
+    │
+    └→ message loop (runs forever):
+       │
+       ├→ ws.recv(timeout=1s)
+       │     timeout → continue the loop
+       │     data received → parse JSON
+       │
+       ├→ parse(msg_list, logger, Market.Stocks)
+       │  │
+       │  ├→ msg["ev"] = "T" (trade event)
+       │  ├→ MARKET_EVENT_MAP[Stocks]["T"] → EquityTrade
+       │  └→ EquityTrade.from_dict(msg) → model instance
+       │
+       ├→ await handle([EquityTrade, ...])
+       │
+       └→ exception handling:
+             ConnectionClosedError → reconnect (up to 5 times)
+             ConnectionClosedOK    → clean exit
+
+
+================================================================================
+9. Extension Guide
+================================================================================
+
+Adding a REST API domain:
+    1. Create a model file under massive/rest/models/ with @modelclass + from_dict()
+    2. Create a mixin file under massive/rest/, extending BaseClient, and implement the methods
+    3. Add the new mixin to the RESTClient inheritance chain in massive/rest/__init__.py
+    4. Export the new models from massive/rest/models/__init__.py
+    5. Add tests and mock data under test_rest/
+
+Adding a WebSocket event type:
+    1. Define the message model in massive/websocket/models/models.py
+    2. Register it in MARKET_EVENT_MAP in massive/websocket/models/__init__.py
+    3. Add tests under test_websocket/
+
+Custom JSON codec:
+    client = RESTClient(custom_json=orjson)
+    — the custom codec must provide loads() and dumps() methods
+
+Launchpad edge requests:
+    opts = RequestOptionBuilder(edge_id="uid", edge_ip_address="1.2.3.4")
+    client.list_trades("AAPL", options=opts)
+
+
+================================================================================
+10. Key Design Decisions
+================================================================================
+
+    1. Mixin multiple inheritance — domain isolation, flexible composition, no deep inheritance chains
+    2. @modelclass decorator — positional-argument support on top of dataclass, simplifying from_dict() calls
+    3. Automatic parameter conversion (_get_params) — inspect-based reflection maps function signatures straight to API parameters
+    4. Generator pagination — lazy loading; users never deal with pagination details; memory friendly
+    5. Async WebSocket + sync wrapper — connect() is natively async; run() offers a convenient synchronous entry point
+    6. Event registry (MARKET_EVENT_MAP) — decouples message routing from model definitions; easy to extend
+    7. pook HTTP mocks — tests never hit the real API; mock files are organized by URL path
diff --git a/docs/requirements.txt b/docs/requirements.txt
index e8c712fd..97d8d2fb 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,2 @@
 sphinx-autodoc-typehints~=1.19.2
-websockets~=10.3
+websockets>=14.0
diff --git a/poetry.lock b/poetry.lock
deleted file mode 100644
index 8cb653a5..00000000
--- a/poetry.lock
+++ /dev/null
@@ -1,1120 +0,0 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. - -[[package]] -name = "alabaster" -version = "0.7.16" -description = "A light, configurable Sphinx theme" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, - {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, -] - -[[package]] -name = "attrs" -version = "22.1.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, - {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, -] - -[package.extras] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] -docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] -tests-no-zope = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] - -[[package]] -name = "babel" -version = "2.17.0" -description = "Internationalization utilities" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, - {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, -] - -[package.extras] -dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] - -[[package]] -name = "black" -version = "24.8.0" -description = "The uncompromising code formatter."
-optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "black-24.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6"}, - {file = "black-24.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb"}, - {file = "black-24.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42"}, - {file = "black-24.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a"}, - {file = "black-24.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1"}, - {file = "black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af"}, - {file = "black-24.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4"}, - {file = "black-24.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af"}, - {file = "black-24.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368"}, - {file = "black-24.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed"}, - {file = "black-24.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018"}, - {file = "black-24.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2"}, - {file = "black-24.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd"}, - {file = "black-24.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2"}, - {file = "black-24.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e"}, - {file = "black-24.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920"}, - {file = "black-24.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c"}, - {file = "black-24.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e"}, - {file = "black-24.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47"}, - {file = "black-24.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb"}, - {file = "black-24.8.0-py3-none-any.whl", hash = "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed"}, - {file = "black-24.8.0.tar.gz", hash = "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = 
"python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "certifi" -version = "2026.2.25" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.7" -groups = ["main", "dev"] -files = [ - {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, - {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, -] - -[[package]] -name = "charset-normalizer" -version = "2.1.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -optional = false -python-versions = ">=3.6.0" -groups = ["dev"] -files = [ - {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, - {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, -] - -[package.extras] -unicode-backport = ["unicodedata2"] - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev"] -markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "docutils" -version = "0.21.2" -description = "Docutils -- Python Documentation Utilities" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, - {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, -] - -[[package]] -name = "furl" -version = "2.1.3" -description = "URL manipulation made simple." 
-optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "furl-2.1.3-py2.py3-none-any.whl", hash = "sha256:9ab425062c4217f9802508e45feb4a83e54324273ac4b202f1850363309666c0"}, - {file = "furl-2.1.3.tar.gz", hash = "sha256:5a6188fe2666c484a12159c18be97a1977a71d632ef5bb867ef15f54af39cc4e"}, -] - -[package.dependencies] -orderedmultidict = ">=1.0.1" -six = ">=1.8.0" - -[[package]] -name = "idna" -version = "3.7" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, -] - -[[package]] -name = "imagesize" -version = "1.4.1" -description = "Getting image size from png/jpeg/jpeg2000/gif file" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["dev"] -files = [ - {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, - {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, -] - -[[package]] -name = "importlib-metadata" -version = "8.7.0" -description = "Read metadata from Python packages" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "python_version == \"3.9\"" -files = [ - {file = "importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd"}, - {file = "importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000"}, -] - -[package.dependencies] -zipp = ">=3.20" - -[package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] -perf = ["ipython"] -test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] -type = ["pytest-mypy"] - -[[package]] -name = "jinja2" -version = "3.1.6" -description = "A very fast and expressive template engine." 
-optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, - {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "jsonschema" -version = "4.17.1" -description = "An implementation of JSON Schema validation for Python" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "jsonschema-4.17.1-py3-none-any.whl", hash = "sha256:410ef23dcdbca4eaedc08b850079179883c2ed09378bd1f760d4af4aacfa28d7"}, - {file = "jsonschema-4.17.1.tar.gz", hash = "sha256:05b2d22c83640cde0b7e0aa329ca7754fbd98ea66ad8ae24aa61328dfe057fa3"}, -] - -[package.dependencies] -attrs = ">=17.4.0" -pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" - -[package.extras] -format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] - -[[package]] -name = "librt" -version = "0.7.2" -description = "Mypyc runtime library" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "platform_python_implementation != \"PyPy\"" -files = [ - {file = "librt-0.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0090f146caa593f47e641307bd0bef778b76629b1d7a5bec95d3a83ed49d49de"}, - {file = "librt-0.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c44321bc013cf4b41169e463a2c441412497cea44dbf79eee0ccad8104d05b7b"}, - {file = "librt-0.7.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8913d92224da3e0ef54e40cdc36f1c0789f375349aa36f7fd44c89dfda1e6d24"}, - {file = "librt-0.7.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f968b951f0713b15ad56090c5499bc63e4718e7636d698e1e1fc2eb66c855f97"}, - {file = "librt-0.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e8801d41dcfbb76407daa5e35e69ebe7b0fc826b7c63d462cbbab530b5672b"}, - {file = "librt-0.7.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9672ee71a08c5b1cb5bb92fc5cc07f88c947716ff3c6b8c3bc0f57ee7ddc12fa"}, - {file = "librt-0.7.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9786b621b5c7e6e2aaab0cacf118c1c3af5f70b9c0e3fe614734b1d9fbc37cd3"}, - {file = "librt-0.7.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:332bd6505e345c0d92ad5ede7419bdd2c96ad7526681be5feb2bb26667819c4f"}, - {file = "librt-0.7.2-cp310-cp310-win32.whl", hash = "sha256:0ca4ff852be76094074bede6fcd1fc75374962ec365aceb396fa7aa3bc733c12"}, - {file = "librt-0.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:dd2b75815270534c62e203ee5755ae1f66540ce4ee08432d4b1e623ddb2fa175"}, - {file = "librt-0.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f8f02d40621f55c659ff1ed7ea91320f8bc16e75fe67f822445cd0e9b5fa1d1"}, - {file = "librt-0.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0bc8425c7f9e9bfc16fae651b56b402b11e25c223a90353fb71fa47ed3e1c048"}, - {file = "librt-0.7.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f9a8a6e3cea9c01d2d9c55cf81ab68580b10d01c54b82cab89e85ba036e1d272"}, - {file = 
"librt-0.7.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de0aceb7d19f6dd4aa6594be45f82af19c74bd0fcf2fa2d42c116d25826f1625"}, - {file = "librt-0.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d29bb29aba2a849ea8255744655b359ce420ab55018c31a9b58c103415e47918"}, - {file = "librt-0.7.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f172088974eac0101ecbe460d89411c945fa57601e4fc3dc461e718991322e00"}, - {file = "librt-0.7.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab4ca61a3b774d3b1886b26f7cc295e75a42ebc26c7a1a04e11c427e5313922f"}, - {file = "librt-0.7.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d891fb657a14d8d77e3b565332e064fbcd67741e99043634e5b7cbded88d9d8e"}, - {file = "librt-0.7.2-cp311-cp311-win32.whl", hash = "sha256:2272e1a4752ad0b9f59793f63ffce06178fbe15a1fd4d2d8ad9ea2fe026d9912"}, - {file = "librt-0.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:eab548b8c771a1846d328a01e83c14ed0414853bf9a91fe7c692f74de513238f"}, - {file = "librt-0.7.2-cp311-cp311-win_arm64.whl", hash = "sha256:0259a726416369e22306177be3404cc29b88fc806d31100802c816fd29f58873"}, - {file = "librt-0.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18d56630bd5793ca860f148cfa6d79a81b3d9c7d5544991c906a8f412eecce63"}, - {file = "librt-0.7.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4076beec27478116ff276731daf676ecd03ceae03fabdefdca400f7e837f477a"}, - {file = "librt-0.7.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7698a3b75f0aa004fa089410b44088628851b3c62c9044822c61a8367fc8caea"}, - {file = "librt-0.7.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e706fdfef8692ee82ac5464c822800d99b436511a9bba402a88e878751b342a9"}, - {file = "librt-0.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39d2b8df134910a2c58d91fbf50cd6ea0b815a50fcdf45de1e21af0a10fcb606"}, - {file = "librt-0.7.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:035c5f2f4bd96326f4528ce48bd60ed19ae35f0c000540971aa597a441e83509"}, - {file = "librt-0.7.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:14798167e2be3cb8202c9617d90d5e4b2b50a92a9c30f8aceb672e12cf26abbf"}, - {file = "librt-0.7.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f6b564c8e9e768fe79651d626917b4b3d10b3d587779eda2231e235b64caab41"}, - {file = "librt-0.7.2-cp312-cp312-win32.whl", hash = "sha256:605c7bbc94aa30288d33d2ade86d3a70c939efa01f3e64d98d72a72466d43161"}, - {file = "librt-0.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:a48f4c5d3d12eced3462d135ecfe0c4e2a143e64161a471b3f3c1491330fcd74"}, - {file = "librt-0.7.2-cp312-cp312-win_arm64.whl", hash = "sha256:0cbe93690e07c9d4ac76bed107e1be8a612dd6fbc94e21a17a5cff002f5f55d5"}, - {file = "librt-0.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b8fdc5e6eb9698ed66bb652f18fa637853fd03b016864bed098f1a28a8d129d"}, - {file = "librt-0.7.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:66d0f0de87033ab7e54f48bd46c042d047ecc3d4e4d5b7b1071e934f34d97054"}, - {file = "librt-0.7.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9da65ed19f6c7c4bbebd7acb37d4dbb95943792b51a74bc96d35673270853e16"}, - {file = "librt-0.7.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eeb76e18c2adac6bcc709ba7f728acca2d42baf0c7a3b9eba392bab84d591961"}, - {file = 
"librt-0.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b5d5f8f617fc3db80864f7353f43db69d9282bf9cd74c7e6cf5be1a7e5d5a83f"}, - {file = "librt-0.7.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cae1b429f9077254622d7d12ade5d04a6b326b2ff456d032fa3fa653ef994979"}, - {file = "librt-0.7.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:edd20b266055b41ccee667b9373b3eff9d77b8e0890fd26a469c89ef48b29bf0"}, - {file = "librt-0.7.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cf748211b5782fb9e85945d7ffdef9587bf303344e2ad3e65dee55b44b1c8ac1"}, - {file = "librt-0.7.2-cp313-cp313-win32.whl", hash = "sha256:c4fefe752dcf30564b031e85e6cbc70d82685e52fbbfffc6fab275a47b5c3de7"}, - {file = "librt-0.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:65cd928b7e0c1142235e54e4b615a0a7f4ad046d1d4cbdd454c311bafca97aed"}, - {file = "librt-0.7.2-cp313-cp313-win_arm64.whl", hash = "sha256:10d6d5d52026e44ddd0f638e822a5d451df0d5b6701cb5112362a3a9f4b00229"}, - {file = "librt-0.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0baabd8daa4339f6cbffada3c66795722c37880ce768de83c7cba379d469ee3b"}, - {file = "librt-0.7.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:462d9672a4ade935d78c70713847bcba643bf4d94c013fdf29ea5f153bb15922"}, - {file = "librt-0.7.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838b16343fc4ed6869edb3ed9dc89c4bc9b113b1c6028592bede4a93ad360aa4"}, - {file = "librt-0.7.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b6ee74bfa7055c07e0acb56226efd49687488486db8fcfdea5da4cf25323a91"}, - {file = "librt-0.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5e3502a543b9b3f906f6d4e88582b7ba13320897e19c60d7c098fa9fda1611f"}, - {file = "librt-0.7.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cb0f330d6af5bcfba339690694bf7c4aedabfa3dd40b17212a2b94a417962ccf"}, - {file = "librt-0.7.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:610a25e8239836fe8eff92628602db13dca5d867e868503239c37f3809b3ce9a"}, - {file = "librt-0.7.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98854ffd0dd6fd64b8a4be4973593746038152e6c239251de908b5a176d8f64a"}, - {file = "librt-0.7.2-cp314-cp314-win32.whl", hash = "sha256:879f789b22e9534df279a6cd3af12d26f8fd96785c47db0d2508304cfc6fd7d9"}, - {file = "librt-0.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:cba3ee432767960ce1e908c67c1fa136831c25ac3705e1e188e63ddaf1b46a06"}, - {file = "librt-0.7.2-cp314-cp314-win_arm64.whl", hash = "sha256:d775e5de996105c9a85136c18bce94204f57021af77a913644e8f9b17733a917"}, - {file = "librt-0.7.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:7fecc4dcc74e0c97ca36435048e3392ee6aa2ae3e77c285394192f9ad1e1a283"}, - {file = "librt-0.7.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d643941996b678699fed64271d02439fe23d31d8dee45f0e0b02c81ee77a4d79"}, - {file = "librt-0.7.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dcefbd09a5db038693d22adc1962111d4c2df0b838fde2f3a61fceec9953b9c5"}, - {file = "librt-0.7.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11445c0460d4522c5959f7770015cdcd7dd025ac2c095c50b78e27878f9cab15"}, - {file = "librt-0.7.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c10ae62472a03dc8db52d5dca4a9af5d0935899cf8c550565a39645bf7735d87"}, - {file = 
"librt-0.7.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a38575adf344ca7423bfb10c3a7b5df066dfbe9b95e8b35f1f79eb84e4b38cad"}, - {file = "librt-0.7.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:2dcae85482674912bdd9dc98c6a236a9698c2c13ee53366a996851e3460da26a"}, - {file = "librt-0.7.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f02f9a7a8b720ae3c46b4df736a71d2ef07b59f3149180ad1e1eba7fccabaadf"}, - {file = "librt-0.7.2-cp314-cp314t-win32.whl", hash = "sha256:062de7065ec0d060f0541602a16bed566c4b948aa1d8466c483bb949e27e0ef7"}, - {file = "librt-0.7.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fb6a190f76a687b034362e610c4990306ad0d913e98a8e588dcec91486797869"}, - {file = "librt-0.7.2-cp314-cp314t-win_arm64.whl", hash = "sha256:35e1c435ee1e24ba2b018172a3ed1caed5275168a016e560e695057acd532add"}, - {file = "librt-0.7.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9da7e00662b75fe2245f6c709c1a2c3b84e3c70aef0c088d3d25cfcfb6ec13c6"}, - {file = "librt-0.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6d412f959d485db6ad02a7b4685f7cbd2d6e80530d95e1add553bc4278c415c5"}, - {file = "librt-0.7.2-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c52990838b821f0fc86a40d244360426550312dac55da984a0878947d62598b6"}, - {file = "librt-0.7.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e53979afcf7bcc6c4a7d31d61e88feb83ed9f3f00407a835df3283ff450eac"}, - {file = "librt-0.7.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca151523e0ea06015e070ccc5fdea0dc374f292fba62e65ba315aef241296c93"}, - {file = "librt-0.7.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:025871d474b48eae232562d575ee7a82fa69ac12b0aa9c9138c27900178fd8ca"}, - {file = "librt-0.7.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:13b72520500fae1e6d10fb10a8972858a10ed4b6edb7e800f1d11b385803a868"}, - {file = "librt-0.7.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5292bf3d6756301ff72e578703837afebc6660b235cf338ab9a1074cb3b988fd"}, - {file = "librt-0.7.2-cp39-cp39-win32.whl", hash = "sha256:22a7a751590444a90e3a663205caa582da4a2e6cdbb7515ae579ca7b95e015ae"}, - {file = "librt-0.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:fd612a78cd330c0371d2b918bf73aeb976f2c031562c4b571e0100069626b390"}, - {file = "librt-0.7.2.tar.gz", hash = "sha256:48aa0f311bdf90ec9a63e3669b6aff04967f24f2f67fe9372c570a21dc9ae873"}, -] - -[[package]] -name = "MarkupSafe" -version = "2.1.1" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, - {file = 
"MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, - {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, -] - -[[package]] -name = "mypy" -version = "1.19.1" -description = "Optional static typing for Python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec"}, - {file = "mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b"}, - {file = "mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6"}, - {file = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"}, - {file = "mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1"}, - {file = "mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac"}, - {file = "mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288"}, - {file = "mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab"}, - {file = "mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6"}, - {file = "mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331"}, - {file = "mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925"}, - {file = "mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042"}, - {file = "mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1"}, - {file = "mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e"}, - {file = "mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2"}, - {file = "mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8"}, - {file = "mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a"}, - {file = "mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13"}, - {file = "mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250"}, - {file = "mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b"}, - {file = "mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e"}, - {file = "mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef"}, - {file = "mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75"}, - {file = "mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd"}, - {file = "mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1"}, - {file = "mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718"}, - {file = 
"mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b"}, - {file = "mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045"}, - {file = "mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957"}, - {file = "mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f"}, - {file = "mypy-1.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bcfc336a03a1aaa26dfce9fff3e287a3ba99872a157561cbfcebe67c13308e3"}, - {file = "mypy-1.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b7951a701c07ea584c4fe327834b92a30825514c868b1f69c30445093fdd9d5a"}, - {file = "mypy-1.19.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b13cfdd6c87fc3efb69ea4ec18ef79c74c3f98b4e5498ca9b85ab3b2c2329a67"}, - {file = "mypy-1.19.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f28f99c824ecebcdaa2e55d82953e38ff60ee5ec938476796636b86afa3956e"}, - {file = "mypy-1.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c608937067d2fc5a4dd1a5ce92fd9e1398691b8c5d012d66e1ddd430e9244376"}, - {file = "mypy-1.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:409088884802d511ee52ca067707b90c883426bd95514e8cfda8281dc2effe24"}, - {file = "mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247"}, - {file = "mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba"}, -] - -[package.dependencies] -librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""} -mypy_extensions = ">=1.0.0" -pathspec = ">=0.9.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing_extensions = ">=4.6.0" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -faster-cache = ["orjson"] -install-types = ["pip"] -mypyc = ["setuptools (>=50)"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." 
-optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, -] - -[[package]] -name = "orderedmultidict" -version = "1.0.1" -description = "Ordered Multivalue Dictionary" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "orderedmultidict-1.0.1-py2.py3-none-any.whl", hash = "sha256:43c839a17ee3cdd62234c47deca1a8508a3f2ca1d0678a3bf791c87cf84adbf3"}, - {file = "orderedmultidict-1.0.1.tar.gz", hash = "sha256:04070bbb5e87291cc9bfa51df413677faf2141c73c61d2a5f7b26bea3cd882ad"}, -] - -[package.dependencies] -six = ">=1.8.0" - -[[package]] -name = "orjson" -version = "3.11.5" -description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "orjson-3.11.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:df9eadb2a6386d5ea2bfd81309c505e125cfc9ba2b1b99a97e60985b0b3665d1"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccc70da619744467d8f1f49a8cadae5ec7bbe054e5232d95f92ed8737f8c5870"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:073aab025294c2f6fc0807201c76fdaed86f8fc4be52c440fb78fbb759a1ac09"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:835f26fa24ba0bb8c53ae2a9328d1706135b74ec653ed933869b74b6909e63fd"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667c132f1f3651c14522a119e4dd631fad98761fa960c55e8e7430bb2a1ba4ac"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42e8961196af655bb5e63ce6c60d25e8798cd4dfbc04f4203457fa3869322c2e"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75412ca06e20904c19170f8a24486c4e6c7887dea591ba18a1ab572f1300ee9f"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6af8680328c69e15324b5af3ae38abbfcf9cbec37b5346ebfd52339c3d7e8a18"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a86fe4ff4ea523eac8f4b57fdac319faf037d3c1be12405e6a7e86b3fbc4756a"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e607b49b1a106ee2086633167033afbd63f76f2999e9236f638b06b112b24ea7"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7339f41c244d0eea251637727f016b3d20050636695bc78345cce9029b189401"}, - {file = "orjson-3.11.5-cp310-cp310-win32.whl", hash = "sha256:8be318da8413cdbbce77b8c5fac8d13f6eb0f0db41b30bb598631412619572e8"}, - {file = "orjson-3.11.5-cp310-cp310-win_amd64.whl", hash = "sha256:b9f86d69ae822cabc2a0f6c099b43e8733dda788405cba2665595b7e8dd8d167"}, - {file = "orjson-3.11.5-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9c8494625ad60a923af6b2b0bd74107146efe9b55099e20d7740d995f338fcd8"}, - {file = "orjson-3.11.5-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:7bb2ce0b82bc9fd1168a513ddae7a857994b780b2945a8c51db4ab1c4b751ebc"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:67394d3becd50b954c4ecd24ac90b5051ee7c903d167459f93e77fc6f5b4c968"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:298d2451f375e5f17b897794bcc3e7b821c0f32b4788b9bcae47ada24d7f3cf7"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa5e4244063db8e1d87e0f54c3f7522f14b2dc937e65d5241ef0076a096409fd"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1db2088b490761976c1b2e956d5d4e6409f3732e9d79cfa69f876c5248d1baf9"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2ed66358f32c24e10ceea518e16eb3549e34f33a9d51f99ce23b0251776a1ef"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2021afda46c1ed64d74b555065dbd4c2558d510d8cec5ea6a53001b3e5e82a9"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b42ffbed9128e547a1647a3e50bc88ab28ae9daa61713962e0d3dd35e820c125"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:8d5f16195bb671a5dd3d1dbea758918bada8f6cc27de72bd64adfbd748770814"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c0e5d9f7a0227df2927d343a6e3859bebf9208b427c79bd31949abcc2fa32fa5"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:23d04c4543e78f724c4dfe656b3791b5f98e4c9253e13b2636f1af5d90e4a880"}, - {file = "orjson-3.11.5-cp311-cp311-win32.whl", hash = "sha256:c404603df4865f8e0afe981aa3c4b62b406e6d06049564d58934860b62b7f91d"}, - {file = "orjson-3.11.5-cp311-cp311-win_amd64.whl", hash = "sha256:9645ef655735a74da4990c24ffbd6894828fbfa117bc97c1edd98c282ecb52e1"}, - {file = "orjson-3.11.5-cp311-cp311-win_arm64.whl", hash = "sha256:1cbf2735722623fcdee8e712cbaaab9e372bbcb0c7924ad711b261c2eccf4a5c"}, - {file = "orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d"}, - {file = "orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69"}, - {file 
= "orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca"}, - {file = "orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98"}, - {file = "orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875"}, - {file = "orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe"}, - {file = "orjson-3.11.5-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3b01799262081a4c47c035dd77c1301d40f568f77cc7ec1bb7db5d63b0a01629"}, - {file = "orjson-3.11.5-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:61de247948108484779f57a9f406e4c84d636fa5a59e411e6352484985e8a7c3"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:894aea2e63d4f24a7f04a1908307c738d0dce992e9249e744b8f4e8dd9197f39"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ddc21521598dbe369d83d4d40338e23d4101dad21dae0e79fa20465dbace019f"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cce16ae2f5fb2c53c3eafdd1706cb7b6530a67cc1c17abe8ec747f5cd7c0c51"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e46c762d9f0e1cfb4ccc8515de7f349abbc95b59cb5a2bd68df5973fdef913f8"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7345c759276b798ccd6d77a87136029e71e66a8bbf2d2755cbdde1d82e78706"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75bc2e59e6a2ac1dd28901d07115abdebc4563b5b07dd612bf64260a201b1c7f"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:54aae9b654554c3b4edd61896b978568c6daa16af96fa4681c9b5babd469f863"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:4bdd8d164a871c4ec773f9de0f6fe8769c2d6727879c37a9666ba4183b7f8228"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a261fef929bcf98a60713bf5e95ad067cea16ae345d9a35034e73c3990e927d2"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c028a394c766693c5c9909dec76b24f37e6a1b91999e8d0c0d5feecbe93c3e05"}, - {file = "orjson-3.11.5-cp313-cp313-win32.whl", hash = "sha256:2cc79aaad1dfabe1bd2d50ee09814a1253164b3da4c00a78c458d82d04b3bdef"}, - {file = "orjson-3.11.5-cp313-cp313-win_amd64.whl", hash = "sha256:ff7877d376add4e16b274e35a3f58b7f37b362abf4aa31863dadacdd20e3a583"}, - {file = "orjson-3.11.5-cp313-cp313-win_arm64.whl", hash = "sha256:59ac72ea775c88b163ba8d21b0177628bd015c5dd060647bbab6e22da3aad287"}, - {file = "orjson-3.11.5-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e446a8ea0a4c366ceafc7d97067bfd55292969143b57e3c846d87fc701e797a0"}, - {file = "orjson-3.11.5-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:53deb5addae9c22bbe3739298f5f2196afa881ea75944e7720681c7080909a81"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82cd00d49d6063d2b8791da5d4f9d20539c5951f965e45ccf4e96d33505ce68f"}, - {file = 
"orjson-3.11.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3fd15f9fc8c203aeceff4fda211157fad114dde66e92e24097b3647a08f4ee9e"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9df95000fbe6777bf9820ae82ab7578e8662051bb5f83d71a28992f539d2cda7"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92a8d676748fca47ade5bc3da7430ed7767afe51b2f8100e3cd65e151c0eaceb"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa0f513be38b40234c77975e68805506cad5d57b3dfd8fe3baa7f4f4051e15b4"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1863e75b92891f553b7922ce4ee10ed06db061e104f2b7815de80cdcb135ad"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d4be86b58e9ea262617b8ca6251a2f0d63cc132a6da4b5fcc8e0a4128782c829"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:b923c1c13fa02084eb38c9c065afd860a5cff58026813319a06949c3af5732ac"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:1b6bd351202b2cd987f35a13b5e16471cf4d952b42a73c391cc537974c43ef6d"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bb150d529637d541e6af06bbe3d02f5498d628b7f98267ff87647584293ab439"}, - {file = "orjson-3.11.5-cp314-cp314-win32.whl", hash = "sha256:9cc1e55c884921434a84a0c3dd2699eb9f92e7b441d7f53f3941079ec6ce7499"}, - {file = "orjson-3.11.5-cp314-cp314-win_amd64.whl", hash = "sha256:a4f3cb2d874e03bc7767c8f88adaa1a9a05cecea3712649c3b58589ec7317310"}, - {file = "orjson-3.11.5-cp314-cp314-win_arm64.whl", hash = "sha256:38b22f476c351f9a1c43e5b07d8b5a02eb24a6ab8e75f700f7d479d4568346a5"}, - {file = "orjson-3.11.5-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1b280e2d2d284a6713b0cfec7b08918ebe57df23e3f76b27586197afca3cb1e9"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c8d8a112b274fae8c5f0f01954cb0480137072c271f3f4958127b010dfefaec"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f0a2ae6f09ac7bd47d2d5a5305c1d9ed08ac057cda55bb0a49fa506f0d2da00"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0d87bd1896faac0d10b4f849016db81a63e4ec5df38757ffae84d45ab38aa71"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:801a821e8e6099b8c459ac7540b3c32dba6013437c57fdcaec205b169754f38c"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69a0f6ac618c98c74b7fbc8c0172ba86f9e01dbf9f62aa0b1776c2231a7bffe5"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea7339bdd22e6f1060c55ac31b6a755d86a5b2ad3657f2669ec243f8e3b2bdb"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4dad582bc93cef8f26513e12771e76385a7e6187fd713157e971c784112aad56"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:0522003e9f7fba91982e83a97fec0708f5a714c96c4209db7104e6b9d132f111"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:7403851e430a478440ecc1258bcbacbfbd8175f9ac1e39031a7121dd0de05ff8"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:5f691263425d3177977c8d1dd896cde7b98d93cbf390b2544a090675e83a6a0a"}, - {file = "orjson-3.11.5-cp39-cp39-win32.whl", hash = "sha256:61026196a1c4b968e1b1e540563e277843082e9e97d78afa03eb89315af531f1"}, - {file = "orjson-3.11.5-cp39-cp39-win_amd64.whl", hash = "sha256:09b94b947ac08586af635ef922d69dc9bc63321527a3a04647f4986a73f4bd30"}, - {file = "orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5"}, -] - -[[package]] -name = "packaging" -version = "23.1" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, - {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, -] - -[[package]] -name = "pathspec" -version = "0.10.2" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "pathspec-0.10.2-py3-none-any.whl", hash = "sha256:88c2606f2c1e818b978540f73ecc908e13999c6c3a383daf3705652ae79807a5"}, - {file = "pathspec-0.10.2.tar.gz", hash = "sha256:8f6bf73e5758fd365ef5d58ce09ac7c27d2833a8d7da51712eac6e27e35141b0"}, -] - -[[package]] -name = "platformdirs" -version = "2.5.4" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "platformdirs-2.5.4-py3-none-any.whl", hash = "sha256:af0276409f9a02373d540bf8480021a048711d572745aef4b7842dad245eba10"}, - {file = "platformdirs-2.5.4.tar.gz", hash = "sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7"}, -] - -[package.extras] -docs = ["furo (>=2022.9.29)", "proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.4)"] -test = ["appdirs (==1.4.4)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] - -[[package]] -name = "pook" -version = "2.1.4" -description = "HTTP traffic mocking and expectations made easy" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pook-2.1.4-py3-none-any.whl", hash = "sha256:3f273ab189874dd775a15c3fa1b1bf89f28b001d2619c5f909e4d3f7df66d36e"}, - {file = "pook-2.1.4.tar.gz", hash = "sha256:2bcbc7d58d1d88b6f2da98c711f5391d5f690292bdd5ff2ccda927576500937a"}, -] - -[package.dependencies] -furl = ">=0.5.6" -jsonschema = ">=2.5.1" -xmltodict = ">=0.11.0" - -[[package]] -name = "pygments" -version = "2.20.0" -description = "Pygments is a syntax highlighting package written in Python." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, - {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, -] - -[package.extras] -windows-terminal = ["colorama (>=0.4.6)"] - -[[package]] -name = "pyrsistent" -version = "0.19.2" -description = "Persistent/Functional/Immutable data structures" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "pyrsistent-0.19.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d6982b5a0237e1b7d876b60265564648a69b14017f3b5f908c5be2de3f9abb7a"}, - {file = "pyrsistent-0.19.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:187d5730b0507d9285a96fca9716310d572e5464cadd19f22b63a6976254d77a"}, - {file = "pyrsistent-0.19.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:055ab45d5911d7cae397dc418808d8802fb95262751872c841c170b0dbf51eed"}, - {file = "pyrsistent-0.19.2-cp310-cp310-win32.whl", hash = "sha256:456cb30ca8bff00596519f2c53e42c245c09e1a4543945703acd4312949bfd41"}, - {file = "pyrsistent-0.19.2-cp310-cp310-win_amd64.whl", hash = "sha256:b39725209e06759217d1ac5fcdb510e98670af9e37223985f330b611f62e7425"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2aede922a488861de0ad00c7630a6e2d57e8023e4be72d9d7147a9fcd2d30712"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879b4c2f4d41585c42df4d7654ddffff1239dc4065bc88b745f0341828b83e78"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43bec251bbd10e3cb58ced80609c5c1eb238da9ca78b964aea410fb820d00d6"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-win32.whl", hash = "sha256:d690b18ac4b3e3cab73b0b7aa7dbe65978a172ff94970ff98d82f2031f8971c2"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-win_amd64.whl", hash = "sha256:3ba4134a3ff0fc7ad225b6b457d1309f4698108fb6b35532d015dca8f5abed73"}, - {file = "pyrsistent-0.19.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a178209e2df710e3f142cbd05313ba0c5ebed0a55d78d9945ac7a4e09d923308"}, - {file = "pyrsistent-0.19.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e371b844cec09d8dc424d940e54bba8f67a03ebea20ff7b7b0d56f526c71d584"}, - {file = "pyrsistent-0.19.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111156137b2e71f3a9936baf27cb322e8024dac3dc54ec7fb9f0bcf3249e68bb"}, - {file = "pyrsistent-0.19.2-cp38-cp38-win32.whl", hash = "sha256:e5d8f84d81e3729c3b506657dddfe46e8ba9c330bf1858ee33108f8bb2adb38a"}, - {file = "pyrsistent-0.19.2-cp38-cp38-win_amd64.whl", hash = "sha256:9cd3e9978d12b5d99cbdc727a3022da0430ad007dacf33d0bf554b96427f33ab"}, - {file = "pyrsistent-0.19.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f1258f4e6c42ad0b20f9cfcc3ada5bd6b83374516cd01c0960e3cb75fdca6770"}, - {file = "pyrsistent-0.19.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21455e2b16000440e896ab99e8304617151981ed40c29e9507ef1c2e4314ee95"}, - {file = "pyrsistent-0.19.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfd880614c6237243ff53a0539f1cb26987a6dc8ac6e66e0c5a40617296a045e"}, - {file = 
"pyrsistent-0.19.2-cp39-cp39-win32.whl", hash = "sha256:71d332b0320642b3261e9fee47ab9e65872c2bd90260e5d225dabeed93cbd42b"}, - {file = "pyrsistent-0.19.2-cp39-cp39-win_amd64.whl", hash = "sha256:dec3eac7549869365fe263831f576c8457f6c833937c68542d08fde73457d291"}, - {file = "pyrsistent-0.19.2-py3-none-any.whl", hash = "sha256:ea6b79a02a28550c98b6ca9c35b9f492beaa54d7c5c9e9949555893c8a9234d0"}, - {file = "pyrsistent-0.19.2.tar.gz", hash = "sha256:bfa0351be89c9fcbcb8c9879b826f4353be10f58f8a677efab0c017bf7137ec2"}, -] - -[[package]] -name = "requests" -version = "2.32.4" -description = "Python HTTP for Humans." -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, - {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset_normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["dev"] -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "snowballstemmer" -version = "2.2.0" -description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
-optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, - {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, -] - -[[package]] -name = "sphinx" -version = "7.4.7" -description = "Python documentation generator" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"}, - {file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"}, -] - -[package.dependencies] -alabaster = ">=0.7.14,<0.8.0" -babel = ">=2.13" -colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""} -docutils = ">=0.20,<0.22" -imagesize = ">=1.3" -importlib-metadata = {version = ">=6.0", markers = "python_version < \"3.10\""} -Jinja2 = ">=3.1" -packaging = ">=23.0" -Pygments = ">=2.17" -requests = ">=2.30.0" -snowballstemmer = ">=2.2" -sphinxcontrib-applehelp = "*" -sphinxcontrib-devhelp = "*" -sphinxcontrib-htmlhelp = ">=2.0.0" -sphinxcontrib-jsmath = "*" -sphinxcontrib-qthelp = "*" -sphinxcontrib-serializinghtml = ">=1.1.9" -tomli = {version = ">=2", markers = "python_version < \"3.11\""} - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"] -test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] - -[[package]] -name = "sphinx-autodoc-typehints" -version = "2.3.0" -description = "Type hints (PEP 484) support for the Sphinx autodoc extension" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "sphinx_autodoc_typehints-2.3.0-py3-none-any.whl", hash = "sha256:3098e2c6d0ba99eacd013eb06861acc9b51c6e595be86ab05c08ee5506ac0c67"}, - {file = "sphinx_autodoc_typehints-2.3.0.tar.gz", hash = "sha256:535c78ed2d6a1bad393ba9f3dfa2602cf424e2631ee207263e07874c38fde084"}, -] - -[package.dependencies] -sphinx = ">=7.3.5" - -[package.extras] -docs = ["furo (>=2024.1.29)"] -numpy = ["nptyping (>=2.5)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.4.4)", "defusedxml (>=0.7.1)", "diff-cover (>=9)", "pytest (>=8.1.1)", "pytest-cov (>=5)", "sphobjinv (>=2.3.1)", "typing-extensions (>=4.11)"] - -[[package]] -name = "sphinx-rtd-theme" -version = "3.1.0" -description = "Read the Docs theme for Sphinx" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl", hash = "sha256:1785824ae8e6632060490f67cf3a72d404a85d2d9fc26bce3619944de5682b89"}, - {file = "sphinx_rtd_theme-3.1.0.tar.gz", hash = "sha256:b44276f2c276e909239a4f6c955aa667aaafeb78597923b1c60babc76db78e4c"}, -] - -[package.dependencies] -docutils = ">0.18,<0.23" -sphinx = ">=6,<10" -sphinxcontrib-jquery = ">=4,<5" - -[package.extras] -dev = ["bump2version", "transifex-client", "twine", "wheel"] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.2" -description = "sphinxcontrib-applehelp is a sphinx extension which outputs Apple help books" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-applehelp-1.0.2.tar.gz", hash = 
"sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"}, - {file = "sphinxcontrib_applehelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"}, - {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.0" -description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" -optional = false -python-versions = ">=3.6" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-htmlhelp-2.0.0.tar.gz", hash = "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2"}, - {file = "sphinxcontrib_htmlhelp-2.0.0-py2.py3-none-any.whl", hash = "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["html5lib", "pytest"] - -[[package]] -name = "sphinxcontrib-jquery" -version = "4.1" -description = "Extension to include jQuery on newer Sphinx releases" -optional = false -python-versions = ">=2.7" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a"}, - {file = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl", hash = "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae"}, -] - -[package.dependencies] -Sphinx = ">=1.8" - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -description = "A sphinx extension which renders display math in HTML via JavaScript" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, - {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, -] - -[package.extras] -test = ["flake8", "mypy", "pytest"] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." 
-optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"}, - {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "2.0.0" -description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"}, - {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"}, -] - -[package.extras] -lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] -standalone = ["Sphinx (>=5)"] -test = ["pytest"] - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version < \"3.11\"" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] - -[[package]] -name = "types-certifi" -version = "2021.10.8.3" -description = "Typing stubs for certifi" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f"}, - {file = "types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a"}, -] - -[[package]] -name = "types-setuptools" -version = "81.0.0.20260209" -description = "Typing stubs for setuptools" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "types_setuptools-81.0.0.20260209-py3-none-any.whl", hash = "sha256:4facf71e3f953f8f5ac0020cd6c1b5e493aaff0183e85830bc34870b6abf8475"}, - {file = "types_setuptools-81.0.0.20260209.tar.gz", hash = "sha256:2c2eb64499b41b672c387f6f45678a28d20a143a81b45a5c77acbfd4da0df3e1"}, -] - -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, - {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[[package]] -name = "urllib3" -version = "2.6.3" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.9" -groups = ["main", "dev"] -files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, -] - -[package.extras] -brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] - -[[package]] -name = "websockets" -version = "15.0.1" -description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, - {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, - {file = "websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c"}, - {file = "websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256"}, - {file = "websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792"}, - {file = 
"websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf"}, - {file = "websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85"}, - {file = "websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597"}, - {file = "websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9"}, - {file = "websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f"}, - {file = 
"websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4"}, - {file = "websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa"}, - {file = "websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5f4c04ead5aed67c8a1a20491d54cdfba5884507a48dd798ecaf13c74c4489f5"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abdc0c6c8c648b4805c5eacd131910d2a7f6455dfd3becab248ef108e89ab16a"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a625e06551975f4b7ea7102bc43895b90742746797e2e14b70ed61c43a90f09b"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d591f8de75824cbb7acad4e05d2d710484f15f29d4a915092675ad3456f11770"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47819cea040f31d670cc8d324bb6435c6f133b8c7a19ec3d61634e62f8d8f9eb"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac017dd64572e5c3bd01939121e4d16cf30e5d7e110a119399cf3133b63ad054"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4a9fac8e469d04ce6c25bb2610dc535235bd4aa14996b4e6dbebf5e007eba5ee"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363c6f671b761efcb30608d24925a382497c12c506b51661883c3e22337265ed"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2034693ad3097d5355bfdacfffcbd3ef5694f9718ab7f29c29689a9eae841880"}, - {file = "websockets-15.0.1-cp39-cp39-win32.whl", hash = "sha256:3b1ac0d3e594bf121308112697cf4b32be538fb1444468fb0a6ae4feebc83411"}, - {file = "websockets-15.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7643a03db5c95c799b89b31c036d5f27eeb4d259c798e878d6937d71832b1e4"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7f493881579c90fc262d9cdbaa05a6b54b3811c2f300766748db79f098db9940"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:47b099e1f4fbc95b701b6e85768e1fcdaf1630f3cbe4765fa216596f12310e2e"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f2b6de947f8c757db2db9c71527933ad0019737ec374a8a6be9a956786aaf9"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d08eb4c2b7d6c41da6ca0600c077e93f5adcfd979cd777d747e9ee624556da4b"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b826973a4a2ae47ba357e4e82fa44a463b8f168e1ca775ac64521442b19e87f"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:21c1fa28a6a7e3cbdc171c694398b6df4744613ce9b36b1a498e816787e28123"}, - {file = "websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f"}, - {file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"}, -] - -[[package]] -name = "xmltodict" -version = "0.13.0" -description = "Makes working with XML feel like you are working with JSON" -optional = false -python-versions = ">=3.4" -groups = ["dev"] -files = [ - {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, - {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, -] - -[[package]] -name = "zipp" -version = "3.23.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "python_version == \"3.9\"" -files = [ - {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, - {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, -] - -[package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] -type = ["pytest-mypy"] - -[metadata] -lock-version = "2.1" -python-versions = "^3.9" -content-hash = "859fb753010770932bb13116107b08bf52ef64a130954852216b405cc219fc21" diff --git a/pyproject.toml b/pyproject.toml index 058444af..11d5dc72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,47 +1,67 @@ -[tool.poetry] +[project] name = "massive" version = "0.0.0" description = "Official Massive (formerly Polygon.io) REST and Websocket client." 
-authors = ["massive.com"] -license = "MIT" -homepage = "https://massive.com" -repository = "https://github.com/massive-com/client-python" -documentation = "https://massive.com/docs" -keywords = [ - "massive", - "free", - "rest", - "stock", - "market", - "data", - "api", - "massive.com", - "websocket", - "client" +requires-python = ">=3.9" +license = {text = "MIT"} + +dependencies = [ + "urllib3>=1.26.9", + "websockets>=14.0", + "certifi>=2022.5.18,<2027.0.0", + "pandas", + "requests", + "tenacity", + "fake-useragent", + "python-dotenv", +] + +[project.optional-dependencies] +efinance = ["efinance"] +akshare = ["akshare"] +tushare = ["tushare"] +pytdx = ["pytdx"] +baostock = ["baostock"] +yfinance = ["yfinance"] +# longbridge = ["longbridge-openapi"] +all = [ + "efinance", + "akshare", + "tushare", + "pytdx", + "baostock", + "yfinance", + # "longbridge-openapi", +] +dev = [ + "black>=24.8.0", + "mypy>=1.19", + "types-urllib3>=1.26.25", + "types-certifi>=2021.10.8", + "types-setuptools>=81.0.0", + "pook>=2.1.4", + "orjson>=3.11.5", + "pytest", ] -packages = [ - { include = "massive" } +docs = [ + "Sphinx>=7.4.7", + "sphinx-rtd-theme>=3.1.0", + "sphinx-autodoc-typehints>=2.3.0", ] -[tool.poetry.dependencies] -python = "^3.9" -urllib3 = ">=1.26.9" -websockets = ">=14.0" -certifi = ">=2022.5.18,<2027.0.0" +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" -[tool.poetry.dev-dependencies] -black = "^24.8.0" -mypy = "^1.19" -types-urllib3 = "^1.26.25" -Sphinx = "^7.4.7" -sphinx-rtd-theme = "^3.1.0" -# keep this in sync with docs/requirements.txt for readthedocs.org -sphinx-autodoc-typehints = "^2.3.0" -types-certifi = "^2021.10.8" -types-setuptools = "^81.0.0" -pook = "^2.1.4" -orjson = "^3.11.5" +[tool.setuptools.packages.find] +where = ["src"] -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] + +[tool.black] +line-length = 88 + +[tool.mypy] +python_version = "3.9" diff --git a/massive/__init__.py b/src/massive/__init__.py similarity index 100% rename from massive/__init__.py rename to src/massive/__init__.py diff --git a/massive/exceptions.py b/src/massive/exceptions.py similarity index 100% rename from massive/exceptions.py rename to src/massive/exceptions.py diff --git a/massive/logging.py b/src/massive/logging.py similarity index 100% rename from massive/logging.py rename to src/massive/logging.py diff --git a/massive/modelclass.py b/src/massive/modelclass.py similarity index 100% rename from massive/modelclass.py rename to src/massive/modelclass.py diff --git a/massive/rest/__init__.py b/src/massive/rest/__init__.py similarity index 97% rename from massive/rest/__init__.py rename to src/massive/rest/__init__.py index 5a00da5a..8fc6c2a7 100644 --- a/massive/rest/__init__.py +++ b/src/massive/rest/__init__.py @@ -22,6 +22,9 @@ from .vX import VXClient from typing import Optional, Any import os +from dotenv import load_dotenv + +load_dotenv() BASE = "https://api.massive.com" ENV_KEY = "MASSIVE_API_KEY" diff --git a/massive/rest/aggs.py b/src/massive/rest/aggs.py similarity index 100% rename from massive/rest/aggs.py rename to src/massive/rest/aggs.py diff --git a/massive/rest/base.py b/src/massive/rest/base.py similarity index 99% rename from massive/rest/base.py rename to src/massive/rest/base.py index 3349d7ef..288798e3 100644 --- a/massive/rest/base.py +++ b/src/massive/rest/base.py @@ -2,6 +2,7 @@ import json import urllib3 import 
inspect +import time from urllib3.util.retry import Retry from enum import Enum from typing import Optional, Any, Dict @@ -241,6 +242,7 @@ def _paginate_iter( if parsed.query: path += "?" + parsed.query params = {} + time.sleep(12) def _paginate( self, diff --git a/massive/rest/benzinga.py b/src/massive/rest/benzinga.py similarity index 100% rename from massive/rest/benzinga.py rename to src/massive/rest/benzinga.py diff --git a/massive/rest/economy.py b/src/massive/rest/economy.py similarity index 100% rename from massive/rest/economy.py rename to src/massive/rest/economy.py diff --git a/massive/rest/etf_global.py b/src/massive/rest/etf_global.py similarity index 100% rename from massive/rest/etf_global.py rename to src/massive/rest/etf_global.py diff --git a/massive/rest/financials.py b/src/massive/rest/financials.py similarity index 100% rename from massive/rest/financials.py rename to src/massive/rest/financials.py diff --git a/massive/rest/futures.py b/src/massive/rest/futures.py similarity index 100% rename from massive/rest/futures.py rename to src/massive/rest/futures.py diff --git a/massive/rest/indicators.py b/src/massive/rest/indicators.py similarity index 100% rename from massive/rest/indicators.py rename to src/massive/rest/indicators.py diff --git a/massive/rest/models/__init__.py b/src/massive/rest/models/__init__.py similarity index 100% rename from massive/rest/models/__init__.py rename to src/massive/rest/models/__init__.py diff --git a/massive/rest/models/aggs.py b/src/massive/rest/models/aggs.py similarity index 100% rename from massive/rest/models/aggs.py rename to src/massive/rest/models/aggs.py diff --git a/massive/rest/models/benzinga.py b/src/massive/rest/models/benzinga.py similarity index 100% rename from massive/rest/models/benzinga.py rename to src/massive/rest/models/benzinga.py diff --git a/massive/rest/models/common.py b/src/massive/rest/models/common.py similarity index 100% rename from massive/rest/models/common.py rename to src/massive/rest/models/common.py diff --git a/massive/rest/models/conditions.py b/src/massive/rest/models/conditions.py similarity index 100% rename from massive/rest/models/conditions.py rename to src/massive/rest/models/conditions.py diff --git a/massive/rest/models/contracts.py b/src/massive/rest/models/contracts.py similarity index 100% rename from massive/rest/models/contracts.py rename to src/massive/rest/models/contracts.py diff --git a/massive/rest/models/dividends.py b/src/massive/rest/models/dividends.py similarity index 100% rename from massive/rest/models/dividends.py rename to src/massive/rest/models/dividends.py diff --git a/massive/rest/models/economy.py b/src/massive/rest/models/economy.py similarity index 100% rename from massive/rest/models/economy.py rename to src/massive/rest/models/economy.py diff --git a/massive/rest/models/etf_global.py b/src/massive/rest/models/etf_global.py similarity index 100% rename from massive/rest/models/etf_global.py rename to src/massive/rest/models/etf_global.py diff --git a/massive/rest/models/exchanges.py b/src/massive/rest/models/exchanges.py similarity index 100% rename from massive/rest/models/exchanges.py rename to src/massive/rest/models/exchanges.py diff --git a/massive/rest/models/financials.py b/src/massive/rest/models/financials.py similarity index 100% rename from massive/rest/models/financials.py rename to src/massive/rest/models/financials.py diff --git a/massive/rest/models/futures.py b/src/massive/rest/models/futures.py similarity index 100% rename from 
massive/rest/models/futures.py rename to src/massive/rest/models/futures.py diff --git a/massive/rest/models/indicators.py b/src/massive/rest/models/indicators.py similarity index 100% rename from massive/rest/models/indicators.py rename to src/massive/rest/models/indicators.py diff --git a/massive/rest/models/markets.py b/src/massive/rest/models/markets.py similarity index 100% rename from massive/rest/models/markets.py rename to src/massive/rest/models/markets.py diff --git a/massive/rest/models/quotes.py b/src/massive/rest/models/quotes.py similarity index 100% rename from massive/rest/models/quotes.py rename to src/massive/rest/models/quotes.py diff --git a/massive/rest/models/request.py b/src/massive/rest/models/request.py similarity index 100% rename from massive/rest/models/request.py rename to src/massive/rest/models/request.py diff --git a/massive/rest/models/snapshot.py b/src/massive/rest/models/snapshot.py similarity index 100% rename from massive/rest/models/snapshot.py rename to src/massive/rest/models/snapshot.py diff --git a/massive/rest/models/splits.py b/src/massive/rest/models/splits.py similarity index 100% rename from massive/rest/models/splits.py rename to src/massive/rest/models/splits.py diff --git a/massive/rest/models/summaries.py b/src/massive/rest/models/summaries.py similarity index 100% rename from massive/rest/models/summaries.py rename to src/massive/rest/models/summaries.py diff --git a/massive/rest/models/tickers.py b/src/massive/rest/models/tickers.py similarity index 100% rename from massive/rest/models/tickers.py rename to src/massive/rest/models/tickers.py diff --git a/massive/rest/models/tmx.py b/src/massive/rest/models/tmx.py similarity index 100% rename from massive/rest/models/tmx.py rename to src/massive/rest/models/tmx.py diff --git a/massive/rest/models/trades.py b/src/massive/rest/models/trades.py similarity index 100% rename from massive/rest/models/trades.py rename to src/massive/rest/models/trades.py diff --git a/massive/rest/quotes.py b/src/massive/rest/quotes.py similarity index 100% rename from massive/rest/quotes.py rename to src/massive/rest/quotes.py diff --git a/massive/rest/reference.py b/src/massive/rest/reference.py similarity index 99% rename from massive/rest/reference.py rename to src/massive/rest/reference.py index 06ad037f..12b347b6 100644 --- a/massive/rest/reference.py +++ b/src/massive/rest/reference.py @@ -123,7 +123,7 @@ def list_tickers( :return: List of tickers. 
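        Example (illustrative; assumes MASSIVE_API_KEY is set in the
        environment, mirroring the call in processor/us_daily/ticker_lister.py):

            client = RESTClient()
            for t in client.list_tickers(market="stocks", active=True, limit=1000):
                print(t.ticker)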
""" url = "/v3/reference/tickers" - + print("DEBUG_11") return self._paginate( path=url, params=self._get_params(self.list_tickers, locals()), diff --git a/massive/rest/snapshot.py b/src/massive/rest/snapshot.py similarity index 100% rename from massive/rest/snapshot.py rename to src/massive/rest/snapshot.py diff --git a/massive/rest/summaries.py b/src/massive/rest/summaries.py similarity index 100% rename from massive/rest/summaries.py rename to src/massive/rest/summaries.py diff --git a/massive/rest/tmx.py b/src/massive/rest/tmx.py similarity index 100% rename from massive/rest/tmx.py rename to src/massive/rest/tmx.py diff --git a/massive/rest/trades.py b/src/massive/rest/trades.py similarity index 100% rename from massive/rest/trades.py rename to src/massive/rest/trades.py diff --git a/massive/rest/vX.py b/src/massive/rest/vX.py similarity index 100% rename from massive/rest/vX.py rename to src/massive/rest/vX.py diff --git a/massive/websocket/__init__.py b/src/massive/websocket/__init__.py similarity index 99% rename from massive/websocket/__init__.py rename to src/massive/websocket/__init__.py index 0d5409cd..ef82ac9c 100644 --- a/massive/websocket/__init__.py +++ b/src/massive/websocket/__init__.py @@ -1,4 +1,5 @@ import os +from dotenv import load_dotenv from enum import Enum from typing import Optional, Union, List, Set, Callable, Awaitable, Any import logging @@ -13,6 +14,8 @@ import logging from ..exceptions import AuthError +load_dotenv() + env_key = "MASSIVE_API_KEY" logger = get_logger("WebSocketClient") diff --git a/massive/websocket/models/__init__.py b/src/massive/websocket/models/__init__.py similarity index 100% rename from massive/websocket/models/__init__.py rename to src/massive/websocket/models/__init__.py diff --git a/massive/websocket/models/common.py b/src/massive/websocket/models/common.py similarity index 100% rename from massive/websocket/models/common.py rename to src/massive/websocket/models/common.py diff --git a/massive/websocket/models/models.py b/src/massive/websocket/models/models.py similarity index 100% rename from massive/websocket/models/models.py rename to src/massive/websocket/models/models.py diff --git a/src/processor/__init__.py b/src/processor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/processor/us_daily/__init__.py b/src/processor/us_daily/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py new file mode 100644 index 00000000..84f5b253 --- /dev/null +++ b/src/processor/us_daily/__main__.py @@ -0,0 +1,142 @@ +import logging +import os +import sys + +from massive import RESTClient + +from processor.us_daily.config import load_config +from processor.us_daily.ticker_lister import list_all_tickers, _get_tickers_file +from processor.us_daily.agg_fetcher import fetch_ticker_aggs +from processor.us_daily.sources.akshare_source import AkshareSource +from processor.us_daily.sources.yfinance_source import YfinanceSource +from processor.us_daily.sources.massive_source import MassiveSource +from processor.us_daily.sources.manager import SourceManager +from processor.us_daily.storage import load_json, file_exists + + +SOURCE_CLASSES = { + "akshare": AkshareSource, + "yfinance": YfinanceSource, + "massive": MassiveSource, +} + + +def setup_logging(): + os.makedirs("logs", exist_ok=True) + logger = logging.getLogger("us_daily") + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", 
+ datefmt="%Y-%m-%d %H:%M:%S", + ) + + file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def build_source_manager(config, client) -> SourceManager: + """Build SourceManager from config priority list.""" + interval_map = { + "akshare": config.akshare_interval, + "yfinance": config.yfinance_interval, + "massive": config.massive_interval, + } + sources = [] + for name in config.data_source_priority: + cls = SOURCE_CLASSES.get(name) + if cls is None: + continue + if name == "massive": + sources.append(cls(client=client, request_interval=interval_map[name])) + else: + sources.append(cls(request_interval=interval_map[name])) + return SourceManager(sources) + + +def load_all_tickers(config) -> list: + """Load tickers from the tickers file.""" + file_path = _get_tickers_file(config) + if not file_exists(file_path): + return [] + data = load_json(file_path) + return data.get("tickers", []) + + +def main(): + logger = setup_logging() + config = load_config() + + logger.info("=== US Daily Data Fetcher Started ===") + logger.info(f"Config: {config}") + + client = RESTClient() + + # Step 1: Fetch ticker list + tickers_file = _get_tickers_file(config) + if config.refresh_tickers or not file_exists(tickers_file): + logger.info("Fetching ticker list...") + list_all_tickers(client, config) + + # Load all tickers + tickers = load_all_tickers(config) + logger.info(f"Total tickers loaded: {len(tickers)}") + + # Step 2: Fetch daily data + source_manager = build_source_manager(config, client) + + # Filter tickers by market cap + if config.market_cap_min > 0: + filtered = [] + for t in tickers: + market_cap = t.get("market_cap") + if market_cap is not None and market_cap >= config.market_cap_min: + filtered.append(t) + elif market_cap is None: + logger.debug( + f"Skipping {t['ticker']}: market_cap is None" + ) + else: + logger.debug( + f"Skipping {t['ticker']}: market_cap={market_cap:.0f} < {config.market_cap_min:.0f}" + ) + logger.info( + f"Filtered by market_cap >= {config.market_cap_min:.0f}: {len(filtered)}/{len(tickers)} tickers" + ) + tickers = filtered + + all_failures = [] + all_bars = 0 + total = len(tickers) + for i, ticker_info in enumerate(tickers): + ticker = ticker_info["ticker"] + logger.info(f"[{i + 1}/{total}] Processing {ticker}") + result = fetch_ticker_aggs(source_manager, ticker, config) + all_bars += result["total_bars"] + if result["failures"]: + all_failures.extend(result["failures"]) + + # Step 3: Summary + logger.info("=== Summary ===") + logger.info(f"Total tickers: {total}, total bars fetched: {all_bars}") + if all_failures: + logger.warning(f"Failed years: {len(all_failures)}") + for f in all_failures: + logger.warning(f" - {f['ticker']} {f['year']}: {f['error']}") + else: + logger.info("All data fetched successfully") + logger.info("=== Done ===") + + +if __name__ == "__main__": + main() diff --git a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py new file mode 100644 index 00000000..f30a2b9d --- /dev/null +++ b/src/processor/us_daily/agg_fetcher.py @@ -0,0 +1,75 @@ +import logging +from datetime import date, datetime +from typing import List + +from processor.us_daily.config import Config +from 
processor.us_daily.sources.manager import FetchError
+from processor.us_daily.storage import (
+    get_year_file_path,
+    file_exists,
+    save_json,
+)
+
+logger = logging.getLogger("us_daily")
+
+
+def generate_years(start_year: int, end_year: int) -> List[int]:
+    return list(range(start_year, end_year + 1))
+
+
+def get_year_bounds(year: int) -> tuple:
+    return f"{year}-01-01", f"{year}-12-31"
+
+
+def is_current_year(year: int) -> bool:
+    return year == date.today().year
+
+
+def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict:
+    """Fetch yearly OHLCV data for a ticker using SourceManager.
+
+    Args:
+        source_manager: SourceManager instance with failover sources.
+        ticker: Stock ticker symbol (e.g. "AAPL").
+        config: Config with daily_data_dir, start_year, max_retries.
+
+    Returns:
+        Dict with a "failures" list of failed-year records and a
+        "total_bars" count of bars fetched.
+    """
+    years = generate_years(config.start_year, date.today().year)
+    failures = []
+    total_bars = 0
+
+    for year in years:
+        file_path = get_year_file_path(config.daily_data_dir, ticker, year)
+
+        if file_exists(file_path) and not is_current_year(year):
+            logger.debug(f"  {ticker} {year}: exists, skipping")
+            continue
+
+        start_date, end_date = get_year_bounds(year)
+
+        try:
+            df, source_name = source_manager.fetch_daily(ticker, start_date, end_date)
+        except FetchError as e:
+            failures.append({
+                "ticker": ticker,
+                "year": year,
+                "error": str(e),
+            })
+            logger.error(f"  {ticker} {year}: {e}")
+            continue
+
+        data = {
+            "ticker": ticker,
+            "year": year,
+            "source": source_name,
+            "count": len(df),
+            "fetched_at": datetime.now().isoformat(timespec="seconds"),
+            "data": df.to_dict(orient="records"),
+        }
+        save_json(file_path, data)
+        total_bars += len(df)
+        logger.info(f"  {ticker} {year}: fetched {len(df)} bars from {source_name}")
+
+    return {"failures": failures, "total_bars": total_bars}
diff --git a/src/processor/us_daily/config.json b/src/processor/us_daily/config.json
new file mode 100644
index 00000000..69ab49b2
--- /dev/null
+++ b/src/processor/us_daily/config.json
@@ -0,0 +1,9 @@
+{
+  "refresh_tickers": true,
+  "market_cap_min": 1000000000,
+  "start_year": 2024,
+  "request_interval": 12,
+  "list_data_dir": "data/us_list",
+  "daily_data_dir": "data/us_daily",
+  "max_retries": 3
+}
diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py
new file mode 100644
index 00000000..851f049b
--- /dev/null
+++ b/src/processor/us_daily/config.py
@@ -0,0 +1,33 @@
+import json
+import os
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+
+@dataclass
+class Config:
+    refresh_tickers: bool = False
+    start_year: int = 2024
+    data_source_priority: List[str] = field(
+        default_factory=lambda: ["massive", "akshare", "yfinance"]
+    )
+    market_cap_min: float = 1_000_000_000
+    akshare_interval: float = 2.0
+    yfinance_interval: float = 1.0
+    massive_interval: float = 12.0
+    list_data_dir: str = "data/us_list"
+    daily_data_dir: str = "data/us_daily"
+    max_retries: int = 3
+
+
+def load_config(config_path: Optional[str] = None) -> Config:
+    config = Config()
+    if config_path is None:
+        config_path = os.path.join(os.path.dirname(__file__), "config.json")
+    if os.path.exists(config_path):
+        with open(config_path, "r") as f:
+            data = json.load(f)
+        for key, value in data.items():
+            if hasattr(config, key):
+                setattr(config, key, value)
+    return config
diff --git a/src/processor/us_daily/sources/__init__.py b/src/processor/us_daily/sources/__init__.py
new file mode 100644
index 00000000..6dd559d0
--- /dev/null
+++ 
b/src/processor/us_daily/sources/__init__.py @@ -0,0 +1,3 @@ +from processor.us_daily.sources.manager import SourceManager, FetchError + +__all__ = ["SourceManager", "FetchError"] diff --git a/src/processor/us_daily/sources/akshare_source.py b/src/processor/us_daily/sources/akshare_source.py new file mode 100644 index 00000000..86bce45e --- /dev/null +++ b/src/processor/us_daily/sources/akshare_source.py @@ -0,0 +1,43 @@ +import logging + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + +try: + import akshare as ak +except ImportError: # pragma: no cover + ak = None # type: ignore[assignment] + + +class AkshareSource(BaseSource): + name = "akshare" + + def __init__(self, request_interval: float = 2.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + if ak is None: + raise ImportError("akshare is not installed") + + symbol = ticker.strip().upper() + logger.debug(f"[akshare] fetching {symbol} {start_date}~{end_date}") + + df = ak.stock_us_daily(symbol=symbol, adjust="qfq") + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df["date"] = pd.to_datetime(df["date"]) + start_dt = pd.to_datetime(start_date) + end_dt = pd.to_datetime(end_date) + df = df[(df["date"] >= start_dt) & (df["date"] <= end_dt)] + + if df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df["date"] = df["date"].dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df diff --git a/src/processor/us_daily/sources/base.py b/src/processor/us_daily/sources/base.py new file mode 100644 index 00000000..19560d6f --- /dev/null +++ b/src/processor/us_daily/sources/base.py @@ -0,0 +1,19 @@ +from abc import ABC, abstractmethod + +import pandas as pd + +STANDARD_COLUMNS = ["date", "open", "high", "low", "close", "volume"] + + +class BaseSource(ABC): + name: str + request_interval: float + + @abstractmethod + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch daily OHLCV data for a US stock ticker. + + Returns a DataFrame with columns matching STANDARD_COLUMNS. + Raises on unrecoverable errors. Returns empty DataFrame if no data. + """ + ... diff --git a/src/processor/us_daily/sources/manager.py b/src/processor/us_daily/sources/manager.py new file mode 100644 index 00000000..7ce7fe57 --- /dev/null +++ b/src/processor/us_daily/sources/manager.py @@ -0,0 +1,45 @@ +import logging +import time +from typing import List, Tuple + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource + +logger = logging.getLogger("us_daily") + + +class FetchError(Exception): + """Raised when all data sources fail.""" + pass + + +class SourceManager: + def __init__(self, sources: List[BaseSource]): + self.sources = sources + + def fetch_daily( + self, ticker: str, start_date: str, end_date: str + ) -> Tuple[pd.DataFrame, str]: + """Try each source in priority order. Return (df, source_name). + + Raises FetchError if all sources fail or return empty data. 
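+
+        Example (illustrative; source order mirrors Config.data_source_priority):
+
+            manager = SourceManager([MassiveSource(client), YfinanceSource()])
+            df, used = manager.fetch_daily("AAPL", "2024-01-01", "2024-12-31")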
+ """ + errors = [] + for source in self.sources: + try: + df = source.fetch_daily(ticker, start_date, end_date) + if df is not None and not df.empty: + time.sleep(source.request_interval) + return df, source.name + else: + logger.debug( + f"{source.name} returned empty data for {ticker}" + ) + except Exception as e: + logger.warning(f"{source.name} failed for {ticker}: {e}") + errors.append(f"{source.name}: {e}") + continue + raise FetchError( + f"All sources failed for {ticker}: {'; '.join(errors)}" + ) diff --git a/src/processor/us_daily/sources/massive_source.py b/src/processor/us_daily/sources/massive_source.py new file mode 100644 index 00000000..555a0a19 --- /dev/null +++ b/src/processor/us_daily/sources/massive_source.py @@ -0,0 +1,49 @@ +import logging +from datetime import datetime, timezone + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class MassiveSource(BaseSource): + name = "massive" + + def __init__(self, client, request_interval: float = 12.0): + self.client = client + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + logger.debug(f"[massive] fetching {ticker} {start_date}~{end_date}") + + aggs = list( + self.client.list_aggs( + ticker, 1, "day", + from_=start_date, to=end_date, + adjusted=True, sort="asc", + ) + ) + + if not aggs: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + rows = [] + for a in aggs: + dt = datetime.fromtimestamp(a.timestamp / 1000, tz=timezone.utc) + rows.append({ + "date": dt.strftime("%Y-%m-%d"), + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + "vwap": a.vwap, + "transactions": a.transactions, + "otc": a.otc, + }) + + columns = STANDARD_COLUMNS + ["vwap", "transactions", "otc"] + df = pd.DataFrame(rows, columns=columns) + return df diff --git a/src/processor/us_daily/sources/yfinance_source.py b/src/processor/us_daily/sources/yfinance_source.py new file mode 100644 index 00000000..d5061705 --- /dev/null +++ b/src/processor/us_daily/sources/yfinance_source.py @@ -0,0 +1,44 @@ +import logging + +import pandas as pd + +try: + import yfinance as yf +except ImportError: # pragma: no cover + yf = None # type: ignore[assignment] + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class YfinanceSource(BaseSource): + name = "yfinance" + + def __init__(self, request_interval: float = 1.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + symbol = ticker.strip().upper() + logger.debug(f"[yfinance] fetching {symbol} {start_date}~{end_date}") + + t = yf.Ticker(symbol) + df = t.history(start=start_date, end=end_date) + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df.index.name = "Date" + df = df.reset_index() + df = df.rename(columns={ + "Date": "date", + "Open": "open", + "High": "high", + "Low": "low", + "Close": "close", + "Volume": "volume", + }) + + df["date"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df diff --git a/src/processor/us_daily/storage.py b/src/processor/us_daily/storage.py new file mode 100644 index 00000000..526a2218 --- /dev/null +++ b/src/processor/us_daily/storage.py @@ -0,0 +1,29 @@ +import json +import os + + +def get_tickers_file_path(data_dir: str) -> 
str: + return os.path.join(data_dir, "top_tickers.json") + + +def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: + return os.path.join(data_dir, ticker, f"{month}.json") + + +def get_year_file_path(data_dir: str, ticker: str, year: int) -> str: + return os.path.join(data_dir, ticker, f"{year}.json") + + +def save_json(path: str, data: dict) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + +def load_json(path: str) -> dict: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def file_exists(path: str) -> bool: + return os.path.isfile(path) diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py new file mode 100644 index 00000000..38c19fbe --- /dev/null +++ b/src/processor/us_daily/ticker_lister.py @@ -0,0 +1,112 @@ +import logging +import time +from datetime import date +from typing import Dict, List + +from processor.us_daily.config import Config +from processor.us_daily.storage import save_json, load_json, file_exists + +logger = logging.getLogger("us_daily") + +TICKERS_FILE = "tickers.json" + + +def _details_to_dict(details) -> dict: + """Convert a TickerDetails object to a plain dict, dropping None values.""" + result = {} + for key, value in vars(details).items(): + if key.startswith("_"): + continue + if value is None: + continue + if hasattr(value, "__dict__") and not isinstance(value, (str, int, float, bool)): + value = {k: v for k, v in vars(value).items() if not k.startswith("_") and v is not None} + result[key] = value + return result + + +def _get_tickers_file(config: Config) -> str: + import os + return os.path.join(config.list_data_dir, TICKERS_FILE) + + +def list_all_tickers(client, config: Config) -> List[dict]: + """Fetch all US stock tickers and save to file. + + Supports resume: if the output file already exists, previously fetched + tickers are kept and only missing ones are fetched. 
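+
+    Example (illustrative; client and config as wired up in __main__.py):
+
+        config = load_config()
+        tickers = list_all_tickers(RESTClient(), config)
+        print(f"{len(tickers)} tickers saved under {config.list_data_dir}")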
+ """ + file_path = _get_tickers_file(config) + + # Load existing tickers for resume + existing_tickers: Dict[str, dict] = {} + if file_exists(file_path): + data = load_json(file_path) + for t in data.get("tickers", []): + existing_tickers[t["ticker"]] = t + logger.info( + f"Resuming: {len(existing_tickers)} tickers already fetched" + ) + + # Get full ticker list from API (all US stocks, no exchange filter) + logger.info("Listing all US stock tickers") + try: + ticker_objs = list( + client.list_tickers( + market="stocks", active=True, limit=1000 + ) + ) + except Exception as e: + logger.error(f"Failed to list tickers: {e}") + return list(existing_tickers.values()) + + time.sleep(config.massive_interval) + logger.info(f"Found {len(ticker_objs)} tickers") + + # Fetch details for new tickers only + new_count = 0 + for i, ticker_obj in enumerate(ticker_objs): + # if new_count >= 6: + # break + + ticker_str = ticker_obj.ticker + if ticker_str in existing_tickers: + continue + + try: + details = client.get_ticker_details(ticker_str) + entry = _details_to_dict(details) + existing_tickers[ticker_str] = entry + new_count += 1 + logger.info( + f"[{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" + ) + except Exception as e: + logger.warning( + f"[{i + 1}/{len(ticker_objs)}] {ticker_str}: {e}" + ) + + time.sleep(config.massive_interval) + + # Flush to disk every 100 new details to avoid losing progress + if new_count > 0 and new_count % 20 == 0: + tickers_list = list(existing_tickers.values()) + save_json(file_path, { + "updated_at": date.today().strftime("%Y-%m-%d"), + "count": len(tickers_list), + "tickers": tickers_list, + }) + logger.info( + f"Checkpoint: saved {len(tickers_list)} tickers to {file_path}" + ) + + # Final save + tickers_list = list(existing_tickers.values()) + save_json(file_path, { + "updated_at": date.today().strftime("%Y-%m-%d"), + "count": len(tickers_list), + "tickers": tickers_list, + }) + + logger.info(f"Saved {len(tickers_list)} tickers to {file_path}") + return tickers_list diff --git a/src/provider/__init__.py b/src/provider/__init__.py new file mode 100644 index 00000000..5973abc3 --- /dev/null +++ b/src/provider/__init__.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +""" +=================================== +数据源策略层 - 包初始化 +=================================== + +本包实现策略模式管理多个数据源,实现: +1. 统一的数据获取接口 +2. 自动故障切换 +3. 防封禁流控策略 + +数据源优先级(动态调整): +【配置了 TUSHARE_TOKEN 时】 +1. TushareFetcher (Priority 0) - 🔥 最高优先级(动态提升) +2. EfinanceFetcher (Priority 0) - 同优先级 +3. AkshareFetcher (Priority 1) - 来自 akshare 库 +4. PytdxFetcher (Priority 2) - 来自 pytdx 库(通达信) +5. BaostockFetcher (Priority 3) - 来自 baostock 库 +6. YfinanceFetcher (Priority 4) - 来自 yfinance 库 + +【未配置 TUSHARE_TOKEN 时】 +1. EfinanceFetcher (Priority 0) - 最高优先级,来自 efinance 库 +2. AkshareFetcher (Priority 1) - 来自 akshare 库 +3. PytdxFetcher (Priority 2) - 来自 pytdx 库(通达信) +4. TushareFetcher (Priority 2) - 来自 tushare 库(不可用) +5. BaostockFetcher (Priority 3) - 来自 baostock 库 +6. YfinanceFetcher (Priority 4) - 来自 yfinance 库 +7. 
LongbridgeFetcher (Priority 5) - 长桥 OpenAPI(美股/港股兜底) + +提示:优先级数字越小越优先,同优先级按初始化顺序排列 +""" + +from .base import BaseFetcher, DataFetcherManager +from .efinance_fetcher import EfinanceFetcher +from .akshare_fetcher import AkshareFetcher, is_hk_stock_code +from .tushare_fetcher import TushareFetcher +from .pytdx_fetcher import PytdxFetcher +from .baostock_fetcher import BaostockFetcher +from .yfinance_fetcher import YfinanceFetcher +from .longbridge_fetcher import LongbridgeFetcher +from .us_index_mapping import is_us_index_code, is_us_stock_code, get_us_index_yf_symbol, US_INDEX_MAPPING + +__all__ = [ + 'BaseFetcher', + 'DataFetcherManager', + 'EfinanceFetcher', + 'AkshareFetcher', + 'TushareFetcher', + 'PytdxFetcher', + 'BaostockFetcher', + 'YfinanceFetcher', + 'LongbridgeFetcher', + 'is_us_index_code', + 'is_us_stock_code', + 'is_hk_stock_code', + 'get_us_index_yf_symbol', + 'US_INDEX_MAPPING', +] diff --git a/src/provider/_config.py b/src/provider/_config.py new file mode 100644 index 00000000..f50f24e6 --- /dev/null +++ b/src/provider/_config.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +""" +Slim configuration singleton for provider module. + +Reads configuration from environment variables. Only includes attributes +actually used by provider fetchers. +""" + +import os +from dataclasses import dataclass +from threading import Lock +from typing import Optional + + +# --------------------------------------------------------------------------- +# normalize_report_language (extracted from src/report_language.py) +# --------------------------------------------------------------------------- + +SUPPORTED_REPORT_LANGUAGES = ("zh", "en") + +_REPORT_LANGUAGE_ALIASES = { + "zh-cn": "zh", "zh_cn": "zh", "zh-hans": "zh", "zh_hans": "zh", + "zh-tw": "zh", "zh_tw": "zh", "cn": "zh", "chinese": "zh", + "english": "en", "en-us": "en", "en_us": "en", "en-gb": "en", "en_gb": "en", +} + + +def normalize_report_language(value: Optional[str], default: str = "zh") -> str: + """Normalize report language to a supported short code.""" + candidate = (value or default).strip().lower().replace(" ", "_") + candidate = _REPORT_LANGUAGE_ALIASES.get(candidate, candidate) + return candidate if candidate in SUPPORTED_REPORT_LANGUAGES else default + + +# --------------------------------------------------------------------------- +# Config singleton +# --------------------------------------------------------------------------- + +@dataclass +class Config: + # Tushare + tushare_token: str = "" + # Longbridge + longbridge_app_key: str = "" + longbridge_app_secret: str = "" + longbridge_access_token: str = "" + # TickFlow + tickflow_api_key: str = "" + # Feature toggles + enable_eastmoney_patch: bool = True + enable_realtime_quote: bool = True + enable_chip_distribution: bool = True + enable_fundamental_pipeline: bool = True + prefetch_realtime_quotes: bool = True + # Realtime source priority + realtime_source_priority: str = "tencent,akshare,efinance" + # Fundamental pipeline + fundamental_fetch_timeout_seconds: float = 30.0 + fundamental_stage_timeout_seconds: float = 60.0 + fundamental_cache_ttl_seconds: int = 3600 + fundamental_cache_max_entries: int = 256 + fundamental_retry_max: int = 2 + + +_instance: Optional[Config] = None +_lock = Lock() + + +def _env_bool(key: str, default: str = "true") -> bool: + return os.environ.get(key, default).lower() != "false" + + +def get_config() -> Config: + """Return the global Config singleton, creating it on first call.""" + global _instance + if _instance is not None: + return 
_instance + with _lock: + if _instance is not None: + return _instance + _instance = Config( + tushare_token=os.environ.get("TUSHARE_TOKEN", ""), + longbridge_app_key=os.environ.get("LONGBRIDGE_APP_KEY", ""), + longbridge_app_secret=os.environ.get("LONGBRIDGE_APP_SECRET", ""), + longbridge_access_token=os.environ.get("LONGBRIDGE_ACCESS_TOKEN", ""), + tickflow_api_key=os.environ.get("TICKFLOW_API_KEY", ""), + enable_eastmoney_patch=_env_bool("ENABLE_EASTMONEY_PATCH"), + enable_realtime_quote=_env_bool("ENABLE_REALTIME_QUOTE"), + enable_chip_distribution=_env_bool("ENABLE_CHIP_DISTRIBUTION"), + enable_fundamental_pipeline=_env_bool("ENABLE_FUNDAMENTAL_PIPELINE"), + prefetch_realtime_quotes=_env_bool("PREFETCH_REALTIME_QUOTES"), + realtime_source_priority=os.environ.get( + "REALTIME_SOURCE_PRIORITY", "tencent,akshare,efinance" + ), + fundamental_fetch_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_FETCH_TIMEOUT_SECONDS", "30") + ), + fundamental_stage_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_STAGE_TIMEOUT_SECONDS", "60") + ), + fundamental_cache_ttl_seconds=int( + os.environ.get("FUNDAMENTAL_CACHE_TTL_SECONDS", "3600") + ), + fundamental_cache_max_entries=int( + os.environ.get("FUNDAMENTAL_CACHE_MAX_ENTRIES", "256") + ), + fundamental_retry_max=int( + os.environ.get("FUNDAMENTAL_RETRY_MAX", "2") + ), + ) + return _instance diff --git a/src/provider/_data/__init__.py b/src/provider/_data/__init__.py new file mode 100644 index 00000000..d699b040 --- /dev/null +++ b/src/provider/_data/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +from provider._data.stock_mapping import STOCK_NAME_MAP + +__all__ = ["STOCK_NAME_MAP"] diff --git a/src/provider/_data/stock_index_loader.py b/src/provider/_data/stock_index_loader.py new file mode 100644 index 00000000..61685f1d --- /dev/null +++ b/src/provider/_data/stock_index_loader.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import json +import logging +from pathlib import Path +from threading import RLock +from typing import Dict, Iterable + +from provider._data.stock_mapping import is_meaningful_stock_name + +logger = logging.getLogger(__name__) + +_STOCK_INDEX_FILENAME = "stocks.index.json" +_STOCK_INDEX_CACHE: Dict[str, str] | None = None +_STOCK_INDEX_CACHE_LOCK = RLock() + + +def get_stock_index_candidate_paths() -> tuple[Path, ...]: + """Return the supported locations for the generated stock index.""" + repo_root = Path(__file__).resolve().parents[2] + return ( + repo_root / "apps" / "dsa-web" / "public" / _STOCK_INDEX_FILENAME, + repo_root / "static" / _STOCK_INDEX_FILENAME, + ) + + +def _add_lookup_key(keys: set[str], value: str) -> None: + candidate = str(value or "").strip() + if not candidate: + return + keys.add(candidate) + keys.add(candidate.upper()) + + +def _build_lookup_keys(canonical_code: str, display_code: str) -> Iterable[str]: + keys: set[str] = set() + _add_lookup_key(keys, canonical_code) + _add_lookup_key(keys, display_code) + + canonical_upper = str(canonical_code or "").strip().upper() + display_upper = str(display_code or "").strip().upper() + + if "." 
in canonical_upper: + base, suffix = canonical_upper.rsplit(".", 1) + if suffix in {"SH", "SZ", "SS", "BJ"} and base.isdigit(): + _add_lookup_key(keys, base) + elif suffix == "HK" and base.isdigit() and 1 <= len(base) <= 5: + digits = base.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + for candidate in (canonical_upper, display_upper): + if candidate.startswith("HK"): + digits = candidate[2:] + if digits.isdigit() and 1 <= len(digits) <= 5: + digits = digits.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + return keys + + +def _load_stock_index_file(index_path: Path) -> Dict[str, str]: + with index_path.open("r", encoding="utf-8") as fh: + raw_items = json.load(fh) + + if not isinstance(raw_items, list): + raise ValueError( + f"Unexpected {_STOCK_INDEX_FILENAME} payload type: {type(raw_items).__name__}" + ) + + stock_name_map: Dict[str, str] = {} + for item in raw_items: + if not isinstance(item, list) or len(item) < 3: + continue + + canonical_code, display_code, name_zh = item[0], item[1], item[2] + if not is_meaningful_stock_name(name_zh, str(display_code or canonical_code or "")): + continue + + for key in _build_lookup_keys(str(canonical_code or ""), str(display_code or "")): + stock_name_map[key] = str(name_zh).strip() + + return stock_name_map + + +def get_stock_name_index_map() -> Dict[str, str]: + """Lazily load and cache the generated stock-name index.""" + global _STOCK_INDEX_CACHE + + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + with _STOCK_INDEX_CACHE_LOCK: + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + for candidate_path in get_stock_index_candidate_paths(): + if not candidate_path.is_file(): + continue + + try: + _STOCK_INDEX_CACHE = _load_stock_index_file(candidate_path) + logger.debug( + "[股票名称] 已加载前端股票索引映射: %s (%d 条)", + candidate_path, + len(_STOCK_INDEX_CACHE), + ) + return _STOCK_INDEX_CACHE + except (OSError, TypeError, ValueError) as exc: + logger.debug("[股票名称] 读取股票索引失败 %s: %s", candidate_path, exc) + + _STOCK_INDEX_CACHE = {} + return _STOCK_INDEX_CACHE + + +def get_index_stock_name(stock_code: str) -> str | None: + """Resolve a stock name from the generated frontend stock index.""" + code = str(stock_code or "").strip() + if not code: + return None + + stock_name_map = get_stock_name_index_map() + for key in _build_lookup_keys(code, code): + name = stock_name_map.get(key) + if is_meaningful_stock_name(name, code): + return name + + return None + + +def _clear_stock_index_cache_for_tests() -> None: + global _STOCK_INDEX_CACHE + with _STOCK_INDEX_CACHE_LOCK: + _STOCK_INDEX_CACHE = None diff --git a/src/provider/_data/stock_mapping.py b/src/provider/_data/stock_mapping.py new file mode 100644 index 00000000..211811bf --- /dev/null +++ b/src/provider/_data/stock_mapping.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +""" +=================================== +股票代码与名称映射 +=================================== + +Shared stock code -> name mapping, used by analyzer, data_provider, and name_to_code_resolver. 
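+
+Example (illustrative lookups against the tables defined below):
+
+    STOCK_NAME_MAP.get("600519")             # -> "贵州茅台"
+    is_meaningful_stock_name("N/A", "AAPL")  # -> False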
+""" + +# Stock code -> name mapping (common stocks) +STOCK_NAME_MAP = { + # === A-shares === + "600519": "贵州茅台", + "000001": "平安银行", + "300750": "宁德时代", + "002594": "比亚迪", + "600036": "招商银行", + "601318": "中国平安", + "000858": "五粮液", + "600276": "恒瑞医药", + "601012": "隆基绿能", + "002475": "立讯精密", + "300059": "东方财富", + "002415": "海康威视", + "600900": "长江电力", + "601166": "兴业银行", + "600028": "中国石化", + "600030": "中信证券", + "600031": "三一重工", + "600050": "中国联通", + "600104": "上汽集团", + "600111": "北方稀土", + "600150": "中国船舶", + "600309": "万华化学", + "600406": "国电南瑞", + "600690": "海尔智家", + "600760": "中航沈飞", + "600809": "山西汾酒", + "600887": "伊利股份", + "600930": "华电新能", + "601088": "中国神华", + "601127": "赛力斯", + "601211": "国泰海通", + "601225": "陕西煤业", + "601288": "农业银行", + "601328": "交通银行", + "601398": "工商银行", + "601601": "中国太保", + "601628": "中国人寿", + "601658": "邮储银行", + "601668": "中国建筑", + "601728": "中国电信", + "601816": "京沪高铁", + "601857": "中国石油", + "601888": "中国中免", + "601899": "紫金矿业", + "601919": "中远海控", + "601985": "中国核电", + "601988": "中国银行", + "603019": "中科曙光", + "603259": "药明康德", + "603501": "豪威集团", + "603993": "洛阳钼业", + "688008": "澜起科技", + "688012": "中微公司", + "688041": "海光信息", + "688111": "金山办公", + "688256": "寒武纪", + "688981": "中芯国际", + # === US stocks === + "AAPL": "苹果", + "TSLA": "特斯拉", + "MSFT": "微软", + "GOOGL": "谷歌A", + "GOOG": "谷歌C", + "AMZN": "亚马逊", + "NVDA": "英伟达", + "META": "Meta", + "AMD": "AMD", + "INTC": "英特尔", + "BABA": "阿里巴巴", + "PDD": "拼多多", + "JD": "京东", + "BIDU": "百度", + "NIO": "蔚来", + "XPEV": "小鹏汽车", + "LI": "理想汽车", + "COIN": "Coinbase", + "MSTR": "MicroStrategy", + # === HK stocks (5-digit) === + "00700": "腾讯控股", + "03690": "美团", + "01810": "小米集团", + "09988": "阿里巴巴", + "09618": "京东集团", + "09888": "百度集团", + "01024": "快手", + "00981": "中芯国际", + "02015": "理想汽车", + "09868": "小鹏汽车", + "00005": "汇丰控股", + "01299": "友邦保险", + "00941": "中国移动", + "00883": "中国海洋石油", +} + + +def is_meaningful_stock_name(name: str | None, stock_code: str) -> bool: + """Return whether a stock name is useful for display or caching.""" + if not name: + return False + + normalized_name = str(name).strip() + if not normalized_name: + return False + + normalized_code = (stock_code or "").strip().upper() + if normalized_name.upper() == normalized_code: + return False + + if normalized_name.startswith("股票"): + return False + + placeholder_values = { + "N/A", + "NA", + "NONE", + "NULL", + "--", + "-", + "UNKNOWN", + "TICKER", + } + if normalized_name.upper() in placeholder_values: + return False + + return True diff --git a/src/provider/_patch/__init__.py b/src/provider/_patch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/provider/_patch/eastmoney_patch.py b/src/provider/_patch/eastmoney_patch.py new file mode 100644 index 00000000..b0e08107 --- /dev/null +++ b/src/provider/_patch/eastmoney_patch.py @@ -0,0 +1,182 @@ +import hashlib +import random +import secrets +import threading +import time +import requests +import json +import uuid +import logging +from fake_useragent import UserAgent + +logger = logging.getLogger(__name__) + +original_request = requests.Session.request + +ua = UserAgent() + + +class AuthCache: + def __init__(self): + self.data = None + self.expire_at = 0 + self.lock = threading.Lock() + self.ttl = 20 + + +_cache = AuthCache() + + +class PatchSign: + def __init__(self): + self.patched = False + + def set_patch(self, patched): + self.patched = patched + + def is_patched(self): + return self.patched + + +_patch_sign = PatchSign() + + +def _get_nid(user_agent): + """ + 获取东方财富的 NID 授权令牌 + + 
Args: + user_agent (str): 用户代理字符串,用于模拟不同的浏览器访问 + + Returns: + str: 返回获取到的 NID 授权令牌,如果获取失败则返回 None + + 功能说明: + 该函数通过向东方财富的授权接口发送请求来获取 NID 令牌, + 用于后续的数据访问授权。函数实现了缓存机制来避免频繁请求。 + """ + now = time.time() + # 检查缓存是否有效,避免重复请求 + if _cache.data and now < _cache.expire_at: + return _cache.data + # 使用线程锁确保并发安全 + with _cache.lock: + try: + def generate_uuid_md5(): + """ + 生成 UUID 并对其进行 MD5 哈希处理 + :return: MD5 哈希值(32位十六进制字符串) + """ + # 生成 UUID + unique_id = str(uuid.uuid4()) + # 对 UUID 进行 MD5 哈希 + md5_hash = hashlib.md5(unique_id.encode('utf-8')).hexdigest() + return md5_hash + + def generate_st_nvi(): + """ + 生成 st_nvi 值的方法 + :return: 返回生成的 st_nvi 值 + """ + HASH_LENGTH = 4 # 截取哈希值的前几位 + + def generate_random_string(length=21): + """ + 生成指定长度的随机字符串 + :param length: 字符串长度,默认为 21 + :return: 随机字符串 + """ + charset = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict" + return ''.join(secrets.choice(charset) for _ in range(length)) + + def sha256(input_str): + """ + 计算 SHA-256 哈希值 + :param input_str: 输入字符串 + :return: 哈希值(十六进制) + """ + return hashlib.sha256(input_str.encode('utf-8')).hexdigest() + + random_str = generate_random_string() + hash_prefix = sha256(random_str)[:HASH_LENGTH] + return random_str + hash_prefix + + url = "https://anonflow2.eastmoney.com/backend/api/webreport" + # 随机选择屏幕分辨率,增加请求的真实性 + screen_resolution = random.choice(['1920X1080', '2560X1440', '3840X2160']) + payload = json.dumps({ + "osPlatform": "Windows", + "sourceType": "WEB", + "osversion": "Windows 10.0", + "language": "zh-CN", + "timezone": "Asia/Shanghai", + "webDeviceInfo": { + "screenResolution": screen_resolution, + "userAgent": user_agent, + "canvasKey": generate_uuid_md5(), + "webglKey": generate_uuid_md5(), + "fontKey": generate_uuid_md5(), + "audioKey": generate_uuid_md5() + } + }) + headers = { + 'Cookie': f'st_nvi={generate_st_nvi()}', + 'Content-Type': 'application/json' + } + # 增加超时,防止无限等待 + response = requests.request("POST", url, headers=headers, data=payload, timeout=30) + response.raise_for_status() # 对 4xx/5xx 响应抛出 HTTPError + + data = response.json() + nid = data['data']['nid'] + + _cache.data = nid + _cache.expire_at = now + _cache.ttl + return nid + except requests.exceptions.RequestException as e: + logger.warning(f"请求东方财富授权接口失败: {e}") + _cache.data = None + # 该接口请求失败时,方案可能已失效,后续大概率会继续失败,因无法成功获取,下次会继续请求,设置较长过期时间,可避免频繁请求 + _cache.expire_at = now + 5 * 60 + return None + except (KeyError, json.JSONDecodeError) as e: + logger.warning(f"解析东方财富授权接口响应失败: {e}") + _cache.data = None + # 该接口请求失败时,方案可能已失效,后续大概率会继续失败,因无法成功获取,下次会继续请求,设置较长过期时间,可避免频繁请求 + _cache.expire_at = now + 5 * 60 + return None + + +def eastmoney_patch(): + if _patch_sign.is_patched(): + return + + def patched_request(self, method, url, **kwargs): + # 排除非目标域名 + is_target = any( + d in (url or "") + for d in [ + "fund.eastmoney.com", + "push2.eastmoney.com", + "push2his.eastmoney.com", + ] + ) + if not is_target: + return original_request(self, method, url, **kwargs) + # 获取一个随机的 User-Agent + user_agent = ua.random + # 处理 Headers:确保不破坏业务代码传入的 headers + headers = kwargs.get("headers", {}) + headers["User-Agent"] = user_agent + nid = _get_nid(user_agent) + if nid: + headers["Cookie"] = f"nid18={nid}" + kwargs["headers"] = headers + # 随机休眠,降低被封风险 + sleep_time = random.uniform(1, 4) + time.sleep(sleep_time) + return original_request(self, method, url, **kwargs) + + # 全局替换 Session 的 request 入口 + requests.Session.request = patched_request + _patch_sign.set_patch(True) diff --git a/src/provider/akshare_fetcher.py 
b/src/provider/akshare_fetcher.py new file mode 100644 index 00000000..f86133d0 --- /dev/null +++ b/src/provider/akshare_fetcher.py @@ -0,0 +1,1917 @@ +# -*- coding: utf-8 -*- +""" +=================================== +AkshareFetcher - 主数据源 (Priority 1) +=================================== + +数据来源: +1. 东方财富爬虫(通过 akshare 库) - 默认数据源 +2. 新浪财经接口 - 备选数据源 +3. 腾讯财经接口 - 备选数据源 + +特点:免费、无需 Token、数据全面 +风险:爬虫机制易被反爬封禁 + +防封禁策略: +1. 每次请求前随机休眠 2-5 秒 +2. 随机轮换 User-Agent +3. 使用 tenacity 实现指数退避重试 +4. 熔断器机制:连续失败后自动冷却 + +增强数据: +- 实时行情:量比、换手率、市盈率、市净率、总市值、流通市值 +- 筹码分布:获利比例、平均成本、筹码集中度 +""" + +import logging +import os +import random +import time +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional, Dict, Any, List, Tuple + +import pandas as pd +import requests +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from provider._patch.eastmoney_patch import eastmoney_patch +from provider._config import get_config +from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS, is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code +from .realtime_types import ( + UnifiedRealtimeQuote, ChipDistribution, RealtimeSource, + get_realtime_circuit_breaker, get_chip_circuit_breaker, + safe_float, safe_int # 使用统一的类型转换函数 +) +from .us_index_mapping import is_us_index_code, is_us_stock_code + + +# 保留旧的 RealtimeQuote 别名,用于向后兼容 +RealtimeQuote = UnifiedRealtimeQuote + + +logger = logging.getLogger(__name__) + +SINA_REALTIME_ENDPOINT = "hq.sinajs.cn/list" +TENCENT_REALTIME_ENDPOINT = "qt.gtimg.cn/q" + + +# User-Agent 池,用于随机轮换 +USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', +] + + +# 缓存实时行情数据(避免重复请求) +# TTL 设为 20 分钟 (1200秒): +# - 批量分析场景:通常 30 只股票在 5 分钟内分析完,20 分钟足够覆盖 +# - 实时性要求:股票分析不需要秒级实时数据,20 分钟延迟可接受 +# - 防封禁:减少 API 调用频率 +_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 1200 # 20分钟缓存有效期 +} + +# ETF 实时行情缓存 +_etf_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 1200 # 20分钟缓存有效期 +} + + +def _is_etf_code(stock_code: str) -> bool: + """ + 判断代码是否为 ETF 基金 + + ETF 代码规则: + - 上交所 ETF: 51xxxx, 52xxxx, 56xxxx, 58xxxx + - 深交所 ETF: 15xxxx, 16xxxx, 18xxxx + + Args: + stock_code: 股票/基金代码 + + Returns: + True 表示是 ETF 代码,False 表示是普通股票代码 + """ + etf_prefixes = ('51', '52', '56', '58', '15', '16', '18') + code = stock_code.strip().split('.')[0] + return code.startswith(etf_prefixes) and len(code) == 6 + + +def _is_hk_code(stock_code: str) -> bool: + """ + 判断代码是否为港股 + + 港股代码规则: + - 5位数字代码,如 '00700' (腾讯控股) + - 部分港股代码可能带有前缀,如 'hk00700', 'hk1810' + + Args: + stock_code: 股票代码 + + Returns: + True 表示是港股代码,False 表示不是港股代码 + """ + # 去除可能的 'hk' 前缀并检查是否为纯数字 + code = stock_code.strip().lower() + if code.endswith('.hk'): + numeric_part = code[:-3] + return numeric_part.isdigit() and 1 <= len(numeric_part) <= 5 + if code.startswith('hk'): + # 带 hk 前缀的一定是港股,去掉前缀后应为纯数字(1-5位) + numeric_part = code[2:] + return numeric_part.isdigit() and 1 <= 
len(numeric_part) <= 5 + # 无前缀时,5位纯数字才视为港股(避免误判 A 股代码) + return code.isdigit() and len(code) == 5 + + +def is_hk_stock_code(stock_code: str) -> bool: + """ + Public API: determine if a stock code is a Hong Kong stock. + + Delegates to _is_hk_code for internal compatibility. + + Args: + stock_code: Stock code (e.g. '00700', 'hk00700') + + Returns: + True if HK stock, False otherwise + """ + return _is_hk_code(stock_code) + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股股票(不包括美股指数)。 + + 委托给 us_index_mapping 模块的 is_us_stock_code()。 + + Args: + stock_code: 股票代码 + + Returns: + True 表示是美股代码,False 表示不是美股代码 + + Examples: + >>> _is_us_code('AAPL') + True + >>> _is_us_code('TSLA') + True + >>> _is_us_code('SPX') + False + >>> _is_us_code('600519') + False + """ + return is_us_stock_code(stock_code) + + +def _to_sina_tx_symbol(stock_code: str) -> str: + """Convert 6-digit A-share code to sh/sz/bj prefixed symbol for Sina/Tencent APIs.""" + base = (stock_code.strip().split(".")[0] if "." in stock_code else stock_code).strip() + if is_bse_code(base): + return f"bj{base}" + # Shanghai: 60xxxx, 5xxxx (ETF), 90xxxx (B-shares) + if base.startswith(("6", "5", "90")): + return f"sh{base}" + return f"sz{base}" + + +def _classify_realtime_http_error(exc: Exception) -> Tuple[str, str]: + """ + Classify Sina/Tencent realtime quote failures into stable categories. + """ + detail = str(exc).strip() or type(exc).__name__ + lowered = detail.lower() + + remote_disconnect_keywords = ( + "remotedisconnected", + "remote end closed connection without response", + "connection aborted", + "connection broken", + "protocolerror", + "chunkedencodingerror", + ) + timeout_keywords = ( + "timeout", + "timed out", + "readtimeout", + "connecttimeout", + ) + rate_limit_keywords = ( + "banned", + "blocked", + "频率", + "rate limit", + "too many requests", + "429", + "限制", + "forbidden", + "403", + ) + + if any(keyword in lowered for keyword in remote_disconnect_keywords): + return "remote_disconnect", detail + if isinstance(exc, (TimeoutError, requests.exceptions.Timeout)) or any( + keyword in lowered for keyword in timeout_keywords + ): + return "timeout", detail + if any(keyword in lowered for keyword in rate_limit_keywords): + return "rate_limit_or_anti_bot", detail + if isinstance(exc, requests.exceptions.RequestException): + return "request_error", detail + return "unknown_request_error", detail + + +def _build_realtime_failure_message( + source_name: str, + endpoint: str, + stock_code: str, + symbol: str, + category: str, + detail: str, + elapsed: float, + error_type: str, +) -> str: + return ( + f"{source_name} 实时行情接口失败: endpoint={endpoint}, stock_code={stock_code}, " + f"symbol={symbol}, category={category}, error_type={error_type}, " + f"elapsed={elapsed:.2f}s, detail={detail}" + ) + + +class AkshareFetcher(BaseFetcher): + """ + Akshare 数据源实现 + + 优先级:1(最高) + 数据来源:东方财富网爬虫 + + 关键策略: + - 每次请求前随机休眠 2.0-5.0 秒 + - 随机 User-Agent 轮换 + - 失败后指数退避重试(最多3次) + """ + + name = "AkshareFetcher" + priority = int(os.getenv("AKSHARE_PRIORITY", "1")) + + def __init__(self, sleep_min: float = 2.0, sleep_max: float = 5.0): + """ + 初始化 AkshareFetcher + + Args: + sleep_min: 最小休眠时间(秒) + sleep_max: 最大休眠时间(秒) + """ + self.sleep_min = sleep_min + self.sleep_max = sleep_max + self._last_request_time: Optional[float] = None + # 东财补丁开启才执行打补丁操作 + if get_config().enable_eastmoney_patch: + eastmoney_patch() + + def _set_random_user_agent(self) -> None: + """ + 设置随机 User-Agent + + 通过修改 requests Session 的 headers 实现 + 这是关键的反爬策略之一 + """ + try: + 
import akshare as ak + # akshare 内部使用 requests,我们通过环境变量或直接设置来影响 + # 实际上 akshare 可能不直接暴露 session,这里通过 fake_useragent 作为补充 + random_ua = random.choice(USER_AGENTS) + logger.debug(f"设置 User-Agent: {random_ua[:50]}...") + except Exception as e: + logger.debug(f"设置 User-Agent 失败: {e}") + + def _enforce_rate_limit(self) -> None: + """ + 强制执行速率限制 + + 策略: + 1. 检查距离上次请求的时间间隔 + 2. 如果间隔不足,补充休眠时间 + 3. 然后再执行随机 jitter 休眠 + """ + if self._last_request_time is not None: + elapsed = time.time() - self._last_request_time + min_interval = self.sleep_min + if elapsed < min_interval: + additional_sleep = min_interval - elapsed + logger.debug(f"补充休眠 {additional_sleep:.2f} 秒") + time.sleep(additional_sleep) + + # 执行随机 jitter 休眠 + self.random_sleep(self.sleep_min, self.sleep_max) + self._last_request_time = time.time() + + @retry( + stop=stop_after_attempt(3), # 最多重试3次 + wait=wait_exponential(multiplier=1, min=2, max=30), # 指数退避:2, 4, 8... 最大30秒 + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Akshare 获取原始数据 + + 根据代码类型自动选择 API: + - 美股:不支持,抛出异常由 YfinanceFetcher 处理(Issue #311) + - 港股:使用 ak.stock_hk_hist() + - ETF 基金:使用 ak.fund_etf_hist_em() + - 普通 A 股:使用 ak.stock_zh_a_hist() + + 流程: + 1. 判断代码类型(美股/港股/ETF/A股) + 2. 设置随机 User-Agent + 3. 执行速率限制(随机休眠) + 4. 调用对应的 akshare API + 5. 处理返回数据 + """ + # 根据代码类型选择不同的获取方法 + if _is_us_code(stock_code): + # 美股:akshare 的 stock_us_daily 接口复权存在已知问题(参见 Issue #311) + # 交由 YfinanceFetcher 处理,确保复权价格一致 + raise DataFetchError( + f"AkshareFetcher 不支持美股 {stock_code},请使用 YfinanceFetcher 获取正确的复权价格" + ) + elif _is_hk_code(stock_code): + return self._fetch_hk_data(stock_code, start_date, end_date) + elif _is_etf_code(stock_code): + return self._fetch_etf_data(stock_code, start_date, end_date) + else: + return self._fetch_stock_data(stock_code, start_date, end_date) + + def _fetch_stock_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 + + 策略: + 1. 优先尝试东方财富接口 (ak.stock_zh_a_hist) + 2. 失败后尝试新浪财经接口 (ak.stock_zh_a_daily) + 3. 
最后尝试腾讯财经接口 (ak.stock_zh_a_hist_tx) + """ + # 尝试列表 + methods = [ + (self._fetch_stock_data_em, "东方财富"), + (self._fetch_stock_data_sina, "新浪财经"), + (self._fetch_stock_data_tx, "腾讯财经"), + ] + + last_error = None + + for fetch_method, source_name in methods: + try: + logger.info(f"[数据源] 尝试使用 {source_name} 获取 {stock_code}...") + df = fetch_method(stock_code, start_date, end_date) + + if df is not None and not df.empty: + logger.info(f"[数据源] {source_name} 获取成功") + return df + except Exception as e: + last_error = e + logger.warning(f"[数据源] {source_name} 获取失败: {e}") + # 继续尝试下一个 + + # 所有都失败 + raise DataFetchError(f"Akshare 所有渠道获取失败: {last_error}") + + def _fetch_stock_data_em(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 (东方财富) + 数据来源:ak.stock_zh_a_hist() + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.stock_zh_a_hist(symbol={stock_code}, ...)") + + try: + import time as _time + api_start = _time.time() + + df = ak.stock_zh_a_hist( + symbol=stock_code, + period="daily", + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" + ) + + api_elapsed = _time.time() - api_start + + if df is not None and not df.empty: + logger.info(f"[API返回] ak.stock_zh_a_hist 成功: {len(df)} 行, 耗时 {api_elapsed:.2f}s") + return df + else: + logger.warning(f"[API返回] ak.stock_zh_a_hist 返回空数据") + return pd.DataFrame() + + except Exception as e: + error_msg = str(e).lower() + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + raise RateLimitError(f"Akshare(EM) 可能被限流: {e}") from e + raise e + + def _fetch_stock_data_sina(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 (新浪财经) + 数据来源:ak.stock_zh_a_daily() + """ + import akshare as ak + + # 转换代码格式:sh600000, sz000001, bj920748 + symbol = _to_sina_tx_symbol(stock_code) + + self._enforce_rate_limit() + + try: + df = ak.stock_zh_a_daily( + symbol=symbol, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" + ) + + # 标准化新浪数据列名 + # 新浪返回:date, open, high, low, close, volume, amount, outstanding_share, turnover + if df is not None and not df.empty: + # 确保日期列存在 + if 'date' in df.columns: + df = df.rename(columns={'date': '日期'}) + + # 映射其他列以匹配 _normalize_data 的期望 + # _normalize_data 期望:日期, 开盘, 收盘, 最高, 最低, 成交量, 成交额 + rename_map = { + 'open': '开盘', 'high': '最高', 'low': '最低', + 'close': '收盘', 'volume': '成交量', 'amount': '成交额' + } + df = df.rename(columns=rename_map) + + # 计算涨跌幅(新浪接口可能不返回) + if '收盘' in df.columns: + df['涨跌幅'] = df['收盘'].pct_change() * 100 + df['涨跌幅'] = df['涨跌幅'].fillna(0) + + return df + return pd.DataFrame() + + except Exception as e: + raise e + + def _fetch_stock_data_tx(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 (腾讯财经) + 数据来源:ak.stock_zh_a_hist_tx() + """ + import akshare as ak + + # 转换代码格式:sh600000, sz000001, bj920748 + symbol = _to_sina_tx_symbol(stock_code) + + self._enforce_rate_limit() + + try: + df = ak.stock_zh_a_hist_tx( + symbol=symbol, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" + ) + + # 标准化腾讯数据列名 + # 腾讯返回:date, open, close, high, low, volume, amount + if df is not None and not df.empty: + rename_map = { + 'date': '日期', 'open': '开盘', 'high': '最高', + 'low': '最低', 'close': '收盘', 'volume': '成交量', + 'amount': '成交额' + } + df = 
df.rename(columns=rename_map) + + # 腾讯数据通常包含 '涨跌幅',如果没有则计算 + if 'pct_chg' in df.columns: + df = df.rename(columns={'pct_chg': '涨跌幅'}) + elif '收盘' in df.columns: + df['涨跌幅'] = df['收盘'].pct_change() * 100 + df['涨跌幅'] = df['涨跌幅'].fillna(0) + + return df + return pd.DataFrame() + + except Exception as e: + raise e + + def _fetch_etf_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取 ETF 基金历史数据 + + 数据来源:ak.fund_etf_hist_em() + + Args: + stock_code: ETF 代码,如 '512400', '159883' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + ETF 历史数据 DataFrame + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.fund_etf_hist_em(symbol={stock_code}, period=daily, " + f"start_date={start_date.replace('-', '')}, end_date={end_date.replace('-', '')}, adjust=qfq)") + + try: + import time as _time + api_start = _time.time() + + # 调用 akshare 获取 ETF 日线数据 + df = ak.fund_etf_hist_em( + symbol=stock_code, + period="daily", + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" # 前复权 + ) + + api_elapsed = _time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info(f"[API返回] ak.fund_etf_hist_em 成功: 返回 {len(df)} 行数据, 耗时 {api_elapsed:.2f}s") + logger.info(f"[API返回] 列名: {list(df.columns)}") + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning(f"[API返回] ak.fund_etf_hist_em 返回空数据, 耗时 {api_elapsed:.2f}s") + + return df + + except Exception as e: + error_msg = str(e).lower() + + # 检测反爬封禁 + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + logger.warning(f"检测到可能被封禁: {e}") + raise RateLimitError(f"Akshare 可能被限流: {e}") from e + + raise DataFetchError(f"Akshare 获取 ETF 数据失败: {e}") from e + + def _fetch_us_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取美股历史数据 + + 数据来源:ak.stock_us_daily()(新浪财经接口) + + Args: + stock_code: 美股代码,如 'AMD', 'AAPL', 'TSLA' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + 美股历史数据 DataFrame + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + # 美股代码直接使用大写 + symbol = stock_code.strip().upper() + + logger.info(f"[API调用] ak.stock_us_daily(symbol={symbol}, adjust=qfq)") + + try: + import time as _time + api_start = _time.time() + + # 调用 akshare 获取美股日线数据 + # stock_us_daily 返回全部历史数据,后续需要按日期过滤 + df = ak.stock_us_daily( + symbol=symbol, + adjust="qfq" # 前复权 + ) + + api_elapsed = _time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info(f"[API返回] ak.stock_us_daily 成功: 返回 {len(df)} 行数据, 耗时 {api_elapsed:.2f}s") + logger.info(f"[API返回] 列名: {list(df.columns)}") + + # 按日期过滤 + df['date'] = pd.to_datetime(df['date']) + start_dt = pd.to_datetime(start_date) + end_dt = pd.to_datetime(end_date) + df = df[(df['date'] >= start_dt) & (df['date'] <= end_dt)] + + if not df.empty: + logger.info(f"[API返回] 过滤后日期范围: {df['date'].iloc[0].strftime('%Y-%m-%d')} ~ {df['date'].iloc[-1].strftime('%Y-%m-%d')}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning(f"[API返回] 过滤后数据为空,日期范围 {start_date} ~ {end_date} 无数据") + + # 转换列名为中文格式以匹配 _normalize_data + # stock_us_daily 返回: date, open, high, low, close, volume + rename_map = { + 
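+                    # 映射到 _normalize_data 期望的中文列名;成交额与涨跌幅
+                    # 该接口不返回,由下方的派生计算补齐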
'date': '日期', + 'open': '开盘', + 'high': '最高', + 'low': '最低', + 'close': '收盘', + 'volume': '成交量', + } + df = df.rename(columns=rename_map) + + # 计算涨跌幅(美股接口不直接返回) + if '收盘' in df.columns: + df['涨跌幅'] = df['收盘'].pct_change() * 100 + df['涨跌幅'] = df['涨跌幅'].fillna(0) + + # 估算成交额(美股接口不返回) + if '成交量' in df.columns and '收盘' in df.columns: + df['成交额'] = df['成交量'] * df['收盘'] + else: + df['成交额'] = 0 + + return df + else: + logger.warning(f"[API返回] ak.stock_us_daily 返回空数据, 耗时 {api_elapsed:.2f}s") + return pd.DataFrame() + + except Exception as e: + error_msg = str(e).lower() + + # 检测反爬封禁 + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + logger.warning(f"检测到可能被封禁: {e}") + raise RateLimitError(f"Akshare 可能被限流: {e}") from e + + raise DataFetchError(f"Akshare 获取美股数据失败: {e}") from e + + def _fetch_hk_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取港股历史数据 + + 数据来源:ak.stock_hk_hist() + + Args: + stock_code: 港股代码,如 '00700', '01810' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + 港股历史数据 DataFrame + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + # 确保代码格式正确(5位数字) + code = stock_code.lower().replace('hk', '').zfill(5) + + logger.info(f"[API调用] ak.stock_hk_hist(symbol={code}, period=daily, " + f"start_date={start_date.replace('-', '')}, end_date={end_date.replace('-', '')}, adjust=qfq)") + + try: + import time as _time + api_start = _time.time() + + # 调用 akshare 获取港股日线数据 + df = ak.stock_hk_hist( + symbol=code, + period="daily", + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" # 前复权 + ) + + api_elapsed = _time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info(f"[API返回] ak.stock_hk_hist 成功: 返回 {len(df)} 行数据, 耗时 {api_elapsed:.2f}s") + logger.info(f"[API返回] 列名: {list(df.columns)}") + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning(f"[API返回] ak.stock_hk_hist 返回空数据, 耗时 {api_elapsed:.2f}s") + + return df + + except Exception as e: + error_msg = str(e).lower() + + # 检测反爬封禁 + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + logger.warning(f"检测到可能被封禁: {e}") + raise RateLimitError(f"Akshare 可能被限流: {e}") from e + + raise DataFetchError(f"Akshare 获取港股数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Akshare 数据 + + Akshare 返回的列名(中文): + 日期, 开盘, 收盘, 最高, 最低, 成交量, 成交额, 振幅, 涨跌幅, 涨跌额, 换手率 + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 列名映射(Akshare 中文列名 -> 标准英文列名) + column_mapping = { + '日期': 'date', + '开盘': 'open', + '收盘': 'close', + '最高': 'high', + '最低': 'low', + '成交量': 'volume', + '成交额': 'amount', + '涨跌幅': 'pct_chg', + } + + # 重命名列 + df = df.rename(columns=column_mapping) + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_realtime_quote(self, stock_code: str, source: str = "em") -> Optional[UnifiedRealtimeQuote]: + """ + 获取实时行情数据(支持多数据源) + + 数据源优先级(可配置): + 1. em: 东方财富(akshare ak.stock_zh_a_spot_em)- 数据最全,含量比/PE/PB/市值等 + 2. sina: 新浪财经(akshare ak.stock_zh_a_spot)- 轻量级,基本行情 + 3. 
tencent: 腾讯直连接口 - 单股票查询,负载小 + + Args: + stock_code: 股票/ETF代码 + source: 数据源类型,可选 "em", "sina", "tencent" + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + circuit_breaker = get_realtime_circuit_breaker() + + # 根据代码类型选择不同的获取方法 + if _is_us_code(stock_code): + # 美股不使用 Akshare,由 YfinanceFetcher 处理 + logger.debug(f"[API跳过] {stock_code} 是美股,Akshare 不支持美股实时行情") + return None + elif _is_hk_code(stock_code): + return self._get_hk_realtime_quote(stock_code) + elif _is_etf_code(stock_code): + source_key = "akshare_etf" + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过") + return None + return self._get_etf_realtime_quote(stock_code) + else: + source_key = f"akshare_{source}" + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过") + return None + # 普通 A 股:根据 source 选择数据源 + if source == "sina": + return self._get_stock_realtime_quote_sina(stock_code) + elif source == "tencent": + return self._get_stock_realtime_quote_tencent(stock_code) + else: + return self._get_stock_realtime_quote_em(stock_code) + + def _get_stock_realtime_quote_em(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取普通 A 股实时行情数据(东方财富数据源) + + 数据来源:ak.stock_zh_a_spot_em() + 优点:数据最全,含量比、换手率、市盈率、市净率、总市值、流通市值等 + 缺点:全量拉取,数据量大,容易超时/限流 + """ + import akshare as ak + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_em" + + try: + # 检查缓存 + current_time = time.time() + if (_realtime_cache['data'] is not None and + current_time - _realtime_cache['timestamp'] < _realtime_cache['ttl']): + df = _realtime_cache['data'] + cache_age = int(current_time - _realtime_cache['timestamp']) + logger.debug(f"[缓存命中] A股实时行情(东财) - 缓存年龄 {cache_age}s/{_realtime_cache['ttl']}s") + else: + # 触发全量刷新 + logger.info(f"[缓存未命中] 触发全量刷新 A股实时行情(东财)") + last_error: Optional[Exception] = None + df = None + for attempt in range(1, 3): + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.stock_zh_a_spot_em() 获取A股实时行情... 
(attempt {attempt}/2)") + import time as _time + api_start = _time.time() + + df = ak.stock_zh_a_spot_em() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.stock_zh_a_spot_em 成功: 返回 {len(df)} 只股票, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(source_key) + break + except Exception as e: + last_error = e + logger.info(f"[API错误] ak.stock_zh_a_spot_em 获取失败 (attempt {attempt}/2): {e}") + time.sleep(min(2 ** attempt, 5)) + + # 更新缓存:成功缓存数据;失败也缓存空数据,避免同一轮任务对同一接口反复请求 + if df is None: + logger.info(f"[API错误] ak.stock_zh_a_spot_em 最终失败: {last_error}") + circuit_breaker.record_failure(source_key, str(last_error)) + df = pd.DataFrame() + _realtime_cache['data'] = df + _realtime_cache['timestamp'] = current_time + logger.info(f"[缓存更新] A股实时行情(东财) 缓存已刷新,TTL={_realtime_cache['ttl']}s") + + if df is None or df.empty: + logger.info(f"[实时行情] A股实时行情数据为空,跳过 {stock_code}") + return None + + # 查找指定股票 + row = df[df['代码'] == stock_code] + if row.empty: + logger.info(f"[API返回] 未找到股票 {stock_code} 的实时行情") + return None + + row = row.iloc[0] + + # 使用 realtime_types.py 中的统一转换函数 + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + volume_ratio=safe_float(row.get('量比')), + turnover_rate=safe_float(row.get('换手率')), + amplitude=safe_float(row.get('振幅')), + open_price=safe_float(row.get('今开')), + high=safe_float(row.get('最高')), + low=safe_float(row.get('最低')), + pe_ratio=safe_float(row.get('市盈率-动态')), + pb_ratio=safe_float(row.get('市净率')), + total_mv=safe_float(row.get('总市值')), + circ_mv=safe_float(row.get('流通市值')), + change_60d=safe_float(row.get('60日涨跌幅')), + high_52w=safe_float(row.get('52周最高')), + low_52w=safe_float(row.get('52周最低')), + ) + + logger.info(f"[实时行情-东财] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, " + f"量比={quote.volume_ratio}, 换手率={quote.turnover_rate}%") + return quote + + except Exception as e: + logger.info(f"[API错误] 获取 {stock_code} 实时行情(东财)失败: {e}") + circuit_breaker.record_failure(source_key, str(e)) + return None + + def _get_stock_realtime_quote_sina(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取普通 A 股实时行情数据(新浪财经数据源) + + 数据来源:新浪财经接口(直连,单股票查询) + 优点:单股票查询,负载小,速度快 + 缺点:数据字段较少,无量比/PE/PB等 + + 接口格式:http://hq.sinajs.cn/list=sh600519,sz000001 + """ + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_sina" + symbol = _to_sina_tx_symbol(stock_code) + url = f"http://{SINA_REALTIME_ENDPOINT}={symbol}" + api_start = time.time() + + try: + headers = { + 'Referer': 'http://finance.sina.com.cn', + 'User-Agent': random.choice(USER_AGENTS) + } + + logger.info( + f"[API调用] 新浪财经接口获取 {stock_code} 实时行情: endpoint={SINA_REALTIME_ENDPOINT}, symbol={symbol}" + ) + + self._enforce_rate_limit() + response = requests.get(url, headers=headers, timeout=10) + response.encoding = 'gbk' + api_elapsed = time.time() - api_start + + if response.status_code != 200: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="http_status", + detail=f"HTTP {response.status_code}", + elapsed=api_elapsed, + error_type="HTTPStatus", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + # 解析数据:var 
hq_str_sh600519="贵州茅台,1866.000,1870.000,..." + content = response.text.strip() + if '=""' in content or not content: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="empty_response", + detail="empty quote payload", + elapsed=api_elapsed, + error_type="EmptyResponse", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + # 提取引号内的数据 + data_start = content.find('"') + data_end = content.rfind('"') + if data_start == -1 or data_end == -1: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="malformed_payload", + detail="quote payload missing quotes", + elapsed=api_elapsed, + error_type="MalformedPayload", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + data_str = content[data_start+1:data_end] + fields = data_str.split(',') + + if len(fields) < 32: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="insufficient_fields", + detail=f"field_count={len(fields)}", + elapsed=api_elapsed, + error_type="InsufficientFields", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + circuit_breaker.record_success(source_key) + + # 新浪数据字段顺序: + # 0:名称 1:今开 2:昨收 3:最新价 4:最高 5:最低 6:买一价 7:卖一价 + # 8:成交量(股) 9:成交额(元) ... 30:日期 31:时间 + # 使用 realtime_types.py 中的统一转换函数 + price = safe_float(fields[3]) + pre_close = safe_float(fields[2]) + change_pct = None + change_amount = None + if price and pre_close and pre_close > 0: + change_amount = price - pre_close + change_pct = (change_amount / pre_close) * 100 + + quote = UnifiedRealtimeQuote( + code=stock_code, + name=fields[0], + source=RealtimeSource.AKSHARE_SINA, + price=price, + change_pct=change_pct, + change_amount=change_amount, + volume=safe_int(fields[8]), # 成交量(股) + amount=safe_float(fields[9]), # 成交额(元) + open_price=safe_float(fields[1]), + high=safe_float(fields[4]), + low=safe_float(fields[5]), + pre_close=pre_close, + ) + + logger.info( + f"[实时行情-新浪] {stock_code} {quote.name}: endpoint={SINA_REALTIME_ENDPOINT}, " + f"价格={quote.price}, 涨跌={quote.change_pct}, 成交量={quote.volume}, elapsed={api_elapsed:.2f}s" + ) + return quote + + except Exception as e: + api_elapsed = time.time() - api_start + category, detail = _classify_realtime_http_error(e) + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category=category, + detail=detail, + elapsed=api_elapsed, + error_type=type(e).__name__, + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + def _get_stock_realtime_quote_tencent(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取普通 A 股实时行情数据(腾讯财经数据源) + + 数据来源:腾讯财经接口(直连,单股票查询) + 优点:单股票查询,负载小,包含换手率 + 缺点:无量比/PE/PB等估值数据 + + 接口格式:http://qt.gtimg.cn/q=sh600519,sz000001 + """ + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_tencent" + symbol = _to_sina_tx_symbol(stock_code) + url = f"http://{TENCENT_REALTIME_ENDPOINT}={symbol}" + api_start = time.time() + + try: + headers = { + 'Referer': 'http://finance.qq.com', + 'User-Agent': random.choice(USER_AGENTS) + } + + 
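+            # qt.gtimg.cn 返回 GBK 编码的单行脚本,形如(示意样例,数值为虚构):
+            #   v_sh600519="1~贵州茅台~600519~1866.00~1850.00~1855.00~...";
+            # 字段以 '~' 分隔,下方按索引取值,完整字段表见构造 quote 处的注释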
logger.info( + f"[API调用] 腾讯财经接口获取 {stock_code} 实时行情: endpoint={TENCENT_REALTIME_ENDPOINT}, symbol={symbol}" + ) + + self._enforce_rate_limit() + response = requests.get(url, headers=headers, timeout=10) + response.encoding = 'gbk' + api_elapsed = time.time() - api_start + + if response.status_code != 200: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="http_status", + detail=f"HTTP {response.status_code}", + elapsed=api_elapsed, + error_type="HTTPStatus", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + content = response.text.strip() + if '=""' in content or not content: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="empty_response", + detail="empty quote payload", + elapsed=api_elapsed, + error_type="EmptyResponse", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + # 提取数据 + data_start = content.find('"') + data_end = content.rfind('"') + if data_start == -1 or data_end == -1: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="malformed_payload", + detail="quote payload missing quotes", + elapsed=api_elapsed, + error_type="MalformedPayload", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + data_str = content[data_start+1:data_end] + fields = data_str.split('~') + + if len(fields) < 45: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="insufficient_fields", + detail=f"field_count={len(fields)}", + elapsed=api_elapsed, + error_type="InsufficientFields", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + circuit_breaker.record_success(source_key) + + # 腾讯数据字段顺序(完整): + # 1:名称 2:代码 3:最新价 4:昨收 5:今开 6:成交量(手) 7:外盘 8:内盘 + # 9-28:买卖五档 30:时间戳 31:涨跌额 32:涨跌幅(%) 33:最高 34:最低 35:收盘/成交量/成交额 + # 36:成交量(手) 37:成交额(万) 38:换手率(%) 39:市盈率 43:振幅(%) + # 44:流通市值(亿) 45:总市值(亿) 46:市净率 47:涨停价 48:跌停价 49:量比 + # 使用 realtime_types.py 中的统一转换函数 + quote = UnifiedRealtimeQuote( + code=stock_code, + name=fields[1] if len(fields) > 1 else "", + source=RealtimeSource.TENCENT, + price=safe_float(fields[3]), + change_pct=safe_float(fields[32]), + change_amount=safe_float(fields[31]) if len(fields) > 31 else None, + volume=safe_int(fields[6]) * 100 if fields[6] else None, # 腾讯返回的是手,转为股 + open_price=safe_float(fields[5]), + high=safe_float(fields[33]) if len(fields) > 33 else None, # 修正:字段 33 是最高价 + low=safe_float(fields[34]) if len(fields) > 34 else None, # 修正:字段 34 是最低价 + pre_close=safe_float(fields[4]), + turnover_rate=safe_float(fields[38]) if len(fields) > 38 else None, + amplitude=safe_float(fields[43]) if len(fields) > 43 else None, + volume_ratio=safe_float(fields[49]) if len(fields) > 49 else None, # 量比 + pe_ratio=safe_float(fields[39]) if len(fields) > 39 else None, # 市盈率 + pb_ratio=safe_float(fields[46]) if len(fields) > 46 else None, # 市净率 + circ_mv=safe_float(fields[44]) * 100000000 if len(fields) > 44 and fields[44] else None, # 流通市值(亿->元) + total_mv=safe_float(fields[45]) * 100000000 if len(fields) > 45 and 
fields[45] else None, # 总市值(亿->元) + ) + + logger.info( + f"[实时行情-腾讯] {stock_code} {quote.name}: endpoint={TENCENT_REALTIME_ENDPOINT}, " + f"价格={quote.price}, 涨跌={quote.change_pct}%, 量比={quote.volume_ratio}, " + f"换手率={quote.turnover_rate}%, elapsed={api_elapsed:.2f}s" + ) + return quote + + except Exception as e: + api_elapsed = time.time() - api_start + category, detail = _classify_realtime_http_error(e) + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category=category, + detail=detail, + elapsed=api_elapsed, + error_type=type(e).__name__, + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + def _get_etf_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取 ETF 基金实时行情数据 + + 数据来源:ak.fund_etf_spot_em() + 包含:最新价、涨跌幅、成交量、成交额、换手率等 + + Args: + stock_code: ETF 代码 + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + import akshare as ak + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_etf" + + try: + # 检查缓存 + current_time = time.time() + if (_etf_realtime_cache['data'] is not None and + current_time - _etf_realtime_cache['timestamp'] < _etf_realtime_cache['ttl']): + df = _etf_realtime_cache['data'] + logger.debug(f"[缓存命中] 使用缓存的ETF实时行情数据") + else: + last_error: Optional[Exception] = None + df = None + for attempt in range(1, 3): + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.fund_etf_spot_em() 获取ETF实时行情... (attempt {attempt}/2)") + import time as _time + api_start = _time.time() + + df = ak.fund_etf_spot_em() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.fund_etf_spot_em 成功: 返回 {len(df)} 只ETF, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(source_key) + break + except Exception as e: + last_error = e + logger.info(f"[API错误] ak.fund_etf_spot_em 获取失败 (attempt {attempt}/2): {e}") + time.sleep(min(2 ** attempt, 5)) + + if df is None: + logger.info(f"[API错误] ak.fund_etf_spot_em 最终失败: {last_error}") + circuit_breaker.record_failure(source_key, str(last_error)) + df = pd.DataFrame() + _etf_realtime_cache['data'] = df + _etf_realtime_cache['timestamp'] = current_time + + if df is None or df.empty: + logger.info(f"[实时行情] ETF实时行情数据为空,跳过 {stock_code}") + return None + + # 查找指定 ETF + row = df[df['代码'] == stock_code] + if row.empty: + logger.info(f"[API返回] 未找到 ETF {stock_code} 的实时行情") + return None + + row = row.iloc[0] + + # 使用 realtime_types.py 中的统一转换函数 + # ETF 行情数据构建 + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + volume_ratio=safe_float(row.get('量比')), + turnover_rate=safe_float(row.get('换手率')), + amplitude=safe_float(row.get('振幅')), + open_price=safe_float(row.get('开盘价')), + high=safe_float(row.get('最高价')), + low=safe_float(row.get('最低价')), + total_mv=safe_float(row.get('总市值')), + circ_mv=safe_float(row.get('流通市值')), + high_52w=safe_float(row.get('52周最高')), + low_52w=safe_float(row.get('52周最低')), + ) + + logger.info(f"[ETF实时行情] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, " + f"换手率={quote.turnover_rate}%") + return quote + + except Exception as e: + logger.info(f"[API错误] 获取 ETF {stock_code} 实时行情失败: 
{e}") + circuit_breaker.record_failure(source_key, str(e)) + return None + + def _get_hk_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取港股实时行情数据 + + 主数据源:ak.stock_hk_spot_em()(东方财富) + 备用数据源:ak.stock_hk_spot()(新浪) + 包含:最新价、涨跌幅、成交量、成交额等 + + Args: + stock_code: 港股代码 + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + import akshare as ak + circuit_breaker = get_realtime_circuit_breaker() + em_key = "akshare_hk_em" + sina_key = "akshare_hk_sina" + + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + # 确保代码格式正确(5位数字) + raw_code = stock_code.strip().lower() + if raw_code.endswith('.hk'): + raw_code = raw_code[:-3] + if raw_code.startswith('hk'): + raw_code = raw_code[2:] + code = raw_code.zfill(5) + + # --- 主数据源:东方财富 --- + if circuit_breaker.is_available(em_key): + try: + logger.info(f"[API调用] ak.stock_hk_spot_em() 获取港股实时行情...") + import time as _time + api_start = _time.time() + + df = ak.stock_hk_spot_em() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.stock_hk_spot_em 成功: 返回 {len(df)} 只港股, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(em_key) + + # 查找指定港股 + row = df[df['代码'] == code] + if row.empty: + logger.info(f"[API返回] 未找到港股 {code} 的实时行情 (stock_hk_spot_em)") + else: + row = row.iloc[0] + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + volume_ratio=safe_float(row.get('量比')), + turnover_rate=safe_float(row.get('换手率')), + amplitude=safe_float(row.get('振幅')), + pe_ratio=safe_float(row.get('市盈率')), + pb_ratio=safe_float(row.get('市净率')), + total_mv=safe_float(row.get('总市值')), + circ_mv=safe_float(row.get('流通市值')), + high_52w=safe_float(row.get('52周最高')), + low_52w=safe_float(row.get('52周最低')), + ) + logger.info(f"[港股实时行情] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, " + f"换手率={quote.turnover_rate}%") + return quote + + except Exception as e: + logger.warning(f"[API错误] ak.stock_hk_spot_em 获取港股 {stock_code} 失败: {e},尝试 stock_hk_spot 备用接口") + circuit_breaker.record_failure(em_key, str(e)) + else: + logger.info(f"[熔断] 数据源 {em_key} 处于熔断状态,尝试使用备用链路") + + # --- 备用数据源:新浪 --- + if not circuit_breaker.is_available(sina_key): + logger.info(f"[熔断] 数据源 {sina_key} 处于熔断状态,跳过备用链路") + return None + + try: + logger.info(f"[API调用] ak.stock_hk_spot() 获取港股实时行情(备用)...") + import time as _time + api_start = _time.time() + + df_spot = ak.stock_hk_spot() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.stock_hk_spot 成功: 返回 {len(df_spot)} 只港股, 耗时 {api_elapsed:.2f}s") + + row = df_spot[df_spot['代码'] == code] + if row.empty: + logger.info(f"[API返回] 未找到港股 {code} 的实时行情 (stock_hk_spot)") + return None + + row = row.iloc[0] + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + ) + circuit_breaker.record_success(sina_key) + logger.info(f"[港股实时行情-备用] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%") + return quote + + except Exception as e: + logger.info(f"[API错误] ak.stock_hk_spot 备用接口也失败: {e}") + circuit_breaker.record_failure(sina_key, str(e)) + return 
None + + def get_chip_distribution(self, stock_code: str) -> Optional[ChipDistribution]: + """ + 获取筹码分布数据 + + 数据来源:ak.stock_cyq_em() + 包含:获利比例、平均成本、筹码集中度 + + 注意:ETF/指数没有筹码分布数据,会直接返回 None + + Args: + stock_code: 股票代码 + + Returns: + ChipDistribution 对象(最新一天的数据),获取失败返回 None + """ + import akshare as ak + + # 美股没有筹码分布数据(Akshare 不支持) + if _is_us_code(stock_code): + logger.debug(f"[API跳过] {stock_code} 是美股,无筹码分布数据") + return None + + # 港股没有筹码分布数据(stock_cyq_em 是 A 股专属接口) + if _is_hk_code(stock_code): + logger.debug(f"[API跳过] {stock_code} 是港股,无筹码分布数据") + return None + + # ETF/指数没有筹码分布数据 + if _is_etf_code(stock_code): + logger.debug(f"[API跳过] {stock_code} 是 ETF/指数,无筹码分布数据") + return None + + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.stock_cyq_em(symbol={stock_code}) 获取筹码分布...") + import time as _time + api_start = _time.time() + + df = ak.stock_cyq_em(symbol=stock_code) + + api_elapsed = _time.time() - api_start + + if df.empty: + logger.warning(f"[API返回] ak.stock_cyq_em 返回空数据, 耗时 {api_elapsed:.2f}s") + return None + + logger.info(f"[API返回] ak.stock_cyq_em 成功: 返回 {len(df)} 天数据, 耗时 {api_elapsed:.2f}s") + logger.debug(f"[API返回] 筹码数据列名: {list(df.columns)}") + + # 取最新一天的数据 + latest = df.iloc[-1] + + # 使用 realtime_types.py 中的统一转换函数 + chip = ChipDistribution( + code=stock_code, + date=str(latest.get('日期', '')), + profit_ratio=safe_float(latest.get('获利比例')), + avg_cost=safe_float(latest.get('平均成本')), + cost_90_low=safe_float(latest.get('90成本-低')), + cost_90_high=safe_float(latest.get('90成本-高')), + concentration_90=safe_float(latest.get('90集中度')), + cost_70_low=safe_float(latest.get('70成本-低')), + cost_70_high=safe_float(latest.get('70成本-高')), + concentration_70=safe_float(latest.get('70集中度')), + ) + + logger.info(f"[筹码分布] {stock_code} 日期={chip.date}: 获利比例={chip.profit_ratio:.1%}, " + f"平均成本={chip.avg_cost}, 90%集中度={chip.concentration_90:.2%}, " + f"70%集中度={chip.concentration_70:.2%}") + return chip + + except Exception as e: + logger.error(f"[API错误] 获取 {stock_code} 筹码分布失败: {e}") + return None + + def get_enhanced_data(self, stock_code: str, days: int = 60) -> Dict[str, Any]: + """ + 获取增强数据(历史K线 + 实时行情 + 筹码分布) + + Args: + stock_code: 股票代码 + days: 历史数据天数 + + Returns: + 包含所有数据的字典 + """ + result = { + 'code': stock_code, + 'daily_data': None, + 'realtime_quote': None, + 'chip_distribution': None, + } + + # 获取日线数据 + try: + df = self.get_daily_data(stock_code, days=days) + result['daily_data'] = df + except Exception as e: + logger.error(f"获取 {stock_code} 日线数据失败: {e}") + + # 获取实时行情 + result['realtime_quote'] = self.get_realtime_quote(stock_code) + + # 获取筹码分布 + result['chip_distribution'] = self.get_chip_distribution(stock_code) + + return result + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数实时行情 (新浪接口),仅支持 A 股 + """ + if region != "cn": + return None + import akshare as ak + + # 主要指数代码映射 + indices_map = { + 'sh000001': '上证指数', + 'sz399001': '深证成指', + 'sz399006': '创业板指', + 'sh000688': '科创50', + 'sh000016': '上证50', + 'sh000300': '沪深300', + } + + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + # 使用 akshare 获取指数行情(新浪财经接口) + df = ak.stock_zh_index_spot_sina() + + results = [] + if df is not None and not df.empty: + for code, name in indices_map.items(): + # 查找对应指数 + row = df[df['代码'] == code] + if row.empty: + # 尝试带前缀查找 + row = df[df['代码'].str.contains(code)] + + if not row.empty: + row = row.iloc[0] + current = safe_float(row.get('最新价', 0)) + prev_close = safe_float(row.get('昨收', 
0))
+                        high = safe_float(row.get('最高', 0))
+                        low = safe_float(row.get('最低', 0))
+
+                        # 计算振幅
+                        amplitude = 0.0
+                        if prev_close > 0:
+                            amplitude = (high - low) / prev_close * 100
+
+                        results.append({
+                            'code': code,
+                            'name': name,
+                            'current': current,
+                            'change': safe_float(row.get('涨跌额', 0)),
+                            'change_pct': safe_float(row.get('涨跌幅', 0)),
+                            'open': safe_float(row.get('今开', 0)),
+                            'high': high,
+                            'low': low,
+                            'prev_close': prev_close,
+                            'volume': safe_float(row.get('成交量', 0)),
+                            'amount': safe_float(row.get('成交额', 0)),
+                            'amplitude': amplitude,
+                        })
+            return results
+
+        except Exception as e:
+            logger.error(f"[Akshare] 获取指数行情失败: {e}")
+
+        return None
+
+    def get_market_stats(self) -> Optional[Dict[str, Any]]:
+        """
+        获取市场涨跌统计
+
+        数据源优先级:
+        1. 东财接口 (ak.stock_zh_a_spot_em)
+        2. 新浪接口 (ak.stock_zh_a_spot)
+        """
+        import akshare as ak
+
+        # 优先东财接口
+        try:
+            self._set_random_user_agent()
+            self._enforce_rate_limit()
+
+            logger.info("[API调用] ak.stock_zh_a_spot_em() 获取市场统计...")
+            df = ak.stock_zh_a_spot_em()
+            if df is not None and not df.empty:
+                return self._calc_market_stats(df)
+        except Exception as e:
+            logger.warning(f"[Akshare] 东财接口获取市场统计失败: {e},尝试新浪接口")
+
+        # 东财失败后,尝试新浪接口
+        try:
+            self._set_random_user_agent()
+            self._enforce_rate_limit()
+
+            logger.info("[API调用] ak.stock_zh_a_spot() 获取市场统计(新浪)...")
+            df = ak.stock_zh_a_spot()
+            if df is not None and not df.empty:
+                return self._calc_market_stats(df)
+        except Exception as e:
+            logger.error(f"[Akshare] 新浪接口获取市场统计也失败: {e}")
+
+        return None
+
+    def _calc_market_stats(
+        self,
+        df: pd.DataFrame,
+    ) -> Optional[Dict[str, Any]]:
+        """从行情 DataFrame 计算涨跌统计。"""
+        import numpy as np
+
+        df = df.copy()
+
+        # 1. 提取基础比对数据:最新价、昨收
+        # 兼容不同接口返回的列名(sina/em/efinance/tushare/xtdata)
+        code_col = next((c for c in ['代码', '股票代码', 'ts_code', 'stock_code'] if c in df.columns), None)
+        name_col = next((c for c in ['名称', '股票名称', 'name'] if c in df.columns), None)
+        close_col = next((c for c in ['最新价', 'close', 'lastPrice'] if c in df.columns), None)
+        pre_close_col = next((c for c in ['昨收', '昨日收盘', 'pre_close', 'lastClose'] if c in df.columns), None)
+        amount_col = next((c for c in ['成交额', 'amount'] if c in df.columns), None)
+
+        # 任一关键列缺失时显式报错,交由上层 get_market_stats 切换备用数据源
+        if not all([code_col, name_col, close_col, pre_close_col, amount_col]):
+            raise DataFetchError(f"行情数据缺少必要列,无法计算涨跌统计: {list(df.columns)}")
+
+        limit_up_count = 0
+        limit_down_count = 0
+        up_count = 0
+        down_count = 0
+        flat_count = 0
+
+        for code, name, current_price, pre_close, amount in zip(
+            df[code_col], df[name_col], df[close_col], df[pre_close_col], df[amount_col]
+        ):
+
+            # 停牌过滤:efinance 的停牌数据有时缺失价格并显示为 '-',em 则显示为 None
+            if pd.isna(current_price) or pd.isna(pre_close) or current_price in ['-'] or pre_close in ['-'] or amount == 0:
+                continue
+
+            # em、efinance 返回 str,需要转换为 float
+            current_price = float(current_price)
+            pre_close = float(pre_close)
+
+            # 获取去除前缀的纯数字代码
+            pure_code = normalize_stock_code(str(code))
+
+            # A. 确定每只股票的涨跌幅比例(使用纯数字代码判断)
+            if is_bse_code(pure_code):
+                ratio = 0.30
+            elif is_kc_cy_stock(pure_code):
+                ratio = 0.20
+            elif is_st_stock(name):
+                ratio = 0.05
+            else:
+                ratio = 0.10
+
+            # B. 严格按照 A 股规则计算涨跌停价:昨收 * (1 ± 比例) -> 四舍五入保留2位小数
+            limit_up_price = np.floor(pre_close * (1 + ratio) * 100 + 0.5) / 100.0
+            limit_down_price = np.floor(pre_close * (1 - ratio) * 100 + 0.5) / 100.0
+
+            # 容差为理论价与取整价之差,用于在浮点误差范围内做精确比对
+            limit_up_price_Tolerance = round(abs(pre_close * (1 + ratio) - limit_up_price), 10)
+            limit_down_price_Tolerance = round(abs(pre_close * (1 - ratio) - limit_down_price), 10)
+
+            # C. 
精确比对 + if current_price > 0 : + is_limit_up = (current_price > 0) and (abs(current_price - limit_up_price) <= limit_up_price_Tolerance) + is_limit_down = (current_price > 0) and (abs(current_price - limit_down_price) <= limit_down_price_Tolerance) + + if is_limit_up: + limit_up_count += 1 + if is_limit_down: + limit_down_count += 1 + + if current_price > pre_close: + up_count += 1 + elif current_price < pre_close: + down_count += 1 + else: + flat_count += 1 + + # 统计数量 + stats = { + 'up_count': up_count, + 'down_count': down_count, + 'flat_count': flat_count, + 'limit_up_count': limit_up_count, + 'limit_down_count': limit_down_count, + 'total_amount': 0.0, + } + + # 成交额统计 + if amount_col and amount_col in df.columns: + df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce') + stats['total_amount'] = (df[amount_col].sum() / 1e8) + + return stats + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[List[Dict], List[Dict]]]: + """ + 获取行业板块涨跌榜 + + 数据源优先级: + 1. 东财接口 (ak.stock_board_industry_name_em) + 2. 新浪接口 (ak.stock_sector_spot) + """ + import akshare as ak + + def _get_rank_top_n(df: pd.DataFrame, change_col: str, industry_name: str, n: int) -> Tuple[list, list]: + df[change_col] = pd.to_numeric(df[change_col], errors='coerce') + df = df.dropna(subset=[change_col]) + + # 涨幅前n + top = df.nlargest(n, change_col) + top_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in top.iterrows() + ] + + bottom = df.nsmallest(n, change_col) + bottom_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in bottom.iterrows() + ] + return top_sectors, bottom_sectors + + # 优先东财接口 + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ak.stock_board_industry_name_em() 获取板块排行...") + df = ak.stock_board_industry_name_em() + if df is not None and not df.empty: + change_col = '涨跌幅' + name = '板块名称' + return _get_rank_top_n(df, change_col, name, n) + + except Exception as e: + logger.warning(f"[Akshare] 东财接口获取行业板块排行失败: {e},尝试新浪接口") + + # 东财失败后,尝试新浪接口 + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ak.stock_sector_spot() 获取行业板块排行(新浪)...") + df = ak.stock_sector_spot(indicator='行业') + if df is None or df.empty: + return None + change_col = '涨跌幅' + name = '板块' + return _get_rank_top_n(df, change_col, name, n) + + except Exception as e: + logger.error(f"[Akshare] 新浪接口获取板块排行也失败: {e}") + return None + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = AkshareFetcher() + + # 测试普通股票 + print("=" * 50) + print("测试普通股票数据获取") + print("=" * 50) + try: + df = fetcher.get_daily_data('600519') # 茅台 + print(f"[股票] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[股票] 获取失败: {e}") + + # 测试 ETF 基金 + print("\n" + "=" * 50) + print("测试 ETF 基金数据获取") + print("=" * 50) + try: + df = fetcher.get_daily_data('512400') # 有色龙头ETF + print(f"[ETF] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[ETF] 获取失败: {e}") + + # 测试 ETF 实时行情 + print("\n" + "=" * 50) + print("测试 ETF 实时行情获取") + print("=" * 50) + try: + quote = fetcher.get_realtime_quote('512880') # 证券ETF + if quote: + print(f"[ETF实时] {quote.name}: 价格={quote.price}, 涨跌幅={quote.change_pct}%") + else: + print("[ETF实时] 未获取到数据") + except Exception as e: + print(f"[ETF实时] 获取失败: {e}") + + # 测试港股历史数据 + print("\n" + "=" * 50) + print("测试港股历史数据获取") + print("=" * 50) + try: + df = fetcher.get_daily_data('00700') # 腾讯控股 + print(f"[港股] 
获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[港股] 获取失败: {e}") + + # 测试港股实时行情 + print("\n" + "=" * 50) + print("测试港股实时行情获取") + print("=" * 50) + try: + quote = fetcher.get_realtime_quote('00700') # 腾讯控股 + if quote: + print(f"[港股实时] {quote.name}: 价格={quote.price}, 涨跌幅={quote.change_pct}%") + else: + print("[港股实时] 未获取到数据") + except Exception as e: + print(f"[港股实时] 获取失败: {e}") + + # 测试市场统计 + print("\n" + "=" * 50) + print("Testing get_market_stats (akshare)") + print("=" * 50) + try: + stats = fetcher.get_market_stats() + if stats: + print(f"Market Stats successfully computed:") + print(f"Up: {stats['up_count']} (Limit Up: {stats['limit_up_count']})") + print(f"Down: {stats['down_count']} (Limit Down: {stats['limit_down_count']})") + print(f"Flat: {stats['flat_count']}") + print(f"Total Amount: {stats['total_amount']:.2f} 亿 (Yi)") + else: + print("Failed to compute market stats.") + except Exception as e: + print(f"Failed to compute market stats: {e}") + + # 测试筹码分布数据 + print("\n" + "=" * 50) + print("测试筹码分布数据获取") + print("=" * 50) + try: + chip = fetcher.get_chip_distribution('600519') # 茅台 + except Exception as e: + print(f"[筹码分布] 获取失败: {e}") + + # 测试行业板块排名 + print("\n" + "=" * 50) + print("测试行业板块排名获取") + print("=" * 50) + try: + rankings = fetcher.get_sector_rankings(n=5) + if rankings: + top, bottom = rankings + print("涨幅榜 Top 5:") + for sector in top: + print(f"{sector['name']}: {sector['change_pct']}%") + print("\n跌幅榜 Top 5:") + for sector in bottom: + print(f"{sector['name']}: {sector['change_pct']}%") + else: + print("未获取到行业板块排名数据") + except Exception as e: + print(f"[行业板块排名] 获取失败: {e}") diff --git a/src/provider/baostock_fetcher.py b/src/provider/baostock_fetcher.py new file mode 100644 index 00000000..47cd1718 --- /dev/null +++ b/src/provider/baostock_fetcher.py @@ -0,0 +1,379 @@ +# -*- coding: utf-8 -*- +""" +=================================== +BaostockFetcher - 备用数据源 2 (Priority 3) +=================================== + +数据来源:证券宝(Baostock) +特点:免费、无需 Token、需要登录管理 +优点:稳定、无配额限制 + +关键策略: +1. 管理 bs.login() 和 bs.logout() 生命周期 +2. 使用上下文管理器防止连接泄露 +3. 失败后指数退避重试 +""" + +import logging +import re +from contextlib import contextmanager +from datetime import datetime +from typing import Optional, Generator + +import pandas as pd +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, STANDARD_COLUMNS, is_bse_code, _is_hk_market +import os + +logger = logging.getLogger(__name__) + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +class BaostockFetcher(BaseFetcher): + """ + Baostock 数据源实现 + + 优先级:3 + 数据来源:证券宝 Baostock API + + 关键策略: + - 使用上下文管理器管理连接生命周期 + - 每次请求都重新登录/登出,防止连接泄露 + - 失败后指数退避重试 + + Baostock 特点: + - 免费、无需注册 + - 需要显式登录/登出 + - 数据更新略有延迟(T+1) + """ + + name = "BaostockFetcher" + priority = int(os.getenv("BAOSTOCK_PRIORITY", "3")) + + def __init__(self): + """初始化 BaostockFetcher""" + self._bs_module = None + + def _get_baostock(self): + """ + 延迟加载 baostock 模块 + + 只在首次使用时导入,避免未安装时报错 + """ + if self._bs_module is None: + import baostock as bs + self._bs_module = bs + return self._bs_module + + @contextmanager + def _baostock_session(self) -> Generator: + """ + Baostock 连接上下文管理器 + + 确保: + 1. 进入上下文时自动登录 + 2. 退出上下文时自动登出 + 3. 
异常时也能正确登出 + + 使用示例: + with self._baostock_session(): + # 在这里执行数据查询 + """ + bs = self._get_baostock() + login_result = None + + try: + # 登录 Baostock + login_result = bs.login() + + if login_result.error_code != '0': + raise DataFetchError(f"Baostock 登录失败: {login_result.error_msg}") + + logger.debug("Baostock 登录成功") + + yield bs + + finally: + # 确保登出,防止连接泄露 + try: + logout_result = bs.logout() + if logout_result.error_code == '0': + logger.debug("Baostock 登出成功") + else: + logger.warning(f"Baostock 登出异常: {logout_result.error_msg}") + except Exception as e: + logger.warning(f"Baostock 登出时发生错误: {e}") + + def _convert_stock_code(self, stock_code: str) -> str: + """ + 转换股票代码为 Baostock 格式 + + Baostock 要求的格式: + - 沪市:sh.600519 + - 深市:sz.000001 + + Args: + stock_code: 原始代码,如 '600519', '000001' + + Returns: + Baostock 格式代码,如 'sh.600519', 'sz.000001' + """ + code = stock_code.strip() + + # HK stocks are not supported by Baostock + if _is_hk_market(code): + raise DataFetchError(f"BaostockFetcher 不支持港股 {code},请使用 AkshareFetcher") + + # 已经包含前缀的情况 + if code.startswith(('sh.', 'sz.')): + return code.lower() + + # 去除可能的后缀 + code = code.replace('.SH', '').replace('.SZ', '').replace('.sh', '').replace('.sz', '') + + # ETF: Shanghai ETF (51xx, 52xx, 56xx, 58xx) -> sh; Shenzhen ETF (15xx, 16xx, 18xx) -> sz + if len(code) == 6: + if code.startswith(('51', '52', '56', '58')): + return f"sh.{code}" + if code.startswith(('15', '16', '18')): + return f"sz.{code}" + + # 根据代码前缀判断市场 + if code.startswith(('600', '601', '603', '688')): + return f"sh.{code}" + elif code.startswith(('000', '002', '300')): + return f"sz.{code}" + else: + logger.warning(f"无法确定股票 {code} 的市场,默认使用深市") + return f"sz.{code}" + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Baostock 获取原始数据 + + 使用 query_history_k_data_plus() 获取日线数据 + + 流程: + 1. 检查是否为美股(不支持) + 2. 使用上下文管理器管理连接 + 3. 转换股票代码格式 + 4. 调用 API 查询数据 + 5. 
将结果转换为 DataFrame + """ + # 美股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_us_code(stock_code): + raise DataFetchError(f"BaostockFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # 港股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_hk_market(stock_code): + raise DataFetchError(f"BaostockFetcher 不支持港股 {stock_code},请使用 AkshareFetcher") + + # 北交所不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if is_bse_code(stock_code): + raise DataFetchError( + f"BaostockFetcher 不支持北交所 {stock_code},将自动切换其他数据源" + ) + + # 转换代码格式 + bs_code = self._convert_stock_code(stock_code) + + logger.debug(f"调用 Baostock query_history_k_data_plus({bs_code}, {start_date}, {end_date})") + + with self._baostock_session() as bs: + try: + # 查询日线数据 + # adjustflag: 1-后复权,2-前复权,3-不复权 + rs = bs.query_history_k_data_plus( + code=bs_code, + fields="date,open,high,low,close,volume,amount,pctChg", + start_date=start_date, + end_date=end_date, + frequency="d", # 日线 + adjustflag="2" # 前复权 + ) + + if rs.error_code != '0': + raise DataFetchError(f"Baostock 查询失败: {rs.error_msg}") + + # 转换为 DataFrame + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + if not data_list: + raise DataFetchError(f"Baostock 未查询到 {stock_code} 的数据") + + df = pd.DataFrame(data_list, columns=rs.fields) + + return df + + except Exception as e: + if isinstance(e, DataFetchError): + raise + raise DataFetchError(f"Baostock 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Baostock 数据 + + Baostock 返回的列名: + date, open, high, low, close, volume, amount, pctChg + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 列名映射(只需要处理 pctChg) + column_mapping = { + 'pctChg': 'pct_chg', + } + + df = df.rename(columns=column_mapping) + + # 数值类型转换(Baostock 返回的都是字符串) + numeric_cols = ['open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + for col in numeric_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """ + 获取股票名称 + + 使用 Baostock 的 query_stock_basic 接口获取股票基本信息 + + Args: + stock_code: 股票代码 + + Returns: + 股票名称,失败返回 None + """ + # 检查缓存 + if hasattr(self, '_stock_name_cache') and stock_code in self._stock_name_cache: + return self._stock_name_cache[stock_code] + + # 初始化缓存 + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + + try: + bs_code = self._convert_stock_code(stock_code) + + with self._baostock_session() as bs: + # 查询股票基本信息 + rs = bs.query_stock_basic(code=bs_code) + + if rs.error_code == '0': + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + if data_list: + # Baostock 返回的字段:code, code_name, ipoDate, outDate, type, status + fields = rs.fields + name_idx = fields.index('code_name') if 'code_name' in fields else None + if name_idx is not None and len(data_list[0]) > name_idx: + name = data_list[0][name_idx] + self._stock_name_cache[stock_code] = name + logger.debug(f"Baostock 获取股票名称成功: {stock_code} -> {name}") + return name + + except Exception as e: + logger.warning(f"Baostock 获取股票名称失败 {stock_code}: {e}") + + return None + + def get_stock_list(self) -> Optional[pd.DataFrame]: + """ + 获取股票列表 + + 使用 Baostock 的 query_stock_basic 接口获取全部股票列表 + + Returns: + 包含 code, name 列的 DataFrame,失败返回 
None + """ + try: + with self._baostock_session() as bs: + # 查询所有股票基本信息 + rs = bs.query_stock_basic() + + if rs.error_code == '0': + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + if data_list: + df = pd.DataFrame(data_list, columns=rs.fields) + + # 转换代码格式(去除 sh. 或 sz. 前缀) + df['code'] = df['code'].apply(lambda x: x.split('.')[1] if '.' in x else x) + df = df.rename(columns={'code_name': 'name'}) + + # 更新缓存 + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + for _, row in df.iterrows(): + self._stock_name_cache[row['code']] = row['name'] + + logger.info(f"Baostock 获取股票列表成功: {len(df)} 条") + return df[['code', 'name']] + + except Exception as e: + logger.warning(f"Baostock 获取股票列表失败: {e}") + + return None + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = BaostockFetcher() + + try: + # 测试历史数据 + df = fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + + # 测试股票名称 + name = fetcher.get_stock_name('600519') + print(f"股票名称: {name}") + + except Exception as e: + print(f"获取失败: {e}") diff --git a/src/provider/base.py b/src/provider/base.py new file mode 100644 index 00000000..4db74208 --- /dev/null +++ b/src/provider/base.py @@ -0,0 +1,2500 @@ +# -*- coding: utf-8 -*- +""" +=================================== +数据源基类与管理器 +=================================== + +设计模式:策略模式 (Strategy Pattern) +- BaseFetcher: 抽象基类,定义统一接口 +- DataFetcherManager: 策略管理器,实现自动切换 + +防封禁策略: +1. 每个 Fetcher 内置流控逻辑 +2. 失败自动切换到下一个数据源 +3. 指数退避重试机制 +""" + +import logging +import random +import time +from threading import BoundedSemaphore, RLock, Thread +from abc import ABC, abstractmethod +from datetime import datetime +from typing import Callable, Optional, List, Tuple, Dict, Any + +import pandas as pd +import numpy as np +from provider._data.stock_index_loader import get_index_stock_name +from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +from .fundamental_adapter import AkshareFundamentalAdapter + +# 配置日志 +logger = logging.getLogger(__name__) + + +# === 标准化列名定义 === +STANDARD_COLUMNS = ['date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + + +def unwrap_exception(exc: Exception) -> Exception: + """ + Follow chained exceptions and return the deepest non-cyclic cause. + """ + current = exc + visited = set() + + while current is not None and id(current) not in visited: + visited.add(id(current)) + next_exc = current.__cause__ or current.__context__ + if next_exc is None: + break + current = next_exc + + return current + + +def summarize_exception(exc: Exception) -> Tuple[str, str]: + """ + Build a stable summary for logs while preserving the application-layer message. + """ + root = unwrap_exception(exc) + error_type = type(root).__name__ + message = str(exc).strip() or str(root).strip() or error_type + return error_type, " ".join(message.split()) + + +def normalize_stock_code(stock_code: str) -> str: + """ + Normalize stock code by stripping exchange prefixes/suffixes. 
+ + Accepted formats and their normalized results: + - '600519' -> '600519' (already clean) + - 'SH600519' -> '600519' (strip SH prefix) + - 'SZ000001' -> '000001' (strip SZ prefix) + - 'BJ920748' -> '920748' (strip BJ prefix, BSE) + - 'sh600519' -> '600519' (case-insensitive) + - '600519.SH' -> '600519' (strip .SH suffix) + - '000001.SZ' -> '000001' (strip .SZ suffix) + - '920748.BJ' -> '920748' (strip .BJ suffix, BSE) + - 'HK00700' -> 'HK00700' (keep HK prefix for HK stocks) + - '1810.HK' -> 'HK01810' (normalize HK suffix to canonical prefix form) + - 'AAPL' -> 'AAPL' (keep US stock ticker as-is) + + This function is applied at the DataProviderManager layer so that + all individual fetchers receive a clean 6-digit code (for A-shares/ETFs). + """ + code = stock_code.strip() + upper = code.upper() + + # Normalize HK prefix to a canonical 5-digit form (e.g. hk1810 -> HK01810) + if upper.startswith('HK') and not upper.startswith('HK.'): + candidate = upper[2:] + if candidate.isdigit() and 1 <= len(candidate) <= 5: + return f"HK{candidate.zfill(5)}" + + # Strip SH/SZ prefix (e.g. SH600519 -> 600519) + if upper.startswith(('SH', 'SZ')) and not upper.startswith('SH.') and not upper.startswith('SZ.'): + candidate = code[2:] + # Only strip if the remainder looks like a valid numeric code + if candidate.isdigit() and len(candidate) in (5, 6): + return candidate + + # Strip BJ prefix (e.g. BJ920748 -> 920748) + if upper.startswith('BJ') and not upper.startswith('BJ.'): + candidate = code[2:] + if candidate.isdigit() and len(candidate) == 6: + return candidate + + # Strip .SH/.SZ/.BJ suffix (e.g. 600519.SH -> 600519, 920748.BJ -> 920748) + if '.' in code: + base, suffix = code.rsplit('.', 1) + if suffix.upper() == 'HK' and base.isdigit() and 1 <= len(base) <= 5: + return f"HK{base.zfill(5)}" + if suffix.upper() in ('SH', 'SZ', 'SS', 'BJ') and base.isdigit(): + return base + + return code + + +ETF_PREFIXES = ("51", "52", "56", "58", "15", "16", "18") + + +def _is_us_market(code: str) -> bool: + """判断是否为美股/美股指数代码(不含中文前后缀)。""" + from .us_index_mapping import is_us_stock_code, is_us_index_code + + normalized = (code or "").strip().upper() + return is_us_index_code(normalized) or is_us_stock_code(normalized) + + +def _is_hk_market(code: str) -> bool: + """ + 判定是否为港股代码。 + + 支持 `HK00700` 及纯 5 位数字形式(A 股 ETF/股票常见为 6 位)。 + """ + normalized = (code or "").strip().upper() + if normalized.endswith(".HK"): + base = normalized[:-3] + return base.isdigit() and 1 <= len(base) <= 5 + if normalized.startswith("HK"): + digits = normalized[2:] + return digits.isdigit() and 1 <= len(digits) <= 5 + if normalized.isdigit() and len(normalized) == 5: + return True + return False + + +def _is_etf_code(code: str) -> bool: + """判定 A 股 ETF 基金代码(保守规则)。""" + normalized = normalize_stock_code(code) + return ( + normalized.isdigit() + and len(normalized) == 6 + and normalized.startswith(ETF_PREFIXES) + ) + + +def _market_tag(code: str) -> str: + """返回市场标签: cn/us/hk.""" + if _is_us_market(code): + return "us" + if _is_hk_market(code): + return "hk" + return "cn" + + +def is_bse_code(code: str) -> bool: + """ + Check if the code is a Beijing Stock Exchange (BSE) A-share code. + + BSE rules (2026): + - New format (2024+): 92xxxx main trading codes + - Historical ranges: 43xxxx, 83xxxx, 87xxxx, 88xxxx + - Special instruments: 81xxxx convertible bonds, 82xxxx preferred shares + - Subscription codes: 889xxx + Note: 900xxx are Shanghai B-shares and must return False. 
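+
+    Examples (illustrative):
+        >>> is_bse_code('920748')
+        True
+        >>> is_bse_code('920748.BJ')
+        True
+        >>> is_bse_code('900901')  # Shanghai B-share, not BSE
+        False
+        >>> is_bse_code('600519')
+        False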
+ """ + c = (code or "").strip().split(".")[0] + if len(c) != 6 or not c.isdigit(): + return False + + if c.startswith("900"): + return False + + return c.startswith(("92", "43", "81", "82", "83", "87", "88")) + +def is_st_stock(name: str) -> bool: + """ + Check if the stock is an ST or *ST stock based on its name. + + ST stocks have special trading rules and typically a ±5% limit. + """ + n = (name or "").upper() + return 'ST' in n + +def is_kc_cy_stock(code: str) -> bool: + """ + Check if the stock is a STAR Market (科创板) or ChiNext (创业板) stock based on its code. + + - STAR Market: Codes starting with 688 + - ChiNext: Codes starting with 300 + Both have a ±20% limit. + """ + c = (code or "").strip().split(".")[0] + return c.startswith("688") or c.startswith("30") + + +def canonical_stock_code(code: str) -> str: + """ + Return the canonical (uppercase) form of a stock code. + + This is a display/storage layer concern, distinct from normalize_stock_code + which strips exchange prefixes. Apply at system input boundaries to ensure + consistent case across BOT, WEB UI, API, and CLI paths (Issue #355). + + Examples: + 'aapl' -> 'AAPL' + 'AAPL' -> 'AAPL' + '600519' -> '600519' (digits are unchanged) + 'hk00700' -> 'HK00700' + """ + return (code or "").strip().upper() + + +class DataFetchError(Exception): + """数据获取异常基类""" + pass + + +class RateLimitError(DataFetchError): + """API 速率限制异常""" + pass + + +class DataSourceUnavailableError(DataFetchError): + """数据源不可用异常""" + pass + + +class BaseFetcher(ABC): + """ + 数据源抽象基类 + + 职责: + 1. 定义统一的数据获取接口 + 2. 提供数据标准化方法 + 3. 实现通用的技术指标计算 + + 子类实现: + - _fetch_raw_data(): 从具体数据源获取原始数据 + - _normalize_data(): 将原始数据转换为标准格式 + """ + + name: str = "BaseFetcher" + priority: int = 99 # 优先级数字越小越优先 + + @abstractmethod + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从数据源获取原始数据(子类必须实现) + + Args: + stock_code: 股票代码,如 '600519', '000001' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + 原始数据 DataFrame(列名因数据源而异) + """ + pass + + @abstractmethod + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化数据列名(子类必须实现) + + 将不同数据源的列名统一为: + ['date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + """ + pass + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数实时行情 + + Args: + region: 市场区域,cn=A股 us=美股 + + Returns: + List[Dict]: 指数列表,每个元素为字典,包含: + - code: 指数代码 + - name: 指数名称 + - current: 当前点位 + - change: 涨跌点数 + - change_pct: 涨跌幅(%) + - volume: 成交量 + - amount: 成交额 + """ + return None + + def get_market_stats(self) -> Optional[Dict[str, Any]]: + """ + 获取市场涨跌统计 + + Returns: + Dict: 包含: + - up_count: 上涨家数 + - down_count: 下跌家数 + - flat_count: 平盘家数 + - limit_up_count: 涨停家数 + - limit_down_count: 跌停家数 + - total_amount: 两市成交额 + """ + return None + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[List[Dict], List[Dict]]]: + """ + 获取板块涨跌榜 + + Args: + n: 返回前n个 + + Returns: + Tuple: (领涨板块列表, 领跌板块列表) + """ + return None + + def get_daily_data( + self, + stock_code: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + days: int = 30 + ) -> pd.DataFrame: + """ + 获取日线数据(统一入口) + + 流程: + 1. 计算日期范围 + 2. 调用子类获取原始数据 + 3. 标准化列名 + 4. 
计算技术指标 + + Args: + stock_code: 股票代码 + start_date: 开始日期(可选) + end_date: 结束日期(可选,默认今天) + days: 获取天数(当 start_date 未指定时使用) + + Returns: + 标准化的 DataFrame,包含技术指标 + """ + # 计算日期范围 + if end_date is None: + end_date = datetime.now().strftime('%Y-%m-%d') + + if start_date is None: + # 默认获取最近 30 个交易日(按日历日估算,多取一些) + from datetime import timedelta + start_dt = datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=days * 2) + start_date = start_dt.strftime('%Y-%m-%d') + + request_start = time.time() + logger.info(f"[{self.name}] 开始获取 {stock_code} 日线数据: 范围={start_date} ~ {end_date}") + + try: + # Step 1: 获取原始数据 + raw_df = self._fetch_raw_data(stock_code, start_date, end_date) + + if raw_df is None or raw_df.empty: + raise DataFetchError(f"[{self.name}] 未获取到 {stock_code} 的数据") + + # Step 2: 标准化列名 + df = self._normalize_data(raw_df, stock_code) + + # Step 3: 数据清洗 + df = self._clean_data(df) + + # Step 4: 计算技术指标 + df = self._calculate_indicators(df) + + elapsed = time.time() - request_start + logger.info( + f"[{self.name}] {stock_code} 获取成功: 范围={start_date} ~ {end_date}, " + f"rows={len(df)}, elapsed={elapsed:.2f}s" + ) + return df + + except Exception as e: + elapsed = time.time() - request_start + error_type, error_reason = summarize_exception(e) + logger.error( + f"[{self.name}] {stock_code} 获取失败: 范围={start_date} ~ {end_date}, " + f"error_type={error_type}, elapsed={elapsed:.2f}s, reason={error_reason}" + ) + raise DataFetchError(f"[{self.name}] {stock_code}: {error_reason}") from e + + def _clean_data(self, df: pd.DataFrame) -> pd.DataFrame: + """ + 数据清洗 + + 处理: + 1. 确保日期列格式正确 + 2. 数值类型转换 + 3. 去除空值行 + 4. 按日期排序 + """ + df = df.copy() + + # 确保日期列为 datetime 类型 + if 'date' in df.columns: + df['date'] = pd.to_datetime(df['date']) + + # 数值列类型转换 + numeric_cols = ['open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + for col in numeric_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # 去除关键列为空的行 + df = df.dropna(subset=['close', 'volume']) + + # 按日期升序排序 + df = df.sort_values('date', ascending=True).reset_index(drop=True) + + return df + + def _calculate_indicators(self, df: pd.DataFrame) -> pd.DataFrame: + """ + 计算技术指标 + + 计算指标: + - MA5, MA10, MA20: 移动平均线 + - Volume_Ratio: 量比(今日成交量 / 5日平均成交量) + """ + df = df.copy() + + # 移动平均线 + df['ma5'] = df['close'].rolling(window=5, min_periods=1).mean() + df['ma10'] = df['close'].rolling(window=10, min_periods=1).mean() + df['ma20'] = df['close'].rolling(window=20, min_periods=1).mean() + + # 量比:当日成交量 / 5日平均成交量 + # 注意:此处的 volume_ratio 是“日线成交量 / 前5日均量(shift 1)”的相对倍数, + # 与部分交易软件口径的“分时量比(同一时刻对比)”不同,含义更接近“放量倍数”。 + # 该行为目前保留(按需求不改逻辑)。 + avg_volume_5 = df['volume'].rolling(window=5, min_periods=1).mean() + df['volume_ratio'] = df['volume'] / avg_volume_5.shift(1) + df['volume_ratio'] = df['volume_ratio'].fillna(1.0) + + # 保留2位小数 + for col in ['ma5', 'ma10', 'ma20', 'volume_ratio']: + if col in df.columns: + df[col] = df[col].round(2) + + return df + + @staticmethod + def random_sleep(min_seconds: float = 1.0, max_seconds: float = 3.0) -> None: + """ + 智能随机休眠(Jitter) + + 防封禁策略:模拟人类行为的随机延迟 + 在请求之间加入不规则的等待时间 + """ + sleep_time = random.uniform(min_seconds, max_seconds) + logger.debug(f"随机休眠 {sleep_time:.2f} 秒...") + time.sleep(sleep_time) + + +class DataFetcherManager: + """ + 数据源策略管理器 + + 职责: + 1. 管理多个数据源(按优先级排序) + 2. 自动故障切换(Failover) + 3. 
提供统一的数据获取接口 + + 切换策略: + - 优先使用高优先级数据源 + - 失败后自动切换到下一个 + - 所有数据源都失败时抛出异常 + """ + + def __init__(self, fetchers: Optional[List[BaseFetcher]] = None): + """ + 初始化管理器 + + Args: + fetchers: 数据源列表(可选,默认按优先级自动创建) + """ + self._fetchers: List[BaseFetcher] = [] + self._fetchers_lock = RLock() + self._fetcher_call_locks: Dict[int, RLock] = {} + self._fetcher_call_locks_lock = RLock() + self._stock_name_cache: Dict[str, str] = {} + self._stock_name_cache_lock = RLock() + + if fetchers: + # 按优先级排序 + self._fetchers = sorted(fetchers, key=lambda f: f.priority) + else: + # 默认数据源将在首次使用时延迟加载 + self._init_default_fetchers() + self._fundamental_adapter = AkshareFundamentalAdapter() + self._tickflow_fetcher = None + self._tickflow_api_key: Optional[str] = None + self._tickflow_lock = RLock() + self._fundamental_cache: Dict[str, Dict[str, Any]] = {} + self._fundamental_cache_lock = RLock() + self._fundamental_timeout_worker_limit = 8 + self._fundamental_timeout_slots = BoundedSemaphore(self._fundamental_timeout_worker_limit) + + def _ensure_concurrency_guards(self) -> None: + """Lazily initialize thread-safety primitives for test scaffolds using __new__.""" + if not hasattr(self, "_fetchers_lock") or self._fetchers_lock is None: + self._fetchers_lock = RLock() + if not hasattr(self, "_fetcher_call_locks") or self._fetcher_call_locks is None: + self._fetcher_call_locks = {} + if not hasattr(self, "_fetcher_call_locks_lock") or self._fetcher_call_locks_lock is None: + self._fetcher_call_locks_lock = RLock() + if not hasattr(self, "_stock_name_cache") or self._stock_name_cache is None: + self._stock_name_cache = {} + if not hasattr(self, "_stock_name_cache_lock") or self._stock_name_cache_lock is None: + self._stock_name_cache_lock = RLock() + + def _get_fetchers_snapshot(self) -> List[BaseFetcher]: + self._ensure_concurrency_guards() + with self._fetchers_lock: + return list(getattr(self, "_fetchers", [])) + + def _get_fetcher_call_lock(self, fetcher: BaseFetcher) -> RLock: + self._ensure_concurrency_guards() + fetcher_id = id(fetcher) + with self._fetcher_call_locks_lock: + lock = self._fetcher_call_locks.get(fetcher_id) + if lock is None: + lock = RLock() + self._fetcher_call_locks[fetcher_id] = lock + return lock + + def _call_fetcher_method(self, fetcher: BaseFetcher, method_name: str, *args, **kwargs): + """Serialize shared fetcher state access through manager-owned per-instance locks.""" + method = getattr(fetcher, method_name) + with self._get_fetcher_call_lock(fetcher): + return method(*args, **kwargs) + + def _get_cached_stock_name(self, stock_code: str) -> Optional[str]: + self._ensure_concurrency_guards() + with self._stock_name_cache_lock: + return self._stock_name_cache.get(stock_code) + + def _cache_stock_name(self, stock_code: str, name: Optional[str]) -> Optional[str]: + if name is None: + return None + self._ensure_concurrency_guards() + with self._stock_name_cache_lock: + self._stock_name_cache[stock_code] = name + return name + + def _get_tickflow_fetcher(self): + """Lazily create a TickFlow fetcher for market-review-only calls.""" + from provider._config import get_config + + config = get_config() + api_key = (getattr(config, "tickflow_api_key", None) or "").strip() + + if not hasattr(self, "_tickflow_lock") or self._tickflow_lock is None: + self._tickflow_lock = RLock() + + with self._tickflow_lock: + current_fetcher = getattr(self, "_tickflow_fetcher", None) + current_key = getattr(self, "_tickflow_api_key", None) + + if not api_key: + if current_fetcher is not None and 
hasattr(current_fetcher, "close"): + try: + current_fetcher.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 关闭旧实例失败: %s", exc) + self._tickflow_fetcher = None + self._tickflow_api_key = None + return None + + if current_fetcher is not None and current_key == api_key: + return current_fetcher + + if current_fetcher is not None and hasattr(current_fetcher, "close"): + try: + current_fetcher.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 切换实例时关闭失败: %s", exc) + + try: + from .tickflow_fetcher import TickFlowFetcher + + fetcher = TickFlowFetcher(api_key=api_key) + self._tickflow_fetcher = fetcher + self._tickflow_api_key = api_key + return fetcher + except Exception as exc: + logger.warning("[TickFlowFetcher] 初始化失败: %s", exc) + self._tickflow_fetcher = None + self._tickflow_api_key = None + return None + + def close(self) -> None: + """Best-effort release of manager-owned resources.""" + if not hasattr(self, "_tickflow_lock") or self._tickflow_lock is None: + self._tickflow_lock = RLock() + + with self._tickflow_lock: + current_fetcher = getattr(self, "_tickflow_fetcher", None) + self._tickflow_fetcher = None + self._tickflow_api_key = None + + if current_fetcher is not None and hasattr(current_fetcher, "close"): + try: + current_fetcher.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 关闭管理器资源失败: %s", exc) + + def __del__(self) -> None: + try: + self.close() + except Exception: + # Best-effort cleanup during interpreter shutdown. + pass + + def _get_fundamental_cache_key(self, stock_code: str, budget_seconds: Optional[float] = None) -> str: + """生成基本面缓存 key(包含预算分桶以避免低预算结果污染高预算请求)。""" + normalized_code = normalize_stock_code(stock_code) + if budget_seconds is None: + return f"{normalized_code}|budget=default" + try: + budget = max(0.0, float(budget_seconds)) + except (TypeError, ValueError): + budget = 0.0 + # 100ms bucket to balance cache reuse and scenario isolation. 
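+        # Worked example (inputs hypothetical): budget_seconds=1.04 gives
+        # int(round(10.4)) -> bucket 10 -> key "600519|budget=10", while
+        # budget_seconds=1.26 gives bucket 13; budgets landing in the same
+        # 100ms bucket therefore share one cache entry.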
+ budget_bucket = int(round(budget * 10)) + return f"{normalized_code}|budget={budget_bucket}" + + def _prune_fundamental_cache(self, ttl_seconds: int, max_entries: int) -> None: + """Prune expired and overflow fundamental cache items.""" + with self._fundamental_cache_lock: + if not self._fundamental_cache: + return + + now_ts = time.time() + if ttl_seconds > 0: + cache_items = list(self._fundamental_cache.items()) + expired_keys = [ + key + for key, value in cache_items + if now_ts - float(value.get("ts", 0)) > ttl_seconds + ] + for key in expired_keys: + self._fundamental_cache.pop(key, None) + + if max_entries > 0 and len(self._fundamental_cache) > max_entries: + overflow = len(self._fundamental_cache) - max_entries + sorted_items = sorted( + list(self._fundamental_cache.items()), + key=lambda item: float(item[1].get("ts", 0)), + ) + for key, _ in sorted_items[:overflow]: + self._fundamental_cache.pop(key, None) + + @staticmethod + def _try_scalar_isna(value: Any, context: str) -> Optional[bool]: + """Return scalar ``pd.isna`` result, or ``None`` when callers should use fallback logic.""" + if isinstance(value, (dict, list, tuple, set, pd.DataFrame, pd.Series, pd.Index)): + return None + + if isinstance(value, np.ndarray): + if value.ndim != 0: + return None + value = value.item() + + try: + isna_result = pd.isna(value) + except (TypeError, ValueError) as exc: + if hasattr(value, "__array__"): + logger.debug( + "[%s] pd.isna failed for array-like object; re-raise: value_type=%s error_type=%s", + context, + type(value).__name__, + type(exc).__name__, + ) + raise + logger.debug( + "[%s] pd.isna fallback: value_type=%s error_type=%s", + context, + type(value).__name__, + type(exc).__name__, + ) + return None + + if isinstance(isna_result, (bool, np.bool_)): + return bool(isna_result) + + if isinstance(isna_result, np.ndarray): + if isna_result.ndim == 0: + return bool(isna_result.item()) + logger.debug( + "[%s] pd.isna returned non-scalar result: value_type=%s result_type=%s", + context, + type(value).__name__, + type(isna_result).__name__, + ) + return None + + logger.debug( + "[%s] pd.isna returned unexpected result type: value_type=%s result_type=%s", + context, + type(value).__name__, + type(isna_result).__name__, + ) + return None + + @staticmethod + def _is_missing_board_value(value: Any) -> bool: + """Return True when a board field value should be treated as missing.""" + if value is None: + return True + is_missing = DataFetcherManager._try_scalar_isna(value, "board_value") + if is_missing is True: + return True + text = str(value).strip() + return text == "" or text.lower() in {"nan", "none", "null", "na", "n/a"} + + @staticmethod + def _normalize_belong_boards(raw_data: Any) -> List[Dict[str, Any]]: + """Normalize belong-board results from heterogeneous providers.""" + if DataFetcherManager._is_missing_board_value(raw_data): + return [] + + normalized: List[Dict[str, Any]] = [] + dedupe = set() + + if isinstance(raw_data, pd.DataFrame): + if raw_data.empty: + return [] + name_col = next( + ( + col + for col in raw_data.columns + if str(col) in {"板块名称", "板块", "所属板块", "板块名", "name", "industry"} + ), + None, + ) + code_col = next( + ( + col + for col in raw_data.columns + if str(col) in {"板块代码", "代码", "code"} + ), + None, + ) + type_col = next( + ( + col + for col in raw_data.columns + if str(col) in {"板块类型", "类别", "type"} + ), + None, + ) + if name_col is None: + return [] + for _, row in raw_data.iterrows(): + board_name_raw = row.get(name_col, "") + if 
DataFetcherManager._is_missing_board_value(board_name_raw): + continue + board_name = str(board_name_raw).strip() + if board_name in dedupe: + continue + dedupe.add(board_name) + item = {"name": board_name} + if code_col is not None: + board_code_raw = row.get(code_col, "") + if not DataFetcherManager._is_missing_board_value(board_code_raw): + item["code"] = str(board_code_raw).strip() + if type_col is not None: + board_type_raw = row.get(type_col, "") + if not DataFetcherManager._is_missing_board_value(board_type_raw): + item["type"] = str(board_type_raw).strip() + normalized.append(item) + return normalized + + if isinstance(raw_data, dict): + raw_data = [raw_data] + + if isinstance(raw_data, (list, tuple, set)): + for item in raw_data: + if isinstance(item, dict): + board_name_raw = ( + item.get("name") + or item.get("board_name") + or item.get("板块名称") + or item.get("板块") + or item.get("所属板块") + or item.get("板块名") + or item.get("industry") + or item.get("行业") + ) + if DataFetcherManager._is_missing_board_value(board_name_raw): + continue + board_name = str(board_name_raw).strip() + if board_name in dedupe: + continue + dedupe.add(board_name) + normalized_item: Dict[str, Any] = {"name": board_name} + code_raw = ( + item.get("code") + or item.get("板块代码") + or item.get("代码") + ) + if not DataFetcherManager._is_missing_board_value(code_raw): + normalized_item["code"] = str(code_raw).strip() + type_raw = ( + item.get("type") + or item.get("板块类型") + or item.get("类别") + ) + if not DataFetcherManager._is_missing_board_value(type_raw): + normalized_item["type"] = str(type_raw).strip() + normalized.append(normalized_item) + continue + if DataFetcherManager._is_missing_board_value(item): + continue + board_name = str(item).strip() + if board_name in dedupe: + continue + dedupe.add(board_name) + normalized.append({"name": board_name}) + return normalized + + if not DataFetcherManager._is_missing_board_value(raw_data): + board_name = str(raw_data).strip() + return [{"name": board_name}] + return [] + + def _init_default_fetchers(self) -> None: + """ + 初始化默认数据源列表 + + 优先级动态调整逻辑: + - 如果配置了 TUSHARE_TOKEN:Tushare 优先级提升为 0(最高) + - 否则按默认优先级: + 0. EfinanceFetcher (Priority 0) - 最高优先级 + 1. AkshareFetcher (Priority 1) + 2. PytdxFetcher (Priority 2) - 通达信 + 2. TushareFetcher (Priority 2) + 3. BaostockFetcher (Priority 3) + 4. YfinanceFetcher (Priority 4) + 5. 
LongbridgeFetcher (Priority 5) - 长桥(美股/港股兜底) + """ + from .efinance_fetcher import EfinanceFetcher + from .akshare_fetcher import AkshareFetcher + from .tushare_fetcher import TushareFetcher + from .pytdx_fetcher import PytdxFetcher + from .baostock_fetcher import BaostockFetcher + from .yfinance_fetcher import YfinanceFetcher + from .longbridge_fetcher import LongbridgeFetcher + # 创建所有数据源实例(优先级在各 Fetcher 的 __init__ 中确定) + efinance = EfinanceFetcher() + akshare = AkshareFetcher() + tushare = TushareFetcher() # 会根据 Token 配置自动调整优先级 + pytdx = PytdxFetcher() # 通达信数据源(可配 PYTDX_HOST/PYTDX_PORT) + baostock = BaostockFetcher() + yfinance = YfinanceFetcher() + longbridge = LongbridgeFetcher() # 长桥(美股/港股兜底,懒加载) + + # 初始化数据源列表 + self._ensure_concurrency_guards() + with self._fetchers_lock: + self._fetchers = [ + efinance, + akshare, + tushare, + pytdx, + baostock, + yfinance, + longbridge, + ] + + # 按优先级排序(Tushare 如果配置了 Token 且初始化成功,优先级为 0) + self._fetchers.sort(key=lambda f: f.priority) + + # 构建优先级说明 + priority_info = ", ".join([f"{f.name}(P{f.priority})" for f in self._get_fetchers_snapshot()]) + logger.info(f"已初始化 {len(self._fetchers)} 个数据源(按优先级): {priority_info}") + + def add_fetcher(self, fetcher: BaseFetcher) -> None: + """添加数据源并重新排序""" + self._ensure_concurrency_guards() + with self._fetchers_lock: + self._fetchers.append(fetcher) + self._fetchers.sort(key=lambda f: f.priority) + + def get_daily_data( + self, + stock_code: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + days: int = 30 + ) -> Tuple[pd.DataFrame, str]: + """ + 获取日线数据(自动切换数据源) + + 故障切换策略: + 1. 美股指数/美股股票直接路由到 YfinanceFetcher + 2. 其他代码从最高优先级数据源开始尝试 + 3. 捕获异常后自动切换到下一个 + 4. 记录每个数据源的失败原因 + 5. 所有数据源失败后抛出详细异常 + + Args: + stock_code: 股票代码 + start_date: 开始日期 + end_date: 结束日期 + days: 获取天数 + + Returns: + Tuple[DataFrame, str]: (数据, 成功的数据源名称) + + Raises: + DataFetchError: 所有数据源都失败时抛出 + """ + from .us_index_mapping import is_us_index_code, is_us_stock_code + + # Normalize code (strip SH/SZ prefix etc.) + stock_code = normalize_stock_code(stock_code) + + fetchers = self._get_fetchers_snapshot() + errors = [] + total_fetchers = len(fetchers) + request_start = time.time() + + # 快速路径:美股/港股使用专用数据源路由 + # - 配置长桥凭据后: Longbridge 为首选, YFinance/AkShare 兜底 + # - 未配置长桥: YFinance 为首选(美股), 通用 fetcher 循环(港股) + # - 美股指数: 始终 YFinance 为首选(Longbridge 不提供指数K线) + is_us_index = is_us_index_code(stock_code) + is_us = is_us_index or is_us_stock_code(stock_code) + is_hk = (not is_us) and _is_hk_market(stock_code) + + # 美股(含美股指数)使用 Longbridge/YFinance 特殊路由;港股走下方通用数据源循环 + if is_us: + prefer_lb = self._longbridge_preferred() and not is_us_index + source_order = ( + ["LongbridgeFetcher", "YfinanceFetcher"] + if prefer_lb + else ["YfinanceFetcher", "LongbridgeFetcher"] + ) + market_label = "美股指数" if is_us_index else "美股" + + for src_name in source_order: + for attempt, fetcher in enumerate(fetchers, start=1): + if fetcher.name != src_name: + continue + try: + role = "首选" if src_name == source_order[0] else "兜底" + logger.info( + f"[数据源尝试 {attempt}/{total_fetchers}] [{fetcher.name}] " + f"{market_label} {stock_code} {role}路由..." 
+ ) + df = self._call_fetcher_method( + fetcher, + "get_daily_data", + stock_code=stock_code, + start_date=start_date, + end_date=end_date, + days=days, + ) + if df is not None and not df.empty: + elapsed = time.time() - request_start + logger.info( + f"[数据源完成] {stock_code} 使用 [{fetcher.name}] 获取成功: " + f"rows={len(df)}, elapsed={elapsed:.2f}s" + ) + return df, fetcher.name + except Exception as e: + error_type, error_reason = summarize_exception(e) + error_msg = f"[{fetcher.name}] ({error_type}) {error_reason}" + logger.warning( + f"[数据源失败 {attempt}/{total_fetchers}] [{fetcher.name}] {stock_code}: " + f"error_type={error_type}, reason={error_reason}" + ) + errors.append(error_msg) + break + + error_summary = f"{market_label} {stock_code} 获取失败:\n" + "\n".join(errors) + elapsed = time.time() - request_start + logger.error(f"[数据源终止] {stock_code} 获取失败: elapsed={elapsed:.2f}s\n{error_summary}") + raise DataFetchError(error_summary) + + for attempt, fetcher in enumerate(fetchers, start=1): + try: + logger.info(f"[数据源尝试 {attempt}/{total_fetchers}] [{fetcher.name}] 获取 {stock_code}...") + df = self._call_fetcher_method( + fetcher, + "get_daily_data", + stock_code=stock_code, + start_date=start_date, + end_date=end_date, + days=days + ) + + if df is not None and not df.empty: + elapsed = time.time() - request_start + logger.info( + f"[数据源完成] {stock_code} 使用 [{fetcher.name}] 获取成功: " + f"rows={len(df)}, elapsed={elapsed:.2f}s" + ) + return df, fetcher.name + + except Exception as e: + error_type, error_reason = summarize_exception(e) + error_msg = f"[{fetcher.name}] ({error_type}) {error_reason}" + logger.warning( + f"[数据源失败 {attempt}/{total_fetchers}] [{fetcher.name}] {stock_code}: " + f"error_type={error_type}, reason={error_reason}" + ) + errors.append(error_msg) + if attempt < total_fetchers: + next_fetcher = fetchers[attempt] + logger.info(f"[数据源切换] {stock_code}: [{fetcher.name}] -> [{next_fetcher.name}]") + # 继续尝试下一个数据源 + continue + + # 所有数据源都失败 + error_summary = f"所有数据源获取 {stock_code} 失败:\n" + "\n".join(errors) + elapsed = time.time() - request_start + logger.error(f"[数据源终止] {stock_code} 获取失败: elapsed={elapsed:.2f}s\n{error_summary}") + raise DataFetchError(error_summary) + + @property + def available_fetchers(self) -> List[str]: + """返回可用数据源名称列表""" + return [f.name for f in self._get_fetchers_snapshot()] + + def prefetch_realtime_quotes(self, stock_codes: List[str]) -> int: + """ + 批量预取实时行情数据(在分析开始前调用) + + 策略: + 1. 检查优先级中是否包含全量拉取数据源(efinance/akshare_em) + 2. 如果不包含,跳过预取(新浪/腾讯是单股票查询,无需预取) + 3. 
如果自选股数量 >= 5 且使用全量数据源,则预取填充缓存 + + 这样做的好处: + - 使用新浪/腾讯时:每只股票独立查询,无全量拉取问题 + - 使用 efinance/东财时:预取一次,后续缓存命中 + + Args: + stock_codes: 待分析的股票代码列表 + + Returns: + 预取的股票数量(0 表示跳过预取) + """ + # Normalize all codes + stock_codes = [normalize_stock_code(c) for c in stock_codes] + + from provider._config import get_config + + config = get_config() + + # Issue #455: PREFETCH_REALTIME_QUOTES=false 可禁用预取,避免全市场拉取 + if not getattr(config, "prefetch_realtime_quotes", True): + logger.debug("[预取] PREFETCH_REALTIME_QUOTES=false,跳过批量预取") + return 0 + + # 如果实时行情被禁用,跳过预取 + if not config.enable_realtime_quote: + logger.debug("[预取] 实时行情功能已禁用,跳过预取") + return 0 + + # 检查优先级中是否包含全量拉取数据源 + # 注意:新增全量接口(如 tushare_realtime)时需同步更新此列表 + # 全量接口特征:一次 API 调用拉取全市场 5000+ 股票数据 + priority = config.realtime_source_priority.lower() + bulk_sources = ['efinance', 'akshare_em', 'tushare'] # 全量接口列表 + + # 如果优先级中前两个都不是全量数据源,跳过预取 + # 因为新浪/腾讯是单股票查询,不需要预取 + priority_list = [s.strip() for s in priority.split(',')] + first_bulk_source_index = None + for i, source in enumerate(priority_list): + if source in bulk_sources: + first_bulk_source_index = i + break + + # 如果没有全量数据源,或者全量数据源排在第 3 位之后,跳过预取 + if first_bulk_source_index is None or first_bulk_source_index >= 2: + logger.info(f"[预取] 当前优先级使用轻量级数据源(sina/tencent),无需预取") + return 0 + + # 如果股票数量少于 5 个,不进行批量预取(逐个查询更高效) + if len(stock_codes) < 5: + logger.info(f"[预取] 股票数量 {len(stock_codes)} < 5,跳过批量预取") + return 0 + + logger.info(f"[预取] 开始批量预取实时行情,共 {len(stock_codes)} 只股票...") + + # 尝试通过 efinance 或 akshare 预取 + # 只需要调用一次 get_realtime_quote,缓存机制会自动拉取全市场数据 + try: + # 用第一只股票触发全量拉取 + first_code = stock_codes[0] + quote = self.get_realtime_quote(first_code) + + if quote: + logger.info(f"[预取] 批量预取完成,缓存已填充") + return len(stock_codes) + else: + logger.warning(f"[预取] 批量预取失败,将使用逐个查询模式") + return 0 + + except Exception as e: + logger.error(f"[预取] 批量预取异常: {e}") + return 0 + + def get_realtime_quote(self, stock_code: str, *, log_final_failure: bool = True): + """ + 获取实时行情数据(自动故障切换) + + 故障切换策略(按配置的优先级): + 1. 美股:使用 YfinanceFetcher.get_realtime_quote() + 2. EfinanceFetcher.get_realtime_quote() + 3. AkshareFetcher.get_realtime_quote(source="em") - 东财 + 4. AkshareFetcher.get_realtime_quote(source="sina") - 新浪 + 5. AkshareFetcher.get_realtime_quote(source="tencent") - 腾讯 + 6. 返回 None(降级兜底) + + Args: + stock_code: 股票代码 + log_final_failure: Whether to emit the final "all sources failed" + summary log when no realtime quote is available. + + Returns: + UnifiedRealtimeQuote 对象,所有数据源都失败则返回 None + """ + raw_stock_code = (stock_code or "").strip() + # Normalize code (strip SH/SZ prefix etc.) 
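+        # e.g. 'SH600519' -> '600519'. The un-normalized raw_stock_code above is
+        # kept deliberately: the tushare branch below passes it through as-is,
+        # presumably because that source understands exchange-qualified codes.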
+ stock_code = normalize_stock_code(stock_code) + + from .akshare_fetcher import _is_us_code + from .us_index_mapping import is_us_index_code + from provider._config import get_config + + config = get_config() + + # 如果实时行情功能被禁用,直接返回 None + if not config.enable_realtime_quote: + logger.debug(f"[实时行情] 功能已禁用,跳过 {stock_code}") + return None + + # ---------------------------------------------------------- + # 美股 (指数 + 个股) / 港股 — 专用双源路由 + # 配置长桥后: Longbridge 首选, YFinance/AkShare 补充 + # 未配置长桥: YFinance/AkShare 首选, Longbridge 补充 + # 美股指数: 始终 YFinance 首选(Longbridge 不提供指数行情) + # ---------------------------------------------------------- + is_us_index = is_us_index_code(stock_code) + is_us = is_us_index or _is_us_code(stock_code) + is_hk = (not is_us) and _is_hk_market(stock_code) + + if is_us or is_hk: + prefer_lb = self._longbridge_preferred() and not is_us_index + if is_us: + primary_src = "LongbridgeFetcher" if prefer_lb else "YfinanceFetcher" + secondary_src = "YfinanceFetcher" if prefer_lb else "LongbridgeFetcher" + market_label = "美股指数" if is_us_index else "美股" + primary_kw: dict = {} + secondary_kw: dict = {} + else: + primary_src = "LongbridgeFetcher" if prefer_lb else "AkshareFetcher" + secondary_src = "AkshareFetcher" if prefer_lb else "LongbridgeFetcher" + market_label = "港股" + primary_kw = {"source": "hk"} if primary_src == "AkshareFetcher" else {} + secondary_kw = {"source": "hk"} if secondary_src == "AkshareFetcher" else {} + + primary_quote = self._try_fetcher_quote(stock_code, primary_src, **primary_kw) + if primary_quote is not None: + logger.info(f"[实时行情] {market_label} {stock_code} 成功获取 (来源: {primary_src})") + primary_quote = self._supplement_quote( + stock_code, primary_quote, secondary_src, **secondary_kw, + ) + if primary_quote is not None: + return primary_quote + if log_final_failure: + logger.info(f"[实时行情] {market_label} {stock_code} 无可用数据源") + return None + + # 获取配置的数据源优先级 + source_priority = config.realtime_source_priority.split(',') + + errors = [] + # primary_quote holds the first successful result; we may supplement + # missing fields (volume_ratio, turnover_rate, etc.) from later sources. 
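+        # Sketch of the intended flow: if e.g. sina answers first but leaves
+        # volume_ratio as None, sina becomes primary and a later em quote only
+        # fills the still-None fields; _merge_quote_fields never overwrites a
+        # value the primary already has.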
+ primary_quote = None + + for source in source_priority: + source = source.strip().lower() + + try: + quote = None + + if source == "efinance": + # 尝试 EfinanceFetcher + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "EfinanceFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code) + break + + elif source == "akshare_em": + # 尝试 AkshareFetcher 东财数据源 + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "AkshareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code, source="em") + break + + elif source == "akshare_sina": + # 尝试 AkshareFetcher 新浪数据源 + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "AkshareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code, source="sina") + break + + elif source in ("tencent", "akshare_qq"): + # 尝试 AkshareFetcher 腾讯数据源 + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "AkshareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code, source="tencent") + break + + elif source == "tushare": + # 尝试 TushareFetcher(需要 Tushare Pro 积分) + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "TushareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', raw_stock_code or stock_code) + break + + if quote is not None and quote.has_basic_data(): + if primary_quote is None: + # First successful source becomes primary + primary_quote = quote + logger.info(f"[实时行情] {stock_code} 成功获取 (来源: {source})") + # If all key supplementary fields are present, return early + if not self._quote_needs_supplement(primary_quote): + return primary_quote + # Otherwise, continue to try later sources for missing fields + logger.debug(f"[实时行情] {stock_code} 部分字段缺失,尝试从后续数据源补充") + supplement_attempts = 0 + else: + # Supplement missing fields from this source (limit attempts) + supplement_attempts += 1 + if supplement_attempts > 1: + logger.debug(f"[实时行情] {stock_code} 补充尝试已达上限,停止继续") + break + merged = self._merge_quote_fields(primary_quote, quote) + if merged: + logger.info(f"[实时行情] {stock_code} 从 {source} 补充了缺失字段: {merged}") + # Stop supplementing once all key fields are filled + if not self._quote_needs_supplement(primary_quote): + break + + except Exception as e: + error_msg = f"[{source}] 失败: {str(e)}" + logger.info(f"[实时行情] {stock_code} {error_msg},继续尝试下一个数据源") + errors.append(error_msg) + continue + + # Return primary even if some fields are still missing + if primary_quote is not None: + return primary_quote + + # 所有数据源都失败,返回 None(降级兜底) + if log_final_failure: + if errors: + logger.info(f"[实时行情] {stock_code} 所有数据源均失败: {'; '.join(errors)}") + else: + logger.info(f"[实时行情] {stock_code} 无可用数据源") + + return None + + # Fields worth supplementing from secondary sources when the primary + # source returns None for them. Ordered by importance. 
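+    # e.g. a primary quote missing only pe_ratio/pb_ratio stays "incomplete"
+    # until some secondary source supplies them; once no listed field is None,
+    # supplementing stops early.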
+ _SUPPLEMENT_FIELDS = [ + 'volume_ratio', 'turnover_rate', + 'pe_ratio', 'pb_ratio', 'total_mv', 'circ_mv', + 'amplitude', + ] + + @classmethod + def _quote_needs_supplement(cls, quote) -> bool: + """Check if any key supplementary field is still None.""" + for f in cls._SUPPLEMENT_FIELDS: + if getattr(quote, f, None) is None: + return True + return False + + @classmethod + def _merge_quote_fields(cls, primary, secondary) -> list: + """ + Copy non-None fields from *secondary* into *primary* where + *primary* has None. Returns list of field names that were filled. + """ + filled = [] + for f in cls._SUPPLEMENT_FIELDS: + if getattr(primary, f, None) is None: + val = getattr(secondary, f, None) + if val is not None: + setattr(primary, f, val) + filled.append(f) + return filled + + def _longbridge_preferred(self) -> bool: + """Return True when Longbridge keys are configured and available. + + When True, non-A-share routing (US & HK) uses Longbridge as the + primary data source with Yfinance/AkShare as fallback. + """ + for f in self._get_fetchers_snapshot(): + if f.name == "LongbridgeFetcher": + return hasattr(f, '_is_available') and f._is_available() + return False + + def _try_fetcher_quote(self, stock_code: str, fetcher_name: str, **kw): + """Try to get a realtime quote from a named fetcher; returns quote or None.""" + for f in self._get_fetchers_snapshot(): + if f.name != fetcher_name: + continue + if not hasattr(f, 'get_realtime_quote'): + return None + try: + q = self._call_fetcher_method(f, 'get_realtime_quote', stock_code, **kw) + if q is not None and q.has_basic_data(): + return q + except Exception as e: + logger.debug(f"[实时行情] {stock_code} {fetcher_name} 获取失败: {e}") + return None + return None + + def _supplement_quote(self, stock_code: str, primary_quote, fetcher_name: str, **kw): + """Supplement *primary_quote* with data from *fetcher_name*. + + If *primary_quote* is None, try *fetcher_name* as the sole source. + Returns the (potentially enriched) quote, or None. + """ + if primary_quote is not None: + if not self._quote_needs_supplement(primary_quote): + return primary_quote + try: + secondary = self._try_fetcher_quote(stock_code, fetcher_name, **kw) + if secondary is not None: + filled = self._merge_quote_fields(primary_quote, secondary) + if filled: + logger.info(f"[实时行情] {stock_code} 从 {fetcher_name} 补充了: {filled}") + except Exception as e: + logger.debug(f"[实时行情] {stock_code} {fetcher_name} 补充失败: {e}") + return primary_quote + + q = self._try_fetcher_quote(stock_code, fetcher_name, **kw) + if q is not None: + logger.info(f"[实时行情] {stock_code} 从 {fetcher_name} 获取成功 (独立数据源)") + return q + + def _supplement_from_longbridge(self, stock_code: str, primary_quote): + """Shortcut kept for backward-compat with A-share general loop.""" + return self._supplement_quote(stock_code, primary_quote, "LongbridgeFetcher") + + def get_chip_distribution(self, stock_code: str): + """ + 获取筹码分布数据(带熔断和多数据源降级) + + 策略: + 1. 检查配置开关 + 2. 检查熔断器状态 + 3. 依次尝试多个数据源:数据源优先级与获取daily的数据优先级一致 + 4. 所有数据源失败则返回 None(降级兜底) + + Args: + stock_code: 股票代码 + + Returns: + ChipDistribution 对象,失败则返回 None + """ + # Normalize code (strip SH/SZ prefix etc.) 
+        stock_code = normalize_stock_code(stock_code)
+
+        from .realtime_types import get_chip_circuit_breaker
+        from provider._config import get_config
+
+        config = get_config()
+
+        # 如果筹码分布功能被禁用,直接返回 None
+        if not config.enable_chip_distribution:
+            logger.debug(f"[筹码分布] 功能已禁用,跳过 {stock_code}")
+            return None
+
+        circuit_breaker = get_chip_circuit_breaker()
+
+        # 直接遍历管理器已经按 priority 排好序的数据源列表
+        for fetcher in self._get_fetchers_snapshot():
+            # 只处理实现了筹码分布逻辑的数据源
+            if not hasattr(fetcher, 'get_chip_distribution'):
+                continue
+
+            fetcher_name = fetcher.name
+            # 动态生成熔断器的 key,例如 "TushareFetcher" -> "tushare_chip"
+            source_key = f"{fetcher_name.replace('Fetcher', '').lower()}_chip"
+
+            # 检查熔断器状态
+            if not circuit_breaker.is_available(source_key):
+                logger.debug(f"[熔断] {fetcher_name} 筹码接口处于熔断状态,尝试下一个")
+                continue
+
+            try:
+                chip = self._call_fetcher_method(fetcher, 'get_chip_distribution', stock_code)
+                if chip is not None:
+                    circuit_breaker.record_success(source_key)
+                    logger.info(f"[筹码分布] {stock_code} 成功获取 (来源: {fetcher_name})")
+                    return chip
+                else:
+                    # 空结果:释放 HALF_OPEN 探测名额,避免卡死
+                    circuit_breaker.record_inconclusive(source_key)
+            except Exception as e:
+                logger.warning(f"[筹码分布] {fetcher_name} 获取 {stock_code} 失败: {e}")
+                circuit_breaker.record_failure(source_key, str(e))
+                continue
+
+        logger.warning(f"[筹码分布] {stock_code} 所有数据源均失败")
+        return None
+
+    def get_stock_name(self, stock_code: str, allow_realtime: bool = True) -> Optional[str]:
+        """
+        获取股票中文名称(自动切换数据源)
+
+        尝试从多个数据源获取股票名称:
+        1. 先从内存缓存中获取(如果有)
+        2. 再尝试本地维护映射与 stocks.index.json 索引
+        3. 然后按需查询实时行情
+        4. 依次尝试各个数据源的 get_stock_name 方法
+
+        Args:
+            stock_code: 股票代码
+            allow_realtime: Whether to query realtime quote first. Set False when
+                caller only wants lightweight prefetch without triggering heavy
+                realtime source calls.
+
+        Returns:
+            股票中文名称,所有数据源都失败则返回空字符串 ""
+        """
+        raw_stock_code = (stock_code or "").strip()
+        # Normalize code (strip SH/SZ prefix etc.)
+        stock_code = normalize_stock_code(stock_code)
+        static_name = STOCK_NAME_MAP.get(stock_code)
+
+        # 1. 先检查缓存
+        cached_name = self._get_cached_stock_name(stock_code)
+        if cached_name is not None:
+            return cached_name
+
+        if is_meaningful_stock_name(static_name, stock_code):
+            return self._cache_stock_name(stock_code, static_name) or static_name
+
+        index_name = get_index_stock_name(stock_code)
+        if is_meaningful_stock_name(index_name, stock_code):
+            return self._cache_stock_name(stock_code, index_name) or index_name
+
+        # 2. 尝试从实时行情中获取(最快,可按需禁用)
+        if allow_realtime:
+            quote = self.get_realtime_quote(raw_stock_code or stock_code, log_final_failure=False)
+            if quote and hasattr(quote, 'name') and is_meaningful_stock_name(getattr(quote, 'name', ''), stock_code):
+                name = quote.name
+                self._cache_stock_name(stock_code, name)
+                logger.info(f"[股票名称] 从实时行情获取: {stock_code} -> {name}")
+                return name
+
+        # 3. 依次尝试各个数据源
+        from .akshare_fetcher import _is_us_code
+        is_us = _is_us_code(stock_code)
+        _US_CAPABLE_FETCHERS = {"YfinanceFetcher", "LongbridgeFetcher"}
+        for fetcher in self._get_fetchers_snapshot():
+            if not hasattr(fetcher, 'get_stock_name'):
+                continue
+            if is_us and fetcher.name not in _US_CAPABLE_FETCHERS:
+                continue
+            try:
+                name = self._call_fetcher_method(fetcher, 'get_stock_name', stock_code)
+                if is_meaningful_stock_name(name, stock_code):
+                    self._cache_stock_name(stock_code, name)
+                    logger.info(f"[股票名称] 从 {fetcher.name} 获取: {stock_code} -> {name}")
+                    return name
+            except Exception as e:
+                logger.debug(f"[股票名称] {fetcher.name} 获取失败: {e}")
+                continue
+
+        # 4.
所有数据源都失败 + logger.warning(f"[股票名称] 所有数据源都无法获取 {stock_code} 的名称") + return "" + + def get_belong_boards(self, stock_code: str) -> List[Dict[str, Any]]: + """ + Get stock membership boards through capability probing. + + Keep this at manager layer to avoid changing BaseFetcher abstraction. + """ + stock_code = normalize_stock_code(stock_code) + if _market_tag(stock_code) != "cn": + return [] + for fetcher in self._fetchers: + if not hasattr(fetcher, "get_belong_board"): + continue + try: + raw_data = fetcher.get_belong_board(stock_code) + boards = self._normalize_belong_boards(raw_data) + if boards: + logger.info(f"[{fetcher.name}] 获取所属板块成功: {stock_code}, count={len(boards)}") + return boards + except Exception as e: + logger.debug(f"[{fetcher.name}] 获取所属板块失败: {e}") + continue + return [] + + def prefetch_stock_names(self, stock_codes: List[str], use_bulk: bool = False) -> None: + """ + Pre-fetch stock names into cache before parallel analysis (Issue #455). + + When use_bulk=False, only calls get_stock_name per code (no get_stock_list), + avoiding full-market fetch. Sequential execution to avoid rate limits. + + Args: + stock_codes: Stock codes to prefetch. + use_bulk: If True, may use get_stock_list (full fetch). Default False. + """ + if not stock_codes: + return + stock_codes = [normalize_stock_code(c) for c in stock_codes] + if use_bulk: + self.batch_get_stock_names(stock_codes) + return + for code in stock_codes: + # Skip realtime lookup to avoid triggering expensive full-market quote + # requests during the prefetch phase. + self.get_stock_name(code, allow_realtime=False) + + def batch_get_stock_names(self, stock_codes: List[str]) -> Dict[str, str]: + """ + 批量获取股票中文名称 + + 先尝试从支持批量查询的数据源获取股票列表, + 然后再逐个查询缺失的股票名称。 + + Args: + stock_codes: 股票代码列表 + + Returns: + {股票代码: 股票名称} 字典 + """ + result = {} + missing_codes = set(stock_codes) + + # 1. 先检查缓存 + self._ensure_concurrency_guards() + with self._stock_name_cache_lock: + for code in stock_codes: + cached_name = self._stock_name_cache.get(code) + if cached_name is not None: + result[code] = cached_name + missing_codes.discard(code) + + if not missing_codes: + return result + + # 2. 尝试批量获取股票列表 + for fetcher in self._get_fetchers_snapshot(): + if hasattr(fetcher, 'get_stock_list') and missing_codes: + try: + stock_list = self._call_fetcher_method(fetcher, 'get_stock_list') + if stock_list is not None and not stock_list.empty: + cache_updates: Dict[str, str] = {} + for _, row in stock_list.iterrows(): + code = row.get('code') + name = row.get('name') + if code and name: + cache_updates[code] = name + if code in missing_codes: + result[code] = name + missing_codes.discard(code) + + if cache_updates: + with self._stock_name_cache_lock: + self._stock_name_cache.update(cache_updates) + + if not missing_codes: + break + + logger.info(f"[股票名称] 从 {fetcher.name} 批量获取完成,剩余 {len(missing_codes)} 个待查") + except Exception as e: + logger.debug(f"[股票名称] {fetcher.name} 批量获取失败: {e}") + continue + + # 3. 
逐个获取剩余的 + for code in list(missing_codes): + name = self.get_stock_name(code) + if name: + result[code] = name + missing_codes.discard(code) + + logger.info(f"[股票名称] 批量获取完成,成功 {len(result)}/{len(stock_codes)}") + return result + + def get_main_indices(self, region: str = "cn") -> List[Dict[str, Any]]: + """获取主要指数实时行情(自动切换数据源)""" + if region == "cn": + tickflow_fetcher = self._get_tickflow_fetcher() + if tickflow_fetcher is not None: + try: + data = tickflow_fetcher.get_main_indices(region=region) + if data: + logger.info("[TickFlowFetcher] 获取指数行情成功") + return data + except Exception as e: + logger.warning(f"[TickFlowFetcher] 获取指数行情失败: {e}") + + for fetcher in self._fetchers: + try: + data = fetcher.get_main_indices(region=region) + if data: + logger.info(f"[{fetcher.name}] 获取指数行情成功") + return data + except Exception as e: + logger.warning(f"[{fetcher.name}] 获取指数行情失败: {e}") + continue + return [] + + def get_market_stats(self) -> Dict[str, Any]: + """获取市场涨跌统计(自动切换数据源)""" + tickflow_fetcher = self._get_tickflow_fetcher() + if tickflow_fetcher is not None: + try: + data = tickflow_fetcher.get_market_stats() + if data: + logger.info("[TickFlowFetcher] 获取市场统计成功") + return data + except Exception as e: + logger.warning(f"[TickFlowFetcher] 获取市场统计失败: {e}") + + for fetcher in self._fetchers: + try: + data = fetcher.get_market_stats() + if data: + logger.info(f"[{fetcher.name}] 获取市场统计成功") + return data + except Exception as e: + logger.warning(f"[{fetcher.name}] 获取市场统计失败: {e}") + continue + return {} + + def _run_with_timeout( + self, + task: Callable[[], Any], + timeout_seconds: float, + task_name: str, + ) -> Tuple[Optional[Any], Optional[str], int]: + """ + Execute a task in a short-lived thread and enforce a timeout. + + Returns: + (result, error, duration_ms) + """ + start = time.time() + timeout_value = max(0.0, timeout_seconds) + if timeout_value <= 0: + return None, f"{task_name} timeout", 0 + result_holder: Dict[str, Any] = {} + error_holder: Dict[str, Exception] = {} + + if not self._fundamental_timeout_slots.acquire(blocking=False): + return None, f"{task_name} timeout worker pool exhausted", int(timeout_value * 1000) + + def runner() -> None: + try: + result_holder["value"] = task() + except Exception as exc: + error_holder["value"] = exc + finally: + try: + self._fundamental_timeout_slots.release() + except ValueError: + pass + + worker = Thread(target=runner, daemon=True, name=f"fundamental-{task_name}") + try: + worker.start() + except Exception as exc: + try: + self._fundamental_timeout_slots.release() + except ValueError: + pass + return None, str(exc), int((time.time() - start) * 1000) + worker.join(timeout=timeout_value) + if worker.is_alive(): + return None, f"{task_name} timeout", int(timeout_value * 1000) + if "value" in error_holder: + return None, str(error_holder["value"]), int((time.time() - start) * 1000) + return result_holder.get("value"), None, int((time.time() - start) * 1000) + + def _run_with_retry( + self, + task: Callable[[], Any], + timeout_seconds: float, + task_name: str, + ) -> Tuple[Optional[Any], Optional[str], int]: + """ + Execute a task with bounded budget and best-effort retries. 
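+
+        Illustrative accounting (numbers hypothetical): with timeout_seconds=3.0
+        and fundamental_retry_max=2, a first attempt that fails after 1.2s
+        leaves ~1.8s for the single retry; once the budget is spent, the last
+        error is returned instead of starting another attempt.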
+ + Returns: + (result, error, total_duration_ms) + """ + config = self._get_fundamental_config() + attempts = max(1, int(config.fundamental_retry_max)) + remaining_seconds = max(0.0, float(timeout_seconds)) + total_cost_ms = 0 + last_error: Optional[str] = None + + for _ in range(attempts): + if remaining_seconds <= 0: + break + result, err, cost_ms = self._run_with_timeout(task, remaining_seconds, task_name) + total_cost_ms += cost_ms + remaining_seconds = max(0.0, remaining_seconds - cost_ms / 1000) + if err is None: + return result, None, total_cost_ms + last_error = err + if remaining_seconds <= 0: + break + + return None, last_error, total_cost_ms + + def _get_fundamental_config(self): + from provider._config import get_config + return get_config() + + @staticmethod + def _normalize_source_chain( + entries: Any, + provider: str, + result: str, + duration_ms: int, + ) -> List[Dict[str, Any]]: + """Normalize free-form source chain entries to structured dict list.""" + if entries is None: + return [{"provider": provider, "result": result, "duration_ms": duration_ms}] + + normalized: List[Dict[str, Any]] = [] + if not isinstance(entries, (list, tuple)): + entries = [entries] + + for item in entries: + if isinstance(item, dict): + normalized.append({ + "provider": str(item.get("provider") or provider), + "result": str(item.get("result") or result), + "duration_ms": int(item.get("duration_ms", duration_ms)), + }) + continue + + if item is None: + continue + + provider_name = str(item) + normalized.append({ + "provider": provider_name, + "result": result, + "duration_ms": duration_ms, + }) + + if not normalized: + return [{"provider": provider, "result": result, "duration_ms": duration_ms}] + + return normalized + + @staticmethod + def _block_status(payload: Dict[str, Any], available: bool = True) -> str: + if not available: + return "not_supported" + if not payload: + return "partial" + return "ok" + + @staticmethod + def _build_fundamental_block( + status: str, + payload: Optional[Dict[str, Any]] = None, + source_chain: Optional[List[Dict[str, Any]]] = None, + errors: Optional[List[str]] = None, + ) -> Dict[str, Any]: + return { + "status": status, + "coverage": {"status": status}, + "source_chain": source_chain or [], + "errors": errors or [], + "data": payload or {}, + } + + @staticmethod + def _has_meaningful_payload(payload: Any) -> bool: + if payload is None: + return False + if isinstance(payload, str): + normalized = payload.strip().lower() + return normalized not in ("", "-", "nan", "none", "null", "n/a", "na") + if isinstance(payload, dict): + return any(DataFetcherManager._has_meaningful_payload(v) for v in payload.values()) + if isinstance(payload, pd.DataFrame): + if payload.empty: + return False + return any( + DataFetcherManager._has_meaningful_payload(v) + for v in payload.to_numpy().flat + ) + if isinstance(payload, (pd.Series, pd.Index)): + return any(DataFetcherManager._has_meaningful_payload(v) for v in payload.tolist()) + if isinstance(payload, np.ndarray): + if payload.ndim == 0: + payload = payload.item() + else: + return any( + DataFetcherManager._has_meaningful_payload(v) + for v in payload.flat + ) + if isinstance(payload, (list, tuple, set)): + return any(DataFetcherManager._has_meaningful_payload(v) for v in payload) + if DataFetcherManager._try_scalar_isna(payload, "fundamental_payload") is True: + return False + return True + + @staticmethod + def _infer_block_status(payload: Any, fallback_status: str) -> str: + if 
DataFetcherManager._has_meaningful_payload(payload): + return "ok" + if fallback_status in ("failed", "partial", "not_supported"): + return fallback_status + return "partial" + + @staticmethod + def _should_cache_fundamental_context(context: Any) -> bool: + if not isinstance(context, dict): + return False + status = str(context.get("status", "")).strip().lower() + if status == "ok": + return True + if status == "failed": + return False + for block in ( + "valuation", + "growth", + "earnings", + "institution", + "capital_flow", + "dragon_tiger", + "boards", + ): + payload = context.get(block, {}) + if isinstance(payload, dict) and DataFetcherManager._has_meaningful_payload(payload.get("data")): + return True + return False + + def _build_market_not_supported(self, market: str, reason: str) -> Dict[str, Any]: + blocks = { + "valuation": self._build_fundamental_block( + "partial" if market == "etf" else "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "growth": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "earnings": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "institution": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "capital_flow": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "dragon_tiger": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "boards": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + } + return { + "market": market, + "status": "partial" if market == "etf" else "not_supported", + "coverage": { + block: blocks[block]["status"] for block in blocks + }, + "source_chain": [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + "errors": [reason], + **blocks, + } + + def build_failed_fundamental_context(self, stock_code: str, reason: str) -> Dict[str, Any]: + """Build a consistent failed-context payload for caller-side fallback.""" + market = _market_tag(stock_code) + block_names = ( + "valuation", + "growth", + "earnings", + "institution", + "capital_flow", + "dragon_tiger", + "boards", + ) + blocks = { + block: self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + [reason], + ) + for block in block_names + } + return { + "market": market, + "status": "failed", + "coverage": {block: "failed" for block in block_names}, + "source_chain": [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + "errors": [reason], + **blocks, + } + + def get_fundamental_context( + self, + stock_code: str, + budget_seconds: Optional[float] = None + ) -> Dict[str, Any]: + """ + Aggregate fundamental blocks with fail-open semantics. 
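+
+        Returned shape (sketch; keys as assembled below): a dict carrying
+        "market", an overall "status" ("ok" / "partial" / "failed" /
+        "not_supported"), per-block "coverage", the merged "source_chain" and
+        "errors", "elapsed_ms", plus one block dict each for valuation, growth,
+        earnings, institution, capital_flow, dragon_tiger and boards.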
+ """ + from provider._config import get_config + + config = get_config() + if not config.enable_fundamental_pipeline: + return self._build_market_not_supported( + market=_market_tag(stock_code), + reason="fundamental pipeline disabled", + ) + + stock_code = normalize_stock_code(stock_code) + market = _market_tag(stock_code) + is_etf = _is_etf_code(stock_code) + if market in {"us", "hk"}: + return self._build_market_not_supported( + market=market, + reason="market not supported", + ) + + stage_timeout = float( + budget_seconds if budget_seconds is not None else config.fundamental_stage_timeout_seconds + ) + stage_timeout = max(0.0, stage_timeout) + fetch_timeout = float(config.fundamental_fetch_timeout_seconds) + fetch_timeout = max(0.0, fetch_timeout) + + cache_ttl = int(config.fundamental_cache_ttl_seconds) + cache_max_entries = max(0, int(getattr(config, "fundamental_cache_max_entries", 256))) + cache_key = self._get_fundamental_cache_key(stock_code, stage_timeout) + if cache_ttl > 0: + self._prune_fundamental_cache(cache_ttl, cache_max_entries) + with self._fundamental_cache_lock: + cache_item = self._fundamental_cache.get(cache_key) + if cache_item: + age = time.time() - float(cache_item.get("ts", 0)) + if age <= cache_ttl: + return cache_item.get("context", {}) + + remaining_seconds = stage_timeout + result_ctx: Dict[str, Any] = { + "market": market, + "valuation": {}, + "growth": {}, + "earnings": {}, + "institution": {}, + "capital_flow": {}, + "dragon_tiger": {}, + "boards": {}, + "coverage": {}, + "source_chain": [], + "errors": [], + } + + start_ts = time.time() + + def _consume_budget(consumed_ms: int) -> None: + nonlocal remaining_seconds + remaining_seconds = max(0.0, remaining_seconds - consumed_ms / 1000.0) + + valuation_timeout = min(fetch_timeout, remaining_seconds) + if valuation_timeout > 0: + quote_payload, valuation_err, valuation_ms = self._run_with_retry( + lambda: self.get_realtime_quote(stock_code), + valuation_timeout, + "fundamental_valuation", + ) + _consume_budget(valuation_ms) + else: + quote_payload, valuation_err, valuation_ms = None, "fundamental stage timeout", 0 + + valuation_payload = { + "pe_ratio": getattr(quote_payload, "pe_ratio", None) if quote_payload else None, + "pb_ratio": getattr(quote_payload, "pb_ratio", None) if quote_payload else None, + "total_mv": getattr(quote_payload, "total_mv", None) if quote_payload else None, + "circ_mv": getattr(quote_payload, "circ_mv", None) if quote_payload else None, + } + valuation_status = self._infer_block_status( + valuation_payload, + "partial" if quote_payload is not None else "not_supported", + ) + if valuation_status == "partial" and valuation_err and not self._has_meaningful_payload(valuation_payload): + valuation_status = "failed" + result_ctx["valuation"] = self._build_fundamental_block( + valuation_status, + valuation_payload, + self._normalize_source_chain( + [{"provider": "realtime_quote", "result": valuation_status, "duration_ms": valuation_ms}], + "realtime_quote", + valuation_status, + valuation_ms, + ), + [valuation_err] if valuation_err else [], + ) + + # growth / earnings / institution (one AkShare call) + if remaining_seconds <= 0: + bundle_status = "failed" + bundle_payload: Dict[str, Any] = {} + bundle_errors = ["fundamental stage timeout"] + bundle_ms = 0 + else: + bundle_timeout = min(fetch_timeout, remaining_seconds) + bundle_payload, bundle_err_msg, bundle_ms = self._run_with_retry( + lambda: self._fundamental_adapter.get_fundamental_bundle(stock_code), + bundle_timeout, + 
"fundamental_bundle", + ) + _consume_budget(bundle_ms) + if not isinstance(bundle_payload, dict): + bundle_status = "failed" + bundle_payload = {} + bundle_errors = ["fundamental_bundle failed"] + if bundle_err_msg: + bundle_errors.append(bundle_err_msg) + else: + bundle_status = str(bundle_payload.get("status", "not_supported")) + bundle_errors = [bundle_err_msg] if bundle_err_msg else [] + + bundle_chain = self._normalize_source_chain( + bundle_payload.get("source_chain", []), + "fundamental_bundle", + bundle_status, + bundle_ms, + ) if isinstance(bundle_payload, dict) else self._normalize_source_chain( + None, + "fundamental_bundle", + bundle_status, + bundle_ms, + ) + growth_payload = bundle_payload.get("growth", {}) if isinstance(bundle_payload, dict) else {} + earnings_payload = bundle_payload.get("earnings", {}) if isinstance(bundle_payload, dict) else {} + institution_payload = bundle_payload.get("institution", {}) if isinstance(bundle_payload, dict) else {} + if not isinstance(growth_payload, dict): + growth_payload = {} + else: + growth_payload = dict(growth_payload) + if not isinstance(earnings_payload, dict): + earnings_payload = {} + else: + earnings_payload = dict(earnings_payload) + if not isinstance(institution_payload, dict): + institution_payload = {} + else: + institution_payload = dict(institution_payload) + + # Derive TTM dividend yield from already-fetched quote price; avoid extra quote calls. + earnings_extra_errors: List[str] = [] + dividend_payload = earnings_payload.get("dividend") + if isinstance(dividend_payload, dict): + dividend_payload = dict(dividend_payload) + ttm_cash_raw = dividend_payload.get("ttm_cash_dividend_per_share") + ttm_cash = None + if ttm_cash_raw is not None: + try: + ttm_cash = float(ttm_cash_raw) + except (TypeError, ValueError): + earnings_extra_errors.append("invalid_ttm_cash_dividend_per_share") + if isinstance(quote_payload, dict): + latest_price_raw = quote_payload.get("price") + else: + latest_price_raw = getattr(quote_payload, "price", None) if quote_payload else None + latest_price = None + if latest_price_raw is not None: + try: + latest_price = float(latest_price_raw) + except (TypeError, ValueError): + latest_price = None + ttm_yield = None + if ttm_cash is not None: + if latest_price is not None and latest_price > 0: + ttm_yield = round(ttm_cash / latest_price * 100.0, 4) + else: + earnings_extra_errors.append("invalid_price_for_ttm_dividend_yield") + + dividend_payload["ttm_dividend_yield_pct"] = ttm_yield + if ttm_yield is not None: + dividend_payload["yield_formula"] = "ttm_cash_dividend_per_share / latest_price * 100" + earnings_payload["dividend"] = dividend_payload + + adapter_errors = list(bundle_payload.get("errors", [])) if isinstance(bundle_payload, dict) else [] + adapter_errors.extend(bundle_errors) + growth_errors = list(adapter_errors) + earnings_errors = list(adapter_errors) + earnings_errors.extend(earnings_extra_errors) + institution_errors = list(adapter_errors) + + growth_status = self._infer_block_status(growth_payload, bundle_status) + earnings_status = self._infer_block_status(earnings_payload, bundle_status) + institution_status = self._infer_block_status(institution_payload, bundle_status) + + result_ctx["growth"] = self._build_fundamental_block( + growth_status, + growth_payload, + bundle_chain, + growth_errors, + ) + result_ctx["earnings"] = self._build_fundamental_block( + earnings_status, + earnings_payload, + bundle_chain, + earnings_errors, + ) + result_ctx["institution"] = 
self._build_fundamental_block( + institution_status, + institution_payload, + bundle_chain, + institution_errors, + ) + + # capital flow + if is_etf: + result_ctx["capital_flow"] = self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["etf not fully supported"], + ) + result_ctx["dragon_tiger"] = self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["etf not fully supported"], + ) + result_ctx["boards"] = self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["etf not fully supported"], + ) + result_ctx["status"] = "partial" + else: + capital_flow_budget = min(fetch_timeout, remaining_seconds) + capital_flow_start = time.time() + result_ctx["capital_flow"] = self.get_capital_flow_context( + stock_code, + budget_seconds=capital_flow_budget, + ) + _consume_budget(int((time.time() - capital_flow_start) * 1000)) + + dragon_tiger_budget = min(fetch_timeout, remaining_seconds) + dragon_tiger_start = time.time() + result_ctx["dragon_tiger"] = self.get_dragon_tiger_context( + stock_code, + budget_seconds=dragon_tiger_budget, + ) + _consume_budget(int((time.time() - dragon_tiger_start) * 1000)) + + result_ctx["boards"] = self.get_board_context( + stock_code, + budget_seconds=min(fetch_timeout, remaining_seconds), + ) + + block_statuses = { + "valuation": result_ctx["valuation"].get("status", "not_supported"), + "growth": result_ctx["growth"].get("status", "not_supported"), + "earnings": result_ctx["earnings"].get("status", "not_supported"), + "institution": result_ctx["institution"].get("status", "not_supported"), + "capital_flow": result_ctx["capital_flow"].get("status", "not_supported"), + "dragon_tiger": result_ctx["dragon_tiger"].get("status", "not_supported"), + "boards": result_ctx["boards"].get("status", "not_supported"), + } + result_ctx["coverage"] = block_statuses + for block in ( + "valuation", + "growth", + "earnings", + "institution", + "capital_flow", + "dragon_tiger", + "boards", + ): + result_ctx["errors"].extend(result_ctx[block].get("errors", [])) + result_ctx["source_chain"].extend(result_ctx[block].get("source_chain", [])) + + if is_etf: + # Keep ETF downgrade semantics for overall status even when valuation is available. 
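+            # e.g. an ETF whose valuation block is "ok" while every other
+            # block is "not_supported" still reports overall "partial" below,
+            # never "ok".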
+ result_ctx["status"] = ( + "not_supported" if all(value == "not_supported" for value in block_statuses.values()) else "partial" + ) + elif all(value == "not_supported" for value in block_statuses.values()): + result_ctx["status"] = "not_supported" + elif "failed" in block_statuses.values() or "partial" in block_statuses.values(): + result_ctx["status"] = "partial" + else: + result_ctx["status"] = "ok" + + result_ctx["elapsed_ms"] = int((time.time() - start_ts) * 1000) + if cache_ttl > 0 and self._should_cache_fundamental_context(result_ctx): + with self._fundamental_cache_lock: + self._fundamental_cache[cache_key] = { + "ts": time.time(), + "context": result_ctx, + } + self._prune_fundamental_cache(cache_ttl, cache_max_entries) + return result_ctx + + def get_capital_flow_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: + """资金流向块(fail-open)。""" + from provider._config import get_config + + config = get_config() + stock_code = normalize_stock_code(stock_code) + timeout = float(budget_seconds if budget_seconds is not None else config.fundamental_fetch_timeout_seconds) + if _market_tag(stock_code) != "cn" or _is_etf_code(stock_code): + return self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["not supported"], + ) + + if timeout <= 0: + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + ["fundamental stage timeout"], + ) + payload, err, cost_ms = self._run_with_retry( + lambda: self._fundamental_adapter.get_capital_flow(stock_code), + timeout, + "capital_flow", + ) + if not isinstance(payload, dict): + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": cost_ms}], + [err or "capital_flow failed"], + ) + + stock_flow = payload.get("stock_flow") or {} + sector_rankings = payload.get("sector_rankings") or {} + has_stock_flow = False + if isinstance(stock_flow, dict): + has_stock_flow = any(v is not None for v in stock_flow.values()) + has_sector_rankings = bool(sector_rankings.get("top")) or bool(sector_rankings.get("bottom")) + adapter_status = str(payload.get("status", "not_supported")) + if has_stock_flow or has_sector_rankings: + capital_flow_status = "ok" + elif adapter_status == "not_supported": + capital_flow_status = "not_supported" + else: + capital_flow_status = "partial" + + return self._build_fundamental_block( + capital_flow_status, + { + "stock_flow": payload.get("stock_flow", {}), + "sector_rankings": payload.get("sector_rankings", {}), + }, + self._normalize_source_chain( + payload.get("source_chain", []), + "capital_flow", + capital_flow_status, + cost_ms, + ), + list(payload.get("errors", [])) + ([err] if err else []), + ) + + def get_dragon_tiger_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: + """龙虎榜块(fail-open)。""" + from provider._config import get_config + + config = get_config() + stock_code = normalize_stock_code(stock_code) + timeout = float(budget_seconds if budget_seconds is not None else config.fundamental_fetch_timeout_seconds) + if _market_tag(stock_code) != "cn" or _is_etf_code(stock_code): + return self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["not supported"], + ) + + if timeout <= 0: + return 
self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + ["fundamental stage timeout"], + ) + payload, err, cost_ms = self._run_with_retry( + lambda: self._fundamental_adapter.get_dragon_tiger_flag(stock_code), + timeout, + "dragon_tiger", + ) + if not isinstance(payload, dict): + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": cost_ms}], + [err or "dragon_tiger failed"], + ) + return self._build_fundamental_block( + (payload.get("status") if isinstance(payload.get("status"), str) else "partial"), + { + "is_on_list": bool(payload.get("is_on_list", False)), + "recent_count": int(payload.get("recent_count", 0)), + "latest_date": payload.get("latest_date"), + }, + self._normalize_source_chain( + payload.get("source_chain", []), + "dragon_tiger", + str(payload.get("status", "ok")), + cost_ms, + ), + list(payload.get("errors", [])) + ([err] if err else []), + ) + + def get_board_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: + """板块榜单块(fail-open)。""" + from provider._config import get_config + + config = get_config() + stock_code = normalize_stock_code(stock_code) + timeout = float(budget_seconds if budget_seconds is not None else config.fundamental_fetch_timeout_seconds) + if _market_tag(stock_code) != "cn" or _is_etf_code(stock_code): + return self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["not supported"], + ) + + if timeout <= 0: + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + ["fundamental stage timeout"], + ) + + def task() -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]], str]: + return self._get_sector_rankings_with_meta(5) + + rankings, err, cost_ms = self._run_with_retry(task, timeout, "boards") + if isinstance(rankings, tuple) and len(rankings) == 4: + top, bottom, chain, chain_error = rankings + if chain_error and not err: + err = chain_error + if not top and not bottom: + return self._build_fundamental_block( + "failed", + {}, + chain if chain else [{"provider": "sector_rankings", "result": "failed", "duration_ms": cost_ms}], + [err or "boards empty from all sources"], + ) + board_status = "ok" if top and bottom else "partial" + return self._build_fundamental_block( + board_status, + {"top": top or [], "bottom": bottom or []}, + chain if chain else self._normalize_source_chain( + ["sector_rankings"], + "boards", + board_status, + cost_ms, + ), + [err] if err else [], + ) + + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "sector_rankings", "result": "failed", "duration_ms": cost_ms}], + [err or "boards failed"], + ) + + def _get_sector_rankings_with_meta( + self, + n: int = 5, + ) -> Tuple[List[Dict], List[Dict], List[Dict[str, Any]], str]: + """Get sector rankings with ordered fallback chain metadata.""" + source_chain: List[Dict[str, Any]] = [] + last_error = "" + + # 直接遍历管理器已经按 priority 排好序的数据源列表 + for fetcher in self._fetchers: + if not hasattr(fetcher, 'get_sector_rankings'): + continue + + start = time.time() + try: + data = fetcher.get_sector_rankings(n) + duration_ms = int((time.time() - start) * 1000) + if data and data[0] is not None and data[1] is not None: + source_chain.append( + { + "provider": fetcher.name, + "result": "ok", + 
"duration_ms": duration_ms, + } + ) + logger.info(f"[{fetcher.name}] 获取板块排行成功") + return data[0], data[1], source_chain, "" + + last_error = f"{fetcher.name}返回空结果" + source_chain.append( + { + "provider": fetcher.name, + "result": "empty", + "duration_ms": duration_ms, + "error": last_error, + } + ) + except Exception as e: + error_type, error_reason = summarize_exception(e) + last_error = f"{fetcher.name} ({error_type}) {error_reason}" + duration_ms = int((time.time() - start) * 1000) + source_chain.append( + { + "provider": fetcher.name, + "result": "failed", + "duration_ms": duration_ms, + "error": error_reason, + } + ) + logger.warning(f"[{fetcher.name}] 获取板块排行失败: {error_reason}") + + return [], [], source_chain, last_error + + def get_sector_rankings(self, n: int = 5) -> Tuple[List[Dict], List[Dict]]: + """获取板块涨跌榜(自动切换数据源)""" + # 按需求固定回退顺序:Akshare(EM) -> Akshare(Sina) -> Tushare -> Efinance + top, bottom, _, last_error = self._get_sector_rankings_with_meta(n) + if top or bottom: + return top, bottom + logger.warning(f"[板块排行] 所有数据源均失败,最终错误: {last_error}") + return [], [] diff --git a/src/provider/efinance_fetcher.py b/src/provider/efinance_fetcher.py new file mode 100644 index 00000000..48c4e255 --- /dev/null +++ b/src/provider/efinance_fetcher.py @@ -0,0 +1,1238 @@ +# -*- coding: utf-8 -*- +""" +=================================== +EfinanceFetcher - 优先数据源 (Priority 0) +=================================== + +数据来源:东方财富爬虫(通过 efinance 库) +特点:免费、无需 Token、数据全面、API 简洁 +仓库:https://github.com/Micro-sheep/efinance + +与 AkshareFetcher 类似,但 efinance 库: +1. API 更简洁易用 +2. 支持批量获取数据 +3. 更稳定的接口封装 + +防封禁策略: +1. 每次请求前随机休眠 1.5-3.0 秒 +2. 随机轮换 User-Agent +3. 使用 tenacity 实现指数退避重试 +4. 熔断器机制:连续失败后自动冷却 +""" + +import logging +import os +import random +import re +import time +from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional, Dict, Any, List, Tuple + +import pandas as pd +import requests # 引入 requests 以捕获异常 +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +# Timeout (seconds) for efinance library calls that go through eastmoney APIs +# with no built-in timeout. Prevents indefinite hangs when hosts are unreachable. 
+try: + _EF_CALL_TIMEOUT = int(os.environ.get("EFINANCE_CALL_TIMEOUT", "30")) +except (ValueError, TypeError): + import logging as _logging + _logging.getLogger(__name__).warning( + "EFINANCE_CALL_TIMEOUT is not a valid integer, using default 30s" + ) + _EF_CALL_TIMEOUT = 30 + +from provider._patch.eastmoney_patch import eastmoney_patch +from provider._config import get_config +from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS,is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code +from .realtime_types import ( + UnifiedRealtimeQuote, RealtimeSource, + get_realtime_circuit_breaker, + safe_float, safe_int # 使用统一的类型转换函数 +) + + +# 保留旧的类型别名,用于向后兼容 +@dataclass +class EfinanceRealtimeQuote: + """ + 实时行情数据(来自 efinance)- 向后兼容别名 + + 新代码建议使用 UnifiedRealtimeQuote + """ + code: str + name: str = "" + price: float = 0.0 # 最新价 + change_pct: float = 0.0 # 涨跌幅(%) + change_amount: float = 0.0 # 涨跌额 + + # 量价指标 + volume: int = 0 # 成交量 + amount: float = 0.0 # 成交额 + turnover_rate: float = 0.0 # 换手率(%) + amplitude: float = 0.0 # 振幅(%) + + # 价格区间 + high: float = 0.0 # 最高价 + low: float = 0.0 # 最低价 + open_price: float = 0.0 # 开盘价 + + def to_dict(self) -> Dict[str, Any]: + """转换为字典""" + return { + 'code': self.code, + 'name': self.name, + 'price': self.price, + 'change_pct': self.change_pct, + 'change_amount': self.change_amount, + 'volume': self.volume, + 'amount': self.amount, + 'turnover_rate': self.turnover_rate, + 'amplitude': self.amplitude, + 'high': self.high, + 'low': self.low, + 'open': self.open_price, + } + + +logger = logging.getLogger(__name__) + +EASTMONEY_HISTORY_ENDPOINT = "push2his.eastmoney.com/api/qt/stock/kline/get" + + +# User-Agent 池,用于随机轮换 +USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', +] + + +# 缓存实时行情数据(避免重复请求) +# TTL 设为 10 分钟 (600秒):批量分析场景下避免重复拉取 +_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 600 # 10分钟缓存有效期 +} + +# ETF 实时行情缓存(与股票分开缓存) +_etf_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 600 # 10分钟缓存有效期 +} + + +def _is_etf_code(stock_code: str) -> bool: + """ + 判断代码是否为 ETF 基金 + + ETF 代码规则: + - 上交所 ETF: 51xxxx, 52xxxx, 56xxxx, 58xxxx + - 深交所 ETF: 15xxxx, 16xxxx, 18xxxx + + Args: + stock_code: 股票/基金代码 + + Returns: + True 表示是 ETF 代码,False 表示是普通股票代码 + """ + etf_prefixes = ('51', '52', '56', '58', '15', '16', '18') + return stock_code.startswith(etf_prefixes) and len(stock_code) == 6 + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +def _ef_call_with_timeout(func, *args, timeout=None, **kwargs): + """Run an efinance library call in a thread with a timeout. + + efinance internally uses requests/urllib3 with no timeout, so when + eastmoney hosts are unreachable the call can hang for many minutes. + This helper caps the *calling thread's* wait time. 
Note: Python threads + cannot be forcibly killed, so the worker thread may continue running in + the background until the OS-level TCP timeout fires or the process exits. + This is acceptable — the calling thread returns promptly on timeout. + """ + if timeout is None: + timeout = _EF_CALL_TIMEOUT + # Do NOT use 'with ThreadPoolExecutor(...)' here: the context manager calls + # shutdown(wait=True) on __exit__, which would re-block on the hung thread. + executor = ThreadPoolExecutor(max_workers=1) + try: + future = executor.submit(func, *args, **kwargs) + return future.result(timeout=timeout) + finally: + # wait=False: calling thread returns immediately; worker cleans up later + executor.shutdown(wait=False) + + +def _classify_eastmoney_error(exc: Exception) -> Tuple[str, str]: + """ + Classify Eastmoney request failures into stable log categories. + """ + message = str(exc).strip() + lowered = message.lower() + + remote_disconnect_keywords = ( + 'remotedisconnected', + 'remote end closed connection without response', + 'connection aborted', + 'connection broken', + 'protocolerror', + ) + timeout_keywords = ( + 'timeout', + 'timed out', + 'readtimeout', + 'connecttimeout', + ) + rate_limit_keywords = ( + 'banned', + 'blocked', + '频率', + 'rate limit', + 'too many requests', + '429', + '限制', + 'forbidden', + '403', + ) + + if any(keyword in lowered for keyword in remote_disconnect_keywords): + return "remote_disconnect", message + if isinstance(exc, (TimeoutError, requests.exceptions.Timeout)) or any( + keyword in lowered for keyword in timeout_keywords + ): + return "timeout", message + if any(keyword in lowered for keyword in rate_limit_keywords): + return "rate_limit_or_anti_bot", message + if isinstance(exc, requests.exceptions.RequestException): + return "request_error", message + return "unknown_request_error", message + + +class EfinanceFetcher(BaseFetcher): + """ + Efinance 数据源实现 + + 优先级:0(最高,优先于 AkshareFetcher) + 数据来源:东方财富网(通过 efinance 库封装) + 仓库:https://github.com/Micro-sheep/efinance + + 主要 API: + - ef.stock.get_quote_history(): 获取历史 K 线数据 + - ef.stock.get_base_info(): 获取股票基本信息 + - ef.stock.get_realtime_quotes(): 获取实时行情 + + 关键策略: + - 每次请求前随机休眠 1.5-3.0 秒 + - 随机 User-Agent 轮换 + - 失败后指数退避重试(最多3次) + """ + + name = "EfinanceFetcher" + priority = int(os.getenv("EFINANCE_PRIORITY", "0")) # 最高优先级,排在 AkshareFetcher 之前 + + def __init__(self, sleep_min: float = 1.5, sleep_max: float = 3.0): + """ + 初始化 EfinanceFetcher + + Args: + sleep_min: 最小休眠时间(秒) + sleep_max: 最大休眠时间(秒) + """ + self.sleep_min = sleep_min + self.sleep_max = sleep_max + self._last_request_time: Optional[float] = None + # 东财补丁开启才执行打补丁操作 + if get_config().enable_eastmoney_patch: + eastmoney_patch() + + @staticmethod + def _build_history_failure_message( + stock_code: str, + beg_date: str, + end_date: str, + exc: Exception, + elapsed: float, + is_etf: bool = False, + ) -> Tuple[str, str]: + category, detail = _classify_eastmoney_error(exc) + instrument_type = "ETF" if is_etf else "stock" + message = ( + "Eastmoney 历史K线接口失败: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"market_type={instrument_type}, range={beg_date}~{end_date}, " + f"category={category}, error_type={type(exc).__name__}, elapsed={elapsed:.2f}s, detail={detail}" + ) + return category, message + + def _set_random_user_agent(self) -> None: + """ + 设置随机 User-Agent + + 通过修改 requests Session 的 headers 实现 + 这是关键的反爬策略之一 + """ + try: + random_ua = random.choice(USER_AGENTS) + logger.debug(f"设置 User-Agent: {random_ua[:50]}...") + except 
Exception as e: + logger.debug(f"设置 User-Agent 失败: {e}") + + def _enforce_rate_limit(self) -> None: + """ + 强制执行速率限制 + + 策略: + 1. 检查距离上次请求的时间间隔 + 2. 如果间隔不足,补充休眠时间 + 3. 然后再执行随机 jitter 休眠 + """ + if self._last_request_time is not None: + elapsed = time.time() - self._last_request_time + min_interval = self.sleep_min + if elapsed < min_interval: + additional_sleep = min_interval - elapsed + logger.debug(f"补充休眠 {additional_sleep:.2f} 秒") + time.sleep(additional_sleep) + + # 执行随机 jitter 休眠 + self.random_sleep(self.sleep_min, self.sleep_max) + self._last_request_time = time.time() + + @retry( + stop=stop_after_attempt(1), # 减少到1次,避免触发限流 + wait=wait_exponential(multiplier=1, min=4, max=60), # 保持等待时间设置 + retry=retry_if_exception_type(( + ConnectionError, + TimeoutError, + requests.exceptions.RequestException, + requests.exceptions.ConnectionError, + requests.exceptions.ChunkedEncodingError + )), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 efinance 获取原始数据 + + 根据代码类型自动选择 API: + - 美股:不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + - 普通股票:使用 ef.stock.get_quote_history() + - ETF 基金:使用 ef.stock.get_quote_history()(ETF 是交易所证券,使用股票 K 线接口) + + 流程: + 1. 判断代码类型(美股/股票/ETF) + 2. 设置随机 User-Agent + 3. 执行速率限制(随机休眠) + 4. 调用对应的 efinance API + 5. 处理返回数据 + """ + # 美股不支持,抛出异常让 DataFetcherManager 切换到 AkshareFetcher/YfinanceFetcher + if _is_us_code(stock_code): + raise DataFetchError(f"EfinanceFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # 根据代码类型选择不同的获取方法 + if _is_etf_code(stock_code): + return self._fetch_etf_data(stock_code, start_date, end_date) + else: + return self._fetch_stock_data(stock_code, start_date, end_date) + + def _fetch_stock_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 + + 数据来源:ef.stock.get_quote_history() + + API 参数说明: + - stock_codes: 股票代码 + - beg: 开始日期,格式 'YYYYMMDD' + - end: 结束日期,格式 'YYYYMMDD' + - klt: 周期,101=日线 + - fqt: 复权方式,1=前复权 + """ + import efinance as ef + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + # 格式化日期(efinance 使用 YYYYMMDD 格式) + beg_date = start_date.replace('-', '') + end_date_fmt = end_date.replace('-', '') + + logger.info(f"[API调用] ef.stock.get_quote_history(stock_codes={stock_code}, " + f"beg={beg_date}, end={end_date_fmt}, klt=101, fqt=1)") + + api_start = time.time() + try: + # 调用 efinance 获取 A 股日线数据 + # klt=101 获取日线数据 + # fqt=1 获取前复权数据 + df = _ef_call_with_timeout( + ef.stock.get_quote_history, + stock_codes=stock_code, + beg=beg_date, + end=end_date_fmt, + klt=101, # 日线 + fqt=1, # 前复权 + timeout=60, + ) + + api_elapsed = time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info( + "[API返回] Eastmoney 历史K线成功: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, rows={len(df)}, elapsed={api_elapsed:.2f}s" + ) + logger.info(f"[API返回] 列名: {list(df.columns)}") + if '日期' in df.columns: + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning( + "[API返回] Eastmoney 历史K线为空: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, elapsed={api_elapsed:.2f}s" + ) + + return df + + except Exception as e: + api_elapsed = time.time() - api_start + category, failure_message = 
self._build_history_failure_message( + stock_code=stock_code, + beg_date=beg_date, + end_date=end_date_fmt, + exc=e, + elapsed=api_elapsed, + ) + + if category == "rate_limit_or_anti_bot": + logger.warning(failure_message) + raise RateLimitError(f"efinance 可能被限流: {failure_message}") from e + + logger.error(failure_message) + raise DataFetchError(f"efinance 获取数据失败: {failure_message}") from e + + def _fetch_etf_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取 ETF 基金历史数据 + + Exchange-traded ETFs have OHLCV data just like regular stocks, so we use + ef.stock.get_quote_history (the stock K-line API) which returns full + open/high/low/close/volume data. + + Previously this method used ef.fund.get_quote_history which only returns + NAV data (单位净值/累计净值) without volume or OHLC, causing: + - Issue #541: 'got an unexpected keyword argument beg' + - Issue #527: ETF volume/turnover always showing 0 + + Args: + stock_code: ETF code, e.g. '512400', '159883', '515120' + start_date: Start date, format 'YYYY-MM-DD' + end_date: End date, format 'YYYY-MM-DD' + + Returns: + ETF historical OHLCV DataFrame + """ + import efinance as ef + + # Anti-ban strategy 1: random User-Agent + self._set_random_user_agent() + + # Anti-ban strategy 2: enforce rate limit + self._enforce_rate_limit() + + # Format dates (efinance uses YYYYMMDD) + beg_date = start_date.replace('-', '') + end_date_fmt = end_date.replace('-', '') + + logger.info(f"[API调用] ef.stock.get_quote_history(stock_codes={stock_code}, " + f"beg={beg_date}, end={end_date_fmt}, klt=101, fqt=1) [ETF]") + + api_start = time.time() + try: + # ETFs are exchange-traded securities; use the stock API to get full OHLCV data + df = _ef_call_with_timeout( + ef.stock.get_quote_history, + stock_codes=stock_code, + beg=beg_date, + end=end_date_fmt, + klt=101, # daily + fqt=1, # forward-adjusted + timeout=60, + ) + + api_elapsed = time.time() - api_start + + if df is not None and not df.empty: + logger.info( + "[API返回] Eastmoney 历史K线成功 [ETF]: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, rows={len(df)}, elapsed={api_elapsed:.2f}s" + ) + logger.info(f"[API返回] 列名: {list(df.columns)}") + if '日期' in df.columns: + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning( + "[API返回] Eastmoney 历史K线为空 [ETF]: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, elapsed={api_elapsed:.2f}s" + ) + + return df + + except Exception as e: + api_elapsed = time.time() - api_start + category, failure_message = self._build_history_failure_message( + stock_code=stock_code, + beg_date=beg_date, + end_date=end_date_fmt, + exc=e, + elapsed=api_elapsed, + is_etf=True, + ) + + if category == "rate_limit_or_anti_bot": + logger.warning(failure_message) + raise RateLimitError(f"efinance 可能被限流: {failure_message}") from e + + logger.error(failure_message) + raise DataFetchError(f"efinance 获取 ETF 数据失败: {failure_message}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 efinance 数据 + + efinance 返回的列名(中文): + 股票名称, 股票代码, 日期, 开盘, 收盘, 最高, 最低, 成交量, 成交额, 振幅, 涨跌幅, 涨跌额, 换手率 + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # Column mapping (efinance Chinese column names -> standard English column names) + column_mapping = { + '日期': 'date', + '开盘': 'open', + 
'收盘': 'close', + '最高': 'high', + '最低': 'low', + '成交量': 'volume', + '成交额': 'amount', + '涨跌幅': 'pct_chg', + '股票代码': 'code', + '股票名称': 'name', + } + + # 重命名列 + df = df.rename(columns=column_mapping) + + # Fallback: if OHLC columns are missing (e.g. very old data path), fill from close + if 'close' in df.columns and 'open' not in df.columns: + df['open'] = df['close'] + df['high'] = df['close'] + df['low'] = df['close'] + + # Fill volume and amount if missing + if 'volume' not in df.columns: + df['volume'] = 0 + if 'amount' not in df.columns: + df['amount'] = 0 + + + # 如果没有 code 列,手动添加 + if 'code' not in df.columns: + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取实时行情数据 + + 数据来源:ef.stock.get_realtime_quotes() + ETF 数据源:ef.stock.get_realtime_quotes(['ETF']) + + Args: + stock_code: 股票代码 + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + # ETF 需要单独请求 ETF 实时行情接口 + if _is_etf_code(stock_code): + return self._get_etf_realtime_quote(stock_code) + + import efinance as ef + circuit_breaker = get_realtime_circuit_breaker() + source_key = "efinance" + + # 检查熔断器状态 + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过") + return None + + try: + # 检查缓存 + current_time = time.time() + if (_realtime_cache['data'] is not None and + current_time - _realtime_cache['timestamp'] < _realtime_cache['ttl']): + df = _realtime_cache['data'] + cache_age = int(current_time - _realtime_cache['timestamp']) + logger.debug(f"[缓存命中] 实时行情(efinance) - 缓存年龄 {cache_age}s/{_realtime_cache['ttl']}s") + else: + # 触发全量刷新 + logger.info(f"[缓存未命中] 触发全量刷新 实时行情(efinance)") + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ef.stock.get_realtime_quotes() 获取实时行情...") + import time as _time + api_start = _time.time() + + # efinance 的实时行情 API (with timeout to avoid indefinite hangs) + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes) + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ef.stock.get_realtime_quotes 成功: 返回 {len(df)} 只股票, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(source_key) + + # 更新缓存 + _realtime_cache['data'] = df + _realtime_cache['timestamp'] = current_time + logger.info(f"[缓存更新] 实时行情(efinance) 缓存已刷新,TTL={_realtime_cache['ttl']}s") + + # 查找指定股票 + # efinance 返回的列名可能是 '股票代码' 或 'code' + code_col = '股票代码' if '股票代码' in df.columns else 'code' + row = df[df[code_col] == stock_code] + if row.empty: + logger.info(f"[API返回] 未找到股票 {stock_code} 的实时行情") + return None + + row = row.iloc[0] + + # 使用 realtime_types.py 中的统一转换函数 + # 获取列名(可能是中文或英文) + name_col = '股票名称' if '股票名称' in df.columns else 'name' + price_col = '最新价' if '最新价' in df.columns else 'price' + pct_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg' + chg_col = '涨跌额' if '涨跌额' in df.columns else 'change' + vol_col = '成交量' if '成交量' in df.columns else 'volume' + amt_col = '成交额' if '成交额' in df.columns else 'amount' + turn_col = '换手率' if '换手率' in df.columns else 'turnover_rate' + amp_col = '振幅' if '振幅' in df.columns else 'amplitude' + high_col = '最高' if '最高' in df.columns else 'high' + low_col = '最低' if '最低' in df.columns else 'low' + open_col = '开盘' if '开盘' in df.columns else 'open' + # efinance 也返回量比、市盈率、市值等字段 + vol_ratio_col = '量比' if '量比' in df.columns else 'volume_ratio' + pe_col = '市盈率' if '市盈率' in df.columns else 
'pe_ratio' + total_mv_col = '总市值' if '总市值' in df.columns else 'total_mv' + circ_mv_col = '流通市值' if '流通市值' in df.columns else 'circ_mv' + + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get(name_col, '')), + source=RealtimeSource.EFINANCE, + price=safe_float(row.get(price_col)), + change_pct=safe_float(row.get(pct_col)), + change_amount=safe_float(row.get(chg_col)), + volume=safe_int(row.get(vol_col)), + amount=safe_float(row.get(amt_col)), + turnover_rate=safe_float(row.get(turn_col)), + amplitude=safe_float(row.get(amp_col)), + high=safe_float(row.get(high_col)), + low=safe_float(row.get(low_col)), + open_price=safe_float(row.get(open_col)), + volume_ratio=safe_float(row.get(vol_ratio_col)), # 量比 + pe_ratio=safe_float(row.get(pe_col)), # 市盈率 + total_mv=safe_float(row.get(total_mv_col)), # 总市值 + circ_mv=safe_float(row.get(circ_mv_col)), # 流通市值 + ) + + logger.info(f"[实时行情-efinance] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, " + f"量比={quote.volume_ratio}, 换手率={quote.turnover_rate}%") + return quote + + except FuturesTimeoutError: + logger.info(f"[超时] ef.stock.get_realtime_quotes() 超过 {_EF_CALL_TIMEOUT}s,跳过 {stock_code}") + circuit_breaker.record_failure(source_key, "timeout") + return None + except Exception as e: + logger.info(f"[API错误] 获取 {stock_code} 实时行情(efinance)失败: {e}") + circuit_breaker.record_failure(source_key, str(e)) + return None + + def _get_etf_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取 ETF 实时行情 + + efinance 默认实时接口仅返回股票数据,ETF 需要显式传入 ['ETF']。 + """ + import efinance as ef + circuit_breaker = get_realtime_circuit_breaker() + source_key = "efinance_etf" + + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过") + return None + + try: + current_time = time.time() + if ( + _etf_realtime_cache['data'] is not None and + current_time - _etf_realtime_cache['timestamp'] < _etf_realtime_cache['ttl'] + ): + df = _etf_realtime_cache['data'] + cache_age = int(current_time - _etf_realtime_cache['timestamp']) + logger.debug(f"[缓存命中] ETF实时行情(efinance) - 缓存年龄 {cache_age}s/{_etf_realtime_cache['ttl']}s") + else: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ef.stock.get_realtime_quotes(['ETF']) 获取ETF实时行情...") + import time as _time + api_start = _time.time() + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes, ['ETF']) + api_elapsed = _time.time() - api_start + + if df is not None and not df.empty: + logger.info(f"[API返回] ETF 实时行情成功: {len(df)} 条, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(source_key) + else: + logger.info(f"[API返回] ETF 实时行情为空, 耗时 {api_elapsed:.2f}s") + df = pd.DataFrame() + + _etf_realtime_cache['data'] = df + _etf_realtime_cache['timestamp'] = current_time + + if df is None or df.empty: + logger.info(f"[实时行情] ETF实时行情数据为空(efinance),跳过 {stock_code}") + return None + + code_col = '股票代码' if '股票代码' in df.columns else 'code' + code_series = df[code_col].astype(str).str.zfill(6) + target_code = str(stock_code).strip().zfill(6) + row = df[code_series == target_code] + if row.empty: + logger.info(f"[API返回] 未找到 ETF {stock_code} 的实时行情(efinance)") + return None + + row = row.iloc[0] + name_col = '股票名称' if '股票名称' in df.columns else 'name' + price_col = '最新价' if '最新价' in df.columns else 'price' + pct_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg' + chg_col = '涨跌额' if '涨跌额' in df.columns else 'change' + vol_col = '成交量' if '成交量' in df.columns else 'volume' + amt_col = '成交额' if '成交额' in df.columns else 
'amount' + turn_col = '换手率' if '换手率' in df.columns else 'turnover_rate' + amp_col = '振幅' if '振幅' in df.columns else 'amplitude' + high_col = '最高' if '最高' in df.columns else 'high' + low_col = '最低' if '最低' in df.columns else 'low' + open_col = '开盘' if '开盘' in df.columns else 'open' + + quote = UnifiedRealtimeQuote( + code=target_code, + name=str(row.get(name_col, '')), + source=RealtimeSource.EFINANCE, + price=safe_float(row.get(price_col)), + change_pct=safe_float(row.get(pct_col)), + change_amount=safe_float(row.get(chg_col)), + volume=safe_int(row.get(vol_col)), + amount=safe_float(row.get(amt_col)), + turnover_rate=safe_float(row.get(turn_col)), + amplitude=safe_float(row.get(amp_col)), + high=safe_float(row.get(high_col)), + low=safe_float(row.get(low_col)), + open_price=safe_float(row.get(open_col)), + ) + + logger.info( + f"[ETF实时行情-efinance] {target_code} {quote.name}: " + f"价格={quote.price}, 涨跌={quote.change_pct}%, 换手率={quote.turnover_rate}%" + ) + return quote + except Exception as e: + logger.info(f"[API错误] 获取 ETF {stock_code} 实时行情(efinance)失败: {e}") + circuit_breaker.record_failure(source_key, str(e)) + return None + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数实时行情 (efinance),仅支持 A 股 + """ + if region != "cn": + return None + import efinance as ef + + indices_map = { + '000001': ('上证指数', 'sh000001'), + '399001': ('深证成指', 'sz399001'), + '399006': ('创业板指', 'sz399006'), + '000688': ('科创50', 'sh000688'), + '000016': ('上证50', 'sh000016'), + '000300': ('沪深300', 'sh000300'), + } + + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ef.stock.get_realtime_quotes(['沪深系列指数']) 获取指数行情...") + import time as _time + api_start = _time.time() + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes, ['沪深系列指数']) + api_elapsed = _time.time() - api_start + + if df is None or df.empty: + logger.warning(f"[API返回] 指数行情为空, 耗时 {api_elapsed:.2f}s") + return None + + logger.info(f"[API返回] 指数行情成功: {len(df)} 条, 耗时 {api_elapsed:.2f}s") + code_col = '股票代码' if '股票代码' in df.columns else 'code' + code_series = df[code_col].astype(str).str.zfill(6) + + results: List[Dict[str, Any]] = [] + for code, (name, full_code) in indices_map.items(): + row = df[code_series == code] + if row.empty: + continue + item = row.iloc[0] + + price_col = '最新价' if '最新价' in df.columns else 'price' + pct_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg' + chg_col = '涨跌额' if '涨跌额' in df.columns else 'change' + open_cols = [column for column in ('今开', '开盘', 'open') if column in df.columns] + high_col = '最高' if '最高' in df.columns else 'high' + low_col = '最低' if '最低' in df.columns else 'low' + vol_col = '成交量' if '成交量' in df.columns else 'volume' + amt_col = '成交额' if '成交额' in df.columns else 'amount' + amp_col = '振幅' if '振幅' in df.columns else 'amplitude' + + current = safe_float(item.get(price_col, 0)) + change_amount = safe_float(item.get(chg_col, 0)) + open_price = 0.0 + for column in open_cols: + candidate = safe_float(item.get(column), default=None) + if candidate not in (None, 0.0): + open_price = candidate + break + if open_price == 0.0 and open_cols: + open_price = safe_float(item.get(open_cols[0], 0), 0) + + results.append({ + 'code': full_code, + 'name': name, + 'current': current, + 'change': change_amount, + 'change_pct': safe_float(item.get(pct_col, 0)), + 'open': open_price, + 'high': safe_float(item.get(high_col, 0)), + 'low': safe_float(item.get(low_col, 0)), + 'prev_close': current - change_amount if current or change_amount else 
0, + 'volume': safe_float(item.get(vol_col, 0)), + 'amount': safe_float(item.get(amt_col, 0)), + 'amplitude': safe_float(item.get(amp_col, 0)), + }) + + if results: + logger.info(f"[efinance] 获取到 {len(results)} 个指数行情") + return results if results else None + except Exception as e: + logger.error(f"[efinance] 获取指数行情失败: {e}") + return None + + def get_market_stats(self) -> Optional[Dict[str, Any]]: + """ + 获取市场涨跌统计 (efinance) + """ + import efinance as ef + + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + current_time = time.time() + if ( + _realtime_cache['data'] is not None and + current_time - _realtime_cache['timestamp'] < _realtime_cache['ttl'] + ): + df = _realtime_cache['data'] + else: + logger.info("[API调用] ef.stock.get_realtime_quotes() 获取市场统计...") + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes) + _realtime_cache['data'] = df + _realtime_cache['timestamp'] = current_time + + if df is None or df.empty: + logger.warning("[API返回] 市场统计数据为空") + return None + + return self._calc_market_stats(df) + except Exception as e: + logger.error(f"[efinance] 获取市场统计失败: {e}") + return None + + def _calc_market_stats( + self, + df: pd.DataFrame, + ) -> Optional[Dict[str, Any]]: + """从行情 DataFrame 计算涨跌统计。""" + import numpy as np + + df = df.copy() + + # 1. 提取基础比对数据:最新价、昨收 + # 兼容不同接口返回的列名 sina/em efinance tushare xtdata + code_col = next((c for c in ['代码', '股票代码', 'ts_code','stock_code'] if c in df.columns), None) + name_col = next((c for c in ['名称', '股票名称','name','name'] if c in df.columns), None) + close_col = next((c for c in ['最新价', '最新价', 'close','lastPrice'] if c in df.columns), None) + pre_close_col = next((c for c in ['昨收', '昨日收盘', 'pre_close','lastClose'] if c in df.columns), None) + amount_col = next((c for c in ['成交额', '成交额', 'amount','amount'] if c in df.columns), None) + + limit_up_count = 0 + limit_down_count = 0 + up_count = 0 + down_count = 0 + flat_count = 0 + + for code, name, current_price, pre_close, amount in zip( + df[code_col], df[name_col], df[close_col], df[pre_close_col], df[amount_col] + ): + + # 停牌过滤 efinance 的停牌数据有时候会缺失价格显示为 '-',em 显示为none + if pd.isna(current_price) or pd.isna(pre_close) or current_price in ['-'] or pre_close in ['-'] or amount == 0: + continue + + # em、efinance 为str 需要转换为float + current_price = float(current_price) + pre_close = float(pre_close) + + # 获取去除前缀的纯数字代码 + pure_code = normalize_stock_code(str(code)) + + # A. 确定每只股票的涨跌幅比例 (使用纯数字代码判断) + if is_bse_code(pure_code): + ratio = 0.30 + elif is_kc_cy_stock(pure_code): #pure_code.startswith(('688', '30')): + ratio = 0.20 + elif is_st_stock(name): #'ST' in str_name: + ratio = 0.05 + else: + ratio = 0.10 + + # B. 严格按照 A 股规则计算涨跌停价:昨收 * (1 ± 比例) -> 四舍五入保留2位小数 + limit_up_price = np.floor(pre_close * (1 + ratio) * 100 + 0.5) / 100.0 + limit_down_price = np.floor(pre_close * (1 - ratio) * 100 + 0.5) / 100.0 + + limit_up_price_Tolerance = round(abs(pre_close * (1 + ratio) - limit_up_price), 10) + limit_down_price_Tolerance = round(abs(pre_close * (1 - ratio) - limit_down_price), 10) + + # C. 
精确比对 + if current_price > 0 : + is_limit_up = (current_price > 0) and (abs(current_price - limit_up_price) <= limit_up_price_Tolerance) + is_limit_down = (current_price > 0) and (abs(current_price - limit_down_price) <= limit_down_price_Tolerance) + + if is_limit_up: + limit_up_count += 1 + if is_limit_down: + limit_down_count += 1 + + if current_price > pre_close: + up_count += 1 + elif current_price < pre_close: + down_count += 1 + else: + flat_count += 1 + + # 统计数量 + stats = { + 'up_count': up_count, + 'down_count': down_count, + 'flat_count': flat_count, + 'limit_up_count': limit_up_count, + 'limit_down_count': limit_down_count, + 'total_amount': 0.0, + } + + # 成交额统计 + if amount_col and amount_col in df.columns: + df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce') + stats['total_amount'] = (df[amount_col].sum() / 1e8) + + return stats + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[List[Dict], List[Dict]]]: + """ + 获取板块涨跌榜 (efinance) + """ + import efinance as ef + + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ef.stock.get_realtime_quotes(['行业板块']) 获取板块行情...") + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes, ['行业板块']) + if df is None or df.empty: + logger.warning("[efinance] 板块行情数据为空") + return None + + change_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg' + name_col = '股票名称' if '股票名称' in df.columns else 'name' + if change_col not in df.columns or name_col not in df.columns: + return None + + df[change_col] = pd.to_numeric(df[change_col], errors='coerce') + df = df.dropna(subset=[change_col]) + top = df.nlargest(n, change_col) + bottom = df.nsmallest(n, change_col) + + top_sectors = [ + {'name': str(row[name_col]), 'change_pct': float(row[change_col])} + for _, row in top.iterrows() + ] + bottom_sectors = [ + {'name': str(row[name_col]), 'change_pct': float(row[change_col])} + for _, row in bottom.iterrows() + ] + return top_sectors, bottom_sectors + except Exception as e: + logger.error(f"[efinance] 获取板块排行失败: {e}") + return None + + def get_base_info(self, stock_code: str) -> Optional[Dict[str, Any]]: + """ + 获取股票基本信息 + + 数据来源:ef.stock.get_base_info() + 包含:市盈率、市净率、所处行业、总市值、流通市值、ROE、净利率等 + + Args: + stock_code: 股票代码 + + Returns: + 包含基本信息的字典,获取失败返回 None + """ + import efinance as ef + + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ef.stock.get_base_info(stock_codes={stock_code}) 获取基本信息...") + import time as _time + api_start = _time.time() + + info = _ef_call_with_timeout(ef.stock.get_base_info, stock_code) + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ef.stock.get_base_info 成功, 耗时 {api_elapsed:.2f}s") + + if info is None: + logger.warning(f"[API返回] 未获取到 {stock_code} 的基本信息") + return None + + # 转换为字典 + if isinstance(info, pd.Series): + return info.to_dict() + elif isinstance(info, pd.DataFrame): + if not info.empty: + return info.iloc[0].to_dict() + + return None + + except Exception as e: + logger.error(f"[API错误] 获取 {stock_code} 基本信息失败: {e}") + return None + + def get_belong_board(self, stock_code: str) -> Optional[pd.DataFrame]: + """ + 获取股票所属板块 + + 数据来源:ef.stock.get_belong_board() + + Args: + stock_code: 股票代码 + + Returns: + 所属板块 DataFrame,获取失败返回 None + """ + import efinance as ef + + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ef.stock.get_belong_board(stock_code={stock_code}) 获取所属板块...") + import time as _time + api_start = _time.time() + + df = 
_ef_call_with_timeout(ef.stock.get_belong_board, stock_code) + + api_elapsed = _time.time() - api_start + + if df is not None and not df.empty: + logger.info(f"[API返回] ef.stock.get_belong_board 成功: 返回 {len(df)} 个板块, 耗时 {api_elapsed:.2f}s") + return df + else: + logger.warning(f"[API返回] 未获取到 {stock_code} 的板块信息") + return None + + except FuturesTimeoutError: + logger.warning(f"[超时] ef.stock.get_belong_board({stock_code}) 超过 {_EF_CALL_TIMEOUT}s,跳过") + return None + except Exception as e: + logger.error(f"[API错误] 获取 {stock_code} 所属板块失败: {e}") + return None + + def get_enhanced_data(self, stock_code: str, days: int = 60) -> Dict[str, Any]: + """ + 获取增强数据(历史K线 + 实时行情 + 基本信息) + + Args: + stock_code: 股票代码 + days: 历史数据天数 + + Returns: + 包含所有数据的字典 + """ + result = { + 'code': stock_code, + 'daily_data': None, + 'realtime_quote': None, + 'base_info': None, + 'belong_board': None, + } + + # 获取日线数据 + try: + df = self.get_daily_data(stock_code, days=days) + result['daily_data'] = df + except Exception as e: + logger.error(f"获取 {stock_code} 日线数据失败: {e}") + + # 获取实时行情 + result['realtime_quote'] = self.get_realtime_quote(stock_code) + + # 获取基本信息 + result['base_info'] = self.get_base_info(stock_code) + + # 获取所属板块 + result['belong_board'] = self.get_belong_board(stock_code) + + return result + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = EfinanceFetcher() + + # 测试普通股票 + print("=" * 50) + print("测试普通股票数据获取 (efinance)") + print("=" * 50) + try: + df = fetcher.get_daily_data('600519') # 茅台 + print(f"[股票] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[股票] 获取失败: {e}") + + # 测试 ETF 基金 + print("\n" + "=" * 50) + print("测试 ETF 基金数据获取 (efinance)") + print("=" * 50) + try: + df = fetcher.get_daily_data('512400') # 有色龙头ETF + print(f"[ETF] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[ETF] 获取失败: {e}") + + # 测试实时行情 + print("\n" + "=" * 50) + print("测试实时行情获取 (efinance)") + print("=" * 50) + try: + quote = fetcher.get_realtime_quote('600519') + if quote: + print(f"[实时行情] {quote.name}: 价格={quote.price}, 涨跌幅={quote.change_pct}%") + else: + print("[实时行情] 未获取到数据") + except Exception as e: + print(f"[实时行情] 获取失败: {e}") + + # 测试基本信息 + print("\n" + "=" * 50) + print("测试基本信息获取 (efinance)") + print("=" * 50) + try: + info = fetcher.get_base_info('600519') + if info: + print(f"[基本信息] 市盈率={info.get('市盈率(动)', 'N/A')}, 市净率={info.get('市净率', 'N/A')}") + else: + print("[基本信息] 未获取到数据") + except Exception as e: + print(f"[基本信息] 获取失败: {e}") + + # 测试市场统计 + print("\n" + "=" * 50) + print("Testing get_market_stats (efinance)") + print("=" * 50) + try: + stats = fetcher.get_market_stats() + if stats: + print(f"Market Stats successfully computed:") + print(f"Up: {stats['up_count']} (Limit Up: {stats['limit_up_count']})") + print(f"Down: {stats['down_count']} (Limit Down: {stats['limit_down_count']})") + print(f"Flat: {stats['flat_count']}") + print(f"Total Amount: {stats['total_amount']:.2f} 亿 (Yi)") + else: + print("Failed to compute market stats.") + except Exception as e: + print(f"Failed to compute market stats: {e}") diff --git a/src/provider/fundamental_adapter.py b/src/provider/fundamental_adapter.py new file mode 100644 index 00000000..6fe41e1f --- /dev/null +++ b/src/provider/fundamental_adapter.py @@ -0,0 +1,532 @@ +# -*- coding: utf-8 -*- +""" +AkShare fundamental adapter (fail-open). + +This adapter intentionally uses capability probing against multiple AkShare +endpoint candidates. 
It should never raise to caller; partial data is allowed. +""" + +from __future__ import annotations + +import logging +import re +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Tuple + +import pandas as pd + +logger = logging.getLogger(__name__) + +_DIVIDEND_KEYWORD_MAP: Dict[str, List[str]] = { + "per_share": [ + "每股派息", + "每股现金红利", + "每股分红", + "每股派现", + "派现(元/股)", + "派息(元/股)", + "税前派息(元/股)", + "现金分红(税前)", + ], + "plan_text": [ + "分配方案", + "分红方案", + "实施方案", + "派息方案", + "方案", + "预案", + "方案说明", + ], + "ex_dividend_date": ["除权除息日", "除息日", "除权日", "除权除息", "除息日期"], + "record_date": ["股权登记日", "登记日"], + "announce_date": ["公告日期", "公告日", "实施公告日", "预案公告日"], + "report_date": ["报告期", "报告日期", "截止日期", "统计截止日期"], +} + + +def _safe_float(value: Any) -> Optional[float]: + """Best-effort float conversion.""" + if value is None: + return None + if isinstance(value, (int, float)): + try: + return float(value) + except (TypeError, ValueError): + return None + s = str(value).strip().replace(",", "").replace("%", "") + if not s: + return None + try: + return float(s) + except (TypeError, ValueError): + return None + + +def _safe_str(value: Any) -> str: + if value is None: + return "" + return str(value).strip() + + +def _safe_datetime(value: Any) -> Optional[datetime]: + if value is None: + return None + try: + parsed = pd.to_datetime(value) + except Exception: + return None + if pd.isna(parsed): + return None + try: + return parsed.to_pydatetime() + except Exception: + return None + + +def _normalize_code(raw: Any) -> str: + s = _safe_str(raw).upper() + if "." in s: + s = s.split(".", 1)[0] + s = re.sub(r"^(SH|SZ|BJ)", "", s) + return s + + +def _pick_by_keywords(row: pd.Series, keywords: List[str]) -> Optional[Any]: + """ + Return first non-empty row value whose column name contains any keyword. + """ + for col in row.index: + col_s = str(col) + if any(k in col_s for k in keywords): + val = row.get(col) + if val is not None and str(val).strip() not in ("", "-", "nan", "None"): + return val + return None + + +def _parse_dividend_plan_to_per_share(plan_text: str) -> Optional[float]: + """Parse per-share cash dividend from Chinese plan text.""" + text = _safe_str(plan_text) + if not text: + return None + + for pattern in ( + r"(?:每)?\s*10\s*股?\s*派(?:发)?\s*([0-9]+(?:\.[0-9]+)?)\s*元", + r"10\s*派\s*([0-9]+(?:\.[0-9]+)?)\s*元", + ): + match = re.search(pattern, text) + if match: + parsed = _safe_float(match.group(1)) + if parsed is not None and parsed > 0: + return parsed / 10.0 + + match_per_share = re.search(r"每\s*股\s*派(?:发)?\s*([0-9]+(?:\.[0-9]+)?)\s*元", text) + if match_per_share: + parsed = _safe_float(match_per_share.group(1)) + if parsed is not None and parsed > 0: + return parsed + return None + + +def _extract_cash_dividend_per_share(row: pd.Series) -> Optional[float]: + """Extract pre-tax cash dividend per share from a row.""" + plan_text = _safe_str(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["plan_text"])) + # Keep pre-tax semantics; skip explicit after-tax plans unless pre-tax marker exists. 
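+    # e.g. "每10股派3.5元(含税)" passes this filter and later resolves to
+    # 0.35 yuan per share via _parse_dividend_plan_to_per_share, while a plan
+    # that only mentions "税后" is dropped here.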
+ if "税后" in plan_text and "税前" not in plan_text and "含税" not in plan_text: + return None + + direct = _safe_float(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["per_share"])) + if direct is not None and direct > 0: + return direct + return _parse_dividend_plan_to_per_share(plan_text) + + +def _filter_rows_by_code(df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + if df is None or df.empty: + return pd.DataFrame() + code_cols = [c for c in df.columns if any(k in str(c) for k in ("代码", "股票代码", "证券代码", "symbol", "ts_code"))] + if not code_cols: + return df + + target = _normalize_code(stock_code) + for col in code_cols: + try: + series = df[col].astype(str).map(_normalize_code) + filtered = df[series == target] + if not filtered.empty: + return filtered + except Exception: + continue + return pd.DataFrame() + + +def _normalize_report_date(value: Any) -> Optional[str]: + parsed = _safe_datetime(value) + return parsed.date().isoformat() if parsed else None + + +def _build_dividend_payload( + dividend_df: pd.DataFrame, + stock_code: str, + max_events: int = 5, +) -> Dict[str, Any]: + work_df = _filter_rows_by_code(dividend_df, stock_code) + if work_df.empty: + return {} + + now_date = datetime.now().date() + ttm_start_date = now_date - timedelta(days=365) + dedupe_keys = set() + events: List[Dict[str, Any]] = [] + + for _, row in work_df.iterrows(): + if not isinstance(row, pd.Series): + continue + ex_dt = _safe_datetime(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["ex_dividend_date"])) + record_dt = _safe_datetime(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["record_date"])) + announce_dt = _safe_datetime(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["announce_date"])) + event_dt = ex_dt or record_dt or announce_dt + if event_dt is None: + continue + event_date = event_dt.date() + if event_date > now_date: + continue + + per_share = _extract_cash_dividend_per_share(row) + if per_share is None or per_share <= 0: + continue + + dedupe_key = (event_date.isoformat(), round(per_share, 6)) + if dedupe_key in dedupe_keys: + continue + dedupe_keys.add(dedupe_key) + + events.append( + { + "event_date": event_date.isoformat(), + "ex_dividend_date": ex_dt.date().isoformat() if ex_dt else None, + "record_date": record_dt.date().isoformat() if record_dt else None, + "announcement_date": announce_dt.date().isoformat() if announce_dt else None, + "cash_dividend_per_share": round(per_share, 6), + "is_pre_tax": True, + } + ) + + if not events: + return {} + + events.sort(key=lambda item: item.get("event_date") or "", reverse=True) + ttm_events: List[Dict[str, Any]] = [] + for item in events: + event_dt = _safe_datetime(item.get("event_date")) + if event_dt is None: + continue + event_date = event_dt.date() + if ttm_start_date <= event_date <= now_date: + ttm_events.append(item) + + return { + "events": events[:max(1, max_events)], + "ttm_event_count": len(ttm_events), + "ttm_cash_dividend_per_share": ( + round(sum(float(item.get("cash_dividend_per_share") or 0.0) for item in ttm_events), 6) + if ttm_events else None + ), + "coverage": "cash_dividend_pre_tax", + "as_of": now_date.isoformat(), + } + + +def _extract_latest_row(df: pd.DataFrame, stock_code: str) -> Optional[pd.Series]: + """ + Select the most relevant row for the given stock. 
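+
+    Filters by any recognized code column (代码/证券代码/ts_code/symbol) after
+    normalizing exchange prefixes; returns None when a code column exists but
+    nothing matches, and falls back to the first row only when the frame has
+    no code column at all.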
+ """ + if df is None or df.empty: + return None + + code_cols = [c for c in df.columns if any(k in str(c) for k in ("代码", "股票代码", "证券代码", "ts_code", "symbol"))] + target = _normalize_code(stock_code) + if code_cols: + for col in code_cols: + try: + series = df[col].astype(str).map(_normalize_code) + matched = df[series == target] + if not matched.empty: + return matched.iloc[0] + except Exception: + continue + return None + + # Fallback: use latest row + return df.iloc[0] + + +class AkshareFundamentalAdapter: + """AkShare adapter for fundamentals, capital flow and dragon-tiger signals.""" + + def _call_df_candidates( + self, + candidates: List[Tuple[str, Dict[str, Any]]], + ) -> Tuple[Optional[pd.DataFrame], Optional[str], List[str]]: + errors: List[str] = [] + try: + import akshare as ak + except Exception as exc: + return None, None, [f"import_akshare:{type(exc).__name__}"] + + for func_name, kwargs in candidates: + fn = getattr(ak, func_name, None) + if fn is None: + continue + try: + df = fn(**kwargs) + if isinstance(df, pd.Series): + df = df.to_frame().T + if isinstance(df, pd.DataFrame) and not df.empty: + return df, func_name, errors + except Exception as exc: + errors.append(f"{func_name}:{type(exc).__name__}") + continue + return None, None, errors + + def get_fundamental_bundle(self, stock_code: str) -> Dict[str, Any]: + """ + Return normalized fundamental blocks from AkShare with partial tolerance. + """ + result: Dict[str, Any] = { + "status": "not_supported", + "growth": {}, + "earnings": {}, + "institution": {}, + "source_chain": [], + "errors": [], + } + + # Financial indicators + fin_df, fin_source, fin_errors = self._call_df_candidates([ + ("stock_financial_abstract", {"symbol": stock_code}), + ("stock_financial_analysis_indicator", {"symbol": stock_code}), + ("stock_financial_analysis_indicator", {}), + ]) + result["errors"].extend(fin_errors) + if fin_df is not None: + row = _extract_latest_row(fin_df, stock_code) + if row is not None: + revenue_yoy = _safe_float(_pick_by_keywords(row, ["营业收入同比", "营收同比", "收入同比", "同比增长"])) + profit_yoy = _safe_float(_pick_by_keywords(row, ["净利润同比", "净利同比", "归母净利润同比"])) + roe = _safe_float(_pick_by_keywords(row, ["净资产收益率", "ROE", "净资产收益"])) + gross_margin = _safe_float(_pick_by_keywords(row, ["毛利率"])) + report_date = _normalize_report_date(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["report_date"])) + revenue = _safe_float(_pick_by_keywords(row, ["营业总收入", "营业收入", "营收"])) + net_profit_parent = _safe_float(_pick_by_keywords(row, ["归母净利润", "母公司股东净利润", "净利润"])) + operating_cash_flow = _safe_float( + _pick_by_keywords(row, ["经营活动产生的现金流量净额", "经营现金流", "经营活动现金流"]) + ) + result["growth"] = { + "revenue_yoy": revenue_yoy, + "net_profit_yoy": profit_yoy, + "roe": roe, + "gross_margin": gross_margin, + } + financial_report_payload = { + "report_date": report_date, + "revenue": revenue, + "net_profit_parent": net_profit_parent, + "operating_cash_flow": operating_cash_flow, + "roe": roe, + } + if any(v is not None for v in financial_report_payload.values()): + result["earnings"]["financial_report"] = financial_report_payload + result["source_chain"].append(f"growth:{fin_source}") + + # Earnings forecast + forecast_df, forecast_source, forecast_errors = self._call_df_candidates([ + ("stock_yjyg_em", {"symbol": stock_code}), + ("stock_yjyg_em", {}), + ("stock_yjbb_em", {"symbol": stock_code}), + ("stock_yjbb_em", {}), + ]) + result["errors"].extend(forecast_errors) + if forecast_df is not None: + row = _extract_latest_row(forecast_df, stock_code) + if 
row is not None: + result["earnings"]["forecast_summary"] = _safe_str( + _pick_by_keywords(row, ["预告", "业绩变动", "内容", "摘要", "公告"]) + )[:200] + result["source_chain"].append(f"earnings_forecast:{forecast_source}") + + # Earnings quick report + quick_df, quick_source, quick_errors = self._call_df_candidates([ + ("stock_yjkb_em", {"symbol": stock_code}), + ("stock_yjkb_em", {}), + ]) + result["errors"].extend(quick_errors) + if quick_df is not None: + row = _extract_latest_row(quick_df, stock_code) + if row is not None: + result["earnings"]["quick_report_summary"] = _safe_str( + _pick_by_keywords(row, ["快报", "摘要", "公告", "说明"]) + )[:200] + result["source_chain"].append(f"earnings_quick:{quick_source}") + + # Dividend details (cash dividend, pre-tax) + dividend_df, dividend_source, dividend_errors = self._call_df_candidates([ + ("stock_fhps_detail_em", {"symbol": stock_code}), + ("stock_history_dividend_detail", {"symbol": stock_code, "indicator": "分红", "date": ""}), + ("stock_dividend_cninfo", {"symbol": stock_code}), + ]) + result["errors"].extend(dividend_errors) + if dividend_df is not None: + dividend_payload = _build_dividend_payload(dividend_df, stock_code, max_events=5) + if dividend_payload: + result["earnings"]["dividend"] = dividend_payload + result["source_chain"].append(f"dividend:{dividend_source}") + + # Institution / top shareholders + inst_df, inst_source, inst_errors = self._call_df_candidates([ + ("stock_institute_hold", {}), + ("stock_institute_recommend", {}), + ]) + result["errors"].extend(inst_errors) + if inst_df is not None: + row = _extract_latest_row(inst_df, stock_code) + if row is not None: + inst_change = _safe_float(_pick_by_keywords(row, ["增减", "变化", "变动", "持股变化"])) + result["institution"]["institution_holding_change"] = inst_change + result["source_chain"].append(f"institution:{inst_source}") + + top10_df, top10_source, top10_errors = self._call_df_candidates([ + ("stock_gdfx_top_10_em", {"symbol": stock_code}), + ("stock_gdfx_top_10_em", {}), + ("stock_zh_a_gdhs_detail_em", {"symbol": stock_code}), + ("stock_zh_a_gdhs_detail_em", {}), + ]) + result["errors"].extend(top10_errors) + if top10_df is not None: + row = _extract_latest_row(top10_df, stock_code) + if row is not None: + holder_change = _safe_float(_pick_by_keywords(row, ["增减", "变化", "持股变化", "变动"])) + result["institution"]["top10_holder_change"] = holder_change + result["source_chain"].append(f"top10:{top10_source}") + + has_content = bool(result["growth"] or result["earnings"] or result["institution"]) + result["status"] = "partial" if has_content else "not_supported" + return result + + def get_capital_flow(self, stock_code: str, top_n: int = 5) -> Dict[str, Any]: + """ + Return stock + sector capital flow. 
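+
+        Fail-open: "stock_flow" (main/5d/10d net inflow) and the top/bottom
+        "sector_rankings" are filled independently; status becomes "partial"
+        when at least one part has data and "not_supported" when both are
+        empty.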
+ """ + result: Dict[str, Any] = { + "status": "not_supported", + "stock_flow": {}, + "sector_rankings": {"top": [], "bottom": []}, + "source_chain": [], + "errors": [], + } + + stock_df, stock_source, stock_errors = self._call_df_candidates([ + ("stock_individual_fund_flow", {"stock": stock_code}), + ("stock_individual_fund_flow", {"symbol": stock_code}), + ("stock_individual_fund_flow", {}), + ("stock_main_fund_flow", {"symbol": stock_code}), + ("stock_main_fund_flow", {}), + ]) + result["errors"].extend(stock_errors) + if stock_df is not None: + row = _extract_latest_row(stock_df, stock_code) + if row is not None: + net_inflow = _safe_float(_pick_by_keywords(row, ["主力净流入", "净流入", "净额"])) + inflow_5d = _safe_float(_pick_by_keywords(row, ["5日", "五日"])) + inflow_10d = _safe_float(_pick_by_keywords(row, ["10日", "十日"])) + result["stock_flow"] = { + "main_net_inflow": net_inflow, + "inflow_5d": inflow_5d, + "inflow_10d": inflow_10d, + } + result["source_chain"].append(f"capital_stock:{stock_source}") + + sector_df, sector_source, sector_errors = self._call_df_candidates([ + ("stock_sector_fund_flow_rank", {}), + ("stock_sector_fund_flow_summary", {}), + ]) + result["errors"].extend(sector_errors) + if sector_df is not None: + name_col = next((c for c in sector_df.columns if any(k in str(c) for k in ("板块", "行业", "名称", "name"))), None) + flow_col = next((c for c in sector_df.columns if any(k in str(c) for k in ("净流入", "主力", "flow", "净额"))), None) + if name_col and flow_col: + work_df = sector_df[[name_col, flow_col]].copy() + work_df[flow_col] = pd.to_numeric(work_df[flow_col], errors="coerce") + work_df = work_df.dropna(subset=[flow_col]) + top_df = work_df.nlargest(top_n, flow_col) + bottom_df = work_df.nsmallest(top_n, flow_col) + result["sector_rankings"] = { + "top": [{"name": _safe_str(r[name_col]), "net_inflow": float(r[flow_col])} for _, r in top_df.iterrows()], + "bottom": [{"name": _safe_str(r[name_col]), "net_inflow": float(r[flow_col])} for _, r in bottom_df.iterrows()], + } + result["source_chain"].append(f"capital_sector:{sector_source}") + + has_content = bool(result["stock_flow"] or result["sector_rankings"]["top"] or result["sector_rankings"]["bottom"]) + result["status"] = "partial" if has_content else "not_supported" + return result + + def get_dragon_tiger_flag(self, stock_code: str, lookback_days: int = 20) -> Dict[str, Any]: + """ + Return dragon-tiger signal in lookback window. 
+ """ + result: Dict[str, Any] = { + "status": "not_supported", + "is_on_list": False, + "recent_count": 0, + "latest_date": None, + "source_chain": [], + "errors": [], + } + + df, source, errors = self._call_df_candidates([ + ("stock_lhb_stock_statistic_em", {}), + ("stock_lhb_detail_em", {}), + ("stock_lhb_jgmmtj_em", {}), + ]) + result["errors"].extend(errors) + if df is None: + return result + + # Try code filter + code_cols = [c for c in df.columns if any(k in str(c) for k in ("代码", "股票代码", "证券代码"))] + target = _normalize_code(stock_code) + matched = pd.DataFrame() + for col in code_cols: + try: + series = df[col].astype(str).map(_normalize_code) + cur = df[series == target] + if not cur.empty: + matched = cur + break + except Exception: + continue + if matched.empty: + result["source_chain"].append(f"dragon_tiger:{source}") + result["status"] = "ok" if code_cols else "partial" + return result + + date_col = next((c for c in matched.columns if any(k in str(c) for k in ("日期", "上榜", "交易日", "time"))), None) + parsed_dates: List[datetime] = [] + if date_col is not None: + for val in matched[date_col].astype(str).tolist(): + try: + parsed_dates.append(pd.to_datetime(val).to_pydatetime()) + except Exception: + continue + now = datetime.now() + start = now - timedelta(days=max(1, lookback_days)) + recent_dates = [d for d in parsed_dates if start <= d <= now] + + result["is_on_list"] = bool(recent_dates) + result["recent_count"] = len(recent_dates) if recent_dates else int(len(matched)) + result["latest_date"] = max(recent_dates).date().isoformat() if recent_dates else ( + max(parsed_dates).date().isoformat() if parsed_dates else None + ) + result["status"] = "ok" + result["source_chain"].append(f"dragon_tiger:{source}") + return result diff --git a/src/provider/longbridge_fetcher.py b/src/provider/longbridge_fetcher.py new file mode 100644 index 00000000..0fa111c4 --- /dev/null +++ b/src/provider/longbridge_fetcher.py @@ -0,0 +1,697 @@ +# -*- coding: utf-8 -*- +""" +=================================== +LongbridgeFetcher - 长桥兜底数据源 (Priority 5) +=================================== + +数据来源:长桥 OpenAPI (https://open.longbridge.com) +特点:覆盖美股 + 港股,可计算量比/换手率/PE 等 yfinance 缺失字段 +定位:美股/港股最后兜底数据源 + +关键策略: +1. 组合 quote + static_info 接口计算 turnover_rate / pe_ratio / total_mv +2. 通过 history_candlesticks 计算 volume_ratio(近5日均量比) +3. 懒加载 QuoteContext,首次调用时才建立连接 +4. 
static_info 进程内短缓存,减少重复请求(默认 24h,可调;见 LONGBRIDGE_STATIC_INFO_TTL_SECONDS) + +凭证:`LONGBRIDGE_APP_KEY` / `LONGBRIDGE_APP_SECRET` / `LONGBRIDGE_ACCESS_TOKEN`。 +可选:`LONGBRIDGE_STATIC_INFO_TTL_SECONDS`;SDK `language` 取自 `REPORT_LANGUAGE`,`log_path` 为 `{LOG_DIR}/longbridge_sdk.log`; +`LONGBRIDGE_HTTP_URL` / `LONGBRIDGE_QUOTE_WS_URL` / `LONGBRIDGE_TRADE_WS_URL` / `LONGBRIDGE_REGION` (见官方文档默认值)。 +""" + +import logging +import os +import time +import threading +from datetime import datetime, timedelta +from pathlib import Path +from typing import Optional, Dict, Any + +import pandas as pd + +from .base import BaseFetcher, STANDARD_COLUMNS +from .realtime_types import UnifiedRealtimeQuote, RealtimeSource, safe_float +from .us_index_mapping import is_us_stock_code, is_us_index_code + +logger = logging.getLogger(__name__) + +_DEFAULT_STATIC_INFO_TTL = 86400 # 24h + + +def _static_info_ttl_seconds() -> int: + """TTL for static_info cache; 0 disables caching (always fetch).""" + raw = os.getenv("LONGBRIDGE_STATIC_INFO_TTL_SECONDS", "").strip() + if raw == "": + return _DEFAULT_STATIC_INFO_TTL + try: + return max(0, int(raw)) + except ValueError: + return _DEFAULT_STATIC_INFO_TTL + + +_REGION_URL_MAP: Dict[str, Dict[str, str]] = { + "cn": { + "http_url": "https://openapi.longbridge.cn", + "quote_ws_url": "wss://openapi-quote.longbridge.cn/v2", + "trade_ws_url": "wss://openapi-trade.longbridge.cn/v2", + }, + "hk": { + "http_url": "https://openapi.longbridge.com", + "quote_ws_url": "wss://openapi-quote.longbridge.com/v2", + "trade_ws_url": "wss://openapi-trade.longbridge.com/v2", + }, +} + + +def _sanitize_longbridge_env() -> None: + """Remove empty-string LONGBRIDGE_*_URL env vars. + + GitHub Actions sets ``LONGBRIDGE_HTTP_URL: ${{ vars.X || secrets.X }}`` + which resolves to an empty string ``""`` when neither var nor secret is + configured. The Rust SDK's ``Config.from_apikey()`` auto-reads these + env vars, and an empty string is *not* the same as "unset" — it causes + the SDK to use a blank URL, which breaks the WebSocket handshake and + results in "context dropped" / "Client is closed" within milliseconds. + + Also mirrors ``LONGBRIDGE_REGION`` → ``LONGPORT_REGION`` because the + Rust SDK's internal ``is_cn()`` function only checks ``LONGPORT_REGION`` + (not ``LONGBRIDGE_REGION``) when deciding which default endpoints to use. + """ + for key in ( + "LONGBRIDGE_HTTP_URL", + "LONGBRIDGE_QUOTE_WS_URL", + "LONGBRIDGE_TRADE_WS_URL", + "LONGBRIDGE_ENABLE_OVERNIGHT", + "LONGBRIDGE_PUSH_CANDLESTICK_MODE", + "LONGBRIDGE_PRINT_QUOTE_PACKAGES", + "LONGBRIDGE_REGION", + "LONGBRIDGE_STATIC_INFO_TTL_SECONDS", + "LONGBRIDGE_LOG_PATH", + ): + val = os.environ.get(key) + if val is not None and val.strip() == "": + del os.environ[key] + logger.debug("[Longbridge] 删除空环境变量 %s", key) + + # App default: quiet (false). Matches README / docs/full-guide / .env.example; SDK alone may default verbose. 
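+    # Note: seeding "false" below only covers the unset case; an explicit
+    # value (e.g. LONGBRIDGE_PRINT_QUOTE_PACKAGES=1) is preserved and parsed
+    # later in _longbridge_config_kwargs(), where only "0"/"false"/"no"
+    # normalize to quiet.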
+ if "LONGBRIDGE_PRINT_QUOTE_PACKAGES" not in os.environ: + os.environ["LONGBRIDGE_PRINT_QUOTE_PACKAGES"] = "false" + + if not os.environ.get("LONGBRIDGE_LOG_PATH"): + try: + log_dir = (os.getenv("LOG_DIR") or "./logs").strip() or "./logs" + p = Path(log_dir).expanduser() + p.mkdir(parents=True, exist_ok=True) + os.environ["LONGBRIDGE_LOG_PATH"] = str(p / "longbridge_sdk.log") + logger.debug("[Longbridge] 设置 LONGBRIDGE_LOG_PATH=%s", + os.environ["LONGBRIDGE_LOG_PATH"]) + except Exception: + pass + + region = (os.getenv("LONGBRIDGE_REGION") or "").strip().lower() + if region: + if not os.environ.get("LONGPORT_REGION"): + os.environ["LONGPORT_REGION"] = region + logger.debug("[Longbridge] 同步 LONGPORT_REGION=%s", region) + + urls = _REGION_URL_MAP.get(region, {}) + for env_name, default_url in ( + ("LONGBRIDGE_HTTP_URL", urls.get("http_url")), + ("LONGBRIDGE_QUOTE_WS_URL", urls.get("quote_ws_url")), + ("LONGBRIDGE_TRADE_WS_URL", urls.get("trade_ws_url")), + ): + if default_url and not os.environ.get(env_name): + os.environ[env_name] = default_url + logger.debug("[Longbridge] 根据 REGION=%s 设置 %s=%s", + region, env_name, default_url) + + +def _longbridge_config_kwargs() -> Dict[str, Any]: + """Optional kwargs for ``Config.from_apikey`` (Longbridge OpenAPI SDK).""" + try: + import inspect + from longbridge.openapi import Config, Language, PushCandlestickMode + except Exception: + return {} + + try: + params = inspect.signature(Config.from_apikey).parameters + except Exception: + return {} + + kw: Dict[str, Any] = {} + + if "enable_print_quote_packages" in params: + # Unset / empty → False (quiet); SDK default would be verbose — we opt in explicitly. + raw = os.getenv("LONGBRIDGE_PRINT_QUOTE_PACKAGES") + if raw is None or not str(raw).strip(): + kw["enable_print_quote_packages"] = False + else: + raw_norm = str(raw).strip().lower() + kw["enable_print_quote_packages"] = raw_norm not in ("0", "false", "no") + + for pname, envname in ( + ("http_url", "LONGBRIDGE_HTTP_URL"), + ("quote_ws_url", "LONGBRIDGE_QUOTE_WS_URL"), + ("trade_ws_url", "LONGBRIDGE_TRADE_WS_URL"), + ): + if pname in params: + v = os.getenv(envname, "").strip() + if v: + kw[pname] = v + + if "language" in params: + try: + from provider._config import normalize_report_language + + rl = normalize_report_language(os.getenv("REPORT_LANGUAGE"), default="zh") + if rl == "zh": + kw["language"] = Language.ZH_CN + elif rl == "en": + kw["language"] = Language.EN + except Exception as e: + logger.debug("Longbridge language from REPORT_LANGUAGE skipped: %s", e) + + if "enable_overnight" in params: + o = os.getenv("LONGBRIDGE_ENABLE_OVERNIGHT", "").strip().lower() + if o: + kw["enable_overnight"] = o in ("1", "true", "yes") + + if "push_candlestick_mode" in params: + cm = os.getenv("LONGBRIDGE_PUSH_CANDLESTICK_MODE", "").strip().lower() + if cm == "realtime": + kw["push_candlestick_mode"] = PushCandlestickMode.Realtime + elif cm == "confirmed": + kw["push_candlestick_mode"] = PushCandlestickMode.Confirmed + elif cm: + logger.warning( + "Unknown LONGBRIDGE_PUSH_CANDLESTICK_MODE=%r; use realtime or confirmed", cm + ) + + if "log_path" in params: + try: + log_dir = (os.getenv("LOG_DIR") or "./logs").strip() or "./logs" + p = Path(log_dir).expanduser() + p.mkdir(parents=True, exist_ok=True) + kw["log_path"] = str(p / "longbridge_sdk.log") + except Exception as e: + logger.debug("Longbridge log_path from LOG_DIR skipped: %s", e) + + return kw + + +def _is_us_code(stock_code: str) -> bool: + normalized = stock_code.strip().upper() + return 
is_us_stock_code(normalized) or is_us_index_code(normalized) + + +def _is_hk_code(stock_code: str) -> bool: + normalized = (stock_code or "").strip().upper() + if normalized.startswith("HK"): + digits = normalized[2:] + return digits.isdigit() and 1 <= len(digits) <= 5 + if normalized.endswith(".HK"): + return True + if normalized.isdigit() and len(normalized) == 5: + return True + return False + + +def _to_longbridge_symbol(stock_code: str) -> Optional[str]: + """Convert internal stock code to Longbridge symbol format. + + Examples: + AAPL -> AAPL.US + HK00700 -> 0700.HK + 00700 -> 0700.HK (5-digit pure number treated as HK) + """ + code = stock_code.strip() + upper = code.upper() + + if upper.endswith(".US"): + return upper + if upper.endswith(".HK"): + return upper + + if _is_us_code(code): + return f"{upper}.US" + + if _is_hk_code(code): + upper = code.upper() + if upper.startswith("HK"): + digits = upper[2:] + else: + digits = upper + digits = digits.lstrip("0") or "0" + return f"{digits.zfill(4)}.HK" + + return None + + +class LongbridgeFetcher(BaseFetcher): + """ + 长桥 OpenAPI 数据源实现 + + 优先级: 5(最低,作为美股/港股最后兜底) + 数据来源: Longbridge OpenAPI + + 通过组合多个 API 计算 yfinance 缺失的指标: + - turnover_rate = volume / circulating_shares * 100 + - volume_ratio = today_volume / avg_5day_volume + - pe_ratio = price / eps_ttm + """ + + name = "LongbridgeFetcher" + priority = int(os.getenv("LONGBRIDGE_PRIORITY", "5")) + + _CONNECTION_ERRORS = ("client is closed", "context closed", "connection closed") + + def __init__(self): + self._ctx = None + self._config = None + self._ctx_lock = threading.Lock() + self._available = None + # {symbol: (StaticInfo, timestamp)} + self._static_cache: Dict[str, Any] = {} + self._static_cache_lock = threading.Lock() + + def _is_connection_error(self, exc: Exception) -> bool: + msg = str(exc).lower() + return any(s in msg for s in self._CONNECTION_ERRORS) + + def _invalidate_ctx(self): + """Reset cached context so the next call rebuilds the connection.""" + with self._ctx_lock: + self._ctx = None + self._config = None + + def _is_available(self) -> bool: + """Check if Longbridge credentials are configured.""" + if self._available is not None: + return self._available + try: + from provider._config import get_config + config = get_config() + has_creds = bool( + config.longbridge_app_key + and config.longbridge_app_secret + and config.longbridge_access_token + ) + except Exception: + has_creds = bool( + os.getenv("LONGBRIDGE_APP_KEY") + and os.getenv("LONGBRIDGE_APP_SECRET") + and os.getenv("LONGBRIDGE_ACCESS_TOKEN") + ) + self._available = has_creds + return has_creds + + def _get_ctx(self): + """Lazy-init the QuoteContext (thread-safe).""" + if self._ctx is not None: + return self._ctx + with self._ctx_lock: + if self._ctx is not None: + return self._ctx + if not self._is_available(): + return None + try: + from longbridge.openapi import QuoteContext, Config + + # ── 1. Clean up empty URL env vars & apply REGION mapping ── + _sanitize_longbridge_env() + + # ── 2. 
Ensure credentials are available in env ── + try: + from provider._config import get_config + app_config = get_config() + app_key = app_config.longbridge_app_key + app_secret = app_config.longbridge_app_secret + access_token = app_config.longbridge_access_token + except Exception: + app_key = os.getenv("LONGBRIDGE_APP_KEY") + app_secret = os.getenv("LONGBRIDGE_APP_SECRET") + access_token = os.getenv("LONGBRIDGE_ACCESS_TOKEN") + + for k, v in { + "LONGBRIDGE_APP_KEY": app_key, + "LONGBRIDGE_APP_SECRET": app_secret, + "LONGBRIDGE_ACCESS_TOKEN": access_token, + }.items(): + if v and not os.environ.get(k): + os.environ[k] = v + + # ── 3. Build Config ── + extra_kw = _longbridge_config_kwargs() + lb_config = None + + # Prefer from_apikey_env() — reads all LONGBRIDGE_* env vars + # (credentials + URLs + options) including .env files. + # Available in longbridge >= 4.x. from_env() only exists on + # the unreleased master branch. + for factory_name in ("from_apikey_env", "from_env"): + factory = getattr(Config, factory_name, None) + if factory is None: + continue + try: + lb_config = factory() + logger.info("[Longbridge] Config.%s() 成功", factory_name) + break + except Exception as e: + logger.debug( + "[Longbridge] Config.%s() 失败: %s", factory_name, e + ) + + if lb_config is None: + lb_config = Config.from_apikey( + app_key, + app_secret, + access_token, + **extra_kw, + ) + logger.info("[Longbridge] Config.from_apikey() 创建成功") + + # Diagnostic logging + region = os.getenv("LONGBRIDGE_REGION") or os.getenv("LONGPORT_REGION") or "(auto)" + logger.info( + "[Longbridge] 配置: region=%s, http=%s, quote_ws=%s", + region, + os.getenv("LONGBRIDGE_HTTP_URL", "(default)"), + os.getenv("LONGBRIDGE_QUOTE_WS_URL", "(default)"), + ) + + self._config = lb_config + self._ctx = QuoteContext(lb_config) + logger.info("[Longbridge] QuoteContext 初始化成功") + return self._ctx + except Exception as e: + logger.warning("[Longbridge] QuoteContext 初始化失败: %s", e) + self._available = False + return None + + # ------------------------------------------------------------------ + # static_info with cache + # ------------------------------------------------------------------ + + def _get_static_info(self, symbol: str) -> Optional[Any]: + """Fetch static info (shares, EPS, BPS, name) with optional in-process TTL cache.""" + ttl = _static_info_ttl_seconds() + now = time.time() + if ttl > 0: + with self._static_cache_lock: + cached = self._static_cache.get(symbol) + if cached and (now - cached[1]) < ttl: + return cached[0] + + ctx = self._get_ctx() + if ctx is None: + return None + try: + infos = ctx.static_info([symbol]) + if infos: + info = infos[0] + if ttl > 0: + with self._static_cache_lock: + self._static_cache[symbol] = (info, now) + return info + except Exception as e: + logger.debug(f"[Longbridge] static_info({symbol}) 失败: {e}") + if self._is_connection_error(e): + self._invalidate_ctx() + return None + + # ------------------------------------------------------------------ + # get_stock_name via static_info + # ------------------------------------------------------------------ + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """Return stock name from Longbridge static_info (name_cn or name_en).""" + symbol = _to_longbridge_symbol(stock_code) + if symbol is None: + return None + info = self._get_static_info(symbol) + if info is None: + return None + name = getattr(info, "name_cn", "") or getattr(info, "name_en", "") or "" + return name.strip() or None + + # 
------------------------------------------------------------------ + # volume_ratio from history + # ------------------------------------------------------------------ + + def _ts_sort_key(self, candle: Any) -> float: + """Monotonic sort key for a candle timestamp (UTC seconds or datetime).""" + ts = getattr(candle, "timestamp", None) + if ts is None: + return 0.0 + if hasattr(ts, "timestamp"): + return float(ts.timestamp()) + return float(int(ts)) + + def _compute_volume_ratio(self, symbol: str, today_volume: int) -> Optional[float]: + """Compute volume_ratio = today_volume / avg(recent completed daily volumes). + + Uses the most recent daily bar as \"today/incomplete\" reference window: average + volume of the next 5 older daily bars. Avoids local `date.today()` matching, which + breaks for US symbols when the shell runs in CN timezone. + """ + if not today_volume or today_volume <= 0: + return None + ctx = self._get_ctx() + if ctx is None: + return None + try: + from longbridge.openapi import Period, AdjustType + + candles = ctx.history_candlesticks_by_offset( + symbol, + Period.Day, + AdjustType.NoAdjust, + False, + 6, + datetime.now(), + ) + if not candles or len(candles) < 2: + return None + + ordered = sorted(candles, key=self._ts_sort_key, reverse=True) + past_vols: list = [] + for c in ordered[1:6]: + vol = int(getattr(c, "volume", 0) or 0) + if vol > 0: + past_vols.append(vol) + + if not past_vols: + return None + + avg_vol = sum(past_vols) / len(past_vols) + if avg_vol <= 0: + return None + + return round(today_volume / avg_vol, 2) + except Exception as e: + logger.debug(f"[Longbridge] 计算量比失败({symbol}): {e}") + return None + + # ------------------------------------------------------------------ + # get_realtime_quote + # ------------------------------------------------------------------ + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """Fetch realtime quote from Longbridge, computing derived fields.""" + if not self._is_available(): + return None + + symbol = _to_longbridge_symbol(stock_code) + if symbol is None: + logger.debug(f"[Longbridge] 无法转换代码: {stock_code}") + return None + + ctx = self._get_ctx() + if ctx is None: + return None + + try: + quotes = ctx.quote([symbol]) + if not quotes: + return None + q = quotes[0] + except Exception as e: + logger.info(f"[Longbridge] quote({symbol}) 失败: {e}") + if self._is_connection_error(e): + logger.warning("[Longbridge] 检测到连接已断开,将在下次调用时重建连接") + self._invalidate_ctx() + return None + + price = safe_float(getattr(q, "last_done", None)) + if price is None or price <= 0: + return None + + prev_close = safe_float(getattr(q, "prev_close", None)) + open_price = safe_float(getattr(q, "open", None)) + high = safe_float(getattr(q, "high", None)) + low = safe_float(getattr(q, "low", None)) + volume = int(getattr(q, "volume", 0) or 0) + turnover = safe_float(getattr(q, "turnover", None)) + + change_amount = None + change_pct = None + amplitude = None + if prev_close and prev_close > 0: + change_amount = round(price - prev_close, 4) + change_pct = round((price - prev_close) / prev_close * 100, 2) + if high is not None and low is not None: + amplitude = round((high - low) / prev_close * 100, 2) + + # Fetch static info for derived fields + static = self._get_static_info(symbol) + + turnover_rate = None + pe_ratio = None + pb_ratio = None + total_mv = None + circ_mv = None + name = "" + + if static is not None: + name = getattr(static, "name_cn", "") or getattr(static, "name_en", "") or "" + circulating = 
int(getattr(static, "circulating_shares", 0) or 0) + total_shares = int(getattr(static, "total_shares", 0) or 0) + eps_ttm = safe_float(getattr(static, "eps_ttm", None)) + eps_plain = safe_float(getattr(static, "eps", None)) + bps = safe_float(getattr(static, "bps", None)) + + # US names often report circulating_shares=0 while total_shares is set — use total for turnover. + shares_for_turnover = circulating if circulating > 0 else total_shares + if shares_for_turnover > 0 and volume > 0: + turnover_rate = round(volume / shares_for_turnover * 100, 4) + elif volume > 0: + logger.debug( + "[Longbridge] %s 无法计算换手率: volume=%s circulating=%s total_shares=%s", + symbol, + volume, + circulating, + total_shares, + ) + + eps_for_pe = None + if eps_ttm is not None and eps_ttm > 0: + eps_for_pe = eps_ttm + elif eps_plain is not None and eps_plain > 0: + eps_for_pe = eps_plain + if eps_for_pe: + pe_ratio = round(price / eps_for_pe, 2) + + if bps is not None and bps > 0: + pb_ratio = round(price / bps, 2) + if total_shares > 0: + total_mv = round(price * total_shares, 2) + if circulating > 0: + circ_mv = round(price * circulating, 2) + + volume_ratio = self._compute_volume_ratio(symbol, volume) + + quote = UnifiedRealtimeQuote( + code=stock_code, + name=name, + source=RealtimeSource.LONGBRIDGE, + price=price, + change_pct=change_pct, + change_amount=change_amount, + volume=volume if volume > 0 else None, + amount=turnover, + volume_ratio=volume_ratio, + turnover_rate=turnover_rate, + amplitude=amplitude, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=pe_ratio, + pb_ratio=pb_ratio, + total_mv=total_mv, + circ_mv=circ_mv, + ) + + logger.info( + f"[Longbridge] {symbol} 行情获取成功: " + f"价格={price}, 量比={volume_ratio}, 换手率={turnover_rate}" + ) + return quote + + # ------------------------------------------------------------------ + # BaseFetcher abstract methods (historical daily data) + # ------------------------------------------------------------------ + + def _fetch_raw_data( + self, stock_code: str, start_date: str, end_date: str + ) -> pd.DataFrame: + """Fetch historical candlesticks from Longbridge.""" + symbol = _to_longbridge_symbol(stock_code) + if symbol is None: + raise ValueError(f"Cannot convert {stock_code} to Longbridge symbol") + + ctx = self._get_ctx() + if ctx is None: + raise RuntimeError("Longbridge QuoteContext not available") + + from longbridge.openapi import Period, AdjustType + + start_dt = datetime.strptime(start_date, "%Y-%m-%d").date() + end_dt = datetime.strptime(end_date, "%Y-%m-%d").date() + + try: + candles = ctx.history_candlesticks_by_date( + symbol, + Period.Day, + AdjustType.ForwardAdjust, + start_dt, + end_dt, + ) + except Exception as e: + if self._is_connection_error(e): + logger.warning("[Longbridge] 检测到连接已断开,将在下次调用时重建连接") + self._invalidate_ctx() + raise + + if not candles: + return pd.DataFrame() + + rows = [] + for c in candles: + ts = getattr(c, "timestamp", None) + if ts is None: + continue + if hasattr(ts, "date"): + dt = ts.date() + else: + dt = datetime.fromtimestamp(int(ts)).date() + + rows.append({ + "date": dt.strftime("%Y-%m-%d"), + "open": safe_float(getattr(c, "open", None)), + "high": safe_float(getattr(c, "high", None)), + "low": safe_float(getattr(c, "low", None)), + "close": safe_float(getattr(c, "close", None)), + "volume": int(getattr(c, "volume", 0) or 0), + "turnover": safe_float(getattr(c, "turnover", None)), + }) + + return pd.DataFrame(rows) + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> 
pd.DataFrame: + """Normalize column names to standard format.""" + if df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + rename_map = {"turnover": "amount"} + df = df.rename(columns=rename_map) + + if "pct_chg" not in df.columns and "close" in df.columns: + df["pct_chg"] = df["close"].pct_change() * 100 + + for col in STANDARD_COLUMNS: + if col not in df.columns: + df[col] = None + + return df[STANDARD_COLUMNS] diff --git a/src/provider/pytdx_fetcher.py b/src/provider/pytdx_fetcher.py new file mode 100644 index 00000000..6455dac3 --- /dev/null +++ b/src/provider/pytdx_fetcher.py @@ -0,0 +1,469 @@ +# -*- coding: utf-8 -*- +""" +=================================== +PytdxFetcher - 通达信数据源 (Priority 2) +=================================== + +数据来源:通达信行情服务器(pytdx 库) +特点:免费、无需 Token、直连行情服务器 +优点:实时数据、稳定、无配额限制 + +关键策略: +1. 多服务器自动切换 +2. 连接超时自动重连 +3. 失败后指数退避重试 +""" + +import logging +import re +from contextlib import contextmanager +from typing import Optional, Generator, List, Tuple + +import pandas as pd +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, STANDARD_COLUMNS, is_bse_code, _is_hk_market +import os + +logger = logging.getLogger(__name__) + + +def _parse_hosts_from_env() -> Optional[List[Tuple[str, int]]]: + """ + 从环境变量构建通达信服务器列表。 + + 优先级: + 1. PYTDX_SERVERS:逗号分隔 "ip:port,ip:port"(如 "192.168.1.1:7709,10.0.0.1:7709") + 2. PYTDX_HOST + PYTDX_PORT:单个服务器 + 3. 均未配置时返回 None(调用方使用 DEFAULT_HOSTS) + """ + servers = os.getenv("PYTDX_SERVERS", "").strip() + if servers: + result = [] + for part in servers.split(","): + part = part.strip() + if ":" in part: + host, port_str = part.rsplit(":", 1) + host, port_str = host.strip(), port_str.strip() + if host and port_str: + try: + result.append((host, int(port_str))) + except ValueError: + logger.warning(f"Invalid PYTDX_SERVERS entry: {part}") + else: + logger.warning(f"Invalid PYTDX_SERVERS entry (missing port): {part}") + if result: + return result + + host = os.getenv("PYTDX_HOST", "").strip() + port_str = os.getenv("PYTDX_PORT", "").strip() + if host and port_str: + try: + return [(host, int(port_str))] + except ValueError: + logger.warning(f"Invalid PYTDX_HOST/PYTDX_PORT: {host}:{port_str}") + + return None + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +class PytdxFetcher(BaseFetcher): + """ + 通达信数据源实现 + + 优先级:2(与 Tushare 同级) + 数据来源:通达信行情服务器 + + 关键策略: + - 自动选择最优服务器 + - 连接失败自动切换服务器 + - 失败后指数退避重试 + + Pytdx 特点: + - 免费、无需注册 + - 直连行情服务器 + - 支持实时行情和历史数据 + - 支持股票名称查询 + """ + + name = "PytdxFetcher" + priority = int(os.getenv("PYTDX_PRIORITY", "2")) + + # 默认通达信行情服务器列表 + DEFAULT_HOSTS = [ + ("119.147.212.81", 7709), # 深圳 + ("112.74.214.43", 7727), # 深圳 + ("221.231.141.60", 7709), # 上海 + ("101.227.73.20", 7709), # 上海 + ("101.227.77.254", 7709), # 上海 + ("14.215.128.18", 7709), # 广州 + ("59.173.18.140", 7709), # 武汉 + ("180.153.39.51", 7709), # 杭州 + ] + # Pytdx get_security_list returns at most 1000 items per page + SECURITY_LIST_PAGE_SIZE = 1000 + + def __init__(self, hosts: Optional[List[Tuple[str, int]]] = None): + """ + 初始化 PytdxFetcher + + Args: + hosts: 服务器列表 [(host, port), ...]。若未传入,优先使用环境变量 + PYTDX_SERVERS(ip:port,ip:port)或 PYTDX_HOST+PYTDX_PORT, + 否则使用内置 DEFAULT_HOSTS。 + """ + if hosts is not None: + self._hosts = hosts + else: + env_hosts 
= _parse_hosts_from_env() + self._hosts = env_hosts if env_hosts else self.DEFAULT_HOSTS + self._api = None + self._connected = False + self._current_host_idx = 0 + self._stock_list_cache = None # 股票列表缓存 + self._stock_name_cache = {} # 股票名称缓存 {code: name} + + def _get_pytdx(self): + """ + 延迟加载 pytdx 模块 + + 只在首次使用时导入,避免未安装时报错 + """ + try: + from pytdx.hq import TdxHq_API + return TdxHq_API + except ImportError: + logger.warning("pytdx 未安装,请运行: pip install pytdx") + return None + + @contextmanager + def _pytdx_session(self) -> Generator: + """ + Pytdx 连接上下文管理器 + + 确保: + 1. 进入上下文时自动连接 + 2. 退出上下文时自动断开 + 3. 异常时也能正确断开 + + 使用示例: + with self._pytdx_session() as api: + # 在这里执行数据查询 + """ + TdxHq_API = self._get_pytdx() + if TdxHq_API is None: + raise DataFetchError("pytdx 库未安装") + + api = TdxHq_API() + connected = False + + try: + # 尝试连接服务器(自动选择最优) + for i in range(len(self._hosts)): + host_idx = (self._current_host_idx + i) % len(self._hosts) + host, port = self._hosts[host_idx] + + try: + if api.connect(host, port, time_out=5): + connected = True + self._current_host_idx = host_idx + logger.debug(f"Pytdx 连接成功: {host}:{port}") + break + except Exception as e: + logger.debug(f"Pytdx 连接 {host}:{port} 失败: {e}") + continue + + if not connected: + raise DataFetchError("Pytdx 无法连接任何服务器") + + yield api + + finally: + # 确保断开连接 + try: + api.disconnect() + logger.debug("Pytdx 连接已断开") + except Exception as e: + logger.warning(f"Pytdx 断开连接时出错: {e}") + + def _get_market_code(self, stock_code: str) -> Tuple[int, str]: + """ + 根据股票代码判断市场 + + Pytdx 市场代码: + - 0: 深圳 + - 1: 上海 + + Args: + stock_code: 股票代码 + + Returns: + (market, code) 元组 + """ + code = stock_code.strip() + + # 去除可能的前缀后缀 + code = code.replace('.SH', '').replace('.SZ', '') + code = code.replace('.sh', '').replace('.sz', '') + code = code.replace('sh', '').replace('sz', '') + + # 根据代码前缀判断市场 + # 上海:60xxxx, 68xxxx(科创板) + # 深圳:00xxxx, 30xxxx(创业板), 002xxx(中小板) + if code.startswith(('60', '68')): + return 1, code # 上海 + else: + return 0, code # 深圳 + + def _build_stock_list_cache(self, api) -> None: + """ + Build a full stock code -> name cache from paginated security lists. + """ + self._stock_list_cache = {} + + for market in (0, 1): + start = 0 + while True: + stocks = api.get_security_list(market, start) or [] + for stock in stocks: + code = stock.get('code') + name = stock.get('name') + if code and name: + self._stock_list_cache[code] = name + + if len(stocks) < self.SECURITY_LIST_PAGE_SIZE: + break + + start += self.SECURITY_LIST_PAGE_SIZE + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从通达信获取原始数据 + + 使用 get_security_bars() 获取日线数据 + + 流程: + 1. 检查是否为美股(不支持) + 2. 使用上下文管理器管理连接 + 3. 判断市场代码 + 4. 
调用 API 获取 K 线数据 + """ + # 美股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_us_code(stock_code): + raise DataFetchError(f"PytdxFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # 港股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_hk_market(stock_code): + raise DataFetchError(f"PytdxFetcher 不支持港股 {stock_code},请使用 AkshareFetcher") + + # 北交所不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if is_bse_code(stock_code): + raise DataFetchError( + f"PytdxFetcher 不支持北交所 {stock_code},将自动切换其他数据源" + ) + + market, code = self._get_market_code(stock_code) + + # 计算需要获取的交易日数量(估算) + from datetime import datetime as dt + start_dt = dt.strptime(start_date, '%Y-%m-%d') + end_dt = dt.strptime(end_date, '%Y-%m-%d') + days = (end_dt - start_dt).days + count = min(max(days * 5 // 7 + 10, 30), 800) # 估算交易日,最大 800 条 + + logger.debug(f"调用 Pytdx get_security_bars(market={market}, code={code}, count={count})") + + with self._pytdx_session() as api: + try: + # 获取日 K 线数据 + # category: 9-日线, 0-5分钟, 1-15分钟, 2-30分钟, 3-1小时 + data = api.get_security_bars( + category=9, # 日线 + market=market, + code=code, + start=0, # 从最新开始 + count=count + ) + + if data is None or len(data) == 0: + raise DataFetchError(f"Pytdx 未查询到 {stock_code} 的数据") + + # 转换为 DataFrame + df = api.to_df(data) + + # 过滤日期范围 + df['datetime'] = pd.to_datetime(df['datetime']) + df = df[(df['datetime'] >= start_date) & (df['datetime'] <= end_date)] + + return df + + except Exception as e: + if isinstance(e, DataFetchError): + raise + raise DataFetchError(f"Pytdx 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Pytdx 数据 + + Pytdx 返回的列名: + datetime, open, high, low, close, vol, amount + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 列名映射 + column_mapping = { + 'datetime': 'date', + 'vol': 'volume', + } + + df = df.rename(columns=column_mapping) + + # 计算涨跌幅(pytdx 不返回涨跌幅,需要自己计算) + if 'pct_chg' not in df.columns and 'close' in df.columns: + df['pct_chg'] = df['close'].pct_change() * 100 + df['pct_chg'] = df['pct_chg'].fillna(0).round(2) + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """ + 获取股票名称 + + Args: + stock_code: 股票代码 + + Returns: + 股票名称,失败返回 None + """ + # 港股不支持(pytdx 不含港股数据) + if _is_hk_market(stock_code): + return None + + # 先检查缓存 + if stock_code in self._stock_name_cache: + return self._stock_name_cache[stock_code] + + try: + market, code = self._get_market_code(stock_code) + + with self._pytdx_session() as api: + # 获取股票列表(缓存) + if self._stock_list_cache is None: + self._build_stock_list_cache(api) + + # 查找股票名称 + name = self._stock_list_cache.get(code) + if name: + self._stock_name_cache[stock_code] = name + return name + + # 尝试使用 get_finance_info + finance_info = api.get_finance_info(market, code) + if finance_info and 'name' in finance_info: + name = finance_info['name'] + self._stock_name_cache[stock_code] = name + return name + + except Exception as e: + logger.warning(f"Pytdx 获取股票名称失败 {stock_code}: {e}") + + return None + + def get_realtime_quote(self, stock_code: str) -> Optional[dict]: + """ + 获取实时行情 + + Args: + stock_code: 股票代码 + + Returns: + 实时行情数据字典,失败返回 None + """ + if is_bse_code(stock_code): + raise DataFetchError( + f"PytdxFetcher 不支持北交所 {stock_code},将自动切换其他数据源" + ) + try: + market, code = 
self._get_market_code(stock_code) + + with self._pytdx_session() as api: + data = api.get_security_quotes([(market, code)]) + + if data and len(data) > 0: + quote = data[0] + return { + 'code': stock_code, + 'name': quote.get('name', ''), + 'price': quote.get('price', 0), + 'open': quote.get('open', 0), + 'high': quote.get('high', 0), + 'low': quote.get('low', 0), + 'pre_close': quote.get('last_close', 0), + 'volume': quote.get('vol', 0), + 'amount': quote.get('amount', 0), + 'bid_prices': [quote.get(f'bid{i}', 0) for i in range(1, 6)], + 'ask_prices': [quote.get(f'ask{i}', 0) for i in range(1, 6)], + } + except Exception as e: + logger.warning(f"Pytdx 获取实时行情失败 {stock_code}: {e}") + + return None + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = PytdxFetcher() + + try: + # 测试历史数据 + df = fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + + # 测试股票名称 + name = fetcher.get_stock_name('600519') + print(f"股票名称: {name}") + + # 测试实时行情 + quote = fetcher.get_realtime_quote('600519') + print(f"实时行情: {quote}") + + except Exception as e: + print(f"获取失败: {e}") diff --git a/src/provider/realtime_types.py b/src/provider/realtime_types.py new file mode 100644 index 00000000..8d7d3b96 --- /dev/null +++ b/src/provider/realtime_types.py @@ -0,0 +1,449 @@ +# -*- coding: utf-8 -*- +""" +=================================== +实时行情统一类型定义 & 熔断机制 +=================================== + +设计目标: +1. 统一各数据源的实时行情返回结构 +2. 实现熔断/冷却机制,避免连续失败时反复请求 +3. 支持多数据源故障切换 + +使用方式: +- 所有 Fetcher 的 get_realtime_quote() 统一返回 UnifiedRealtimeQuote +- CircuitBreaker 管理各数据源的熔断状态 +""" + +import logging +import time +from threading import RLock +from dataclasses import dataclass, field +from typing import Optional, Dict, Any, Union +from enum import Enum + +logger = logging.getLogger(__name__) + + +# ============================================ +# 通用类型转换工具函数 +# ============================================ +# 设计说明: +# 各数据源返回的原始数据类型不一致(str/float/int/NaN), +# 使用这些函数统一转换,避免在各 Fetcher 中重复定义。 + +def safe_float(val: Any, default: Optional[float] = None) -> Optional[float]: + """ + 安全转换为浮点数 + + 处理场景: + - None / 空字符串 → default + - pandas NaN / numpy NaN → default + - 数值字符串 → float + - 已是数值 → float + + Args: + val: 待转换的值 + default: 转换失败时的默认值 + + Returns: + 转换后的浮点数,或默认值 + """ + try: + if val is None: + return default + + # 处理字符串 + if isinstance(val, str): + val = val.strip() + if val == "" or val == "-" or val == "--": + return default + + # 处理 pandas/numpy NaN + # 使用 math.isnan 而不是 pd.isna,避免强制依赖 pandas + import math + try: + if math.isnan(float(val)): + return default + except (ValueError, TypeError): + pass + + return float(val) + except (ValueError, TypeError): + return default + + +def safe_int(val: Any, default: Optional[int] = None) -> Optional[int]: + """ + 安全转换为整数 + + 先转换为 float,再取整,处理 "123.0" 这类情况 + + Args: + val: 待转换的值 + default: 转换失败时的默认值 + + Returns: + 转换后的整数,或默认值 + """ + f_val = safe_float(val, default=None) + if f_val is not None: + return int(f_val) + return default + + +class RealtimeSource(Enum): + """实时行情数据源""" + EFINANCE = "efinance" # 东方财富(efinance库) + AKSHARE_EM = "akshare_em" # 东方财富(akshare库) + AKSHARE_SINA = "akshare_sina" # 新浪财经 + AKSHARE_QQ = "akshare_qq" # 腾讯财经 + TUSHARE = "tushare" # Tushare Pro + TENCENT = "tencent" # 腾讯直连 + SINA = "sina" # 新浪直连 + STOOQ = "stooq" # Stooq 美股兜底 + LONGBRIDGE = "longbridge" # 长桥(美股/港股兜底) + FALLBACK = "fallback" # 降级兜底 + + +@dataclass +class UnifiedRealtimeQuote: + """ + 统一实时行情数据结构 + + 设计原则: + - 
各数据源返回的字段可能不同,缺失字段用 None 表示 + - 主流程使用 getattr(quote, field, None) 获取,保证兼容性 + - source 字段标记数据来源,便于调试 + """ + code: str + name: str = "" + source: RealtimeSource = RealtimeSource.FALLBACK + + # === 核心价格数据(几乎所有源都有)=== + price: Optional[float] = None # 最新价 + change_pct: Optional[float] = None # 涨跌幅(%) + change_amount: Optional[float] = None # 涨跌额 + + # === 量价指标(部分源可能缺失)=== + volume: Optional[int] = None # 成交量(手) + amount: Optional[float] = None # 成交额(元) + volume_ratio: Optional[float] = None # 量比 + turnover_rate: Optional[float] = None # 换手率(%) + amplitude: Optional[float] = None # 振幅(%) + + # === 价格区间 === + open_price: Optional[float] = None # 开盘价 + high: Optional[float] = None # 最高价 + low: Optional[float] = None # 最低价 + pre_close: Optional[float] = None # 昨收价 + + # === 估值指标(仅东财等全量接口有)=== + pe_ratio: Optional[float] = None # 市盈率(动态) + pb_ratio: Optional[float] = None # 市净率 + total_mv: Optional[float] = None # 总市值(元) + circ_mv: Optional[float] = None # 流通市值(元) + + # === 其他指标 === + change_60d: Optional[float] = None # 60日涨跌幅(%) + high_52w: Optional[float] = None # 52周最高 + low_52w: Optional[float] = None # 52周最低 + + def to_dict(self) -> Dict[str, Any]: + """转换为字典(过滤 None 值)""" + result = { + 'code': self.code, + 'name': self.name, + 'source': self.source.value, + } + # 只添加非 None 的字段 + optional_fields = [ + 'price', 'change_pct', 'change_amount', 'volume', 'amount', + 'volume_ratio', 'turnover_rate', 'amplitude', + 'open_price', 'high', 'low', 'pre_close', + 'pe_ratio', 'pb_ratio', 'total_mv', 'circ_mv', + 'change_60d', 'high_52w', 'low_52w' + ] + for f in optional_fields: + val = getattr(self, f, None) + if val is not None: + result[f] = val + return result + + def has_basic_data(self) -> bool: + """检查是否有基本的价格数据""" + return self.price is not None and self.price > 0 + + def has_volume_data(self) -> bool: + """检查是否有量价数据""" + return self.volume_ratio is not None or self.turnover_rate is not None + + +@dataclass +class ChipDistribution: + """ + 筹码分布数据 + + 反映持仓成本分布和获利情况 + """ + code: str + date: str = "" + source: str = "akshare" + + # 获利情况 + profit_ratio: float = 0.0 # 获利比例(0-1) + avg_cost: float = 0.0 # 平均成本 + + # 筹码集中度 + cost_90_low: float = 0.0 # 90%筹码成本下限 + cost_90_high: float = 0.0 # 90%筹码成本上限 + concentration_90: float = 0.0 # 90%筹码集中度(越小越集中) + + cost_70_low: float = 0.0 # 70%筹码成本下限 + cost_70_high: float = 0.0 # 70%筹码成本上限 + concentration_70: float = 0.0 # 70%筹码集中度 + + def to_dict(self) -> Dict[str, Any]: + """转换为字典""" + return { + 'code': self.code, + 'date': self.date, + 'source': self.source, + 'profit_ratio': self.profit_ratio, + 'avg_cost': self.avg_cost, + 'cost_90_low': self.cost_90_low, + 'cost_90_high': self.cost_90_high, + 'concentration_90': self.concentration_90, + 'concentration_70': self.concentration_70, + } + + def get_chip_status(self, current_price: float) -> str: + """ + 获取筹码状态描述 + + Args: + current_price: 当前股价 + + Returns: + 筹码状态描述 + """ + status_parts = [] + + # 获利比例分析 + if self.profit_ratio >= 0.9: + status_parts.append("获利盘极高(获利盘>90%)") + elif self.profit_ratio >= 0.7: + status_parts.append("获利盘较高(获利盘70-90%)") + elif self.profit_ratio >= 0.5: + status_parts.append("获利盘中等(获利盘50-70%)") + elif self.profit_ratio >= 0.3: + status_parts.append("套牢盘中等(套牢盘50-70%)") + elif self.profit_ratio >= 0.1: + status_parts.append("套牢盘较高(套牢盘70-90%)") + else: + status_parts.append("套牢盘极高(套牢盘>90%)") + + # 筹码集中度分析 (90%集中度 < 10% 表示集中) + if self.concentration_90 < 0.08: + status_parts.append("筹码高度集中") + elif self.concentration_90 < 0.15: + status_parts.append("筹码较集中") + elif self.concentration_90 
< 0.25: + status_parts.append("筹码分散度中等") + else: + status_parts.append("筹码较分散") + + # 成本与现价关系 + if current_price > 0 and self.avg_cost > 0: + cost_diff = (current_price - self.avg_cost) / self.avg_cost * 100 + if cost_diff > 20: + status_parts.append(f"现价高于平均成本{cost_diff:.1f}%") + elif cost_diff > 5: + status_parts.append(f"现价略高于成本{cost_diff:.1f}%") + elif cost_diff > -5: + status_parts.append("现价接近平均成本") + else: + status_parts.append(f"现价低于平均成本{abs(cost_diff):.1f}%") + + return ",".join(status_parts) + + +class CircuitBreaker: + """ + 熔断器 - 管理数据源的熔断/冷却状态 + + 策略: + - 连续失败 N 次后进入熔断状态 + - 熔断期间跳过该数据源 + - 冷却时间后自动恢复半开状态 + - 半开状态下单次成功则完全恢复,失败则继续熔断 + + 状态机: + CLOSED(正常) --失败N次--> OPEN(熔断)--冷却时间到--> HALF_OPEN(半开) + HALF_OPEN --成功--> CLOSED + HALF_OPEN --失败--> OPEN + """ + + # 状态常量 + CLOSED = "closed" # 正常状态 + OPEN = "open" # 熔断状态(不可用) + HALF_OPEN = "half_open" # 半开状态(试探性请求) + + def __init__( + self, + failure_threshold: int = 3, # 连续失败次数阈值 + cooldown_seconds: float = 300.0, # 冷却时间(秒),默认5分钟 + half_open_max_calls: int = 1 # 半开状态最大尝试次数 + ): + self.failure_threshold = failure_threshold + self.cooldown_seconds = cooldown_seconds + self.half_open_max_calls = half_open_max_calls + + # 各数据源状态 {source_name: {state, failures, last_failure_time, half_open_calls}} + self._states: Dict[str, Dict[str, Any]] = {} + self._lock = RLock() + + def _get_state_locked(self, source: str) -> Dict[str, Any]: + """获取或初始化数据源状态(调用方需持有锁)。""" + if source not in self._states: + self._states[source] = { + 'state': self.CLOSED, + 'failures': 0, + 'last_failure_time': 0.0, + 'half_open_calls': 0 + } + return self._states[source] + + def is_available(self, source: str) -> bool: + """ + 检查数据源是否可用 + + 返回 True 表示可以尝试请求 + 返回 False 表示应跳过该数据源 + """ + with self._lock: + state = self._get_state_locked(source) + current_time = time.time() + + if state['state'] == self.CLOSED: + return True + + if state['state'] == self.OPEN: + # 检查冷却时间 + time_since_failure = current_time - state['last_failure_time'] + if time_since_failure >= self.cooldown_seconds: + # 冷却完成,进入半开状态(不预占名额,由 HALF_OPEN 分支统一管理) + state['state'] = self.HALF_OPEN + state['half_open_calls'] = 0 + state['last_failure_time'] = current_time + logger.info(f"[熔断器] {source} 冷却完成,进入半开状态") + # Fall through to HALF_OPEN check below + else: + remaining = self.cooldown_seconds - time_since_failure + logger.debug(f"[熔断器] {source} 处于熔断状态,剩余冷却时间: {remaining:.0f}s") + return False + + if state['state'] == self.HALF_OPEN: + if state['half_open_calls'] < self.half_open_max_calls: + state['half_open_calls'] += 1 + return True + # 所有探测名额已用完;若冷却时间再次到期仍未收到 + # record_success/record_failure 回调,重置名额允许重新探测, + # 避免永久卡在 HALF_OPEN。 + time_since_failure = current_time - state['last_failure_time'] + if time_since_failure >= self.cooldown_seconds: + state['half_open_calls'] = 1 + state['last_failure_time'] = current_time + logger.info(f"[熔断器] {source} 半开状态探测超时,重新探测") + return True + return False + + return True + + def record_inconclusive(self, source: str) -> None: + """记录不确定的探测结果(如返回 None)。 + + 仅影响 HALF_OPEN 状态:将其转回 OPEN 以便冷却后重新探测。 + CLOSED 状态下为空操作,不影响失败计数。 + """ + with self._lock: + state = self._get_state_locked(source) + if state['state'] == self.HALF_OPEN: + state['state'] = self.OPEN + state['half_open_calls'] = 0 + state['last_failure_time'] = time.time() + logger.info(f"[熔断器] {source} 半开探测结果不确定,重新进入冷却") + + def record_success(self, source: str) -> None: + """记录成功请求""" + with self._lock: + state = self._get_state_locked(source) + + if state['state'] == self.HALF_OPEN: + # 半开状态下成功,完全恢复 + 
logger.info(f"[熔断器] {source} 半开状态请求成功,恢复正常") + + # 重置状态 + state['state'] = self.CLOSED + state['failures'] = 0 + state['half_open_calls'] = 0 + + def record_failure(self, source: str, error: Optional[str] = None) -> None: + """记录失败请求""" + with self._lock: + state = self._get_state_locked(source) + current_time = time.time() + + state['failures'] += 1 + state['last_failure_time'] = current_time + + if state['state'] == self.HALF_OPEN: + # 半开状态下失败,继续熔断 + state['state'] = self.OPEN + state['half_open_calls'] = 0 + logger.warning(f"[熔断器] {source} 半开状态请求失败,继续熔断 {self.cooldown_seconds}s") + elif state['failures'] >= self.failure_threshold: + # 达到阈值,进入熔断 + state['state'] = self.OPEN + logger.warning(f"[熔断器] {source} 连续失败 {state['failures']} 次,进入熔断状态 " + f"(冷却 {self.cooldown_seconds}s)") + if error: + logger.warning(f"[熔断器] 最后错误: {error}") + + def get_status(self) -> Dict[str, str]: + """获取所有数据源状态""" + with self._lock: + return {source: info['state'] for source, info in self._states.items()} + + def reset(self, source: Optional[str] = None) -> None: + """重置熔断器状态""" + with self._lock: + if source: + if source in self._states: + del self._states[source] + else: + self._states.clear() + + +# 全局熔断器实例(实时行情专用) +_realtime_circuit_breaker = CircuitBreaker( + failure_threshold=3, # 连续失败3次熔断 + cooldown_seconds=300.0, # 冷却5分钟 + half_open_max_calls=1 +) + +# 筹码接口熔断器(更保守的策略,因为该接口更不稳定) +_chip_circuit_breaker = CircuitBreaker( + failure_threshold=2, # 连续失败2次熔断 + cooldown_seconds=600.0, # 冷却10分钟 + half_open_max_calls=1 +) + + +def get_realtime_circuit_breaker() -> CircuitBreaker: + """获取实时行情熔断器""" + return _realtime_circuit_breaker + + +def get_chip_circuit_breaker() -> CircuitBreaker: + """获取筹码接口熔断器""" + return _chip_circuit_breaker diff --git a/src/provider/tickflow_fetcher.py b/src/provider/tickflow_fetcher.py new file mode 100644 index 00000000..ae415644 --- /dev/null +++ b/src/provider/tickflow_fetcher.py @@ -0,0 +1,341 @@ +# -*- coding: utf-8 -*- +""" +=================================== +TickFlowFetcher - market review only +=================================== + +Issue #632 only requires TickFlow for A-share market review stability. +This fetcher intentionally implements a narrow P0 surface: + +1. Main A-share indices quotes +2. A-share market breadth statistics + +It does not participate in the general daily-data or per-stock realtime +pipelines and should only be called explicitly by DataFetcherManager. 
+""" + +import logging +import math +from threading import RLock +from time import monotonic +from typing import Any, Dict, List, Optional + +import pandas as pd + +from .base import ( + BaseFetcher, + DataFetchError, + is_bse_code, + is_kc_cy_stock, + is_st_stock, + normalize_stock_code, +) + + +logger = logging.getLogger(__name__) + +_CN_MAIN_INDEX_QUOTES = ( + ("000001.SH", "000001", "上证指数"), + ("399001.SZ", "399001", "深证成指"), + ("399006.SZ", "399006", "创业板指"), + ("000688.SH", "000688", "科创50"), + ("000016.SH", "000016", "上证50"), + ("000300.SH", "000300", "沪深300"), +) +_MAX_SYMBOLS_PER_QUOTE_REQUEST = 5 +_UNIVERSE_PERMISSION_NEGATIVE_CACHE_TTL_SECONDS = 900 + + +class TickFlowFetcher(BaseFetcher): + """TickFlow-backed market review helper.""" + + name = "TickFlowFetcher" + priority = 99 + + def __init__(self, api_key: Optional[str], timeout: float = 30.0): + self.api_key = (api_key or "").strip() + self.timeout = timeout + self._client = None + self._client_lock = RLock() + self._universe_query_supported: Optional[bool] = None + self._universe_query_checked_at: Optional[float] = None + + def close(self) -> None: + """Close the underlying TickFlow client if it was created.""" + with self._client_lock: + client = self._client + self._client = None + self._universe_query_supported = None + self._universe_query_checked_at = None + if client is not None: + try: + client.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 关闭客户端失败: %s", exc) + + def __del__(self) -> None: + try: + self.close() + except Exception: + # Best-effort cleanup during interpreter shutdown. + pass + + def _build_client(self): + from tickflow import TickFlow + + return TickFlow(api_key=self.api_key, timeout=self.timeout) + + def _get_client(self): + if not self.api_key: + return None + if self._client is not None: + return self._client + + with self._client_lock: + if self._client is None: + self._client = self._build_client() + return self._client + + def _fetch_raw_data( + self, stock_code: str, start_date: str, end_date: str + ) -> pd.DataFrame: + raise DataFetchError( + "TickFlowFetcher P0 only supports market review endpoints" + ) + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + raise DataFetchError( + "TickFlowFetcher P0 only supports market review endpoints" + ) + + @staticmethod + def _safe_float(value: Any) -> Optional[float]: + if value in (None, "", "-"): + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + @classmethod + def _ratio_to_percent(cls, value: Any) -> Optional[float]: + ratio = cls._safe_float(value) + if ratio is None: + return None + return ratio * 100.0 + + @staticmethod + def _extract_name(quote: Dict[str, Any]) -> str: + ext = quote.get("ext") or {} + name = ext.get("name") or quote.get("name") or "" + return str(name).strip() + + @staticmethod + def _is_universe_permission_error(exc: Exception) -> bool: + status_code = getattr(exc, "status_code", None) + code = str(getattr(exc, "code", "") or "").upper() + message = ( + f"{getattr(exc, 'message', '')} {exc}" + ).strip().lower() + + if status_code == 403: + return True + if code in {"PERMISSION_DENIED", "FORBIDDEN"}: + return True + return any( + keyword in message + for keyword in ( + "标的池查询", + "universe", + "permission", + "forbidden", + ) + ) + + @staticmethod + def _is_cn_equity_symbol(symbol: str) -> bool: + normalized = normalize_stock_code(symbol) + upper_symbol = (symbol or "").strip().upper() + return ( + normalized.isdigit() + and 
len(normalized) == 6 + and upper_symbol.endswith((".SH", ".SZ", ".BJ")) + ) + + @staticmethod + def _round_limit_price(prev_close: float, ratio: float) -> float: + return math.floor(prev_close * (1 + ratio) * 100 + 0.5) / 100.0 + + @classmethod + def _get_limit_ratio(cls, pure_code: str, name: str) -> float: + if is_bse_code(pure_code): + return 0.30 + if is_kc_cy_stock(pure_code): + return 0.20 + if is_st_stock(name): + return 0.05 + return 0.10 + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """Fetch main A-share indices via TickFlow quotes.""" + if region != "cn": + return None + + client = self._get_client() + if client is None: + return None + + symbols = [symbol for symbol, _, _ in _CN_MAIN_INDEX_QUOTES] + quotes: List[Dict[str, Any]] = [] + for offset in range(0, len(symbols), _MAX_SYMBOLS_PER_QUOTE_REQUEST): + batch_symbols = symbols[offset : offset + _MAX_SYMBOLS_PER_QUOTE_REQUEST] + batch_quotes = client.quotes.get(symbols=batch_symbols) + if batch_quotes: + quotes.extend(batch_quotes) + if not quotes: + logger.warning("[TickFlowFetcher] 指数行情为空") + return None + + quotes_by_symbol = { + str(item.get("symbol", "")).upper(): item for item in quotes if item + } + results: List[Dict[str, Any]] = [] + + for symbol, code, name in _CN_MAIN_INDEX_QUOTES: + quote = quotes_by_symbol.get(symbol) + if not quote: + continue + + ext = quote.get("ext") or {} + current = self._safe_float(quote.get("last_price")) or 0.0 + prev_close = self._safe_float(quote.get("prev_close")) or 0.0 + change = self._safe_float(ext.get("change_amount")) + if change is None: + change = current - prev_close if current or prev_close else 0.0 + amplitude = self._ratio_to_percent(ext.get("amplitude")) + if amplitude is None and prev_close > 0: + high = self._safe_float(quote.get("high")) or 0.0 + low = self._safe_float(quote.get("low")) or 0.0 + amplitude = (high - low) / prev_close * 100 + + results.append( + { + "code": code, + "name": name, + "current": current, + "change": change, + "change_pct": self._ratio_to_percent(ext.get("change_pct")) or 0.0, + "open": self._safe_float(quote.get("open")) or 0.0, + "high": self._safe_float(quote.get("high")) or 0.0, + "low": self._safe_float(quote.get("low")) or 0.0, + "prev_close": prev_close, + "volume": self._safe_float(quote.get("volume")) or 0.0, + "amount": self._safe_float(quote.get("amount")) or 0.0, + "amplitude": amplitude or 0.0, + } + ) + + if len(results) != len(_CN_MAIN_INDEX_QUOTES): + logger.warning( + "[TickFlowFetcher] 指数行情不完整: %s/%s", + len(results), + len(_CN_MAIN_INDEX_QUOTES), + ) + return None + + return results or None + + def get_market_stats(self) -> Optional[Dict[str, Any]]: + """Calculate A-share market breadth from TickFlow universe quotes.""" + client = self._get_client() + if client is None: + return None + + now = monotonic() + if self._universe_query_supported is False: + checked_at = self._universe_query_checked_at or 0.0 + if ( + now - checked_at + < _UNIVERSE_PERMISSION_NEGATIVE_CACHE_TTL_SECONDS + ): + return None + self._universe_query_supported = None + self._universe_query_checked_at = None + + try: + quotes = client.quotes.get(universes=["CN_Equity_A"]) + self._universe_query_supported = True + self._universe_query_checked_at = now + except Exception as exc: + if self._is_universe_permission_error(exc): + self._universe_query_supported = False + self._universe_query_checked_at = now + logger.info( + "[TickFlowFetcher] 当前套餐不支持标的池查询,市场统计回退到现有数据源" + ) + return None + raise + if not quotes: + 
logger.warning("[TickFlowFetcher] 市场统计行情为空") + return None + + stats = { + "up_count": 0, + "down_count": 0, + "flat_count": 0, + "limit_up_count": 0, + "limit_down_count": 0, + "total_amount": 0.0, + } + valid_rows = 0 + + for quote in quotes: + if not quote: + continue + + symbol = str(quote.get("symbol") or "").strip().upper() + if not self._is_cn_equity_symbol(symbol): + continue + + amount = self._safe_float(quote.get("amount")) + if amount is not None and amount > 0: + stats["total_amount"] += amount / 1e8 + + pure_code = normalize_stock_code(symbol) + last_price = self._safe_float(quote.get("last_price")) + prev_close = self._safe_float(quote.get("prev_close")) + + if last_price is None or prev_close is None or amount is None or amount <= 0: + continue + + name = self._extract_name(quote) + if not name: + logger.debug("[TickFlowFetcher] 缺少股票名称,按非 ST 处理: %s", symbol) + + ratio = self._get_limit_ratio(pure_code, name) + limit_up = self._round_limit_price(prev_close, ratio) + limit_down = math.floor(prev_close * (1 - ratio) * 100 + 0.5) / 100.0 + limit_up_tolerance = round(abs(prev_close * (1 + ratio) - limit_up), 10) + limit_down_tolerance = round( + abs(prev_close * (1 - ratio) - limit_down), 10 + ) + + valid_rows += 1 + + if abs(last_price - limit_up) <= limit_up_tolerance: + stats["limit_up_count"] += 1 + if abs(last_price - limit_down) <= limit_down_tolerance: + stats["limit_down_count"] += 1 + + if last_price > prev_close: + stats["up_count"] += 1 + elif last_price < prev_close: + stats["down_count"] += 1 + else: + stats["flat_count"] += 1 + + if valid_rows == 0: + logger.warning("[TickFlowFetcher] 市场统计未命中有效 A 股行情") + return None + + return stats diff --git a/src/provider/tushare_fetcher.py b/src/provider/tushare_fetcher.py new file mode 100644 index 00000000..4e5fed43 --- /dev/null +++ b/src/provider/tushare_fetcher.py @@ -0,0 +1,1320 @@ +# -*- coding: utf-8 -*- +""" +=================================== +TushareFetcher - 备用数据源 1 (Priority 2) +=================================== + +数据来源:Tushare Pro API(挖地兔) +特点:需要 Token、有请求配额限制 +优点:数据质量高、接口稳定 + +流控策略: +1. 实现"每分钟调用计数器" +2. 超过免费配额(80次/分)时,强制休眠到下一分钟 +3. 使用 tenacity 实现指数退避重试 +""" + +import json as _json +import logging +import re +import time +from datetime import datetime, timedelta +from typing import Optional, Tuple, List, Dict, Any + +import pandas as pd +import requests +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS,is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code, _is_hk_market +from .realtime_types import UnifiedRealtimeQuote, ChipDistribution +from provider._config import get_config +import os +from zoneinfo import ZoneInfo + +logger = logging.getLogger(__name__) + + +# ETF code prefixes by exchange +# Shanghai: 51xxxx, 52xxxx, 56xxxx, 58xxxx +# Shenzhen: 15xxxx, 16xxxx, 18xxxx +_ETF_SH_PREFIXES = ('51', '52', '56', '58') +_ETF_SZ_PREFIXES = ('15', '16', '18') +_ETF_ALL_PREFIXES = _ETF_SH_PREFIXES + _ETF_SZ_PREFIXES + + +def _is_etf_code(stock_code: str) -> bool: + """ + Check if the code is an ETF fund code. 
+ + ETF code ranges: + - Shanghai ETF: 51xxxx, 52xxxx, 56xxxx, 58xxxx + - Shenzhen ETF: 15xxxx, 16xxxx, 18xxxx + """ + code = stock_code.strip().split('.')[0] + return code.startswith(_ETF_ALL_PREFIXES) and len(code) == 6 + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +class _TushareHttpClient: + """Lightweight Tushare Pro client that does not require the tushare SDK.""" + + def __init__(self, token: str, timeout: int = 30, api_url: str = "http://api.tushare.pro") -> None: + self._token = token + self._timeout = timeout + self._api_url = api_url + + def query(self, api_name: str, fields: str = "", **kwargs) -> pd.DataFrame: + req_params = { + "api_name": api_name, + "token": self._token, + "params": kwargs, + "fields": fields, + } + res = requests.post(self._api_url, json=req_params, timeout=self._timeout) + if res.status_code != 200: + raise Exception(f"Tushare API HTTP {res.status_code}") + + result = _json.loads(res.text) + if result.get("code") != 0: + raise Exception(result.get("msg") or f"Tushare API error code {result.get('code')}") + + data = result.get("data") or {} + columns = data.get("fields") or [] + items = data.get("items") or [] + return pd.DataFrame(items, columns=columns) + + def __getattr__(self, api_name: str): + if api_name.startswith("_"): + raise AttributeError(api_name) + + def caller(**kwargs) -> pd.DataFrame: + return self.query(api_name, **kwargs) + + return caller + + +class TushareFetcher(BaseFetcher): + """ + Tushare Pro 数据源实现 + + 优先级:2 + 数据来源:Tushare Pro API + + 关键策略: + - 每分钟调用计数器,防止超出配额 + - 超过 80 次/分钟时强制等待 + - 失败后指数退避重试 + + 配额说明(Tushare 免费用户): + - 每分钟最多 80 次请求 + - 每天最多 500 次请求 + """ + + name = "TushareFetcher" + priority = int(os.getenv("TUSHARE_PRIORITY", "2")) # 默认优先级,会在 __init__ 中根据配置动态调整 + + def __init__(self, rate_limit_per_minute: int = 80): + """ + 初始化 TushareFetcher + + Args: + rate_limit_per_minute: 每分钟最大请求数(默认80,Tushare免费配额) + """ + self.rate_limit_per_minute = rate_limit_per_minute + self._call_count = 0 # 当前分钟内的调用次数 + self._minute_start: Optional[float] = None # 当前计数周期开始时间 + self._api: Optional[object] = None # Tushare API 实例 + self.date_list: Optional[List[str]] = None # 交易日列表缓存(倒序,最新日期在前) + self._date_list_end: Optional[str] = None # 缓存对应的截止日期,用于跨日刷新 + + # 尝试初始化 API + self._init_api() + + # 根据 API 初始化结果动态调整优先级 + self.priority = self._determine_priority() + + def _init_api(self) -> None: + """ + 初始化 Tushare API + + 如果 Token 未配置,此数据源将不可用。 + 这里直接使用内置 HTTP client,避免运行时强依赖 tushare SDK, + 从而减少 Docker / PyInstaller / 多虚拟环境场景下因缺包导致的初始化失败。 + """ + config = get_config() + + if not config.tushare_token: + logger.warning("Tushare Token 未配置,此数据源不可用") + return + + try: + self._api = self._build_api_client(config.tushare_token) + logger.info("Tushare API 初始化成功") + except Exception as e: + logger.error(f"Tushare API 初始化失败: {e}") + self._api = None + + def _build_api_client(self, token: str) -> _TushareHttpClient: + """ + Build a lightweight Tushare Pro client over direct HTTP requests. + + The project already normalizes all Pro calls through the same request + contract, so we do not need the official tushare SDK during runtime. 
+ """ + client = _TushareHttpClient(token=token) + logger.debug("Tushare API client configured for direct HTTP calls") + return client + + def _determine_priority(self) -> int: + """ + 根据 Token 配置和 API 初始化状态确定优先级 + + 策略: + - Token 配置且 API 初始化成功:优先级 -1(绝对最高,优于 efinance) + - 其他情况:优先级 2(默认) + + Returns: + 优先级数字(0=最高,数字越大优先级越低) + """ + config = get_config() + + if config.tushare_token and self._api is not None: + # Token 配置且 API 初始化成功,提升为最高优先级 + logger.info("✅ 检测到 TUSHARE_TOKEN 且 API 初始化成功,Tushare 数据源优先级提升为最高 (Priority -1)") + return -1 + + # Token 未配置或 API 初始化失败,保持默认优先级 + return 2 + + def is_available(self) -> bool: + """ + 检查数据源是否可用 + + Returns: + True 表示可用,False 表示不可用 + """ + return self._api is not None + + def _check_rate_limit(self) -> None: + """ + 检查并执行速率限制 + + 流控策略: + 1. 检查是否进入新的一分钟 + 2. 如果是,重置计数器 + 3. 如果当前分钟调用次数超过限制,强制休眠 + """ + current_time = time.time() + + # 检查是否需要重置计数器(新的一分钟) + if self._minute_start is None: + self._minute_start = current_time + self._call_count = 0 + elif current_time - self._minute_start >= 60: + # 已经过了一分钟,重置计数器 + self._minute_start = current_time + self._call_count = 0 + logger.debug("速率限制计数器已重置") + + # 检查是否超过配额 + if self._call_count >= self.rate_limit_per_minute: + # 计算需要等待的时间(到下一分钟) + elapsed = current_time - self._minute_start + sleep_time = max(0, 60 - elapsed) + 1 # +1 秒缓冲 + + logger.warning( + f"Tushare 达到速率限制 ({self._call_count}/{self.rate_limit_per_minute} 次/分钟)," + f"等待 {sleep_time:.1f} 秒..." + ) + + time.sleep(sleep_time) + + # 重置计数器 + self._minute_start = time.time() + self._call_count = 0 + + # 增加调用计数 + self._call_count += 1 + logger.debug(f"Tushare 当前分钟调用次数: {self._call_count}/{self.rate_limit_per_minute}") + + def _call_api_with_rate_limit(self, method_name: str, **kwargs) -> pd.DataFrame: + """统一通过速率限制包装 Tushare API 调用。""" + if self._api is None: + raise DataFetchError("Tushare API 未初始化,请检查 Token 配置") + + self._check_rate_limit() + method = getattr(self._api, method_name) + return method(**kwargs) + + def _get_china_now(self) -> datetime: + """返回上海时区当前时间,方便测试覆盖跨日刷新逻辑。""" + return datetime.now(ZoneInfo("Asia/Shanghai")) + + def _get_trade_dates(self, end_date: Optional[str] = None) -> List[str]: + """按自然日刷新交易日历缓存,避免服务跨日后继续复用旧日历。""" + if self._api is None: + return [] + + china_now = self._get_china_now() + requested_end_date = end_date or china_now.strftime("%Y%m%d") + + if self.date_list is not None and self._date_list_end == requested_end_date: + return self.date_list + + start_date = (china_now - timedelta(days=20)).strftime("%Y%m%d") + df_cal = self._call_api_with_rate_limit( + "trade_cal", + exchange="SSE", + start_date=start_date, + end_date=requested_end_date, + ) + + if df_cal is None or df_cal.empty or "cal_date" not in df_cal.columns: + logger.warning("[Tushare] trade_cal 返回为空,无法更新交易日历缓存") + self.date_list = [] + self._date_list_end = requested_end_date + return self.date_list + + trade_dates = sorted( + df_cal[df_cal["is_open"] == 1]["cal_date"].astype(str).tolist(), + reverse=True, + ) + self.date_list = trade_dates + self._date_list_end = requested_end_date + return trade_dates + + @staticmethod + def _pick_trade_date(trade_dates: List[str], use_today: bool) -> Optional[str]: + """根据可用交易日列表选择当天或前一交易日。""" + if not trade_dates: + return None + if use_today or len(trade_dates) == 1: + return trade_dates[0] + return trade_dates[1] + + @staticmethod + def _detect_exchange_hint(stock_code: str) -> Optional[str]: + """Return SH/SZ/BJ when the raw user input carries an explicit exchange hint.""" + upper = (stock_code or "").strip().upper() + 
if upper.startswith(("SH", "SS")) or upper.endswith((".SH", ".SS")): + return "SH" + if upper.startswith("SZ") or upper.endswith(".SZ"): + return "SZ" + if upper.startswith("BJ") or upper.endswith(".BJ"): + return "BJ" + return None + + @classmethod + def _get_legacy_realtime_symbol(cls, stock_code: str) -> str: + """Build the legacy tushare symbol while preserving explicit SH/SZ hints.""" + code = normalize_stock_code(stock_code) + exchange_hint = cls._detect_exchange_hint(stock_code) + + if code == '000001' and exchange_hint == 'SH': + return 'sh000001' + if code == '399001': + return 'sz399001' + if code == '399006': + return 'sz399006' + if code == '000300': + return 'sh000300' + if is_bse_code(code): + return f"bj{code}" + return code + + def _convert_stock_code(self, stock_code: str) -> str: + """ + 转换 A 股 / ETF / 北交所等为 Tushare ts_code(不含港股逻辑)。 + + Tushare 要求的格式示例: + - 沪市股票:600519.SH + - 深市股票:000001.SZ + - 沪市 ETF:510050.SH + - 深市 ETF:159919.SZ + + Args: + stock_code: 原始代码,如 '600519', '000001', '563230' + + Returns: + Tushare 格式代码,如 '600519.SH', '000001.SZ' + """ + raw_code = stock_code.strip() + + # Already has suffix + if '.' in raw_code: + ts_code = raw_code.upper() + if ts_code.endswith('.SS'): + return f"{ts_code[:-3]}.SH" + return ts_code + + if _is_us_code(raw_code): + raise DataFetchError(f"TushareFetcher 不支持美股 {raw_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + if _is_hk_market(raw_code): + #raise DataFetchError(f"TushareFetcher 不支持港股 {raw_code},请使用 AkshareFetcher") + return normalize_stock_code(raw_code) + + code = normalize_stock_code(raw_code) + exchange_hint = self._detect_exchange_hint(raw_code) + + if exchange_hint == "SH": + return f"{code}.SH" + if exchange_hint == "SZ": + return f"{code}.SZ" + if exchange_hint == "BJ": + return f"{code}.BJ" + + # ETF: determine exchange by prefix + if code.startswith(_ETF_SH_PREFIXES) and len(code) == 6: + return f"{code}.SH" + if code.startswith(_ETF_SZ_PREFIXES) and len(code) == 6: + return f"{code}.SZ" + + # BSE (Beijing Stock Exchange): 8xxxxx, 4xxxxx, 920xxx + if is_bse_code(code): + return f"{code}.BJ" + + # Regular stocks + # Shanghai: 600xxx, 601xxx, 603xxx, 688xxx (STAR Market) + # Shenzhen: 000xxx, 002xxx, 300xxx (ChiNext) + if code.startswith(('600', '601', '603', '688')): + return f"{code}.SH" + elif code.startswith(('000', '002', '300')): + return f"{code}.SZ" + else: + logger.warning(f"无法确定股票 {code} 的市场,默认使用深市") + return f"{code}.SZ" + + def _convert_hk_stock_code_for_tushare(self, stock_code: str) -> str: + """ + 将用户输入转为 Tushare Pro 接口所需的 ts_code(含港股 nnnnn.HK)。 + + - 非港股:委托 _convert_stock_code(A 股 / ETF / 北交所等)。 + - 港股:从 HK00700、00700、00700.HK 等形式归一为 5 位数字 + .HK。 + """ + raw_code = stock_code.strip() + if _is_hk_market(raw_code): + if "." in raw_code: + ts_code = raw_code.upper() + if ts_code.endswith(".SS"): + return f"{ts_code[:-3]}.SH" + if ts_code.endswith(".HK"): + return ts_code + digits = re.sub(r"\D", "", raw_code) + if not digits: + raise DataFetchError(f"无法识别港股代码 {raw_code}") + code = digits[-5:].rjust(5, "0") + return f"{code}.HK" + return self._convert_stock_code(stock_code) + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Tushare 获取原始数据 + + 根据代码类型选择不同接口: + - 普通股票:daily() + - ETF 基金:fund_daily() + + 流程: + 1. 检查 API 是否可用 + 2. 
检查是否为美股(不支持) + 3. 执行速率限制检查 + 4. 转换股票代码格式 + 5. 根据代码类型选择接口并调用 + """ + if self._api is None: + raise DataFetchError("Tushare API 未初始化,请检查 Token 配置") + + # US stocks not supported + if _is_us_code(stock_code): + raise DataFetchError(f"TushareFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # Rate-limit check + self._check_rate_limit() + + is_hk = _is_hk_market(stock_code) + # 判断是否为 ETF / 港股,以选择不同接口 + is_etf = _is_etf_code(stock_code) + if is_hk: + ts_code = self._convert_hk_stock_code_for_tushare(stock_code) + api_name = "hk_daily" + else: + ts_code = self._convert_stock_code(stock_code) + api_name = "fund_daily" if is_etf else "daily" + + # Convert date format (Tushare requires YYYYMMDD) + ts_start = start_date.replace('-', '') + ts_end = end_date.replace('-', '') + + + + logger.debug(f"调用 Tushare {api_name}({ts_code}, {ts_start}, {ts_end})") + + try: + if is_hk: + # 港股使用 hk_daily 接口 + df = self._api.hk_daily( + ts_code=ts_code, + start_date=ts_start, + end_date=ts_end, + ) + elif is_etf: + # ETF uses fund_daily interface + df = self._api.fund_daily( + ts_code=ts_code, + start_date=ts_start, + end_date=ts_end, + ) + else: + # Regular A-share stocks use daily interface + df = self._api.daily( + ts_code=ts_code, + start_date=ts_start, + end_date=ts_end, + ) + + return df + + except Exception as e: + error_msg = str(e).lower() + + # 检测配额超限 + if any(keyword in error_msg for keyword in ['quota', '配额', 'limit', '权限']): + logger.warning(f"Tushare 配额可能超限: {e}") + raise RateLimitError(f"Tushare 配额超限: {e}") from e + + raise DataFetchError(f"Tushare 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Tushare 数据 + + Tushare daily / fund_daily 返回的列名: + ts_code, trade_date, open, high, low, close, pre_close, change, pct_chg, vol, amount + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + + 单位缩放仅适用于 A 股(及 ETF 等使用同一套单位的接口): + - vol 按「手」计,乘以 100 转为「股」 + - amount 按「千元」计,乘以 1000 转为「元」 + + 港股 hk_daily 返回的 vol / amount 已是可直接使用的量级,不做上述缩放。 + """ + df = df.copy() + is_hk = _is_hk_market(stock_code) + + # 列名映射 + column_mapping = { + 'trade_date': 'date', + 'vol': 'volume', + # open, high, low, close, amount, pct_chg 列名相同 + } + + df = df.rename(columns=column_mapping) + + # 转换日期格式(YYYYMMDD -> YYYY-MM-DD) + if 'date' in df.columns: + df['date'] = pd.to_datetime(df['date'], format='%Y%m%d') + + # 成交量 / 成交额:仅 A 股类接口做单位换算(港股 hk_daily 不换算) + if 'volume' in df.columns and not is_hk: + df['volume'] = df['volume'] * 100 + + if 'amount' in df.columns and not is_hk: + df['amount'] = df['amount'] * 1000 + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """ + 获取股票名称 + + 使用 Tushare 的 stock_basic 接口获取股票基本信息 + + Args: + stock_code: 股票代码 + + Returns: + 股票名称,失败返回 None + """ + if self._api is None: + logger.warning("Tushare API 未初始化,无法获取股票名称") + return None + + # 检查缓存 + if hasattr(self, '_stock_name_cache') and stock_code in self._stock_name_cache: + return self._stock_name_cache[stock_code] + + # 初始化缓存 + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + + try: + # 速率限制检查 + self._check_rate_limit() + + + # 根据市场/类型选择基础信息接口 + if _is_hk_market(stock_code): + ts_code = self._convert_hk_stock_code_for_tushare(stock_code) + # 港股:使用 hk_basic + df = self._api.hk_basic( + ts_code=ts_code, + 
fields='ts_code,name' + ) + elif _is_etf_code(stock_code): + ts_code = self._convert_stock_code(stock_code) + # ETF:使用 fund_basic + df = self._api.fund_basic( + ts_code=ts_code, + fields='ts_code,name' + ) + else: + ts_code = self._convert_stock_code(stock_code) + # A 股股票:使用 stock_basic + df = self._api.stock_basic( + ts_code=ts_code, + fields='ts_code,name' + ) + + if df is not None and not df.empty: + name = df.iloc[0]['name'] + self._stock_name_cache[stock_code] = name + logger.debug(f"Tushare 获取股票名称成功: {stock_code} -> {name}") + return name + + except Exception as e: + logger.warning(f"Tushare 获取股票名称失败 {stock_code}: {e}") + + return None + + def get_stock_list(self) -> Optional[pd.DataFrame]: + """ + 获取股票列表 + + 使用 Tushare 的 stock_basic 接口获取 A 股列表(不含港股)。 + + Returns: + 包含 code, name, industry, area, market 列的 DataFrame,失败返回 None + """ + if self._api is None: + logger.warning("Tushare API 未初始化,无法获取股票列表") + return None + + try: + self._check_rate_limit() + + df = self._api.stock_basic( + exchange='', + list_status='L', + fields='ts_code,name,industry,area,market' + ) + + if df is None or df.empty: + return None + + df = df.copy() + df['code'] = df['ts_code'].astype(str).str.split('.').str[0] + + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + for _, row in df.iterrows(): + self._stock_name_cache[row['code']] = row['name'] + + logger.info(f"Tushare 获取股票列表成功: {len(df)} 条") + return df[['code', 'name', 'industry', 'area', 'market']] + + except Exception as e: + logger.warning(f"Tushare 获取股票列表失败: {e}") + + return None + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取实时行情 + + 策略: + 1. 优先尝试 Pro 接口(需要2000积分):数据全,稳定性高 + 2. 失败降级到旧版接口:门槛低,数据较少 + + Args: + stock_code: 股票代码 + + Returns: + UnifiedRealtimeQuote 对象,失败返回 None + """ + if self._api is None: + return None + + # HK stocks not supported by Tushare + if _is_hk_market(stock_code): + logger.debug(f"TushareFetcher 跳过港股实时行情 {stock_code}") + return None + + normalized_code = normalize_stock_code(stock_code) + + from .realtime_types import ( + RealtimeSource, + safe_float, safe_int + ) + + # 速率限制检查 + self._check_rate_limit() + + # 尝试 Pro 接口 + try: + ts_code = self._convert_stock_code(stock_code) + # 尝试调用 Pro 实时接口 (需要积分) + df = self._api.quotation(ts_code=ts_code) + + if df is not None and not df.empty: + row = df.iloc[0] + logger.debug(f"Tushare Pro 实时行情获取成功: {stock_code}") + + return UnifiedRealtimeQuote( + code=normalized_code, + name=str(row.get('name', '')), + source=RealtimeSource.TUSHARE, + price=safe_float(row.get('price')), + change_pct=safe_float(row.get('pct_chg')), # Pro 接口通常直接返回涨跌幅 + change_amount=safe_float(row.get('change')), + volume=safe_int(row.get('vol')), + amount=safe_float(row.get('amount')), + high=safe_float(row.get('high')), + low=safe_float(row.get('low')), + open_price=safe_float(row.get('open')), + pre_close=safe_float(row.get('pre_close')), + turnover_rate=safe_float(row.get('turnover_ratio')), # Pro 接口可能有换手率 + pe_ratio=safe_float(row.get('pe')), + pb_ratio=safe_float(row.get('pb')), + total_mv=safe_float(row.get('total_mv')), + ) + except Exception as e: + # 仅记录调试日志,不报错,继续尝试降级 + logger.debug(f"Tushare Pro 实时行情不可用 (可能是积分不足): {e}") + + # 降级:尝试旧版接口 + try: + import tushare as ts + + symbol = self._get_legacy_realtime_symbol(stock_code) + + # 调用旧版实时接口 (ts.get_realtime_quotes) + df = ts.get_realtime_quotes(symbol) + + if df is None or df.empty: + return None + + row = df.iloc[0] + + # 计算涨跌幅 + price = safe_float(row['price']) + pre_close = 
safe_float(row['pre_close']) + change_pct = 0.0 + change_amount = 0.0 + + if price and pre_close and pre_close > 0: + change_amount = price - pre_close + change_pct = (change_amount / pre_close) * 100 + + # 构建统一对象 + return UnifiedRealtimeQuote( + code=normalized_code, + name=str(row['name']), + source=RealtimeSource.TUSHARE, + price=price, + change_pct=round(change_pct, 2), + change_amount=round(change_amount, 2), + volume=safe_int(row['volume']) // 100, # 转换为手 + amount=safe_float(row['amount']), + high=safe_float(row['high']), + low=safe_float(row['low']), + open_price=safe_float(row['open']), + pre_close=pre_close, + ) + + except Exception as e: + logger.warning(f"Tushare (旧版) 获取实时行情失败 {stock_code}: {e}") + return None + + def get_main_indices(self, region: str = "cn") -> Optional[List[dict]]: + """ + 获取主要指数实时行情 (Tushare Pro),仅支持 A 股 + """ + if region != "cn": + return None + if self._api is None: + return None + + from .realtime_types import safe_float + + # 指数映射:Tushare代码 -> 名称 + indices_map = { + '000001.SH': '上证指数', + '399001.SZ': '深证成指', + '399006.SZ': '创业板指', + '000688.SH': '科创50', + '000016.SH': '上证50', + '000300.SH': '沪深300', + } + + try: + self._check_rate_limit() + + # Tushare index_daily 获取历史数据,实时数据需用其他接口或估算 + # 由于 Tushare 免费用户可能无法获取指数实时行情,这里作为备选 + # 使用 index_daily 获取最近交易日数据 + + end_date = datetime.now().strftime('%Y%m%d') + start_date = (datetime.now() - pd.Timedelta(days=5)).strftime('%Y%m%d') + + results = [] + + # 批量获取所有指数数据 + for ts_code, name in indices_map.items(): + try: + df = self._api.index_daily(ts_code=ts_code, start_date=start_date, end_date=end_date) + if df is not None and not df.empty: + row = df.iloc[0] # 最新一天 + + current = safe_float(row['close']) + prev_close = safe_float(row['pre_close']) + + results.append({ + 'code': ts_code.split('.')[0], # 兼容 sh000001 格式需转换,这里保持纯数字 + 'name': name, + 'current': current, + 'change': safe_float(row['change']), + 'change_pct': safe_float(row['pct_chg']), + 'open': safe_float(row['open']), + 'high': safe_float(row['high']), + 'low': safe_float(row['low']), + 'prev_close': prev_close, + 'volume': safe_float(row['vol']), + 'amount': safe_float(row['amount']) * 1000, # 千元转元 + 'amplitude': 0.0 # Tushare index_daily 不直接返回振幅 + }) + except Exception as e: + logger.debug(f"Tushare 获取指数 {name} 失败: {e}") + continue + + if results: + return results + else: + logger.warning("[Tushare] 未获取到指数行情数据") + + except Exception as e: + logger.error(f"[Tushare] 获取指数行情失败: {e}") + + return None + + def get_market_stats(self) -> Optional[dict]: + """ + 获取市场涨跌统计 (Tushare Pro) + 2000积分 每天访问该接口 ts.pro_api().rt_k 两次 + 接口限制见:https://tushare.pro/document/1?doc_id=108 + """ + if self._api is None: + return None + + try: + logger.info("[Tushare] ts.pro_api() 获取市场统计...") + + # 获取当前中国时间,判断是否在交易时间内 + china_now = self._get_china_now() + current_clock = china_now.strftime("%H:%M") + current_date = china_now.strftime("%Y%m%d") + + trade_dates = self._get_trade_dates(current_date) + if not trade_dates: + return None + + if current_date in trade_dates: + if current_clock < '09:30' or current_clock > '16:30': + use_realtime = False + else: + use_realtime = True + else: + use_realtime = False + + # 若实盘的时候使用 则使用其他可以实盘获取的数据源 akshare、efinance + if use_realtime: + try: + df = self._call_api_with_rate_limit("rt_k", ts_code='3*.SZ,6*.SH,0*.SZ,92*.BJ') + if df is not None and not df.empty: + return self._calc_market_stats(df) + + except Exception as e: + logger.error(f"[Tushare] ts.pro_api().rt_k 尝试获取实时数据失败: {e}") + return None + else: + + if current_date not in 
trade_dates:
+                last_date = self._pick_trade_date(trade_dates, use_today=True)  # 非交易日:拿最近的交易日
+            else:
+                if current_clock < '09:30':
+                    last_date = self._pick_trade_date(trade_dates, use_today=False)  # 开盘前:拿前一交易日数据
+                else:  # 即 '> 16:30',收盘后
+                    last_date = self._pick_trade_date(trade_dates, use_today=True)  # 拿当天数据
+
+            if last_date is None:
+                return None
+
+            try:
+                df = self._call_api_with_rate_limit(
+                    "daily",
+                    ts_code='3*.SZ,6*.SH,0*.SZ,92*.BJ',
+                    start_date=last_date,
+                    end_date=last_date,
+                )
+                if df is None or df.empty:
+                    return None
+
+                # 为防止不同接口返回的列名大小写不一致(例如 rt_k 返回小写,daily 返回大写),统一将列名转为小写
+                df.columns = [col.lower() for col in df.columns]
+
+                # 获取股票基础信息(包含代码和名称)
+                df_basic = self._call_api_with_rate_limit("stock_basic", fields='ts_code,name')
+                df = pd.merge(df, df_basic, on='ts_code', how='left')
+                # 将 daily 的 amount 列(千元)乘以 1000,与其他数据源保持一致
+                if 'amount' in df.columns:
+                    df['amount'] = df['amount'] * 1000
+
+                return self._calc_market_stats(df)
+            except Exception as e:
+                logger.error(f"[Tushare] ts.pro_api().daily 获取数据失败: {e}")
+
+        except Exception as e:
+            logger.error(f"[Tushare] 获取市场统计失败: {e}")
+
+        return None
+
+    def _calc_market_stats(
+        self,
+        df: pd.DataFrame,
+    ) -> Optional[Dict[str, Any]]:
+        """从行情 DataFrame 计算涨跌统计。"""
+        import numpy as np
+
+        df = df.copy()
+
+        # 1. 提取基础比对数据:最新价、昨收
+        # 兼容不同接口(sina/em、efinance、tushare、xtdata)返回的列名
+        code_col = next((c for c in ['代码', '股票代码', 'ts_code', 'stock_code'] if c in df.columns), None)
+        name_col = next((c for c in ['名称', '股票名称', 'name'] if c in df.columns), None)
+        close_col = next((c for c in ['最新价', 'close', 'lastPrice'] if c in df.columns), None)
+        pre_close_col = next((c for c in ['昨收', '昨日收盘', 'pre_close', 'lastClose'] if c in df.columns), None)
+        amount_col = next((c for c in ['成交额', 'amount'] if c in df.columns), None)
+
+        # 任一必需列缺失时无法统计,直接返回 None
+        if None in (code_col, name_col, close_col, pre_close_col, amount_col):
+            logger.warning("[Tushare] 行情 DataFrame 缺少必需列,无法计算涨跌统计")
+            return None
+
+        limit_up_count = 0
+        limit_down_count = 0
+        up_count = 0
+        down_count = 0
+        flat_count = 0
+
+        for code, name, current_price, pre_close, amount in zip(
+            df[code_col], df[name_col], df[close_col], df[pre_close_col], df[amount_col]
+        ):
+            # 停牌过滤:efinance 的停牌数据有时缺失价格显示为 '-',em 显示为 None
+            if pd.isna(current_price) or pd.isna(pre_close) or current_price == '-' or pre_close == '-' or amount == 0:
+                continue
+
+            # em、efinance 返回 str,需要转换为 float
+            current_price = float(current_price)
+            pre_close = float(pre_close)
+
+            # 获取去除前缀的纯数字代码
+            pure_code = normalize_stock_code(str(code))
+
+            # A. 确定每只股票的涨跌幅比例(使用纯数字代码判断)
+            if is_bse_code(pure_code):
+                ratio = 0.30
+            elif is_kc_cy_stock(pure_code):
+                ratio = 0.20
+            elif is_st_stock(name):
+                ratio = 0.05
+            else:
+                ratio = 0.10
+
+            # B. 严格按照 A 股规则计算涨跌停价:昨收 * (1 ± 比例) -> 四舍五入保留 2 位小数
+            limit_up_price = np.floor(pre_close * (1 + ratio) * 100 + 0.5) / 100.0
+            limit_down_price = np.floor(pre_close * (1 - ratio) * 100 + 0.5) / 100.0
+
+            limit_up_tolerance = round(abs(pre_close * (1 + ratio) - limit_up_price), 10)
+            limit_down_tolerance = round(abs(pre_close * (1 - ratio) - limit_down_price), 10)
+
+            # C. 精确比对涨跌停价
+            if current_price > 0:
+                if abs(current_price - limit_up_price) <= limit_up_tolerance:
+                    limit_up_count += 1
+                if abs(current_price - limit_down_price) <= limit_down_tolerance:
+                    limit_down_count += 1
+
+            if current_price > pre_close:
+                up_count += 1
+            elif current_price < pre_close:
+                down_count += 1
+            else:
+                flat_count += 1
+
+        # 统计数量
+        stats = {
+            'up_count': up_count,
+            'down_count': down_count,
+            'flat_count': flat_count,
+            'limit_up_count': limit_up_count,
+            'limit_down_count': limit_down_count,
+            'total_amount': 0.0,
+        }
+
+        # 成交额统计(单位:亿元)
+        if amount_col in df.columns:
+            df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce')
+            stats['total_amount'] = df[amount_col].sum() / 1e8
+
+        return stats
+
+    def get_trade_time(self, early_time: str = '09:30', late_time: str = '16:30') -> Optional[str]:
+        """
+        获取当前时间可以获得数据的开始日期
+
+        Args:
+            early_time: 默认 '09:30'
+            late_time: 默认 '16:30'
+                early_time 与 late_time 之间为使用上一个交易日数据的时间段,其余时间使用当天数据
+        Returns:
+            start_date: 可以获得数据的开始日期
+        """
+        china_now = self._get_china_now()
+        china_date = china_now.strftime("%Y%m%d")
+        china_clock = china_now.strftime("%H:%M")
+
+        trade_dates = self._get_trade_dates(china_date)
+        if not trade_dates:
+            return None
+
+        if china_date in trade_dates:
+            if early_time < china_clock < late_time:  # 使用上一个交易日数据的时间段
+                use_today = False
+            else:
+                use_today = True
+        else:
+            # 非交易日:today 不在 trade_dates 中,trade_dates[0] 就是最近交易日
+            use_today = True
+
+        start_date = self._pick_trade_date(trade_dates, use_today=use_today)
+        if start_date is None:
+            return None
+
+        if not use_today:
+            logger.info(f"[Tushare] 当前时间 {china_clock} 可能无法获取当天数据,回退到前一交易日 {start_date}")
+
+        return start_date
+
+    def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[list, list]]:
+        """
+        获取行业板块涨跌榜 (Tushare Pro)
+
+        数据源优先级:
+        1. 同花顺接口 (ts.pro_api().moneyflow_ind_ths)
+        2. 
东财接口 (ts.pro_api().moneyflow_ind_dc) + 注意:每个接口的行业分类和板块定义不同,会导致结果两者不一致 + """ + def _get_rank_top_n(df: pd.DataFrame, change_col: str, industry_name: str, n: int) -> Tuple[list, list]: + df[change_col] = pd.to_numeric(df[change_col], errors='coerce') + df = df.dropna(subset=[change_col]) + + # 涨幅前n + top = df.nlargest(n, change_col) + top_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in top.iterrows() + ] + + bottom = df.nsmallest(n, change_col) + bottom_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in bottom.iterrows() + ] + return top_sectors, bottom_sectors + + # 15:30之后才有当天数据 + start_date = self.get_trade_time(early_time='00:00', late_time='15:30') + if not start_date: + return None + + # 优先同花顺接口 + logger.info("[Tushare] ts.pro_api().moneyflow_ind_ths 获取板块排行(同花顺)...") + try: + df = self._call_api_with_rate_limit("moneyflow_ind_ths", trade_date=start_date) + if df is not None and not df.empty: + change_col = 'pct_change' + name = 'industry' + if change_col in df.columns: + return _get_rank_top_n(df, change_col, name, n) + except Exception as e: + logger.warning(f"[Tushare] 获取同花顺行业板块涨跌榜失败: {e} 尝试东财接口") + + # 同花顺接口失败,降级尝试东财接口 + logger.info("[Tushare] ts.pro_api().moneyflow_ind_dc 获取板块排行(东财)...") + try: + df = self._call_api_with_rate_limit("moneyflow_ind_dc", trade_date=start_date) + if df is not None and not df.empty: + df = df[df['content_type'] == '行业'] # 过滤出行业板块 + change_col = 'pct_change' + name = 'name' + if change_col in df.columns: + return _get_rank_top_n(df, change_col, name, n) + except Exception as e: + logger.warning(f"[Tushare] 获取东财行业板块涨跌榜失败: {e}") + return None + + # 获取为空或者接口调用失败,返回 None + return None + + + + + def get_chip_distribution(self, stock_code: str) -> Optional[ChipDistribution]: + """ + 获取筹码分布数据 + + 数据来源:ts.pro_api().cyq_chips() + 包含:获利比例、平均成本、筹码集中度 + + 注意:ETF/指数没有筹码分布数据,会直接返回 None;港股不支持,直接返回 None。 + 5000积分以下每天访问15次,每小时访问5次 + + Args: + stock_code: 股票代码 + + Returns: + ChipDistribution 对象(最新交易日的数据),获取失败返回 None + + """ + if _is_us_code(stock_code): + logger.warning(f"[Tushare] TushareFetcher 不支持美股 {stock_code} 的筹码分布") + return None + + if _is_etf_code(stock_code): + logger.warning(f"[Tushare] TushareFetcher 不支持 ETF {stock_code} 的筹码分布") + return None + + if _is_hk_market(stock_code): + logger.warning(f"[Tushare] TushareFetcher 不支持港股 {stock_code} 的筹码分布") + return None + + try: + # 19点之后才有当天数据 + start_date = self.get_trade_time(early_time='00:00', late_time='19:00') + if not start_date: + return None + + ts_code = self._convert_stock_code(stock_code) + + df = self._call_api_with_rate_limit( + "cyq_chips", + ts_code=ts_code, + start_date=start_date, + end_date=start_date, + ) + if df is not None and not df.empty: + daily_df = self._call_api_with_rate_limit( + "daily", + ts_code=ts_code, + start_date=start_date, + end_date=start_date, + ) + if daily_df is None or daily_df.empty: + return None + current_price = daily_df.iloc[0]['close'] + metrics = self.compute_cyq_metrics(df, current_price) + + chip = ChipDistribution( + code=stock_code, + date=datetime.strptime(start_date, '%Y%m%d').strftime('%Y-%m-%d'), + profit_ratio=metrics['获利比例'], + avg_cost=metrics['平均成本'], + cost_90_low=metrics['90成本-低'], + cost_90_high=metrics['90成本-高'], + concentration_90=metrics['90集中度'], + cost_70_low=metrics['70成本-低'], + cost_70_high=metrics['70成本-高'], + concentration_70=metrics['70集中度'], + ) + + logger.info(f"[筹码分布] {stock_code} 日期={chip.date}: 获利比例={chip.profit_ratio:.1%}, " + f"平均成本={chip.avg_cost}, 
90%集中度={chip.concentration_90:.2%}, " + f"70%集中度={chip.concentration_70:.2%}") + return chip + + except Exception as e: + logger.warning(f"[Tushare] 获取筹码分布失败 {stock_code}: {e}") + return None + + def compute_cyq_metrics(self, df: pd.DataFrame, current_price: float) -> dict: + """ + 基于 Tushare 的筹码分布明细表 (cyq_chips) 计算常用筹码指标 + :param df: 包含 'price' 和 'percent' 列的 DataFrame + :param current_price: 股票当天的当前价/收盘价 (用于计算获利比例) + :return: 包含各项筹码指标的字典 + """ + import numpy as np + # 1. 确保按价格从小到大排序 (Tushare 返回的数据往往是纯倒序的) + df_sorted = df.sort_values(by='price', ascending=True).reset_index(drop=True) + + # 2. 防止原始数据 percent 总和产生浮点数误差,归一化到 100% + total_percent = df_sorted['percent'].sum() + + df_sorted['norm_percent'] = df_sorted['percent'] / total_percent * 100 + + # 3. 计算筹码的累积分布 + df_sorted['cumsum'] = df_sorted['norm_percent'].cumsum() + + # --- 获利比例 --- + # 所有价格 <= 当前价的筹码之和 + winner_rate = df_sorted[df_sorted['price'] <= current_price]['norm_percent'].sum() + + # --- 平均成本 --- + # 价格的加权平均值 + avg_cost = np.average(df_sorted['price'], weights=df_sorted['norm_percent']) + + # --- 辅助函数:求指定累积比例处的价格 --- + def get_percentile_price(target_pct): + # 寻找累积求和第一次大于等于目标百分比的行索引 + idx = df_sorted['cumsum'].searchsorted(target_pct) + idx = min(idx, len(df_sorted) - 1) # 防止越界 + return df_sorted.loc[idx, 'price'] + + # --- 90% 成本区与集中度 --- + # 去头去尾各 5% + cost_90_low = get_percentile_price(5) + cost_90_high = get_percentile_price(95) + if (cost_90_high + cost_90_low) != 0: + concentration_90 = (cost_90_high - cost_90_low) / (cost_90_high + cost_90_low) * 100 + else: + concentration_90 = 0.0 + + # --- 70% 成本区与集中度 --- + # 去头去尾各 15% + cost_70_low = get_percentile_price(15) + cost_70_high = get_percentile_price(85) + if (cost_70_high + cost_70_low) != 0: + concentration_70 = (cost_70_high - cost_70_low) / (cost_70_high + cost_70_low) * 100 + else: + concentration_70 = 0.0 + + # 返回格式化结果 + return { + "获利比例": round(winner_rate/100, 4), # /100 与akshare保持一致,返回小数格式 + "平均成本": round(avg_cost, 4), + "90成本-低": round(cost_90_low, 4), + "90成本-高": round(cost_90_high, 4), + "90集中度": round(concentration_90/100, 4), + "70成本-低": round(cost_70_low, 4), + "70成本-高": round(cost_70_high, 4), + "70集中度": round(concentration_70/100, 4) + } + + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = TushareFetcher() + + try: + # 测试历史数据 + df = fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + + # 测试股票名称 + name = fetcher.get_stock_name('600519') + print(f"股票名称: {name}") + + except Exception as e: + print(f"获取失败: {e}") + + # 测试市场统计 + print("\n" + "=" * 50) + print("Testing get_market_stats (tushare)") + print("=" * 50) + try: + stats = fetcher.get_market_stats() + if stats: + print(f"Market Stats successfully computed:") + print(f"Up: {stats['up_count']} (Limit Up: {stats['limit_up_count']})") + print(f"Down: {stats['down_count']} (Limit Down: {stats['limit_down_count']})") + print(f"Flat: {stats['flat_count']}") + print(f"Total Amount: {stats['total_amount']:.2f} 亿 (Yi)") + else: + print("Failed to compute market stats.") + except Exception as e: + print(f"Failed to compute market stats: {e}") + + + # 测试筹码分布数据 + print("\n" + "=" * 50) + print("测试筹码分布数据获取") + print("=" * 50) + try: + chip = fetcher.get_chip_distribution('600519') # 茅台 + except Exception as e: + print(f"[筹码分布] 获取失败: {e}") + + # 测试行业板块排名 + print("\n" + "=" * 50) + print("测试行业板块排名获取") + print("=" * 50) + try: + rankings = fetcher.get_sector_rankings(n=5) + if rankings: + top, bottom = rankings + print("涨幅榜 Top 
5:") + for sector in top: + print(f"{sector['name']}: {sector['change_pct']}%") + print("\n跌幅榜 Top 5:") + for sector in bottom: + print(f"{sector['name']}: {sector['change_pct']}%") + else: + print("未获取到行业板块排名数据") + except Exception as e: + print(f"[行业板块排名] 获取失败: {e}") diff --git a/src/provider/us_index_mapping.py b/src/provider/us_index_mapping.py new file mode 100644 index 00000000..a90ba042 --- /dev/null +++ b/src/provider/us_index_mapping.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +=================================== +美股指数与股票代码工具 +=================================== + +提供: +1. 美股指数代码映射(如 SPX -> ^GSPC) +2. 美股股票代码识别(AAPL、TSLA 等) + +美股指数在 Yahoo Finance 中需使用 ^ 前缀,与股票代码不同。 +""" + +import re + +# 美股代码正则:1-5 个大写字母,可选 .X 后缀(如 BRK.B) +_US_STOCK_PATTERN = re.compile(r'^[A-Z]{1,5}(\.[A-Z])?$') + + +# 用户输入 -> (Yahoo Finance 符号, 中文名称) +US_INDEX_MAPPING = { + # 标普 500 + 'SPX': ('^GSPC', '标普500指数'), + '^GSPC': ('^GSPC', '标普500指数'), + 'GSPC': ('^GSPC', '标普500指数'), + # 道琼斯工业平均指数 + 'DJI': ('^DJI', '道琼斯工业指数'), + '^DJI': ('^DJI', '道琼斯工业指数'), + 'DJIA': ('^DJI', '道琼斯工业指数'), + # 纳斯达克综合指数 + 'IXIC': ('^IXIC', '纳斯达克综合指数'), + '^IXIC': ('^IXIC', '纳斯达克综合指数'), + 'NASDAQ': ('^IXIC', '纳斯达克综合指数'), + # 纳斯达克 100 + 'NDX': ('^NDX', '纳斯达克100指数'), + '^NDX': ('^NDX', '纳斯达克100指数'), + # VIX 波动率指数 + 'VIX': ('^VIX', 'VIX恐慌指数'), + '^VIX': ('^VIX', 'VIX恐慌指数'), + # 罗素 2000 + 'RUT': ('^RUT', '罗素2000指数'), + '^RUT': ('^RUT', '罗素2000指数'), +} + + +def is_us_index_code(code: str) -> bool: + """ + 判断代码是否为美股指数符号。 + + Args: + code: 股票/指数代码,如 'SPX', 'DJI' + + Returns: + True 表示是已知美股指数符号,否则 False + + Examples: + >>> is_us_index_code('SPX') + True + >>> is_us_index_code('AAPL') + False + """ + return (code or '').strip().upper() in US_INDEX_MAPPING + + +def is_us_stock_code(code: str) -> bool: + """ + 判断代码是否为美股股票符号(排除美股指数)。 + + 美股股票代码为 1-5 个大写字母,可选 .X 后缀如 BRK.B。 + 美股指数(SPX、DJI 等)明确排除。 + + Args: + code: 股票代码,如 'AAPL', 'TSLA', 'BRK.B' + + Returns: + True 表示是美股股票符号,否则 False + + Examples: + >>> is_us_stock_code('AAPL') + True + >>> is_us_stock_code('TSLA') + True + >>> is_us_stock_code('BRK.B') + True + >>> is_us_stock_code('SPX') + False + >>> is_us_stock_code('600519') + False + """ + normalized = (code or '').strip().upper() + # 美股指数不是股票 + if normalized in US_INDEX_MAPPING: + return False + return bool(_US_STOCK_PATTERN.match(normalized)) + + +def get_us_index_yf_symbol(code: str) -> tuple: + """ + 获取美股指数的 Yahoo Finance 符号与中文名称。 + + Args: + code: 用户输入,如 'SPX', '^GSPC', 'DJI' + + Returns: + (yf_symbol, chinese_name) 元组,未找到时返回 (None, None)。 + + Examples: + >>> get_us_index_yf_symbol('SPX') + ('^GSPC', '标普500指数') + >>> get_us_index_yf_symbol('AAPL') + (None, None) + """ + normalized = (code or '').strip().upper() + return US_INDEX_MAPPING.get(normalized, (None, None)) diff --git a/src/provider/yfinance_fetcher.py b/src/provider/yfinance_fetcher.py new file mode 100644 index 00000000..4ba41b6b --- /dev/null +++ b/src/provider/yfinance_fetcher.py @@ -0,0 +1,746 @@ +# -*- coding: utf-8 -*- +""" +=================================== +YfinanceFetcher - 兜底数据源 (Priority 4) +=================================== + +数据来源:Yahoo Finance(通过 yfinance 库) +特点:国际数据源、可能有延迟或缺失 +定位:当所有国内数据源都失败时的最后保障 + +关键策略: +1. 自动将 A 股代码转换为 yfinance 格式(.SS / .SZ) +2. 处理 Yahoo Finance 的数据格式差异 +3. 
失败后指数退避重试
+"""
+
+import csv
+import logging
+import os
+from datetime import datetime
+from io import StringIO
+from typing import Optional, List, Dict, Any
+from urllib.error import HTTPError, URLError
+from urllib.request import Request, urlopen
+
+import pandas as pd
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+    before_sleep_log,
+)
+
+from .base import BaseFetcher, DataFetchError, STANDARD_COLUMNS, is_bse_code
+from .realtime_types import UnifiedRealtimeQuote, RealtimeSource
+from .us_index_mapping import get_us_index_yf_symbol, is_us_stock_code
+
+# 可选导入本地股票映射补丁,若缺失则使用空字典兜底
+try:
+    from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name
+except ImportError:
+    STOCK_NAME_MAP = {}
+
+    def is_meaningful_stock_name(name: Optional[str], stock_code: str) -> bool:
+        """简单的名称有效性校验兜底"""
+        if not name:
+            return False
+        n = str(name).strip()
+        return bool(n and n.upper() != str(stock_code).strip().upper())
+
+logger = logging.getLogger(__name__)
+
+
+class YfinanceFetcher(BaseFetcher):
+    """
+    Yahoo Finance 数据源实现
+
+    优先级:4(最低,作为兜底)
+    数据来源:Yahoo Finance
+
+    关键策略:
+    - 自动转换股票代码格式
+    - 处理时区和数据格式差异
+    - 失败后指数退避重试
+
+    注意事项:
+    - A 股数据可能有延迟
+    - 某些股票可能无数据
+    - 数据精度可能与国内源略有差异
+    """
+
+    name = "YfinanceFetcher"
+    priority = int(os.getenv("YFINANCE_PRIORITY", "4"))
+
+    def __init__(self):
+        """初始化 YfinanceFetcher"""
+        pass
+
+    def _convert_stock_code(self, stock_code: str) -> str:
+        """
+        转换股票代码为 Yahoo Finance 格式
+
+        Yahoo Finance 代码格式:
+        - A股沪市:600519.SS (Shanghai Stock Exchange)
+        - A股深市:000001.SZ (Shenzhen Stock Exchange)
+        - 港股:0700.HK (Hong Kong Stock Exchange)
+        - 美股:AAPL, TSLA, GOOGL (无需后缀)
+
+        Args:
+            stock_code: 原始代码,如 '600519', 'hk00700', 'AAPL'
+
+        Returns:
+            Yahoo Finance 格式代码
+
+        Examples:
+            >>> fetcher._convert_stock_code('600519')
+            '600519.SS'
+            >>> fetcher._convert_stock_code('hk00700')
+            '0700.HK'
+            >>> fetcher._convert_stock_code('AAPL')
+            'AAPL'
+        """
+        code = stock_code.strip().upper()
+
+        # 美股指数:映射到 Yahoo Finance 符号(如 SPX -> ^GSPC)
+        yf_symbol, _ = get_us_index_yf_symbol(code)
+        if yf_symbol:
+            logger.debug(f"识别为美股指数: {code} -> {yf_symbol}")
+            return yf_symbol
+
+        # 美股:1-5 个大写字母(可选 .X 后缀),原样返回
+        if is_us_stock_code(code):
+            logger.debug(f"识别为美股代码: {code}")
+            return code
+
+        # 港股:hk 前缀 -> .HK 后缀
+        if code.startswith('HK'):
+            hk_code = code[2:].lstrip('0') or '0'  # 去除前导 0,但保留至少一个 0
+            hk_code = hk_code.zfill(4)  # 补齐到 4 位
+            logger.debug(f"转换港股代码: {stock_code} -> {hk_code}.HK")
+            return f"{hk_code}.HK"
+
+        # 已经包含后缀的情况
+        if '.SS' in code or '.SZ' in code or '.HK' in code or '.BJ' in code:
+            return code
+
+        # 去除可能的 .SH 后缀
+        code = code.replace('.SH', '')
+
+        # ETF: Shanghai ETF (51xx, 52xx, 56xx, 58xx) -> .SS; Shenzhen ETF (15xx, 16xx, 18xx) -> .SZ
+        if len(code) == 6:
+            if code.startswith(('51', '52', '56', '58')):
+                return f"{code}.SS"
+            if code.startswith(('15', '16', '18')):
+                return f"{code}.SZ"
+
+        # BSE (Beijing Stock Exchange): 8xxxxx, 4xxxxx, 920xxx
+        if is_bse_code(code):
+            base = code.split('.')[0] if '.' 
in code else code + return f"{base}.BJ" + + # A股:根据代码前缀判断市场 + if code.startswith(('600', '601', '603', '688')): + return f"{code}.SS" + elif code.startswith(('000', '002', '300')): + return f"{code}.SZ" + else: + logger.warning(f"无法确定股票 {code} 的市场,默认使用深市") + return f"{code}.SZ" + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Yahoo Finance 获取原始数据 + + 使用 yfinance.download() 获取历史数据 + + 流程: + 1. 转换股票代码格式 + 2. 调用 yfinance API + 3. 处理返回数据 + """ + import yfinance as yf + + # 转换代码格式 + yf_code = self._convert_stock_code(stock_code) + + logger.debug(f"调用 yfinance.download({yf_code}, {start_date}, {end_date})") + + try: + # 使用 yfinance 下载数据 + df = yf.download( + tickers=yf_code, + start=start_date, + end=end_date, + progress=False, # 禁止进度条 + auto_adjust=True, # 自动调整价格(复权) + multi_level_index=True + ) + + # 筛选出 yf_code 的列, 避免多只股票数据混淆 + if isinstance(df.columns, pd.MultiIndex) and len(df.columns) > 1: + ticker_level = df.columns.get_level_values(1) + mask = ticker_level == yf_code + if mask.any(): + df = df.loc[:, mask].copy() + + if df.empty: + raise DataFetchError(f"Yahoo Finance 未查询到 {stock_code} 的数据") + + return df + + except Exception as e: + if isinstance(e, DataFetchError): + raise + raise DataFetchError(f"Yahoo Finance 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Yahoo Finance 数据 + + yfinance 返回的列名: + Open, High, Low, Close, Volume(索引是日期) + + 注意:新版 yfinance 返回 MultiIndex 列名,如 ('Close', 'AMD') + 需要先扁平化列名再进行处理 + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 处理 MultiIndex 列名(新版 yfinance 返回格式) + # 例如: ('Close', 'AMD') -> 'Close' + if isinstance(df.columns, pd.MultiIndex): + logger.debug("检测到 MultiIndex 列名,进行扁平化处理") + # 取第一级列名(Price level: Close, High, Low, etc.) 
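+            # A minimal sketch of the assumed shape (hypothetical ticker 'AMD'):
+            #   before: MultiIndex [('Close', 'AMD'), ('High', 'AMD'), ('Volume', 'AMD')]
+            #   after : flat Index ['Close', 'High', 'Volume'] (level 0 only)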
+ df.columns = df.columns.get_level_values(0) + + # 重置索引,将日期从索引变为列 + df = df.reset_index() + + # 列名映射(yfinance 使用首字母大写) + column_mapping = { + 'Date': 'date', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', + } + + df = df.rename(columns=column_mapping) + + # 计算涨跌幅(因为 yfinance 不直接提供) + if 'close' in df.columns: + df['pct_chg'] = df['close'].pct_change() * 100 + df['pct_chg'] = df['pct_chg'].fillna(0).round(2) + + # 计算成交额(yfinance 不提供,使用估算值) + # 成交额 ≈ 成交量 * 平均价格 + if 'volume' in df.columns and 'close' in df.columns: + df['amount'] = df['volume'] * df['close'] + else: + df['amount'] = 0 + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def _fetch_yf_ticker_data(self, yf, yf_code: str, name: str, return_code: str) -> Optional[Dict[str, Any]]: + """ + 通过 yfinance 拉取单个指数/股票的行情数据。 + + Args: + yf: yfinance 模块引用 + yf_code: yfinance 使用的代码(如 '000001.SS'、'^GSPC') + name: 指数显示名称 + return_code: 写入结果 dict 的 code 字段(如 'sh000001'、'SPX') + + Returns: + 行情字典,失败时返回 None + """ + ticker = yf.Ticker(yf_code) + # 取近两日数据以计算涨跌幅 + hist = ticker.history(period='2d') + if hist.empty: + return None + today_row = hist.iloc[-1] + prev_row = hist.iloc[-2] if len(hist) > 1 else today_row + price = float(today_row['Close']) + prev_close = float(prev_row['Close']) + change = price - prev_close + change_pct = (change / prev_close) * 100 if prev_close else 0 + high = float(today_row['High']) + low = float(today_row['Low']) + # 振幅 = (最高 - 最低) / 昨收 * 100 + amplitude = ((high - low) / prev_close * 100) if prev_close else 0 + return { + 'code': return_code, + 'name': name, + 'current': price, + 'change': change, + 'change_pct': change_pct, + 'open': float(today_row['Open']), + 'high': high, + 'low': low, + 'prev_close': prev_close, + 'volume': float(today_row['Volume']), + 'amount': 0.0, # Yahoo Finance 不提供准确成交额 + 'amplitude': amplitude, + } + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数行情 (Yahoo Finance),支持 A 股与美股。 + region=us 时委托给 _get_us_main_indices。 + """ + import yfinance as yf + + if region == "us": + return self._get_us_main_indices(yf) + + # A 股指数:akshare 代码 -> (yfinance 代码, 显示名称) + yf_mapping = { + 'sh000001': ('000001.SS', '上证指数'), + 'sz399001': ('399001.SZ', '深证成指'), + 'sz399006': ('399006.SZ', '创业板指'), + 'sh000688': ('000688.SS', '科创50'), + 'sh000016': ('000016.SS', '上证50'), + 'sh000300': ('000300.SS', '沪深300'), + } + + results = [] + try: + for ak_code, (yf_code, name) in yf_mapping.items(): + try: + item = self._fetch_yf_ticker_data(yf, yf_code, name, ak_code) + if item: + results.append(item) + logger.debug(f"[Yfinance] 获取指数 {name} 成功") + except Exception as e: + logger.warning(f"[Yfinance] 获取指数 {name} 失败: {e}") + + if results: + logger.info(f"[Yfinance] 成功获取 {len(results)} 个 A 股指数行情") + return results + + except Exception as e: + logger.error(f"[Yfinance] 获取 A 股指数行情失败: {e}") + + return None + + def _get_us_main_indices(self, yf) -> Optional[List[Dict[str, Any]]]: + """获取美股主要指数行情(SPX、IXIC、DJI、VIX),复用 _fetch_yf_ticker_data""" + # 大盘复盘所需核心美股指数 + us_indices = ['SPX', 'IXIC', 'DJI', 'VIX'] + results = [] + try: + for code in us_indices: + yf_symbol, name = get_us_index_yf_symbol(code) + if not yf_symbol: + continue + try: + item = self._fetch_yf_ticker_data(yf, yf_symbol, name, code) + if item: + results.append(item) + logger.debug(f"[Yfinance] 获取美股指数 {name} 成功") + 
except Exception as e: + logger.warning(f"[Yfinance] 获取美股指数 {name} 失败: {e}") + + if results: + logger.info(f"[Yfinance] 成功获取 {len(results)} 个美股指数行情") + return results + + except Exception as e: + logger.error(f"[Yfinance] 获取美股指数行情失败: {e}") + + return None + + def _is_us_stock(self, stock_code: str) -> bool: + """ + 判断代码是否为美股股票(排除美股指数)。 + + 委托给 us_index_mapping 模块的 is_us_stock_code()。 + """ + return is_us_stock_code(stock_code) + + def _get_us_stock_quote_from_stooq(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 使用 Stooq 为美股实时行情提供免密钥兜底。 + + Stooq 提供的是最新交易日行情,精度不如分时实时接口,但在 Yahoo / yfinance + 被限流时,至少能为 Web UI 提供可用价格;若可获取到昨收价,则同时提供涨跌幅等衍生指标。 + """ + symbol = stock_code.strip().upper() + stooq_symbol = f"{symbol.lower()}.us" + url = f"https://stooq.com/q/l/?s={stooq_symbol}" + request = Request( + url, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; DSA/1.0; +https://github.com/ZhuLinsen/daily_stock_analysis)", + "Accept": "text/plain,text/csv,*/*", + }, + ) + + try: + with urlopen(request, timeout=15) as response: + payload = response.read().decode("utf-8", "ignore").strip() + except (HTTPError, URLError, TimeoutError) as exc: + logger.warning(f"[Stooq] 获取美股 {symbol} 实时行情失败: {exc}") + return None + + if not payload or payload.upper().startswith("NO DATA"): + logger.warning(f"[Stooq] 无法获取 {symbol} 的行情数据") + return None + + def _fetch_prev_close() -> Optional[float]: + history_url = f"https://stooq.com/q/d/l/?s={stooq_symbol}&i=d" + history_request = Request( + history_url, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; DSA/1.0; +https://github.com/ZhuLinsen/daily_stock_analysis)", + "Accept": "text/plain,text/csv,*/*", + }, + ) + try: + with urlopen(history_request, timeout=15) as response: + history_payload = response.read().decode("utf-8", "ignore").strip() + except (HTTPError, URLError, TimeoutError) as exc: + logger.debug(f"[Stooq] 获取美股 {symbol} 日线历史失败: {exc}") + return None + + if not history_payload or history_payload.upper().startswith("NO DATA"): + return None + + try: + reader = csv.reader(StringIO(history_payload)) + header = next(reader, None) + if not header: + return None + + header_tokens = [cell.strip().lower() for cell in header] + has_header = "close" in header_tokens and "date" in header_tokens + if not has_header: + return None + + date_index = header_tokens.index("date") + close_index = header_tokens.index("close") + + daily_rows: list[tuple[datetime, float]] = [] + for row in reader: + if not row: + continue + date_text = row[date_index].strip() if len(row) > date_index else "" + close_text = row[close_index].strip() if len(row) > close_index else "" + if not date_text or not close_text: + continue + try: + dt = datetime.strptime(date_text, "%Y-%m-%d") + close_val = float(close_text) + except Exception: + continue + daily_rows.append((dt, close_val)) + + if len(daily_rows) < 2: + return None + + daily_rows.sort(key=lambda item: item[0]) + return daily_rows[-2][1] + except Exception: + return None + + try: + reader = csv.reader(StringIO(payload)) + first_row = next(reader, None) + if first_row is None: + raise ValueError(f"unexpected Stooq payload: {payload}") + + normalized_first_row = [cell.strip() for cell in first_row] + header_tokens = {cell.lower() for cell in normalized_first_row if cell} + has_header = 'open' in header_tokens and 'close' in header_tokens + row = next(reader, None) if has_header else first_row + if row is None: + raise ValueError(f"unexpected Stooq payload: {payload}") + + normalized_row = [cell.strip() for cell in row] 
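+            # Assumed Stooq single-quote CSV layout (observed behaviour, not a
+            # documented contract):
+            #   SYMBOL,DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOLUME -> OHLC+V at indices 3..7
+            #   a shorter 7-field payload shifts OHLC+V to indices 2..6
+            # Trailing empty cells are dropped below before the index lookup.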
+ while normalized_row and normalized_row[-1] == '': + normalized_row.pop() + + if len(normalized_row) >= 8: + open_index, high_index, low_index, price_index, volume_index = 3, 4, 5, 6, 7 + elif len(normalized_row) >= 7: + open_index, high_index, low_index, price_index, volume_index = 2, 3, 4, 5, 6 + else: + raise ValueError(f"unexpected Stooq payload: {payload}") + + open_price = float(normalized_row[open_index]) + high = float(normalized_row[high_index]) + low = float(normalized_row[low_index]) + price = float(normalized_row[price_index]) + volume = int(float(normalized_row[volume_index])) + + prev_close = _fetch_prev_close() + change_amount = None + change_pct = None + amplitude = None + if prev_close is not None and prev_close > 0: + change_amount = price - prev_close + change_pct = (change_amount / prev_close) * 100 + amplitude = ((high - low) / prev_close) * 100 + + quote = UnifiedRealtimeQuote( + code=symbol, + name=STOCK_NAME_MAP.get(symbol, ''), + source=RealtimeSource.STOOQ, + price=price, + change_pct=round(change_pct, 2) if change_pct is not None else None, + change_amount=round(change_amount, 4) if change_amount is not None else None, + volume=volume, + amount=None, + volume_ratio=None, + turnover_rate=None, + amplitude=round(amplitude, 2) if amplitude is not None else None, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=None, + pb_ratio=None, + total_mv=None, + circ_mv=None, + ) + logger.info(f"[Stooq] 获取美股 {symbol} 兜底行情成功: 价格={price}") + return quote + except Exception as exc: + logger.warning(f"[Stooq] 解析美股 {symbol} 行情失败: {exc}") + return None + + def _get_us_index_realtime_quote( + self, + user_code: str, + yf_symbol: str, + index_name: str, + ) -> Optional[UnifiedRealtimeQuote]: + """ + Get realtime quote for US index (e.g. SPX -> ^GSPC). + + Args: + user_code: User input code (e.g. SPX) + yf_symbol: Yahoo Finance symbol (e.g. 
^GSPC) + index_name: Chinese name for the index + + Returns: + UnifiedRealtimeQuote or None + """ + import yfinance as yf + + try: + logger.debug(f"[Yfinance] 获取美股指数 {user_code} ({yf_symbol}) 实时行情") + ticker = yf.Ticker(yf_symbol) + + try: + info = ticker.fast_info + if info is None: + raise ValueError("fast_info is None") + price = getattr(info, 'lastPrice', None) or getattr(info, 'last_price', None) + prev_close = getattr(info, 'previousClose', None) or getattr(info, 'previous_close', None) + open_price = getattr(info, 'open', None) + high = getattr(info, 'dayHigh', None) or getattr(info, 'day_high', None) + low = getattr(info, 'dayLow', None) or getattr(info, 'day_low', None) + volume = getattr(info, 'lastVolume', None) or getattr(info, 'last_volume', None) + except Exception: + logger.debug("[Yfinance] fast_info 失败,尝试 history 方法") + hist = ticker.history(period='2d') + if hist.empty: + logger.warning(f"[Yfinance] 无法获取 {yf_symbol} 的数据") + return None + today = hist.iloc[-1] + prev = hist.iloc[-2] if len(hist) > 1 else today + price = float(today['Close']) + prev_close = float(prev['Close']) + open_price = float(today['Open']) + high = float(today['High']) + low = float(today['Low']) + volume = int(today['Volume']) + + change_amount = None + change_pct = None + if price is not None and prev_close is not None and prev_close > 0: + change_amount = price - prev_close + change_pct = (change_amount / prev_close) * 100 + + amplitude = None + if high is not None and low is not None and prev_close is not None and prev_close > 0: + amplitude = ((high - low) / prev_close) * 100 + + quote = UnifiedRealtimeQuote( + code=user_code, + name=index_name or user_code, + source=RealtimeSource.FALLBACK, + price=price, + change_pct=round(change_pct, 2) if change_pct is not None else None, + change_amount=round(change_amount, 4) if change_amount is not None else None, + volume=volume, + amount=None, + volume_ratio=None, + turnover_rate=None, + amplitude=round(amplitude, 2) if amplitude is not None else None, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=None, + pb_ratio=None, + total_mv=None, + circ_mv=None, + ) + logger.info(f"[Yfinance] 获取美股指数 {user_code} 实时行情成功: 价格={price}") + return quote + except Exception as e: + logger.warning(f"[Yfinance] 获取美股指数 {user_code} 实时行情失败: {e}") + return None + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取美股/美股指数实时行情数据 + + 支持美股股票(AAPL、TSLA)和美股指数(SPX、DJI 等)。 + 数据来源:yfinance Ticker.info + + Args: + stock_code: 美股代码或指数代码,如 'AMD', 'AAPL', 'SPX', 'DJI' + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + import yfinance as yf + + # 美股指数:使用映射(SPX -> ^GSPC) + yf_symbol, index_name = get_us_index_yf_symbol(stock_code) + if yf_symbol: + return self._get_us_index_realtime_quote( + user_code=stock_code.strip().upper(), + yf_symbol=yf_symbol, + index_name=index_name, + ) + + # 仅处理美股股票 + if not self._is_us_stock(stock_code): + logger.debug(f"[Yfinance] {stock_code} 不是美股,跳过") + return None + + try: + symbol = stock_code.strip().upper() + logger.debug(f"[Yfinance] 获取美股 {symbol} 实时行情") + + ticker = yf.Ticker(symbol) + + # 尝试获取 fast_info(更快,但字段较少) + try: + info = ticker.fast_info + if info is None: + raise ValueError("fast_info is None") + + price = getattr(info, 'lastPrice', None) or getattr(info, 'last_price', None) + prev_close = getattr(info, 'previousClose', None) or getattr(info, 'previous_close', None) + open_price = getattr(info, 'open', None) + high = getattr(info, 'dayHigh', None) or 
getattr(info, 'day_high', None)
+                low = getattr(info, 'dayLow', None) or getattr(info, 'day_low', None)
+                volume = getattr(info, 'lastVolume', None) or getattr(info, 'last_volume', None)
+                market_cap = getattr(info, 'marketCap', None) or getattr(info, 'market_cap', None)
+
+            except Exception:
+                # 回退到 history 方法获取最新数据
+                logger.debug("[Yfinance] fast_info 失败,尝试 history 方法")
+                hist = ticker.history(period='2d')
+                if hist.empty:
+                    logger.warning(f"[Yfinance] 无法获取 {symbol} 的数据,尝试 Stooq 兜底")
+                    return self._get_us_stock_quote_from_stooq(symbol)
+
+                today = hist.iloc[-1]
+                prev = hist.iloc[-2] if len(hist) > 1 else today
+
+                price = float(today['Close'])
+                prev_close = float(prev['Close'])
+                open_price = float(today['Open'])
+                high = float(today['High'])
+                low = float(today['Low'])
+                volume = int(today['Volume'])
+                market_cap = None
+
+            # 计算涨跌幅
+            change_amount = None
+            change_pct = None
+            if price is not None and prev_close is not None and prev_close > 0:
+                change_amount = price - prev_close
+                change_pct = (change_amount / prev_close) * 100
+
+            # 计算振幅
+            amplitude = None
+            if high is not None and low is not None and prev_close is not None and prev_close > 0:
+                amplitude = ((high - low) / prev_close) * 100
+
+            # 获取股票名称
+            try:
+                info_name = ticker.info.get('shortName', '') or ticker.info.get('longName', '') or ''
+                name = info_name if is_meaningful_stock_name(info_name, symbol) else STOCK_NAME_MAP.get(symbol, '')
+            except Exception:
+                name = STOCK_NAME_MAP.get(symbol, '')
+
+            quote = UnifiedRealtimeQuote(
+                code=symbol,
+                name=name,
+                source=RealtimeSource.FALLBACK,
+                price=price,
+                change_pct=round(change_pct, 2) if change_pct is not None else None,
+                change_amount=round(change_amount, 4) if change_amount is not None else None,
+                volume=volume,
+                amount=None,  # yfinance 不直接提供成交额
+                volume_ratio=None,
+                turnover_rate=None,
+                amplitude=round(amplitude, 2) if amplitude is not None else None,
+                open_price=open_price,
+                high=high,
+                low=low,
+                pre_close=prev_close,
+                pe_ratio=None,
+                pb_ratio=None,
+                total_mv=market_cap,
+                circ_mv=None,
+            )
+
+            logger.info(f"[Yfinance] 获取美股 {symbol} 实时行情成功: 价格={price}")
+            return quote
+
+        except Exception as e:
+            logger.warning(f"[Yfinance] 获取美股 {stock_code} 实时行情失败: {e},尝试 Stooq 兜底")
+            return self._get_us_stock_quote_from_stooq(stock_code)
+
+
+if __name__ == "__main__":
+    # 测试代码
+    logging.basicConfig(level=logging.DEBUG)
+
+    fetcher = YfinanceFetcher()
+
+    try:
+        df = fetcher.get_daily_data('600519')  # 茅台
+        print(f"获取成功,共 {len(df)} 条数据")
+        print(df.tail())
+    except Exception as e:
+        print(f"获取失败: {e}")
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/test_rest/base.py b/tests/test_rest/base.py
similarity index 100%
rename from test_rest/base.py
rename to tests/test_rest/base.py
diff --git a/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json b/tests/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json
similarity index 100%
rename from test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json
rename to tests/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json
diff --git a/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json b/tests/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json
similarity index 100%
rename from test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json
rename to 
+    try:
+        df = fetcher.get_daily_data('600519')  # Kweichow Moutai
+        print(f"Fetch succeeded, {len(df)} rows")
+        print(df.tail())
+    except Exception as e:
+        print(f"Fetch failed: {e}")
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/test_rest/base.py b/tests/test_rest/base.py
similarity index 100%
rename from test_rest/base.py
rename to tests/test_rest/base.py
diff --git a/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json b/tests/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json
similarity index 100%
rename from test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json
rename to tests/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json
diff --git a/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json b/tests/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json
similarity index 100%
rename from test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json
rename to tests/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false&timestamp.lte=1478393873000&timestamp.gte=1477972800000.json
diff --git a/test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20&timestamp.gt=2022-08-09.json b/tests/test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20&timestamp.gt=2022-08-09.json
similarity index 100%
rename from test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20&timestamp.gt=2022-08-09.json
rename to tests/test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20&timestamp.gt=2022-08-09.json
diff --git a/test_rest/mocks/v1/indicators/rsi/AAPL&window=20&timespan=minute&adjusted=true&timestamp.gt=2022-08-18.json b/tests/test_rest/mocks/v1/indicators/rsi/AAPL&window=20&timespan=minute&adjusted=true&timestamp.gt=2022-08-18.json
similarity index 100%
rename from test_rest/mocks/v1/indicators/rsi/AAPL&window=20&timespan=minute&adjusted=true&timestamp.gt=2022-08-18.json
rename to tests/test_rest/mocks/v1/indicators/rsi/AAPL&window=20&timespan=minute&adjusted=true&timestamp.gt=2022-08-18.json
diff --git a/test_rest/mocks/v1/indicators/sma/AAPL&window=30&timespan=quarter&timestamp=1483958600&expand_underlying=true.json b/tests/test_rest/mocks/v1/indicators/sma/AAPL&window=30&timespan=quarter&timestamp=1483958600&expand_underlying=true.json
similarity index 100%
rename from test_rest/mocks/v1/indicators/sma/AAPL&window=30&timespan=quarter&timestamp=1483958600&expand_underlying=true.json
rename to tests/test_rest/mocks/v1/indicators/sma/AAPL&window=30&timespan=quarter&timestamp=1483958600&expand_underlying=true.json
diff --git a/test_rest/mocks/v1/last/crypto/BTC/USD.json b/tests/test_rest/mocks/v1/last/crypto/BTC/USD.json
similarity index 100%
rename from test_rest/mocks/v1/last/crypto/BTC/USD.json
rename to tests/test_rest/mocks/v1/last/crypto/BTC/USD.json
diff --git a/test_rest/mocks/v1/last_quote/currencies/AUD/USD.json b/tests/test_rest/mocks/v1/last_quote/currencies/AUD/USD.json
similarity index 100%
rename from test_rest/mocks/v1/last_quote/currencies/AUD/USD.json
rename to tests/test_rest/mocks/v1/last_quote/currencies/AUD/USD.json
diff --git a/test_rest/mocks/v1/marketstatus/now.json b/tests/test_rest/mocks/v1/marketstatus/now.json
similarity index 100%
rename from test_rest/mocks/v1/marketstatus/now.json
rename to tests/test_rest/mocks/v1/marketstatus/now.json
diff --git a/test_rest/mocks/v1/marketstatus/upcoming.json b/tests/test_rest/mocks/v1/marketstatus/upcoming.json
similarity index 100%
rename from test_rest/mocks/v1/marketstatus/upcoming.json
rename to tests/test_rest/mocks/v1/marketstatus/upcoming.json
diff --git a/test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json b/tests/test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json
similarity index 100%
rename from test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json
rename to tests/test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json
diff --git a/test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json b/tests/test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json
similarity index 100%
rename from test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json
rename to tests/test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json
diff --git a/test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json
b/tests/test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json similarity index 100% rename from test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json rename to tests/test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json diff --git a/test_rest/mocks/v2/aggs/ticker/AAPL/prev.json b/tests/test_rest/mocks/v2/aggs/ticker/AAPL/prev.json similarity index 100% rename from test_rest/mocks/v2/aggs/ticker/AAPL/prev.json rename to tests/test_rest/mocks/v2/aggs/ticker/AAPL/prev.json diff --git a/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json b/tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json similarity index 100% rename from test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json rename to tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json diff --git a/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json b/tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json similarity index 100% rename from test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json rename to tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json diff --git a/test_rest/mocks/v2/last/nbbo/AAPL.json b/tests/test_rest/mocks/v2/last/nbbo/AAPL.json similarity index 100% rename from test_rest/mocks/v2/last/nbbo/AAPL.json rename to tests/test_rest/mocks/v2/last/nbbo/AAPL.json diff --git a/test_rest/mocks/v2/last/trade/AAPL.json b/tests/test_rest/mocks/v2/last/trade/AAPL.json similarity index 100% rename from test_rest/mocks/v2/last/trade/AAPL.json rename to tests/test_rest/mocks/v2/last/trade/AAPL.json diff --git a/test_rest/mocks/v2/reference/news&ticker=NFLX.json b/tests/test_rest/mocks/v2/reference/news&ticker=NFLX.json similarity index 100% rename from test_rest/mocks/v2/reference/news&ticker=NFLX.json rename to tests/test_rest/mocks/v2/reference/news&ticker=NFLX.json diff --git a/test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json b/tests/test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json rename to tests/test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json diff --git a/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json b/tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json rename to tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json diff --git a/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json b/tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json rename to tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json diff --git a/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json b/tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json rename to tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json diff --git 
a/test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json b/tests/test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json similarity index 100% rename from test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json rename to tests/test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json diff --git a/test_rest/mocks/v3/quotes/AAPL.json b/tests/test_rest/mocks/v3/quotes/AAPL.json similarity index 100% rename from test_rest/mocks/v3/quotes/AAPL.json rename to tests/test_rest/mocks/v3/quotes/AAPL.json diff --git a/test_rest/mocks/v3/reference/conditions&asset_class=stocks.json b/tests/test_rest/mocks/v3/reference/conditions&asset_class=stocks.json similarity index 100% rename from test_rest/mocks/v3/reference/conditions&asset_class=stocks.json rename to tests/test_rest/mocks/v3/reference/conditions&asset_class=stocks.json diff --git a/test_rest/mocks/v3/reference/dividends.json b/tests/test_rest/mocks/v3/reference/dividends.json similarity index 100% rename from test_rest/mocks/v3/reference/dividends.json rename to tests/test_rest/mocks/v3/reference/dividends.json diff --git a/test_rest/mocks/v3/reference/exchanges.json b/tests/test_rest/mocks/v3/reference/exchanges.json similarity index 100% rename from test_rest/mocks/v3/reference/exchanges.json rename to tests/test_rest/mocks/v3/reference/exchanges.json diff --git a/test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json b/tests/test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json similarity index 100% rename from test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json rename to tests/test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json diff --git a/test_rest/mocks/v3/reference/options/contracts.json b/tests/test_rest/mocks/v3/reference/options/contracts.json similarity index 100% rename from test_rest/mocks/v3/reference/options/contracts.json rename to tests/test_rest/mocks/v3/reference/options/contracts.json diff --git a/test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json b/tests/test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json similarity index 100% rename from test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json rename to tests/test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json diff --git a/test_rest/mocks/v3/reference/splits.json b/tests/test_rest/mocks/v3/reference/splits.json similarity index 100% rename from test_rest/mocks/v3/reference/splits.json rename to tests/test_rest/mocks/v3/reference/splits.json diff --git a/test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json 
b/tests/test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json rename to tests/test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json diff --git a/test_rest/mocks/v3/reference/tickers.json b/tests/test_rest/mocks/v3/reference/tickers.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers.json rename to tests/test_rest/mocks/v3/reference/tickers.json diff --git a/test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json b/tests/test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json rename to tests/test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json diff --git a/test_rest/mocks/v3/reference/tickers/AAPL.json b/tests/test_rest/mocks/v3/reference/tickers/AAPL.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers/AAPL.json rename to tests/test_rest/mocks/v3/reference/tickers/AAPL.json diff --git a/test_rest/mocks/v3/reference/tickers/types.json b/tests/test_rest/mocks/v3/reference/tickers/types.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers/types.json rename to tests/test_rest/mocks/v3/reference/tickers/types.json diff --git a/test_rest/mocks/v3/snapshot.json b/tests/test_rest/mocks/v3/snapshot.json similarity index 100% rename from test_rest/mocks/v3/snapshot.json rename to tests/test_rest/mocks/v3/snapshot.json diff --git a/test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json b/tests/test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json similarity index 100% rename from test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json rename to tests/test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json diff --git a/test_rest/mocks/v3/snapshot/options/AAPL.json b/tests/test_rest/mocks/v3/snapshot/options/AAPL.json similarity index 100% rename from test_rest/mocks/v3/snapshot/options/AAPL.json rename to tests/test_rest/mocks/v3/snapshot/options/AAPL.json diff --git a/test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json b/tests/test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json similarity index 100% rename from test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json rename to tests/test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json diff --git a/test_rest/mocks/v3/trades/AAPL&limit=2.json b/tests/test_rest/mocks/v3/trades/AAPL&limit=2.json similarity index 100% rename from test_rest/mocks/v3/trades/AAPL&limit=2.json rename to tests/test_rest/mocks/v3/trades/AAPL&limit=2.json diff --git a/test_rest/mocks/vX/reference/financials.json b/tests/test_rest/mocks/vX/reference/financials.json similarity index 100% rename from test_rest/mocks/vX/reference/financials.json rename to tests/test_rest/mocks/vX/reference/financials.json diff --git 
a/test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json b/tests/test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json similarity index 100% rename from test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json rename to tests/test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json diff --git a/test_rest/models/test_requests.py b/tests/test_rest/models/test_requests.py similarity index 100% rename from test_rest/models/test_requests.py rename to tests/test_rest/models/test_requests.py diff --git a/test_rest/test_aggs.py b/tests/test_rest/test_aggs.py similarity index 100% rename from test_rest/test_aggs.py rename to tests/test_rest/test_aggs.py diff --git a/test_rest/test_conditions.py b/tests/test_rest/test_conditions.py similarity index 100% rename from test_rest/test_conditions.py rename to tests/test_rest/test_conditions.py diff --git a/test_rest/test_contracts.py b/tests/test_rest/test_contracts.py similarity index 100% rename from test_rest/test_contracts.py rename to tests/test_rest/test_contracts.py diff --git a/test_rest/test_dividends.py b/tests/test_rest/test_dividends.py similarity index 100% rename from test_rest/test_dividends.py rename to tests/test_rest/test_dividends.py diff --git a/test_rest/test_exchanges.py b/tests/test_rest/test_exchanges.py similarity index 100% rename from test_rest/test_exchanges.py rename to tests/test_rest/test_exchanges.py diff --git a/test_rest/test_indicators.py b/tests/test_rest/test_indicators.py similarity index 100% rename from test_rest/test_indicators.py rename to tests/test_rest/test_indicators.py diff --git a/test_rest/test_markets.py b/tests/test_rest/test_markets.py similarity index 100% rename from test_rest/test_markets.py rename to tests/test_rest/test_markets.py diff --git a/test_rest/test_modelclass.py b/tests/test_rest/test_modelclass.py similarity index 100% rename from test_rest/test_modelclass.py rename to tests/test_rest/test_modelclass.py diff --git a/test_rest/test_quotes.py b/tests/test_rest/test_quotes.py similarity index 100% rename from test_rest/test_quotes.py rename to tests/test_rest/test_quotes.py diff --git a/test_rest/test_snapshots.py b/tests/test_rest/test_snapshots.py similarity index 100% rename from test_rest/test_snapshots.py rename to tests/test_rest/test_snapshots.py diff --git a/test_rest/test_splits.py b/tests/test_rest/test_splits.py similarity index 100% rename from test_rest/test_splits.py rename to tests/test_rest/test_splits.py diff --git a/test_rest/test_summaries.py b/tests/test_rest/test_summaries.py similarity index 100% rename from test_rest/test_summaries.py rename to tests/test_rest/test_summaries.py diff --git a/test_rest/test_tickers.py b/tests/test_rest/test_tickers.py similarity index 100% rename from test_rest/test_tickers.py rename to tests/test_rest/test_tickers.py diff --git a/test_rest/test_trades.py b/tests/test_rest/test_trades.py similarity index 100% rename from test_rest/test_trades.py rename to tests/test_rest/test_trades.py diff --git a/tests/test_us_daily/__init__.py b/tests/test_us_daily/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py new file mode 100644 index 00000000..0cc42e24 --- /dev/null +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -0,0 +1,185 @@ +import unittest +from unittest.mock import MagicMock, patch, call +import os +import tempfile +import shutil +import json +from datetime import 
date + + +class TestGenerateYears(unittest.TestCase): + def test_generate_years_basic(self): + from processor.us_daily.agg_fetcher import generate_years + + result = generate_years(2024, 2026) + self.assertEqual(result, [2024, 2025, 2026]) + + def test_generate_years_single(self): + from processor.us_daily.agg_fetcher import generate_years + + result = generate_years(2024, 2024) + self.assertEqual(result, [2024]) + + +class TestYearBounds(unittest.TestCase): + def test_year_bounds(self): + from processor.us_daily.agg_fetcher import get_year_bounds + + start, end = get_year_bounds(2024) + self.assertEqual(start, "2024-01-01") + self.assertEqual(end, "2024-12-31") + + +class TestIsCurrentYear(unittest.TestCase): + @patch("processor.us_daily.agg_fetcher.date") + def test_is_current_year_true(self, mock_date): + from processor.us_daily.agg_fetcher import is_current_year + + mock_date.today.return_value = date(2026, 4, 22) + self.assertTrue(is_current_year(2026)) + + @patch("processor.us_daily.agg_fetcher.date") + def test_is_current_year_false(self, mock_date): + from processor.us_daily.agg_fetcher import is_current_year + + mock_date.today.return_value = date(2026, 4, 22) + self.assertFalse(is_current_year(2025)) + + +class TestFetchTickerAggs(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_manager(self, df=None, source_name="akshare", error=None): + from processor.us_daily.sources.manager import SourceManager + + manager = MagicMock(spec=SourceManager) + if error: + manager.fetch_daily.side_effect = error + else: + manager.fetch_daily.return_value = (df, source_name) + return manager + + def test_skips_existing_historical_year(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + + config = Config(start_year=2024, daily_data_dir=self.test_dir) + + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2024.json"), "w") as f: + json.dump({"ticker": "AAPL", "year": 2024, "data": []}, f) + + manager = self._make_manager() + + with patch( + "processor.us_daily.agg_fetcher.generate_years", return_value=[2024] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_year", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + manager.fetch_daily.assert_not_called() + self.assertEqual(result["failures"], []) + + def test_fetches_missing_year(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + import pandas as pd + + config = Config(start_year=2024, daily_data_dir=self.test_dir) + + df = pd.DataFrame({ + "date": ["2024-01-02"], + "open": [74.06], + "high": [75.15], + "low": [73.80], + "close": [74.36], + "volume": [108872000], + }) + manager = self._make_manager(df=df, source_name="akshare") + + with patch( + "processor.us_daily.agg_fetcher.generate_years", return_value=[2024] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_year", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + file_path = os.path.join(self.test_dir, "AAPL", "2024.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["ticker"], "AAPL") + self.assertEqual(data["year"], 2024) + self.assertEqual(data["source"], "akshare") + self.assertEqual(len(data["data"]), 1) + 
self.assertEqual(data["data"][0]["close"], 74.36) + self.assertEqual(result["failures"], []) + + def test_refreshes_current_year(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + import pandas as pd + + config = Config(start_year=2026, daily_data_dir=self.test_dir) + + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2026.json"), "w") as f: + json.dump({"ticker": "AAPL", "year": 2026, "data": []}, f) + + df = pd.DataFrame({ + "date": ["2026-04-01"], + "open": [200.0], + "high": [210.0], + "low": [195.0], + "close": [205.0], + "volume": [50000000], + }) + manager = self._make_manager(df=df, source_name="yfinance") + + with patch( + "processor.us_daily.agg_fetcher.generate_years", return_value=[2026] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_year", return_value=True + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + manager.fetch_daily.assert_called_once() + self.assertEqual(result["failures"], []) + + def test_records_failure_when_all_sources_fail(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + from processor.us_daily.sources.manager import FetchError + + config = Config(start_year=2024, daily_data_dir=self.test_dir, max_retries=2) + + manager = self._make_manager( + error=FetchError("All sources failed for AAPL") + ) + + with patch( + "processor.us_daily.agg_fetcher.generate_years", return_value=[2024] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_year", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + self.assertEqual(len(result["failures"]), 1) + self.assertEqual(result["failures"][0]["ticker"], "AAPL") + self.assertEqual(result["failures"][0]["year"], 2024) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py new file mode 100644 index 00000000..eb80526c --- /dev/null +++ b/tests/test_us_daily/test_config.py @@ -0,0 +1,51 @@ +import unittest +import json +import os +import tempfile + + +class TestConfig(unittest.TestCase): + def test_default_config(self): + from processor.us_daily.config import Config + + config = Config() + self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.start_year, 2024) + self.assertEqual(config.max_retries, 3) + self.assertEqual(config.data_source_priority, ["massive", "akshare", "yfinance"]) + self.assertEqual(config.akshare_interval, 2.0) + self.assertEqual(config.yfinance_interval, 1.0) + self.assertEqual(config.massive_interval, 12.0) + self.assertEqual(config.list_data_dir, "data/us_list") + self.assertEqual(config.daily_data_dir, "data/us_daily") + + def test_load_config_from_file(self): + from processor.us_daily.config import load_config + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump({ + "refresh_tickers": True, + "akshare_interval": 3.0, + }, f) + tmp_path = f.name + + try: + config = load_config(tmp_path) + self.assertEqual(config.refresh_tickers, True) + self.assertEqual(config.akshare_interval, 3.0) + # defaults preserved for unspecified fields + self.assertEqual(config.start_year, 2024) + self.assertEqual(config.massive_interval, 12.0) + finally: + os.unlink(tmp_path) + + def test_load_config_missing_file_uses_defaults(self): + from processor.us_daily.config import load_config + + config = 
load_config("/nonexistent/path/config.json") + self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.data_source_priority, ["massive", "akshare", "yfinance"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_sources/__init__.py b/tests/test_us_daily/test_sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_us_daily/test_sources/test_akshare_source.py b/tests/test_us_daily/test_sources/test_akshare_source.py new file mode 100644 index 00000000..73738e2b --- /dev/null +++ b/tests/test_us_daily/test_sources/test_akshare_source.py @@ -0,0 +1,71 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd + + +class TestAkshareSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.akshare_source import AkshareSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2020-01-02", "2020-01-03"]), + "open": [74.06, 75.0], + "high": [75.15, 76.0], + "low": [73.80, 74.5], + "close": [74.36, 75.5], + "volume": [108872000, 98000000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_filters_by_date(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2019-12-31", "2020-01-02", "2020-02-01"]), + "open": [70.0, 74.06, 80.0], + "high": [71.0, 75.15, 81.0], + "low": [69.0, 73.80, 79.0], + "close": [70.5, 74.36, 80.5], + "volume": [100000, 108872000, 90000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_with_correct_symbol(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + mock_ak.stock_us_daily.assert_called_once_with(symbol="AAPL", adjust="qfq") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_sources/test_manager.py b/tests/test_us_daily/test_sources/test_manager.py new file mode 100644 index 00000000..8ac8221a --- /dev/null +++ b/tests/test_us_daily/test_sources/test_manager.py @@ -0,0 +1,90 @@ +import unittest +from unittest.mock import MagicMock, patch +import pandas as pd + + +class 
TestSourceManager(unittest.TestCase): + def _make_source(self, name, data=None, error=None): + """Create a mock source that returns data or raises error.""" + from processor.us_daily.sources.base import BaseSource + + source = MagicMock(spec=BaseSource) + source.name = name + source.request_interval = 0.0 + if error: + source.fetch_daily.side_effect = error + elif data is not None: + source.fetch_daily.return_value = data + else: + source.fetch_daily.return_value = pd.DataFrame() + return source + + def test_returns_first_successful_source(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=df) + s2 = self._make_source("source2", data=df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source1") + s1.fetch_daily.assert_called_once_with("AAPL", "2020-01-01", "2020-01-31") + s2.fetch_daily.assert_not_called() + + def test_falls_back_on_failure(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", error=Exception("API down")) + s2 = self._make_source("source2", data=df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source2") + + def test_falls_back_on_empty_dataframe(self): + from processor.us_daily.sources.manager import SourceManager + + empty_df = pd.DataFrame() + good_df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=empty_df) + s2 = self._make_source("source2", data=good_df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source2") + + def test_raises_when_all_fail(self): + from processor.us_daily.sources.manager import SourceManager, FetchError + + s1 = self._make_source("source1", error=Exception("fail1")) + s2 = self._make_source("source2", error=Exception("fail2")) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + with self.assertRaises(FetchError): + manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + def test_sleeps_after_successful_fetch(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=df) + s1.request_interval = 5.0 + + manager = SourceManager([s1]) + with patch("processor.us_daily.sources.manager.time.sleep") as mock_sleep: + manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + mock_sleep.assert_called_once_with(5.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_sources/test_massive_source.py b/tests/test_us_daily/test_sources/test_massive_source.py new file mode 100644 index 00000000..5bedbe67 --- /dev/null +++ b/tests/test_us_daily/test_sources/test_massive_source.py @@ -0,0 +1,64 @@ +import unittest +from unittest.mock import MagicMock +import pandas as pd + + +class TestMassiveSource(unittest.TestCase): + def 
test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.massive_source import MassiveSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000 + agg1.timestamp = 1577944800000 # 2020-01-02 UTC + agg1.vwap = 74.50 + agg1.transactions = 5000 + agg1.otc = False + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + expected_columns = STANDARD_COLUMNS + ["vwap", "transactions", "otc"] + self.assertListEqual(list(result.columns), expected_columns) + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["close"], 74.36) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + self.assertEqual(result.iloc[0]["vwap"], 74.50) + self.assertEqual(result.iloc[0]["transactions"], 5000) + + def test_fetch_daily_calls_client_correctly(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + client.list_aggs.assert_called_once_with( + "AAPL", 1, "day", + from_="2020-01-01", to="2020-01-31", + adjusted=True, sort="asc", + ) + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_sources/test_yfinance_source.py b/tests/test_us_daily/test_sources/test_yfinance_source.py new file mode 100644 index 00000000..a264e9aa --- /dev/null +++ b/tests/test_us_daily/test_sources/test_yfinance_source.py @@ -0,0 +1,60 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd + + +class TestYfinanceSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame( + { + "Open": [74.06, 75.0], + "High": [75.15, 76.0], + "Low": [73.80, 74.5], + "Close": [74.36, 75.5], + "Volume": [108872000, 98000000], + }, + index=pd.to_datetime(["2020-01-02", "2020-01-03"]), + ) + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = raw_df + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_passes_correct_params(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + 
source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + mock_yf.Ticker.assert_called_once_with("AAPL") + mock_ticker.history.assert_called_once_with(start="2020-01-01", end="2020-01-31") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_storage.py b/tests/test_us_daily/test_storage.py new file mode 100644 index 00000000..6fdf5e26 --- /dev/null +++ b/tests/test_us_daily/test_storage.py @@ -0,0 +1,61 @@ +import unittest +import json +import os +import tempfile +import shutil + + +class TestStorage(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_get_tickers_file_path(self): + from processor.us_daily.storage import get_tickers_file_path + + result = get_tickers_file_path("data/us_daily") + self.assertEqual(result, "data/us_daily/top_tickers.json") + + def test_get_month_file_path(self): + from processor.us_daily.storage import get_month_file_path + + result = get_month_file_path("data/us_daily", "AAPL", "2020-01") + self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") + + def test_save_and_load_json(self): + from processor.us_daily.storage import save_json, load_json + + file_path = os.path.join(self.test_dir, "sub", "test.json") + data = {"key": "value", "num": 42} + save_json(file_path, data) + loaded = load_json(file_path) + self.assertEqual(loaded, data) + + def test_save_json_creates_parent_dirs(self): + from processor.us_daily.storage import save_json + + file_path = os.path.join(self.test_dir, "a", "b", "c", "test.json") + save_json(file_path, {"x": 1}) + self.assertTrue(os.path.exists(file_path)) + + def test_file_exists(self): + from processor.us_daily.storage import file_exists + + existing = os.path.join(self.test_dir, "exists.json") + with open(existing, "w") as f: + f.write("{}") + + self.assertTrue(file_exists(existing)) + self.assertFalse(file_exists(os.path.join(self.test_dir, "nope.json"))) + + def test_get_month_file_path_daily_dir(self): + from processor.us_daily.storage import get_month_file_path + + result = get_month_file_path("data/us_daily", "AAPL", "2020-01") + self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_us_daily/test_ticker_lister.py b/tests/test_us_daily/test_ticker_lister.py new file mode 100644 index 00000000..5c30ab26 --- /dev/null +++ b/tests/test_us_daily/test_ticker_lister.py @@ -0,0 +1,149 @@ +import unittest +from unittest.mock import MagicMock, patch, call +from types import SimpleNamespace +import os +import tempfile +import shutil +import json + + +class TestTickerLister(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_ticker(self, ticker_str): + t = MagicMock() + t.ticker = ticker_str + return t + + def _make_details(self, **kwargs): + """Create a SimpleNamespace TickerDetails with all fields as attributes.""" + return SimpleNamespace(**kwargs) + + def 
test_list_all_tickers(self): + from processor.us_daily.ticker_lister import list_all_tickers + from processor.us_daily.config import Config + + config = Config(list_data_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + return {"AAPL": details_aapl, "MSFT": details_msft}[ticker] + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_all_tickers(client, config) + + # Called without exchange filter + client.list_tickers.assert_called_once_with( + market="stocks", active=True, limit=1000 + ) + + file_path = os.path.join(self.test_dir, "tickers.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + + self.assertEqual(data["count"], 2) + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertIn("MSFT", tickers) + + def test_resume_skips_existing_tickers(self): + from processor.us_daily.ticker_lister import list_all_tickers + from processor.us_daily.config import Config + + config = Config(list_data_dir=self.test_dir, massive_interval=0) + + # Pre-populate file with AAPL already fetched + file_path = os.path.join(self.test_dir, "tickers.json") + existing_data = { + "updated_at": "2026-04-22", + "count": 1, + "tickers": [ + {"ticker": "AAPL", "name": "Apple Inc", "market_cap": 3e12}, + ], + } + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, "w") as f: + json.dump(existing_data, f) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + client.get_ticker_details.return_value = details_msft + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_all_tickers(client, config) + + # Should only call get_ticker_details for MSFT (AAPL already exists) + client.get_ticker_details.assert_called_once_with("MSFT") + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["count"], 2) + + def test_skips_ticker_on_details_error(self): + from processor.us_daily.ticker_lister import list_all_tickers + from processor.us_daily.config import Config + + config = Config(list_data_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL"), + self._make_ticker("AAPL"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return details_aapl + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_all_tickers(client, config) + + file_path = os.path.join(self.test_dir, "tickers.json") + with open(file_path) as f: + data = json.load(f) + + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() diff 
--git a/test_websocket/base_ws.py b/tests/test_websocket/base_ws.py similarity index 100% rename from test_websocket/base_ws.py rename to tests/test_websocket/base_ws.py diff --git a/test_websocket/mock_server.py b/tests/test_websocket/mock_server.py similarity index 100% rename from test_websocket/mock_server.py rename to tests/test_websocket/mock_server.py diff --git a/test_websocket/test_conn.py b/tests/test_websocket/test_conn.py similarity index 100% rename from test_websocket/test_conn.py rename to tests/test_websocket/test_conn.py