From d5d1d5574f050d99c8ecc2db6e2a59cc5e1e318f Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:24:20 +0800 Subject: [PATCH 01/43] feat: add config module for us_daily data fetcher Co-Authored-By: Claude Sonnet 4.6 --- project/__init__.py | 0 project/us_daily/__init__.py | 0 project/us_daily/config.json | 8 +++++ project/us_daily/config.py | 24 +++++++++++++++ tests/__init__.py | 0 tests/test_us_daily/__init__.py | 0 tests/test_us_daily/test_config.py | 47 ++++++++++++++++++++++++++++++ 7 files changed, 79 insertions(+) create mode 100644 project/__init__.py create mode 100644 project/us_daily/__init__.py create mode 100644 project/us_daily/config.json create mode 100644 project/us_daily/config.py create mode 100644 tests/__init__.py create mode 100644 tests/test_us_daily/__init__.py create mode 100644 tests/test_us_daily/test_config.py diff --git a/project/__init__.py b/project/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/project/us_daily/__init__.py b/project/us_daily/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/project/us_daily/config.json b/project/us_daily/config.json new file mode 100644 index 00000000..e7ff2381 --- /dev/null +++ b/project/us_daily/config.json @@ -0,0 +1,8 @@ +{ + "refresh_tickers": false, + "market_cap_min": 5000000000, + "start_date": "2020-01", + "request_interval": 20, + "data_dir": "data/us_daily", + "max_retries": 3 +} diff --git a/project/us_daily/config.py b/project/us_daily/config.py new file mode 100644 index 00000000..c08bfbb2 --- /dev/null +++ b/project/us_daily/config.py @@ -0,0 +1,24 @@ +import json +import os +from dataclasses import dataclass + + +@dataclass +class Config: + refresh_tickers: bool = False + market_cap_min: float = 5e9 + start_date: str = "2020-01" + request_interval: int = 20 + data_dir: str = "data/us_daily" + max_retries: int = 3 + + +def load_config(config_path: str = 
"project/us_daily/config.json") -> Config: + config = Config() + if os.path.exists(config_path): + with open(config_path, "r") as f: + data = json.load(f) + for key, value in data.items(): + if hasattr(config, key): + setattr(config, key, value) + return config diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_us_daily/__init__.py b/tests/test_us_daily/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py new file mode 100644 index 00000000..4bc9b59f --- /dev/null +++ b/tests/test_us_daily/test_config.py @@ -0,0 +1,47 @@ +import unittest +import json +import os +import tempfile + + +class TestConfig(unittest.TestCase): + def test_default_config(self): + from project.us_daily.config import Config + + config = Config() + self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.market_cap_min, 5e9) + self.assertEqual(config.start_date, "2020-01") + self.assertEqual(config.request_interval, 20) + self.assertEqual(config.data_dir, "data/us_daily") + self.assertEqual(config.max_retries, 3) + + def test_load_config_from_file(self): + from project.us_daily.config import load_config + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as f: + json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f) + tmp_path = f.name + + try: + config = load_config(tmp_path) + self.assertEqual(config.refresh_tickers, True) + self.assertEqual(config.market_cap_min, 1e10) + # defaults preserved for unspecified fields + self.assertEqual(config.start_date, "2020-01") + self.assertEqual(config.request_interval, 20) + finally: + os.unlink(tmp_path) + + def test_load_config_missing_file_uses_defaults(self): + from project.us_daily.config import load_config + + config = load_config("/nonexistent/path/config.json") + self.assertEqual(config.refresh_tickers, False) + 
self.assertEqual(config.market_cap_min, 5e9) + + +if __name__ == "__main__": + unittest.main() From 8eff9d6a0088ba58aa9968cdd772f2915a63a534 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:05:11 +0800 Subject: [PATCH 02/43] feat: add storage module for JSON file I/O and path management Co-Authored-By: Claude Sonnet 4.6 --- project/us_daily/storage.py | 25 +++++++++++++ tests/test_us_daily/test_storage.py | 55 +++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 project/us_daily/storage.py create mode 100644 tests/test_us_daily/test_storage.py diff --git a/project/us_daily/storage.py b/project/us_daily/storage.py new file mode 100644 index 00000000..af80505b --- /dev/null +++ b/project/us_daily/storage.py @@ -0,0 +1,25 @@ +import json +import os + + +def get_tickers_file_path(data_dir: str) -> str: + return os.path.join(data_dir, "top_tickers.json") + + +def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: + return os.path.join(data_dir, ticker, f"{month}.json") + + +def save_json(path: str, data: dict) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + +def load_json(path: str) -> dict: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def file_exists(path: str) -> bool: + return os.path.isfile(path) diff --git a/tests/test_us_daily/test_storage.py b/tests/test_us_daily/test_storage.py new file mode 100644 index 00000000..11a7f207 --- /dev/null +++ b/tests/test_us_daily/test_storage.py @@ -0,0 +1,55 @@ +import unittest +import json +import os +import tempfile +import shutil + + +class TestStorage(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_get_tickers_file_path(self): + from project.us_daily.storage import 
get_tickers_file_path + + result = get_tickers_file_path("data/us_daily") + self.assertEqual(result, "data/us_daily/top_tickers.json") + + def test_get_month_file_path(self): + from project.us_daily.storage import get_month_file_path + + result = get_month_file_path("data/us_daily", "AAPL", "2020-01") + self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") + + def test_save_and_load_json(self): + from project.us_daily.storage import save_json, load_json + + file_path = os.path.join(self.test_dir, "sub", "test.json") + data = {"key": "value", "num": 42} + save_json(file_path, data) + loaded = load_json(file_path) + self.assertEqual(loaded, data) + + def test_save_json_creates_parent_dirs(self): + from project.us_daily.storage import save_json + + file_path = os.path.join(self.test_dir, "a", "b", "c", "test.json") + save_json(file_path, {"x": 1}) + self.assertTrue(os.path.exists(file_path)) + + def test_file_exists(self): + from project.us_daily.storage import file_exists + + existing = os.path.join(self.test_dir, "exists.json") + with open(existing, "w") as f: + f.write("{}") + + self.assertTrue(file_exists(existing)) + self.assertFalse(file_exists(os.path.join(self.test_dir, "nope.json"))) + + +if __name__ == "__main__": + unittest.main() From fed1cf85989ec595f95cbf685c1c90ca71d4c79a Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:06:48 +0800 Subject: [PATCH 03/43] feat: add ticker_filter module to select top US stocks by market cap Co-Authored-By: Claude Sonnet 4.6 --- project/us_daily/ticker_filter.py | 61 +++++++++++++ tests/test_us_daily/test_ticker_filter.py | 105 ++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 project/us_daily/ticker_filter.py create mode 100644 tests/test_us_daily/test_ticker_filter.py diff --git a/project/us_daily/ticker_filter.py b/project/us_daily/ticker_filter.py new file mode 100644 index 00000000..30c666db --- /dev/null +++ 
b/project/us_daily/ticker_filter.py @@ -0,0 +1,61 @@ +import logging +import time +from typing import List + +from project.us_daily.config import Config + +logger = logging.getLogger("us_daily") + +EXCHANGES = ["XNAS", "XNYS", "ARCX"] + + +def filter_top_tickers(client, config: Config) -> List[dict]: + result = [] + for exchange in EXCHANGES: + logger.info(f"Fetching tickers for exchange: {exchange}") + try: + tickers = client.list_tickers( + market="stocks", + exchange=exchange, + active=True, + limit=1000, + ) + except Exception as e: + logger.error(f"Failed to list tickers for {exchange}: {e}") + continue + + time.sleep(config.request_interval) + + for ticker_obj in tickers: + ticker_str = ticker_obj.ticker + try: + details = client.get_ticker_details(ticker_str) + time.sleep(config.request_interval) + except Exception as e: + logger.warning( + f"Failed to get details for {ticker_str}: {e}" + ) + continue + + if details.market_cap is None: + logger.debug(f"{ticker_str}: no market_cap data, skipping") + continue + + if details.market_cap >= config.market_cap_min: + entry = { + "ticker": details.ticker, + "name": details.name, + "market_cap": details.market_cap, + "exchange": details.primary_exchange, + } + result.append(entry) + logger.info( + f" {details.ticker}: market_cap={details.market_cap:.0f} included" + ) + else: + logger.debug( + f" {ticker_str}: market_cap={details.market_cap:.0f} < {config.market_cap_min:.0f}, skipping" + ) + + logger.info(f"Total top tickers found: {len(result)}") + return result diff --git a/tests/test_us_daily/test_ticker_filter.py b/tests/test_us_daily/test_ticker_filter.py new file mode 100644 index 00000000..7e9f202d --- /dev/null +++ b/tests/test_us_daily/test_ticker_filter.py @@ -0,0 +1,105 @@ +import unittest +from unittest.mock import MagicMock, patch, call +from dataclasses import dataclass + + +class TestTickerFilter(unittest.TestCase): + def _make_ticker(self, ticker_str, exchange): + t = MagicMock() + t.ticker = 
ticker_str + t.primary_exchange = exchange + return t + + def _make_details(self, ticker_str, name, market_cap, exchange): + d = MagicMock() + d.ticker = ticker_str + d.name = name + d.market_cap = market_cap + d.primary_exchange = exchange + return d + + def test_filter_top_tickers_filters_by_market_cap(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + # list_tickers returns different tickers per exchange + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL", "XNAS"), + self._make_ticker("TINY", "XNAS"), + ]) + + # get_ticker_details: AAPL has large cap, TINY does not + def mock_details(ticker): + if ticker == "AAPL": + return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") + elif ticker == "TINY": + return self._make_details("TINY", "Tiny Corp", 1e9, "XNAS") + + client.get_ticker_details.side_effect = mock_details + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + tickers = [t["ticker"] for t in result] + self.assertIn("AAPL", tickers) + self.assertNotIn("TINY", tickers) + + def test_filter_top_tickers_includes_required_fields(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("MSFT", "XNYS"), + ]) + client.get_ticker_details.return_value = self._make_details( + "MSFT", "Microsoft Corporation", 2.8e12, "XNYS" + ) + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + self.assertEqual(len(result), 1) + entry = result[0] + 
self.assertEqual(entry["ticker"], "MSFT") + self.assertEqual(entry["name"], "Microsoft Corporation") + self.assertEqual(entry["market_cap"], 2.8e12) + self.assertEqual(entry["exchange"], "XNYS") + + def test_filter_skips_ticker_on_details_error(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL", "XNAS"), + self._make_ticker("AAPL", "XNAS"), + ]) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") + + client.get_ticker_details.side_effect = mock_details + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + tickers = [t["ticker"] for t in result] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() From 26faca8ac3450db3ec2ac7851a8aeffaadb7d2df Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:08:00 +0800 Subject: [PATCH 04/43] feat: add agg_fetcher module for incremental daily bar data collection Co-Authored-By: Claude Sonnet 4.6 --- project/us_daily/agg_fetcher.py | 115 +++++++++++++ tests/test_us_daily/test_agg_fetcher.py | 207 ++++++++++++++++++++++++ 2 files changed, 322 insertions(+) create mode 100644 project/us_daily/agg_fetcher.py create mode 100644 tests/test_us_daily/test_agg_fetcher.py diff --git a/project/us_daily/agg_fetcher.py b/project/us_daily/agg_fetcher.py new file mode 100644 index 00000000..c2d52df6 --- /dev/null +++ b/project/us_daily/agg_fetcher.py @@ -0,0 +1,115 @@ +import calendar +import logging +import time +from datetime import date, datetime +from typing import List, Tuple 
+ +from project.us_daily.config import Config +from project.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +logger = logging.getLogger("us_daily") + + +def generate_months(start: str, end: str) -> List[str]: + start_year, start_month = int(start[:4]), int(start[5:7]) + end_year, end_month = int(end[:4]), int(end[5:7]) + + months = [] + year, month = start_year, start_month + while (year, month) <= (end_year, end_month): + months.append(f"{year:04d}-{month:02d}") + month += 1 + if month > 12: + month = 1 + year += 1 + return months + + +def get_month_bounds(month: str) -> Tuple[str, str]: + year, mon = int(month[:4]), int(month[5:7]) + last_day = calendar.monthrange(year, mon)[1] + return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" + + +def is_current_month(month: str) -> bool: + today = date.today() + return month == f"{today.year:04d}-{today.month:02d}" + + +def current_month() -> str: + today = date.today() + return f"{today.year:04d}-{today.month:02d}" + + +def fetch_ticker_aggs(client, ticker: str, config: Config) -> dict: + months = generate_months(config.start_date, current_month()) + failures = [] + + for month in months: + file_path = get_month_file_path(config.data_dir, ticker, month) + + if file_exists(file_path) and not is_current_month(month): + logger.debug(f" {ticker} {month}: exists, skipping") + continue + + start_date, end_date = get_month_bounds(month) + aggs = None + last_error = None + + for attempt in range(1, config.max_retries + 1): + try: + aggs_iter = client.list_aggs( + ticker, + 1, + "day", + from_=start_date, + to=end_date, + adjusted=True, + sort="asc", + ) + aggs = list(aggs_iter) + break + except Exception as e: + last_error = e + logger.warning( + f" {ticker} {month}: attempt {attempt}/{config.max_retries} failed: {e}" + ) + if attempt < config.max_retries: + time.sleep(config.request_interval) + + if aggs is None: + failures.append({ + "ticker": ticker, + "month": month, + 
"error": str(last_error), + }) + logger.error(f" {ticker} {month}: all retries failed, skipping") + continue + + data = { + "ticker": ticker, + "month": month, + "fetched_at": datetime.now().isoformat(timespec="seconds"), + "data": [ + { + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + "vwap": a.vwap, + "timestamp": a.timestamp, + "transactions": a.transactions, + } + for a in aggs + ], + } + save_json(file_path, data) + logger.info(f" {ticker} {month}: fetched {len(aggs)} bars") + time.sleep(config.request_interval) + + return {"failures": failures} diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py new file mode 100644 index 00000000..666cc76f --- /dev/null +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -0,0 +1,207 @@ +import unittest +from unittest.mock import MagicMock, patch, call +import os +import tempfile +import shutil +import json +from datetime import date + + +class TestGenerateMonths(unittest.TestCase): + def test_generate_months_basic(self): + from project.us_daily.agg_fetcher import generate_months + + result = generate_months("2020-01", "2020-04") + self.assertEqual(result, ["2020-01", "2020-02", "2020-03", "2020-04"]) + + def test_generate_months_cross_year(self): + from project.us_daily.agg_fetcher import generate_months + + result = generate_months("2023-11", "2024-02") + self.assertEqual(result, ["2023-11", "2023-12", "2024-01", "2024-02"]) + + def test_generate_months_single(self): + from project.us_daily.agg_fetcher import generate_months + + result = generate_months("2024-06", "2024-06") + self.assertEqual(result, ["2024-06"]) + + +class TestMonthBounds(unittest.TestCase): + def test_month_bounds_january(self): + from project.us_daily.agg_fetcher import get_month_bounds + + start, end = get_month_bounds("2020-01") + self.assertEqual(start, "2020-01-01") + self.assertEqual(end, "2020-01-31") + + def test_month_bounds_february_leap(self): + from 
project.us_daily.agg_fetcher import get_month_bounds + + start, end = get_month_bounds("2024-02") + self.assertEqual(start, "2024-02-01") + self.assertEqual(end, "2024-02-29") + + def test_month_bounds_february_non_leap(self): + from project.us_daily.agg_fetcher import get_month_bounds + + start, end = get_month_bounds("2023-02") + self.assertEqual(start, "2023-02-01") + self.assertEqual(end, "2023-02-28") + + +class TestIsCurrentMonth(unittest.TestCase): + @patch("project.us_daily.agg_fetcher.date") + def test_is_current_month_true(self, mock_date): + from project.us_daily.agg_fetcher import is_current_month + + mock_date.today.return_value = date(2026, 4, 22) + self.assertTrue(is_current_month("2026-04")) + + @patch("project.us_daily.agg_fetcher.date") + def test_is_current_month_false(self, mock_date): + from project.us_daily.agg_fetcher import is_current_month + + mock_date.today.return_value = date(2026, 4, 22) + self.assertFalse(is_current_month("2026-03")) + + +class TestFetchTickerAggs(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_skips_existing_historical_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + ) + + # Create existing file for 2020-01 + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2020-01", "data": []}, f) + + client = MagicMock() + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Should not have called list_aggs since 
file exists and not current month + client.list_aggs.assert_not_called() + self.assertEqual(result["failures"], []) + + def test_fetches_missing_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + ) + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000.0 + agg1.vwap = 74.53 + agg1.timestamp = 1577854800000 + agg1.transactions = 480012 + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Verify file was created + file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["ticker"], "AAPL") + self.assertEqual(data["month"], "2020-01") + self.assertEqual(len(data["data"]), 1) + self.assertEqual(data["data"][0]["open"], 74.06) + self.assertEqual(result["failures"], []) + + def test_refreshes_current_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2026-04", + data_dir=self.test_dir, + request_interval=0, + ) + + # Create existing file for current month + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) + + agg1 = MagicMock() + agg1.open = 200.0 + agg1.high = 210.0 + agg1.low = 195.0 + agg1.close = 205.0 + agg1.volume = 50000000.0 + agg1.vwap = 203.0 + 
agg1.timestamp = 1714348800000 + agg1.transactions = 300000 + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2026-04"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=True): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Should have called list_aggs even though file exists + client.list_aggs.assert_called_once() + self.assertEqual(result["failures"], []) + + def test_records_failure_after_retries(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + max_retries=2, + ) + + client = MagicMock() + client.list_aggs.side_effect = Exception("API timeout") + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + self.assertEqual(len(result["failures"]), 1) + self.assertEqual(result["failures"][0]["ticker"], "AAPL") + self.assertEqual(result["failures"][0]["month"], "2020-01") + self.assertIn("API timeout", result["failures"][0]["error"]) + + +if __name__ == "__main__": + unittest.main() From 7e8610960a93edc2ef9dc408e8345edce1019310 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:09:30 +0800 Subject: [PATCH 05/43] feat: add __main__.py entry point for us_daily data fetcher --- project/us_daily/__main__.py | 94 ++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 project/us_daily/__main__.py diff --git a/project/us_daily/__main__.py b/project/us_daily/__main__.py new file mode 100644 index 
00000000..3cac8af2 --- /dev/null +++ b/project/us_daily/__main__.py @@ -0,0 +1,94 @@ +import logging +import os +import sys +from datetime import datetime + +from massive import RESTClient + +from project.us_daily.config import load_config +from project.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from project.us_daily.ticker_filter import filter_top_tickers +from project.us_daily.agg_fetcher import fetch_ticker_aggs + + +def setup_logging(): + os.makedirs("logs", exist_ok=True) + logger = logging.getLogger("us_daily") + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def main(): + logger = setup_logging() + config = load_config() + + logger.info("=== US Daily Data Fetcher Started ===") + logger.info(f"Config: {config}") + + client = RESTClient() + + # Step 1: Get ticker list + tickers_path = get_tickers_file_path(config.data_dir) + if config.refresh_tickers or not file_exists(tickers_path): + logger.info("Filtering top tickers from API...") + tickers = filter_top_tickers(client, config) + save_json(tickers_path, { + "updated_at": datetime.now().strftime("%Y-%m-%d"), + "market_cap_min": config.market_cap_min, + "tickers": tickers, + }) + logger.info(f"Saved {len(tickers)} tickers to {tickers_path}") + else: + data = load_json(tickers_path) + tickers = data["tickers"] + logger.info( + f"Loaded {len(tickers)} tickers from {tickers_path} " + f"(updated: {data.get('updated_at', 'unknown')})" + ) + + # Step 2: Fetch agg data for each ticker + 
all_failures = [] + total = len(tickers) + for i, ticker_info in enumerate(tickers): + ticker = ticker_info["ticker"] + logger.info(f"[{i + 1}/{total}] Processing {ticker}") + result = fetch_ticker_aggs(client, ticker, config) + if result["failures"]: + all_failures.extend(result["failures"]) + + # Step 3: Summary + logger.info("=== Summary ===") + logger.info(f"Total tickers: {total}") + if all_failures: + logger.warning(f"Failed months: {len(all_failures)}") + for f in all_failures: + logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") + else: + logger.info("All data fetched successfully") + logger.info("=== Done ===") + + +if __name__ == "__main__": + main() From a9edc35b34a028429fb262c629a5efcc64eeaf2e Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:10:54 +0800 Subject: [PATCH 06/43] style: format us_daily module with black Co-Authored-By: Claude Opus 4.6 (1M context) --- project/us_daily/.config.json.swp | Bin 0 -> 12288 bytes project/us_daily/.config.py.swp | Bin 0 -> 12288 bytes project/us_daily/__main__.py | 13 +++++---- project/us_daily/agg_fetcher.py | 12 ++++---- project/us_daily/ticker_filter.py | 4 +-- tests/test_us_daily/test_agg_fetcher.py | 32 ++++++++++++++++------ tests/test_us_daily/test_config.py | 4 +-- tests/test_us_daily/test_ticker_filter.py | 28 +++++++++++-------- 8 files changed, 58 insertions(+), 35 deletions(-) create mode 100644 project/us_daily/.config.json.swp create mode 100644 project/us_daily/.config.py.swp diff --git a/project/us_daily/.config.json.swp b/project/us_daily/.config.json.swp new file mode 100644 index 0000000000000000000000000000000000000000..48028535fe609c7923ebcb9968f696f2c5aaf255 GIT binary patch literal 12288 zcmeI&u};G<5C&kEi3NdRVNA=~Hfbsb5DRP-s>A{Ub%;#jwr-QS*lq=?fG6M$7}$6Z z9)*dgK%7g;KnJ9575_;m$M&6NeOr`V2N$ijcTjIIiY>;boez2RJZ8HujP(Y6xB6#L zT_V~%K2d5|JJYJ3s+gyXZ4XrF$0`y2@oCG?xG{1fYO%`vkyZl{TYqe#gv<1fs`9Sv 
z)dxo9i|1ek0uWe6V1qToM&LU4DrIkH`+S*0&;tPoKmY;|fB*y_009WBsDK@n*dyJ2 zt+@YsF_wOf#S{YsAOHafKmY;|fB*y_009U<00Jv0z!S#Gn~Ysiy6^w<_y13tzf-=9C>uS9?i`fB*y_009U<00Izz00bZa0SNqifqUk8m5kp;T39WGsT_Jux0G

t+^n1J&t~Ct8pcXW~n=-D4!bUM4 fMVZWLdq0>*acEZy(~qo-heErnba`q-l}*_fMp0lM literal 0 HcmV?d00001 diff --git a/project/us_daily/.config.py.swp b/project/us_daily/.config.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..54fd2997cdc1154e5f37e006e2d2e11094bad88e GIT binary patch literal 12288 zcmeI2zl#$=6vyYRY)(a+fXx`21d`1rhsCWZ{s7@P4s5isSmt)$UMAVyS!ZU?OQW?| z=pP^`_WlL-YUSS{f?DVw;G5k|Zn;B*>ki+*mrdThpP!@&dAt6@tsTDUjTxQ`j4d90 zmBH(hb>1?zH=8=^KM(rFup^>7O3jD&wenIGiS&5bQ*<0fDu&VR?X6KJjFI~=j8rx% zw3N4sV=Kq3v*VKCH>d_$rM1Tko0U|&IhyW2F0z`la{Ko`19u2nCIj?u1nX|Ny z0V}N4z505<=dc*?eUX;XXKg>3@To9IUHiexDN(u=%w^7FVTC6xWTw~hb4lF|zrkll z<+$A~;$CuC?1{8ft_sMTmcl~V>40~2x6g&)NnH`!QO0vu9vnzxP20PvR*!^r?pEvH zy*`T};jUg-U$tWbZmdS%?%$TjU9;H2@Me=Pz^|2?B@+iBBGwX2fKyw{u~A*PA;Vbe zRc2>c?WA`r9}SO7FgC dict: time.sleep(config.request_interval) if aggs is None: - failures.append({ - "ticker": ticker, - "month": month, - "error": str(last_error), - }) + failures.append( + { + "ticker": ticker, + "month": month, + "error": str(last_error), + } + ) logger.error(f" {ticker} {month}: all retries failed, skipping") continue diff --git a/project/us_daily/ticker_filter.py b/project/us_daily/ticker_filter.py index 30c666db..733904d2 100644 --- a/project/us_daily/ticker_filter.py +++ b/project/us_daily/ticker_filter.py @@ -32,9 +32,7 @@ def filter_top_tickers(client, config: Config) -> List[dict]: details = client.get_ticker_details(ticker_str) time.sleep(config.request_interval) except Exception as e: - logger.warning( - f"Failed to get details for {ticker_str}: {e}" - ) + logger.warning(f"Failed to get details for {ticker_str}: {e}") continue if details.market_cap is None: diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py index 666cc76f..cb7b1b92 100644 --- a/tests/test_us_daily/test_agg_fetcher.py +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -91,8 +91,12 @@ def test_skips_existing_historical_month(self): client = MagicMock() - with 
patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): - with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch( + "project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "project.us_daily.agg_fetcher.is_current_month", return_value=False + ): with patch("project.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) @@ -123,8 +127,12 @@ def test_fetches_missing_month(self): client = MagicMock() client.list_aggs.return_value = iter([agg1]) - with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): - with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch( + "project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "project.us_daily.agg_fetcher.is_current_month", return_value=False + ): with patch("project.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) @@ -169,8 +177,12 @@ def test_refreshes_current_month(self): client = MagicMock() client.list_aggs.return_value = iter([agg1]) - with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2026-04"]): - with patch("project.us_daily.agg_fetcher.is_current_month", return_value=True): + with patch( + "project.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] + ): + with patch( + "project.us_daily.agg_fetcher.is_current_month", return_value=True + ): with patch("project.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) @@ -192,8 +204,12 @@ def test_records_failure_after_retries(self): client = MagicMock() client.list_aggs.side_effect = Exception("API timeout") - with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): - with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch( + 
"project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "project.us_daily.agg_fetcher.is_current_month", return_value=False + ): with patch("project.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index 4bc9b59f..9458d194 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -19,9 +19,7 @@ def test_default_config(self): def test_load_config_from_file(self): from project.us_daily.config import load_config - with tempfile.NamedTemporaryFile( - mode="w", suffix=".json", delete=False - ) as f: + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f) tmp_path = f.name diff --git a/tests/test_us_daily/test_ticker_filter.py b/tests/test_us_daily/test_ticker_filter.py index 7e9f202d..4e2b48bb 100644 --- a/tests/test_us_daily/test_ticker_filter.py +++ b/tests/test_us_daily/test_ticker_filter.py @@ -26,10 +26,12 @@ def test_filter_top_tickers_filters_by_market_cap(self): client = MagicMock() # list_tickers returns different tickers per exchange - client.list_tickers.return_value = iter([ - self._make_ticker("AAPL", "XNAS"), - self._make_ticker("TINY", "XNAS"), - ]) + client.list_tickers.return_value = iter( + [ + self._make_ticker("AAPL", "XNAS"), + self._make_ticker("TINY", "XNAS"), + ] + ) # get_ticker_details: AAPL has large cap, TINY does not def mock_details(ticker): @@ -55,9 +57,11 @@ def test_filter_top_tickers_includes_required_fields(self): config = Config(market_cap_min=5e9, request_interval=0) client = MagicMock() - client.list_tickers.return_value = iter([ - self._make_ticker("MSFT", "XNYS"), - ]) + client.list_tickers.return_value = iter( + [ + self._make_ticker("MSFT", "XNYS"), + ] + ) client.get_ticker_details.return_value = self._make_details( "MSFT", "Microsoft Corporation", 
2.8e12, "XNYS" ) @@ -80,10 +84,12 @@ def test_filter_skips_ticker_on_details_error(self): config = Config(market_cap_min=5e9, request_interval=0) client = MagicMock() - client.list_tickers.return_value = iter([ - self._make_ticker("FAIL", "XNAS"), - self._make_ticker("AAPL", "XNAS"), - ]) + client.list_tickers.return_value = iter( + [ + self._make_ticker("FAIL", "XNAS"), + self._make_ticker("AAPL", "XNAS"), + ] + ) def mock_details(ticker): if ticker == "FAIL": From 4ec694a1eb6e04e0ca90ff372dc45441588697f6 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:11:16 +0800 Subject: [PATCH 07/43] chore: remove accidentally committed .swp files --- project/us_daily/.config.json.swp | Bin 12288 -> 0 bytes project/us_daily/.config.py.swp | Bin 12288 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 project/us_daily/.config.json.swp delete mode 100644 project/us_daily/.config.py.swp diff --git a/project/us_daily/.config.json.swp b/project/us_daily/.config.json.swp deleted file mode 100644 index 48028535fe609c7923ebcb9968f696f2c5aaf255..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI&u};G<5C&kEi3NdRVNA=~Hfbsb5DRP-s>A{Ub%;#jwr-QS*lq=?fG6M$7}$6Z z9)*dgK%7g;KnJ9575_;m$M&6NeOr`V2N$ijcTjIIiY>;boez2RJZ8HujP(Y6xB6#L zT_V~%K2d5|JJYJ3s+gyXZ4XrF$0`y2@oCG?xG{1fYO%`vkyZl{TYqe#gv<1fs`9Sv z)dxo9i|1ek0uWe6V1qToM&LU4DrIkH`+S*0&;tPoKmY;|fB*y_009WBsDK@n*dyJ2 zt+@YsF_wOf#S{YsAOHafKmY;|fB*y_009U<00Jv0z!S#Gn~Ysiy6^w<_y13tzf-=9C>uS9?i`fB*y_009U<00Izz00bZa0SNqifqUk8m5kp;T39WGsT_Jux0G

t+^n1J&t~Ct8pcXW~n=-D4!bUM4 fMVZWLdq0>*acEZy(~qo-heErnba`q-l}*_fMp0lM diff --git a/project/us_daily/.config.py.swp b/project/us_daily/.config.py.swp deleted file mode 100644 index 54fd2997cdc1154e5f37e006e2d2e11094bad88e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2zl#$=6vyYRY)(a+fXx`21d`1rhsCWZ{s7@P4s5isSmt)$UMAVyS!ZU?OQW?| z=pP^`_WlL-YUSS{f?DVw;G5k|Zn;B*>ki+*mrdThpP!@&dAt6@tsTDUjTxQ`j4d90 zmBH(hb>1?zH=8=^KM(rFup^>7O3jD&wenIGiS&5bQ*<0fDu&VR?X6KJjFI~=j8rx% zw3N4sV=Kq3v*VKCH>d_$rM1Tko0U|&IhyW2F0z`la{Ko`19u2nCIj?u1nX|Ny z0V}N4z505<=dc*?eUX;XXKg>3@To9IUHiexDN(u=%w^7FVTC6xWTw~hb4lF|zrkll z<+$A~;$CuC?1{8ft_sMTmcl~V>40~2x6g&)NnH`!QO0vu9vnzxP20PvR*!^r?pEvH zy*`T};jUg-U$tWbZmdS%?%$TjU9;H2@Me=Pz^|2?B@+iBBGwX2fKyw{u~A*PA;Vbe zRc2>c?WA`r9}SO7FgC Date: Thu, 23 Apr 2026 16:50:00 +0800 Subject: [PATCH 08/43] refactor: move massive, provider, data_provider into src/ layout --- {massive => src/massive}/__init__.py | 0 {massive => src/massive}/exceptions.py | 0 {massive => src/massive}/logging.py | 0 {massive => src/massive}/modelclass.py | 0 {massive => src/massive}/rest/__init__.py | 0 {massive => src/massive}/rest/aggs.py | 0 {massive => src/massive}/rest/base.py | 0 {massive => src/massive}/rest/benzinga.py | 0 {massive => src/massive}/rest/economy.py | 0 {massive => src/massive}/rest/etf_global.py | 0 {massive => src/massive}/rest/financials.py | 0 {massive => src/massive}/rest/futures.py | 0 {massive => src/massive}/rest/indicators.py | 0 .../massive}/rest/models/__init__.py | 0 {massive => src/massive}/rest/models/aggs.py | 0 .../massive}/rest/models/benzinga.py | 0 .../massive}/rest/models/common.py | 0 .../massive}/rest/models/conditions.py | 0 .../massive}/rest/models/contracts.py | 0 .../massive}/rest/models/dividends.py | 0 .../massive}/rest/models/economy.py | 0 .../massive}/rest/models/etf_global.py | 0 .../massive}/rest/models/exchanges.py | 0 .../massive}/rest/models/financials.py | 0 
.../massive}/rest/models/futures.py | 0 .../massive}/rest/models/indicators.py | 0 .../massive}/rest/models/markets.py | 0 .../massive}/rest/models/quotes.py | 0 .../massive}/rest/models/request.py | 0 .../massive}/rest/models/snapshot.py | 0 .../massive}/rest/models/splits.py | 0 .../massive}/rest/models/summaries.py | 0 .../massive}/rest/models/tickers.py | 0 {massive => src/massive}/rest/models/tmx.py | 0 .../massive}/rest/models/trades.py | 0 {massive => src/massive}/rest/quotes.py | 0 {massive => src/massive}/rest/reference.py | 0 {massive => src/massive}/rest/snapshot.py | 0 {massive => src/massive}/rest/summaries.py | 0 {massive => src/massive}/rest/tmx.py | 0 {massive => src/massive}/rest/trades.py | 0 {massive => src/massive}/rest/vX.py | 0 .../massive}/websocket/__init__.py | 0 .../massive}/websocket/models/__init__.py | 0 .../massive}/websocket/models/common.py | 0 .../massive}/websocket/models/models.py | 0 src/processor/__init__.py | 0 src/processor/us_daily/__init__.py | 0 src/processor/us_daily/__main__.py | 100 + src/processor/us_daily/agg_fetcher.py | 117 + src/processor/us_daily/config.json | 8 + src/processor/us_daily/config.py | 24 + src/processor/us_daily/storage.py | 25 + src/processor/us_daily/ticker_filter.py | 61 + src/provider/__init__.py | 58 + src/provider/akshare_fetcher.py | 1917 +++++++++++++ src/provider/baostock_fetcher.py | 379 +++ src/provider/base.py | 2500 +++++++++++++++++ src/provider/efinance_fetcher.py | 1238 ++++++++ src/provider/fundamental_adapter.py | 532 ++++ src/provider/longbridge_fetcher.py | 697 +++++ src/provider/pytdx_fetcher.py | 469 ++++ src/provider/realtime_types.py | 449 +++ src/provider/tickflow_fetcher.py | 341 +++ src/provider/tushare_fetcher.py | 1320 +++++++++ src/provider/us_index_mapping.py | 114 + src/provider/yfinance_fetcher.py | 746 +++++ 67 files changed, 11095 insertions(+) rename {massive => src/massive}/__init__.py (100%) rename {massive => src/massive}/exceptions.py (100%) rename {massive => 
src/massive}/logging.py (100%) rename {massive => src/massive}/modelclass.py (100%) rename {massive => src/massive}/rest/__init__.py (100%) rename {massive => src/massive}/rest/aggs.py (100%) rename {massive => src/massive}/rest/base.py (100%) rename {massive => src/massive}/rest/benzinga.py (100%) rename {massive => src/massive}/rest/economy.py (100%) rename {massive => src/massive}/rest/etf_global.py (100%) rename {massive => src/massive}/rest/financials.py (100%) rename {massive => src/massive}/rest/futures.py (100%) rename {massive => src/massive}/rest/indicators.py (100%) rename {massive => src/massive}/rest/models/__init__.py (100%) rename {massive => src/massive}/rest/models/aggs.py (100%) rename {massive => src/massive}/rest/models/benzinga.py (100%) rename {massive => src/massive}/rest/models/common.py (100%) rename {massive => src/massive}/rest/models/conditions.py (100%) rename {massive => src/massive}/rest/models/contracts.py (100%) rename {massive => src/massive}/rest/models/dividends.py (100%) rename {massive => src/massive}/rest/models/economy.py (100%) rename {massive => src/massive}/rest/models/etf_global.py (100%) rename {massive => src/massive}/rest/models/exchanges.py (100%) rename {massive => src/massive}/rest/models/financials.py (100%) rename {massive => src/massive}/rest/models/futures.py (100%) rename {massive => src/massive}/rest/models/indicators.py (100%) rename {massive => src/massive}/rest/models/markets.py (100%) rename {massive => src/massive}/rest/models/quotes.py (100%) rename {massive => src/massive}/rest/models/request.py (100%) rename {massive => src/massive}/rest/models/snapshot.py (100%) rename {massive => src/massive}/rest/models/splits.py (100%) rename {massive => src/massive}/rest/models/summaries.py (100%) rename {massive => src/massive}/rest/models/tickers.py (100%) rename {massive => src/massive}/rest/models/tmx.py (100%) rename {massive => src/massive}/rest/models/trades.py (100%) rename {massive => 
src/massive}/rest/quotes.py (100%) rename {massive => src/massive}/rest/reference.py (100%) rename {massive => src/massive}/rest/snapshot.py (100%) rename {massive => src/massive}/rest/summaries.py (100%) rename {massive => src/massive}/rest/tmx.py (100%) rename {massive => src/massive}/rest/trades.py (100%) rename {massive => src/massive}/rest/vX.py (100%) rename {massive => src/massive}/websocket/__init__.py (100%) rename {massive => src/massive}/websocket/models/__init__.py (100%) rename {massive => src/massive}/websocket/models/common.py (100%) rename {massive => src/massive}/websocket/models/models.py (100%) create mode 100644 src/processor/__init__.py create mode 100644 src/processor/us_daily/__init__.py create mode 100644 src/processor/us_daily/__main__.py create mode 100644 src/processor/us_daily/agg_fetcher.py create mode 100644 src/processor/us_daily/config.json create mode 100644 src/processor/us_daily/config.py create mode 100644 src/processor/us_daily/storage.py create mode 100644 src/processor/us_daily/ticker_filter.py create mode 100644 src/provider/__init__.py create mode 100644 src/provider/akshare_fetcher.py create mode 100644 src/provider/baostock_fetcher.py create mode 100644 src/provider/base.py create mode 100644 src/provider/efinance_fetcher.py create mode 100644 src/provider/fundamental_adapter.py create mode 100644 src/provider/longbridge_fetcher.py create mode 100644 src/provider/pytdx_fetcher.py create mode 100644 src/provider/realtime_types.py create mode 100644 src/provider/tickflow_fetcher.py create mode 100644 src/provider/tushare_fetcher.py create mode 100644 src/provider/us_index_mapping.py create mode 100644 src/provider/yfinance_fetcher.py diff --git a/massive/__init__.py b/src/massive/__init__.py similarity index 100% rename from massive/__init__.py rename to src/massive/__init__.py diff --git a/massive/exceptions.py b/src/massive/exceptions.py similarity index 100% rename from massive/exceptions.py rename to 
src/massive/exceptions.py diff --git a/massive/logging.py b/src/massive/logging.py similarity index 100% rename from massive/logging.py rename to src/massive/logging.py diff --git a/massive/modelclass.py b/src/massive/modelclass.py similarity index 100% rename from massive/modelclass.py rename to src/massive/modelclass.py diff --git a/massive/rest/__init__.py b/src/massive/rest/__init__.py similarity index 100% rename from massive/rest/__init__.py rename to src/massive/rest/__init__.py diff --git a/massive/rest/aggs.py b/src/massive/rest/aggs.py similarity index 100% rename from massive/rest/aggs.py rename to src/massive/rest/aggs.py diff --git a/massive/rest/base.py b/src/massive/rest/base.py similarity index 100% rename from massive/rest/base.py rename to src/massive/rest/base.py diff --git a/massive/rest/benzinga.py b/src/massive/rest/benzinga.py similarity index 100% rename from massive/rest/benzinga.py rename to src/massive/rest/benzinga.py diff --git a/massive/rest/economy.py b/src/massive/rest/economy.py similarity index 100% rename from massive/rest/economy.py rename to src/massive/rest/economy.py diff --git a/massive/rest/etf_global.py b/src/massive/rest/etf_global.py similarity index 100% rename from massive/rest/etf_global.py rename to src/massive/rest/etf_global.py diff --git a/massive/rest/financials.py b/src/massive/rest/financials.py similarity index 100% rename from massive/rest/financials.py rename to src/massive/rest/financials.py diff --git a/massive/rest/futures.py b/src/massive/rest/futures.py similarity index 100% rename from massive/rest/futures.py rename to src/massive/rest/futures.py diff --git a/massive/rest/indicators.py b/src/massive/rest/indicators.py similarity index 100% rename from massive/rest/indicators.py rename to src/massive/rest/indicators.py diff --git a/massive/rest/models/__init__.py b/src/massive/rest/models/__init__.py similarity index 100% rename from massive/rest/models/__init__.py rename to 
src/massive/rest/models/__init__.py diff --git a/massive/rest/models/aggs.py b/src/massive/rest/models/aggs.py similarity index 100% rename from massive/rest/models/aggs.py rename to src/massive/rest/models/aggs.py diff --git a/massive/rest/models/benzinga.py b/src/massive/rest/models/benzinga.py similarity index 100% rename from massive/rest/models/benzinga.py rename to src/massive/rest/models/benzinga.py diff --git a/massive/rest/models/common.py b/src/massive/rest/models/common.py similarity index 100% rename from massive/rest/models/common.py rename to src/massive/rest/models/common.py diff --git a/massive/rest/models/conditions.py b/src/massive/rest/models/conditions.py similarity index 100% rename from massive/rest/models/conditions.py rename to src/massive/rest/models/conditions.py diff --git a/massive/rest/models/contracts.py b/src/massive/rest/models/contracts.py similarity index 100% rename from massive/rest/models/contracts.py rename to src/massive/rest/models/contracts.py diff --git a/massive/rest/models/dividends.py b/src/massive/rest/models/dividends.py similarity index 100% rename from massive/rest/models/dividends.py rename to src/massive/rest/models/dividends.py diff --git a/massive/rest/models/economy.py b/src/massive/rest/models/economy.py similarity index 100% rename from massive/rest/models/economy.py rename to src/massive/rest/models/economy.py diff --git a/massive/rest/models/etf_global.py b/src/massive/rest/models/etf_global.py similarity index 100% rename from massive/rest/models/etf_global.py rename to src/massive/rest/models/etf_global.py diff --git a/massive/rest/models/exchanges.py b/src/massive/rest/models/exchanges.py similarity index 100% rename from massive/rest/models/exchanges.py rename to src/massive/rest/models/exchanges.py diff --git a/massive/rest/models/financials.py b/src/massive/rest/models/financials.py similarity index 100% rename from massive/rest/models/financials.py rename to src/massive/rest/models/financials.py diff 
--git a/massive/rest/models/futures.py b/src/massive/rest/models/futures.py similarity index 100% rename from massive/rest/models/futures.py rename to src/massive/rest/models/futures.py diff --git a/massive/rest/models/indicators.py b/src/massive/rest/models/indicators.py similarity index 100% rename from massive/rest/models/indicators.py rename to src/massive/rest/models/indicators.py diff --git a/massive/rest/models/markets.py b/src/massive/rest/models/markets.py similarity index 100% rename from massive/rest/models/markets.py rename to src/massive/rest/models/markets.py diff --git a/massive/rest/models/quotes.py b/src/massive/rest/models/quotes.py similarity index 100% rename from massive/rest/models/quotes.py rename to src/massive/rest/models/quotes.py diff --git a/massive/rest/models/request.py b/src/massive/rest/models/request.py similarity index 100% rename from massive/rest/models/request.py rename to src/massive/rest/models/request.py diff --git a/massive/rest/models/snapshot.py b/src/massive/rest/models/snapshot.py similarity index 100% rename from massive/rest/models/snapshot.py rename to src/massive/rest/models/snapshot.py diff --git a/massive/rest/models/splits.py b/src/massive/rest/models/splits.py similarity index 100% rename from massive/rest/models/splits.py rename to src/massive/rest/models/splits.py diff --git a/massive/rest/models/summaries.py b/src/massive/rest/models/summaries.py similarity index 100% rename from massive/rest/models/summaries.py rename to src/massive/rest/models/summaries.py diff --git a/massive/rest/models/tickers.py b/src/massive/rest/models/tickers.py similarity index 100% rename from massive/rest/models/tickers.py rename to src/massive/rest/models/tickers.py diff --git a/massive/rest/models/tmx.py b/src/massive/rest/models/tmx.py similarity index 100% rename from massive/rest/models/tmx.py rename to src/massive/rest/models/tmx.py diff --git a/massive/rest/models/trades.py b/src/massive/rest/models/trades.py similarity 
index 100% rename from massive/rest/models/trades.py rename to src/massive/rest/models/trades.py diff --git a/massive/rest/quotes.py b/src/massive/rest/quotes.py similarity index 100% rename from massive/rest/quotes.py rename to src/massive/rest/quotes.py diff --git a/massive/rest/reference.py b/src/massive/rest/reference.py similarity index 100% rename from massive/rest/reference.py rename to src/massive/rest/reference.py diff --git a/massive/rest/snapshot.py b/src/massive/rest/snapshot.py similarity index 100% rename from massive/rest/snapshot.py rename to src/massive/rest/snapshot.py diff --git a/massive/rest/summaries.py b/src/massive/rest/summaries.py similarity index 100% rename from massive/rest/summaries.py rename to src/massive/rest/summaries.py diff --git a/massive/rest/tmx.py b/src/massive/rest/tmx.py similarity index 100% rename from massive/rest/tmx.py rename to src/massive/rest/tmx.py diff --git a/massive/rest/trades.py b/src/massive/rest/trades.py similarity index 100% rename from massive/rest/trades.py rename to src/massive/rest/trades.py diff --git a/massive/rest/vX.py b/src/massive/rest/vX.py similarity index 100% rename from massive/rest/vX.py rename to src/massive/rest/vX.py diff --git a/massive/websocket/__init__.py b/src/massive/websocket/__init__.py similarity index 100% rename from massive/websocket/__init__.py rename to src/massive/websocket/__init__.py diff --git a/massive/websocket/models/__init__.py b/src/massive/websocket/models/__init__.py similarity index 100% rename from massive/websocket/models/__init__.py rename to src/massive/websocket/models/__init__.py diff --git a/massive/websocket/models/common.py b/src/massive/websocket/models/common.py similarity index 100% rename from massive/websocket/models/common.py rename to src/massive/websocket/models/common.py diff --git a/massive/websocket/models/models.py b/src/massive/websocket/models/models.py similarity index 100% rename from massive/websocket/models/models.py rename to 
src/massive/websocket/models/models.py diff --git a/src/processor/__init__.py b/src/processor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/processor/us_daily/__init__.py b/src/processor/us_daily/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py new file mode 100644 index 00000000..9e68c5f7 --- /dev/null +++ b/src/processor/us_daily/__main__.py @@ -0,0 +1,100 @@ +import logging +import os +import sys +from datetime import datetime + +from massive import RESTClient + +from data_provider.us_daily.config import load_config +from data_provider.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from data_provider.us_daily.ticker_filter import filter_top_tickers +from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + + +def setup_logging(): + os.makedirs("logs", exist_ok=True) + logger = logging.getLogger("us_daily") + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def main(): + logger = setup_logging() + config = load_config() + + logger.info("=== US Daily Data Fetcher Started ===") + logger.info(f"Config: {config}") + + client = RESTClient() + + # Step 1: Get ticker list + tickers_path = get_tickers_file_path(config.data_dir) + if config.refresh_tickers or not file_exists(tickers_path): + logger.info("Filtering top tickers from API...") + tickers = filter_top_tickers(client, config) + save_json( + tickers_path, + { + 
"updated_at": datetime.now().strftime("%Y-%m-%d"), + "market_cap_min": config.market_cap_min, + "tickers": tickers, + }, + ) + logger.info(f"Saved {len(tickers)} tickers to {tickers_path}") + else: + data = load_json(tickers_path) + tickers = data["tickers"] + logger.info( + f"Loaded {len(tickers)} tickers from {tickers_path} " + f"(updated: {data.get('updated_at', 'unknown')})" + ) + + # 先获取ticket + return 0 + + # Step 2: Fetch agg data for each ticker + all_failures = [] + total = len(tickers) + for i, ticker_info in enumerate(tickers): + ticker = ticker_info["ticker"] + logger.info(f"[{i + 1}/{total}] Processing {ticker}") + result = fetch_ticker_aggs(client, ticker, config) + if result["failures"]: + all_failures.extend(result["failures"]) + + # Step 3: Summary + logger.info("=== Summary ===") + logger.info(f"Total tickers: {total}") + if all_failures: + logger.warning(f"Failed months: {len(all_failures)}") + for f in all_failures: + logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") + else: + logger.info("All data fetched successfully") + logger.info("=== Done ===") + + +if __name__ == "__main__": + main() diff --git a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py new file mode 100644 index 00000000..656c0820 --- /dev/null +++ b/src/processor/us_daily/agg_fetcher.py @@ -0,0 +1,117 @@ +import calendar +import logging +import time +from datetime import date, datetime +from typing import List, Tuple + +from data_provider.us_daily.config import Config +from data_provider.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +logger = logging.getLogger("us_daily") + + +def generate_months(start: str, end: str) -> List[str]: + start_year, start_month = int(start[:4]), int(start[5:7]) + end_year, end_month = int(end[:4]), int(end[5:7]) + + months = [] + year, month = start_year, start_month + while (year, month) <= (end_year, end_month): + months.append(f"{year:04d}-{month:02d}") + month += 1 + 
if month > 12: + month = 1 + year += 1 + return months + + +def get_month_bounds(month: str) -> Tuple[str, str]: + year, mon = int(month[:4]), int(month[5:7]) + last_day = calendar.monthrange(year, mon)[1] + return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" + + +def is_current_month(month: str) -> bool: + today = date.today() + return month == f"{today.year:04d}-{today.month:02d}" + + +def current_month() -> str: + today = date.today() + return f"{today.year:04d}-{today.month:02d}" + + +def fetch_ticker_aggs(client, ticker: str, config: Config) -> dict: + months = generate_months(config.start_date, current_month()) + failures = [] + + for month in months: + file_path = get_month_file_path(config.data_dir, ticker, month) + + if file_exists(file_path) and not is_current_month(month): + logger.debug(f" {ticker} {month}: exists, skipping") + continue + + start_date, end_date = get_month_bounds(month) + aggs = None + last_error = None + + for attempt in range(1, config.max_retries + 1): + try: + aggs_iter = client.list_aggs( + ticker, + 1, + "day", + from_=start_date, + to=end_date, + adjusted=True, + sort="asc", + ) + aggs = list(aggs_iter) + break + except Exception as e: + last_error = e + logger.warning( + f" {ticker} {month}: attempt {attempt}/{config.max_retries} failed: {e}" + ) + if attempt < config.max_retries: + time.sleep(config.request_interval) + + if aggs is None: + failures.append( + { + "ticker": ticker, + "month": month, + "error": str(last_error), + } + ) + logger.error(f" {ticker} {month}: all retries failed, skipping") + continue + + data = { + "ticker": ticker, + "month": month, + "fetched_at": datetime.now().isoformat(timespec="seconds"), + "data": [ + { + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + "vwap": a.vwap, + "timestamp": a.timestamp, + "transactions": a.transactions, + } + for a in aggs + ], + } + save_json(file_path, data) + logger.info(f" {ticker} {month}: fetched 
{len(aggs)} bars") + time.sleep(config.request_interval) + + return {"failures": failures} diff --git a/src/processor/us_daily/config.json b/src/processor/us_daily/config.json new file mode 100644 index 00000000..20d41292 --- /dev/null +++ b/src/processor/us_daily/config.json @@ -0,0 +1,8 @@ +{ + "refresh_tickers": false, + "market_cap_min": 1000000000, + "start_date": "2026-01", + "request_interval": 12, + "data_dir": "data/us_daily", + "max_retries": 3 +} diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py new file mode 100644 index 00000000..e4cea829 --- /dev/null +++ b/src/processor/us_daily/config.py @@ -0,0 +1,24 @@ +import json +import os +from dataclasses import dataclass + + +@dataclass +class Config: + refresh_tickers: bool = False + market_cap_min: float = 5e9 + start_date: str = "2026-01" + request_interval: int = 12 + data_dir: str = "data/us_daily" + max_retries: int = 3 + + +def load_config(config_path: str = "data_provider/us_daily/config.json") -> Config: + config = Config() + if os.path.exists(config_path): + with open(config_path, "r") as f: + data = json.load(f) + for key, value in data.items(): + if hasattr(config, key): + setattr(config, key, value) + return config diff --git a/src/processor/us_daily/storage.py b/src/processor/us_daily/storage.py new file mode 100644 index 00000000..af80505b --- /dev/null +++ b/src/processor/us_daily/storage.py @@ -0,0 +1,25 @@ +import json +import os + + +def get_tickers_file_path(data_dir: str) -> str: + return os.path.join(data_dir, "top_tickers.json") + + +def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: + return os.path.join(data_dir, ticker, f"{month}.json") + + +def save_json(path: str, data: dict) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + +def load_json(path: str) -> dict: + with open(path, "r", encoding="utf-8") as f: + return 
json.load(f) + + +def file_exists(path: str) -> bool: + return os.path.isfile(path) diff --git a/src/processor/us_daily/ticker_filter.py b/src/processor/us_daily/ticker_filter.py new file mode 100644 index 00000000..1056d134 --- /dev/null +++ b/src/processor/us_daily/ticker_filter.py @@ -0,0 +1,61 @@ +import logging +import time +from typing import List + +from data_provider.us_daily.config import Config + +logger = logging.getLogger("us_daily") + +EXCHANGES = ["XNAS", "XNYS", "ARCX"] + + +def filter_top_tickers(client, config: Config) -> List[dict]: + result = [] + for exchange in EXCHANGES: + logger.info(f"Fetching tickers for exchange: {exchange}") + try: + tickers = client.list_tickers( + market="stocks", + exchange=exchange, + active=True, + limit=1000, + ) + except Exception as e: + logger.error(f"Failed to list tickers for {exchange}: {e}") + continue + + tickers = list(tickers) + logger.info(f"Total tickers found: {len(tickers)}") + time.sleep(config.request_interval) + + for ticker_obj in tickers: + ticker_str = ticker_obj.ticker + try: + details = client.get_ticker_details(ticker_str) + time.sleep(config.request_interval) + except Exception as e: + logger.warning(f"Failed to get details for {ticker_str}: {e}") + continue + + if details.market_cap is None: + logger.debug(f"{ticker_str}: no market_cap data, skipping") + continue + + if details.market_cap >= config.market_cap_min: + entry = { + "ticker": details.ticker, + "name": details.name, + "market_cap": details.market_cap, + "exchange": details.primary_exchange, + } + result.append(entry) + logger.info( + f" {details.ticker}: market_cap={details.market_cap:.0f} included" + ) + else: + logger.debug( + f" {ticker_str}: market_cap={details.market_cap:.0f} < {config.market_cap_min:.0f}, skipping" + ) + + logger.info(f"Total top tickers found: {len(result)}") + return result diff --git a/src/provider/__init__.py b/src/provider/__init__.py new file mode 100644 index 00000000..5973abc3 --- /dev/null +++ 
b/src/provider/__init__.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +""" +=================================== +数据源策略层 - 包初始化 +=================================== + +本包实现策略模式管理多个数据源,实现: +1. 统一的数据获取接口 +2. 自动故障切换 +3. 防封禁流控策略 + +数据源优先级(动态调整): +【配置了 TUSHARE_TOKEN 时】 +1. TushareFetcher (Priority 0) - 🔥 最高优先级(动态提升) +2. EfinanceFetcher (Priority 0) - 同优先级 +3. AkshareFetcher (Priority 1) - 来自 akshare 库 +4. PytdxFetcher (Priority 2) - 来自 pytdx 库(通达信) +5. BaostockFetcher (Priority 3) - 来自 baostock 库 +6. YfinanceFetcher (Priority 4) - 来自 yfinance 库 + +【未配置 TUSHARE_TOKEN 时】 +1. EfinanceFetcher (Priority 0) - 最高优先级,来自 efinance 库 +2. AkshareFetcher (Priority 1) - 来自 akshare 库 +3. PytdxFetcher (Priority 2) - 来自 pytdx 库(通达信) +4. TushareFetcher (Priority 2) - 来自 tushare 库(不可用) +5. BaostockFetcher (Priority 3) - 来自 baostock 库 +6. YfinanceFetcher (Priority 4) - 来自 yfinance 库 +7. LongbridgeFetcher (Priority 5) - 长桥 OpenAPI(美股/港股兜底) + +提示:优先级数字越小越优先,同优先级按初始化顺序排列 +""" + +from .base import BaseFetcher, DataFetcherManager +from .efinance_fetcher import EfinanceFetcher +from .akshare_fetcher import AkshareFetcher, is_hk_stock_code +from .tushare_fetcher import TushareFetcher +from .pytdx_fetcher import PytdxFetcher +from .baostock_fetcher import BaostockFetcher +from .yfinance_fetcher import YfinanceFetcher +from .longbridge_fetcher import LongbridgeFetcher +from .us_index_mapping import is_us_index_code, is_us_stock_code, get_us_index_yf_symbol, US_INDEX_MAPPING + +__all__ = [ + 'BaseFetcher', + 'DataFetcherManager', + 'EfinanceFetcher', + 'AkshareFetcher', + 'TushareFetcher', + 'PytdxFetcher', + 'BaostockFetcher', + 'YfinanceFetcher', + 'LongbridgeFetcher', + 'is_us_index_code', + 'is_us_stock_code', + 'is_hk_stock_code', + 'get_us_index_yf_symbol', + 'US_INDEX_MAPPING', +] diff --git a/src/provider/akshare_fetcher.py b/src/provider/akshare_fetcher.py new file mode 100644 index 00000000..f7aa984e --- /dev/null +++ b/src/provider/akshare_fetcher.py @@ -0,0 +1,1917 @@ +# -*- coding: utf-8 
-*- +""" +=================================== +AkshareFetcher - 主数据源 (Priority 1) +=================================== + +数据来源: +1. 东方财富爬虫(通过 akshare 库) - 默认数据源 +2. 新浪财经接口 - 备选数据源 +3. 腾讯财经接口 - 备选数据源 + +特点:免费、无需 Token、数据全面 +风险:爬虫机制易被反爬封禁 + +防封禁策略: +1. 每次请求前随机休眠 2-5 秒 +2. 随机轮换 User-Agent +3. 使用 tenacity 实现指数退避重试 +4. 熔断器机制:连续失败后自动冷却 + +增强数据: +- 实时行情:量比、换手率、市盈率、市净率、总市值、流通市值 +- 筹码分布:获利比例、平均成本、筹码集中度 +""" + +import logging +import os +import random +import time +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional, Dict, Any, List, Tuple + +import pandas as pd +import requests +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from patch.eastmoney_patch import eastmoney_patch +from src.config import get_config +from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS, is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code +from .realtime_types import ( + UnifiedRealtimeQuote, ChipDistribution, RealtimeSource, + get_realtime_circuit_breaker, get_chip_circuit_breaker, + safe_float, safe_int # 使用统一的类型转换函数 +) +from .us_index_mapping import is_us_index_code, is_us_stock_code + + +# 保留旧的 RealtimeQuote 别名,用于向后兼容 +RealtimeQuote = UnifiedRealtimeQuote + + +logger = logging.getLogger(__name__) + +SINA_REALTIME_ENDPOINT = "hq.sinajs.cn/list" +TENCENT_REALTIME_ENDPOINT = "qt.gtimg.cn/q" + + +# User-Agent 池,用于随机轮换 +USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, 
like Gecko) Chrome/120.0.0.0 Safari/537.36', +] + + +# 缓存实时行情数据(避免重复请求) +# TTL 设为 20 分钟 (1200秒): +# - 批量分析场景:通常 30 只股票在 5 分钟内分析完,20 分钟足够覆盖 +# - 实时性要求:股票分析不需要秒级实时数据,20 分钟延迟可接受 +# - 防封禁:减少 API 调用频率 +_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 1200 # 20分钟缓存有效期 +} + +# ETF 实时行情缓存 +_etf_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 1200 # 20分钟缓存有效期 +} + + +def _is_etf_code(stock_code: str) -> bool: + """ + 判断代码是否为 ETF 基金 + + ETF 代码规则: + - 上交所 ETF: 51xxxx, 52xxxx, 56xxxx, 58xxxx + - 深交所 ETF: 15xxxx, 16xxxx, 18xxxx + + Args: + stock_code: 股票/基金代码 + + Returns: + True 表示是 ETF 代码,False 表示是普通股票代码 + """ + etf_prefixes = ('51', '52', '56', '58', '15', '16', '18') + code = stock_code.strip().split('.')[0] + return code.startswith(etf_prefixes) and len(code) == 6 + + +def _is_hk_code(stock_code: str) -> bool: + """ + 判断代码是否为港股 + + 港股代码规则: + - 5位数字代码,如 '00700' (腾讯控股) + - 部分港股代码可能带有前缀,如 'hk00700', 'hk1810' + + Args: + stock_code: 股票代码 + + Returns: + True 表示是港股代码,False 表示不是港股代码 + """ + # 去除可能的 'hk' 前缀并检查是否为纯数字 + code = stock_code.strip().lower() + if code.endswith('.hk'): + numeric_part = code[:-3] + return numeric_part.isdigit() and 1 <= len(numeric_part) <= 5 + if code.startswith('hk'): + # 带 hk 前缀的一定是港股,去掉前缀后应为纯数字(1-5位) + numeric_part = code[2:] + return numeric_part.isdigit() and 1 <= len(numeric_part) <= 5 + # 无前缀时,5位纯数字才视为港股(避免误判 A 股代码) + return code.isdigit() and len(code) == 5 + + +def is_hk_stock_code(stock_code: str) -> bool: + """ + Public API: determine if a stock code is a Hong Kong stock. + + Delegates to _is_hk_code for internal compatibility. + + Args: + stock_code: Stock code (e.g. 
'00700', 'hk00700') + + Returns: + True if HK stock, False otherwise + """ + return _is_hk_code(stock_code) + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股股票(不包括美股指数)。 + + 委托给 us_index_mapping 模块的 is_us_stock_code()。 + + Args: + stock_code: 股票代码 + + Returns: + True 表示是美股代码,False 表示不是美股代码 + + Examples: + >>> _is_us_code('AAPL') + True + >>> _is_us_code('TSLA') + True + >>> _is_us_code('SPX') + False + >>> _is_us_code('600519') + False + """ + return is_us_stock_code(stock_code) + + +def _to_sina_tx_symbol(stock_code: str) -> str: + """Convert 6-digit A-share code to sh/sz/bj prefixed symbol for Sina/Tencent APIs.""" + base = (stock_code.strip().split(".")[0] if "." in stock_code else stock_code).strip() + if is_bse_code(base): + return f"bj{base}" + # Shanghai: 60xxxx, 5xxxx (ETF), 90xxxx (B-shares) + if base.startswith(("6", "5", "90")): + return f"sh{base}" + return f"sz{base}" + + +def _classify_realtime_http_error(exc: Exception) -> Tuple[str, str]: + """ + Classify Sina/Tencent realtime quote failures into stable categories. 
+ """ + detail = str(exc).strip() or type(exc).__name__ + lowered = detail.lower() + + remote_disconnect_keywords = ( + "remotedisconnected", + "remote end closed connection without response", + "connection aborted", + "connection broken", + "protocolerror", + "chunkedencodingerror", + ) + timeout_keywords = ( + "timeout", + "timed out", + "readtimeout", + "connecttimeout", + ) + rate_limit_keywords = ( + "banned", + "blocked", + "频率", + "rate limit", + "too many requests", + "429", + "限制", + "forbidden", + "403", + ) + + if any(keyword in lowered for keyword in remote_disconnect_keywords): + return "remote_disconnect", detail + if isinstance(exc, (TimeoutError, requests.exceptions.Timeout)) or any( + keyword in lowered for keyword in timeout_keywords + ): + return "timeout", detail + if any(keyword in lowered for keyword in rate_limit_keywords): + return "rate_limit_or_anti_bot", detail + if isinstance(exc, requests.exceptions.RequestException): + return "request_error", detail + return "unknown_request_error", detail + + +def _build_realtime_failure_message( + source_name: str, + endpoint: str, + stock_code: str, + symbol: str, + category: str, + detail: str, + elapsed: float, + error_type: str, +) -> str: + return ( + f"{source_name} 实时行情接口失败: endpoint={endpoint}, stock_code={stock_code}, " + f"symbol={symbol}, category={category}, error_type={error_type}, " + f"elapsed={elapsed:.2f}s, detail={detail}" + ) + + +class AkshareFetcher(BaseFetcher): + """ + Akshare 数据源实现 + + 优先级:1(最高) + 数据来源:东方财富网爬虫 + + 关键策略: + - 每次请求前随机休眠 2.0-5.0 秒 + - 随机 User-Agent 轮换 + - 失败后指数退避重试(最多3次) + """ + + name = "AkshareFetcher" + priority = int(os.getenv("AKSHARE_PRIORITY", "1")) + + def __init__(self, sleep_min: float = 2.0, sleep_max: float = 5.0): + """ + 初始化 AkshareFetcher + + Args: + sleep_min: 最小休眠时间(秒) + sleep_max: 最大休眠时间(秒) + """ + self.sleep_min = sleep_min + self.sleep_max = sleep_max + self._last_request_time: Optional[float] = None + # 东财补丁开启才执行打补丁操作 + if 
get_config().enable_eastmoney_patch: + eastmoney_patch() + + def _set_random_user_agent(self) -> None: + """ + 设置随机 User-Agent + + 通过修改 requests Session 的 headers 实现 + 这是关键的反爬策略之一 + """ + try: + import akshare as ak + # akshare 内部使用 requests,我们通过环境变量或直接设置来影响 + # 实际上 akshare 可能不直接暴露 session,这里通过 fake_useragent 作为补充 + random_ua = random.choice(USER_AGENTS) + logger.debug(f"设置 User-Agent: {random_ua[:50]}...") + except Exception as e: + logger.debug(f"设置 User-Agent 失败: {e}") + + def _enforce_rate_limit(self) -> None: + """ + 强制执行速率限制 + + 策略: + 1. 检查距离上次请求的时间间隔 + 2. 如果间隔不足,补充休眠时间 + 3. 然后再执行随机 jitter 休眠 + """ + if self._last_request_time is not None: + elapsed = time.time() - self._last_request_time + min_interval = self.sleep_min + if elapsed < min_interval: + additional_sleep = min_interval - elapsed + logger.debug(f"补充休眠 {additional_sleep:.2f} 秒") + time.sleep(additional_sleep) + + # 执行随机 jitter 休眠 + self.random_sleep(self.sleep_min, self.sleep_max) + self._last_request_time = time.time() + + @retry( + stop=stop_after_attempt(3), # 最多重试3次 + wait=wait_exponential(multiplier=1, min=2, max=30), # 指数退避:2, 4, 8... 最大30秒 + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Akshare 获取原始数据 + + 根据代码类型自动选择 API: + - 美股:不支持,抛出异常由 YfinanceFetcher 处理(Issue #311) + - 港股:使用 ak.stock_hk_hist() + - ETF 基金:使用 ak.fund_etf_hist_em() + - 普通 A 股:使用 ak.stock_zh_a_hist() + + 流程: + 1. 判断代码类型(美股/港股/ETF/A股) + 2. 设置随机 User-Agent + 3. 执行速率限制(随机休眠) + 4. 调用对应的 akshare API + 5. 
处理返回数据 + """ + # 根据代码类型选择不同的获取方法 + if _is_us_code(stock_code): + # 美股:akshare 的 stock_us_daily 接口复权存在已知问题(参见 Issue #311) + # 交由 YfinanceFetcher 处理,确保复权价格一致 + raise DataFetchError( + f"AkshareFetcher 不支持美股 {stock_code},请使用 YfinanceFetcher 获取正确的复权价格" + ) + elif _is_hk_code(stock_code): + return self._fetch_hk_data(stock_code, start_date, end_date) + elif _is_etf_code(stock_code): + return self._fetch_etf_data(stock_code, start_date, end_date) + else: + return self._fetch_stock_data(stock_code, start_date, end_date) + + def _fetch_stock_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 + + 策略: + 1. 优先尝试东方财富接口 (ak.stock_zh_a_hist) + 2. 失败后尝试新浪财经接口 (ak.stock_zh_a_daily) + 3. 最后尝试腾讯财经接口 (ak.stock_zh_a_hist_tx) + """ + # 尝试列表 + methods = [ + (self._fetch_stock_data_em, "东方财富"), + (self._fetch_stock_data_sina, "新浪财经"), + (self._fetch_stock_data_tx, "腾讯财经"), + ] + + last_error = None + + for fetch_method, source_name in methods: + try: + logger.info(f"[数据源] 尝试使用 {source_name} 获取 {stock_code}...") + df = fetch_method(stock_code, start_date, end_date) + + if df is not None and not df.empty: + logger.info(f"[数据源] {source_name} 获取成功") + return df + except Exception as e: + last_error = e + logger.warning(f"[数据源] {source_name} 获取失败: {e}") + # 继续尝试下一个 + + # 所有都失败 + raise DataFetchError(f"Akshare 所有渠道获取失败: {last_error}") + + def _fetch_stock_data_em(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 (东方财富) + 数据来源:ak.stock_zh_a_hist() + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.stock_zh_a_hist(symbol={stock_code}, ...)") + + try: + import time as _time + api_start = _time.time() + + df = ak.stock_zh_a_hist( + symbol=stock_code, + period="daily", + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" + ) + + api_elapsed = 
_time.time() - api_start + + if df is not None and not df.empty: + logger.info(f"[API返回] ak.stock_zh_a_hist 成功: {len(df)} 行, 耗时 {api_elapsed:.2f}s") + return df + else: + logger.warning(f"[API返回] ak.stock_zh_a_hist 返回空数据") + return pd.DataFrame() + + except Exception as e: + error_msg = str(e).lower() + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + raise RateLimitError(f"Akshare(EM) 可能被限流: {e}") from e + raise e + + def _fetch_stock_data_sina(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 (新浪财经) + 数据来源:ak.stock_zh_a_daily() + """ + import akshare as ak + + # 转换代码格式:sh600000, sz000001, bj920748 + symbol = _to_sina_tx_symbol(stock_code) + + self._enforce_rate_limit() + + try: + df = ak.stock_zh_a_daily( + symbol=symbol, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" + ) + + # 标准化新浪数据列名 + # 新浪返回:date, open, high, low, close, volume, amount, outstanding_share, turnover + if df is not None and not df.empty: + # 确保日期列存在 + if 'date' in df.columns: + df = df.rename(columns={'date': '日期'}) + + # 映射其他列以匹配 _normalize_data 的期望 + # _normalize_data 期望:日期, 开盘, 收盘, 最高, 最低, 成交量, 成交额 + rename_map = { + 'open': '开盘', 'high': '最高', 'low': '最低', + 'close': '收盘', 'volume': '成交量', 'amount': '成交额' + } + df = df.rename(columns=rename_map) + + # 计算涨跌幅(新浪接口可能不返回) + if '收盘' in df.columns: + df['涨跌幅'] = df['收盘'].pct_change() * 100 + df['涨跌幅'] = df['涨跌幅'].fillna(0) + + return df + return pd.DataFrame() + + except Exception as e: + raise e + + def _fetch_stock_data_tx(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 (腾讯财经) + 数据来源:ak.stock_zh_a_hist_tx() + """ + import akshare as ak + + # 转换代码格式:sh600000, sz000001, bj920748 + symbol = _to_sina_tx_symbol(stock_code) + + self._enforce_rate_limit() + + try: + df = ak.stock_zh_a_hist_tx( + symbol=symbol, + start_date=start_date.replace('-', ''), + 
end_date=end_date.replace('-', ''), + adjust="qfq" + ) + + # 标准化腾讯数据列名 + # 腾讯返回:date, open, close, high, low, volume, amount + if df is not None and not df.empty: + rename_map = { + 'date': '日期', 'open': '开盘', 'high': '最高', + 'low': '最低', 'close': '收盘', 'volume': '成交量', + 'amount': '成交额' + } + df = df.rename(columns=rename_map) + + # 腾讯数据通常包含 '涨跌幅',如果没有则计算 + if 'pct_chg' in df.columns: + df = df.rename(columns={'pct_chg': '涨跌幅'}) + elif '收盘' in df.columns: + df['涨跌幅'] = df['收盘'].pct_change() * 100 + df['涨跌幅'] = df['涨跌幅'].fillna(0) + + return df + return pd.DataFrame() + + except Exception as e: + raise e + + def _fetch_etf_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取 ETF 基金历史数据 + + 数据来源:ak.fund_etf_hist_em() + + Args: + stock_code: ETF 代码,如 '512400', '159883' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + ETF 历史数据 DataFrame + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.fund_etf_hist_em(symbol={stock_code}, period=daily, " + f"start_date={start_date.replace('-', '')}, end_date={end_date.replace('-', '')}, adjust=qfq)") + + try: + import time as _time + api_start = _time.time() + + # 调用 akshare 获取 ETF 日线数据 + df = ak.fund_etf_hist_em( + symbol=stock_code, + period="daily", + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" # 前复权 + ) + + api_elapsed = _time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info(f"[API返回] ak.fund_etf_hist_em 成功: 返回 {len(df)} 行数据, 耗时 {api_elapsed:.2f}s") + logger.info(f"[API返回] 列名: {list(df.columns)}") + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning(f"[API返回] ak.fund_etf_hist_em 返回空数据, 耗时 {api_elapsed:.2f}s") + + return df + + except Exception as e: + 
error_msg = str(e).lower() + + # 检测反爬封禁 + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + logger.warning(f"检测到可能被封禁: {e}") + raise RateLimitError(f"Akshare 可能被限流: {e}") from e + + raise DataFetchError(f"Akshare 获取 ETF 数据失败: {e}") from e + + def _fetch_us_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取美股历史数据 + + 数据来源:ak.stock_us_daily()(新浪财经接口) + + Args: + stock_code: 美股代码,如 'AMD', 'AAPL', 'TSLA' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + 美股历史数据 DataFrame + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + # 美股代码直接使用大写 + symbol = stock_code.strip().upper() + + logger.info(f"[API调用] ak.stock_us_daily(symbol={symbol}, adjust=qfq)") + + try: + import time as _time + api_start = _time.time() + + # 调用 akshare 获取美股日线数据 + # stock_us_daily 返回全部历史数据,后续需要按日期过滤 + df = ak.stock_us_daily( + symbol=symbol, + adjust="qfq" # 前复权 + ) + + api_elapsed = _time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info(f"[API返回] ak.stock_us_daily 成功: 返回 {len(df)} 行数据, 耗时 {api_elapsed:.2f}s") + logger.info(f"[API返回] 列名: {list(df.columns)}") + + # 按日期过滤 + df['date'] = pd.to_datetime(df['date']) + start_dt = pd.to_datetime(start_date) + end_dt = pd.to_datetime(end_date) + df = df[(df['date'] >= start_dt) & (df['date'] <= end_dt)] + + if not df.empty: + logger.info(f"[API返回] 过滤后日期范围: {df['date'].iloc[0].strftime('%Y-%m-%d')} ~ {df['date'].iloc[-1].strftime('%Y-%m-%d')}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning(f"[API返回] 过滤后数据为空,日期范围 {start_date} ~ {end_date} 无数据") + + # 转换列名为中文格式以匹配 _normalize_data + # stock_us_daily 返回: date, open, high, low, close, volume + rename_map = { + 'date': '日期', + 'open': '开盘', + 'high': '最高', + 'low': '最低', + 'close': '收盘', + 'volume': '成交量', + } + df = df.rename(columns=rename_map) 
+ + # 计算涨跌幅(美股接口不直接返回) + if '收盘' in df.columns: + df['涨跌幅'] = df['收盘'].pct_change() * 100 + df['涨跌幅'] = df['涨跌幅'].fillna(0) + + # 估算成交额(美股接口不返回) + if '成交量' in df.columns and '收盘' in df.columns: + df['成交额'] = df['成交量'] * df['收盘'] + else: + df['成交额'] = 0 + + return df + else: + logger.warning(f"[API返回] ak.stock_us_daily 返回空数据, 耗时 {api_elapsed:.2f}s") + return pd.DataFrame() + + except Exception as e: + error_msg = str(e).lower() + + # 检测反爬封禁 + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + logger.warning(f"检测到可能被封禁: {e}") + raise RateLimitError(f"Akshare 可能被限流: {e}") from e + + raise DataFetchError(f"Akshare 获取美股数据失败: {e}") from e + + def _fetch_hk_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取港股历史数据 + + 数据来源:ak.stock_hk_hist() + + Args: + stock_code: 港股代码,如 '00700', '01810' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + 港股历史数据 DataFrame + """ + import akshare as ak + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + # 确保代码格式正确(5位数字) + code = stock_code.lower().replace('hk', '').zfill(5) + + logger.info(f"[API调用] ak.stock_hk_hist(symbol={code}, period=daily, " + f"start_date={start_date.replace('-', '')}, end_date={end_date.replace('-', '')}, adjust=qfq)") + + try: + import time as _time + api_start = _time.time() + + # 调用 akshare 获取港股日线数据 + df = ak.stock_hk_hist( + symbol=code, + period="daily", + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', ''), + adjust="qfq" # 前复权 + ) + + api_elapsed = _time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info(f"[API返回] ak.stock_hk_hist 成功: 返回 {len(df)} 行数据, 耗时 {api_elapsed:.2f}s") + logger.info(f"[API返回] 列名: {list(df.columns)}") + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + 
logger.warning(f"[API返回] ak.stock_hk_hist 返回空数据, 耗时 {api_elapsed:.2f}s") + + return df + + except Exception as e: + error_msg = str(e).lower() + + # 检测反爬封禁 + if any(keyword in error_msg for keyword in ['banned', 'blocked', '频率', 'rate', '限制']): + logger.warning(f"检测到可能被封禁: {e}") + raise RateLimitError(f"Akshare 可能被限流: {e}") from e + + raise DataFetchError(f"Akshare 获取港股数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Akshare 数据 + + Akshare 返回的列名(中文): + 日期, 开盘, 收盘, 最高, 最低, 成交量, 成交额, 振幅, 涨跌幅, 涨跌额, 换手率 + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 列名映射(Akshare 中文列名 -> 标准英文列名) + column_mapping = { + '日期': 'date', + '开盘': 'open', + '收盘': 'close', + '最高': 'high', + '最低': 'low', + '成交量': 'volume', + '成交额': 'amount', + '涨跌幅': 'pct_chg', + } + + # 重命名列 + df = df.rename(columns=column_mapping) + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_realtime_quote(self, stock_code: str, source: str = "em") -> Optional[UnifiedRealtimeQuote]: + """ + 获取实时行情数据(支持多数据源) + + 数据源优先级(可配置): + 1. em: 东方财富(akshare ak.stock_zh_a_spot_em)- 数据最全,含量比/PE/PB/市值等 + 2. sina: 新浪财经(akshare ak.stock_zh_a_spot)- 轻量级,基本行情 + 3. 
tencent: 腾讯直连接口 - 单股票查询,负载小 + + Args: + stock_code: 股票/ETF代码 + source: 数据源类型,可选 "em", "sina", "tencent" + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + circuit_breaker = get_realtime_circuit_breaker() + + # 根据代码类型选择不同的获取方法 + if _is_us_code(stock_code): + # 美股不使用 Akshare,由 YfinanceFetcher 处理 + logger.debug(f"[API跳过] {stock_code} 是美股,Akshare 不支持美股实时行情") + return None + elif _is_hk_code(stock_code): + return self._get_hk_realtime_quote(stock_code) + elif _is_etf_code(stock_code): + source_key = "akshare_etf" + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过") + return None + return self._get_etf_realtime_quote(stock_code) + else: + source_key = f"akshare_{source}" + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过") + return None + # 普通 A 股:根据 source 选择数据源 + if source == "sina": + return self._get_stock_realtime_quote_sina(stock_code) + elif source == "tencent": + return self._get_stock_realtime_quote_tencent(stock_code) + else: + return self._get_stock_realtime_quote_em(stock_code) + + def _get_stock_realtime_quote_em(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取普通 A 股实时行情数据(东方财富数据源) + + 数据来源:ak.stock_zh_a_spot_em() + 优点:数据最全,含量比、换手率、市盈率、市净率、总市值、流通市值等 + 缺点:全量拉取,数据量大,容易超时/限流 + """ + import akshare as ak + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_em" + + try: + # 检查缓存 + current_time = time.time() + if (_realtime_cache['data'] is not None and + current_time - _realtime_cache['timestamp'] < _realtime_cache['ttl']): + df = _realtime_cache['data'] + cache_age = int(current_time - _realtime_cache['timestamp']) + logger.debug(f"[缓存命中] A股实时行情(东财) - 缓存年龄 {cache_age}s/{_realtime_cache['ttl']}s") + else: + # 触发全量刷新 + logger.info(f"[缓存未命中] 触发全量刷新 A股实时行情(东财)") + last_error: Optional[Exception] = None + df = None + for attempt in range(1, 3): + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + 
+ logger.info(f"[API调用] ak.stock_zh_a_spot_em() 获取A股实时行情... (attempt {attempt}/2)") + import time as _time + api_start = _time.time() + + df = ak.stock_zh_a_spot_em() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.stock_zh_a_spot_em 成功: 返回 {len(df)} 只股票, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(source_key) + break + except Exception as e: + last_error = e + logger.info(f"[API错误] ak.stock_zh_a_spot_em 获取失败 (attempt {attempt}/2): {e}") + time.sleep(min(2 ** attempt, 5)) + + # 更新缓存:成功缓存数据;失败也缓存空数据,避免同一轮任务对同一接口反复请求 + if df is None: + logger.info(f"[API错误] ak.stock_zh_a_spot_em 最终失败: {last_error}") + circuit_breaker.record_failure(source_key, str(last_error)) + df = pd.DataFrame() + _realtime_cache['data'] = df + _realtime_cache['timestamp'] = current_time + logger.info(f"[缓存更新] A股实时行情(东财) 缓存已刷新,TTL={_realtime_cache['ttl']}s") + + if df is None or df.empty: + logger.info(f"[实时行情] A股实时行情数据为空,跳过 {stock_code}") + return None + + # 查找指定股票 + row = df[df['代码'] == stock_code] + if row.empty: + logger.info(f"[API返回] 未找到股票 {stock_code} 的实时行情") + return None + + row = row.iloc[0] + + # 使用 realtime_types.py 中的统一转换函数 + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + volume_ratio=safe_float(row.get('量比')), + turnover_rate=safe_float(row.get('换手率')), + amplitude=safe_float(row.get('振幅')), + open_price=safe_float(row.get('今开')), + high=safe_float(row.get('最高')), + low=safe_float(row.get('最低')), + pe_ratio=safe_float(row.get('市盈率-动态')), + pb_ratio=safe_float(row.get('市净率')), + total_mv=safe_float(row.get('总市值')), + circ_mv=safe_float(row.get('流通市值')), + change_60d=safe_float(row.get('60日涨跌幅')), + high_52w=safe_float(row.get('52周最高')), + low_52w=safe_float(row.get('52周最低')), + ) + + 
logger.info(f"[实时行情-东财] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, " + f"量比={quote.volume_ratio}, 换手率={quote.turnover_rate}%") + return quote + + except Exception as e: + logger.info(f"[API错误] 获取 {stock_code} 实时行情(东财)失败: {e}") + circuit_breaker.record_failure(source_key, str(e)) + return None + + def _get_stock_realtime_quote_sina(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取普通 A 股实时行情数据(新浪财经数据源) + + 数据来源:新浪财经接口(直连,单股票查询) + 优点:单股票查询,负载小,速度快 + 缺点:数据字段较少,无量比/PE/PB等 + + 接口格式:http://hq.sinajs.cn/list=sh600519,sz000001 + """ + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_sina" + symbol = _to_sina_tx_symbol(stock_code) + url = f"http://{SINA_REALTIME_ENDPOINT}={symbol}" + api_start = time.time() + + try: + headers = { + 'Referer': 'http://finance.sina.com.cn', + 'User-Agent': random.choice(USER_AGENTS) + } + + logger.info( + f"[API调用] 新浪财经接口获取 {stock_code} 实时行情: endpoint={SINA_REALTIME_ENDPOINT}, symbol={symbol}" + ) + + self._enforce_rate_limit() + response = requests.get(url, headers=headers, timeout=10) + response.encoding = 'gbk' + api_elapsed = time.time() - api_start + + if response.status_code != 200: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="http_status", + detail=f"HTTP {response.status_code}", + elapsed=api_elapsed, + error_type="HTTPStatus", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + # 解析数据:var hq_str_sh600519="贵州茅台,1866.000,1870.000,..." 
+ content = response.text.strip() + if '=""' in content or not content: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="empty_response", + detail="empty quote payload", + elapsed=api_elapsed, + error_type="EmptyResponse", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + # 提取引号内的数据 + data_start = content.find('"') + data_end = content.rfind('"') + if data_start == -1 or data_end == -1: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="malformed_payload", + detail="quote payload missing quotes", + elapsed=api_elapsed, + error_type="MalformedPayload", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + data_str = content[data_start+1:data_end] + fields = data_str.split(',') + + if len(fields) < 32: + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="insufficient_fields", + detail=f"field_count={len(fields)}", + elapsed=api_elapsed, + error_type="InsufficientFields", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + circuit_breaker.record_success(source_key) + + # 新浪数据字段顺序: + # 0:名称 1:今开 2:昨收 3:最新价 4:最高 5:最低 6:买一价 7:卖一价 + # 8:成交量(股) 9:成交额(元) ... 
30:日期 31:时间 + # 使用 realtime_types.py 中的统一转换函数 + price = safe_float(fields[3]) + pre_close = safe_float(fields[2]) + change_pct = None + change_amount = None + if price and pre_close and pre_close > 0: + change_amount = price - pre_close + change_pct = (change_amount / pre_close) * 100 + + quote = UnifiedRealtimeQuote( + code=stock_code, + name=fields[0], + source=RealtimeSource.AKSHARE_SINA, + price=price, + change_pct=change_pct, + change_amount=change_amount, + volume=safe_int(fields[8]), # 成交量(股) + amount=safe_float(fields[9]), # 成交额(元) + open_price=safe_float(fields[1]), + high=safe_float(fields[4]), + low=safe_float(fields[5]), + pre_close=pre_close, + ) + + logger.info( + f"[实时行情-新浪] {stock_code} {quote.name}: endpoint={SINA_REALTIME_ENDPOINT}, " + f"价格={quote.price}, 涨跌={quote.change_pct}, 成交量={quote.volume}, elapsed={api_elapsed:.2f}s" + ) + return quote + + except Exception as e: + api_elapsed = time.time() - api_start + category, detail = _classify_realtime_http_error(e) + failure_message = _build_realtime_failure_message( + source_name="新浪", + endpoint=SINA_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category=category, + detail=detail, + elapsed=api_elapsed, + error_type=type(e).__name__, + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + def _get_stock_realtime_quote_tencent(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取普通 A 股实时行情数据(腾讯财经数据源) + + 数据来源:腾讯财经接口(直连,单股票查询) + 优点:单股票查询,负载小,包含换手率 + 缺点:无量比/PE/PB等估值数据 + + 接口格式:http://qt.gtimg.cn/q=sh600519,sz000001 + """ + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_tencent" + symbol = _to_sina_tx_symbol(stock_code) + url = f"http://{TENCENT_REALTIME_ENDPOINT}={symbol}" + api_start = time.time() + + try: + headers = { + 'Referer': 'http://finance.qq.com', + 'User-Agent': random.choice(USER_AGENTS) + } + + logger.info( + f"[API调用] 腾讯财经接口获取 {stock_code} 实时行情: 
endpoint={TENCENT_REALTIME_ENDPOINT}, symbol={symbol}" + ) + + self._enforce_rate_limit() + response = requests.get(url, headers=headers, timeout=10) + response.encoding = 'gbk' + api_elapsed = time.time() - api_start + + if response.status_code != 200: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="http_status", + detail=f"HTTP {response.status_code}", + elapsed=api_elapsed, + error_type="HTTPStatus", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + content = response.text.strip() + if '=""' in content or not content: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="empty_response", + detail="empty quote payload", + elapsed=api_elapsed, + error_type="EmptyResponse", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + # 提取数据 + data_start = content.find('"') + data_end = content.rfind('"') + if data_start == -1 or data_end == -1: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="malformed_payload", + detail="quote payload missing quotes", + elapsed=api_elapsed, + error_type="MalformedPayload", + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + data_str = content[data_start+1:data_end] + fields = data_str.split('~') + + if len(fields) < 45: + failure_message = _build_realtime_failure_message( + source_name="腾讯", + endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category="insufficient_fields", + detail=f"field_count={len(fields)}", + elapsed=api_elapsed, + error_type="InsufficientFields", + ) + 
logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + circuit_breaker.record_success(source_key) + + # 腾讯数据字段顺序(完整): + # 1:名称 2:代码 3:最新价 4:昨收 5:今开 6:成交量(手) 7:外盘 8:内盘 + # 9-28:买卖五档 30:时间戳 31:涨跌额 32:涨跌幅(%) 33:最高 34:最低 35:收盘/成交量/成交额 + # 36:成交量(手) 37:成交额(万) 38:换手率(%) 39:市盈率 43:振幅(%) + # 44:流通市值(亿) 45:总市值(亿) 46:市净率 47:涨停价 48:跌停价 49:量比 + # 使用 realtime_types.py 中的统一转换函数 + quote = UnifiedRealtimeQuote( + code=stock_code, + name=fields[1] if len(fields) > 1 else "", + source=RealtimeSource.TENCENT, + price=safe_float(fields[3]), + change_pct=safe_float(fields[32]), + change_amount=safe_float(fields[31]) if len(fields) > 31 else None, + volume=safe_int(fields[6]) * 100 if fields[6] else None, # 腾讯返回的是手,转为股 + open_price=safe_float(fields[5]), + high=safe_float(fields[33]) if len(fields) > 33 else None, # 修正:字段 33 是最高价 + low=safe_float(fields[34]) if len(fields) > 34 else None, # 修正:字段 34 是最低价 + pre_close=safe_float(fields[4]), + turnover_rate=safe_float(fields[38]) if len(fields) > 38 else None, + amplitude=safe_float(fields[43]) if len(fields) > 43 else None, + volume_ratio=safe_float(fields[49]) if len(fields) > 49 else None, # 量比 + pe_ratio=safe_float(fields[39]) if len(fields) > 39 else None, # 市盈率 + pb_ratio=safe_float(fields[46]) if len(fields) > 46 else None, # 市净率 + circ_mv=safe_float(fields[44]) * 100000000 if len(fields) > 44 and fields[44] else None, # 流通市值(亿->元) + total_mv=safe_float(fields[45]) * 100000000 if len(fields) > 45 and fields[45] else None, # 总市值(亿->元) + ) + + logger.info( + f"[实时行情-腾讯] {stock_code} {quote.name}: endpoint={TENCENT_REALTIME_ENDPOINT}, " + f"价格={quote.price}, 涨跌={quote.change_pct}%, 量比={quote.volume_ratio}, " + f"换手率={quote.turnover_rate}%, elapsed={api_elapsed:.2f}s" + ) + return quote + + except Exception as e: + api_elapsed = time.time() - api_start + category, detail = _classify_realtime_http_error(e) + failure_message = _build_realtime_failure_message( + source_name="腾讯", + 
endpoint=TENCENT_REALTIME_ENDPOINT, + stock_code=stock_code, + symbol=symbol, + category=category, + detail=detail, + elapsed=api_elapsed, + error_type=type(e).__name__, + ) + logger.info(failure_message) + circuit_breaker.record_failure(source_key, failure_message) + return None + + def _get_etf_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取 ETF 基金实时行情数据 + + 数据来源:ak.fund_etf_spot_em() + 包含:最新价、涨跌幅、成交量、成交额、换手率等 + + Args: + stock_code: ETF 代码 + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + import akshare as ak + circuit_breaker = get_realtime_circuit_breaker() + source_key = "akshare_etf" + + try: + # 检查缓存 + current_time = time.time() + if (_etf_realtime_cache['data'] is not None and + current_time - _etf_realtime_cache['timestamp'] < _etf_realtime_cache['ttl']): + df = _etf_realtime_cache['data'] + logger.debug(f"[缓存命中] 使用缓存的ETF实时行情数据") + else: + last_error: Optional[Exception] = None + df = None + for attempt in range(1, 3): + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.fund_etf_spot_em() 获取ETF实时行情... 
(attempt {attempt}/2)") + import time as _time + api_start = _time.time() + + df = ak.fund_etf_spot_em() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.fund_etf_spot_em 成功: 返回 {len(df)} 只ETF, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(source_key) + break + except Exception as e: + last_error = e + logger.info(f"[API错误] ak.fund_etf_spot_em 获取失败 (attempt {attempt}/2): {e}") + time.sleep(min(2 ** attempt, 5)) + + if df is None: + logger.info(f"[API错误] ak.fund_etf_spot_em 最终失败: {last_error}") + circuit_breaker.record_failure(source_key, str(last_error)) + df = pd.DataFrame() + _etf_realtime_cache['data'] = df + _etf_realtime_cache['timestamp'] = current_time + + if df is None or df.empty: + logger.info(f"[实时行情] ETF实时行情数据为空,跳过 {stock_code}") + return None + + # 查找指定 ETF + row = df[df['代码'] == stock_code] + if row.empty: + logger.info(f"[API返回] 未找到 ETF {stock_code} 的实时行情") + return None + + row = row.iloc[0] + + # 使用 realtime_types.py 中的统一转换函数 + # ETF 行情数据构建 + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + volume_ratio=safe_float(row.get('量比')), + turnover_rate=safe_float(row.get('换手率')), + amplitude=safe_float(row.get('振幅')), + open_price=safe_float(row.get('开盘价')), + high=safe_float(row.get('最高价')), + low=safe_float(row.get('最低价')), + total_mv=safe_float(row.get('总市值')), + circ_mv=safe_float(row.get('流通市值')), + high_52w=safe_float(row.get('52周最高')), + low_52w=safe_float(row.get('52周最低')), + ) + + logger.info(f"[ETF实时行情] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, " + f"换手率={quote.turnover_rate}%") + return quote + + except Exception as e: + logger.info(f"[API错误] 获取 ETF {stock_code} 实时行情失败: {e}") + circuit_breaker.record_failure(source_key, str(e)) + 
return None + + def _get_hk_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取港股实时行情数据 + + 主数据源:ak.stock_hk_spot_em()(东方财富) + 备用数据源:ak.stock_hk_spot()(新浪) + 包含:最新价、涨跌幅、成交量、成交额等 + + Args: + stock_code: 港股代码 + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + import akshare as ak + circuit_breaker = get_realtime_circuit_breaker() + em_key = "akshare_hk_em" + sina_key = "akshare_hk_sina" + + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + # 确保代码格式正确(5位数字) + raw_code = stock_code.strip().lower() + if raw_code.endswith('.hk'): + raw_code = raw_code[:-3] + if raw_code.startswith('hk'): + raw_code = raw_code[2:] + code = raw_code.zfill(5) + + # --- 主数据源:东方财富 --- + if circuit_breaker.is_available(em_key): + try: + logger.info(f"[API调用] ak.stock_hk_spot_em() 获取港股实时行情...") + import time as _time + api_start = _time.time() + + df = ak.stock_hk_spot_em() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.stock_hk_spot_em 成功: 返回 {len(df)} 只港股, 耗时 {api_elapsed:.2f}s") + circuit_breaker.record_success(em_key) + + # 查找指定港股 + row = df[df['代码'] == code] + if row.empty: + logger.info(f"[API返回] 未找到港股 {code} 的实时行情 (stock_hk_spot_em)") + else: + row = row.iloc[0] + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + volume_ratio=safe_float(row.get('量比')), + turnover_rate=safe_float(row.get('换手率')), + amplitude=safe_float(row.get('振幅')), + pe_ratio=safe_float(row.get('市盈率')), + pb_ratio=safe_float(row.get('市净率')), + total_mv=safe_float(row.get('总市值')), + circ_mv=safe_float(row.get('流通市值')), + high_52w=safe_float(row.get('52周最高')), + low_52w=safe_float(row.get('52周最低')), + ) + logger.info(f"[港股实时行情] {stock_code} {quote.name}: 价格={quote.price}, 
涨跌={quote.change_pct}%, " + f"换手率={quote.turnover_rate}%") + return quote + + except Exception as e: + logger.warning(f"[API错误] ak.stock_hk_spot_em 获取港股 {stock_code} 失败: {e},尝试 stock_hk_spot 备用接口") + circuit_breaker.record_failure(em_key, str(e)) + else: + logger.info(f"[熔断] 数据源 {em_key} 处于熔断状态,尝试使用备用链路") + + # --- 备用数据源:新浪 --- + if not circuit_breaker.is_available(sina_key): + logger.info(f"[熔断] 数据源 {sina_key} 处于熔断状态,跳过备用链路") + return None + + try: + logger.info(f"[API调用] ak.stock_hk_spot() 获取港股实时行情(备用)...") + import time as _time + api_start = _time.time() + + df_spot = ak.stock_hk_spot() + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ak.stock_hk_spot 成功: 返回 {len(df_spot)} 只港股, 耗时 {api_elapsed:.2f}s") + + row = df_spot[df_spot['代码'] == code] + if row.empty: + logger.info(f"[API返回] 未找到港股 {code} 的实时行情 (stock_hk_spot)") + return None + + row = row.iloc[0] + quote = UnifiedRealtimeQuote( + code=stock_code, + name=str(row.get('名称', '')), + source=RealtimeSource.AKSHARE_EM, + price=safe_float(row.get('最新价')), + change_pct=safe_float(row.get('涨跌幅')), + change_amount=safe_float(row.get('涨跌额')), + volume=safe_int(row.get('成交量')), + amount=safe_float(row.get('成交额')), + ) + circuit_breaker.record_success(sina_key) + logger.info(f"[港股实时行情-备用] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%") + return quote + + except Exception as e: + logger.info(f"[API错误] ak.stock_hk_spot 备用接口也失败: {e}") + circuit_breaker.record_failure(sina_key, str(e)) + return None + + def get_chip_distribution(self, stock_code: str) -> Optional[ChipDistribution]: + """ + 获取筹码分布数据 + + 数据来源:ak.stock_cyq_em() + 包含:获利比例、平均成本、筹码集中度 + + 注意:ETF/指数没有筹码分布数据,会直接返回 None + + Args: + stock_code: 股票代码 + + Returns: + ChipDistribution 对象(最新一天的数据),获取失败返回 None + """ + import akshare as ak + + # 美股没有筹码分布数据(Akshare 不支持) + if _is_us_code(stock_code): + logger.debug(f"[API跳过] {stock_code} 是美股,无筹码分布数据") + return None + + # 港股没有筹码分布数据(stock_cyq_em 是 A 股专属接口) + if _is_hk_code(stock_code): 
+ logger.debug(f"[API跳过] {stock_code} 是港股,无筹码分布数据") + return None + + # ETF/指数没有筹码分布数据 + if _is_etf_code(stock_code): + logger.debug(f"[API跳过] {stock_code} 是 ETF/指数,无筹码分布数据") + return None + + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ak.stock_cyq_em(symbol={stock_code}) 获取筹码分布...") + import time as _time + api_start = _time.time() + + df = ak.stock_cyq_em(symbol=stock_code) + + api_elapsed = _time.time() - api_start + + if df.empty: + logger.warning(f"[API返回] ak.stock_cyq_em 返回空数据, 耗时 {api_elapsed:.2f}s") + return None + + logger.info(f"[API返回] ak.stock_cyq_em 成功: 返回 {len(df)} 天数据, 耗时 {api_elapsed:.2f}s") + logger.debug(f"[API返回] 筹码数据列名: {list(df.columns)}") + + # 取最新一天的数据 + latest = df.iloc[-1] + + # 使用 realtime_types.py 中的统一转换函数 + chip = ChipDistribution( + code=stock_code, + date=str(latest.get('日期', '')), + profit_ratio=safe_float(latest.get('获利比例')), + avg_cost=safe_float(latest.get('平均成本')), + cost_90_low=safe_float(latest.get('90成本-低')), + cost_90_high=safe_float(latest.get('90成本-高')), + concentration_90=safe_float(latest.get('90集中度')), + cost_70_low=safe_float(latest.get('70成本-低')), + cost_70_high=safe_float(latest.get('70成本-高')), + concentration_70=safe_float(latest.get('70集中度')), + ) + + logger.info(f"[筹码分布] {stock_code} 日期={chip.date}: 获利比例={chip.profit_ratio:.1%}, " + f"平均成本={chip.avg_cost}, 90%集中度={chip.concentration_90:.2%}, " + f"70%集中度={chip.concentration_70:.2%}") + return chip + + except Exception as e: + logger.error(f"[API错误] 获取 {stock_code} 筹码分布失败: {e}") + return None + + def get_enhanced_data(self, stock_code: str, days: int = 60) -> Dict[str, Any]: + """ + 获取增强数据(历史K线 + 实时行情 + 筹码分布) + + Args: + stock_code: 股票代码 + days: 历史数据天数 + + Returns: + 包含所有数据的字典 + """ + result = { + 'code': stock_code, + 'daily_data': None, + 'realtime_quote': None, + 'chip_distribution': None, + } + + # 获取日线数据 + try: + df = self.get_daily_data(stock_code, days=days) + result['daily_data'] = df + except 
Exception as e: + logger.error(f"获取 {stock_code} 日线数据失败: {e}") + + # 获取实时行情 + result['realtime_quote'] = self.get_realtime_quote(stock_code) + + # 获取筹码分布 + result['chip_distribution'] = self.get_chip_distribution(stock_code) + + return result + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数实时行情 (新浪接口),仅支持 A 股 + """ + if region != "cn": + return None + import akshare as ak + + # 主要指数代码映射 + indices_map = { + 'sh000001': '上证指数', + 'sz399001': '深证成指', + 'sz399006': '创业板指', + 'sh000688': '科创50', + 'sh000016': '上证50', + 'sh000300': '沪深300', + } + + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + # 使用 akshare 获取指数行情(新浪财经接口) + df = ak.stock_zh_index_spot_sina() + + results = [] + if df is not None and not df.empty: + for code, name in indices_map.items(): + # 查找对应指数 + row = df[df['代码'] == code] + if row.empty: + # 尝试带前缀查找 + row = df[df['代码'].str.contains(code)] + + if not row.empty: + row = row.iloc[0] + current = safe_float(row.get('最新价', 0)) + prev_close = safe_float(row.get('昨收', 0)) + high = safe_float(row.get('最高', 0)) + low = safe_float(row.get('最低', 0)) + + # 计算振幅 + amplitude = 0.0 + if prev_close > 0: + amplitude = (high - low) / prev_close * 100 + + results.append({ + 'code': code, + 'name': name, + 'current': current, + 'change': safe_float(row.get('涨跌额', 0)), + 'change_pct': safe_float(row.get('涨跌幅', 0)), + 'open': safe_float(row.get('今开', 0)), + 'high': high, + 'low': low, + 'prev_close': prev_close, + 'volume': safe_float(row.get('成交量', 0)), + 'amount': safe_float(row.get('成交额', 0)), + 'amplitude': amplitude, + }) + return results + + except Exception as e: + logger.error(f"[Akshare] 获取指数行情失败: {e}") + return None + + def get_market_stats(self) -> Optional[Dict[str, Any]]: + """ + 获取市场涨跌统计 + + 数据源优先级: + 1. 东财接口 (ak.stock_zh_a_spot_em) + 2. 
新浪接口 (ak.stock_zh_a_spot) + """ + import akshare as ak + + # 优先东财接口 + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ak.stock_zh_a_spot_em() 获取市场统计...") + df = ak.stock_zh_a_spot_em() + if df is not None and not df.empty: + return self._calc_market_stats(df) + except Exception as e: + logger.warning(f"[Akshare] 东财接口获取市场统计失败: {e},尝试新浪接口") + + # 东财失败后,尝试新浪接口 + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ak.stock_zh_a_spot() 获取市场统计(新浪)...") + df = ak.stock_zh_a_spot() + if df is not None and not df.empty: + return self._calc_market_stats(df) + except Exception as e: + logger.error(f"[Akshare] 新浪接口获取市场统计也失败: {e}") + + return None + + def _calc_market_stats( + self, + df: pd.DataFrame, + ) -> Optional[Dict[str, Any]]: + """从行情 DataFrame 计算涨跌统计。""" + import numpy as np + + df = df.copy() + + # 1. 提取基础比对数据:最新价、昨收 + # 兼容不同接口返回的列名 sina/em efinance tushare xtdata + code_col = next((c for c in ['代码', '股票代码', 'ts_code','stock_code'] if c in df.columns), None) + name_col = next((c for c in ['名称', '股票名称','name','name'] if c in df.columns), None) + close_col = next((c for c in ['最新价', '最新价', 'close','lastPrice'] if c in df.columns), None) + pre_close_col = next((c for c in ['昨收', '昨日收盘', 'pre_close','lastClose'] if c in df.columns), None) + amount_col = next((c for c in ['成交额', '成交额', 'amount','amount'] if c in df.columns), None) + + limit_up_count = 0 + limit_down_count = 0 + up_count = 0 + down_count = 0 + flat_count = 0 + + for code, name, current_price, pre_close, amount in zip( + df[code_col], df[name_col], df[close_col], df[pre_close_col], df[amount_col] + ): + + # 停牌过滤 efinance 的停牌数据有时候会缺失价格显示为 '-',em 显示为none + if pd.isna(current_price) or pd.isna(pre_close) or current_price in ['-'] or pre_close in ['-'] or amount == 0: + continue + + # em、efinance 为str 需要转换为float + current_price = float(current_price) + pre_close = float(pre_close) + + # 获取去除前缀的纯数字代码 + pure_code = 
normalize_stock_code(str(code)) + + # A. 确定每只股票的涨跌幅比例 (使用纯数字代码判断) + if is_bse_code(pure_code): + ratio = 0.30 + elif is_kc_cy_stock(pure_code): #pure_code.startswith(('688', '30')): + ratio = 0.20 + elif is_st_stock(name): #'ST' in str_name: + ratio = 0.05 + else: + ratio = 0.10 + + # B. 严格按照 A 股规则计算涨跌停价:昨收 * (1 ± 比例) -> 四舍五入保留2位小数 + limit_up_price = np.floor(pre_close * (1 + ratio) * 100 + 0.5) / 100.0 + limit_down_price = np.floor(pre_close * (1 - ratio) * 100 + 0.5) / 100.0 + + limit_up_price_Tolerance = round(abs(pre_close * (1 + ratio) - limit_up_price), 10) + limit_down_price_Tolerance = round(abs(pre_close * (1 - ratio) - limit_down_price), 10) + + # C. 精确比对 + if current_price > 0 : + is_limit_up = (current_price > 0) and (abs(current_price - limit_up_price) <= limit_up_price_Tolerance) + is_limit_down = (current_price > 0) and (abs(current_price - limit_down_price) <= limit_down_price_Tolerance) + + if is_limit_up: + limit_up_count += 1 + if is_limit_down: + limit_down_count += 1 + + if current_price > pre_close: + up_count += 1 + elif current_price < pre_close: + down_count += 1 + else: + flat_count += 1 + + # 统计数量 + stats = { + 'up_count': up_count, + 'down_count': down_count, + 'flat_count': flat_count, + 'limit_up_count': limit_up_count, + 'limit_down_count': limit_down_count, + 'total_amount': 0.0, + } + + # 成交额统计 + if amount_col and amount_col in df.columns: + df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce') + stats['total_amount'] = (df[amount_col].sum() / 1e8) + + return stats + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[List[Dict], List[Dict]]]: + """ + 获取行业板块涨跌榜 + + 数据源优先级: + 1. 东财接口 (ak.stock_board_industry_name_em) + 2. 
新浪接口 (ak.stock_sector_spot) + """ + import akshare as ak + + def _get_rank_top_n(df: pd.DataFrame, change_col: str, industry_name: str, n: int) -> Tuple[list, list]: + df[change_col] = pd.to_numeric(df[change_col], errors='coerce') + df = df.dropna(subset=[change_col]) + + # 涨幅前n + top = df.nlargest(n, change_col) + top_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in top.iterrows() + ] + + bottom = df.nsmallest(n, change_col) + bottom_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in bottom.iterrows() + ] + return top_sectors, bottom_sectors + + # 优先东财接口 + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ak.stock_board_industry_name_em() 获取板块排行...") + df = ak.stock_board_industry_name_em() + if df is not None and not df.empty: + change_col = '涨跌幅' + name = '板块名称' + return _get_rank_top_n(df, change_col, name, n) + + except Exception as e: + logger.warning(f"[Akshare] 东财接口获取行业板块排行失败: {e},尝试新浪接口") + + # 东财失败后,尝试新浪接口 + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ak.stock_sector_spot() 获取行业板块排行(新浪)...") + df = ak.stock_sector_spot(indicator='行业') + if df is None or df.empty: + return None + change_col = '涨跌幅' + name = '板块' + return _get_rank_top_n(df, change_col, name, n) + + except Exception as e: + logger.error(f"[Akshare] 新浪接口获取板块排行也失败: {e}") + return None + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = AkshareFetcher() + + # 测试普通股票 + print("=" * 50) + print("测试普通股票数据获取") + print("=" * 50) + try: + df = fetcher.get_daily_data('600519') # 茅台 + print(f"[股票] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[股票] 获取失败: {e}") + + # 测试 ETF 基金 + print("\n" + "=" * 50) + print("测试 ETF 基金数据获取") + print("=" * 50) + try: + df = fetcher.get_daily_data('512400') # 有色龙头ETF + print(f"[ETF] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + 
except Exception as e: + print(f"[ETF] 获取失败: {e}") + + # 测试 ETF 实时行情 + print("\n" + "=" * 50) + print("测试 ETF 实时行情获取") + print("=" * 50) + try: + quote = fetcher.get_realtime_quote('512880') # 证券ETF + if quote: + print(f"[ETF实时] {quote.name}: 价格={quote.price}, 涨跌幅={quote.change_pct}%") + else: + print("[ETF实时] 未获取到数据") + except Exception as e: + print(f"[ETF实时] 获取失败: {e}") + + # 测试港股历史数据 + print("\n" + "=" * 50) + print("测试港股历史数据获取") + print("=" * 50) + try: + df = fetcher.get_daily_data('00700') # 腾讯控股 + print(f"[港股] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[港股] 获取失败: {e}") + + # 测试港股实时行情 + print("\n" + "=" * 50) + print("测试港股实时行情获取") + print("=" * 50) + try: + quote = fetcher.get_realtime_quote('00700') # 腾讯控股 + if quote: + print(f"[港股实时] {quote.name}: 价格={quote.price}, 涨跌幅={quote.change_pct}%") + else: + print("[港股实时] 未获取到数据") + except Exception as e: + print(f"[港股实时] 获取失败: {e}") + + # 测试市场统计 + print("\n" + "=" * 50) + print("Testing get_market_stats (akshare)") + print("=" * 50) + try: + stats = fetcher.get_market_stats() + if stats: + print(f"Market Stats successfully computed:") + print(f"Up: {stats['up_count']} (Limit Up: {stats['limit_up_count']})") + print(f"Down: {stats['down_count']} (Limit Down: {stats['limit_down_count']})") + print(f"Flat: {stats['flat_count']}") + print(f"Total Amount: {stats['total_amount']:.2f} 亿 (Yi)") + else: + print("Failed to compute market stats.") + except Exception as e: + print(f"Failed to compute market stats: {e}") + + # 测试筹码分布数据 + print("\n" + "=" * 50) + print("测试筹码分布数据获取") + print("=" * 50) + try: + chip = fetcher.get_chip_distribution('600519') # 茅台 + except Exception as e: + print(f"[筹码分布] 获取失败: {e}") + + # 测试行业板块排名 + print("\n" + "=" * 50) + print("测试行业板块排名获取") + print("=" * 50) + try: + rankings = fetcher.get_sector_rankings(n=5) + if rankings: + top, bottom = rankings + print("涨幅榜 Top 5:") + for sector in top: + print(f"{sector['name']}: {sector['change_pct']}%") + print("\n跌幅榜 
Top 5:") + for sector in bottom: + print(f"{sector['name']}: {sector['change_pct']}%") + else: + print("未获取到行业板块排名数据") + except Exception as e: + print(f"[行业板块排名] 获取失败: {e}") diff --git a/src/provider/baostock_fetcher.py b/src/provider/baostock_fetcher.py new file mode 100644 index 00000000..47cd1718 --- /dev/null +++ b/src/provider/baostock_fetcher.py @@ -0,0 +1,379 @@ +# -*- coding: utf-8 -*- +""" +=================================== +BaostockFetcher - 备用数据源 2 (Priority 3) +=================================== + +数据来源:证券宝(Baostock) +特点:免费、无需 Token、需要登录管理 +优点:稳定、无配额限制 + +关键策略: +1. 管理 bs.login() 和 bs.logout() 生命周期 +2. 使用上下文管理器防止连接泄露 +3. 失败后指数退避重试 +""" + +import logging +import re +from contextlib import contextmanager +from datetime import datetime +from typing import Optional, Generator + +import pandas as pd +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, STANDARD_COLUMNS, is_bse_code, _is_hk_market +import os + +logger = logging.getLogger(__name__) + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +class BaostockFetcher(BaseFetcher): + """ + Baostock 数据源实现 + + 优先级:3 + 数据来源:证券宝 Baostock API + + 关键策略: + - 使用上下文管理器管理连接生命周期 + - 每次请求都重新登录/登出,防止连接泄露 + - 失败后指数退避重试 + + Baostock 特点: + - 免费、无需注册 + - 需要显式登录/登出 + - 数据更新略有延迟(T+1) + """ + + name = "BaostockFetcher" + priority = int(os.getenv("BAOSTOCK_PRIORITY", "3")) + + def __init__(self): + """初始化 BaostockFetcher""" + self._bs_module = None + + def _get_baostock(self): + """ + 延迟加载 baostock 模块 + + 只在首次使用时导入,避免未安装时报错 + """ + if self._bs_module is None: + import baostock as bs + self._bs_module = bs + return self._bs_module + + @contextmanager + def _baostock_session(self) -> Generator: + """ + Baostock 连接上下文管理器 + + 确保: + 
1. 进入上下文时自动登录 + 2. 退出上下文时自动登出 + 3. 异常时也能正确登出 + + 使用示例: + with self._baostock_session(): + # 在这里执行数据查询 + """ + bs = self._get_baostock() + login_result = None + + try: + # 登录 Baostock + login_result = bs.login() + + if login_result.error_code != '0': + raise DataFetchError(f"Baostock 登录失败: {login_result.error_msg}") + + logger.debug("Baostock 登录成功") + + yield bs + + finally: + # 确保登出,防止连接泄露 + try: + logout_result = bs.logout() + if logout_result.error_code == '0': + logger.debug("Baostock 登出成功") + else: + logger.warning(f"Baostock 登出异常: {logout_result.error_msg}") + except Exception as e: + logger.warning(f"Baostock 登出时发生错误: {e}") + + def _convert_stock_code(self, stock_code: str) -> str: + """ + 转换股票代码为 Baostock 格式 + + Baostock 要求的格式: + - 沪市:sh.600519 + - 深市:sz.000001 + + Args: + stock_code: 原始代码,如 '600519', '000001' + + Returns: + Baostock 格式代码,如 'sh.600519', 'sz.000001' + """ + code = stock_code.strip() + + # HK stocks are not supported by Baostock + if _is_hk_market(code): + raise DataFetchError(f"BaostockFetcher 不支持港股 {code},请使用 AkshareFetcher") + + # 已经包含前缀的情况 + if code.startswith(('sh.', 'sz.')): + return code.lower() + + # 去除可能的后缀 + code = code.replace('.SH', '').replace('.SZ', '').replace('.sh', '').replace('.sz', '') + + # ETF: Shanghai ETF (51xx, 52xx, 56xx, 58xx) -> sh; Shenzhen ETF (15xx, 16xx, 18xx) -> sz + if len(code) == 6: + if code.startswith(('51', '52', '56', '58')): + return f"sh.{code}" + if code.startswith(('15', '16', '18')): + return f"sz.{code}" + + # 根据代码前缀判断市场 + if code.startswith(('600', '601', '603', '688')): + return f"sh.{code}" + elif code.startswith(('000', '002', '300')): + return f"sz.{code}" + else: + logger.warning(f"无法确定股票 {code} 的市场,默认使用深市") + return f"sz.{code}" + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, 
start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Baostock 获取原始数据 + + 使用 query_history_k_data_plus() 获取日线数据 + + 流程: + 1. 检查是否为美股(不支持) + 2. 使用上下文管理器管理连接 + 3. 转换股票代码格式 + 4. 调用 API 查询数据 + 5. 将结果转换为 DataFrame + """ + # 美股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_us_code(stock_code): + raise DataFetchError(f"BaostockFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # 港股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_hk_market(stock_code): + raise DataFetchError(f"BaostockFetcher 不支持港股 {stock_code},请使用 AkshareFetcher") + + # 北交所不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if is_bse_code(stock_code): + raise DataFetchError( + f"BaostockFetcher 不支持北交所 {stock_code},将自动切换其他数据源" + ) + + # 转换代码格式 + bs_code = self._convert_stock_code(stock_code) + + logger.debug(f"调用 Baostock query_history_k_data_plus({bs_code}, {start_date}, {end_date})") + + with self._baostock_session() as bs: + try: + # 查询日线数据 + # adjustflag: 1-后复权,2-前复权,3-不复权 + rs = bs.query_history_k_data_plus( + code=bs_code, + fields="date,open,high,low,close,volume,amount,pctChg", + start_date=start_date, + end_date=end_date, + frequency="d", # 日线 + adjustflag="2" # 前复权 + ) + + if rs.error_code != '0': + raise DataFetchError(f"Baostock 查询失败: {rs.error_msg}") + + # 转换为 DataFrame + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + if not data_list: + raise DataFetchError(f"Baostock 未查询到 {stock_code} 的数据") + + df = pd.DataFrame(data_list, columns=rs.fields) + + return df + + except Exception as e: + if isinstance(e, DataFetchError): + raise + raise DataFetchError(f"Baostock 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Baostock 数据 + + Baostock 返回的列名: + date, open, high, low, close, volume, amount, pctChg + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 列名映射(只需要处理 pctChg) + column_mapping = { + 'pctChg': 'pct_chg', + } + + df = 
df.rename(columns=column_mapping) + + # 数值类型转换(Baostock 返回的都是字符串) + numeric_cols = ['open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + for col in numeric_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """ + 获取股票名称 + + 使用 Baostock 的 query_stock_basic 接口获取股票基本信息 + + Args: + stock_code: 股票代码 + + Returns: + 股票名称,失败返回 None + """ + # 检查缓存 + if hasattr(self, '_stock_name_cache') and stock_code in self._stock_name_cache: + return self._stock_name_cache[stock_code] + + # 初始化缓存 + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + + try: + bs_code = self._convert_stock_code(stock_code) + + with self._baostock_session() as bs: + # 查询股票基本信息 + rs = bs.query_stock_basic(code=bs_code) + + if rs.error_code == '0': + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + if data_list: + # Baostock 返回的字段:code, code_name, ipoDate, outDate, type, status + fields = rs.fields + name_idx = fields.index('code_name') if 'code_name' in fields else None + if name_idx is not None and len(data_list[0]) > name_idx: + name = data_list[0][name_idx] + self._stock_name_cache[stock_code] = name + logger.debug(f"Baostock 获取股票名称成功: {stock_code} -> {name}") + return name + + except Exception as e: + logger.warning(f"Baostock 获取股票名称失败 {stock_code}: {e}") + + return None + + def get_stock_list(self) -> Optional[pd.DataFrame]: + """ + 获取股票列表 + + 使用 Baostock 的 query_stock_basic 接口获取全部股票列表 + + Returns: + 包含 code, name 列的 DataFrame,失败返回 None + """ + try: + with self._baostock_session() as bs: + # 查询所有股票基本信息 + rs = bs.query_stock_basic() + + if rs.error_code == '0': + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + if data_list: 
+ df = pd.DataFrame(data_list, columns=rs.fields) + + # 转换代码格式(去除 sh. 或 sz. 前缀) + df['code'] = df['code'].apply(lambda x: x.split('.')[1] if '.' in x else x) + df = df.rename(columns={'code_name': 'name'}) + + # 更新缓存 + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + for _, row in df.iterrows(): + self._stock_name_cache[row['code']] = row['name'] + + logger.info(f"Baostock 获取股票列表成功: {len(df)} 条") + return df[['code', 'name']] + + except Exception as e: + logger.warning(f"Baostock 获取股票列表失败: {e}") + + return None + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = BaostockFetcher() + + try: + # 测试历史数据 + df = fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + + # 测试股票名称 + name = fetcher.get_stock_name('600519') + print(f"股票名称: {name}") + + except Exception as e: + print(f"获取失败: {e}") diff --git a/src/provider/base.py b/src/provider/base.py new file mode 100644 index 00000000..1c6bb06c --- /dev/null +++ b/src/provider/base.py @@ -0,0 +1,2500 @@ +# -*- coding: utf-8 -*- +""" +=================================== +数据源基类与管理器 +=================================== + +设计模式:策略模式 (Strategy Pattern) +- BaseFetcher: 抽象基类,定义统一接口 +- DataFetcherManager: 策略管理器,实现自动切换 + +防封禁策略: +1. 每个 Fetcher 内置流控逻辑 +2. 失败自动切换到下一个数据源 +3. 
def unwrap_exception(exc: Exception) -> Exception:
    """
    Walk the ``__cause__``/``__context__`` chain of *exc* and return the
    deepest exception, guarding against reference cycles.
    """
    seen = set()
    node = exc
    while node is not None and id(node) not in seen:
        seen.add(id(node))
        parent = node.__cause__ or node.__context__
        if parent is None:
            break
        node = parent
    return node


def summarize_exception(exc: Exception) -> Tuple[str, str]:
    """
    Produce a ``(error_type, one_line_message)`` pair for stable logging.

    The type name comes from the root cause; the message prefers the
    outermost (application-layer) text, falling back to the root cause's
    text and finally the type name, with all whitespace collapsed.
    """
    root = unwrap_exception(exc)
    kind = type(root).__name__
    text = str(exc).strip() or str(root).strip() or kind
    return kind, " ".join(text.split())
+ + Accepted formats and their normalized results: + - '600519' -> '600519' (already clean) + - 'SH600519' -> '600519' (strip SH prefix) + - 'SZ000001' -> '000001' (strip SZ prefix) + - 'BJ920748' -> '920748' (strip BJ prefix, BSE) + - 'sh600519' -> '600519' (case-insensitive) + - '600519.SH' -> '600519' (strip .SH suffix) + - '000001.SZ' -> '000001' (strip .SZ suffix) + - '920748.BJ' -> '920748' (strip .BJ suffix, BSE) + - 'HK00700' -> 'HK00700' (keep HK prefix for HK stocks) + - '1810.HK' -> 'HK01810' (normalize HK suffix to canonical prefix form) + - 'AAPL' -> 'AAPL' (keep US stock ticker as-is) + + This function is applied at the DataProviderManager layer so that + all individual fetchers receive a clean 6-digit code (for A-shares/ETFs). + """ + code = stock_code.strip() + upper = code.upper() + + # Normalize HK prefix to a canonical 5-digit form (e.g. hk1810 -> HK01810) + if upper.startswith('HK') and not upper.startswith('HK.'): + candidate = upper[2:] + if candidate.isdigit() and 1 <= len(candidate) <= 5: + return f"HK{candidate.zfill(5)}" + + # Strip SH/SZ prefix (e.g. SH600519 -> 600519) + if upper.startswith(('SH', 'SZ')) and not upper.startswith('SH.') and not upper.startswith('SZ.'): + candidate = code[2:] + # Only strip if the remainder looks like a valid numeric code + if candidate.isdigit() and len(candidate) in (5, 6): + return candidate + + # Strip BJ prefix (e.g. BJ920748 -> 920748) + if upper.startswith('BJ') and not upper.startswith('BJ.'): + candidate = code[2:] + if candidate.isdigit() and len(candidate) == 6: + return candidate + + # Strip .SH/.SZ/.BJ suffix (e.g. 600519.SH -> 600519, 920748.BJ -> 920748) + if '.' 
in code: + base, suffix = code.rsplit('.', 1) + if suffix.upper() == 'HK' and base.isdigit() and 1 <= len(base) <= 5: + return f"HK{base.zfill(5)}" + if suffix.upper() in ('SH', 'SZ', 'SS', 'BJ') and base.isdigit(): + return base + + return code + + +ETF_PREFIXES = ("51", "52", "56", "58", "15", "16", "18") + + +def _is_us_market(code: str) -> bool: + """判断是否为美股/美股指数代码(不含中文前后缀)。""" + from .us_index_mapping import is_us_stock_code, is_us_index_code + + normalized = (code or "").strip().upper() + return is_us_index_code(normalized) or is_us_stock_code(normalized) + + +def _is_hk_market(code: str) -> bool: + """ + 判定是否为港股代码。 + + 支持 `HK00700` 及纯 5 位数字形式(A 股 ETF/股票常见为 6 位)。 + """ + normalized = (code or "").strip().upper() + if normalized.endswith(".HK"): + base = normalized[:-3] + return base.isdigit() and 1 <= len(base) <= 5 + if normalized.startswith("HK"): + digits = normalized[2:] + return digits.isdigit() and 1 <= len(digits) <= 5 + if normalized.isdigit() and len(normalized) == 5: + return True + return False + + +def _is_etf_code(code: str) -> bool: + """判定 A 股 ETF 基金代码(保守规则)。""" + normalized = normalize_stock_code(code) + return ( + normalized.isdigit() + and len(normalized) == 6 + and normalized.startswith(ETF_PREFIXES) + ) + + +def _market_tag(code: str) -> str: + """返回市场标签: cn/us/hk.""" + if _is_us_market(code): + return "us" + if _is_hk_market(code): + return "hk" + return "cn" + + +def is_bse_code(code: str) -> bool: + """ + Check if the code is a Beijing Stock Exchange (BSE) A-share code. + + BSE rules (2026): + - New format (2024+): 92xxxx main trading codes + - Historical ranges: 43xxxx, 83xxxx, 87xxxx, 88xxxx + - Special instruments: 81xxxx convertible bonds, 82xxxx preferred shares + - Subscription codes: 889xxx + Note: 900xxx are Shanghai B-shares and must return False. 
def is_st_stock(name: str) -> bool:
    """
    Return True when the security name marks an ST / *ST stock.

    ST stocks carry special trading rules (typically a ±5% daily limit).
    The check is a case-insensitive substring match on 'ST'.
    """
    return 'ST' in (name or "").upper()

def is_kc_cy_stock(code: str) -> bool:
    """
    Return True for STAR Market (科创板, 688xxx) or ChiNext (创业板, 30xxxx) codes.

    Both boards trade with a ±20% daily limit. Any exchange suffix
    after a '.' is ignored before testing the leading digits.
    """
    stem = (code or "").strip().split(".")[0]
    return stem.startswith(("688", "30"))


def canonical_stock_code(code: str) -> str:
    """
    Return the canonical (trimmed, uppercase) form of a stock code.

    This is a display/storage-layer normalisation, distinct from
    normalize_stock_code (which strips exchange prefixes). Apply it at
    system input boundaries so BOT, WEB UI, API, and CLI paths agree on
    case (Issue #355).

    Examples:
        'aapl'    -> 'AAPL'
        '600519'  -> '600519' (digits unchanged)
        'hk00700' -> 'HK00700'
    """
    return (code or "").strip().upper()
实现通用的技术指标计算 + + 子类实现: + - _fetch_raw_data(): 从具体数据源获取原始数据 + - _normalize_data(): 将原始数据转换为标准格式 + """ + + name: str = "BaseFetcher" + priority: int = 99 # 优先级数字越小越优先 + + @abstractmethod + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从数据源获取原始数据(子类必须实现) + + Args: + stock_code: 股票代码,如 '600519', '000001' + start_date: 开始日期,格式 'YYYY-MM-DD' + end_date: 结束日期,格式 'YYYY-MM-DD' + + Returns: + 原始数据 DataFrame(列名因数据源而异) + """ + pass + + @abstractmethod + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化数据列名(子类必须实现) + + 将不同数据源的列名统一为: + ['date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + """ + pass + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数实时行情 + + Args: + region: 市场区域,cn=A股 us=美股 + + Returns: + List[Dict]: 指数列表,每个元素为字典,包含: + - code: 指数代码 + - name: 指数名称 + - current: 当前点位 + - change: 涨跌点数 + - change_pct: 涨跌幅(%) + - volume: 成交量 + - amount: 成交额 + """ + return None + + def get_market_stats(self) -> Optional[Dict[str, Any]]: + """ + 获取市场涨跌统计 + + Returns: + Dict: 包含: + - up_count: 上涨家数 + - down_count: 下跌家数 + - flat_count: 平盘家数 + - limit_up_count: 涨停家数 + - limit_down_count: 跌停家数 + - total_amount: 两市成交额 + """ + return None + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[List[Dict], List[Dict]]]: + """ + 获取板块涨跌榜 + + Args: + n: 返回前n个 + + Returns: + Tuple: (领涨板块列表, 领跌板块列表) + """ + return None + + def get_daily_data( + self, + stock_code: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + days: int = 30 + ) -> pd.DataFrame: + """ + 获取日线数据(统一入口) + + 流程: + 1. 计算日期范围 + 2. 调用子类获取原始数据 + 3. 标准化列名 + 4. 
计算技术指标 + + Args: + stock_code: 股票代码 + start_date: 开始日期(可选) + end_date: 结束日期(可选,默认今天) + days: 获取天数(当 start_date 未指定时使用) + + Returns: + 标准化的 DataFrame,包含技术指标 + """ + # 计算日期范围 + if end_date is None: + end_date = datetime.now().strftime('%Y-%m-%d') + + if start_date is None: + # 默认获取最近 30 个交易日(按日历日估算,多取一些) + from datetime import timedelta + start_dt = datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=days * 2) + start_date = start_dt.strftime('%Y-%m-%d') + + request_start = time.time() + logger.info(f"[{self.name}] 开始获取 {stock_code} 日线数据: 范围={start_date} ~ {end_date}") + + try: + # Step 1: 获取原始数据 + raw_df = self._fetch_raw_data(stock_code, start_date, end_date) + + if raw_df is None or raw_df.empty: + raise DataFetchError(f"[{self.name}] 未获取到 {stock_code} 的数据") + + # Step 2: 标准化列名 + df = self._normalize_data(raw_df, stock_code) + + # Step 3: 数据清洗 + df = self._clean_data(df) + + # Step 4: 计算技术指标 + df = self._calculate_indicators(df) + + elapsed = time.time() - request_start + logger.info( + f"[{self.name}] {stock_code} 获取成功: 范围={start_date} ~ {end_date}, " + f"rows={len(df)}, elapsed={elapsed:.2f}s" + ) + return df + + except Exception as e: + elapsed = time.time() - request_start + error_type, error_reason = summarize_exception(e) + logger.error( + f"[{self.name}] {stock_code} 获取失败: 范围={start_date} ~ {end_date}, " + f"error_type={error_type}, elapsed={elapsed:.2f}s, reason={error_reason}" + ) + raise DataFetchError(f"[{self.name}] {stock_code}: {error_reason}") from e + + def _clean_data(self, df: pd.DataFrame) -> pd.DataFrame: + """ + 数据清洗 + + 处理: + 1. 确保日期列格式正确 + 2. 数值类型转换 + 3. 去除空值行 + 4. 
按日期排序 + """ + df = df.copy() + + # 确保日期列为 datetime 类型 + if 'date' in df.columns: + df['date'] = pd.to_datetime(df['date']) + + # 数值列类型转换 + numeric_cols = ['open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg'] + for col in numeric_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # 去除关键列为空的行 + df = df.dropna(subset=['close', 'volume']) + + # 按日期升序排序 + df = df.sort_values('date', ascending=True).reset_index(drop=True) + + return df + + def _calculate_indicators(self, df: pd.DataFrame) -> pd.DataFrame: + """ + 计算技术指标 + + 计算指标: + - MA5, MA10, MA20: 移动平均线 + - Volume_Ratio: 量比(今日成交量 / 5日平均成交量) + """ + df = df.copy() + + # 移动平均线 + df['ma5'] = df['close'].rolling(window=5, min_periods=1).mean() + df['ma10'] = df['close'].rolling(window=10, min_periods=1).mean() + df['ma20'] = df['close'].rolling(window=20, min_periods=1).mean() + + # 量比:当日成交量 / 5日平均成交量 + # 注意:此处的 volume_ratio 是“日线成交量 / 前5日均量(shift 1)”的相对倍数, + # 与部分交易软件口径的“分时量比(同一时刻对比)”不同,含义更接近“放量倍数”。 + # 该行为目前保留(按需求不改逻辑)。 + avg_volume_5 = df['volume'].rolling(window=5, min_periods=1).mean() + df['volume_ratio'] = df['volume'] / avg_volume_5.shift(1) + df['volume_ratio'] = df['volume_ratio'].fillna(1.0) + + # 保留2位小数 + for col in ['ma5', 'ma10', 'ma20', 'volume_ratio']: + if col in df.columns: + df[col] = df[col].round(2) + + return df + + @staticmethod + def random_sleep(min_seconds: float = 1.0, max_seconds: float = 3.0) -> None: + """ + 智能随机休眠(Jitter) + + 防封禁策略:模拟人类行为的随机延迟 + 在请求之间加入不规则的等待时间 + """ + sleep_time = random.uniform(min_seconds, max_seconds) + logger.debug(f"随机休眠 {sleep_time:.2f} 秒...") + time.sleep(sleep_time) + + +class DataFetcherManager: + """ + 数据源策略管理器 + + 职责: + 1. 管理多个数据源(按优先级排序) + 2. 自动故障切换(Failover) + 3. 
提供统一的数据获取接口 + + 切换策略: + - 优先使用高优先级数据源 + - 失败后自动切换到下一个 + - 所有数据源都失败时抛出异常 + """ + + def __init__(self, fetchers: Optional[List[BaseFetcher]] = None): + """ + 初始化管理器 + + Args: + fetchers: 数据源列表(可选,默认按优先级自动创建) + """ + self._fetchers: List[BaseFetcher] = [] + self._fetchers_lock = RLock() + self._fetcher_call_locks: Dict[int, RLock] = {} + self._fetcher_call_locks_lock = RLock() + self._stock_name_cache: Dict[str, str] = {} + self._stock_name_cache_lock = RLock() + + if fetchers: + # 按优先级排序 + self._fetchers = sorted(fetchers, key=lambda f: f.priority) + else: + # 默认数据源将在首次使用时延迟加载 + self._init_default_fetchers() + self._fundamental_adapter = AkshareFundamentalAdapter() + self._tickflow_fetcher = None + self._tickflow_api_key: Optional[str] = None + self._tickflow_lock = RLock() + self._fundamental_cache: Dict[str, Dict[str, Any]] = {} + self._fundamental_cache_lock = RLock() + self._fundamental_timeout_worker_limit = 8 + self._fundamental_timeout_slots = BoundedSemaphore(self._fundamental_timeout_worker_limit) + + def _ensure_concurrency_guards(self) -> None: + """Lazily initialize thread-safety primitives for test scaffolds using __new__.""" + if not hasattr(self, "_fetchers_lock") or self._fetchers_lock is None: + self._fetchers_lock = RLock() + if not hasattr(self, "_fetcher_call_locks") or self._fetcher_call_locks is None: + self._fetcher_call_locks = {} + if not hasattr(self, "_fetcher_call_locks_lock") or self._fetcher_call_locks_lock is None: + self._fetcher_call_locks_lock = RLock() + if not hasattr(self, "_stock_name_cache") or self._stock_name_cache is None: + self._stock_name_cache = {} + if not hasattr(self, "_stock_name_cache_lock") or self._stock_name_cache_lock is None: + self._stock_name_cache_lock = RLock() + + def _get_fetchers_snapshot(self) -> List[BaseFetcher]: + self._ensure_concurrency_guards() + with self._fetchers_lock: + return list(getattr(self, "_fetchers", [])) + + def _get_fetcher_call_lock(self, fetcher: BaseFetcher) -> RLock: + 
self._ensure_concurrency_guards() + fetcher_id = id(fetcher) + with self._fetcher_call_locks_lock: + lock = self._fetcher_call_locks.get(fetcher_id) + if lock is None: + lock = RLock() + self._fetcher_call_locks[fetcher_id] = lock + return lock + + def _call_fetcher_method(self, fetcher: BaseFetcher, method_name: str, *args, **kwargs): + """Serialize shared fetcher state access through manager-owned per-instance locks.""" + method = getattr(fetcher, method_name) + with self._get_fetcher_call_lock(fetcher): + return method(*args, **kwargs) + + def _get_cached_stock_name(self, stock_code: str) -> Optional[str]: + self._ensure_concurrency_guards() + with self._stock_name_cache_lock: + return self._stock_name_cache.get(stock_code) + + def _cache_stock_name(self, stock_code: str, name: Optional[str]) -> Optional[str]: + if name is None: + return None + self._ensure_concurrency_guards() + with self._stock_name_cache_lock: + self._stock_name_cache[stock_code] = name + return name + + def _get_tickflow_fetcher(self): + """Lazily create a TickFlow fetcher for market-review-only calls.""" + from src.config import get_config + + config = get_config() + api_key = (getattr(config, "tickflow_api_key", None) or "").strip() + + if not hasattr(self, "_tickflow_lock") or self._tickflow_lock is None: + self._tickflow_lock = RLock() + + with self._tickflow_lock: + current_fetcher = getattr(self, "_tickflow_fetcher", None) + current_key = getattr(self, "_tickflow_api_key", None) + + if not api_key: + if current_fetcher is not None and hasattr(current_fetcher, "close"): + try: + current_fetcher.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 关闭旧实例失败: %s", exc) + self._tickflow_fetcher = None + self._tickflow_api_key = None + return None + + if current_fetcher is not None and current_key == api_key: + return current_fetcher + + if current_fetcher is not None and hasattr(current_fetcher, "close"): + try: + current_fetcher.close() + except Exception as exc: + 
logger.debug("[TickFlowFetcher] 切换实例时关闭失败: %s", exc) + + try: + from .tickflow_fetcher import TickFlowFetcher + + fetcher = TickFlowFetcher(api_key=api_key) + self._tickflow_fetcher = fetcher + self._tickflow_api_key = api_key + return fetcher + except Exception as exc: + logger.warning("[TickFlowFetcher] 初始化失败: %s", exc) + self._tickflow_fetcher = None + self._tickflow_api_key = None + return None + + def close(self) -> None: + """Best-effort release of manager-owned resources.""" + if not hasattr(self, "_tickflow_lock") or self._tickflow_lock is None: + self._tickflow_lock = RLock() + + with self._tickflow_lock: + current_fetcher = getattr(self, "_tickflow_fetcher", None) + self._tickflow_fetcher = None + self._tickflow_api_key = None + + if current_fetcher is not None and hasattr(current_fetcher, "close"): + try: + current_fetcher.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 关闭管理器资源失败: %s", exc) + + def __del__(self) -> None: + try: + self.close() + except Exception: + # Best-effort cleanup during interpreter shutdown. + pass + + def _get_fundamental_cache_key(self, stock_code: str, budget_seconds: Optional[float] = None) -> str: + """生成基本面缓存 key(包含预算分桶以避免低预算结果污染高预算请求)。""" + normalized_code = normalize_stock_code(stock_code) + if budget_seconds is None: + return f"{normalized_code}|budget=default" + try: + budget = max(0.0, float(budget_seconds)) + except (TypeError, ValueError): + budget = 0.0 + # 100ms bucket to balance cache reuse and scenario isolation. 
+ budget_bucket = int(round(budget * 10)) + return f"{normalized_code}|budget={budget_bucket}" + + def _prune_fundamental_cache(self, ttl_seconds: int, max_entries: int) -> None: + """Prune expired and overflow fundamental cache items.""" + with self._fundamental_cache_lock: + if not self._fundamental_cache: + return + + now_ts = time.time() + if ttl_seconds > 0: + cache_items = list(self._fundamental_cache.items()) + expired_keys = [ + key + for key, value in cache_items + if now_ts - float(value.get("ts", 0)) > ttl_seconds + ] + for key in expired_keys: + self._fundamental_cache.pop(key, None) + + if max_entries > 0 and len(self._fundamental_cache) > max_entries: + overflow = len(self._fundamental_cache) - max_entries + sorted_items = sorted( + list(self._fundamental_cache.items()), + key=lambda item: float(item[1].get("ts", 0)), + ) + for key, _ in sorted_items[:overflow]: + self._fundamental_cache.pop(key, None) + + @staticmethod + def _try_scalar_isna(value: Any, context: str) -> Optional[bool]: + """Return scalar ``pd.isna`` result, or ``None`` when callers should use fallback logic.""" + if isinstance(value, (dict, list, tuple, set, pd.DataFrame, pd.Series, pd.Index)): + return None + + if isinstance(value, np.ndarray): + if value.ndim != 0: + return None + value = value.item() + + try: + isna_result = pd.isna(value) + except (TypeError, ValueError) as exc: + if hasattr(value, "__array__"): + logger.debug( + "[%s] pd.isna failed for array-like object; re-raise: value_type=%s error_type=%s", + context, + type(value).__name__, + type(exc).__name__, + ) + raise + logger.debug( + "[%s] pd.isna fallback: value_type=%s error_type=%s", + context, + type(value).__name__, + type(exc).__name__, + ) + return None + + if isinstance(isna_result, (bool, np.bool_)): + return bool(isna_result) + + if isinstance(isna_result, np.ndarray): + if isna_result.ndim == 0: + return bool(isna_result.item()) + logger.debug( + "[%s] pd.isna returned non-scalar result: value_type=%s 
result_type=%s", + context, + type(value).__name__, + type(isna_result).__name__, + ) + return None + + logger.debug( + "[%s] pd.isna returned unexpected result type: value_type=%s result_type=%s", + context, + type(value).__name__, + type(isna_result).__name__, + ) + return None + + @staticmethod + def _is_missing_board_value(value: Any) -> bool: + """Return True when a board field value should be treated as missing.""" + if value is None: + return True + is_missing = DataFetcherManager._try_scalar_isna(value, "board_value") + if is_missing is True: + return True + text = str(value).strip() + return text == "" or text.lower() in {"nan", "none", "null", "na", "n/a"} + + @staticmethod + def _normalize_belong_boards(raw_data: Any) -> List[Dict[str, Any]]: + """Normalize belong-board results from heterogeneous providers.""" + if DataFetcherManager._is_missing_board_value(raw_data): + return [] + + normalized: List[Dict[str, Any]] = [] + dedupe = set() + + if isinstance(raw_data, pd.DataFrame): + if raw_data.empty: + return [] + name_col = next( + ( + col + for col in raw_data.columns + if str(col) in {"板块名称", "板块", "所属板块", "板块名", "name", "industry"} + ), + None, + ) + code_col = next( + ( + col + for col in raw_data.columns + if str(col) in {"板块代码", "代码", "code"} + ), + None, + ) + type_col = next( + ( + col + for col in raw_data.columns + if str(col) in {"板块类型", "类别", "type"} + ), + None, + ) + if name_col is None: + return [] + for _, row in raw_data.iterrows(): + board_name_raw = row.get(name_col, "") + if DataFetcherManager._is_missing_board_value(board_name_raw): + continue + board_name = str(board_name_raw).strip() + if board_name in dedupe: + continue + dedupe.add(board_name) + item = {"name": board_name} + if code_col is not None: + board_code_raw = row.get(code_col, "") + if not DataFetcherManager._is_missing_board_value(board_code_raw): + item["code"] = str(board_code_raw).strip() + if type_col is not None: + board_type_raw = row.get(type_col, "") + if not 
DataFetcherManager._is_missing_board_value(board_type_raw): + item["type"] = str(board_type_raw).strip() + normalized.append(item) + return normalized + + if isinstance(raw_data, dict): + raw_data = [raw_data] + + if isinstance(raw_data, (list, tuple, set)): + for item in raw_data: + if isinstance(item, dict): + board_name_raw = ( + item.get("name") + or item.get("board_name") + or item.get("板块名称") + or item.get("板块") + or item.get("所属板块") + or item.get("板块名") + or item.get("industry") + or item.get("行业") + ) + if DataFetcherManager._is_missing_board_value(board_name_raw): + continue + board_name = str(board_name_raw).strip() + if board_name in dedupe: + continue + dedupe.add(board_name) + normalized_item: Dict[str, Any] = {"name": board_name} + code_raw = ( + item.get("code") + or item.get("板块代码") + or item.get("代码") + ) + if not DataFetcherManager._is_missing_board_value(code_raw): + normalized_item["code"] = str(code_raw).strip() + type_raw = ( + item.get("type") + or item.get("板块类型") + or item.get("类别") + ) + if not DataFetcherManager._is_missing_board_value(type_raw): + normalized_item["type"] = str(type_raw).strip() + normalized.append(normalized_item) + continue + if DataFetcherManager._is_missing_board_value(item): + continue + board_name = str(item).strip() + if board_name in dedupe: + continue + dedupe.add(board_name) + normalized.append({"name": board_name}) + return normalized + + if not DataFetcherManager._is_missing_board_value(raw_data): + board_name = str(raw_data).strip() + return [{"name": board_name}] + return [] + + def _init_default_fetchers(self) -> None: + """ + 初始化默认数据源列表 + + 优先级动态调整逻辑: + - 如果配置了 TUSHARE_TOKEN:Tushare 优先级提升为 0(最高) + - 否则按默认优先级: + 0. EfinanceFetcher (Priority 0) - 最高优先级 + 1. AkshareFetcher (Priority 1) + 2. PytdxFetcher (Priority 2) - 通达信 + 2. TushareFetcher (Priority 2) + 3. BaostockFetcher (Priority 3) + 4. YfinanceFetcher (Priority 4) + 5. 
LongbridgeFetcher (Priority 5) - 长桥(美股/港股兜底) + """ + from .efinance_fetcher import EfinanceFetcher + from .akshare_fetcher import AkshareFetcher + from .tushare_fetcher import TushareFetcher + from .pytdx_fetcher import PytdxFetcher + from .baostock_fetcher import BaostockFetcher + from .yfinance_fetcher import YfinanceFetcher + from .longbridge_fetcher import LongbridgeFetcher + # 创建所有数据源实例(优先级在各 Fetcher 的 __init__ 中确定) + efinance = EfinanceFetcher() + akshare = AkshareFetcher() + tushare = TushareFetcher() # 会根据 Token 配置自动调整优先级 + pytdx = PytdxFetcher() # 通达信数据源(可配 PYTDX_HOST/PYTDX_PORT) + baostock = BaostockFetcher() + yfinance = YfinanceFetcher() + longbridge = LongbridgeFetcher() # 长桥(美股/港股兜底,懒加载) + + # 初始化数据源列表 + self._ensure_concurrency_guards() + with self._fetchers_lock: + self._fetchers = [ + efinance, + akshare, + tushare, + pytdx, + baostock, + yfinance, + longbridge, + ] + + # 按优先级排序(Tushare 如果配置了 Token 且初始化成功,优先级为 0) + self._fetchers.sort(key=lambda f: f.priority) + + # 构建优先级说明 + priority_info = ", ".join([f"{f.name}(P{f.priority})" for f in self._get_fetchers_snapshot()]) + logger.info(f"已初始化 {len(self._fetchers)} 个数据源(按优先级): {priority_info}") + + def add_fetcher(self, fetcher: BaseFetcher) -> None: + """添加数据源并重新排序""" + self._ensure_concurrency_guards() + with self._fetchers_lock: + self._fetchers.append(fetcher) + self._fetchers.sort(key=lambda f: f.priority) + + def get_daily_data( + self, + stock_code: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + days: int = 30 + ) -> Tuple[pd.DataFrame, str]: + """ + 获取日线数据(自动切换数据源) + + 故障切换策略: + 1. 美股指数/美股股票直接路由到 YfinanceFetcher + 2. 其他代码从最高优先级数据源开始尝试 + 3. 捕获异常后自动切换到下一个 + 4. 记录每个数据源的失败原因 + 5. 
所有数据源失败后抛出详细异常 + + Args: + stock_code: 股票代码 + start_date: 开始日期 + end_date: 结束日期 + days: 获取天数 + + Returns: + Tuple[DataFrame, str]: (数据, 成功的数据源名称) + + Raises: + DataFetchError: 所有数据源都失败时抛出 + """ + from .us_index_mapping import is_us_index_code, is_us_stock_code + + # Normalize code (strip SH/SZ prefix etc.) + stock_code = normalize_stock_code(stock_code) + + fetchers = self._get_fetchers_snapshot() + errors = [] + total_fetchers = len(fetchers) + request_start = time.time() + + # 快速路径:美股/港股使用专用数据源路由 + # - 配置长桥凭据后: Longbridge 为首选, YFinance/AkShare 兜底 + # - 未配置长桥: YFinance 为首选(美股), 通用 fetcher 循环(港股) + # - 美股指数: 始终 YFinance 为首选(Longbridge 不提供指数K线) + is_us_index = is_us_index_code(stock_code) + is_us = is_us_index or is_us_stock_code(stock_code) + is_hk = (not is_us) and _is_hk_market(stock_code) + + # 美股(含美股指数)使用 Longbridge/YFinance 特殊路由;港股走下方通用数据源循环 + if is_us: + prefer_lb = self._longbridge_preferred() and not is_us_index + source_order = ( + ["LongbridgeFetcher", "YfinanceFetcher"] + if prefer_lb + else ["YfinanceFetcher", "LongbridgeFetcher"] + ) + market_label = "美股指数" if is_us_index else "美股" + + for src_name in source_order: + for attempt, fetcher in enumerate(fetchers, start=1): + if fetcher.name != src_name: + continue + try: + role = "首选" if src_name == source_order[0] else "兜底" + logger.info( + f"[数据源尝试 {attempt}/{total_fetchers}] [{fetcher.name}] " + f"{market_label} {stock_code} {role}路由..." 
+ ) + df = self._call_fetcher_method( + fetcher, + "get_daily_data", + stock_code=stock_code, + start_date=start_date, + end_date=end_date, + days=days, + ) + if df is not None and not df.empty: + elapsed = time.time() - request_start + logger.info( + f"[数据源完成] {stock_code} 使用 [{fetcher.name}] 获取成功: " + f"rows={len(df)}, elapsed={elapsed:.2f}s" + ) + return df, fetcher.name + except Exception as e: + error_type, error_reason = summarize_exception(e) + error_msg = f"[{fetcher.name}] ({error_type}) {error_reason}" + logger.warning( + f"[数据源失败 {attempt}/{total_fetchers}] [{fetcher.name}] {stock_code}: " + f"error_type={error_type}, reason={error_reason}" + ) + errors.append(error_msg) + break + + error_summary = f"{market_label} {stock_code} 获取失败:\n" + "\n".join(errors) + elapsed = time.time() - request_start + logger.error(f"[数据源终止] {stock_code} 获取失败: elapsed={elapsed:.2f}s\n{error_summary}") + raise DataFetchError(error_summary) + + for attempt, fetcher in enumerate(fetchers, start=1): + try: + logger.info(f"[数据源尝试 {attempt}/{total_fetchers}] [{fetcher.name}] 获取 {stock_code}...") + df = self._call_fetcher_method( + fetcher, + "get_daily_data", + stock_code=stock_code, + start_date=start_date, + end_date=end_date, + days=days + ) + + if df is not None and not df.empty: + elapsed = time.time() - request_start + logger.info( + f"[数据源完成] {stock_code} 使用 [{fetcher.name}] 获取成功: " + f"rows={len(df)}, elapsed={elapsed:.2f}s" + ) + return df, fetcher.name + + except Exception as e: + error_type, error_reason = summarize_exception(e) + error_msg = f"[{fetcher.name}] ({error_type}) {error_reason}" + logger.warning( + f"[数据源失败 {attempt}/{total_fetchers}] [{fetcher.name}] {stock_code}: " + f"error_type={error_type}, reason={error_reason}" + ) + errors.append(error_msg) + if attempt < total_fetchers: + next_fetcher = fetchers[attempt] + logger.info(f"[数据源切换] {stock_code}: [{fetcher.name}] -> [{next_fetcher.name}]") + # 继续尝试下一个数据源 + continue + + # 所有数据源都失败 + error_summary = 
f"所有数据源获取 {stock_code} 失败:\n" + "\n".join(errors) + elapsed = time.time() - request_start + logger.error(f"[数据源终止] {stock_code} 获取失败: elapsed={elapsed:.2f}s\n{error_summary}") + raise DataFetchError(error_summary) + + @property + def available_fetchers(self) -> List[str]: + """返回可用数据源名称列表""" + return [f.name for f in self._get_fetchers_snapshot()] + + def prefetch_realtime_quotes(self, stock_codes: List[str]) -> int: + """ + 批量预取实时行情数据(在分析开始前调用) + + 策略: + 1. 检查优先级中是否包含全量拉取数据源(efinance/akshare_em) + 2. 如果不包含,跳过预取(新浪/腾讯是单股票查询,无需预取) + 3. 如果自选股数量 >= 5 且使用全量数据源,则预取填充缓存 + + 这样做的好处: + - 使用新浪/腾讯时:每只股票独立查询,无全量拉取问题 + - 使用 efinance/东财时:预取一次,后续缓存命中 + + Args: + stock_codes: 待分析的股票代码列表 + + Returns: + 预取的股票数量(0 表示跳过预取) + """ + # Normalize all codes + stock_codes = [normalize_stock_code(c) for c in stock_codes] + + from src.config import get_config + + config = get_config() + + # Issue #455: PREFETCH_REALTIME_QUOTES=false 可禁用预取,避免全市场拉取 + if not getattr(config, "prefetch_realtime_quotes", True): + logger.debug("[预取] PREFETCH_REALTIME_QUOTES=false,跳过批量预取") + return 0 + + # 如果实时行情被禁用,跳过预取 + if not config.enable_realtime_quote: + logger.debug("[预取] 实时行情功能已禁用,跳过预取") + return 0 + + # 检查优先级中是否包含全量拉取数据源 + # 注意:新增全量接口(如 tushare_realtime)时需同步更新此列表 + # 全量接口特征:一次 API 调用拉取全市场 5000+ 股票数据 + priority = config.realtime_source_priority.lower() + bulk_sources = ['efinance', 'akshare_em', 'tushare'] # 全量接口列表 + + # 如果优先级中前两个都不是全量数据源,跳过预取 + # 因为新浪/腾讯是单股票查询,不需要预取 + priority_list = [s.strip() for s in priority.split(',')] + first_bulk_source_index = None + for i, source in enumerate(priority_list): + if source in bulk_sources: + first_bulk_source_index = i + break + + # 如果没有全量数据源,或者全量数据源排在第 3 位之后,跳过预取 + if first_bulk_source_index is None or first_bulk_source_index >= 2: + logger.info(f"[预取] 当前优先级使用轻量级数据源(sina/tencent),无需预取") + return 0 + + # 如果股票数量少于 5 个,不进行批量预取(逐个查询更高效) + if len(stock_codes) < 5: + logger.info(f"[预取] 股票数量 {len(stock_codes)} < 5,跳过批量预取") + return 0 + + logger.info(f"[预取] 开始批量预取实时行情,共 
{len(stock_codes)} 只股票...") + + # 尝试通过 efinance 或 akshare 预取 + # 只需要调用一次 get_realtime_quote,缓存机制会自动拉取全市场数据 + try: + # 用第一只股票触发全量拉取 + first_code = stock_codes[0] + quote = self.get_realtime_quote(first_code) + + if quote: + logger.info(f"[预取] 批量预取完成,缓存已填充") + return len(stock_codes) + else: + logger.warning(f"[预取] 批量预取失败,将使用逐个查询模式") + return 0 + + except Exception as e: + logger.error(f"[预取] 批量预取异常: {e}") + return 0 + + def get_realtime_quote(self, stock_code: str, *, log_final_failure: bool = True): + """ + 获取实时行情数据(自动故障切换) + + 故障切换策略(按配置的优先级): + 1. 美股:使用 YfinanceFetcher.get_realtime_quote() + 2. EfinanceFetcher.get_realtime_quote() + 3. AkshareFetcher.get_realtime_quote(source="em") - 东财 + 4. AkshareFetcher.get_realtime_quote(source="sina") - 新浪 + 5. AkshareFetcher.get_realtime_quote(source="tencent") - 腾讯 + 6. 返回 None(降级兜底) + + Args: + stock_code: 股票代码 + log_final_failure: Whether to emit the final "all sources failed" + summary log when no realtime quote is available. + + Returns: + UnifiedRealtimeQuote 对象,所有数据源都失败则返回 None + """ + raw_stock_code = (stock_code or "").strip() + # Normalize code (strip SH/SZ prefix etc.) 
+ stock_code = normalize_stock_code(stock_code) + + from .akshare_fetcher import _is_us_code + from .us_index_mapping import is_us_index_code + from src.config import get_config + + config = get_config() + + # 如果实时行情功能被禁用,直接返回 None + if not config.enable_realtime_quote: + logger.debug(f"[实时行情] 功能已禁用,跳过 {stock_code}") + return None + + # ---------------------------------------------------------- + # 美股 (指数 + 个股) / 港股 — 专用双源路由 + # 配置长桥后: Longbridge 首选, YFinance/AkShare 补充 + # 未配置长桥: YFinance/AkShare 首选, Longbridge 补充 + # 美股指数: 始终 YFinance 首选(Longbridge 不提供指数行情) + # ---------------------------------------------------------- + is_us_index = is_us_index_code(stock_code) + is_us = is_us_index or _is_us_code(stock_code) + is_hk = (not is_us) and _is_hk_market(stock_code) + + if is_us or is_hk: + prefer_lb = self._longbridge_preferred() and not is_us_index + if is_us: + primary_src = "LongbridgeFetcher" if prefer_lb else "YfinanceFetcher" + secondary_src = "YfinanceFetcher" if prefer_lb else "LongbridgeFetcher" + market_label = "美股指数" if is_us_index else "美股" + primary_kw: dict = {} + secondary_kw: dict = {} + else: + primary_src = "LongbridgeFetcher" if prefer_lb else "AkshareFetcher" + secondary_src = "AkshareFetcher" if prefer_lb else "LongbridgeFetcher" + market_label = "港股" + primary_kw = {"source": "hk"} if primary_src == "AkshareFetcher" else {} + secondary_kw = {"source": "hk"} if secondary_src == "AkshareFetcher" else {} + + primary_quote = self._try_fetcher_quote(stock_code, primary_src, **primary_kw) + if primary_quote is not None: + logger.info(f"[实时行情] {market_label} {stock_code} 成功获取 (来源: {primary_src})") + primary_quote = self._supplement_quote( + stock_code, primary_quote, secondary_src, **secondary_kw, + ) + if primary_quote is not None: + return primary_quote + if log_final_failure: + logger.info(f"[实时行情] {market_label} {stock_code} 无可用数据源") + return None + + # 获取配置的数据源优先级 + source_priority = config.realtime_source_priority.split(',') + + errors = [] + # 
primary_quote holds the first successful result; we may supplement + # missing fields (volume_ratio, turnover_rate, etc.) from later sources. + primary_quote = None + + for source in source_priority: + source = source.strip().lower() + + try: + quote = None + + if source == "efinance": + # 尝试 EfinanceFetcher + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "EfinanceFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code) + break + + elif source == "akshare_em": + # 尝试 AkshareFetcher 东财数据源 + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "AkshareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code, source="em") + break + + elif source == "akshare_sina": + # 尝试 AkshareFetcher 新浪数据源 + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "AkshareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code, source="sina") + break + + elif source in ("tencent", "akshare_qq"): + # 尝试 AkshareFetcher 腾讯数据源 + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "AkshareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', stock_code, source="tencent") + break + + elif source == "tushare": + # 尝试 TushareFetcher(需要 Tushare Pro 积分) + for fetcher in self._get_fetchers_snapshot(): + if fetcher.name == "TushareFetcher": + if hasattr(fetcher, 'get_realtime_quote'): + quote = self._call_fetcher_method(fetcher, 'get_realtime_quote', raw_stock_code or stock_code) + break + + if quote is not None and quote.has_basic_data(): + if primary_quote is None: + # First successful source becomes primary + primary_quote = quote + logger.info(f"[实时行情] {stock_code} 成功获取 (来源: {source})") + # If all key supplementary fields are present, 
return early + if not self._quote_needs_supplement(primary_quote): + return primary_quote + # Otherwise, continue to try later sources for missing fields + logger.debug(f"[实时行情] {stock_code} 部分字段缺失,尝试从后续数据源补充") + supplement_attempts = 0 + else: + # Supplement missing fields from this source (limit attempts) + supplement_attempts += 1 + if supplement_attempts > 1: + logger.debug(f"[实时行情] {stock_code} 补充尝试已达上限,停止继续") + break + merged = self._merge_quote_fields(primary_quote, quote) + if merged: + logger.info(f"[实时行情] {stock_code} 从 {source} 补充了缺失字段: {merged}") + # Stop supplementing once all key fields are filled + if not self._quote_needs_supplement(primary_quote): + break + + except Exception as e: + error_msg = f"[{source}] 失败: {str(e)}" + logger.info(f"[实时行情] {stock_code} {error_msg},继续尝试下一个数据源") + errors.append(error_msg) + continue + + # Return primary even if some fields are still missing + if primary_quote is not None: + return primary_quote + + # 所有数据源都失败,返回 None(降级兜底) + if log_final_failure: + if errors: + logger.info(f"[实时行情] {stock_code} 所有数据源均失败: {'; '.join(errors)}") + else: + logger.info(f"[实时行情] {stock_code} 无可用数据源") + + return None + + # Fields worth supplementing from secondary sources when the primary + # source returns None for them. Ordered by importance. + _SUPPLEMENT_FIELDS = [ + 'volume_ratio', 'turnover_rate', + 'pe_ratio', 'pb_ratio', 'total_mv', 'circ_mv', + 'amplitude', + ] + + @classmethod + def _quote_needs_supplement(cls, quote) -> bool: + """Check if any key supplementary field is still None.""" + for f in cls._SUPPLEMENT_FIELDS: + if getattr(quote, f, None) is None: + return True + return False + + @classmethod + def _merge_quote_fields(cls, primary, secondary) -> list: + """ + Copy non-None fields from *secondary* into *primary* where + *primary* has None. Returns list of field names that were filled. 
+ """ + filled = [] + for f in cls._SUPPLEMENT_FIELDS: + if getattr(primary, f, None) is None: + val = getattr(secondary, f, None) + if val is not None: + setattr(primary, f, val) + filled.append(f) + return filled + + def _longbridge_preferred(self) -> bool: + """Return True when Longbridge keys are configured and available. + + When True, non-A-share routing (US & HK) uses Longbridge as the + primary data source with Yfinance/AkShare as fallback. + """ + for f in self._get_fetchers_snapshot(): + if f.name == "LongbridgeFetcher": + return hasattr(f, '_is_available') and f._is_available() + return False + + def _try_fetcher_quote(self, stock_code: str, fetcher_name: str, **kw): + """Try to get a realtime quote from a named fetcher; returns quote or None.""" + for f in self._get_fetchers_snapshot(): + if f.name != fetcher_name: + continue + if not hasattr(f, 'get_realtime_quote'): + return None + try: + q = self._call_fetcher_method(f, 'get_realtime_quote', stock_code, **kw) + if q is not None and q.has_basic_data(): + return q + except Exception as e: + logger.debug(f"[实时行情] {stock_code} {fetcher_name} 获取失败: {e}") + return None + return None + + def _supplement_quote(self, stock_code: str, primary_quote, fetcher_name: str, **kw): + """Supplement *primary_quote* with data from *fetcher_name*. + + If *primary_quote* is None, try *fetcher_name* as the sole source. + Returns the (potentially enriched) quote, or None. 
+ """ + if primary_quote is not None: + if not self._quote_needs_supplement(primary_quote): + return primary_quote + try: + secondary = self._try_fetcher_quote(stock_code, fetcher_name, **kw) + if secondary is not None: + filled = self._merge_quote_fields(primary_quote, secondary) + if filled: + logger.info(f"[实时行情] {stock_code} 从 {fetcher_name} 补充了: {filled}") + except Exception as e: + logger.debug(f"[实时行情] {stock_code} {fetcher_name} 补充失败: {e}") + return primary_quote + + q = self._try_fetcher_quote(stock_code, fetcher_name, **kw) + if q is not None: + logger.info(f"[实时行情] {stock_code} 从 {fetcher_name} 获取成功 (独立数据源)") + return q + + def _supplement_from_longbridge(self, stock_code: str, primary_quote): + """Shortcut kept for backward-compat with A-share general loop.""" + return self._supplement_quote(stock_code, primary_quote, "LongbridgeFetcher") + + def get_chip_distribution(self, stock_code: str): + """ + 获取筹码分布数据(带熔断和多数据源降级) + + 策略: + 1. 检查配置开关 + 2. 检查熔断器状态 + 3. 依次尝试多个数据源:数据源优先级与获取daily的数据优先级一致 + 4. 所有数据源失败则返回 None(降级兜底) + + Args: + stock_code: 股票代码 + + Returns: + ChipDistribution 对象,失败则返回 None + """ + # Normalize code (strip SH/SZ prefix etc.) 
    def get_stock_name(self, stock_code: str, allow_realtime: bool = True) -> Optional[str]:
        """
        Resolve the Chinese display name for a stock code, trying sources in order.

        Lookup order:
        1. In-memory name cache.
        2. Static ``STOCK_NAME_MAP`` and the local ``stocks.index.json`` index.
        3. Realtime quote (fastest remote path; can be disabled via
           ``allow_realtime=False`` for lightweight prefetching).
        4. Each registered fetcher's ``get_stock_name`` method, in priority order.

        Args:
            stock_code: Stock code; may carry an SH/SZ-style prefix (normalized here).
            allow_realtime: Whether to query realtime quote first. Set False when
                caller only wants lightweight prefetch without triggering heavy
                realtime source calls.

        Returns:
            The stock's Chinese name. NOTE(review): despite the ``Optional[str]``
            annotation, the all-sources-failed path returns ``""`` (empty string),
            not ``None`` — callers should treat falsy as "not found".
        """
        raw_stock_code = (stock_code or "").strip()
        # Normalize code (strip SH/SZ prefix etc.)
        stock_code = normalize_stock_code(stock_code)
        static_name = STOCK_NAME_MAP.get(stock_code)

        # 1. Cache hit wins outright (may return whatever string was cached).
        cached_name = self._get_cached_stock_name(stock_code)
        if cached_name is not None:
            return cached_name

        # Static map / index lookups: cache the hit, but fall back to the
        # original value if _cache_stock_name returns a falsy result.
        if is_meaningful_stock_name(static_name, stock_code):
            return self._cache_stock_name(stock_code, static_name) or static_name

        index_name = get_index_stock_name(stock_code)
        if is_meaningful_stock_name(index_name, stock_code):
            return self._cache_stock_name(stock_code, index_name) or index_name

        # 2. Try the realtime quote (fast remote path; optional).
        #    Use the raw (pre-normalization) code so market-prefixed lookups work.
        if allow_realtime:
            quote = self.get_realtime_quote(raw_stock_code or stock_code, log_final_failure=False)
            if quote and hasattr(quote, 'name') and is_meaningful_stock_name(getattr(quote, 'name', ''), stock_code):
                name = quote.name
                self._cache_stock_name(stock_code, name)
                logger.info(f"[股票名称] 从实时行情获取: {stock_code} -> {name}")
                return name

        # 3. Fall through to each data source's own get_stock_name.
        #    US tickers are only attempted on sources known to support them.
        from .akshare_fetcher import _is_us_code
        is_us = _is_us_code(stock_code)
        _US_CAPABLE_FETCHERS = {"YfinanceFetcher", "LongbridgeFetcher"}
        for fetcher in self._get_fetchers_snapshot():
            if not hasattr(fetcher, 'get_stock_name'):
                continue
            if is_us and fetcher.name not in _US_CAPABLE_FETCHERS:
                continue
            try:
                name = self._call_fetcher_method(fetcher, 'get_stock_name', stock_code)
                if is_meaningful_stock_name(name, stock_code):
                    self._cache_stock_name(stock_code, name)
                    logger.info(f"[股票名称] 从 {fetcher.name} 获取: {stock_code} -> {name}")
                    return name
            except Exception as e:
                logger.debug(f"[股票名称] {fetcher.name} 获取失败: {e}")
                continue

        # 4. Every source failed — return empty string (NOT None; see docstring).
        logger.warning(f"[股票名称] 所有数据源都无法获取 {stock_code} 的名称")
        return ""
+ """ + stock_code = normalize_stock_code(stock_code) + if _market_tag(stock_code) != "cn": + return [] + for fetcher in self._fetchers: + if not hasattr(fetcher, "get_belong_board"): + continue + try: + raw_data = fetcher.get_belong_board(stock_code) + boards = self._normalize_belong_boards(raw_data) + if boards: + logger.info(f"[{fetcher.name}] 获取所属板块成功: {stock_code}, count={len(boards)}") + return boards + except Exception as e: + logger.debug(f"[{fetcher.name}] 获取所属板块失败: {e}") + continue + return [] + + def prefetch_stock_names(self, stock_codes: List[str], use_bulk: bool = False) -> None: + """ + Pre-fetch stock names into cache before parallel analysis (Issue #455). + + When use_bulk=False, only calls get_stock_name per code (no get_stock_list), + avoiding full-market fetch. Sequential execution to avoid rate limits. + + Args: + stock_codes: Stock codes to prefetch. + use_bulk: If True, may use get_stock_list (full fetch). Default False. + """ + if not stock_codes: + return + stock_codes = [normalize_stock_code(c) for c in stock_codes] + if use_bulk: + self.batch_get_stock_names(stock_codes) + return + for code in stock_codes: + # Skip realtime lookup to avoid triggering expensive full-market quote + # requests during the prefetch phase. + self.get_stock_name(code, allow_realtime=False) + + def batch_get_stock_names(self, stock_codes: List[str]) -> Dict[str, str]: + """ + 批量获取股票中文名称 + + 先尝试从支持批量查询的数据源获取股票列表, + 然后再逐个查询缺失的股票名称。 + + Args: + stock_codes: 股票代码列表 + + Returns: + {股票代码: 股票名称} 字典 + """ + result = {} + missing_codes = set(stock_codes) + + # 1. 先检查缓存 + self._ensure_concurrency_guards() + with self._stock_name_cache_lock: + for code in stock_codes: + cached_name = self._stock_name_cache.get(code) + if cached_name is not None: + result[code] = cached_name + missing_codes.discard(code) + + if not missing_codes: + return result + + # 2. 
    def get_main_indices(self, region: str = "cn") -> List[Dict[str, Any]]:
        """Fetch realtime quotes for the main market indices, with source fallback.

        For the CN region a dedicated TickFlow fetcher (if registered) is tried
        first; otherwise — or on its failure/empty result — every registered
        fetcher is tried in priority order.

        Args:
            region: Market region tag; only "cn" gets the TickFlow fast path.

        Returns:
            A list of index-quote dicts from the first source that returned
            data; an empty list when every source failed or returned nothing.
        """
        if region == "cn":
            tickflow_fetcher = self._get_tickflow_fetcher()
            if tickflow_fetcher is not None:
                try:
                    data = tickflow_fetcher.get_main_indices(region=region)
                    if data:
                        logger.info("[TickFlowFetcher] 获取指数行情成功")
                        return data
                except Exception as e:
                    logger.warning(f"[TickFlowFetcher] 获取指数行情失败: {e}")

        # Generic fallback: iterate sources in their configured priority order.
        for fetcher in self._fetchers:
            try:
                data = fetcher.get_main_indices(region=region)
                if data:
                    logger.info(f"[{fetcher.name}] 获取指数行情成功")
                    return data
            except Exception as e:
                logger.warning(f"[{fetcher.name}] 获取指数行情失败: {e}")
                continue
        return []
    def _run_with_timeout(
        self,
        task: Callable[[], Any],
        timeout_seconds: float,
        task_name: str,
    ) -> Tuple[Optional[Any], Optional[str], int]:
        """
        Execute a task in a short-lived daemon thread and enforce a timeout.

        Worker slots are bounded by the ``_fundamental_timeout_slots``
        semaphore: if no slot is free the call fails fast instead of spawning
        an unbounded number of threads.

        Args:
            task: Zero-argument callable to run.
            timeout_seconds: Wall-clock budget; clamped to >= 0.
            task_name: Label used in error messages and the thread name.

        Returns:
            (result, error, duration_ms) — exactly one of result/error is
            meaningful; on timeout, result is None and error mentions timeout.
        """
        start = time.time()
        timeout_value = max(0.0, timeout_seconds)
        if timeout_value <= 0:
            # No budget at all: report a timeout without doing any work.
            return None, f"{task_name} timeout", 0
        result_holder: Dict[str, Any] = {}
        error_holder: Dict[str, Exception] = {}

        # Non-blocking acquire: fail fast when the bounded worker pool is full.
        if not self._fundamental_timeout_slots.acquire(blocking=False):
            return None, f"{task_name} timeout worker pool exhausted", int(timeout_value * 1000)

        def runner() -> None:
            try:
                result_holder["value"] = task()
            except Exception as exc:
                error_holder["value"] = exc
            finally:
                # The runner owns the slot once started; guard against a
                # bounded-semaphore over-release raising ValueError.
                try:
                    self._fundamental_timeout_slots.release()
                except ValueError:
                    pass

        worker = Thread(target=runner, daemon=True, name=f"fundamental-{task_name}")
        try:
            worker.start()
        except Exception as exc:
            # start() failed, so runner never ran and never released the slot —
            # release it here instead (same ValueError guard as above).
            try:
                self._fundamental_timeout_slots.release()
            except ValueError:
                pass
            return None, str(exc), int((time.time() - start) * 1000)
        worker.join(timeout=timeout_value)
        if worker.is_alive():
            # Timed out: the daemon thread keeps running and will release its
            # slot in runner's finally when the task eventually finishes.
            # NOTE(review): a task that hangs forever would pin its slot until
            # process exit — acceptable only because the pool is bounded.
            return None, f"{task_name} timeout", int(timeout_value * 1000)
        if "value" in error_holder:
            return None, str(error_holder["value"]), int((time.time() - start) * 1000)
        return result_holder.get("value"), None, int((time.time() - start) * 1000)
+ + Returns: + (result, error, total_duration_ms) + """ + config = self._get_fundamental_config() + attempts = max(1, int(config.fundamental_retry_max)) + remaining_seconds = max(0.0, float(timeout_seconds)) + total_cost_ms = 0 + last_error: Optional[str] = None + + for _ in range(attempts): + if remaining_seconds <= 0: + break + result, err, cost_ms = self._run_with_timeout(task, remaining_seconds, task_name) + total_cost_ms += cost_ms + remaining_seconds = max(0.0, remaining_seconds - cost_ms / 1000) + if err is None: + return result, None, total_cost_ms + last_error = err + if remaining_seconds <= 0: + break + + return None, last_error, total_cost_ms + + def _get_fundamental_config(self): + from src.config import get_config + return get_config() + + @staticmethod + def _normalize_source_chain( + entries: Any, + provider: str, + result: str, + duration_ms: int, + ) -> List[Dict[str, Any]]: + """Normalize free-form source chain entries to structured dict list.""" + if entries is None: + return [{"provider": provider, "result": result, "duration_ms": duration_ms}] + + normalized: List[Dict[str, Any]] = [] + if not isinstance(entries, (list, tuple)): + entries = [entries] + + for item in entries: + if isinstance(item, dict): + normalized.append({ + "provider": str(item.get("provider") or provider), + "result": str(item.get("result") or result), + "duration_ms": int(item.get("duration_ms", duration_ms)), + }) + continue + + if item is None: + continue + + provider_name = str(item) + normalized.append({ + "provider": provider_name, + "result": result, + "duration_ms": duration_ms, + }) + + if not normalized: + return [{"provider": provider, "result": result, "duration_ms": duration_ms}] + + return normalized + + @staticmethod + def _block_status(payload: Dict[str, Any], available: bool = True) -> str: + if not available: + return "not_supported" + if not payload: + return "partial" + return "ok" + + @staticmethod + def _build_fundamental_block( + status: str, + 
    @staticmethod
    def _has_meaningful_payload(payload: Any) -> bool:
        """Return True if *payload* contains at least one meaningful value.

        Recursively descends dicts, lists/tuples/sets, pandas frames/series
        and numpy arrays; a payload is "meaningful" when any leaf survives:
        None, placeholder strings ("", "-", "nan", "none", "null", "n/a",
        "na", case/whitespace-insensitive), empty containers, and NA-like
        scalars (per ``_try_scalar_isna``) all count as empty.
        """
        if payload is None:
            return False
        if isinstance(payload, str):
            normalized = payload.strip().lower()
            return normalized not in ("", "-", "nan", "none", "null", "n/a", "na")
        if isinstance(payload, dict):
            return any(DataFetcherManager._has_meaningful_payload(v) for v in payload.values())
        if isinstance(payload, pd.DataFrame):
            if payload.empty:
                return False
            # Flatten all cells and test each scalar individually.
            return any(
                DataFetcherManager._has_meaningful_payload(v)
                for v in payload.to_numpy().flat
            )
        if isinstance(payload, (pd.Series, pd.Index)):
            return any(DataFetcherManager._has_meaningful_payload(v) for v in payload.tolist())
        if isinstance(payload, np.ndarray):
            if payload.ndim == 0:
                # 0-d array: unwrap to a Python scalar and fall through to
                # the scalar NA check below.
                payload = payload.item()
            else:
                return any(
                    DataFetcherManager._has_meaningful_payload(v)
                    for v in payload.flat
                )
        if isinstance(payload, (list, tuple, set)):
            return any(DataFetcherManager._has_meaningful_payload(v) for v in payload)
        # Remaining scalars: NA-like (NaN/NaT/...) means empty; anything else
        # (numbers, bools, objects) counts as meaningful.
        if DataFetcherManager._try_scalar_isna(payload, "fundamental_payload") is True:
            return False
        return True
"earnings", + "institution", + "capital_flow", + "dragon_tiger", + "boards", + ): + payload = context.get(block, {}) + if isinstance(payload, dict) and DataFetcherManager._has_meaningful_payload(payload.get("data")): + return True + return False + + def _build_market_not_supported(self, market: str, reason: str) -> Dict[str, Any]: + blocks = { + "valuation": self._build_fundamental_block( + "partial" if market == "etf" else "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "growth": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "earnings": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "institution": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "capital_flow": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "dragon_tiger": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + "boards": self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + [reason], + ), + } + return { + "market": market, + "status": "partial" if market == "etf" else "not_supported", + "coverage": { + block: blocks[block]["status"] for block in blocks + }, + "source_chain": [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + "errors": [reason], + **blocks, + } + + def build_failed_fundamental_context(self, stock_code: str, reason: str) -> 
Dict[str, Any]: + """Build a consistent failed-context payload for caller-side fallback.""" + market = _market_tag(stock_code) + block_names = ( + "valuation", + "growth", + "earnings", + "institution", + "capital_flow", + "dragon_tiger", + "boards", + ) + blocks = { + block: self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + [reason], + ) + for block in block_names + } + return { + "market": market, + "status": "failed", + "coverage": {block: "failed" for block in block_names}, + "source_chain": [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + "errors": [reason], + **blocks, + } + + def get_fundamental_context( + self, + stock_code: str, + budget_seconds: Optional[float] = None + ) -> Dict[str, Any]: + """ + Aggregate fundamental blocks with fail-open semantics. + """ + from src.config import get_config + + config = get_config() + if not config.enable_fundamental_pipeline: + return self._build_market_not_supported( + market=_market_tag(stock_code), + reason="fundamental pipeline disabled", + ) + + stock_code = normalize_stock_code(stock_code) + market = _market_tag(stock_code) + is_etf = _is_etf_code(stock_code) + if market in {"us", "hk"}: + return self._build_market_not_supported( + market=market, + reason="market not supported", + ) + + stage_timeout = float( + budget_seconds if budget_seconds is not None else config.fundamental_stage_timeout_seconds + ) + stage_timeout = max(0.0, stage_timeout) + fetch_timeout = float(config.fundamental_fetch_timeout_seconds) + fetch_timeout = max(0.0, fetch_timeout) + + cache_ttl = int(config.fundamental_cache_ttl_seconds) + cache_max_entries = max(0, int(getattr(config, "fundamental_cache_max_entries", 256))) + cache_key = self._get_fundamental_cache_key(stock_code, stage_timeout) + if cache_ttl > 0: + self._prune_fundamental_cache(cache_ttl, cache_max_entries) + with self._fundamental_cache_lock: + cache_item = 
self._fundamental_cache.get(cache_key) + if cache_item: + age = time.time() - float(cache_item.get("ts", 0)) + if age <= cache_ttl: + return cache_item.get("context", {}) + + remaining_seconds = stage_timeout + result_ctx: Dict[str, Any] = { + "market": market, + "valuation": {}, + "growth": {}, + "earnings": {}, + "institution": {}, + "capital_flow": {}, + "dragon_tiger": {}, + "boards": {}, + "coverage": {}, + "source_chain": [], + "errors": [], + } + + start_ts = time.time() + + def _consume_budget(consumed_ms: int) -> None: + nonlocal remaining_seconds + remaining_seconds = max(0.0, remaining_seconds - consumed_ms / 1000.0) + + valuation_timeout = min(fetch_timeout, remaining_seconds) + if valuation_timeout > 0: + quote_payload, valuation_err, valuation_ms = self._run_with_retry( + lambda: self.get_realtime_quote(stock_code), + valuation_timeout, + "fundamental_valuation", + ) + _consume_budget(valuation_ms) + else: + quote_payload, valuation_err, valuation_ms = None, "fundamental stage timeout", 0 + + valuation_payload = { + "pe_ratio": getattr(quote_payload, "pe_ratio", None) if quote_payload else None, + "pb_ratio": getattr(quote_payload, "pb_ratio", None) if quote_payload else None, + "total_mv": getattr(quote_payload, "total_mv", None) if quote_payload else None, + "circ_mv": getattr(quote_payload, "circ_mv", None) if quote_payload else None, + } + valuation_status = self._infer_block_status( + valuation_payload, + "partial" if quote_payload is not None else "not_supported", + ) + if valuation_status == "partial" and valuation_err and not self._has_meaningful_payload(valuation_payload): + valuation_status = "failed" + result_ctx["valuation"] = self._build_fundamental_block( + valuation_status, + valuation_payload, + self._normalize_source_chain( + [{"provider": "realtime_quote", "result": valuation_status, "duration_ms": valuation_ms}], + "realtime_quote", + valuation_status, + valuation_ms, + ), + [valuation_err] if valuation_err else [], + ) + + # growth 
/ earnings / institution (one AkShare call) + if remaining_seconds <= 0: + bundle_status = "failed" + bundle_payload: Dict[str, Any] = {} + bundle_errors = ["fundamental stage timeout"] + bundle_ms = 0 + else: + bundle_timeout = min(fetch_timeout, remaining_seconds) + bundle_payload, bundle_err_msg, bundle_ms = self._run_with_retry( + lambda: self._fundamental_adapter.get_fundamental_bundle(stock_code), + bundle_timeout, + "fundamental_bundle", + ) + _consume_budget(bundle_ms) + if not isinstance(bundle_payload, dict): + bundle_status = "failed" + bundle_payload = {} + bundle_errors = ["fundamental_bundle failed"] + if bundle_err_msg: + bundle_errors.append(bundle_err_msg) + else: + bundle_status = str(bundle_payload.get("status", "not_supported")) + bundle_errors = [bundle_err_msg] if bundle_err_msg else [] + + bundle_chain = self._normalize_source_chain( + bundle_payload.get("source_chain", []), + "fundamental_bundle", + bundle_status, + bundle_ms, + ) if isinstance(bundle_payload, dict) else self._normalize_source_chain( + None, + "fundamental_bundle", + bundle_status, + bundle_ms, + ) + growth_payload = bundle_payload.get("growth", {}) if isinstance(bundle_payload, dict) else {} + earnings_payload = bundle_payload.get("earnings", {}) if isinstance(bundle_payload, dict) else {} + institution_payload = bundle_payload.get("institution", {}) if isinstance(bundle_payload, dict) else {} + if not isinstance(growth_payload, dict): + growth_payload = {} + else: + growth_payload = dict(growth_payload) + if not isinstance(earnings_payload, dict): + earnings_payload = {} + else: + earnings_payload = dict(earnings_payload) + if not isinstance(institution_payload, dict): + institution_payload = {} + else: + institution_payload = dict(institution_payload) + + # Derive TTM dividend yield from already-fetched quote price; avoid extra quote calls. 
+ earnings_extra_errors: List[str] = [] + dividend_payload = earnings_payload.get("dividend") + if isinstance(dividend_payload, dict): + dividend_payload = dict(dividend_payload) + ttm_cash_raw = dividend_payload.get("ttm_cash_dividend_per_share") + ttm_cash = None + if ttm_cash_raw is not None: + try: + ttm_cash = float(ttm_cash_raw) + except (TypeError, ValueError): + earnings_extra_errors.append("invalid_ttm_cash_dividend_per_share") + if isinstance(quote_payload, dict): + latest_price_raw = quote_payload.get("price") + else: + latest_price_raw = getattr(quote_payload, "price", None) if quote_payload else None + latest_price = None + if latest_price_raw is not None: + try: + latest_price = float(latest_price_raw) + except (TypeError, ValueError): + latest_price = None + ttm_yield = None + if ttm_cash is not None: + if latest_price is not None and latest_price > 0: + ttm_yield = round(ttm_cash / latest_price * 100.0, 4) + else: + earnings_extra_errors.append("invalid_price_for_ttm_dividend_yield") + + dividend_payload["ttm_dividend_yield_pct"] = ttm_yield + if ttm_yield is not None: + dividend_payload["yield_formula"] = "ttm_cash_dividend_per_share / latest_price * 100" + earnings_payload["dividend"] = dividend_payload + + adapter_errors = list(bundle_payload.get("errors", [])) if isinstance(bundle_payload, dict) else [] + adapter_errors.extend(bundle_errors) + growth_errors = list(adapter_errors) + earnings_errors = list(adapter_errors) + earnings_errors.extend(earnings_extra_errors) + institution_errors = list(adapter_errors) + + growth_status = self._infer_block_status(growth_payload, bundle_status) + earnings_status = self._infer_block_status(earnings_payload, bundle_status) + institution_status = self._infer_block_status(institution_payload, bundle_status) + + result_ctx["growth"] = self._build_fundamental_block( + growth_status, + growth_payload, + bundle_chain, + growth_errors, + ) + result_ctx["earnings"] = self._build_fundamental_block( + 
earnings_status, + earnings_payload, + bundle_chain, + earnings_errors, + ) + result_ctx["institution"] = self._build_fundamental_block( + institution_status, + institution_payload, + bundle_chain, + institution_errors, + ) + + # capital flow + if is_etf: + result_ctx["capital_flow"] = self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["etf not fully supported"], + ) + result_ctx["dragon_tiger"] = self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["etf not fully supported"], + ) + result_ctx["boards"] = self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["etf not fully supported"], + ) + result_ctx["status"] = "partial" + else: + capital_flow_budget = min(fetch_timeout, remaining_seconds) + capital_flow_start = time.time() + result_ctx["capital_flow"] = self.get_capital_flow_context( + stock_code, + budget_seconds=capital_flow_budget, + ) + _consume_budget(int((time.time() - capital_flow_start) * 1000)) + + dragon_tiger_budget = min(fetch_timeout, remaining_seconds) + dragon_tiger_start = time.time() + result_ctx["dragon_tiger"] = self.get_dragon_tiger_context( + stock_code, + budget_seconds=dragon_tiger_budget, + ) + _consume_budget(int((time.time() - dragon_tiger_start) * 1000)) + + result_ctx["boards"] = self.get_board_context( + stock_code, + budget_seconds=min(fetch_timeout, remaining_seconds), + ) + + block_statuses = { + "valuation": result_ctx["valuation"].get("status", "not_supported"), + "growth": result_ctx["growth"].get("status", "not_supported"), + "earnings": result_ctx["earnings"].get("status", "not_supported"), + "institution": result_ctx["institution"].get("status", "not_supported"), + "capital_flow": result_ctx["capital_flow"].get("status", "not_supported"), + 
"dragon_tiger": result_ctx["dragon_tiger"].get("status", "not_supported"), + "boards": result_ctx["boards"].get("status", "not_supported"), + } + result_ctx["coverage"] = block_statuses + for block in ( + "valuation", + "growth", + "earnings", + "institution", + "capital_flow", + "dragon_tiger", + "boards", + ): + result_ctx["errors"].extend(result_ctx[block].get("errors", [])) + result_ctx["source_chain"].extend(result_ctx[block].get("source_chain", [])) + + if is_etf: + # Keep ETF downgrade semantics for overall status even when valuation is available. + result_ctx["status"] = ( + "not_supported" if all(value == "not_supported" for value in block_statuses.values()) else "partial" + ) + elif all(value == "not_supported" for value in block_statuses.values()): + result_ctx["status"] = "not_supported" + elif "failed" in block_statuses.values() or "partial" in block_statuses.values(): + result_ctx["status"] = "partial" + else: + result_ctx["status"] = "ok" + + result_ctx["elapsed_ms"] = int((time.time() - start_ts) * 1000) + if cache_ttl > 0 and self._should_cache_fundamental_context(result_ctx): + with self._fundamental_cache_lock: + self._fundamental_cache[cache_key] = { + "ts": time.time(), + "context": result_ctx, + } + self._prune_fundamental_cache(cache_ttl, cache_max_entries) + return result_ctx + + def get_capital_flow_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: + """资金流向块(fail-open)。""" + from src.config import get_config + + config = get_config() + stock_code = normalize_stock_code(stock_code) + timeout = float(budget_seconds if budget_seconds is not None else config.fundamental_fetch_timeout_seconds) + if _market_tag(stock_code) != "cn" or _is_etf_code(stock_code): + return self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["not supported"], + ) + + if timeout <= 0: + return self._build_fundamental_block( + "failed", + 
{}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + ["fundamental stage timeout"], + ) + payload, err, cost_ms = self._run_with_retry( + lambda: self._fundamental_adapter.get_capital_flow(stock_code), + timeout, + "capital_flow", + ) + if not isinstance(payload, dict): + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": cost_ms}], + [err or "capital_flow failed"], + ) + + stock_flow = payload.get("stock_flow") or {} + sector_rankings = payload.get("sector_rankings") or {} + has_stock_flow = False + if isinstance(stock_flow, dict): + has_stock_flow = any(v is not None for v in stock_flow.values()) + has_sector_rankings = bool(sector_rankings.get("top")) or bool(sector_rankings.get("bottom")) + adapter_status = str(payload.get("status", "not_supported")) + if has_stock_flow or has_sector_rankings: + capital_flow_status = "ok" + elif adapter_status == "not_supported": + capital_flow_status = "not_supported" + else: + capital_flow_status = "partial" + + return self._build_fundamental_block( + capital_flow_status, + { + "stock_flow": payload.get("stock_flow", {}), + "sector_rankings": payload.get("sector_rankings", {}), + }, + self._normalize_source_chain( + payload.get("source_chain", []), + "capital_flow", + capital_flow_status, + cost_ms, + ), + list(payload.get("errors", [])) + ([err] if err else []), + ) + + def get_dragon_tiger_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: + """龙虎榜块(fail-open)。""" + from src.config import get_config + + config = get_config() + stock_code = normalize_stock_code(stock_code) + timeout = float(budget_seconds if budget_seconds is not None else config.fundamental_fetch_timeout_seconds) + if _market_tag(stock_code) != "cn" or _is_etf_code(stock_code): + return self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", 
"duration_ms": 0}], + ["not supported"], + ) + + if timeout <= 0: + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + ["fundamental stage timeout"], + ) + payload, err, cost_ms = self._run_with_retry( + lambda: self._fundamental_adapter.get_dragon_tiger_flag(stock_code), + timeout, + "dragon_tiger", + ) + if not isinstance(payload, dict): + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": cost_ms}], + [err or "dragon_tiger failed"], + ) + return self._build_fundamental_block( + (payload.get("status") if isinstance(payload.get("status"), str) else "partial"), + { + "is_on_list": bool(payload.get("is_on_list", False)), + "recent_count": int(payload.get("recent_count", 0)), + "latest_date": payload.get("latest_date"), + }, + self._normalize_source_chain( + payload.get("source_chain", []), + "dragon_tiger", + str(payload.get("status", "ok")), + cost_ms, + ), + list(payload.get("errors", [])) + ([err] if err else []), + ) + + def get_board_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: + """板块榜单块(fail-open)。""" + from src.config import get_config + + config = get_config() + stock_code = normalize_stock_code(stock_code) + timeout = float(budget_seconds if budget_seconds is not None else config.fundamental_fetch_timeout_seconds) + if _market_tag(stock_code) != "cn" or _is_etf_code(stock_code): + return self._build_fundamental_block( + "not_supported", + {}, + [{"provider": "fundamental_pipeline", "result": "not_supported", "duration_ms": 0}], + ["not supported"], + ) + + if timeout <= 0: + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "fundamental_pipeline", "result": "failed", "duration_ms": 0}], + ["fundamental stage timeout"], + ) + + def task() -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]], str]: + return 
self._get_sector_rankings_with_meta(5) + + rankings, err, cost_ms = self._run_with_retry(task, timeout, "boards") + if isinstance(rankings, tuple) and len(rankings) == 4: + top, bottom, chain, chain_error = rankings + if chain_error and not err: + err = chain_error + if not top and not bottom: + return self._build_fundamental_block( + "failed", + {}, + chain if chain else [{"provider": "sector_rankings", "result": "failed", "duration_ms": cost_ms}], + [err or "boards empty from all sources"], + ) + board_status = "ok" if top and bottom else "partial" + return self._build_fundamental_block( + board_status, + {"top": top or [], "bottom": bottom or []}, + chain if chain else self._normalize_source_chain( + ["sector_rankings"], + "boards", + board_status, + cost_ms, + ), + [err] if err else [], + ) + + return self._build_fundamental_block( + "failed", + {}, + [{"provider": "sector_rankings", "result": "failed", "duration_ms": cost_ms}], + [err or "boards failed"], + ) + + def _get_sector_rankings_with_meta( + self, + n: int = 5, + ) -> Tuple[List[Dict], List[Dict], List[Dict[str, Any]], str]: + """Get sector rankings with ordered fallback chain metadata.""" + source_chain: List[Dict[str, Any]] = [] + last_error = "" + + # 直接遍历管理器已经按 priority 排好序的数据源列表 + for fetcher in self._fetchers: + if not hasattr(fetcher, 'get_sector_rankings'): + continue + + start = time.time() + try: + data = fetcher.get_sector_rankings(n) + duration_ms = int((time.time() - start) * 1000) + if data and data[0] is not None and data[1] is not None: + source_chain.append( + { + "provider": fetcher.name, + "result": "ok", + "duration_ms": duration_ms, + } + ) + logger.info(f"[{fetcher.name}] 获取板块排行成功") + return data[0], data[1], source_chain, "" + + last_error = f"{fetcher.name}返回空结果" + source_chain.append( + { + "provider": fetcher.name, + "result": "empty", + "duration_ms": duration_ms, + "error": last_error, + } + ) + except Exception as e: + error_type, error_reason = summarize_exception(e) + 
last_error = f"{fetcher.name} ({error_type}) {error_reason}" + duration_ms = int((time.time() - start) * 1000) + source_chain.append( + { + "provider": fetcher.name, + "result": "failed", + "duration_ms": duration_ms, + "error": error_reason, + } + ) + logger.warning(f"[{fetcher.name}] 获取板块排行失败: {error_reason}") + + return [], [], source_chain, last_error + + def get_sector_rankings(self, n: int = 5) -> Tuple[List[Dict], List[Dict]]: + """获取板块涨跌榜(自动切换数据源)""" + # 按需求固定回退顺序:Akshare(EM) -> Akshare(Sina) -> Tushare -> Efinance + top, bottom, _, last_error = self._get_sector_rankings_with_meta(n) + if top or bottom: + return top, bottom + logger.warning(f"[板块排行] 所有数据源均失败,最终错误: {last_error}") + return [], [] diff --git a/src/provider/efinance_fetcher.py b/src/provider/efinance_fetcher.py new file mode 100644 index 00000000..70048c0b --- /dev/null +++ b/src/provider/efinance_fetcher.py @@ -0,0 +1,1238 @@ +# -*- coding: utf-8 -*- +""" +=================================== +EfinanceFetcher - 优先数据源 (Priority 0) +=================================== + +数据来源:东方财富爬虫(通过 efinance 库) +特点:免费、无需 Token、数据全面、API 简洁 +仓库:https://github.com/Micro-sheep/efinance + +与 AkshareFetcher 类似,但 efinance 库: +1. API 更简洁易用 +2. 支持批量获取数据 +3. 更稳定的接口封装 + +防封禁策略: +1. 每次请求前随机休眠 1.5-3.0 秒 +2. 随机轮换 User-Agent +3. 使用 tenacity 实现指数退避重试 +4. 熔断器机制:连续失败后自动冷却 +""" + +import logging +import os +import random +import re +import time +from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional, Dict, Any, List, Tuple + +import pandas as pd +import requests # 引入 requests 以捕获异常 +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +# Timeout (seconds) for efinance library calls that go through eastmoney APIs +# with no built-in timeout. Prevents indefinite hangs when hosts are unreachable. 
# Timeout (seconds) for efinance library calls that go through eastmoney APIs
# with no built-in timeout. Prevents indefinite hangs when hosts are unreachable.
try:
    _EF_CALL_TIMEOUT = int(os.environ.get("EFINANCE_CALL_TIMEOUT", "30"))
except ValueError:
    # `logging` is already imported at the top of this module, so the previous
    # `import logging as _logging` alias was redundant. os.environ.get() with a
    # str default can only return str here, so ValueError is the only failure.
    logging.getLogger(__name__).warning(
        "EFINANCE_CALL_TIMEOUT is not a valid integer, using default 30s"
    )
    _EF_CALL_TIMEOUT = 30

from patch.eastmoney_patch import eastmoney_patch
from src.config import get_config
from .base import (
    BaseFetcher,
    DataFetchError,
    RateLimitError,
    STANDARD_COLUMNS,
    is_bse_code,
    is_st_stock,
    is_kc_cy_stock,
    normalize_stock_code,
)
from .realtime_types import (
    UnifiedRealtimeQuote,
    RealtimeSource,
    get_realtime_circuit_breaker,
    safe_float,
    safe_int,  # unified numeric coercion helpers
)


@dataclass
class EfinanceRealtimeQuote:
    """
    Realtime quote snapshot (from efinance) — backward-compatibility alias.

    New code should prefer UnifiedRealtimeQuote from realtime_types.
    """
    code: str
    name: str = ""
    price: float = 0.0           # latest price
    change_pct: float = 0.0      # change percent (%)
    change_amount: float = 0.0   # change amount

    # Volume/turnover metrics
    volume: int = 0              # traded volume
    amount: float = 0.0          # traded amount
    turnover_rate: float = 0.0   # turnover rate (%)
    amplitude: float = 0.0       # amplitude (%)

    # Intraday price range
    high: float = 0.0            # day high
    low: float = 0.0             # day low
    open_price: float = 0.0      # day open

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; note open_price maps to key 'open'."""
        return {
            'code': self.code,
            'name': self.name,
            'price': self.price,
            'change_pct': self.change_pct,
            'change_amount': self.change_amount,
            'volume': self.volume,
            'amount': self.amount,
            'turnover_rate': self.turnover_rate,
            'amplitude': self.amplitude,
            'high': self.high,
            'low': self.low,
            'open': self.open_price,
        }


logger = logging.getLogger(__name__)

EASTMONEY_HISTORY_ENDPOINT = "push2his.eastmoney.com/api/qt/stock/kline/get"
Gecko/20100101 Firefox/121.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', +] + + +# 缓存实时行情数据(避免重复请求) +# TTL 设为 10 分钟 (600秒):批量分析场景下避免重复拉取 +_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 600 # 10分钟缓存有效期 +} + +# ETF 实时行情缓存(与股票分开缓存) +_etf_realtime_cache: Dict[str, Any] = { + 'data': None, + 'timestamp': 0, + 'ttl': 600 # 10分钟缓存有效期 +} + + +def _is_etf_code(stock_code: str) -> bool: + """ + 判断代码是否为 ETF 基金 + + ETF 代码规则: + - 上交所 ETF: 51xxxx, 52xxxx, 56xxxx, 58xxxx + - 深交所 ETF: 15xxxx, 16xxxx, 18xxxx + + Args: + stock_code: 股票/基金代码 + + Returns: + True 表示是 ETF 代码,False 表示是普通股票代码 + """ + etf_prefixes = ('51', '52', '56', '58', '15', '16', '18') + return stock_code.startswith(etf_prefixes) and len(stock_code) == 6 + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +def _ef_call_with_timeout(func, *args, timeout=None, **kwargs): + """Run an efinance library call in a thread with a timeout. + + efinance internally uses requests/urllib3 with no timeout, so when + eastmoney hosts are unreachable the call can hang for many minutes. + This helper caps the *calling thread's* wait time. Note: Python threads + cannot be forcibly killed, so the worker thread may continue running in + the background until the OS-level TCP timeout fires or the process exits. + This is acceptable — the calling thread returns promptly on timeout. + """ + if timeout is None: + timeout = _EF_CALL_TIMEOUT + # Do NOT use 'with ThreadPoolExecutor(...)' here: the context manager calls + # shutdown(wait=True) on __exit__, which would re-block on the hung thread. 
+ executor = ThreadPoolExecutor(max_workers=1) + try: + future = executor.submit(func, *args, **kwargs) + return future.result(timeout=timeout) + finally: + # wait=False: calling thread returns immediately; worker cleans up later + executor.shutdown(wait=False) + + +def _classify_eastmoney_error(exc: Exception) -> Tuple[str, str]: + """ + Classify Eastmoney request failures into stable log categories. + """ + message = str(exc).strip() + lowered = message.lower() + + remote_disconnect_keywords = ( + 'remotedisconnected', + 'remote end closed connection without response', + 'connection aborted', + 'connection broken', + 'protocolerror', + ) + timeout_keywords = ( + 'timeout', + 'timed out', + 'readtimeout', + 'connecttimeout', + ) + rate_limit_keywords = ( + 'banned', + 'blocked', + '频率', + 'rate limit', + 'too many requests', + '429', + '限制', + 'forbidden', + '403', + ) + + if any(keyword in lowered for keyword in remote_disconnect_keywords): + return "remote_disconnect", message + if isinstance(exc, (TimeoutError, requests.exceptions.Timeout)) or any( + keyword in lowered for keyword in timeout_keywords + ): + return "timeout", message + if any(keyword in lowered for keyword in rate_limit_keywords): + return "rate_limit_or_anti_bot", message + if isinstance(exc, requests.exceptions.RequestException): + return "request_error", message + return "unknown_request_error", message + + +class EfinanceFetcher(BaseFetcher): + """ + Efinance 数据源实现 + + 优先级:0(最高,优先于 AkshareFetcher) + 数据来源:东方财富网(通过 efinance 库封装) + 仓库:https://github.com/Micro-sheep/efinance + + 主要 API: + - ef.stock.get_quote_history(): 获取历史 K 线数据 + - ef.stock.get_base_info(): 获取股票基本信息 + - ef.stock.get_realtime_quotes(): 获取实时行情 + + 关键策略: + - 每次请求前随机休眠 1.5-3.0 秒 + - 随机 User-Agent 轮换 + - 失败后指数退避重试(最多3次) + """ + + name = "EfinanceFetcher" + priority = int(os.getenv("EFINANCE_PRIORITY", "0")) # 最高优先级,排在 AkshareFetcher 之前 + + def __init__(self, sleep_min: float = 1.5, sleep_max: float = 3.0): + """ + 初始化 
EfinanceFetcher + + Args: + sleep_min: 最小休眠时间(秒) + sleep_max: 最大休眠时间(秒) + """ + self.sleep_min = sleep_min + self.sleep_max = sleep_max + self._last_request_time: Optional[float] = None + # 东财补丁开启才执行打补丁操作 + if get_config().enable_eastmoney_patch: + eastmoney_patch() + + @staticmethod + def _build_history_failure_message( + stock_code: str, + beg_date: str, + end_date: str, + exc: Exception, + elapsed: float, + is_etf: bool = False, + ) -> Tuple[str, str]: + category, detail = _classify_eastmoney_error(exc) + instrument_type = "ETF" if is_etf else "stock" + message = ( + "Eastmoney 历史K线接口失败: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"market_type={instrument_type}, range={beg_date}~{end_date}, " + f"category={category}, error_type={type(exc).__name__}, elapsed={elapsed:.2f}s, detail={detail}" + ) + return category, message + + def _set_random_user_agent(self) -> None: + """ + 设置随机 User-Agent + + 通过修改 requests Session 的 headers 实现 + 这是关键的反爬策略之一 + """ + try: + random_ua = random.choice(USER_AGENTS) + logger.debug(f"设置 User-Agent: {random_ua[:50]}...") + except Exception as e: + logger.debug(f"设置 User-Agent 失败: {e}") + + def _enforce_rate_limit(self) -> None: + """ + 强制执行速率限制 + + 策略: + 1. 检查距离上次请求的时间间隔 + 2. 如果间隔不足,补充休眠时间 + 3. 
然后再执行随机 jitter 休眠 + """ + if self._last_request_time is not None: + elapsed = time.time() - self._last_request_time + min_interval = self.sleep_min + if elapsed < min_interval: + additional_sleep = min_interval - elapsed + logger.debug(f"补充休眠 {additional_sleep:.2f} 秒") + time.sleep(additional_sleep) + + # 执行随机 jitter 休眠 + self.random_sleep(self.sleep_min, self.sleep_max) + self._last_request_time = time.time() + + @retry( + stop=stop_after_attempt(1), # 减少到1次,避免触发限流 + wait=wait_exponential(multiplier=1, min=4, max=60), # 保持等待时间设置 + retry=retry_if_exception_type(( + ConnectionError, + TimeoutError, + requests.exceptions.RequestException, + requests.exceptions.ConnectionError, + requests.exceptions.ChunkedEncodingError + )), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 efinance 获取原始数据 + + 根据代码类型自动选择 API: + - 美股:不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + - 普通股票:使用 ef.stock.get_quote_history() + - ETF 基金:使用 ef.stock.get_quote_history()(ETF 是交易所证券,使用股票 K 线接口) + + 流程: + 1. 判断代码类型(美股/股票/ETF) + 2. 设置随机 User-Agent + 3. 执行速率限制(随机休眠) + 4. 调用对应的 efinance API + 5. 
处理返回数据 + """ + # 美股不支持,抛出异常让 DataFetcherManager 切换到 AkshareFetcher/YfinanceFetcher + if _is_us_code(stock_code): + raise DataFetchError(f"EfinanceFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # 根据代码类型选择不同的获取方法 + if _is_etf_code(stock_code): + return self._fetch_etf_data(stock_code, start_date, end_date) + else: + return self._fetch_stock_data(stock_code, start_date, end_date) + + def _fetch_stock_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取普通 A 股历史数据 + + 数据来源:ef.stock.get_quote_history() + + API 参数说明: + - stock_codes: 股票代码 + - beg: 开始日期,格式 'YYYYMMDD' + - end: 结束日期,格式 'YYYYMMDD' + - klt: 周期,101=日线 + - fqt: 复权方式,1=前复权 + """ + import efinance as ef + + # 防封禁策略 1: 随机 User-Agent + self._set_random_user_agent() + + # 防封禁策略 2: 强制休眠 + self._enforce_rate_limit() + + # 格式化日期(efinance 使用 YYYYMMDD 格式) + beg_date = start_date.replace('-', '') + end_date_fmt = end_date.replace('-', '') + + logger.info(f"[API调用] ef.stock.get_quote_history(stock_codes={stock_code}, " + f"beg={beg_date}, end={end_date_fmt}, klt=101, fqt=1)") + + api_start = time.time() + try: + # 调用 efinance 获取 A 股日线数据 + # klt=101 获取日线数据 + # fqt=1 获取前复权数据 + df = _ef_call_with_timeout( + ef.stock.get_quote_history, + stock_codes=stock_code, + beg=beg_date, + end=end_date_fmt, + klt=101, # 日线 + fqt=1, # 前复权 + timeout=60, + ) + + api_elapsed = time.time() - api_start + + # 记录返回数据摘要 + if df is not None and not df.empty: + logger.info( + "[API返回] Eastmoney 历史K线成功: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, rows={len(df)}, elapsed={api_elapsed:.2f}s" + ) + logger.info(f"[API返回] 列名: {list(df.columns)}") + if '日期' in df.columns: + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning( + "[API返回] Eastmoney 历史K线为空: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + 
f"range={beg_date}~{end_date_fmt}, elapsed={api_elapsed:.2f}s" + ) + + return df + + except Exception as e: + api_elapsed = time.time() - api_start + category, failure_message = self._build_history_failure_message( + stock_code=stock_code, + beg_date=beg_date, + end_date=end_date_fmt, + exc=e, + elapsed=api_elapsed, + ) + + if category == "rate_limit_or_anti_bot": + logger.warning(failure_message) + raise RateLimitError(f"efinance 可能被限流: {failure_message}") from e + + logger.error(failure_message) + raise DataFetchError(f"efinance 获取数据失败: {failure_message}") from e + + def _fetch_etf_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 获取 ETF 基金历史数据 + + Exchange-traded ETFs have OHLCV data just like regular stocks, so we use + ef.stock.get_quote_history (the stock K-line API) which returns full + open/high/low/close/volume data. + + Previously this method used ef.fund.get_quote_history which only returns + NAV data (单位净值/累计净值) without volume or OHLC, causing: + - Issue #541: 'got an unexpected keyword argument beg' + - Issue #527: ETF volume/turnover always showing 0 + + Args: + stock_code: ETF code, e.g. 
'512400', '159883', '515120' + start_date: Start date, format 'YYYY-MM-DD' + end_date: End date, format 'YYYY-MM-DD' + + Returns: + ETF historical OHLCV DataFrame + """ + import efinance as ef + + # Anti-ban strategy 1: random User-Agent + self._set_random_user_agent() + + # Anti-ban strategy 2: enforce rate limit + self._enforce_rate_limit() + + # Format dates (efinance uses YYYYMMDD) + beg_date = start_date.replace('-', '') + end_date_fmt = end_date.replace('-', '') + + logger.info(f"[API调用] ef.stock.get_quote_history(stock_codes={stock_code}, " + f"beg={beg_date}, end={end_date_fmt}, klt=101, fqt=1) [ETF]") + + api_start = time.time() + try: + # ETFs are exchange-traded securities; use the stock API to get full OHLCV data + df = _ef_call_with_timeout( + ef.stock.get_quote_history, + stock_codes=stock_code, + beg=beg_date, + end=end_date_fmt, + klt=101, # daily + fqt=1, # forward-adjusted + timeout=60, + ) + + api_elapsed = time.time() - api_start + + if df is not None and not df.empty: + logger.info( + "[API返回] Eastmoney 历史K线成功 [ETF]: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, rows={len(df)}, elapsed={api_elapsed:.2f}s" + ) + logger.info(f"[API返回] 列名: {list(df.columns)}") + if '日期' in df.columns: + logger.info(f"[API返回] 日期范围: {df['日期'].iloc[0]} ~ {df['日期'].iloc[-1]}") + logger.debug(f"[API返回] 最新3条数据:\n{df.tail(3).to_string()}") + else: + logger.warning( + "[API返回] Eastmoney 历史K线为空 [ETF]: " + f"endpoint={EASTMONEY_HISTORY_ENDPOINT}, stock_code={stock_code}, " + f"range={beg_date}~{end_date_fmt}, elapsed={api_elapsed:.2f}s" + ) + + return df + + except Exception as e: + api_elapsed = time.time() - api_start + category, failure_message = self._build_history_failure_message( + stock_code=stock_code, + beg_date=beg_date, + end_date=end_date_fmt, + exc=e, + elapsed=api_elapsed, + is_etf=True, + ) + + if category == "rate_limit_or_anti_bot": + logger.warning(failure_message) + raise 
RateLimitError(f"efinance 可能被限流: {failure_message}") from e + + logger.error(failure_message) + raise DataFetchError(f"efinance 获取 ETF 数据失败: {failure_message}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 efinance 数据 + + efinance 返回的列名(中文): + 股票名称, 股票代码, 日期, 开盘, 收盘, 最高, 最低, 成交量, 成交额, 振幅, 涨跌幅, 涨跌额, 换手率 + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # Column mapping (efinance Chinese column names -> standard English column names) + column_mapping = { + '日期': 'date', + '开盘': 'open', + '收盘': 'close', + '最高': 'high', + '最低': 'low', + '成交量': 'volume', + '成交额': 'amount', + '涨跌幅': 'pct_chg', + '股票代码': 'code', + '股票名称': 'name', + } + + # 重命名列 + df = df.rename(columns=column_mapping) + + # Fallback: if OHLC columns are missing (e.g. very old data path), fill from close + if 'close' in df.columns and 'open' not in df.columns: + df['open'] = df['close'] + df['high'] = df['close'] + df['low'] = df['close'] + + # Fill volume and amount if missing + if 'volume' not in df.columns: + df['volume'] = 0 + if 'amount' not in df.columns: + df['amount'] = 0 + + + # 如果没有 code 列,手动添加 + if 'code' not in df.columns: + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取实时行情数据 + + 数据来源:ef.stock.get_realtime_quotes() + ETF 数据源:ef.stock.get_realtime_quotes(['ETF']) + + Args: + stock_code: 股票代码 + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + # ETF 需要单独请求 ETF 实时行情接口 + if _is_etf_code(stock_code): + return self._get_etf_realtime_quote(stock_code) + + import efinance as ef + circuit_breaker = get_realtime_circuit_breaker() + source_key = "efinance" + + # 检查熔断器状态 + if not circuit_breaker.is_available(source_key): + logger.info(f"[熔断] 数据源 {source_key} 
    def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]:
        """
        Fetch the realtime quote for a single stock.

        Data source: ef.stock.get_realtime_quotes() — a full-market snapshot
        cached module-wide for 10 minutes. ETF codes are routed to the
        dedicated ETF endpoint instead.

        Args:
            stock_code: stock code

        Returns:
            UnifiedRealtimeQuote on success, None on any failure (fail-open).
        """
        # ETFs must be looked up via the dedicated ETF realtime endpoint.
        if _is_etf_code(stock_code):
            return self._get_etf_realtime_quote(stock_code)

        import efinance as ef
        circuit_breaker = get_realtime_circuit_breaker()
        source_key = "efinance"

        # Respect the circuit breaker: skip this source while cooling down.
        if not circuit_breaker.is_available(source_key):
            logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过")
            return None

        try:
            # Serve from the module-level snapshot cache while it is fresh.
            current_time = time.time()
            if (_realtime_cache['data'] is not None and
                    current_time - _realtime_cache['timestamp'] < _realtime_cache['ttl']):
                df = _realtime_cache['data']
                cache_age = int(current_time - _realtime_cache['timestamp'])
                logger.debug(f"[缓存命中] 实时行情(efinance) - 缓存年龄 {cache_age}s/{_realtime_cache['ttl']}s")
            else:
                # Cache miss: trigger a full-market refresh.
                logger.info(f"[缓存未命中] 触发全量刷新 实时行情(efinance)")
                # Anti-ban measures only on an actual network request.
                self._set_random_user_agent()
                self._enforce_rate_limit()

                logger.info(f"[API调用] ef.stock.get_realtime_quotes() 获取实时行情...")
                import time as _time
                api_start = _time.time()

                # efinance realtime API (with timeout to avoid indefinite hangs)
                df = _ef_call_with_timeout(ef.stock.get_realtime_quotes)

                api_elapsed = _time.time() - api_start
                logger.info(f"[API返回] ef.stock.get_realtime_quotes 成功: 返回 {len(df)} 只股票, 耗时 {api_elapsed:.2f}s")
                circuit_breaker.record_success(source_key)

                # Refresh the cache.
                _realtime_cache['data'] = df
                _realtime_cache['timestamp'] = current_time
                logger.info(f"[缓存更新] 实时行情(efinance) 缓存已刷新,TTL={_realtime_cache['ttl']}s")

            # Locate the requested stock.
            # efinance may name the column '股票代码' or 'code' depending on version.
            code_col = '股票代码' if '股票代码' in df.columns else 'code'
            row = df[df[code_col] == stock_code]
            if row.empty:
                logger.info(f"[API返回] 未找到股票 {stock_code} 的实时行情")
                return None

            row = row.iloc[0]

            # Resolve column names (Chinese or English, depending on version),
            # then convert via the unified helpers from realtime_types.
            name_col = '股票名称' if '股票名称' in df.columns else 'name'
            price_col = '最新价' if '最新价' in df.columns else 'price'
            pct_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg'
            chg_col = '涨跌额' if '涨跌额' in df.columns else 'change'
            vol_col = '成交量' if '成交量' in df.columns else 'volume'
            amt_col = '成交额' if '成交额' in df.columns else 'amount'
            turn_col = '换手率' if '换手率' in df.columns else 'turnover_rate'
            amp_col = '振幅' if '振幅' in df.columns else 'amplitude'
            high_col = '最高' if '最高' in df.columns else 'high'
            low_col = '最低' if '最低' in df.columns else 'low'
            open_col = '开盘' if '开盘' in df.columns else 'open'
            # efinance also carries volume ratio, PE and market-cap fields.
            vol_ratio_col = '量比' if '量比' in df.columns else 'volume_ratio'
            pe_col = '市盈率' if '市盈率' in df.columns else 'pe_ratio'
            total_mv_col = '总市值' if '总市值' in df.columns else 'total_mv'
            circ_mv_col = '流通市值' if '流通市值' in df.columns else 'circ_mv'

            quote = UnifiedRealtimeQuote(
                code=stock_code,
                name=str(row.get(name_col, '')),
                source=RealtimeSource.EFINANCE,
                price=safe_float(row.get(price_col)),
                change_pct=safe_float(row.get(pct_col)),
                change_amount=safe_float(row.get(chg_col)),
                volume=safe_int(row.get(vol_col)),
                amount=safe_float(row.get(amt_col)),
                turnover_rate=safe_float(row.get(turn_col)),
                amplitude=safe_float(row.get(amp_col)),
                high=safe_float(row.get(high_col)),
                low=safe_float(row.get(low_col)),
                open_price=safe_float(row.get(open_col)),
                volume_ratio=safe_float(row.get(vol_ratio_col)),  # volume ratio
                pe_ratio=safe_float(row.get(pe_col)),             # PE ratio
                total_mv=safe_float(row.get(total_mv_col)),       # total market cap
                circ_mv=safe_float(row.get(circ_mv_col)),         # circulating market cap
            )

            logger.info(f"[实时行情-efinance] {stock_code} {quote.name}: 价格={quote.price}, 涨跌={quote.change_pct}%, "
                        f"量比={quote.volume_ratio}, 换手率={quote.turnover_rate}%")
            return quote

        except FuturesTimeoutError:
            logger.info(f"[超时] ef.stock.get_realtime_quotes() 超过 {_EF_CALL_TIMEOUT}s,跳过 {stock_code}")
            circuit_breaker.record_failure(source_key, "timeout")
            return None
        except Exception as e:
            logger.info(f"[API错误] 获取 {stock_code} 实时行情(efinance)失败: {e}")
            circuit_breaker.record_failure(source_key, str(e))
            return None

    def _get_etf_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]:
        """
        Fetch an ETF realtime quote.

        efinance's default realtime endpoint only covers stocks; ETFs require
        an explicit ef.stock.get_realtime_quotes(['ETF']) call. Results are
        cached separately from the stock snapshot.
        """
        import efinance as ef
        circuit_breaker = get_realtime_circuit_breaker()
        source_key = "efinance_etf"

        if not circuit_breaker.is_available(source_key):
            logger.info(f"[熔断] 数据源 {source_key} 处于熔断状态,跳过")
            return None

        try:
            current_time = time.time()
            if (
                _etf_realtime_cache['data'] is not None and
                current_time - _etf_realtime_cache['timestamp'] < _etf_realtime_cache['ttl']
            ):
                df = _etf_realtime_cache['data']
                cache_age = int(current_time - _etf_realtime_cache['timestamp'])
                logger.debug(f"[缓存命中] ETF实时行情(efinance) - 缓存年龄 {cache_age}s/{_etf_realtime_cache['ttl']}s")
            else:
                self._set_random_user_agent()
                self._enforce_rate_limit()

                logger.info("[API调用] ef.stock.get_realtime_quotes(['ETF']) 获取ETF实时行情...")
                import time as _time
                api_start = _time.time()
                df = _ef_call_with_timeout(ef.stock.get_realtime_quotes, ['ETF'])
                api_elapsed = _time.time() - api_start

                if df is not None and not df.empty:
                    logger.info(f"[API返回] ETF 实时行情成功: {len(df)} 条, 耗时 {api_elapsed:.2f}s")
                    circuit_breaker.record_success(source_key)
                else:
                    logger.info(f"[API返回] ETF 实时行情为空, 耗时 {api_elapsed:.2f}s")
                    # Cache an empty frame so the empty result is also TTL-bounded.
                    df = pd.DataFrame()

                _etf_realtime_cache['data'] = df
                _etf_realtime_cache['timestamp'] = current_time

            if df is None or df.empty:
                logger.info(f"[实时行情] ETF实时行情数据为空(efinance),跳过 {stock_code}")
                return None

            code_col = '股票代码' if '股票代码' in df.columns else 'code'
            # zfill(6) so shortened numeric codes still match 6-digit ETF codes.
            code_series = df[code_col].astype(str).str.zfill(6)
            target_code = str(stock_code).strip().zfill(6)
            row = df[code_series == target_code]
            if row.empty:
                logger.info(f"[API返回] 未找到 ETF {stock_code} 的实时行情(efinance)")
                return None

            row = row.iloc[0]
            # Resolve column names (Chinese or English, depending on version).
            name_col = '股票名称' if '股票名称' in df.columns else 'name'
            price_col = '最新价' if '最新价' in df.columns else 'price'
            pct_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg'
            chg_col = '涨跌额' if '涨跌额' in df.columns else 'change'
            vol_col = '成交量' if '成交量' in df.columns else 'volume'
            amt_col = '成交额' if '成交额' in df.columns else 'amount'
            turn_col = '换手率' if '换手率' in df.columns else 'turnover_rate'
            amp_col = '振幅' if '振幅' in df.columns else 'amplitude'
            high_col = '最高' if '最高' in df.columns else 'high'
            low_col = '最低' if '最低' in df.columns else 'low'
            open_col = '开盘' if '开盘' in df.columns else 'open'

            quote = UnifiedRealtimeQuote(
                code=target_code,
                name=str(row.get(name_col, '')),
                source=RealtimeSource.EFINANCE,
                price=safe_float(row.get(price_col)),
                change_pct=safe_float(row.get(pct_col)),
                change_amount=safe_float(row.get(chg_col)),
                volume=safe_int(row.get(vol_col)),
                amount=safe_float(row.get(amt_col)),
                turnover_rate=safe_float(row.get(turn_col)),
                amplitude=safe_float(row.get(amp_col)),
                high=safe_float(row.get(high_col)),
                low=safe_float(row.get(low_col)),
                open_price=safe_float(row.get(open_col)),
            )

            logger.info(
                f"[ETF实时行情-efinance] {target_code} {quote.name}: "
                f"价格={quote.price}, 涨跌={quote.change_pct}%, 换手率={quote.turnover_rate}%"
            )
            return quote
        except Exception as e:
            logger.info(f"[API错误] 获取 ETF {stock_code} 实时行情(efinance)失败: {e}")
            circuit_breaker.record_failure(source_key, str(e))
            return None
    def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]:
        """
        Fetch realtime quotes for the main market indices (efinance).

        Only A-share indices are supported; any region other than "cn"
        returns None immediately.

        Returns:
            A list of index dicts (code/name/current/change/... keys) or None
            on failure or empty data (fail-open).
        """
        if region != "cn":
            return None
        import efinance as ef

        # Bare index code -> (display name, exchange-prefixed full code).
        indices_map = {
            '000001': ('上证指数', 'sh000001'),
            '399001': ('深证成指', 'sz399001'),
            '399006': ('创业板指', 'sz399006'),
            '000688': ('科创50', 'sh000688'),
            '000016': ('上证50', 'sh000016'),
            '000300': ('沪深300', 'sh000300'),
        }

        try:
            self._set_random_user_agent()
            self._enforce_rate_limit()

            logger.info("[API调用] ef.stock.get_realtime_quotes(['沪深系列指数']) 获取指数行情...")
            import time as _time
            api_start = _time.time()
            df = _ef_call_with_timeout(ef.stock.get_realtime_quotes, ['沪深系列指数'])
            api_elapsed = _time.time() - api_start

            if df is None or df.empty:
                logger.warning(f"[API返回] 指数行情为空, 耗时 {api_elapsed:.2f}s")
                return None

            logger.info(f"[API返回] 指数行情成功: {len(df)} 条, 耗时 {api_elapsed:.2f}s")
            code_col = '股票代码' if '股票代码' in df.columns else 'code'
            # zfill(6) so shortened numeric codes still match the 6-digit keys.
            code_series = df[code_col].astype(str).str.zfill(6)

            results: List[Dict[str, Any]] = []
            for code, (name, full_code) in indices_map.items():
                row = df[code_series == code]
                if row.empty:
                    continue
                item = row.iloc[0]

                # Column names differ between efinance versions (Chinese/English).
                price_col = '最新价' if '最新价' in df.columns else 'price'
                pct_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg'
                chg_col = '涨跌额' if '涨跌额' in df.columns else 'change'
                open_cols = [column for column in ('今开', '开盘', 'open') if column in df.columns]
                high_col = '最高' if '最高' in df.columns else 'high'
                low_col = '最低' if '最低' in df.columns else 'low'
                vol_col = '成交量' if '成交量' in df.columns else 'volume'
                amt_col = '成交额' if '成交额' in df.columns else 'amount'
                amp_col = '振幅' if '振幅' in df.columns else 'amplitude'

                current = safe_float(item.get(price_col, 0))
                change_amount = safe_float(item.get(chg_col, 0))
                # Use the first open-price column with a real (non-None,
                # non-zero) value; if all candidates are 0.0/None, fall back
                # to the first candidate column coerced to a number.
                open_price = 0.0
                for column in open_cols:
                    candidate = safe_float(item.get(column), default=None)
                    if candidate not in (None, 0.0):
                        open_price = candidate
                        break
                if open_price == 0.0 and open_cols:
                    open_price = safe_float(item.get(open_cols[0], 0), 0)

                results.append({
                    'code': full_code,
                    'name': name,
                    'current': current,
                    'change': change_amount,
                    'change_pct': safe_float(item.get(pct_col, 0)),
                    'open': open_price,
                    'high': safe_float(item.get(high_col, 0)),
                    'low': safe_float(item.get(low_col, 0)),
                    # prev_close is derived as current - change (not returned
                    # directly by this endpoint).
                    'prev_close': current - change_amount if current or change_amount else 0,
                    'volume': safe_float(item.get(vol_col, 0)),
                    'amount': safe_float(item.get(amt_col, 0)),
                    'amplitude': safe_float(item.get(amp_col, 0)),
                })

            if results:
                logger.info(f"[efinance] 获取到 {len(results)} 个指数行情")
            return results if results else None
        except Exception as e:
            logger.error(f"[efinance] 获取指数行情失败: {e}")
            return None
logger.info("[API调用] ef.stock.get_realtime_quotes() 获取市场统计...") + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes) + _realtime_cache['data'] = df + _realtime_cache['timestamp'] = current_time + + if df is None or df.empty: + logger.warning("[API返回] 市场统计数据为空") + return None + + return self._calc_market_stats(df) + except Exception as e: + logger.error(f"[efinance] 获取市场统计失败: {e}") + return None + + def _calc_market_stats( + self, + df: pd.DataFrame, + ) -> Optional[Dict[str, Any]]: + """从行情 DataFrame 计算涨跌统计。""" + import numpy as np + + df = df.copy() + + # 1. 提取基础比对数据:最新价、昨收 + # 兼容不同接口返回的列名 sina/em efinance tushare xtdata + code_col = next((c for c in ['代码', '股票代码', 'ts_code','stock_code'] if c in df.columns), None) + name_col = next((c for c in ['名称', '股票名称','name','name'] if c in df.columns), None) + close_col = next((c for c in ['最新价', '最新价', 'close','lastPrice'] if c in df.columns), None) + pre_close_col = next((c for c in ['昨收', '昨日收盘', 'pre_close','lastClose'] if c in df.columns), None) + amount_col = next((c for c in ['成交额', '成交额', 'amount','amount'] if c in df.columns), None) + + limit_up_count = 0 + limit_down_count = 0 + up_count = 0 + down_count = 0 + flat_count = 0 + + for code, name, current_price, pre_close, amount in zip( + df[code_col], df[name_col], df[close_col], df[pre_close_col], df[amount_col] + ): + + # 停牌过滤 efinance 的停牌数据有时候会缺失价格显示为 '-',em 显示为none + if pd.isna(current_price) or pd.isna(pre_close) or current_price in ['-'] or pre_close in ['-'] or amount == 0: + continue + + # em、efinance 为str 需要转换为float + current_price = float(current_price) + pre_close = float(pre_close) + + # 获取去除前缀的纯数字代码 + pure_code = normalize_stock_code(str(code)) + + # A. 确定每只股票的涨跌幅比例 (使用纯数字代码判断) + if is_bse_code(pure_code): + ratio = 0.30 + elif is_kc_cy_stock(pure_code): #pure_code.startswith(('688', '30')): + ratio = 0.20 + elif is_st_stock(name): #'ST' in str_name: + ratio = 0.05 + else: + ratio = 0.10 + + # B. 
严格按照 A 股规则计算涨跌停价:昨收 * (1 ± 比例) -> 四舍五入保留2位小数 + limit_up_price = np.floor(pre_close * (1 + ratio) * 100 + 0.5) / 100.0 + limit_down_price = np.floor(pre_close * (1 - ratio) * 100 + 0.5) / 100.0 + + limit_up_price_Tolerance = round(abs(pre_close * (1 + ratio) - limit_up_price), 10) + limit_down_price_Tolerance = round(abs(pre_close * (1 - ratio) - limit_down_price), 10) + + # C. 精确比对 + if current_price > 0 : + is_limit_up = (current_price > 0) and (abs(current_price - limit_up_price) <= limit_up_price_Tolerance) + is_limit_down = (current_price > 0) and (abs(current_price - limit_down_price) <= limit_down_price_Tolerance) + + if is_limit_up: + limit_up_count += 1 + if is_limit_down: + limit_down_count += 1 + + if current_price > pre_close: + up_count += 1 + elif current_price < pre_close: + down_count += 1 + else: + flat_count += 1 + + # 统计数量 + stats = { + 'up_count': up_count, + 'down_count': down_count, + 'flat_count': flat_count, + 'limit_up_count': limit_up_count, + 'limit_down_count': limit_down_count, + 'total_amount': 0.0, + } + + # 成交额统计 + if amount_col and amount_col in df.columns: + df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce') + stats['total_amount'] = (df[amount_col].sum() / 1e8) + + return stats + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[List[Dict], List[Dict]]]: + """ + 获取板块涨跌榜 (efinance) + """ + import efinance as ef + + try: + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info("[API调用] ef.stock.get_realtime_quotes(['行业板块']) 获取板块行情...") + df = _ef_call_with_timeout(ef.stock.get_realtime_quotes, ['行业板块']) + if df is None or df.empty: + logger.warning("[efinance] 板块行情数据为空") + return None + + change_col = '涨跌幅' if '涨跌幅' in df.columns else 'pct_chg' + name_col = '股票名称' if '股票名称' in df.columns else 'name' + if change_col not in df.columns or name_col not in df.columns: + return None + + df[change_col] = pd.to_numeric(df[change_col], errors='coerce') + df = df.dropna(subset=[change_col]) + top = 
df.nlargest(n, change_col) + bottom = df.nsmallest(n, change_col) + + top_sectors = [ + {'name': str(row[name_col]), 'change_pct': float(row[change_col])} + for _, row in top.iterrows() + ] + bottom_sectors = [ + {'name': str(row[name_col]), 'change_pct': float(row[change_col])} + for _, row in bottom.iterrows() + ] + return top_sectors, bottom_sectors + except Exception as e: + logger.error(f"[efinance] 获取板块排行失败: {e}") + return None + + def get_base_info(self, stock_code: str) -> Optional[Dict[str, Any]]: + """ + 获取股票基本信息 + + 数据来源:ef.stock.get_base_info() + 包含:市盈率、市净率、所处行业、总市值、流通市值、ROE、净利率等 + + Args: + stock_code: 股票代码 + + Returns: + 包含基本信息的字典,获取失败返回 None + """ + import efinance as ef + + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ef.stock.get_base_info(stock_codes={stock_code}) 获取基本信息...") + import time as _time + api_start = _time.time() + + info = _ef_call_with_timeout(ef.stock.get_base_info, stock_code) + + api_elapsed = _time.time() - api_start + logger.info(f"[API返回] ef.stock.get_base_info 成功, 耗时 {api_elapsed:.2f}s") + + if info is None: + logger.warning(f"[API返回] 未获取到 {stock_code} 的基本信息") + return None + + # 转换为字典 + if isinstance(info, pd.Series): + return info.to_dict() + elif isinstance(info, pd.DataFrame): + if not info.empty: + return info.iloc[0].to_dict() + + return None + + except Exception as e: + logger.error(f"[API错误] 获取 {stock_code} 基本信息失败: {e}") + return None + + def get_belong_board(self, stock_code: str) -> Optional[pd.DataFrame]: + """ + 获取股票所属板块 + + 数据来源:ef.stock.get_belong_board() + + Args: + stock_code: 股票代码 + + Returns: + 所属板块 DataFrame,获取失败返回 None + """ + import efinance as ef + + try: + # 防封禁策略 + self._set_random_user_agent() + self._enforce_rate_limit() + + logger.info(f"[API调用] ef.stock.get_belong_board(stock_code={stock_code}) 获取所属板块...") + import time as _time + api_start = _time.time() + + df = _ef_call_with_timeout(ef.stock.get_belong_board, stock_code) + + api_elapsed = 
_time.time() - api_start + + if df is not None and not df.empty: + logger.info(f"[API返回] ef.stock.get_belong_board 成功: 返回 {len(df)} 个板块, 耗时 {api_elapsed:.2f}s") + return df + else: + logger.warning(f"[API返回] 未获取到 {stock_code} 的板块信息") + return None + + except FuturesTimeoutError: + logger.warning(f"[超时] ef.stock.get_belong_board({stock_code}) 超过 {_EF_CALL_TIMEOUT}s,跳过") + return None + except Exception as e: + logger.error(f"[API错误] 获取 {stock_code} 所属板块失败: {e}") + return None + + def get_enhanced_data(self, stock_code: str, days: int = 60) -> Dict[str, Any]: + """ + 获取增强数据(历史K线 + 实时行情 + 基本信息) + + Args: + stock_code: 股票代码 + days: 历史数据天数 + + Returns: + 包含所有数据的字典 + """ + result = { + 'code': stock_code, + 'daily_data': None, + 'realtime_quote': None, + 'base_info': None, + 'belong_board': None, + } + + # 获取日线数据 + try: + df = self.get_daily_data(stock_code, days=days) + result['daily_data'] = df + except Exception as e: + logger.error(f"获取 {stock_code} 日线数据失败: {e}") + + # 获取实时行情 + result['realtime_quote'] = self.get_realtime_quote(stock_code) + + # 获取基本信息 + result['base_info'] = self.get_base_info(stock_code) + + # 获取所属板块 + result['belong_board'] = self.get_belong_board(stock_code) + + return result + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = EfinanceFetcher() + + # 测试普通股票 + print("=" * 50) + print("测试普通股票数据获取 (efinance)") + print("=" * 50) + try: + df = fetcher.get_daily_data('600519') # 茅台 + print(f"[股票] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[股票] 获取失败: {e}") + + # 测试 ETF 基金 + print("\n" + "=" * 50) + print("测试 ETF 基金数据获取 (efinance)") + print("=" * 50) + try: + df = fetcher.get_daily_data('512400') # 有色龙头ETF + print(f"[ETF] 获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"[ETF] 获取失败: {e}") + + # 测试实时行情 + print("\n" + "=" * 50) + print("测试实时行情获取 (efinance)") + print("=" * 50) + try: + quote = fetcher.get_realtime_quote('600519') + if quote: + 
print(f"[实时行情] {quote.name}: 价格={quote.price}, 涨跌幅={quote.change_pct}%") + else: + print("[实时行情] 未获取到数据") + except Exception as e: + print(f"[实时行情] 获取失败: {e}") + + # 测试基本信息 + print("\n" + "=" * 50) + print("测试基本信息获取 (efinance)") + print("=" * 50) + try: + info = fetcher.get_base_info('600519') + if info: + print(f"[基本信息] 市盈率={info.get('市盈率(动)', 'N/A')}, 市净率={info.get('市净率', 'N/A')}") + else: + print("[基本信息] 未获取到数据") + except Exception as e: + print(f"[基本信息] 获取失败: {e}") + + # 测试市场统计 + print("\n" + "=" * 50) + print("Testing get_market_stats (efinance)") + print("=" * 50) + try: + stats = fetcher.get_market_stats() + if stats: + print(f"Market Stats successfully computed:") + print(f"Up: {stats['up_count']} (Limit Up: {stats['limit_up_count']})") + print(f"Down: {stats['down_count']} (Limit Down: {stats['limit_down_count']})") + print(f"Flat: {stats['flat_count']}") + print(f"Total Amount: {stats['total_amount']:.2f} 亿 (Yi)") + else: + print("Failed to compute market stats.") + except Exception as e: + print(f"Failed to compute market stats: {e}") diff --git a/src/provider/fundamental_adapter.py b/src/provider/fundamental_adapter.py new file mode 100644 index 00000000..6fe41e1f --- /dev/null +++ b/src/provider/fundamental_adapter.py @@ -0,0 +1,532 @@ +# -*- coding: utf-8 -*- +""" +AkShare fundamental adapter (fail-open). + +This adapter intentionally uses capability probing against multiple AkShare +endpoint candidates. It should never raise to caller; partial data is allowed. 
+""" + +from __future__ import annotations + +import logging +import re +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Tuple + +import pandas as pd + +logger = logging.getLogger(__name__) + +_DIVIDEND_KEYWORD_MAP: Dict[str, List[str]] = { + "per_share": [ + "每股派息", + "每股现金红利", + "每股分红", + "每股派现", + "派现(元/股)", + "派息(元/股)", + "税前派息(元/股)", + "现金分红(税前)", + ], + "plan_text": [ + "分配方案", + "分红方案", + "实施方案", + "派息方案", + "方案", + "预案", + "方案说明", + ], + "ex_dividend_date": ["除权除息日", "除息日", "除权日", "除权除息", "除息日期"], + "record_date": ["股权登记日", "登记日"], + "announce_date": ["公告日期", "公告日", "实施公告日", "预案公告日"], + "report_date": ["报告期", "报告日期", "截止日期", "统计截止日期"], +} + + +def _safe_float(value: Any) -> Optional[float]: + """Best-effort float conversion.""" + if value is None: + return None + if isinstance(value, (int, float)): + try: + return float(value) + except (TypeError, ValueError): + return None + s = str(value).strip().replace(",", "").replace("%", "") + if not s: + return None + try: + return float(s) + except (TypeError, ValueError): + return None + + +def _safe_str(value: Any) -> str: + if value is None: + return "" + return str(value).strip() + + +def _safe_datetime(value: Any) -> Optional[datetime]: + if value is None: + return None + try: + parsed = pd.to_datetime(value) + except Exception: + return None + if pd.isna(parsed): + return None + try: + return parsed.to_pydatetime() + except Exception: + return None + + +def _normalize_code(raw: Any) -> str: + s = _safe_str(raw).upper() + if "." in s: + s = s.split(".", 1)[0] + s = re.sub(r"^(SH|SZ|BJ)", "", s) + return s + + +def _pick_by_keywords(row: pd.Series, keywords: List[str]) -> Optional[Any]: + """ + Return first non-empty row value whose column name contains any keyword. 
+ """ + for col in row.index: + col_s = str(col) + if any(k in col_s for k in keywords): + val = row.get(col) + if val is not None and str(val).strip() not in ("", "-", "nan", "None"): + return val + return None + + +def _parse_dividend_plan_to_per_share(plan_text: str) -> Optional[float]: + """Parse per-share cash dividend from Chinese plan text.""" + text = _safe_str(plan_text) + if not text: + return None + + for pattern in ( + r"(?:每)?\s*10\s*股?\s*派(?:发)?\s*([0-9]+(?:\.[0-9]+)?)\s*元", + r"10\s*派\s*([0-9]+(?:\.[0-9]+)?)\s*元", + ): + match = re.search(pattern, text) + if match: + parsed = _safe_float(match.group(1)) + if parsed is not None and parsed > 0: + return parsed / 10.0 + + match_per_share = re.search(r"每\s*股\s*派(?:发)?\s*([0-9]+(?:\.[0-9]+)?)\s*元", text) + if match_per_share: + parsed = _safe_float(match_per_share.group(1)) + if parsed is not None and parsed > 0: + return parsed + return None + + +def _extract_cash_dividend_per_share(row: pd.Series) -> Optional[float]: + """Extract pre-tax cash dividend per share from a row.""" + plan_text = _safe_str(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["plan_text"])) + # Keep pre-tax semantics; skip explicit after-tax plans unless pre-tax marker exists. 
+ if "税后" in plan_text and "税前" not in plan_text and "含税" not in plan_text: + return None + + direct = _safe_float(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["per_share"])) + if direct is not None and direct > 0: + return direct + return _parse_dividend_plan_to_per_share(plan_text) + + +def _filter_rows_by_code(df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + if df is None or df.empty: + return pd.DataFrame() + code_cols = [c for c in df.columns if any(k in str(c) for k in ("代码", "股票代码", "证券代码", "symbol", "ts_code"))] + if not code_cols: + return df + + target = _normalize_code(stock_code) + for col in code_cols: + try: + series = df[col].astype(str).map(_normalize_code) + filtered = df[series == target] + if not filtered.empty: + return filtered + except Exception: + continue + return pd.DataFrame() + + +def _normalize_report_date(value: Any) -> Optional[str]: + parsed = _safe_datetime(value) + return parsed.date().isoformat() if parsed else None + + +def _build_dividend_payload( + dividend_df: pd.DataFrame, + stock_code: str, + max_events: int = 5, +) -> Dict[str, Any]: + work_df = _filter_rows_by_code(dividend_df, stock_code) + if work_df.empty: + return {} + + now_date = datetime.now().date() + ttm_start_date = now_date - timedelta(days=365) + dedupe_keys = set() + events: List[Dict[str, Any]] = [] + + for _, row in work_df.iterrows(): + if not isinstance(row, pd.Series): + continue + ex_dt = _safe_datetime(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["ex_dividend_date"])) + record_dt = _safe_datetime(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["record_date"])) + announce_dt = _safe_datetime(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["announce_date"])) + event_dt = ex_dt or record_dt or announce_dt + if event_dt is None: + continue + event_date = event_dt.date() + if event_date > now_date: + continue + + per_share = _extract_cash_dividend_per_share(row) + if per_share is None or per_share <= 0: + continue + + dedupe_key = (event_date.isoformat(), 
round(per_share, 6)) + if dedupe_key in dedupe_keys: + continue + dedupe_keys.add(dedupe_key) + + events.append( + { + "event_date": event_date.isoformat(), + "ex_dividend_date": ex_dt.date().isoformat() if ex_dt else None, + "record_date": record_dt.date().isoformat() if record_dt else None, + "announcement_date": announce_dt.date().isoformat() if announce_dt else None, + "cash_dividend_per_share": round(per_share, 6), + "is_pre_tax": True, + } + ) + + if not events: + return {} + + events.sort(key=lambda item: item.get("event_date") or "", reverse=True) + ttm_events: List[Dict[str, Any]] = [] + for item in events: + event_dt = _safe_datetime(item.get("event_date")) + if event_dt is None: + continue + event_date = event_dt.date() + if ttm_start_date <= event_date <= now_date: + ttm_events.append(item) + + return { + "events": events[:max(1, max_events)], + "ttm_event_count": len(ttm_events), + "ttm_cash_dividend_per_share": ( + round(sum(float(item.get("cash_dividend_per_share") or 0.0) for item in ttm_events), 6) + if ttm_events else None + ), + "coverage": "cash_dividend_pre_tax", + "as_of": now_date.isoformat(), + } + + +def _extract_latest_row(df: pd.DataFrame, stock_code: str) -> Optional[pd.Series]: + """ + Select the most relevant row for the given stock. 
class AkshareFundamentalAdapter:
    """AkShare adapter for fundamentals, capital flow and dragon-tiger signals.

    All public methods are fail-open: they never raise to the caller, tolerate
    partial data, and report a ``status`` of ``ok`` / ``partial`` /
    ``not_supported`` together with an ``errors`` diagnostic trail.
    """

    def _call_df_candidates(
        self,
        candidates: List[Tuple[str, Dict[str, Any]]],
    ) -> Tuple[Optional[pd.DataFrame], Optional[str], List[str]]:
        """Try AkShare endpoint candidates in order; return the first non-empty frame.

        Args:
            candidates: (function_name, kwargs) pairs, probed against the
                installed ``akshare`` module.

        Returns:
            (df, func_name, errors): the first non-empty DataFrame and the
            endpoint that produced it, plus a trail of failures so far.
            (None, None, errors) when nothing succeeded.
        """
        errors: List[str] = []
        try:
            import akshare as ak
        except Exception as exc:
            return None, None, [f"import_akshare:{type(exc).__name__}"]

        for func_name, kwargs in candidates:
            fn = getattr(ak, func_name, None)
            if fn is None:
                # Improvement: surface renamed/removed endpoints in the error
                # trail instead of skipping silently — helps diagnose AkShare
                # version drift. Informational only; callers just log errors.
                errors.append(f"{func_name}:unavailable")
                continue
            try:
                df = fn(**kwargs)
                if isinstance(df, pd.Series):
                    # Some endpoints return a Series for single records.
                    df = df.to_frame().T
                if isinstance(df, pd.DataFrame) and not df.empty:
                    return df, func_name, errors
            except Exception as exc:
                errors.append(f"{func_name}:{type(exc).__name__}")
                continue
        return None, None, errors

    def get_fundamental_bundle(self, stock_code: str) -> Dict[str, Any]:
        """
        Return normalized fundamental blocks from AkShare with partial tolerance.

        Args:
            stock_code: Stock code (digits, with or without exchange affix).

        Returns:
            Dict with ``growth`` / ``earnings`` / ``institution`` blocks,
            ``source_chain`` (which endpoint fed each block), ``errors`` and
            ``status`` ("partial" when any block was filled, else
            "not_supported"). Never raises.
        """
        result: Dict[str, Any] = {
            "status": "not_supported",
            "growth": {},
            "earnings": {},
            "institution": {},
            "source_chain": [],
            "errors": [],
        }

        # Financial indicators
        fin_df, fin_source, fin_errors = self._call_df_candidates([
            ("stock_financial_abstract", {"symbol": stock_code}),
            ("stock_financial_analysis_indicator", {"symbol": stock_code}),
            ("stock_financial_analysis_indicator", {}),
        ])
        result["errors"].extend(fin_errors)
        if fin_df is not None:
            row = _extract_latest_row(fin_df, stock_code)
            if row is not None:
                revenue_yoy = _safe_float(_pick_by_keywords(row, ["营业收入同比", "营收同比", "收入同比", "同比增长"]))
                profit_yoy = _safe_float(_pick_by_keywords(row, ["净利润同比", "净利同比", "归母净利润同比"]))
                roe = _safe_float(_pick_by_keywords(row, ["净资产收益率", "ROE", "净资产收益"]))
                gross_margin = _safe_float(_pick_by_keywords(row, ["毛利率"]))
                report_date = _normalize_report_date(_pick_by_keywords(row, _DIVIDEND_KEYWORD_MAP["report_date"]))
                revenue = _safe_float(_pick_by_keywords(row, ["营业总收入", "营业收入", "营收"]))
                net_profit_parent = _safe_float(_pick_by_keywords(row, ["归母净利润", "母公司股东净利润", "净利润"]))
                operating_cash_flow = _safe_float(
                    _pick_by_keywords(row, ["经营活动产生的现金流量净额", "经营现金流", "经营活动现金流"])
                )
                result["growth"] = {
                    "revenue_yoy": revenue_yoy,
                    "net_profit_yoy": profit_yoy,
                    "roe": roe,
                    "gross_margin": gross_margin,
                }
                financial_report_payload = {
                    "report_date": report_date,
                    "revenue": revenue,
                    "net_profit_parent": net_profit_parent,
                    "operating_cash_flow": operating_cash_flow,
                    "roe": roe,
                }
                # Only attach the report when at least one field was found.
                if any(v is not None for v in financial_report_payload.values()):
                    result["earnings"]["financial_report"] = financial_report_payload
            result["source_chain"].append(f"growth:{fin_source}")

        # Earnings forecast
        forecast_df, forecast_source, forecast_errors = self._call_df_candidates([
            ("stock_yjyg_em", {"symbol": stock_code}),
            ("stock_yjyg_em", {}),
            ("stock_yjbb_em", {"symbol": stock_code}),
            ("stock_yjbb_em", {}),
        ])
        result["errors"].extend(forecast_errors)
        if forecast_df is not None:
            row = _extract_latest_row(forecast_df, stock_code)
            if row is not None:
                result["earnings"]["forecast_summary"] = _safe_str(
                    _pick_by_keywords(row, ["预告", "业绩变动", "内容", "摘要", "公告"])
                )[:200]
            result["source_chain"].append(f"earnings_forecast:{forecast_source}")

        # Earnings quick report
        quick_df, quick_source, quick_errors = self._call_df_candidates([
            ("stock_yjkb_em", {"symbol": stock_code}),
            ("stock_yjkb_em", {}),
        ])
        result["errors"].extend(quick_errors)
        if quick_df is not None:
            row = _extract_latest_row(quick_df, stock_code)
            if row is not None:
                result["earnings"]["quick_report_summary"] = _safe_str(
                    _pick_by_keywords(row, ["快报", "摘要", "公告", "说明"])
                )[:200]
            result["source_chain"].append(f"earnings_quick:{quick_source}")

        # Dividend details (cash dividend, pre-tax)
        dividend_df, dividend_source, dividend_errors = self._call_df_candidates([
            ("stock_fhps_detail_em", {"symbol": stock_code}),
            ("stock_history_dividend_detail", {"symbol": stock_code, "indicator": "分红", "date": ""}),
            ("stock_dividend_cninfo", {"symbol": stock_code}),
        ])
        result["errors"].extend(dividend_errors)
        if dividend_df is not None:
            dividend_payload = _build_dividend_payload(dividend_df, stock_code, max_events=5)
            if dividend_payload:
                result["earnings"]["dividend"] = dividend_payload
            result["source_chain"].append(f"dividend:{dividend_source}")

        # Institution / top shareholders
        inst_df, inst_source, inst_errors = self._call_df_candidates([
            ("stock_institute_hold", {}),
            ("stock_institute_recommend", {}),
        ])
        result["errors"].extend(inst_errors)
        if inst_df is not None:
            row = _extract_latest_row(inst_df, stock_code)
            if row is not None:
                inst_change = _safe_float(_pick_by_keywords(row, ["增减", "变化", "变动", "持股变化"]))
                result["institution"]["institution_holding_change"] = inst_change
            result["source_chain"].append(f"institution:{inst_source}")

        top10_df, top10_source, top10_errors = self._call_df_candidates([
            ("stock_gdfx_top_10_em", {"symbol": stock_code}),
            ("stock_gdfx_top_10_em", {}),
            ("stock_zh_a_gdhs_detail_em", {"symbol": stock_code}),
            ("stock_zh_a_gdhs_detail_em", {}),
        ])
        result["errors"].extend(top10_errors)
        if top10_df is not None:
            row = _extract_latest_row(top10_df, stock_code)
            if row is not None:
                holder_change = _safe_float(_pick_by_keywords(row, ["增减", "变化", "持股变化", "变动"]))
                result["institution"]["top10_holder_change"] = holder_change
            result["source_chain"].append(f"top10:{top10_source}")

        has_content = bool(result["growth"] or result["earnings"] or result["institution"])
        result["status"] = "partial" if has_content else "not_supported"
        return result

    def get_capital_flow(self, stock_code: str, top_n: int = 5) -> Dict[str, Any]:
        """
        Return stock + sector capital flow.

        Args:
            stock_code: Stock code.
            top_n: Number of top/bottom sector rows to keep.

        Returns:
            Dict with ``stock_flow``, ``sector_rankings`` (top/bottom lists),
            ``source_chain``, ``errors`` and ``status``. Never raises.
        """
        result: Dict[str, Any] = {
            "status": "not_supported",
            "stock_flow": {},
            "sector_rankings": {"top": [], "bottom": []},
            "source_chain": [],
            "errors": [],
        }

        stock_df, stock_source, stock_errors = self._call_df_candidates([
            ("stock_individual_fund_flow", {"stock": stock_code}),
            ("stock_individual_fund_flow", {"symbol": stock_code}),
            ("stock_individual_fund_flow", {}),
            ("stock_main_fund_flow", {"symbol": stock_code}),
            ("stock_main_fund_flow", {}),
        ])
        result["errors"].extend(stock_errors)
        if stock_df is not None:
            row = _extract_latest_row(stock_df, stock_code)
            if row is not None:
                net_inflow = _safe_float(_pick_by_keywords(row, ["主力净流入", "净流入", "净额"]))
                inflow_5d = _safe_float(_pick_by_keywords(row, ["5日", "五日"]))
                inflow_10d = _safe_float(_pick_by_keywords(row, ["10日", "十日"]))
                result["stock_flow"] = {
                    "main_net_inflow": net_inflow,
                    "inflow_5d": inflow_5d,
                    "inflow_10d": inflow_10d,
                }
            result["source_chain"].append(f"capital_stock:{stock_source}")

        sector_df, sector_source, sector_errors = self._call_df_candidates([
            ("stock_sector_fund_flow_rank", {}),
            ("stock_sector_fund_flow_summary", {}),
        ])
        result["errors"].extend(sector_errors)
        if sector_df is not None:
            # Column names vary by endpoint/version; locate them by keyword.
            name_col = next((c for c in sector_df.columns if any(k in str(c) for k in ("板块", "行业", "名称", "name"))), None)
            flow_col = next((c for c in sector_df.columns if any(k in str(c) for k in ("净流入", "主力", "flow", "净额"))), None)
            if name_col and flow_col:
                work_df = sector_df[[name_col, flow_col]].copy()
                work_df[flow_col] = pd.to_numeric(work_df[flow_col], errors="coerce")
                work_df = work_df.dropna(subset=[flow_col])
                top_df = work_df.nlargest(top_n, flow_col)
                bottom_df = work_df.nsmallest(top_n, flow_col)
                result["sector_rankings"] = {
                    "top": [{"name": _safe_str(r[name_col]), "net_inflow": float(r[flow_col])} for _, r in top_df.iterrows()],
                    "bottom": [{"name": _safe_str(r[name_col]), "net_inflow": float(r[flow_col])} for _, r in bottom_df.iterrows()],
                }
            result["source_chain"].append(f"capital_sector:{sector_source}")

        has_content = bool(result["stock_flow"] or result["sector_rankings"]["top"] or result["sector_rankings"]["bottom"])
        result["status"] = "partial" if has_content else "not_supported"
        return result

    def get_dragon_tiger_flag(self, stock_code: str, lookback_days: int = 20) -> Dict[str, Any]:
        """
        Return dragon-tiger signal in lookback window.

        Args:
            stock_code: Stock code.
            lookback_days: Window (days) for "recent" listing events.

        Returns:
            Dict with ``is_on_list``, ``recent_count``, ``latest_date``,
            ``source_chain``, ``errors`` and ``status``. Never raises.
        """
        result: Dict[str, Any] = {
            "status": "not_supported",
            "is_on_list": False,
            "recent_count": 0,
            "latest_date": None,
            "source_chain": [],
            "errors": [],
        }

        df, source, errors = self._call_df_candidates([
            ("stock_lhb_stock_statistic_em", {}),
            ("stock_lhb_detail_em", {}),
            ("stock_lhb_jgmmtj_em", {}),
        ])
        result["errors"].extend(errors)
        if df is None:
            return result

        # Try code filter
        code_cols = [c for c in df.columns if any(k in str(c) for k in ("代码", "股票代码", "证券代码"))]
        target = _normalize_code(stock_code)
        matched = pd.DataFrame()
        for col in code_cols:
            try:
                series = df[col].astype(str).map(_normalize_code)
                cur = df[series == target]
                if not cur.empty:
                    matched = cur
                    break
            except Exception:
                continue
        if matched.empty:
            result["source_chain"].append(f"dragon_tiger:{source}")
            # With a code column present the absence is a real "not listed"
            # (status ok); without one we could not actually check (partial).
            result["status"] = "ok" if code_cols else "partial"
            return result

        date_col = next((c for c in matched.columns if any(k in str(c) for k in ("日期", "上榜", "交易日", "time"))), None)
        parsed_dates: List[datetime] = []
        if date_col is not None:
            for val in matched[date_col].astype(str).tolist():
                try:
                    parsed_dates.append(pd.to_datetime(val).to_pydatetime())
                except Exception:
                    continue
        now = datetime.now()
        start = now - timedelta(days=max(1, lookback_days))
        recent_dates = [d for d in parsed_dates if start <= d <= now]

        result["is_on_list"] = bool(recent_dates)
        # Fall back to total row count when no dates fell in the window (or
        # no date column could be parsed at all).
        result["recent_count"] = len(recent_dates) if recent_dates else int(len(matched))
        result["latest_date"] = max(recent_dates).date().isoformat() if recent_dates else (
            max(parsed_dates).date().isoformat() if parsed_dates else None
        )
        result["status"] = "ok"
        result["source_chain"].append(f"dragon_tiger:{source}")
        return result
+=================================== +LongbridgeFetcher - 长桥兜底数据源 (Priority 5) +=================================== + +数据来源:长桥 OpenAPI (https://open.longbridge.com) +特点:覆盖美股 + 港股,可计算量比/换手率/PE 等 yfinance 缺失字段 +定位:美股/港股最后兜底数据源 + +关键策略: +1. 组合 quote + static_info 接口计算 turnover_rate / pe_ratio / total_mv +2. 通过 history_candlesticks 计算 volume_ratio(近5日均量比) +3. 懒加载 QuoteContext,首次调用时才建立连接 +4. static_info 进程内短缓存,减少重复请求(默认 24h,可调;见 LONGBRIDGE_STATIC_INFO_TTL_SECONDS) + +凭证:`LONGBRIDGE_APP_KEY` / `LONGBRIDGE_APP_SECRET` / `LONGBRIDGE_ACCESS_TOKEN`。 +可选:`LONGBRIDGE_STATIC_INFO_TTL_SECONDS`;SDK `language` 取自 `REPORT_LANGUAGE`,`log_path` 为 `{LOG_DIR}/longbridge_sdk.log`; +`LONGBRIDGE_HTTP_URL` / `LONGBRIDGE_QUOTE_WS_URL` / `LONGBRIDGE_TRADE_WS_URL` / `LONGBRIDGE_REGION` (见官方文档默认值)。 +""" + +import logging +import os +import time +import threading +from datetime import datetime, timedelta +from pathlib import Path +from typing import Optional, Dict, Any + +import pandas as pd + +from .base import BaseFetcher, STANDARD_COLUMNS +from .realtime_types import UnifiedRealtimeQuote, RealtimeSource, safe_float +from .us_index_mapping import is_us_stock_code, is_us_index_code + +logger = logging.getLogger(__name__) + +_DEFAULT_STATIC_INFO_TTL = 86400 # 24h + + +def _static_info_ttl_seconds() -> int: + """TTL for static_info cache; 0 disables caching (always fetch).""" + raw = os.getenv("LONGBRIDGE_STATIC_INFO_TTL_SECONDS", "").strip() + if raw == "": + return _DEFAULT_STATIC_INFO_TTL + try: + return max(0, int(raw)) + except ValueError: + return _DEFAULT_STATIC_INFO_TTL + + +_REGION_URL_MAP: Dict[str, Dict[str, str]] = { + "cn": { + "http_url": "https://openapi.longbridge.cn", + "quote_ws_url": "wss://openapi-quote.longbridge.cn/v2", + "trade_ws_url": "wss://openapi-trade.longbridge.cn/v2", + }, + "hk": { + "http_url": "https://openapi.longbridge.com", + "quote_ws_url": "wss://openapi-quote.longbridge.com/v2", + "trade_ws_url": "wss://openapi-trade.longbridge.com/v2", + }, +} + + +def 
def _sanitize_longbridge_env() -> None:
    """Remove empty-string LONGBRIDGE_* env vars and apply region defaults.

    GitHub Actions sets ``LONGBRIDGE_HTTP_URL: ${{ vars.X || secrets.X }}``
    which resolves to an empty string ``""`` when neither var nor secret is
    configured. The Rust SDK's ``Config.from_apikey()`` auto-reads these env
    vars, and an empty string is *not* the same as "unset" — it makes the SDK
    use a blank URL, breaking the WebSocket handshake ("context dropped" /
    "Client is closed" within milliseconds).

    Also mirrors ``LONGBRIDGE_REGION`` → ``LONGPORT_REGION`` because the Rust
    SDK's internal ``is_cn()`` only checks ``LONGPORT_REGION`` when choosing
    its default endpoints, and fills the endpoint URL env vars from
    ``_REGION_URL_MAP`` when a known region is configured.
    """
    scrub_keys = (
        "LONGBRIDGE_HTTP_URL",
        "LONGBRIDGE_QUOTE_WS_URL",
        "LONGBRIDGE_TRADE_WS_URL",
        "LONGBRIDGE_ENABLE_OVERNIGHT",
        "LONGBRIDGE_PUSH_CANDLESTICK_MODE",
        "LONGBRIDGE_PRINT_QUOTE_PACKAGES",
        "LONGBRIDGE_REGION",
        "LONGBRIDGE_STATIC_INFO_TTL_SECONDS",
        "LONGBRIDGE_LOG_PATH",
    )
    for key in scrub_keys:
        current = os.environ.get(key)
        if current is not None and not current.strip():
            del os.environ[key]
            logger.debug("[Longbridge] 删除空环境变量 %s", key)

    # App default: quiet (false). Matches README / docs/full-guide /
    # .env.example; the SDK alone may default to verbose.
    if "LONGBRIDGE_PRINT_QUOTE_PACKAGES" not in os.environ:
        os.environ["LONGBRIDGE_PRINT_QUOTE_PACKAGES"] = "false"

    if not os.environ.get("LONGBRIDGE_LOG_PATH"):
        try:
            log_dir = (os.getenv("LOG_DIR") or "./logs").strip() or "./logs"
            log_root = Path(log_dir).expanduser()
            log_root.mkdir(parents=True, exist_ok=True)
            os.environ["LONGBRIDGE_LOG_PATH"] = str(log_root / "longbridge_sdk.log")
            logger.debug("[Longbridge] 设置 LONGBRIDGE_LOG_PATH=%s",
                         os.environ["LONGBRIDGE_LOG_PATH"])
        except Exception:
            # Best-effort only: a read-only FS must not block startup.
            pass

    region = (os.getenv("LONGBRIDGE_REGION") or "").strip().lower()
    if not region:
        return

    if not os.environ.get("LONGPORT_REGION"):
        os.environ["LONGPORT_REGION"] = region
        logger.debug("[Longbridge] 同步 LONGPORT_REGION=%s", region)

    region_urls = _REGION_URL_MAP.get(region, {})
    url_defaults = (
        ("LONGBRIDGE_HTTP_URL", region_urls.get("http_url")),
        ("LONGBRIDGE_QUOTE_WS_URL", region_urls.get("quote_ws_url")),
        ("LONGBRIDGE_TRADE_WS_URL", region_urls.get("trade_ws_url")),
    )
    for env_name, default_url in url_defaults:
        if default_url and not os.environ.get(env_name):
            os.environ[env_name] = default_url
            logger.debug("[Longbridge] 根据 REGION=%s 设置 %s=%s",
                         region, env_name, default_url)
+ raw = os.getenv("LONGBRIDGE_PRINT_QUOTE_PACKAGES") + if raw is None or not str(raw).strip(): + kw["enable_print_quote_packages"] = False + else: + raw_norm = str(raw).strip().lower() + kw["enable_print_quote_packages"] = raw_norm not in ("0", "false", "no") + + for pname, envname in ( + ("http_url", "LONGBRIDGE_HTTP_URL"), + ("quote_ws_url", "LONGBRIDGE_QUOTE_WS_URL"), + ("trade_ws_url", "LONGBRIDGE_TRADE_WS_URL"), + ): + if pname in params: + v = os.getenv(envname, "").strip() + if v: + kw[pname] = v + + if "language" in params: + try: + from src.report_language import normalize_report_language + + rl = normalize_report_language(os.getenv("REPORT_LANGUAGE"), default="zh") + if rl == "zh": + kw["language"] = Language.ZH_CN + elif rl == "en": + kw["language"] = Language.EN + except Exception as e: + logger.debug("Longbridge language from REPORT_LANGUAGE skipped: %s", e) + + if "enable_overnight" in params: + o = os.getenv("LONGBRIDGE_ENABLE_OVERNIGHT", "").strip().lower() + if o: + kw["enable_overnight"] = o in ("1", "true", "yes") + + if "push_candlestick_mode" in params: + cm = os.getenv("LONGBRIDGE_PUSH_CANDLESTICK_MODE", "").strip().lower() + if cm == "realtime": + kw["push_candlestick_mode"] = PushCandlestickMode.Realtime + elif cm == "confirmed": + kw["push_candlestick_mode"] = PushCandlestickMode.Confirmed + elif cm: + logger.warning( + "Unknown LONGBRIDGE_PUSH_CANDLESTICK_MODE=%r; use realtime or confirmed", cm + ) + + if "log_path" in params: + try: + log_dir = (os.getenv("LOG_DIR") or "./logs").strip() or "./logs" + p = Path(log_dir).expanduser() + p.mkdir(parents=True, exist_ok=True) + kw["log_path"] = str(p / "longbridge_sdk.log") + except Exception as e: + logger.debug("Longbridge log_path from LOG_DIR skipped: %s", e) + + return kw + + +def _is_us_code(stock_code: str) -> bool: + normalized = stock_code.strip().upper() + return is_us_stock_code(normalized) or is_us_index_code(normalized) + + +def _is_hk_code(stock_code: str) -> bool: + normalized = 
(stock_code or "").strip().upper() + if normalized.startswith("HK"): + digits = normalized[2:] + return digits.isdigit() and 1 <= len(digits) <= 5 + if normalized.endswith(".HK"): + return True + if normalized.isdigit() and len(normalized) == 5: + return True + return False + + +def _to_longbridge_symbol(stock_code: str) -> Optional[str]: + """Convert internal stock code to Longbridge symbol format. + + Examples: + AAPL -> AAPL.US + HK00700 -> 0700.HK + 00700 -> 0700.HK (5-digit pure number treated as HK) + """ + code = stock_code.strip() + upper = code.upper() + + if upper.endswith(".US"): + return upper + if upper.endswith(".HK"): + return upper + + if _is_us_code(code): + return f"{upper}.US" + + if _is_hk_code(code): + upper = code.upper() + if upper.startswith("HK"): + digits = upper[2:] + else: + digits = upper + digits = digits.lstrip("0") or "0" + return f"{digits.zfill(4)}.HK" + + return None + + +class LongbridgeFetcher(BaseFetcher): + """ + 长桥 OpenAPI 数据源实现 + + 优先级: 5(最低,作为美股/港股最后兜底) + 数据来源: Longbridge OpenAPI + + 通过组合多个 API 计算 yfinance 缺失的指标: + - turnover_rate = volume / circulating_shares * 100 + - volume_ratio = today_volume / avg_5day_volume + - pe_ratio = price / eps_ttm + """ + + name = "LongbridgeFetcher" + priority = int(os.getenv("LONGBRIDGE_PRIORITY", "5")) + + _CONNECTION_ERRORS = ("client is closed", "context closed", "connection closed") + + def __init__(self): + self._ctx = None + self._config = None + self._ctx_lock = threading.Lock() + self._available = None + # {symbol: (StaticInfo, timestamp)} + self._static_cache: Dict[str, Any] = {} + self._static_cache_lock = threading.Lock() + + def _is_connection_error(self, exc: Exception) -> bool: + msg = str(exc).lower() + return any(s in msg for s in self._CONNECTION_ERRORS) + + def _invalidate_ctx(self): + """Reset cached context so the next call rebuilds the connection.""" + with self._ctx_lock: + self._ctx = None + self._config = None + + def _is_available(self) -> bool: + """Check if 
Longbridge credentials are configured.""" + if self._available is not None: + return self._available + try: + from src.config import get_config + config = get_config() + has_creds = bool( + config.longbridge_app_key + and config.longbridge_app_secret + and config.longbridge_access_token + ) + except Exception: + has_creds = bool( + os.getenv("LONGBRIDGE_APP_KEY") + and os.getenv("LONGBRIDGE_APP_SECRET") + and os.getenv("LONGBRIDGE_ACCESS_TOKEN") + ) + self._available = has_creds + return has_creds + + def _get_ctx(self): + """Lazy-init the QuoteContext (thread-safe).""" + if self._ctx is not None: + return self._ctx + with self._ctx_lock: + if self._ctx is not None: + return self._ctx + if not self._is_available(): + return None + try: + from longbridge.openapi import QuoteContext, Config + + # ── 1. Clean up empty URL env vars & apply REGION mapping ── + _sanitize_longbridge_env() + + # ── 2. Ensure credentials are available in env ── + try: + from src.config import get_config + app_config = get_config() + app_key = app_config.longbridge_app_key + app_secret = app_config.longbridge_app_secret + access_token = app_config.longbridge_access_token + except Exception: + app_key = os.getenv("LONGBRIDGE_APP_KEY") + app_secret = os.getenv("LONGBRIDGE_APP_SECRET") + access_token = os.getenv("LONGBRIDGE_ACCESS_TOKEN") + + for k, v in { + "LONGBRIDGE_APP_KEY": app_key, + "LONGBRIDGE_APP_SECRET": app_secret, + "LONGBRIDGE_ACCESS_TOKEN": access_token, + }.items(): + if v and not os.environ.get(k): + os.environ[k] = v + + # ── 3. Build Config ── + extra_kw = _longbridge_config_kwargs() + lb_config = None + + # Prefer from_apikey_env() — reads all LONGBRIDGE_* env vars + # (credentials + URLs + options) including .env files. + # Available in longbridge >= 4.x. from_env() only exists on + # the unreleased master branch. 
+ for factory_name in ("from_apikey_env", "from_env"): + factory = getattr(Config, factory_name, None) + if factory is None: + continue + try: + lb_config = factory() + logger.info("[Longbridge] Config.%s() 成功", factory_name) + break + except Exception as e: + logger.debug( + "[Longbridge] Config.%s() 失败: %s", factory_name, e + ) + + if lb_config is None: + lb_config = Config.from_apikey( + app_key, + app_secret, + access_token, + **extra_kw, + ) + logger.info("[Longbridge] Config.from_apikey() 创建成功") + + # Diagnostic logging + region = os.getenv("LONGBRIDGE_REGION") or os.getenv("LONGPORT_REGION") or "(auto)" + logger.info( + "[Longbridge] 配置: region=%s, http=%s, quote_ws=%s", + region, + os.getenv("LONGBRIDGE_HTTP_URL", "(default)"), + os.getenv("LONGBRIDGE_QUOTE_WS_URL", "(default)"), + ) + + self._config = lb_config + self._ctx = QuoteContext(lb_config) + logger.info("[Longbridge] QuoteContext 初始化成功") + return self._ctx + except Exception as e: + logger.warning("[Longbridge] QuoteContext 初始化失败: %s", e) + self._available = False + return None + + # ------------------------------------------------------------------ + # static_info with cache + # ------------------------------------------------------------------ + + def _get_static_info(self, symbol: str) -> Optional[Any]: + """Fetch static info (shares, EPS, BPS, name) with optional in-process TTL cache.""" + ttl = _static_info_ttl_seconds() + now = time.time() + if ttl > 0: + with self._static_cache_lock: + cached = self._static_cache.get(symbol) + if cached and (now - cached[1]) < ttl: + return cached[0] + + ctx = self._get_ctx() + if ctx is None: + return None + try: + infos = ctx.static_info([symbol]) + if infos: + info = infos[0] + if ttl > 0: + with self._static_cache_lock: + self._static_cache[symbol] = (info, now) + return info + except Exception as e: + logger.debug(f"[Longbridge] static_info({symbol}) 失败: {e}") + if self._is_connection_error(e): + self._invalidate_ctx() + return None + + # 
------------------------------------------------------------------ + # get_stock_name via static_info + # ------------------------------------------------------------------ + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """Return stock name from Longbridge static_info (name_cn or name_en).""" + symbol = _to_longbridge_symbol(stock_code) + if symbol is None: + return None + info = self._get_static_info(symbol) + if info is None: + return None + name = getattr(info, "name_cn", "") or getattr(info, "name_en", "") or "" + return name.strip() or None + + # ------------------------------------------------------------------ + # volume_ratio from history + # ------------------------------------------------------------------ + + def _ts_sort_key(self, candle: Any) -> float: + """Monotonic sort key for a candle timestamp (UTC seconds or datetime).""" + ts = getattr(candle, "timestamp", None) + if ts is None: + return 0.0 + if hasattr(ts, "timestamp"): + return float(ts.timestamp()) + return float(int(ts)) + + def _compute_volume_ratio(self, symbol: str, today_volume: int) -> Optional[float]: + """Compute volume_ratio = today_volume / avg(recent completed daily volumes). + + Uses the most recent daily bar as \"today/incomplete\" reference window: average + volume of the next 5 older daily bars. Avoids local `date.today()` matching, which + breaks for US symbols when the shell runs in CN timezone. 
+ """ + if not today_volume or today_volume <= 0: + return None + ctx = self._get_ctx() + if ctx is None: + return None + try: + from longbridge.openapi import Period, AdjustType + + candles = ctx.history_candlesticks_by_offset( + symbol, + Period.Day, + AdjustType.NoAdjust, + False, + 6, + datetime.now(), + ) + if not candles or len(candles) < 2: + return None + + ordered = sorted(candles, key=self._ts_sort_key, reverse=True) + past_vols: list = [] + for c in ordered[1:6]: + vol = int(getattr(c, "volume", 0) or 0) + if vol > 0: + past_vols.append(vol) + + if not past_vols: + return None + + avg_vol = sum(past_vols) / len(past_vols) + if avg_vol <= 0: + return None + + return round(today_volume / avg_vol, 2) + except Exception as e: + logger.debug(f"[Longbridge] 计算量比失败({symbol}): {e}") + return None + + # ------------------------------------------------------------------ + # get_realtime_quote + # ------------------------------------------------------------------ + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """Fetch realtime quote from Longbridge, computing derived fields.""" + if not self._is_available(): + return None + + symbol = _to_longbridge_symbol(stock_code) + if symbol is None: + logger.debug(f"[Longbridge] 无法转换代码: {stock_code}") + return None + + ctx = self._get_ctx() + if ctx is None: + return None + + try: + quotes = ctx.quote([symbol]) + if not quotes: + return None + q = quotes[0] + except Exception as e: + logger.info(f"[Longbridge] quote({symbol}) 失败: {e}") + if self._is_connection_error(e): + logger.warning("[Longbridge] 检测到连接已断开,将在下次调用时重建连接") + self._invalidate_ctx() + return None + + price = safe_float(getattr(q, "last_done", None)) + if price is None or price <= 0: + return None + + prev_close = safe_float(getattr(q, "prev_close", None)) + open_price = safe_float(getattr(q, "open", None)) + high = safe_float(getattr(q, "high", None)) + low = safe_float(getattr(q, "low", None)) + volume = int(getattr(q, 
"volume", 0) or 0) + turnover = safe_float(getattr(q, "turnover", None)) + + change_amount = None + change_pct = None + amplitude = None + if prev_close and prev_close > 0: + change_amount = round(price - prev_close, 4) + change_pct = round((price - prev_close) / prev_close * 100, 2) + if high is not None and low is not None: + amplitude = round((high - low) / prev_close * 100, 2) + + # Fetch static info for derived fields + static = self._get_static_info(symbol) + + turnover_rate = None + pe_ratio = None + pb_ratio = None + total_mv = None + circ_mv = None + name = "" + + if static is not None: + name = getattr(static, "name_cn", "") or getattr(static, "name_en", "") or "" + circulating = int(getattr(static, "circulating_shares", 0) or 0) + total_shares = int(getattr(static, "total_shares", 0) or 0) + eps_ttm = safe_float(getattr(static, "eps_ttm", None)) + eps_plain = safe_float(getattr(static, "eps", None)) + bps = safe_float(getattr(static, "bps", None)) + + # US names often report circulating_shares=0 while total_shares is set — use total for turnover. 
+ shares_for_turnover = circulating if circulating > 0 else total_shares + if shares_for_turnover > 0 and volume > 0: + turnover_rate = round(volume / shares_for_turnover * 100, 4) + elif volume > 0: + logger.debug( + "[Longbridge] %s 无法计算换手率: volume=%s circulating=%s total_shares=%s", + symbol, + volume, + circulating, + total_shares, + ) + + eps_for_pe = None + if eps_ttm is not None and eps_ttm > 0: + eps_for_pe = eps_ttm + elif eps_plain is not None and eps_plain > 0: + eps_for_pe = eps_plain + if eps_for_pe: + pe_ratio = round(price / eps_for_pe, 2) + + if bps is not None and bps > 0: + pb_ratio = round(price / bps, 2) + if total_shares > 0: + total_mv = round(price * total_shares, 2) + if circulating > 0: + circ_mv = round(price * circulating, 2) + + volume_ratio = self._compute_volume_ratio(symbol, volume) + + quote = UnifiedRealtimeQuote( + code=stock_code, + name=name, + source=RealtimeSource.LONGBRIDGE, + price=price, + change_pct=change_pct, + change_amount=change_amount, + volume=volume if volume > 0 else None, + amount=turnover, + volume_ratio=volume_ratio, + turnover_rate=turnover_rate, + amplitude=amplitude, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=pe_ratio, + pb_ratio=pb_ratio, + total_mv=total_mv, + circ_mv=circ_mv, + ) + + logger.info( + f"[Longbridge] {symbol} 行情获取成功: " + f"价格={price}, 量比={volume_ratio}, 换手率={turnover_rate}" + ) + return quote + + # ------------------------------------------------------------------ + # BaseFetcher abstract methods (historical daily data) + # ------------------------------------------------------------------ + + def _fetch_raw_data( + self, stock_code: str, start_date: str, end_date: str + ) -> pd.DataFrame: + """Fetch historical candlesticks from Longbridge.""" + symbol = _to_longbridge_symbol(stock_code) + if symbol is None: + raise ValueError(f"Cannot convert {stock_code} to Longbridge symbol") + + ctx = self._get_ctx() + if ctx is None: + raise 
RuntimeError("Longbridge QuoteContext not available") + + from longbridge.openapi import Period, AdjustType + + start_dt = datetime.strptime(start_date, "%Y-%m-%d").date() + end_dt = datetime.strptime(end_date, "%Y-%m-%d").date() + + try: + candles = ctx.history_candlesticks_by_date( + symbol, + Period.Day, + AdjustType.ForwardAdjust, + start_dt, + end_dt, + ) + except Exception as e: + if self._is_connection_error(e): + logger.warning("[Longbridge] 检测到连接已断开,将在下次调用时重建连接") + self._invalidate_ctx() + raise + + if not candles: + return pd.DataFrame() + + rows = [] + for c in candles: + ts = getattr(c, "timestamp", None) + if ts is None: + continue + if hasattr(ts, "date"): + dt = ts.date() + else: + dt = datetime.fromtimestamp(int(ts)).date() + + rows.append({ + "date": dt.strftime("%Y-%m-%d"), + "open": safe_float(getattr(c, "open", None)), + "high": safe_float(getattr(c, "high", None)), + "low": safe_float(getattr(c, "low", None)), + "close": safe_float(getattr(c, "close", None)), + "volume": int(getattr(c, "volume", 0) or 0), + "turnover": safe_float(getattr(c, "turnover", None)), + }) + + return pd.DataFrame(rows) + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """Normalize column names to standard format.""" + if df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + rename_map = {"turnover": "amount"} + df = df.rename(columns=rename_map) + + if "pct_chg" not in df.columns and "close" in df.columns: + df["pct_chg"] = df["close"].pct_change() * 100 + + for col in STANDARD_COLUMNS: + if col not in df.columns: + df[col] = None + + return df[STANDARD_COLUMNS] diff --git a/src/provider/pytdx_fetcher.py b/src/provider/pytdx_fetcher.py new file mode 100644 index 00000000..6455dac3 --- /dev/null +++ b/src/provider/pytdx_fetcher.py @@ -0,0 +1,469 @@ +# -*- coding: utf-8 -*- +""" +=================================== +PytdxFetcher - 通达信数据源 (Priority 2) +=================================== + +数据来源:通达信行情服务器(pytdx 库) +特点:免费、无需 
Token、直连行情服务器 +优点:实时数据、稳定、无配额限制 + +关键策略: +1. 多服务器自动切换 +2. 连接超时自动重连 +3. 失败后指数退避重试 +""" + +import logging +import re +from contextlib import contextmanager +from typing import Optional, Generator, List, Tuple + +import pandas as pd +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, STANDARD_COLUMNS, is_bse_code, _is_hk_market +import os + +logger = logging.getLogger(__name__) + + +def _parse_hosts_from_env() -> Optional[List[Tuple[str, int]]]: + """ + 从环境变量构建通达信服务器列表。 + + 优先级: + 1. PYTDX_SERVERS:逗号分隔 "ip:port,ip:port"(如 "192.168.1.1:7709,10.0.0.1:7709") + 2. PYTDX_HOST + PYTDX_PORT:单个服务器 + 3. 均未配置时返回 None(调用方使用 DEFAULT_HOSTS) + """ + servers = os.getenv("PYTDX_SERVERS", "").strip() + if servers: + result = [] + for part in servers.split(","): + part = part.strip() + if ":" in part: + host, port_str = part.rsplit(":", 1) + host, port_str = host.strip(), port_str.strip() + if host and port_str: + try: + result.append((host, int(port_str))) + except ValueError: + logger.warning(f"Invalid PYTDX_SERVERS entry: {part}") + else: + logger.warning(f"Invalid PYTDX_SERVERS entry (missing port): {part}") + if result: + return result + + host = os.getenv("PYTDX_HOST", "").strip() + port_str = os.getenv("PYTDX_PORT", "").strip() + if host and port_str: + try: + return [(host, int(port_str))] + except ValueError: + logger.warning(f"Invalid PYTDX_HOST/PYTDX_PORT: {host}:{port_str}") + + return None + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +class PytdxFetcher(BaseFetcher): + """ + 通达信数据源实现 + + 优先级:2(与 Tushare 同级) + 数据来源:通达信行情服务器 + + 关键策略: + - 自动选择最优服务器 + - 连接失败自动切换服务器 + - 失败后指数退避重试 + + Pytdx 特点: + - 免费、无需注册 + - 直连行情服务器 + - 支持实时行情和历史数据 + - 支持股票名称查询 + """ + + name = 
"PytdxFetcher" + priority = int(os.getenv("PYTDX_PRIORITY", "2")) + + # 默认通达信行情服务器列表 + DEFAULT_HOSTS = [ + ("119.147.212.81", 7709), # 深圳 + ("112.74.214.43", 7727), # 深圳 + ("221.231.141.60", 7709), # 上海 + ("101.227.73.20", 7709), # 上海 + ("101.227.77.254", 7709), # 上海 + ("14.215.128.18", 7709), # 广州 + ("59.173.18.140", 7709), # 武汉 + ("180.153.39.51", 7709), # 杭州 + ] + # Pytdx get_security_list returns at most 1000 items per page + SECURITY_LIST_PAGE_SIZE = 1000 + + def __init__(self, hosts: Optional[List[Tuple[str, int]]] = None): + """ + 初始化 PytdxFetcher + + Args: + hosts: 服务器列表 [(host, port), ...]。若未传入,优先使用环境变量 + PYTDX_SERVERS(ip:port,ip:port)或 PYTDX_HOST+PYTDX_PORT, + 否则使用内置 DEFAULT_HOSTS。 + """ + if hosts is not None: + self._hosts = hosts + else: + env_hosts = _parse_hosts_from_env() + self._hosts = env_hosts if env_hosts else self.DEFAULT_HOSTS + self._api = None + self._connected = False + self._current_host_idx = 0 + self._stock_list_cache = None # 股票列表缓存 + self._stock_name_cache = {} # 股票名称缓存 {code: name} + + def _get_pytdx(self): + """ + 延迟加载 pytdx 模块 + + 只在首次使用时导入,避免未安装时报错 + """ + try: + from pytdx.hq import TdxHq_API + return TdxHq_API + except ImportError: + logger.warning("pytdx 未安装,请运行: pip install pytdx") + return None + + @contextmanager + def _pytdx_session(self) -> Generator: + """ + Pytdx 连接上下文管理器 + + 确保: + 1. 进入上下文时自动连接 + 2. 退出上下文时自动断开 + 3. 
异常时也能正确断开 + + 使用示例: + with self._pytdx_session() as api: + # 在这里执行数据查询 + """ + TdxHq_API = self._get_pytdx() + if TdxHq_API is None: + raise DataFetchError("pytdx 库未安装") + + api = TdxHq_API() + connected = False + + try: + # 尝试连接服务器(自动选择最优) + for i in range(len(self._hosts)): + host_idx = (self._current_host_idx + i) % len(self._hosts) + host, port = self._hosts[host_idx] + + try: + if api.connect(host, port, time_out=5): + connected = True + self._current_host_idx = host_idx + logger.debug(f"Pytdx 连接成功: {host}:{port}") + break + except Exception as e: + logger.debug(f"Pytdx 连接 {host}:{port} 失败: {e}") + continue + + if not connected: + raise DataFetchError("Pytdx 无法连接任何服务器") + + yield api + + finally: + # 确保断开连接 + try: + api.disconnect() + logger.debug("Pytdx 连接已断开") + except Exception as e: + logger.warning(f"Pytdx 断开连接时出错: {e}") + + def _get_market_code(self, stock_code: str) -> Tuple[int, str]: + """ + 根据股票代码判断市场 + + Pytdx 市场代码: + - 0: 深圳 + - 1: 上海 + + Args: + stock_code: 股票代码 + + Returns: + (market, code) 元组 + """ + code = stock_code.strip() + + # 去除可能的前缀后缀 + code = code.replace('.SH', '').replace('.SZ', '') + code = code.replace('.sh', '').replace('.sz', '') + code = code.replace('sh', '').replace('sz', '') + + # 根据代码前缀判断市场 + # 上海:60xxxx, 68xxxx(科创板) + # 深圳:00xxxx, 30xxxx(创业板), 002xxx(中小板) + if code.startswith(('60', '68')): + return 1, code # 上海 + else: + return 0, code # 深圳 + + def _build_stock_list_cache(self, api) -> None: + """ + Build a full stock code -> name cache from paginated security lists. 
+ """ + self._stock_list_cache = {} + + for market in (0, 1): + start = 0 + while True: + stocks = api.get_security_list(market, start) or [] + for stock in stocks: + code = stock.get('code') + name = stock.get('name') + if code and name: + self._stock_list_cache[code] = name + + if len(stocks) < self.SECURITY_LIST_PAGE_SIZE: + break + + start += self.SECURITY_LIST_PAGE_SIZE + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从通达信获取原始数据 + + 使用 get_security_bars() 获取日线数据 + + 流程: + 1. 检查是否为美股(不支持) + 2. 使用上下文管理器管理连接 + 3. 判断市场代码 + 4. 调用 API 获取 K 线数据 + """ + # 美股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_us_code(stock_code): + raise DataFetchError(f"PytdxFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # 港股不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if _is_hk_market(stock_code): + raise DataFetchError(f"PytdxFetcher 不支持港股 {stock_code},请使用 AkshareFetcher") + + # 北交所不支持,抛出异常让 DataFetcherManager 切换到其他数据源 + if is_bse_code(stock_code): + raise DataFetchError( + f"PytdxFetcher 不支持北交所 {stock_code},将自动切换其他数据源" + ) + + market, code = self._get_market_code(stock_code) + + # 计算需要获取的交易日数量(估算) + from datetime import datetime as dt + start_dt = dt.strptime(start_date, '%Y-%m-%d') + end_dt = dt.strptime(end_date, '%Y-%m-%d') + days = (end_dt - start_dt).days + count = min(max(days * 5 // 7 + 10, 30), 800) # 估算交易日,最大 800 条 + + logger.debug(f"调用 Pytdx get_security_bars(market={market}, code={code}, count={count})") + + with self._pytdx_session() as api: + try: + # 获取日 K 线数据 + # category: 9-日线, 0-5分钟, 1-15分钟, 2-30分钟, 3-1小时 + data = api.get_security_bars( + category=9, # 日线 + market=market, + code=code, + start=0, # 从最新开始 + count=count + ) + + if data is None or len(data) == 0: + raise 
DataFetchError(f"Pytdx 未查询到 {stock_code} 的数据") + + # 转换为 DataFrame + df = api.to_df(data) + + # 过滤日期范围 + df['datetime'] = pd.to_datetime(df['datetime']) + df = df[(df['datetime'] >= start_date) & (df['datetime'] <= end_date)] + + return df + + except Exception as e: + if isinstance(e, DataFetchError): + raise + raise DataFetchError(f"Pytdx 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Pytdx 数据 + + Pytdx 返回的列名: + datetime, open, high, low, close, vol, amount + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 列名映射 + column_mapping = { + 'datetime': 'date', + 'vol': 'volume', + } + + df = df.rename(columns=column_mapping) + + # 计算涨跌幅(pytdx 不返回涨跌幅,需要自己计算) + if 'pct_chg' not in df.columns and 'close' in df.columns: + df['pct_chg'] = df['close'].pct_change() * 100 + df['pct_chg'] = df['pct_chg'].fillna(0).round(2) + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """ + 获取股票名称 + + Args: + stock_code: 股票代码 + + Returns: + 股票名称,失败返回 None + """ + # 港股不支持(pytdx 不含港股数据) + if _is_hk_market(stock_code): + return None + + # 先检查缓存 + if stock_code in self._stock_name_cache: + return self._stock_name_cache[stock_code] + + try: + market, code = self._get_market_code(stock_code) + + with self._pytdx_session() as api: + # 获取股票列表(缓存) + if self._stock_list_cache is None: + self._build_stock_list_cache(api) + + # 查找股票名称 + name = self._stock_list_cache.get(code) + if name: + self._stock_name_cache[stock_code] = name + return name + + # 尝试使用 get_finance_info + finance_info = api.get_finance_info(market, code) + if finance_info and 'name' in finance_info: + name = finance_info['name'] + self._stock_name_cache[stock_code] = name + return name + + except Exception 
as e: + logger.warning(f"Pytdx 获取股票名称失败 {stock_code}: {e}") + + return None + + def get_realtime_quote(self, stock_code: str) -> Optional[dict]: + """ + 获取实时行情 + + Args: + stock_code: 股票代码 + + Returns: + 实时行情数据字典,失败返回 None + """ + if is_bse_code(stock_code): + raise DataFetchError( + f"PytdxFetcher 不支持北交所 {stock_code},将自动切换其他数据源" + ) + try: + market, code = self._get_market_code(stock_code) + + with self._pytdx_session() as api: + data = api.get_security_quotes([(market, code)]) + + if data and len(data) > 0: + quote = data[0] + return { + 'code': stock_code, + 'name': quote.get('name', ''), + 'price': quote.get('price', 0), + 'open': quote.get('open', 0), + 'high': quote.get('high', 0), + 'low': quote.get('low', 0), + 'pre_close': quote.get('last_close', 0), + 'volume': quote.get('vol', 0), + 'amount': quote.get('amount', 0), + 'bid_prices': [quote.get(f'bid{i}', 0) for i in range(1, 6)], + 'ask_prices': [quote.get(f'ask{i}', 0) for i in range(1, 6)], + } + except Exception as e: + logger.warning(f"Pytdx 获取实时行情失败 {stock_code}: {e}") + + return None + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = PytdxFetcher() + + try: + # 测试历史数据 + df = fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + + # 测试股票名称 + name = fetcher.get_stock_name('600519') + print(f"股票名称: {name}") + + # 测试实时行情 + quote = fetcher.get_realtime_quote('600519') + print(f"实时行情: {quote}") + + except Exception as e: + print(f"获取失败: {e}") diff --git a/src/provider/realtime_types.py b/src/provider/realtime_types.py new file mode 100644 index 00000000..8d7d3b96 --- /dev/null +++ b/src/provider/realtime_types.py @@ -0,0 +1,449 @@ +# -*- coding: utf-8 -*- +""" +=================================== +实时行情统一类型定义 & 熔断机制 +=================================== + +设计目标: +1. 统一各数据源的实时行情返回结构 +2. 实现熔断/冷却机制,避免连续失败时反复请求 +3. 
支持多数据源故障切换 + +使用方式: +- 所有 Fetcher 的 get_realtime_quote() 统一返回 UnifiedRealtimeQuote +- CircuitBreaker 管理各数据源的熔断状态 +""" + +import logging +import time +from threading import RLock +from dataclasses import dataclass, field +from typing import Optional, Dict, Any, Union +from enum import Enum + +logger = logging.getLogger(__name__) + + +# ============================================ +# 通用类型转换工具函数 +# ============================================ +# 设计说明: +# 各数据源返回的原始数据类型不一致(str/float/int/NaN), +# 使用这些函数统一转换,避免在各 Fetcher 中重复定义。 + +def safe_float(val: Any, default: Optional[float] = None) -> Optional[float]: + """ + 安全转换为浮点数 + + 处理场景: + - None / 空字符串 → default + - pandas NaN / numpy NaN → default + - 数值字符串 → float + - 已是数值 → float + + Args: + val: 待转换的值 + default: 转换失败时的默认值 + + Returns: + 转换后的浮点数,或默认值 + """ + try: + if val is None: + return default + + # 处理字符串 + if isinstance(val, str): + val = val.strip() + if val == "" or val == "-" or val == "--": + return default + + # 处理 pandas/numpy NaN + # 使用 math.isnan 而不是 pd.isna,避免强制依赖 pandas + import math + try: + if math.isnan(float(val)): + return default + except (ValueError, TypeError): + pass + + return float(val) + except (ValueError, TypeError): + return default + + +def safe_int(val: Any, default: Optional[int] = None) -> Optional[int]: + """ + 安全转换为整数 + + 先转换为 float,再取整,处理 "123.0" 这类情况 + + Args: + val: 待转换的值 + default: 转换失败时的默认值 + + Returns: + 转换后的整数,或默认值 + """ + f_val = safe_float(val, default=None) + if f_val is not None: + return int(f_val) + return default + + +class RealtimeSource(Enum): + """实时行情数据源""" + EFINANCE = "efinance" # 东方财富(efinance库) + AKSHARE_EM = "akshare_em" # 东方财富(akshare库) + AKSHARE_SINA = "akshare_sina" # 新浪财经 + AKSHARE_QQ = "akshare_qq" # 腾讯财经 + TUSHARE = "tushare" # Tushare Pro + TENCENT = "tencent" # 腾讯直连 + SINA = "sina" # 新浪直连 + STOOQ = "stooq" # Stooq 美股兜底 + LONGBRIDGE = "longbridge" # 长桥(美股/港股兜底) + FALLBACK = "fallback" # 降级兜底 + + +@dataclass +class UnifiedRealtimeQuote: + """ + 统一实时行情数据结构 + 
+ 设计原则: + - 各数据源返回的字段可能不同,缺失字段用 None 表示 + - 主流程使用 getattr(quote, field, None) 获取,保证兼容性 + - source 字段标记数据来源,便于调试 + """ + code: str + name: str = "" + source: RealtimeSource = RealtimeSource.FALLBACK + + # === 核心价格数据(几乎所有源都有)=== + price: Optional[float] = None # 最新价 + change_pct: Optional[float] = None # 涨跌幅(%) + change_amount: Optional[float] = None # 涨跌额 + + # === 量价指标(部分源可能缺失)=== + volume: Optional[int] = None # 成交量(手) + amount: Optional[float] = None # 成交额(元) + volume_ratio: Optional[float] = None # 量比 + turnover_rate: Optional[float] = None # 换手率(%) + amplitude: Optional[float] = None # 振幅(%) + + # === 价格区间 === + open_price: Optional[float] = None # 开盘价 + high: Optional[float] = None # 最高价 + low: Optional[float] = None # 最低价 + pre_close: Optional[float] = None # 昨收价 + + # === 估值指标(仅东财等全量接口有)=== + pe_ratio: Optional[float] = None # 市盈率(动态) + pb_ratio: Optional[float] = None # 市净率 + total_mv: Optional[float] = None # 总市值(元) + circ_mv: Optional[float] = None # 流通市值(元) + + # === 其他指标 === + change_60d: Optional[float] = None # 60日涨跌幅(%) + high_52w: Optional[float] = None # 52周最高 + low_52w: Optional[float] = None # 52周最低 + + def to_dict(self) -> Dict[str, Any]: + """转换为字典(过滤 None 值)""" + result = { + 'code': self.code, + 'name': self.name, + 'source': self.source.value, + } + # 只添加非 None 的字段 + optional_fields = [ + 'price', 'change_pct', 'change_amount', 'volume', 'amount', + 'volume_ratio', 'turnover_rate', 'amplitude', + 'open_price', 'high', 'low', 'pre_close', + 'pe_ratio', 'pb_ratio', 'total_mv', 'circ_mv', + 'change_60d', 'high_52w', 'low_52w' + ] + for f in optional_fields: + val = getattr(self, f, None) + if val is not None: + result[f] = val + return result + + def has_basic_data(self) -> bool: + """检查是否有基本的价格数据""" + return self.price is not None and self.price > 0 + + def has_volume_data(self) -> bool: + """检查是否有量价数据""" + return self.volume_ratio is not None or self.turnover_rate is not None + + +@dataclass +class ChipDistribution: + """ + 筹码分布数据 + + 
反映持仓成本分布和获利情况 + """ + code: str + date: str = "" + source: str = "akshare" + + # 获利情况 + profit_ratio: float = 0.0 # 获利比例(0-1) + avg_cost: float = 0.0 # 平均成本 + + # 筹码集中度 + cost_90_low: float = 0.0 # 90%筹码成本下限 + cost_90_high: float = 0.0 # 90%筹码成本上限 + concentration_90: float = 0.0 # 90%筹码集中度(越小越集中) + + cost_70_low: float = 0.0 # 70%筹码成本下限 + cost_70_high: float = 0.0 # 70%筹码成本上限 + concentration_70: float = 0.0 # 70%筹码集中度 + + def to_dict(self) -> Dict[str, Any]: + """转换为字典""" + return { + 'code': self.code, + 'date': self.date, + 'source': self.source, + 'profit_ratio': self.profit_ratio, + 'avg_cost': self.avg_cost, + 'cost_90_low': self.cost_90_low, + 'cost_90_high': self.cost_90_high, + 'concentration_90': self.concentration_90, + 'concentration_70': self.concentration_70, + } + + def get_chip_status(self, current_price: float) -> str: + """ + 获取筹码状态描述 + + Args: + current_price: 当前股价 + + Returns: + 筹码状态描述 + """ + status_parts = [] + + # 获利比例分析 + if self.profit_ratio >= 0.9: + status_parts.append("获利盘极高(获利盘>90%)") + elif self.profit_ratio >= 0.7: + status_parts.append("获利盘较高(获利盘70-90%)") + elif self.profit_ratio >= 0.5: + status_parts.append("获利盘中等(获利盘50-70%)") + elif self.profit_ratio >= 0.3: + status_parts.append("套牢盘中等(套牢盘50-70%)") + elif self.profit_ratio >= 0.1: + status_parts.append("套牢盘较高(套牢盘70-90%)") + else: + status_parts.append("套牢盘极高(套牢盘>90%)") + + # 筹码集中度分析 (90%集中度 < 10% 表示集中) + if self.concentration_90 < 0.08: + status_parts.append("筹码高度集中") + elif self.concentration_90 < 0.15: + status_parts.append("筹码较集中") + elif self.concentration_90 < 0.25: + status_parts.append("筹码分散度中等") + else: + status_parts.append("筹码较分散") + + # 成本与现价关系 + if current_price > 0 and self.avg_cost > 0: + cost_diff = (current_price - self.avg_cost) / self.avg_cost * 100 + if cost_diff > 20: + status_parts.append(f"现价高于平均成本{cost_diff:.1f}%") + elif cost_diff > 5: + status_parts.append(f"现价略高于成本{cost_diff:.1f}%") + elif cost_diff > -5: + status_parts.append("现价接近平均成本") + else: + 
status_parts.append(f"现价低于平均成本{abs(cost_diff):.1f}%") + + return ",".join(status_parts) + + +class CircuitBreaker: + """ + 熔断器 - 管理数据源的熔断/冷却状态 + + 策略: + - 连续失败 N 次后进入熔断状态 + - 熔断期间跳过该数据源 + - 冷却时间后自动恢复半开状态 + - 半开状态下单次成功则完全恢复,失败则继续熔断 + + 状态机: + CLOSED(正常) --失败N次--> OPEN(熔断)--冷却时间到--> HALF_OPEN(半开) + HALF_OPEN --成功--> CLOSED + HALF_OPEN --失败--> OPEN + """ + + # 状态常量 + CLOSED = "closed" # 正常状态 + OPEN = "open" # 熔断状态(不可用) + HALF_OPEN = "half_open" # 半开状态(试探性请求) + + def __init__( + self, + failure_threshold: int = 3, # 连续失败次数阈值 + cooldown_seconds: float = 300.0, # 冷却时间(秒),默认5分钟 + half_open_max_calls: int = 1 # 半开状态最大尝试次数 + ): + self.failure_threshold = failure_threshold + self.cooldown_seconds = cooldown_seconds + self.half_open_max_calls = half_open_max_calls + + # 各数据源状态 {source_name: {state, failures, last_failure_time, half_open_calls}} + self._states: Dict[str, Dict[str, Any]] = {} + self._lock = RLock() + + def _get_state_locked(self, source: str) -> Dict[str, Any]: + """获取或初始化数据源状态(调用方需持有锁)。""" + if source not in self._states: + self._states[source] = { + 'state': self.CLOSED, + 'failures': 0, + 'last_failure_time': 0.0, + 'half_open_calls': 0 + } + return self._states[source] + + def is_available(self, source: str) -> bool: + """ + 检查数据源是否可用 + + 返回 True 表示可以尝试请求 + 返回 False 表示应跳过该数据源 + """ + with self._lock: + state = self._get_state_locked(source) + current_time = time.time() + + if state['state'] == self.CLOSED: + return True + + if state['state'] == self.OPEN: + # 检查冷却时间 + time_since_failure = current_time - state['last_failure_time'] + if time_since_failure >= self.cooldown_seconds: + # 冷却完成,进入半开状态(不预占名额,由 HALF_OPEN 分支统一管理) + state['state'] = self.HALF_OPEN + state['half_open_calls'] = 0 + state['last_failure_time'] = current_time + logger.info(f"[熔断器] {source} 冷却完成,进入半开状态") + # Fall through to HALF_OPEN check below + else: + remaining = self.cooldown_seconds - time_since_failure + logger.debug(f"[熔断器] {source} 处于熔断状态,剩余冷却时间: {remaining:.0f}s") + return False + 
+ if state['state'] == self.HALF_OPEN: + if state['half_open_calls'] < self.half_open_max_calls: + state['half_open_calls'] += 1 + return True + # 所有探测名额已用完;若冷却时间再次到期仍未收到 + # record_success/record_failure 回调,重置名额允许重新探测, + # 避免永久卡在 HALF_OPEN。 + time_since_failure = current_time - state['last_failure_time'] + if time_since_failure >= self.cooldown_seconds: + state['half_open_calls'] = 1 + state['last_failure_time'] = current_time + logger.info(f"[熔断器] {source} 半开状态探测超时,重新探测") + return True + return False + + return True + + def record_inconclusive(self, source: str) -> None: + """记录不确定的探测结果(如返回 None)。 + + 仅影响 HALF_OPEN 状态:将其转回 OPEN 以便冷却后重新探测。 + CLOSED 状态下为空操作,不影响失败计数。 + """ + with self._lock: + state = self._get_state_locked(source) + if state['state'] == self.HALF_OPEN: + state['state'] = self.OPEN + state['half_open_calls'] = 0 + state['last_failure_time'] = time.time() + logger.info(f"[熔断器] {source} 半开探测结果不确定,重新进入冷却") + + def record_success(self, source: str) -> None: + """记录成功请求""" + with self._lock: + state = self._get_state_locked(source) + + if state['state'] == self.HALF_OPEN: + # 半开状态下成功,完全恢复 + logger.info(f"[熔断器] {source} 半开状态请求成功,恢复正常") + + # 重置状态 + state['state'] = self.CLOSED + state['failures'] = 0 + state['half_open_calls'] = 0 + + def record_failure(self, source: str, error: Optional[str] = None) -> None: + """记录失败请求""" + with self._lock: + state = self._get_state_locked(source) + current_time = time.time() + + state['failures'] += 1 + state['last_failure_time'] = current_time + + if state['state'] == self.HALF_OPEN: + # 半开状态下失败,继续熔断 + state['state'] = self.OPEN + state['half_open_calls'] = 0 + logger.warning(f"[熔断器] {source} 半开状态请求失败,继续熔断 {self.cooldown_seconds}s") + elif state['failures'] >= self.failure_threshold: + # 达到阈值,进入熔断 + state['state'] = self.OPEN + logger.warning(f"[熔断器] {source} 连续失败 {state['failures']} 次,进入熔断状态 " + f"(冷却 {self.cooldown_seconds}s)") + if error: + logger.warning(f"[熔断器] 最后错误: {error}") + + def get_status(self) -> 
Dict[str, str]: + """获取所有数据源状态""" + with self._lock: + return {source: info['state'] for source, info in self._states.items()} + + def reset(self, source: Optional[str] = None) -> None: + """重置熔断器状态""" + with self._lock: + if source: + if source in self._states: + del self._states[source] + else: + self._states.clear() + + +# 全局熔断器实例(实时行情专用) +_realtime_circuit_breaker = CircuitBreaker( + failure_threshold=3, # 连续失败3次熔断 + cooldown_seconds=300.0, # 冷却5分钟 + half_open_max_calls=1 +) + +# 筹码接口熔断器(更保守的策略,因为该接口更不稳定) +_chip_circuit_breaker = CircuitBreaker( + failure_threshold=2, # 连续失败2次熔断 + cooldown_seconds=600.0, # 冷却10分钟 + half_open_max_calls=1 +) + + +def get_realtime_circuit_breaker() -> CircuitBreaker: + """获取实时行情熔断器""" + return _realtime_circuit_breaker + + +def get_chip_circuit_breaker() -> CircuitBreaker: + """获取筹码接口熔断器""" + return _chip_circuit_breaker diff --git a/src/provider/tickflow_fetcher.py b/src/provider/tickflow_fetcher.py new file mode 100644 index 00000000..ae415644 --- /dev/null +++ b/src/provider/tickflow_fetcher.py @@ -0,0 +1,341 @@ +# -*- coding: utf-8 -*- +""" +=================================== +TickFlowFetcher - market review only +=================================== + +Issue #632 only requires TickFlow for A-share market review stability. +This fetcher intentionally implements a narrow P0 surface: + +1. Main A-share indices quotes +2. A-share market breadth statistics + +It does not participate in the general daily-data or per-stock realtime +pipelines and should only be called explicitly by DataFetcherManager. 
+""" + +import logging +import math +from threading import RLock +from time import monotonic +from typing import Any, Dict, List, Optional + +import pandas as pd + +from .base import ( + BaseFetcher, + DataFetchError, + is_bse_code, + is_kc_cy_stock, + is_st_stock, + normalize_stock_code, +) + + +logger = logging.getLogger(__name__) + +_CN_MAIN_INDEX_QUOTES = ( + ("000001.SH", "000001", "上证指数"), + ("399001.SZ", "399001", "深证成指"), + ("399006.SZ", "399006", "创业板指"), + ("000688.SH", "000688", "科创50"), + ("000016.SH", "000016", "上证50"), + ("000300.SH", "000300", "沪深300"), +) +_MAX_SYMBOLS_PER_QUOTE_REQUEST = 5 +_UNIVERSE_PERMISSION_NEGATIVE_CACHE_TTL_SECONDS = 900 + + +class TickFlowFetcher(BaseFetcher): + """TickFlow-backed market review helper.""" + + name = "TickFlowFetcher" + priority = 99 + + def __init__(self, api_key: Optional[str], timeout: float = 30.0): + self.api_key = (api_key or "").strip() + self.timeout = timeout + self._client = None + self._client_lock = RLock() + self._universe_query_supported: Optional[bool] = None + self._universe_query_checked_at: Optional[float] = None + + def close(self) -> None: + """Close the underlying TickFlow client if it was created.""" + with self._client_lock: + client = self._client + self._client = None + self._universe_query_supported = None + self._universe_query_checked_at = None + if client is not None: + try: + client.close() + except Exception as exc: + logger.debug("[TickFlowFetcher] 关闭客户端失败: %s", exc) + + def __del__(self) -> None: + try: + self.close() + except Exception: + # Best-effort cleanup during interpreter shutdown. 
+ pass + + def _build_client(self): + from tickflow import TickFlow + + return TickFlow(api_key=self.api_key, timeout=self.timeout) + + def _get_client(self): + if not self.api_key: + return None + if self._client is not None: + return self._client + + with self._client_lock: + if self._client is None: + self._client = self._build_client() + return self._client + + def _fetch_raw_data( + self, stock_code: str, start_date: str, end_date: str + ) -> pd.DataFrame: + raise DataFetchError( + "TickFlowFetcher P0 only supports market review endpoints" + ) + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + raise DataFetchError( + "TickFlowFetcher P0 only supports market review endpoints" + ) + + @staticmethod + def _safe_float(value: Any) -> Optional[float]: + if value in (None, "", "-"): + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + @classmethod + def _ratio_to_percent(cls, value: Any) -> Optional[float]: + ratio = cls._safe_float(value) + if ratio is None: + return None + return ratio * 100.0 + + @staticmethod + def _extract_name(quote: Dict[str, Any]) -> str: + ext = quote.get("ext") or {} + name = ext.get("name") or quote.get("name") or "" + return str(name).strip() + + @staticmethod + def _is_universe_permission_error(exc: Exception) -> bool: + status_code = getattr(exc, "status_code", None) + code = str(getattr(exc, "code", "") or "").upper() + message = ( + f"{getattr(exc, 'message', '')} {exc}" + ).strip().lower() + + if status_code == 403: + return True + if code in {"PERMISSION_DENIED", "FORBIDDEN"}: + return True + return any( + keyword in message + for keyword in ( + "标的池查询", + "universe", + "permission", + "forbidden", + ) + ) + + @staticmethod + def _is_cn_equity_symbol(symbol: str) -> bool: + normalized = normalize_stock_code(symbol) + upper_symbol = (symbol or "").strip().upper() + return ( + normalized.isdigit() + and len(normalized) == 6 + and upper_symbol.endswith((".SH", 
".SZ", ".BJ")) + ) + + @staticmethod + def _round_limit_price(prev_close: float, ratio: float) -> float: + return math.floor(prev_close * (1 + ratio) * 100 + 0.5) / 100.0 + + @classmethod + def _get_limit_ratio(cls, pure_code: str, name: str) -> float: + if is_bse_code(pure_code): + return 0.30 + if is_kc_cy_stock(pure_code): + return 0.20 + if is_st_stock(name): + return 0.05 + return 0.10 + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """Fetch main A-share indices via TickFlow quotes.""" + if region != "cn": + return None + + client = self._get_client() + if client is None: + return None + + symbols = [symbol for symbol, _, _ in _CN_MAIN_INDEX_QUOTES] + quotes: List[Dict[str, Any]] = [] + for offset in range(0, len(symbols), _MAX_SYMBOLS_PER_QUOTE_REQUEST): + batch_symbols = symbols[offset : offset + _MAX_SYMBOLS_PER_QUOTE_REQUEST] + batch_quotes = client.quotes.get(symbols=batch_symbols) + if batch_quotes: + quotes.extend(batch_quotes) + if not quotes: + logger.warning("[TickFlowFetcher] 指数行情为空") + return None + + quotes_by_symbol = { + str(item.get("symbol", "")).upper(): item for item in quotes if item + } + results: List[Dict[str, Any]] = [] + + for symbol, code, name in _CN_MAIN_INDEX_QUOTES: + quote = quotes_by_symbol.get(symbol) + if not quote: + continue + + ext = quote.get("ext") or {} + current = self._safe_float(quote.get("last_price")) or 0.0 + prev_close = self._safe_float(quote.get("prev_close")) or 0.0 + change = self._safe_float(ext.get("change_amount")) + if change is None: + change = current - prev_close if current or prev_close else 0.0 + amplitude = self._ratio_to_percent(ext.get("amplitude")) + if amplitude is None and prev_close > 0: + high = self._safe_float(quote.get("high")) or 0.0 + low = self._safe_float(quote.get("low")) or 0.0 + amplitude = (high - low) / prev_close * 100 + + results.append( + { + "code": code, + "name": name, + "current": current, + "change": change, + "change_pct": 
self._ratio_to_percent(ext.get("change_pct")) or 0.0, + "open": self._safe_float(quote.get("open")) or 0.0, + "high": self._safe_float(quote.get("high")) or 0.0, + "low": self._safe_float(quote.get("low")) or 0.0, + "prev_close": prev_close, + "volume": self._safe_float(quote.get("volume")) or 0.0, + "amount": self._safe_float(quote.get("amount")) or 0.0, + "amplitude": amplitude or 0.0, + } + ) + + if len(results) != len(_CN_MAIN_INDEX_QUOTES): + logger.warning( + "[TickFlowFetcher] 指数行情不完整: %s/%s", + len(results), + len(_CN_MAIN_INDEX_QUOTES), + ) + return None + + return results or None + + def get_market_stats(self) -> Optional[Dict[str, Any]]: + """Calculate A-share market breadth from TickFlow universe quotes.""" + client = self._get_client() + if client is None: + return None + + now = monotonic() + if self._universe_query_supported is False: + checked_at = self._universe_query_checked_at or 0.0 + if ( + now - checked_at + < _UNIVERSE_PERMISSION_NEGATIVE_CACHE_TTL_SECONDS + ): + return None + self._universe_query_supported = None + self._universe_query_checked_at = None + + try: + quotes = client.quotes.get(universes=["CN_Equity_A"]) + self._universe_query_supported = True + self._universe_query_checked_at = now + except Exception as exc: + if self._is_universe_permission_error(exc): + self._universe_query_supported = False + self._universe_query_checked_at = now + logger.info( + "[TickFlowFetcher] 当前套餐不支持标的池查询,市场统计回退到现有数据源" + ) + return None + raise + if not quotes: + logger.warning("[TickFlowFetcher] 市场统计行情为空") + return None + + stats = { + "up_count": 0, + "down_count": 0, + "flat_count": 0, + "limit_up_count": 0, + "limit_down_count": 0, + "total_amount": 0.0, + } + valid_rows = 0 + + for quote in quotes: + if not quote: + continue + + symbol = str(quote.get("symbol") or "").strip().upper() + if not self._is_cn_equity_symbol(symbol): + continue + + amount = self._safe_float(quote.get("amount")) + if amount is not None and amount > 0: + 
stats["total_amount"] += amount / 1e8 + + pure_code = normalize_stock_code(symbol) + last_price = self._safe_float(quote.get("last_price")) + prev_close = self._safe_float(quote.get("prev_close")) + + if last_price is None or prev_close is None or amount is None or amount <= 0: + continue + + name = self._extract_name(quote) + if not name: + logger.debug("[TickFlowFetcher] 缺少股票名称,按非 ST 处理: %s", symbol) + + ratio = self._get_limit_ratio(pure_code, name) + limit_up = self._round_limit_price(prev_close, ratio) + limit_down = math.floor(prev_close * (1 - ratio) * 100 + 0.5) / 100.0 + limit_up_tolerance = round(abs(prev_close * (1 + ratio) - limit_up), 10) + limit_down_tolerance = round( + abs(prev_close * (1 - ratio) - limit_down), 10 + ) + + valid_rows += 1 + + if abs(last_price - limit_up) <= limit_up_tolerance: + stats["limit_up_count"] += 1 + if abs(last_price - limit_down) <= limit_down_tolerance: + stats["limit_down_count"] += 1 + + if last_price > prev_close: + stats["up_count"] += 1 + elif last_price < prev_close: + stats["down_count"] += 1 + else: + stats["flat_count"] += 1 + + if valid_rows == 0: + logger.warning("[TickFlowFetcher] 市场统计未命中有效 A 股行情") + return None + + return stats diff --git a/src/provider/tushare_fetcher.py b/src/provider/tushare_fetcher.py new file mode 100644 index 00000000..ba679bf9 --- /dev/null +++ b/src/provider/tushare_fetcher.py @@ -0,0 +1,1320 @@ +# -*- coding: utf-8 -*- +""" +=================================== +TushareFetcher - 备用数据源 1 (Priority 2) +=================================== + +数据来源:Tushare Pro API(挖地兔) +特点:需要 Token、有请求配额限制 +优点:数据质量高、接口稳定 + +流控策略: +1. 实现"每分钟调用计数器" +2. 超过免费配额(80次/分)时,强制休眠到下一分钟 +3. 
使用 tenacity 实现指数退避重试 +""" + +import json as _json +import logging +import re +import time +from datetime import datetime, timedelta +from typing import Optional, Tuple, List, Dict, Any + +import pandas as pd +import requests +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS,is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code, _is_hk_market +from .realtime_types import UnifiedRealtimeQuote, ChipDistribution +from src.config import get_config +import os +from zoneinfo import ZoneInfo + +logger = logging.getLogger(__name__) + + +# ETF code prefixes by exchange +# Shanghai: 51xxxx, 52xxxx, 56xxxx, 58xxxx +# Shenzhen: 15xxxx, 16xxxx, 18xxxx +_ETF_SH_PREFIXES = ('51', '52', '56', '58') +_ETF_SZ_PREFIXES = ('15', '16', '18') +_ETF_ALL_PREFIXES = _ETF_SH_PREFIXES + _ETF_SZ_PREFIXES + + +def _is_etf_code(stock_code: str) -> bool: + """ + Check if the code is an ETF fund code. 
+ + ETF code ranges: + - Shanghai ETF: 51xxxx, 52xxxx, 56xxxx, 58xxxx + - Shenzhen ETF: 15xxxx, 16xxxx, 18xxxx + """ + code = stock_code.strip().split('.')[0] + return code.startswith(_ETF_ALL_PREFIXES) and len(code) == 6 + + +def _is_us_code(stock_code: str) -> bool: + """ + 判断代码是否为美股 + + 美股代码规则: + - 1-5个大写字母,如 'AAPL', 'TSLA' + - 可能包含 '.',如 'BRK.B' + """ + code = stock_code.strip().upper() + return bool(re.match(r'^[A-Z]{1,5}(\.[A-Z])?$', code)) + + +class _TushareHttpClient: + """Lightweight Tushare Pro client that does not require the tushare SDK.""" + + def __init__(self, token: str, timeout: int = 30, api_url: str = "http://api.tushare.pro") -> None: + self._token = token + self._timeout = timeout + self._api_url = api_url + + def query(self, api_name: str, fields: str = "", **kwargs) -> pd.DataFrame: + req_params = { + "api_name": api_name, + "token": self._token, + "params": kwargs, + "fields": fields, + } + res = requests.post(self._api_url, json=req_params, timeout=self._timeout) + if res.status_code != 200: + raise Exception(f"Tushare API HTTP {res.status_code}") + + result = _json.loads(res.text) + if result.get("code") != 0: + raise Exception(result.get("msg") or f"Tushare API error code {result.get('code')}") + + data = result.get("data") or {} + columns = data.get("fields") or [] + items = data.get("items") or [] + return pd.DataFrame(items, columns=columns) + + def __getattr__(self, api_name: str): + if api_name.startswith("_"): + raise AttributeError(api_name) + + def caller(**kwargs) -> pd.DataFrame: + return self.query(api_name, **kwargs) + + return caller + + +class TushareFetcher(BaseFetcher): + """ + Tushare Pro 数据源实现 + + 优先级:2 + 数据来源:Tushare Pro API + + 关键策略: + - 每分钟调用计数器,防止超出配额 + - 超过 80 次/分钟时强制等待 + - 失败后指数退避重试 + + 配额说明(Tushare 免费用户): + - 每分钟最多 80 次请求 + - 每天最多 500 次请求 + """ + + name = "TushareFetcher" + priority = int(os.getenv("TUSHARE_PRIORITY", "2")) # 默认优先级,会在 __init__ 中根据配置动态调整 + + def __init__(self, rate_limit_per_minute: int = 80): + 
""" + 初始化 TushareFetcher + + Args: + rate_limit_per_minute: 每分钟最大请求数(默认80,Tushare免费配额) + """ + self.rate_limit_per_minute = rate_limit_per_minute + self._call_count = 0 # 当前分钟内的调用次数 + self._minute_start: Optional[float] = None # 当前计数周期开始时间 + self._api: Optional[object] = None # Tushare API 实例 + self.date_list: Optional[List[str]] = None # 交易日列表缓存(倒序,最新日期在前) + self._date_list_end: Optional[str] = None # 缓存对应的截止日期,用于跨日刷新 + + # 尝试初始化 API + self._init_api() + + # 根据 API 初始化结果动态调整优先级 + self.priority = self._determine_priority() + + def _init_api(self) -> None: + """ + 初始化 Tushare API + + 如果 Token 未配置,此数据源将不可用。 + 这里直接使用内置 HTTP client,避免运行时强依赖 tushare SDK, + 从而减少 Docker / PyInstaller / 多虚拟环境场景下因缺包导致的初始化失败。 + """ + config = get_config() + + if not config.tushare_token: + logger.warning("Tushare Token 未配置,此数据源不可用") + return + + try: + self._api = self._build_api_client(config.tushare_token) + logger.info("Tushare API 初始化成功") + except Exception as e: + logger.error(f"Tushare API 初始化失败: {e}") + self._api = None + + def _build_api_client(self, token: str) -> _TushareHttpClient: + """ + Build a lightweight Tushare Pro client over direct HTTP requests. + + The project already normalizes all Pro calls through the same request + contract, so we do not need the official tushare SDK during runtime. 
+ """ + client = _TushareHttpClient(token=token) + logger.debug("Tushare API client configured for direct HTTP calls") + return client + + def _determine_priority(self) -> int: + """ + 根据 Token 配置和 API 初始化状态确定优先级 + + 策略: + - Token 配置且 API 初始化成功:优先级 -1(绝对最高,优于 efinance) + - 其他情况:优先级 2(默认) + + Returns: + 优先级数字(0=最高,数字越大优先级越低) + """ + config = get_config() + + if config.tushare_token and self._api is not None: + # Token 配置且 API 初始化成功,提升为最高优先级 + logger.info("✅ 检测到 TUSHARE_TOKEN 且 API 初始化成功,Tushare 数据源优先级提升为最高 (Priority -1)") + return -1 + + # Token 未配置或 API 初始化失败,保持默认优先级 + return 2 + + def is_available(self) -> bool: + """ + 检查数据源是否可用 + + Returns: + True 表示可用,False 表示不可用 + """ + return self._api is not None + + def _check_rate_limit(self) -> None: + """ + 检查并执行速率限制 + + 流控策略: + 1. 检查是否进入新的一分钟 + 2. 如果是,重置计数器 + 3. 如果当前分钟调用次数超过限制,强制休眠 + """ + current_time = time.time() + + # 检查是否需要重置计数器(新的一分钟) + if self._minute_start is None: + self._minute_start = current_time + self._call_count = 0 + elif current_time - self._minute_start >= 60: + # 已经过了一分钟,重置计数器 + self._minute_start = current_time + self._call_count = 0 + logger.debug("速率限制计数器已重置") + + # 检查是否超过配额 + if self._call_count >= self.rate_limit_per_minute: + # 计算需要等待的时间(到下一分钟) + elapsed = current_time - self._minute_start + sleep_time = max(0, 60 - elapsed) + 1 # +1 秒缓冲 + + logger.warning( + f"Tushare 达到速率限制 ({self._call_count}/{self.rate_limit_per_minute} 次/分钟)," + f"等待 {sleep_time:.1f} 秒..." 
+ ) + + time.sleep(sleep_time) + + # 重置计数器 + self._minute_start = time.time() + self._call_count = 0 + + # 增加调用计数 + self._call_count += 1 + logger.debug(f"Tushare 当前分钟调用次数: {self._call_count}/{self.rate_limit_per_minute}") + + def _call_api_with_rate_limit(self, method_name: str, **kwargs) -> pd.DataFrame: + """统一通过速率限制包装 Tushare API 调用。""" + if self._api is None: + raise DataFetchError("Tushare API 未初始化,请检查 Token 配置") + + self._check_rate_limit() + method = getattr(self._api, method_name) + return method(**kwargs) + + def _get_china_now(self) -> datetime: + """返回上海时区当前时间,方便测试覆盖跨日刷新逻辑。""" + return datetime.now(ZoneInfo("Asia/Shanghai")) + + def _get_trade_dates(self, end_date: Optional[str] = None) -> List[str]: + """按自然日刷新交易日历缓存,避免服务跨日后继续复用旧日历。""" + if self._api is None: + return [] + + china_now = self._get_china_now() + requested_end_date = end_date or china_now.strftime("%Y%m%d") + + if self.date_list is not None and self._date_list_end == requested_end_date: + return self.date_list + + start_date = (china_now - timedelta(days=20)).strftime("%Y%m%d") + df_cal = self._call_api_with_rate_limit( + "trade_cal", + exchange="SSE", + start_date=start_date, + end_date=requested_end_date, + ) + + if df_cal is None or df_cal.empty or "cal_date" not in df_cal.columns: + logger.warning("[Tushare] trade_cal 返回为空,无法更新交易日历缓存") + self.date_list = [] + self._date_list_end = requested_end_date + return self.date_list + + trade_dates = sorted( + df_cal[df_cal["is_open"] == 1]["cal_date"].astype(str).tolist(), + reverse=True, + ) + self.date_list = trade_dates + self._date_list_end = requested_end_date + return trade_dates + + @staticmethod + def _pick_trade_date(trade_dates: List[str], use_today: bool) -> Optional[str]: + """根据可用交易日列表选择当天或前一交易日。""" + if not trade_dates: + return None + if use_today or len(trade_dates) == 1: + return trade_dates[0] + return trade_dates[1] + + @staticmethod + def _detect_exchange_hint(stock_code: str) -> Optional[str]: + """Return SH/SZ/BJ when the 
raw user input carries an explicit exchange hint.""" + upper = (stock_code or "").strip().upper() + if upper.startswith(("SH", "SS")) or upper.endswith((".SH", ".SS")): + return "SH" + if upper.startswith("SZ") or upper.endswith(".SZ"): + return "SZ" + if upper.startswith("BJ") or upper.endswith(".BJ"): + return "BJ" + return None + + @classmethod + def _get_legacy_realtime_symbol(cls, stock_code: str) -> str: + """Build the legacy tushare symbol while preserving explicit SH/SZ hints.""" + code = normalize_stock_code(stock_code) + exchange_hint = cls._detect_exchange_hint(stock_code) + + if code == '000001' and exchange_hint == 'SH': + return 'sh000001' + if code == '399001': + return 'sz399001' + if code == '399006': + return 'sz399006' + if code == '000300': + return 'sh000300' + if is_bse_code(code): + return f"bj{code}" + return code + + def _convert_stock_code(self, stock_code: str) -> str: + """ + 转换 A 股 / ETF / 北交所等为 Tushare ts_code(不含港股逻辑)。 + + Tushare 要求的格式示例: + - 沪市股票:600519.SH + - 深市股票:000001.SZ + - 沪市 ETF:510050.SH + - 深市 ETF:159919.SZ + + Args: + stock_code: 原始代码,如 '600519', '000001', '563230' + + Returns: + Tushare 格式代码,如 '600519.SH', '000001.SZ' + """ + raw_code = stock_code.strip() + + # Already has suffix + if '.' 
in raw_code: + ts_code = raw_code.upper() + if ts_code.endswith('.SS'): + return f"{ts_code[:-3]}.SH" + return ts_code + + if _is_us_code(raw_code): + raise DataFetchError(f"TushareFetcher 不支持美股 {raw_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + if _is_hk_market(raw_code): + #raise DataFetchError(f"TushareFetcher 不支持港股 {raw_code},请使用 AkshareFetcher") + return normalize_stock_code(raw_code) + + code = normalize_stock_code(raw_code) + exchange_hint = self._detect_exchange_hint(raw_code) + + if exchange_hint == "SH": + return f"{code}.SH" + if exchange_hint == "SZ": + return f"{code}.SZ" + if exchange_hint == "BJ": + return f"{code}.BJ" + + # ETF: determine exchange by prefix + if code.startswith(_ETF_SH_PREFIXES) and len(code) == 6: + return f"{code}.SH" + if code.startswith(_ETF_SZ_PREFIXES) and len(code) == 6: + return f"{code}.SZ" + + # BSE (Beijing Stock Exchange): 8xxxxx, 4xxxxx, 920xxx + if is_bse_code(code): + return f"{code}.BJ" + + # Regular stocks + # Shanghai: 600xxx, 601xxx, 603xxx, 688xxx (STAR Market) + # Shenzhen: 000xxx, 002xxx, 300xxx (ChiNext) + if code.startswith(('600', '601', '603', '688')): + return f"{code}.SH" + elif code.startswith(('000', '002', '300')): + return f"{code}.SZ" + else: + logger.warning(f"无法确定股票 {code} 的市场,默认使用深市") + return f"{code}.SZ" + + def _convert_hk_stock_code_for_tushare(self, stock_code: str) -> str: + """ + 将用户输入转为 Tushare Pro 接口所需的 ts_code(含港股 nnnnn.HK)。 + + - 非港股:委托 _convert_stock_code(A 股 / ETF / 北交所等)。 + - 港股:从 HK00700、00700、00700.HK 等形式归一为 5 位数字 + .HK。 + """ + raw_code = stock_code.strip() + if _is_hk_market(raw_code): + if "." 
in raw_code: + ts_code = raw_code.upper() + if ts_code.endswith(".SS"): + return f"{ts_code[:-3]}.SH" + if ts_code.endswith(".HK"): + return ts_code + digits = re.sub(r"\D", "", raw_code) + if not digits: + raise DataFetchError(f"无法识别港股代码 {raw_code}") + code = digits[-5:].rjust(5, "0") + return f"{code}.HK" + return self._convert_stock_code(stock_code) + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Tushare 获取原始数据 + + 根据代码类型选择不同接口: + - 普通股票:daily() + - ETF 基金:fund_daily() + + 流程: + 1. 检查 API 是否可用 + 2. 检查是否为美股(不支持) + 3. 执行速率限制检查 + 4. 转换股票代码格式 + 5. 根据代码类型选择接口并调用 + """ + if self._api is None: + raise DataFetchError("Tushare API 未初始化,请检查 Token 配置") + + # US stocks not supported + if _is_us_code(stock_code): + raise DataFetchError(f"TushareFetcher 不支持美股 {stock_code},请使用 AkshareFetcher 或 YfinanceFetcher") + + # Rate-limit check + self._check_rate_limit() + + is_hk = _is_hk_market(stock_code) + # 判断是否为 ETF / 港股,以选择不同接口 + is_etf = _is_etf_code(stock_code) + if is_hk: + ts_code = self._convert_hk_stock_code_for_tushare(stock_code) + api_name = "hk_daily" + else: + ts_code = self._convert_stock_code(stock_code) + api_name = "fund_daily" if is_etf else "daily" + + # Convert date format (Tushare requires YYYYMMDD) + ts_start = start_date.replace('-', '') + ts_end = end_date.replace('-', '') + + + + logger.debug(f"调用 Tushare {api_name}({ts_code}, {ts_start}, {ts_end})") + + try: + if is_hk: + # 港股使用 hk_daily 接口 + df = self._api.hk_daily( + ts_code=ts_code, + start_date=ts_start, + end_date=ts_end, + ) + elif is_etf: + # ETF uses fund_daily interface + df = self._api.fund_daily( + ts_code=ts_code, + start_date=ts_start, + end_date=ts_end, + ) + else: + # Regular A-share stocks use daily interface + df = 
self._api.daily( + ts_code=ts_code, + start_date=ts_start, + end_date=ts_end, + ) + + return df + + except Exception as e: + error_msg = str(e).lower() + + # 检测配额超限 + if any(keyword in error_msg for keyword in ['quota', '配额', 'limit', '权限']): + logger.warning(f"Tushare 配额可能超限: {e}") + raise RateLimitError(f"Tushare 配额超限: {e}") from e + + raise DataFetchError(f"Tushare 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Tushare 数据 + + Tushare daily / fund_daily 返回的列名: + ts_code, trade_date, open, high, low, close, pre_close, change, pct_chg, vol, amount + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + + 单位缩放仅适用于 A 股(及 ETF 等使用同一套单位的接口): + - vol 按「手」计,乘以 100 转为「股」 + - amount 按「千元」计,乘以 1000 转为「元」 + + 港股 hk_daily 返回的 vol / amount 已是可直接使用的量级,不做上述缩放。 + """ + df = df.copy() + is_hk = _is_hk_market(stock_code) + + # 列名映射 + column_mapping = { + 'trade_date': 'date', + 'vol': 'volume', + # open, high, low, close, amount, pct_chg 列名相同 + } + + df = df.rename(columns=column_mapping) + + # 转换日期格式(YYYYMMDD -> YYYY-MM-DD) + if 'date' in df.columns: + df['date'] = pd.to_datetime(df['date'], format='%Y%m%d') + + # 成交量 / 成交额:仅 A 股类接口做单位换算(港股 hk_daily 不换算) + if 'volume' in df.columns and not is_hk: + df['volume'] = df['volume'] * 100 + + if 'amount' in df.columns and not is_hk: + df['amount'] = df['amount'] * 1000 + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def get_stock_name(self, stock_code: str) -> Optional[str]: + """ + 获取股票名称 + + 使用 Tushare 的 stock_basic 接口获取股票基本信息 + + Args: + stock_code: 股票代码 + + Returns: + 股票名称,失败返回 None + """ + if self._api is None: + logger.warning("Tushare API 未初始化,无法获取股票名称") + return None + + # 检查缓存 + if hasattr(self, '_stock_name_cache') and stock_code in self._stock_name_cache: + return 
self._stock_name_cache[stock_code] + + # 初始化缓存 + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + + try: + # 速率限制检查 + self._check_rate_limit() + + + # 根据市场/类型选择基础信息接口 + if _is_hk_market(stock_code): + ts_code = self._convert_hk_stock_code_for_tushare(stock_code) + # 港股:使用 hk_basic + df = self._api.hk_basic( + ts_code=ts_code, + fields='ts_code,name' + ) + elif _is_etf_code(stock_code): + ts_code = self._convert_stock_code(stock_code) + # ETF:使用 fund_basic + df = self._api.fund_basic( + ts_code=ts_code, + fields='ts_code,name' + ) + else: + ts_code = self._convert_stock_code(stock_code) + # A 股股票:使用 stock_basic + df = self._api.stock_basic( + ts_code=ts_code, + fields='ts_code,name' + ) + + if df is not None and not df.empty: + name = df.iloc[0]['name'] + self._stock_name_cache[stock_code] = name + logger.debug(f"Tushare 获取股票名称成功: {stock_code} -> {name}") + return name + + except Exception as e: + logger.warning(f"Tushare 获取股票名称失败 {stock_code}: {e}") + + return None + + def get_stock_list(self) -> Optional[pd.DataFrame]: + """ + 获取股票列表 + + 使用 Tushare 的 stock_basic 接口获取 A 股列表(不含港股)。 + + Returns: + 包含 code, name, industry, area, market 列的 DataFrame,失败返回 None + """ + if self._api is None: + logger.warning("Tushare API 未初始化,无法获取股票列表") + return None + + try: + self._check_rate_limit() + + df = self._api.stock_basic( + exchange='', + list_status='L', + fields='ts_code,name,industry,area,market' + ) + + if df is None or df.empty: + return None + + df = df.copy() + df['code'] = df['ts_code'].astype(str).str.split('.').str[0] + + if not hasattr(self, '_stock_name_cache'): + self._stock_name_cache = {} + for _, row in df.iterrows(): + self._stock_name_cache[row['code']] = row['name'] + + logger.info(f"Tushare 获取股票列表成功: {len(df)} 条") + return df[['code', 'name', 'industry', 'area', 'market']] + + except Exception as e: + logger.warning(f"Tushare 获取股票列表失败: {e}") + + return None + + def get_realtime_quote(self, stock_code: str) -> 
Optional[UnifiedRealtimeQuote]: + """ + 获取实时行情 + + 策略: + 1. 优先尝试 Pro 接口(需要2000积分):数据全,稳定性高 + 2. 失败降级到旧版接口:门槛低,数据较少 + + Args: + stock_code: 股票代码 + + Returns: + UnifiedRealtimeQuote 对象,失败返回 None + """ + if self._api is None: + return None + + # HK stocks not supported by Tushare + if _is_hk_market(stock_code): + logger.debug(f"TushareFetcher 跳过港股实时行情 {stock_code}") + return None + + normalized_code = normalize_stock_code(stock_code) + + from .realtime_types import ( + RealtimeSource, + safe_float, safe_int + ) + + # 速率限制检查 + self._check_rate_limit() + + # 尝试 Pro 接口 + try: + ts_code = self._convert_stock_code(stock_code) + # 尝试调用 Pro 实时接口 (需要积分) + df = self._api.quotation(ts_code=ts_code) + + if df is not None and not df.empty: + row = df.iloc[0] + logger.debug(f"Tushare Pro 实时行情获取成功: {stock_code}") + + return UnifiedRealtimeQuote( + code=normalized_code, + name=str(row.get('name', '')), + source=RealtimeSource.TUSHARE, + price=safe_float(row.get('price')), + change_pct=safe_float(row.get('pct_chg')), # Pro 接口通常直接返回涨跌幅 + change_amount=safe_float(row.get('change')), + volume=safe_int(row.get('vol')), + amount=safe_float(row.get('amount')), + high=safe_float(row.get('high')), + low=safe_float(row.get('low')), + open_price=safe_float(row.get('open')), + pre_close=safe_float(row.get('pre_close')), + turnover_rate=safe_float(row.get('turnover_ratio')), # Pro 接口可能有换手率 + pe_ratio=safe_float(row.get('pe')), + pb_ratio=safe_float(row.get('pb')), + total_mv=safe_float(row.get('total_mv')), + ) + except Exception as e: + # 仅记录调试日志,不报错,继续尝试降级 + logger.debug(f"Tushare Pro 实时行情不可用 (可能是积分不足): {e}") + + # 降级:尝试旧版接口 + try: + import tushare as ts + + symbol = self._get_legacy_realtime_symbol(stock_code) + + # 调用旧版实时接口 (ts.get_realtime_quotes) + df = ts.get_realtime_quotes(symbol) + + if df is None or df.empty: + return None + + row = df.iloc[0] + + # 计算涨跌幅 + price = safe_float(row['price']) + pre_close = safe_float(row['pre_close']) + change_pct = 0.0 + change_amount = 0.0 + + if price 
and pre_close and pre_close > 0: + change_amount = price - pre_close + change_pct = (change_amount / pre_close) * 100 + + # 构建统一对象 + return UnifiedRealtimeQuote( + code=normalized_code, + name=str(row['name']), + source=RealtimeSource.TUSHARE, + price=price, + change_pct=round(change_pct, 2), + change_amount=round(change_amount, 2), + volume=safe_int(row['volume']) // 100, # 转换为手 + amount=safe_float(row['amount']), + high=safe_float(row['high']), + low=safe_float(row['low']), + open_price=safe_float(row['open']), + pre_close=pre_close, + ) + + except Exception as e: + logger.warning(f"Tushare (旧版) 获取实时行情失败 {stock_code}: {e}") + return None + + def get_main_indices(self, region: str = "cn") -> Optional[List[dict]]: + """ + 获取主要指数实时行情 (Tushare Pro),仅支持 A 股 + """ + if region != "cn": + return None + if self._api is None: + return None + + from .realtime_types import safe_float + + # 指数映射:Tushare代码 -> 名称 + indices_map = { + '000001.SH': '上证指数', + '399001.SZ': '深证成指', + '399006.SZ': '创业板指', + '000688.SH': '科创50', + '000016.SH': '上证50', + '000300.SH': '沪深300', + } + + try: + self._check_rate_limit() + + # Tushare index_daily 获取历史数据,实时数据需用其他接口或估算 + # 由于 Tushare 免费用户可能无法获取指数实时行情,这里作为备选 + # 使用 index_daily 获取最近交易日数据 + + end_date = datetime.now().strftime('%Y%m%d') + start_date = (datetime.now() - pd.Timedelta(days=5)).strftime('%Y%m%d') + + results = [] + + # 批量获取所有指数数据 + for ts_code, name in indices_map.items(): + try: + df = self._api.index_daily(ts_code=ts_code, start_date=start_date, end_date=end_date) + if df is not None and not df.empty: + row = df.iloc[0] # 最新一天 + + current = safe_float(row['close']) + prev_close = safe_float(row['pre_close']) + + results.append({ + 'code': ts_code.split('.')[0], # 兼容 sh000001 格式需转换,这里保持纯数字 + 'name': name, + 'current': current, + 'change': safe_float(row['change']), + 'change_pct': safe_float(row['pct_chg']), + 'open': safe_float(row['open']), + 'high': safe_float(row['high']), + 'low': safe_float(row['low']), + 'prev_close': 
prev_close, + 'volume': safe_float(row['vol']), + 'amount': safe_float(row['amount']) * 1000, # 千元转元 + 'amplitude': 0.0 # Tushare index_daily 不直接返回振幅 + }) + except Exception as e: + logger.debug(f"Tushare 获取指数 {name} 失败: {e}") + continue + + if results: + return results + else: + logger.warning("[Tushare] 未获取到指数行情数据") + + except Exception as e: + logger.error(f"[Tushare] 获取指数行情失败: {e}") + + return None + + def get_market_stats(self) -> Optional[dict]: + """ + 获取市场涨跌统计 (Tushare Pro) + 2000积分 每天访问该接口 ts.pro_api().rt_k 两次 + 接口限制见:https://tushare.pro/document/1?doc_id=108 + """ + if self._api is None: + return None + + try: + logger.info("[Tushare] ts.pro_api() 获取市场统计...") + + # 获取当前中国时间,判断是否在交易时间内 + china_now = self._get_china_now() + current_clock = china_now.strftime("%H:%M") + current_date = china_now.strftime("%Y%m%d") + + trade_dates = self._get_trade_dates(current_date) + if not trade_dates: + return None + + if current_date in trade_dates: + if current_clock < '09:30' or current_clock > '16:30': + use_realtime = False + else: + use_realtime = True + else: + use_realtime = False + + # 若实盘的时候使用 则使用其他可以实盘获取的数据源 akshare、efinance + if use_realtime: + try: + df = self._call_api_with_rate_limit("rt_k", ts_code='3*.SZ,6*.SH,0*.SZ,92*.BJ') + if df is not None and not df.empty: + return self._calc_market_stats(df) + + except Exception as e: + logger.error(f"[Tushare] ts.pro_api().rt_k 尝试获取实时数据失败: {e}") + return None + else: + + if current_date not in trade_dates: + last_date = self._pick_trade_date(trade_dates, use_today=True) # 拿最近的日期 + else: + if current_clock < '09:30': + last_date = self._pick_trade_date(trade_dates, use_today=False) # 拿取前一天的数据 + else: # 即 '> 16:30' + last_date = self._pick_trade_date(trade_dates, use_today=True) # 拿取当天的数据 + + if last_date is None: + return None + + try: + df = self._call_api_with_rate_limit( + "daily", + ts_code='3*.SZ,6*.SH,0*.SZ,92*.BJ', + start_date=last_date, + end_date=last_date, + ) + # 为防止不同接口返回的列名大小写不一致(例如 rt_k 返回小写,daily 
返回大写),统一将列名转为小写 + df.columns = [col.lower() for col in df.columns] + + # 获取股票基础信息(包含代码和名称) + df_basic = self._call_api_with_rate_limit("stock_basic", fields='ts_code,name') + df = pd.merge(df, df_basic, on='ts_code', how='left') + # 将 daily的 amount 列的值乘以 1000 来和其他数据源保持一致 + if 'amount' in df.columns: + df['amount'] = df['amount'] * 1000 + + if df is not None and not df.empty: + return self._calc_market_stats(df) + except Exception as e: + logger.error(f"[Tushare] ts.pro_api().daily 获取数据失败: {e}") + + + + except Exception as e: + logger.error(f"[Tushare] 获取市场统计失败: {e}") + + return None + + def _calc_market_stats( + self, + df: pd.DataFrame, + ) -> Optional[Dict[str, Any]]: + """从行情 DataFrame 计算涨跌统计。""" + import numpy as np + + df = df.copy() + + # 1. 提取基础比对数据:最新价、昨收 + # 兼容不同接口返回的列名 sina/em efinance tushare xtdata + code_col = next((c for c in ['代码', '股票代码', 'ts_code','stock_code'] if c in df.columns), None) + name_col = next((c for c in ['名称', '股票名称','name','name'] if c in df.columns), None) + close_col = next((c for c in ['最新价', '最新价', 'close','lastPrice'] if c in df.columns), None) + pre_close_col = next((c for c in ['昨收', '昨日收盘', 'pre_close','lastClose'] if c in df.columns), None) + amount_col = next((c for c in ['成交额', '成交额', 'amount','amount'] if c in df.columns), None) + + limit_up_count = 0 + limit_down_count = 0 + up_count = 0 + down_count = 0 + flat_count = 0 + + for code, name, current_price, pre_close, amount in zip( + df[code_col], df[name_col], df[close_col], df[pre_close_col], df[amount_col] + ): + + # 停牌过滤 efinance 的停牌数据有时候会缺失价格显示为 '-',em 显示为none + if pd.isna(current_price) or pd.isna(pre_close) or current_price in ['-'] or pre_close in ['-'] or amount == 0: + continue + + # em、efinance 为str 需要转换为float + current_price = float(current_price) + pre_close = float(pre_close) + + # 获取去除前缀的纯数字代码 + pure_code = normalize_stock_code(str(code)) + + # A. 
确定每只股票的涨跌幅比例 (使用纯数字代码判断) + if is_bse_code(pure_code): + ratio = 0.30 + elif is_kc_cy_stock(pure_code): #pure_code.startswith(('688', '30')): + ratio = 0.20 + elif is_st_stock(name): #'ST' in str_name: + ratio = 0.05 + else: + ratio = 0.10 + + # B. 严格按照 A 股规则计算涨跌停价:昨收 * (1 ± 比例) -> 四舍五入保留2位小数 + limit_up_price = np.floor(pre_close * (1 + ratio) * 100 + 0.5) / 100.0 + limit_down_price = np.floor(pre_close * (1 - ratio) * 100 + 0.5) / 100.0 + + limit_up_price_Tolerance = round(abs(pre_close * (1 + ratio) - limit_up_price), 10) + limit_down_price_Tolerance = round(abs(pre_close * (1 - ratio) - limit_down_price), 10) + + # C. 精确比对 + if current_price > 0 : + is_limit_up = (current_price > 0) and (abs(current_price - limit_up_price) <= limit_up_price_Tolerance) + is_limit_down = (current_price > 0) and (abs(current_price - limit_down_price) <= limit_down_price_Tolerance) + + if is_limit_up: + limit_up_count += 1 + if is_limit_down: + limit_down_count += 1 + + if current_price > pre_close: + up_count += 1 + elif current_price < pre_close: + down_count += 1 + else: + flat_count += 1 + + # 统计数量 + stats = { + 'up_count': up_count, + 'down_count': down_count, + 'flat_count': flat_count, + 'limit_up_count': limit_up_count, + 'limit_down_count': limit_down_count, + 'total_amount': 0.0, + } + + # 成交额统计 + if amount_col and amount_col in df.columns: + df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce') + stats['total_amount'] = (df[amount_col].sum() / 1e8) + + return stats + + def get_trade_time(self,early_time='09:30',late_time='16:30') -> Optional[str]: + ''' + 获取当前时间可以获得数据的开始时间日期 + + Args: + early_time: 默认 '09:30' + late_time: 默认 '16:30' + early_time-late_time 之间为使用上一个交易日数据的时间段,其他时间为使用当天数据的时间段 + Returns: + start_date: 可以获得数据的开始日期 + ''' + china_now = self._get_china_now() + china_date = china_now.strftime("%Y%m%d") + china_clock = china_now.strftime("%H:%M") + + trade_dates = self._get_trade_dates(china_date) + if not trade_dates: + return None + + if china_date in 
trade_dates: + if early_time < china_clock < late_time: # 使用上一个交易日数据的时间段 + use_today = False + else: + use_today = True + else: + # 非交易日: today不在trade_dates中,trade_dates[0]就是最近交易日 + use_today = True + + start_date = self._pick_trade_date(trade_dates, use_today=use_today) + if start_date is None: + return None + + if not use_today: + logger.info(f"[Tushare] 当前时间 {china_clock} 可能无法获取当天筹码分布,尝试获取前一个交易日的数据 {start_date}") + + return start_date + + def get_sector_rankings(self, n: int = 5) -> Optional[Tuple[list, list]]: + """ + 获取行业板块涨跌榜 (Tushare Pro) + + 数据源优先级: + 1. 同花顺接口 (ts.pro_api().moneyflow_ind_ths) + 2. 东财接口 (ts.pro_api().moneyflow_ind_dc) + 注意:每个接口的行业分类和板块定义不同,会导致结果两者不一致 + """ + def _get_rank_top_n(df: pd.DataFrame, change_col: str, industry_name: str, n: int) -> Tuple[list, list]: + df[change_col] = pd.to_numeric(df[change_col], errors='coerce') + df = df.dropna(subset=[change_col]) + + # 涨幅前n + top = df.nlargest(n, change_col) + top_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in top.iterrows() + ] + + bottom = df.nsmallest(n, change_col) + bottom_sectors = [ + {'name': row[industry_name], 'change_pct': row[change_col]} + for _, row in bottom.iterrows() + ] + return top_sectors, bottom_sectors + + # 15:30之后才有当天数据 + start_date = self.get_trade_time(early_time='00:00', late_time='15:30') + if not start_date: + return None + + # 优先同花顺接口 + logger.info("[Tushare] ts.pro_api().moneyflow_ind_ths 获取板块排行(同花顺)...") + try: + df = self._call_api_with_rate_limit("moneyflow_ind_ths", trade_date=start_date) + if df is not None and not df.empty: + change_col = 'pct_change' + name = 'industry' + if change_col in df.columns: + return _get_rank_top_n(df, change_col, name, n) + except Exception as e: + logger.warning(f"[Tushare] 获取同花顺行业板块涨跌榜失败: {e} 尝试东财接口") + + # 同花顺接口失败,降级尝试东财接口 + logger.info("[Tushare] ts.pro_api().moneyflow_ind_dc 获取板块排行(东财)...") + try: + df = self._call_api_with_rate_limit("moneyflow_ind_dc", trade_date=start_date) + 
if df is not None and not df.empty: + df = df[df['content_type'] == '行业'] # 过滤出行业板块 + change_col = 'pct_change' + name = 'name' + if change_col in df.columns: + return _get_rank_top_n(df, change_col, name, n) + except Exception as e: + logger.warning(f"[Tushare] 获取东财行业板块涨跌榜失败: {e}") + return None + + # 获取为空或者接口调用失败,返回 None + return None + + + + + def get_chip_distribution(self, stock_code: str) -> Optional[ChipDistribution]: + """ + 获取筹码分布数据 + + 数据来源:ts.pro_api().cyq_chips() + 包含:获利比例、平均成本、筹码集中度 + + 注意:ETF/指数没有筹码分布数据,会直接返回 None;港股不支持,直接返回 None。 + 5000积分以下每天访问15次,每小时访问5次 + + Args: + stock_code: 股票代码 + + Returns: + ChipDistribution 对象(最新交易日的数据),获取失败返回 None + + """ + if _is_us_code(stock_code): + logger.warning(f"[Tushare] TushareFetcher 不支持美股 {stock_code} 的筹码分布") + return None + + if _is_etf_code(stock_code): + logger.warning(f"[Tushare] TushareFetcher 不支持 ETF {stock_code} 的筹码分布") + return None + + if _is_hk_market(stock_code): + logger.warning(f"[Tushare] TushareFetcher 不支持港股 {stock_code} 的筹码分布") + return None + + try: + # 19点之后才有当天数据 + start_date = self.get_trade_time(early_time='00:00', late_time='19:00') + if not start_date: + return None + + ts_code = self._convert_stock_code(stock_code) + + df = self._call_api_with_rate_limit( + "cyq_chips", + ts_code=ts_code, + start_date=start_date, + end_date=start_date, + ) + if df is not None and not df.empty: + daily_df = self._call_api_with_rate_limit( + "daily", + ts_code=ts_code, + start_date=start_date, + end_date=start_date, + ) + if daily_df is None or daily_df.empty: + return None + current_price = daily_df.iloc[0]['close'] + metrics = self.compute_cyq_metrics(df, current_price) + + chip = ChipDistribution( + code=stock_code, + date=datetime.strptime(start_date, '%Y%m%d').strftime('%Y-%m-%d'), + profit_ratio=metrics['获利比例'], + avg_cost=metrics['平均成本'], + cost_90_low=metrics['90成本-低'], + cost_90_high=metrics['90成本-高'], + concentration_90=metrics['90集中度'], + cost_70_low=metrics['70成本-低'], + 
cost_70_high=metrics['70成本-高'], + concentration_70=metrics['70集中度'], + ) + + logger.info(f"[筹码分布] {stock_code} 日期={chip.date}: 获利比例={chip.profit_ratio:.1%}, " + f"平均成本={chip.avg_cost}, 90%集中度={chip.concentration_90:.2%}, " + f"70%集中度={chip.concentration_70:.2%}") + return chip + + except Exception as e: + logger.warning(f"[Tushare] 获取筹码分布失败 {stock_code}: {e}") + return None + + def compute_cyq_metrics(self, df: pd.DataFrame, current_price: float) -> dict: + """ + 基于 Tushare 的筹码分布明细表 (cyq_chips) 计算常用筹码指标 + :param df: 包含 'price' 和 'percent' 列的 DataFrame + :param current_price: 股票当天的当前价/收盘价 (用于计算获利比例) + :return: 包含各项筹码指标的字典 + """ + import numpy as np + # 1. 确保按价格从小到大排序 (Tushare 返回的数据往往是纯倒序的) + df_sorted = df.sort_values(by='price', ascending=True).reset_index(drop=True) + + # 2. 防止原始数据 percent 总和产生浮点数误差,归一化到 100% + total_percent = df_sorted['percent'].sum() + + df_sorted['norm_percent'] = df_sorted['percent'] / total_percent * 100 + + # 3. 计算筹码的累积分布 + df_sorted['cumsum'] = df_sorted['norm_percent'].cumsum() + + # --- 获利比例 --- + # 所有价格 <= 当前价的筹码之和 + winner_rate = df_sorted[df_sorted['price'] <= current_price]['norm_percent'].sum() + + # --- 平均成本 --- + # 价格的加权平均值 + avg_cost = np.average(df_sorted['price'], weights=df_sorted['norm_percent']) + + # --- 辅助函数:求指定累积比例处的价格 --- + def get_percentile_price(target_pct): + # 寻找累积求和第一次大于等于目标百分比的行索引 + idx = df_sorted['cumsum'].searchsorted(target_pct) + idx = min(idx, len(df_sorted) - 1) # 防止越界 + return df_sorted.loc[idx, 'price'] + + # --- 90% 成本区与集中度 --- + # 去头去尾各 5% + cost_90_low = get_percentile_price(5) + cost_90_high = get_percentile_price(95) + if (cost_90_high + cost_90_low) != 0: + concentration_90 = (cost_90_high - cost_90_low) / (cost_90_high + cost_90_low) * 100 + else: + concentration_90 = 0.0 + + # --- 70% 成本区与集中度 --- + # 去头去尾各 15% + cost_70_low = get_percentile_price(15) + cost_70_high = get_percentile_price(85) + if (cost_70_high + cost_70_low) != 0: + concentration_70 = (cost_70_high - cost_70_low) / (cost_70_high + 
cost_70_low) * 100 + else: + concentration_70 = 0.0 + + # 返回格式化结果 + return { + "获利比例": round(winner_rate/100, 4), # /100 与akshare保持一致,返回小数格式 + "平均成本": round(avg_cost, 4), + "90成本-低": round(cost_90_low, 4), + "90成本-高": round(cost_90_high, 4), + "90集中度": round(concentration_90/100, 4), + "70成本-低": round(cost_70_low, 4), + "70成本-高": round(cost_70_high, 4), + "70集中度": round(concentration_70/100, 4) + } + + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = TushareFetcher() + + try: + # 测试历史数据 + df = fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + + # 测试股票名称 + name = fetcher.get_stock_name('600519') + print(f"股票名称: {name}") + + except Exception as e: + print(f"获取失败: {e}") + + # 测试市场统计 + print("\n" + "=" * 50) + print("Testing get_market_stats (tushare)") + print("=" * 50) + try: + stats = fetcher.get_market_stats() + if stats: + print(f"Market Stats successfully computed:") + print(f"Up: {stats['up_count']} (Limit Up: {stats['limit_up_count']})") + print(f"Down: {stats['down_count']} (Limit Down: {stats['limit_down_count']})") + print(f"Flat: {stats['flat_count']}") + print(f"Total Amount: {stats['total_amount']:.2f} 亿 (Yi)") + else: + print("Failed to compute market stats.") + except Exception as e: + print(f"Failed to compute market stats: {e}") + + + # 测试筹码分布数据 + print("\n" + "=" * 50) + print("测试筹码分布数据获取") + print("=" * 50) + try: + chip = fetcher.get_chip_distribution('600519') # 茅台 + except Exception as e: + print(f"[筹码分布] 获取失败: {e}") + + # 测试行业板块排名 + print("\n" + "=" * 50) + print("测试行业板块排名获取") + print("=" * 50) + try: + rankings = fetcher.get_sector_rankings(n=5) + if rankings: + top, bottom = rankings + print("涨幅榜 Top 5:") + for sector in top: + print(f"{sector['name']}: {sector['change_pct']}%") + print("\n跌幅榜 Top 5:") + for sector in bottom: + print(f"{sector['name']}: {sector['change_pct']}%") + else: + print("未获取到行业板块排名数据") + except Exception as e: + print(f"[行业板块排名] 
获取失败: {e}") diff --git a/src/provider/us_index_mapping.py b/src/provider/us_index_mapping.py new file mode 100644 index 00000000..a90ba042 --- /dev/null +++ b/src/provider/us_index_mapping.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +=================================== +美股指数与股票代码工具 +=================================== + +提供: +1. 美股指数代码映射(如 SPX -> ^GSPC) +2. 美股股票代码识别(AAPL、TSLA 等) + +美股指数在 Yahoo Finance 中需使用 ^ 前缀,与股票代码不同。 +""" + +import re + +# 美股代码正则:1-5 个大写字母,可选 .X 后缀(如 BRK.B) +_US_STOCK_PATTERN = re.compile(r'^[A-Z]{1,5}(\.[A-Z])?$') + + +# 用户输入 -> (Yahoo Finance 符号, 中文名称) +US_INDEX_MAPPING = { + # 标普 500 + 'SPX': ('^GSPC', '标普500指数'), + '^GSPC': ('^GSPC', '标普500指数'), + 'GSPC': ('^GSPC', '标普500指数'), + # 道琼斯工业平均指数 + 'DJI': ('^DJI', '道琼斯工业指数'), + '^DJI': ('^DJI', '道琼斯工业指数'), + 'DJIA': ('^DJI', '道琼斯工业指数'), + # 纳斯达克综合指数 + 'IXIC': ('^IXIC', '纳斯达克综合指数'), + '^IXIC': ('^IXIC', '纳斯达克综合指数'), + 'NASDAQ': ('^IXIC', '纳斯达克综合指数'), + # 纳斯达克 100 + 'NDX': ('^NDX', '纳斯达克100指数'), + '^NDX': ('^NDX', '纳斯达克100指数'), + # VIX 波动率指数 + 'VIX': ('^VIX', 'VIX恐慌指数'), + '^VIX': ('^VIX', 'VIX恐慌指数'), + # 罗素 2000 + 'RUT': ('^RUT', '罗素2000指数'), + '^RUT': ('^RUT', '罗素2000指数'), +} + + +def is_us_index_code(code: str) -> bool: + """ + 判断代码是否为美股指数符号。 + + Args: + code: 股票/指数代码,如 'SPX', 'DJI' + + Returns: + True 表示是已知美股指数符号,否则 False + + Examples: + >>> is_us_index_code('SPX') + True + >>> is_us_index_code('AAPL') + False + """ + return (code or '').strip().upper() in US_INDEX_MAPPING + + +def is_us_stock_code(code: str) -> bool: + """ + 判断代码是否为美股股票符号(排除美股指数)。 + + 美股股票代码为 1-5 个大写字母,可选 .X 后缀如 BRK.B。 + 美股指数(SPX、DJI 等)明确排除。 + + Args: + code: 股票代码,如 'AAPL', 'TSLA', 'BRK.B' + + Returns: + True 表示是美股股票符号,否则 False + + Examples: + >>> is_us_stock_code('AAPL') + True + >>> is_us_stock_code('TSLA') + True + >>> is_us_stock_code('BRK.B') + True + >>> is_us_stock_code('SPX') + False + >>> is_us_stock_code('600519') + False + """ + normalized = (code or '').strip().upper() + # 美股指数不是股票 + if normalized in 
US_INDEX_MAPPING: + return False + return bool(_US_STOCK_PATTERN.match(normalized)) + + +def get_us_index_yf_symbol(code: str) -> tuple: + """ + 获取美股指数的 Yahoo Finance 符号与中文名称。 + + Args: + code: 用户输入,如 'SPX', '^GSPC', 'DJI' + + Returns: + (yf_symbol, chinese_name) 元组,未找到时返回 (None, None)。 + + Examples: + >>> get_us_index_yf_symbol('SPX') + ('^GSPC', '标普500指数') + >>> get_us_index_yf_symbol('AAPL') + (None, None) + """ + normalized = (code or '').strip().upper() + return US_INDEX_MAPPING.get(normalized, (None, None)) diff --git a/src/provider/yfinance_fetcher.py b/src/provider/yfinance_fetcher.py new file mode 100644 index 00000000..e9aefde5 --- /dev/null +++ b/src/provider/yfinance_fetcher.py @@ -0,0 +1,746 @@ +# -*- coding: utf-8 -*- +""" +=================================== +YfinanceFetcher - 兜底数据源 (Priority 4) +=================================== + +数据来源:Yahoo Finance(通过 yfinance 库) +特点:国际数据源、可能有延迟或缺失 +定位:当所有国内数据源都失败时的最后保障 + +关键策略: +1. 自动将 A 股代码转换为 yfinance 格式(.SS / .SZ) +2. 处理 Yahoo Finance 的数据格式差异 +3. 
失败后指数退避重试 +""" + +import csv +import logging +from datetime import datetime +from io import StringIO +from typing import Optional, List, Dict, Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen + +import pandas as pd +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, + before_sleep_log, +) + +from .base import BaseFetcher, DataFetchError, STANDARD_COLUMNS, is_bse_code +from .realtime_types import UnifiedRealtimeQuote, RealtimeSource +from .us_index_mapping import get_us_index_yf_symbol, is_us_stock_code + +# 可选导入本地股票映射补丁,若缺失则使用空字典兜底 +try: + from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +except (ImportError, ModuleNotFoundError): + STOCK_NAME_MAP = {} + + def is_meaningful_stock_name(name: str | None, stock_code: str) -> bool: + """简单的名称有效性校验兜底""" + if not name: + return False + n = str(name).strip() + return bool(n and n.upper() != str(stock_code).strip().upper()) + +import os + +logger = logging.getLogger(__name__) + + +class YfinanceFetcher(BaseFetcher): + """ + Yahoo Finance 数据源实现 + + 优先级:4(最低,作为兜底) + 数据来源:Yahoo Finance + + 关键策略: + - 自动转换股票代码格式 + - 处理时区和数据格式差异 + - 失败后指数退避重试 + + 注意事项: + - A 股数据可能有延迟 + - 某些股票可能无数据 + - 数据精度可能与国内源略有差异 + """ + + name = "YfinanceFetcher" + priority = int(os.getenv("YFINANCE_PRIORITY", "4")) + + def __init__(self): + """初始化 YfinanceFetcher""" + pass + + def _convert_stock_code(self, stock_code: str) -> str: + """ + 转换股票代码为 Yahoo Finance 格式 + + Yahoo Finance 代码格式: + - A股沪市:600519.SS (Shanghai Stock Exchange) + - A股深市:000001.SZ (Shenzhen Stock Exchange) + - 港股:0700.HK (Hong Kong Stock Exchange) + - 美股:AAPL, TSLA, GOOGL (无需后缀) + + Args: + stock_code: 原始代码,如 '600519', 'hk00700', 'AAPL' + + Returns: + Yahoo Finance 格式代码 + + Examples: + >>> fetcher._convert_stock_code('600519') + '600519.SS' + >>> fetcher._convert_stock_code('hk00700') + '0700.HK' + >>> fetcher._convert_stock_code('AAPL') + 'AAPL' + """ + code = 
stock_code.strip().upper() + + # 美股指数:映射到 Yahoo Finance 符号(如 SPX -> ^GSPC) + yf_symbol, _ = get_us_index_yf_symbol(code) + if yf_symbol: + logger.debug(f"识别为美股指数: {code} -> {yf_symbol}") + return yf_symbol + + # 美股:1-5 个大写字母(可选 .X 后缀),原样返回 + if is_us_stock_code(code): + logger.debug(f"识别为美股代码: {code}") + return code + + # 港股:hk前缀 -> .HK后缀 + if code.startswith('HK'): + hk_code = code[2:].lstrip('0') or '0' # 去除前导0,但保留至少一个0 + hk_code = hk_code.zfill(4) # 补齐到4位 + logger.debug(f"转换港股代码: {stock_code} -> {hk_code}.HK") + return f"{hk_code}.HK" + + # 已经包含后缀的情况 + if '.SS' in code or '.SZ' in code or '.HK' in code or '.BJ' in code: + return code + + # 去除可能的 .SH 后缀 + code = code.replace('.SH', '') + + # ETF: Shanghai ETF (51xx, 52xx, 56xx, 58xx) -> .SS; Shenzhen ETF (15xx, 16xx, 18xx) -> .SZ + if len(code) == 6: + if code.startswith(('51', '52', '56', '58')): + return f"{code}.SS" + if code.startswith(('15', '16', '18')): + return f"{code}.SZ" + + # BSE (Beijing Stock Exchange): 8xxxxx, 4xxxxx, 920xxx + if is_bse_code(code): + base = code.split('.')[0] if '.' in code else code + return f"{base}.BJ" + + # A股:根据代码前缀判断市场 + if code.startswith(('600', '601', '603', '688')): + return f"{code}.SS" + elif code.startswith(('000', '002', '300')): + return f"{code}.SZ" + else: + logger.warning(f"无法确定股票 {code} 的市场,默认使用深市") + return f"{code}.SZ" + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=30), + retry=retry_if_exception_type((ConnectionError, TimeoutError)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + def _fetch_raw_data(self, stock_code: str, start_date: str, end_date: str) -> pd.DataFrame: + """ + 从 Yahoo Finance 获取原始数据 + + 使用 yfinance.download() 获取历史数据 + + 流程: + 1. 转换股票代码格式 + 2. 调用 yfinance API + 3. 
处理返回数据 + """ + import yfinance as yf + + # 转换代码格式 + yf_code = self._convert_stock_code(stock_code) + + logger.debug(f"调用 yfinance.download({yf_code}, {start_date}, {end_date})") + + try: + # 使用 yfinance 下载数据 + df = yf.download( + tickers=yf_code, + start=start_date, + end=end_date, + progress=False, # 禁止进度条 + auto_adjust=True, # 自动调整价格(复权) + multi_level_index=True + ) + + # 筛选出 yf_code 的列, 避免多只股票数据混淆 + if isinstance(df.columns, pd.MultiIndex) and len(df.columns) > 1: + ticker_level = df.columns.get_level_values(1) + mask = ticker_level == yf_code + if mask.any(): + df = df.loc[:, mask].copy() + + if df.empty: + raise DataFetchError(f"Yahoo Finance 未查询到 {stock_code} 的数据") + + return df + + except Exception as e: + if isinstance(e, DataFetchError): + raise + raise DataFetchError(f"Yahoo Finance 获取数据失败: {e}") from e + + def _normalize_data(self, df: pd.DataFrame, stock_code: str) -> pd.DataFrame: + """ + 标准化 Yahoo Finance 数据 + + yfinance 返回的列名: + Open, High, Low, Close, Volume(索引是日期) + + 注意:新版 yfinance 返回 MultiIndex 列名,如 ('Close', 'AMD') + 需要先扁平化列名再进行处理 + + 需要映射到标准列名: + date, open, high, low, close, volume, amount, pct_chg + """ + df = df.copy() + + # 处理 MultiIndex 列名(新版 yfinance 返回格式) + # 例如: ('Close', 'AMD') -> 'Close' + if isinstance(df.columns, pd.MultiIndex): + logger.debug("检测到 MultiIndex 列名,进行扁平化处理") + # 取第一级列名(Price level: Close, High, Low, etc.) 
+ df.columns = df.columns.get_level_values(0) + + # 重置索引,将日期从索引变为列 + df = df.reset_index() + + # 列名映射(yfinance 使用首字母大写) + column_mapping = { + 'Date': 'date', + 'Open': 'open', + 'High': 'high', + 'Low': 'low', + 'Close': 'close', + 'Volume': 'volume', + } + + df = df.rename(columns=column_mapping) + + # 计算涨跌幅(因为 yfinance 不直接提供) + if 'close' in df.columns: + df['pct_chg'] = df['close'].pct_change() * 100 + df['pct_chg'] = df['pct_chg'].fillna(0).round(2) + + # 计算成交额(yfinance 不提供,使用估算值) + # 成交额 ≈ 成交量 * 平均价格 + if 'volume' in df.columns and 'close' in df.columns: + df['amount'] = df['volume'] * df['close'] + else: + df['amount'] = 0 + + # 添加股票代码列 + df['code'] = stock_code + + # 只保留需要的列 + keep_cols = ['code'] + STANDARD_COLUMNS + existing_cols = [col for col in keep_cols if col in df.columns] + df = df[existing_cols] + + return df + + def _fetch_yf_ticker_data(self, yf, yf_code: str, name: str, return_code: str) -> Optional[Dict[str, Any]]: + """ + 通过 yfinance 拉取单个指数/股票的行情数据。 + + Args: + yf: yfinance 模块引用 + yf_code: yfinance 使用的代码(如 '000001.SS'、'^GSPC') + name: 指数显示名称 + return_code: 写入结果 dict 的 code 字段(如 'sh000001'、'SPX') + + Returns: + 行情字典,失败时返回 None + """ + ticker = yf.Ticker(yf_code) + # 取近两日数据以计算涨跌幅 + hist = ticker.history(period='2d') + if hist.empty: + return None + today_row = hist.iloc[-1] + prev_row = hist.iloc[-2] if len(hist) > 1 else today_row + price = float(today_row['Close']) + prev_close = float(prev_row['Close']) + change = price - prev_close + change_pct = (change / prev_close) * 100 if prev_close else 0 + high = float(today_row['High']) + low = float(today_row['Low']) + # 振幅 = (最高 - 最低) / 昨收 * 100 + amplitude = ((high - low) / prev_close * 100) if prev_close else 0 + return { + 'code': return_code, + 'name': name, + 'current': price, + 'change': change, + 'change_pct': change_pct, + 'open': float(today_row['Open']), + 'high': high, + 'low': low, + 'prev_close': prev_close, + 'volume': float(today_row['Volume']), + 'amount': 0.0, # Yahoo Finance 
不提供准确成交额 + 'amplitude': amplitude, + } + + def get_main_indices(self, region: str = "cn") -> Optional[List[Dict[str, Any]]]: + """ + 获取主要指数行情 (Yahoo Finance),支持 A 股与美股。 + region=us 时委托给 _get_us_main_indices。 + """ + import yfinance as yf + + if region == "us": + return self._get_us_main_indices(yf) + + # A 股指数:akshare 代码 -> (yfinance 代码, 显示名称) + yf_mapping = { + 'sh000001': ('000001.SS', '上证指数'), + 'sz399001': ('399001.SZ', '深证成指'), + 'sz399006': ('399006.SZ', '创业板指'), + 'sh000688': ('000688.SS', '科创50'), + 'sh000016': ('000016.SS', '上证50'), + 'sh000300': ('000300.SS', '沪深300'), + } + + results = [] + try: + for ak_code, (yf_code, name) in yf_mapping.items(): + try: + item = self._fetch_yf_ticker_data(yf, yf_code, name, ak_code) + if item: + results.append(item) + logger.debug(f"[Yfinance] 获取指数 {name} 成功") + except Exception as e: + logger.warning(f"[Yfinance] 获取指数 {name} 失败: {e}") + + if results: + logger.info(f"[Yfinance] 成功获取 {len(results)} 个 A 股指数行情") + return results + + except Exception as e: + logger.error(f"[Yfinance] 获取 A 股指数行情失败: {e}") + + return None + + def _get_us_main_indices(self, yf) -> Optional[List[Dict[str, Any]]]: + """获取美股主要指数行情(SPX、IXIC、DJI、VIX),复用 _fetch_yf_ticker_data""" + # 大盘复盘所需核心美股指数 + us_indices = ['SPX', 'IXIC', 'DJI', 'VIX'] + results = [] + try: + for code in us_indices: + yf_symbol, name = get_us_index_yf_symbol(code) + if not yf_symbol: + continue + try: + item = self._fetch_yf_ticker_data(yf, yf_symbol, name, code) + if item: + results.append(item) + logger.debug(f"[Yfinance] 获取美股指数 {name} 成功") + except Exception as e: + logger.warning(f"[Yfinance] 获取美股指数 {name} 失败: {e}") + + if results: + logger.info(f"[Yfinance] 成功获取 {len(results)} 个美股指数行情") + return results + + except Exception as e: + logger.error(f"[Yfinance] 获取美股指数行情失败: {e}") + + return None + + def _is_us_stock(self, stock_code: str) -> bool: + """ + 判断代码是否为美股股票(排除美股指数)。 + + 委托给 us_index_mapping 模块的 is_us_stock_code()。 + """ + return is_us_stock_code(stock_code) + + def 
_get_us_stock_quote_from_stooq(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 使用 Stooq 为美股实时行情提供免密钥兜底。 + + Stooq 提供的是最新交易日行情,精度不如分时实时接口,但在 Yahoo / yfinance + 被限流时,至少能为 Web UI 提供可用价格;若可获取到昨收价,则同时提供涨跌幅等衍生指标。 + """ + symbol = stock_code.strip().upper() + stooq_symbol = f"{symbol.lower()}.us" + url = f"https://stooq.com/q/l/?s={stooq_symbol}" + request = Request( + url, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; DSA/1.0; +https://github.com/ZhuLinsen/daily_stock_analysis)", + "Accept": "text/plain,text/csv,*/*", + }, + ) + + try: + with urlopen(request, timeout=15) as response: + payload = response.read().decode("utf-8", "ignore").strip() + except (HTTPError, URLError, TimeoutError) as exc: + logger.warning(f"[Stooq] 获取美股 {symbol} 实时行情失败: {exc}") + return None + + if not payload or payload.upper().startswith("NO DATA"): + logger.warning(f"[Stooq] 无法获取 {symbol} 的行情数据") + return None + + def _fetch_prev_close() -> Optional[float]: + history_url = f"https://stooq.com/q/d/l/?s={stooq_symbol}&i=d" + history_request = Request( + history_url, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; DSA/1.0; +https://github.com/ZhuLinsen/daily_stock_analysis)", + "Accept": "text/plain,text/csv,*/*", + }, + ) + try: + with urlopen(history_request, timeout=15) as response: + history_payload = response.read().decode("utf-8", "ignore").strip() + except (HTTPError, URLError, TimeoutError) as exc: + logger.debug(f"[Stooq] 获取美股 {symbol} 日线历史失败: {exc}") + return None + + if not history_payload or history_payload.upper().startswith("NO DATA"): + return None + + try: + reader = csv.reader(StringIO(history_payload)) + header = next(reader, None) + if not header: + return None + + header_tokens = [cell.strip().lower() for cell in header] + has_header = "close" in header_tokens and "date" in header_tokens + if not has_header: + return None + + date_index = header_tokens.index("date") + close_index = header_tokens.index("close") + + daily_rows: 
list[tuple[datetime, float]] = [] + for row in reader: + if not row: + continue + date_text = row[date_index].strip() if len(row) > date_index else "" + close_text = row[close_index].strip() if len(row) > close_index else "" + if not date_text or not close_text: + continue + try: + dt = datetime.strptime(date_text, "%Y-%m-%d") + close_val = float(close_text) + except Exception: + continue + daily_rows.append((dt, close_val)) + + if len(daily_rows) < 2: + return None + + daily_rows.sort(key=lambda item: item[0]) + return daily_rows[-2][1] + except Exception: + return None + + try: + reader = csv.reader(StringIO(payload)) + first_row = next(reader, None) + if first_row is None: + raise ValueError(f"unexpected Stooq payload: {payload}") + + normalized_first_row = [cell.strip() for cell in first_row] + header_tokens = {cell.lower() for cell in normalized_first_row if cell} + has_header = 'open' in header_tokens and 'close' in header_tokens + row = next(reader, None) if has_header else first_row + if row is None: + raise ValueError(f"unexpected Stooq payload: {payload}") + + normalized_row = [cell.strip() for cell in row] + while normalized_row and normalized_row[-1] == '': + normalized_row.pop() + + if len(normalized_row) >= 8: + open_index, high_index, low_index, price_index, volume_index = 3, 4, 5, 6, 7 + elif len(normalized_row) >= 7: + open_index, high_index, low_index, price_index, volume_index = 2, 3, 4, 5, 6 + else: + raise ValueError(f"unexpected Stooq payload: {payload}") + + open_price = float(normalized_row[open_index]) + high = float(normalized_row[high_index]) + low = float(normalized_row[low_index]) + price = float(normalized_row[price_index]) + volume = int(float(normalized_row[volume_index])) + + prev_close = _fetch_prev_close() + change_amount = None + change_pct = None + amplitude = None + if prev_close is not None and prev_close > 0: + change_amount = price - prev_close + change_pct = (change_amount / prev_close) * 100 + amplitude = ((high - low) / 
prev_close) * 100 + + quote = UnifiedRealtimeQuote( + code=symbol, + name=STOCK_NAME_MAP.get(symbol, ''), + source=RealtimeSource.STOOQ, + price=price, + change_pct=round(change_pct, 2) if change_pct is not None else None, + change_amount=round(change_amount, 4) if change_amount is not None else None, + volume=volume, + amount=None, + volume_ratio=None, + turnover_rate=None, + amplitude=round(amplitude, 2) if amplitude is not None else None, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=None, + pb_ratio=None, + total_mv=None, + circ_mv=None, + ) + logger.info(f"[Stooq] 获取美股 {symbol} 兜底行情成功: 价格={price}") + return quote + except Exception as exc: + logger.warning(f"[Stooq] 解析美股 {symbol} 行情失败: {exc}") + return None + + def _get_us_index_realtime_quote( + self, + user_code: str, + yf_symbol: str, + index_name: str, + ) -> Optional[UnifiedRealtimeQuote]: + """ + Get realtime quote for US index (e.g. SPX -> ^GSPC). + + Args: + user_code: User input code (e.g. SPX) + yf_symbol: Yahoo Finance symbol (e.g. 
^GSPC) + index_name: Chinese name for the index + + Returns: + UnifiedRealtimeQuote or None + """ + import yfinance as yf + + try: + logger.debug(f"[Yfinance] 获取美股指数 {user_code} ({yf_symbol}) 实时行情") + ticker = yf.Ticker(yf_symbol) + + try: + info = ticker.fast_info + if info is None: + raise ValueError("fast_info is None") + price = getattr(info, 'lastPrice', None) or getattr(info, 'last_price', None) + prev_close = getattr(info, 'previousClose', None) or getattr(info, 'previous_close', None) + open_price = getattr(info, 'open', None) + high = getattr(info, 'dayHigh', None) or getattr(info, 'day_high', None) + low = getattr(info, 'dayLow', None) or getattr(info, 'day_low', None) + volume = getattr(info, 'lastVolume', None) or getattr(info, 'last_volume', None) + except Exception: + logger.debug("[Yfinance] fast_info 失败,尝试 history 方法") + hist = ticker.history(period='2d') + if hist.empty: + logger.warning(f"[Yfinance] 无法获取 {yf_symbol} 的数据") + return None + today = hist.iloc[-1] + prev = hist.iloc[-2] if len(hist) > 1 else today + price = float(today['Close']) + prev_close = float(prev['Close']) + open_price = float(today['Open']) + high = float(today['High']) + low = float(today['Low']) + volume = int(today['Volume']) + + change_amount = None + change_pct = None + if price is not None and prev_close is not None and prev_close > 0: + change_amount = price - prev_close + change_pct = (change_amount / prev_close) * 100 + + amplitude = None + if high is not None and low is not None and prev_close is not None and prev_close > 0: + amplitude = ((high - low) / prev_close) * 100 + + quote = UnifiedRealtimeQuote( + code=user_code, + name=index_name or user_code, + source=RealtimeSource.FALLBACK, + price=price, + change_pct=round(change_pct, 2) if change_pct is not None else None, + change_amount=round(change_amount, 4) if change_amount is not None else None, + volume=volume, + amount=None, + volume_ratio=None, + turnover_rate=None, + amplitude=round(amplitude, 2) if 
amplitude is not None else None, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=None, + pb_ratio=None, + total_mv=None, + circ_mv=None, + ) + logger.info(f"[Yfinance] 获取美股指数 {user_code} 实时行情成功: 价格={price}") + return quote + except Exception as e: + logger.warning(f"[Yfinance] 获取美股指数 {user_code} 实时行情失败: {e}") + return None + + def get_realtime_quote(self, stock_code: str) -> Optional[UnifiedRealtimeQuote]: + """ + 获取美股/美股指数实时行情数据 + + 支持美股股票(AAPL、TSLA)和美股指数(SPX、DJI 等)。 + 数据来源:yfinance Ticker.info + + Args: + stock_code: 美股代码或指数代码,如 'AMD', 'AAPL', 'SPX', 'DJI' + + Returns: + UnifiedRealtimeQuote 对象,获取失败返回 None + """ + import yfinance as yf + + # 美股指数:使用映射(SPX -> ^GSPC) + yf_symbol, index_name = get_us_index_yf_symbol(stock_code) + if yf_symbol: + return self._get_us_index_realtime_quote( + user_code=stock_code.strip().upper(), + yf_symbol=yf_symbol, + index_name=index_name, + ) + + # 仅处理美股股票 + if not self._is_us_stock(stock_code): + logger.debug(f"[Yfinance] {stock_code} 不是美股,跳过") + return None + + try: + symbol = stock_code.strip().upper() + logger.debug(f"[Yfinance] 获取美股 {symbol} 实时行情") + + ticker = yf.Ticker(symbol) + + # 尝试获取 fast_info(更快,但字段较少) + try: + info = ticker.fast_info + if info is None: + raise ValueError("fast_info is None") + + price = getattr(info, 'lastPrice', None) or getattr(info, 'last_price', None) + prev_close = getattr(info, 'previousClose', None) or getattr(info, 'previous_close', None) + open_price = getattr(info, 'open', None) + high = getattr(info, 'dayHigh', None) or getattr(info, 'day_high', None) + low = getattr(info, 'dayLow', None) or getattr(info, 'day_low', None) + volume = getattr(info, 'lastVolume', None) or getattr(info, 'last_volume', None) + market_cap = getattr(info, 'marketCap', None) or getattr(info, 'market_cap', None) + + except Exception: + # 回退到 history 方法获取最新数据 + logger.debug("[Yfinance] fast_info 失败,尝试 history 方法") + hist = ticker.history(period='2d') + if hist.empty: + 
logger.warning(f"[Yfinance] 无法获取 {symbol} 的数据,尝试 Stooq 兜底") + return self._get_us_stock_quote_from_stooq(symbol) + + today = hist.iloc[-1] + prev = hist.iloc[-2] if len(hist) > 1 else today + + price = float(today['Close']) + prev_close = float(prev['Close']) + open_price = float(today['Open']) + high = float(today['High']) + low = float(today['Low']) + volume = int(today['Volume']) + market_cap = None + + # 计算涨跌幅 + change_amount = None + change_pct = None + if price is not None and prev_close is not None and prev_close > 0: + change_amount = price - prev_close + change_pct = (change_amount / prev_close) * 100 + + # 计算振幅 + amplitude = None + if high is not None and low is not None and prev_close is not None and prev_close > 0: + amplitude = ((high - low) / prev_close) * 100 + + # 获取股票名称 + try: + info_name = ticker.info.get('shortName', '') or ticker.info.get('longName', '') or '' + name = info_name if is_meaningful_stock_name(info_name, symbol) else STOCK_NAME_MAP.get(symbol, '') + except Exception: + name = STOCK_NAME_MAP.get(symbol, '') + + quote = UnifiedRealtimeQuote( + code=symbol, + name=name, + source=RealtimeSource.FALLBACK, + price=price, + change_pct=round(change_pct, 2) if change_pct is not None else None, + change_amount=round(change_amount, 4) if change_amount is not None else None, + volume=volume, + amount=None, # yfinance 不直接提供成交额 + volume_ratio=None, + turnover_rate=None, + amplitude=round(amplitude, 2) if amplitude is not None else None, + open_price=open_price, + high=high, + low=low, + pre_close=prev_close, + pe_ratio=None, + pb_ratio=None, + total_mv=market_cap, + circ_mv=None, + ) + + logger.info(f"[Yfinance] 获取美股 {symbol} 实时行情成功: 价格={price}") + return quote + + except Exception as e: + logger.warning(f"[Yfinance] 获取美股 {stock_code} 实时行情失败: {e},尝试 Stooq 兜底") + return self._get_us_stock_quote_from_stooq(stock_code) + + +if __name__ == "__main__": + # 测试代码 + logging.basicConfig(level=logging.DEBUG) + + fetcher = YfinanceFetcher() + + try: + df = 
fetcher.get_daily_data('600519') # 茅台 + print(f"获取成功,共 {len(df)} 条数据") + print(df.tail()) + except Exception as e: + print(f"获取失败: {e}") From af202638d834c408a36942711a60921df1f23660 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:50:46 +0800 Subject: [PATCH 09/43] refactor: move test_rest and test_websocket into tests/ --- .../20250423-project-restructure-design.md | 191 ++++ .../20250423-project-restructure-plan.md | 534 ++++++++++ ...026-04-22-top-usstock-daily-data-design.md | 223 ++++ .../2026-04-22-top-usstock-daily-data-plan.md | 997 ++++++++++++++++++ .../prds/.20250423-OPT_DATA_PROVIDER.md.swp | Bin 0 -> 4096 bytes .../prds/20250422-TOP_USSTOCK_DAILY_DATA.md | 18 + .claude/prds/20250423-OPT_CODE.md | 14 + .claude/prds/20250423-OPT_DATA_PROVIDER.md | 46 + .idea/.gitignore | 5 + .../inspectionProfiles/profiles_settings.xml | 6 + .idea/massive-com.iml | 12 + .idea/misc.xml | 7 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + CLAUDE.md | 70 ++ LICENSE | 19 - docs/README.md | 570 ++++++++++ docs/requirements.txt | 2 +- project/__init__.py | 0 project/us_daily/__init__.py | 0 project/us_daily/__main__.py | 97 -- project/us_daily/agg_fetcher.py | 117 -- project/us_daily/config.json | 8 - project/us_daily/config.py | 24 - project/us_daily/storage.py | 25 - project/us_daily/ticker_filter.py | 59 -- src/massive/rest/base.py | 2 + {test_rest => tests/test_rest}/base.py | 0 .../AUD/USD&amount=100&precision=2.json | 0 ...393873000×tamp.gte=1477972800000.json | 0 ...ong_window=20×tamp.gt=2022-08-09.json | 0 ...adjusted=true×tamp.gt=2022-08-18.json | 0 ...amp=1483958600&expand_underlying=true.json | 0 .../mocks/v1/last/crypto/BTC/USD.json | 0 .../v1/last_quote/currencies/AUD/USD.json | 0 .../test_rest}/mocks/v1/marketstatus/now.json | 0 .../mocks/v1/marketstatus/upcoming.json | 0 .../AAPL/2005-04-01&adjusted=true.json | 0 ...05000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json | 0 
.../stocks/2005-04-04&adjusted=true.json | 0 .../mocks/v2/aggs/ticker/AAPL/prev.json | 0 .../range/1/day/2005-04-01/2005-04-04.json | 0 .../range/1/day/2005-04-02/2005-04-04.json | 0 .../test_rest}/mocks/v2/last/nbbo/AAPL.json | 0 .../test_rest}/mocks/v2/last/trade/AAPL.json | 0 .../mocks/v2/reference/news&ticker=NFLX.json | 0 .../markets/crypto/tickers/X;BTCUSD/book.json | 0 .../locale/us/markets/stocks/gainers.json | 0 .../us/markets/stocks/tickers/AAPL.json | 0 .../us/markets/stocks/tickers/index.json | 0 ...1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json | 0 .../test_rest}/mocks/v3/quotes/AAPL.json | 0 .../conditions&asset_class=stocks.json | 0 .../mocks/v3/reference/dividends.json | 0 .../mocks/v3/reference/exchanges.json | 0 ...s&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json | 0 .../mocks/v3/reference/options/contracts.json | 0 .../contracts/OEVRI240119C00002500.json | 0 .../test_rest}/mocks/v3/reference/splits.json | 0 ...GU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json | 0 .../mocks/v3/reference/tickers.json | 0 .../tickers/AAPL&date=2020-10-01.json | 0 .../mocks/v3/reference/tickers/AAPL.json | 0 .../mocks/v3/reference/tickers/types.json | 0 .../test_rest}/mocks/v3/snapshot.json | 0 ...indices&ticker.any_of=SPX%2CAPx%2CAPy.json | 0 .../mocks/v3/snapshot/options/AAPL.json | 0 .../options/AAPL/O;AAPL230616C00150000.json | 0 .../mocks/v3/trades/AAPL&limit=2.json | 0 .../mocks/vX/reference/financials.json | 0 .../META/events&types=ticker_change.json | 0 .../test_rest}/models/test_requests.py | 0 {test_rest => tests/test_rest}/test_aggs.py | 0 .../test_rest}/test_conditions.py | 0 .../test_rest}/test_contracts.py | 0 .../test_rest}/test_dividends.py | 0 .../test_rest}/test_exchanges.py | 0 .../test_rest}/test_indicators.py | 0 .../test_rest}/test_markets.py | 0 .../test_rest}/test_modelclass.py | 0 {test_rest => tests/test_rest}/test_quotes.py | 0 .../test_rest}/test_snapshots.py | 0 {test_rest => tests/test_rest}/test_splits.py | 0 .../test_rest}/test_summaries.py | 0 
.../test_rest}/test_tickers.py | 0 {test_rest => tests/test_rest}/test_trades.py | 0 tests/test_us_daily/test_agg_fetcher.py | 60 +- tests/test_us_daily/test_config.py | 14 +- tests/test_us_daily/test_storage.py | 10 +- tests/test_us_daily/test_ticker_filter.py | 24 +- .../test_websocket}/base_ws.py | 0 .../test_websocket}/mock_server.py | 0 .../test_websocket}/test_conn.py | 0 93 files changed, 2764 insertions(+), 404 deletions(-) create mode 100644 .claude/plans/20250423-project-restructure-design.md create mode 100644 .claude/plans/20250423-project-restructure-plan.md create mode 100644 .claude/plans/2026-04-22-top-usstock-daily-data-design.md create mode 100644 .claude/plans/2026-04-22-top-usstock-daily-data-plan.md create mode 100644 .claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp create mode 100644 .claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md create mode 100644 .claude/prds/20250423-OPT_CODE.md create mode 100644 .claude/prds/20250423-OPT_DATA_PROVIDER.md create mode 100644 .idea/.gitignore create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/massive-com.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 CLAUDE.md delete mode 100644 LICENSE create mode 100644 docs/README.md delete mode 100644 project/__init__.py delete mode 100644 project/us_daily/__init__.py delete mode 100644 project/us_daily/__main__.py delete mode 100644 project/us_daily/agg_fetcher.py delete mode 100644 project/us_daily/config.json delete mode 100644 project/us_daily/config.py delete mode 100644 project/us_daily/storage.py delete mode 100644 project/us_daily/ticker_filter.py rename {test_rest => tests/test_rest}/base.py (100%) rename {test_rest => tests/test_rest}/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false×tamp.lte=1478393873000×tamp.gte=1477972800000.json (100%) 
rename {test_rest => tests/test_rest}/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20×tamp.gt=2022-08-09.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/indicators/rsi/AAPL&window=20×pan=minute&adjusted=true×tamp.gt=2022-08-18.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/indicators/sma/AAPL&window=30×pan=quarter×tamp=1483958600&expand_underlying=true.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/last/crypto/BTC/USD.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/last_quote/currencies/AUD/USD.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/marketstatus/now.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/marketstatus/upcoming.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json (100%) rename {test_rest => tests/test_rest}/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/aggs/ticker/AAPL/prev.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/last/nbbo/AAPL.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/last/trade/AAPL.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/reference/news&ticker=NFLX.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json (100%) rename {test_rest => tests/test_rest}/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json (100%) rename {test_rest => 
tests/test_rest}/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/quotes/AAPL.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/conditions&asset_class=stocks.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/dividends.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/exchanges.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/options/contracts.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/splits.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/tickers.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/tickers/AAPL.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/reference/tickers/types.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/snapshot.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/snapshot/options/AAPL.json (100%) rename {test_rest => tests/test_rest}/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json (100%) rename 
{test_rest => tests/test_rest}/mocks/v3/trades/AAPL&limit=2.json (100%) rename {test_rest => tests/test_rest}/mocks/vX/reference/financials.json (100%) rename {test_rest => tests/test_rest}/mocks/vX/reference/tickers/META/events&types=ticker_change.json (100%) rename {test_rest => tests/test_rest}/models/test_requests.py (100%) rename {test_rest => tests/test_rest}/test_aggs.py (100%) rename {test_rest => tests/test_rest}/test_conditions.py (100%) rename {test_rest => tests/test_rest}/test_contracts.py (100%) rename {test_rest => tests/test_rest}/test_dividends.py (100%) rename {test_rest => tests/test_rest}/test_exchanges.py (100%) rename {test_rest => tests/test_rest}/test_indicators.py (100%) rename {test_rest => tests/test_rest}/test_markets.py (100%) rename {test_rest => tests/test_rest}/test_modelclass.py (100%) rename {test_rest => tests/test_rest}/test_quotes.py (100%) rename {test_rest => tests/test_rest}/test_snapshots.py (100%) rename {test_rest => tests/test_rest}/test_splits.py (100%) rename {test_rest => tests/test_rest}/test_summaries.py (100%) rename {test_rest => tests/test_rest}/test_tickers.py (100%) rename {test_rest => tests/test_rest}/test_trades.py (100%) rename {test_websocket => tests/test_websocket}/base_ws.py (100%) rename {test_websocket => tests/test_websocket}/mock_server.py (100%) rename {test_websocket => tests/test_websocket}/test_conn.py (100%) diff --git a/.claude/plans/20250423-project-restructure-design.md b/.claude/plans/20250423-project-restructure-design.md new file mode 100644 index 00000000..40670e46 --- /dev/null +++ b/.claude/plans/20250423-project-restructure-design.md @@ -0,0 +1,191 @@ +# Project Restructure Design + +**Date:** 2025-04-23 +**Scope:** One-shot restructure — directory migration, pyproject.toml rewrite, import fixup, cleanup + +--- + +## 1. Goal + +将项目从 Poetry-based 结构重构为标准 `src/` layout + PEP 621,同时重命名 `data_provider` → `processor`,并将 `provider/` 的第三方依赖正规化写入 `pyproject.toml`。 + +## 2. 
Target Directory Structure + +``` +massive-com/ +├── src/ +│ ├── massive/ # SDK 客户端(REST + WebSocket) +│ │ ├── __init__.py +│ │ ├── rest/ +│ │ ├── websocket/ +│ │ ├── exceptions.py +│ │ ├── logging.py +│ │ └── modelclass.py +│ ├── provider/ # 多数据源获取层 +│ │ ├── __init__.py +│ │ ├── base.py +│ │ ├── realtime_types.py +│ │ ├── us_index_mapping.py +│ │ ├── fundamental_adapter.py +│ │ ├── efinance_fetcher.py +│ │ ├── akshare_fetcher.py +│ │ ├── tushare_fetcher.py +│ │ ├── pytdx_fetcher.py +│ │ ├── baostock_fetcher.py +│ │ ├── yfinance_fetcher.py +│ │ ├── longbridge_fetcher.py +│ │ └── tickflow_fetcher.py +│ └── processor/ # 原 data_provider,重命名 +│ ├── __init__.py +│ └── us_daily/ +│ ├── __init__.py +│ ├── __main__.py +│ ├── config.py +│ ├── config.json +│ ├── storage.py +│ ├── ticker_filter.py +│ └── agg_fetcher.py +├── tests/ +│ ├── test_rest/ # 原顶层 test_rest/ +│ ├── test_websocket/ # 原顶层 test_websocket/ +│ └── test_us_daily/ # 原 tests/test_us_daily/ +├── examples/ # 不动 +├── docs/ # 不动 +├── data/ # 不动 +├── logs/ # 不动 +├── pyproject.toml # 重写 +└── README.md +``` + +## 3. pyproject.toml + +从 `[tool.poetry]` 迁移到 PEP 621 + setuptools: + +```toml +[project] +name = "massive" +version = "0.0.0" +description = "Official Massive (formerly Polygon.io) REST and Websocket client." 
+requires-python = ">=3.9" +license = {text = "MIT"} + +dependencies = [ + "urllib3>=1.26.9", + "websockets>=14.0", + "certifi>=2022.5.18,<2027.0.0", + "pandas", +] + +[project.optional-dependencies] +efinance = ["efinance"] +akshare = ["akshare"] +tushare = ["tushare"] +pytdx = ["pytdx"] +baostock = ["baostock"] +yfinance = ["yfinance"] +longbridge = ["longbridge-openapi"] +all = [ + "efinance", + "akshare", + "tushare", + "pytdx", + "baostock", + "yfinance", + "longbridge-openapi", +] +dev = [ + "black>=24.8.0", + "mypy>=1.19", + "types-urllib3>=1.26.25", + "types-certifi>=2021.10.8", + "types-setuptools>=81.0.0", + "pook>=2.1.4", + "orjson>=3.11.5", + "pytest", +] +docs = [ + "Sphinx>=7.4.7", + "sphinx-rtd-theme>=3.1.0", + "sphinx-autodoc-typehints>=2.3.0", +] + +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] + +[tool.black] +line-length = 88 + +[tool.mypy] +python_version = "3.9" +``` + +## 4. Import Path Changes + +### 4.1 `data_provider` → `processor`(~36 处) + +**源码文件(3 个):** +- `src/processor/us_daily/__main__.py` +- `src/processor/us_daily/ticker_filter.py` +- `src/processor/us_daily/agg_fetcher.py` + +**测试文件(4 个):** +- `tests/test_us_daily/test_agg_fetcher.py`(含 `patch()` 路径) +- `tests/test_us_daily/test_config.py` +- `tests/test_us_daily/test_storage.py` +- `tests/test_us_daily/test_ticker_filter.py`(含 `patch()` 路径) + +全部执行 `data_provider` → `processor` 全局替换。 + +### 4.2 `massive` 包(0 处变更) + +内部使用相对 import,搬入 `src/massive/` 后路径自动生效。两处绝对 import(`indicators.py`、`summaries.py`)配合 `pythonpath = ["src"]` 仍然有效。 + +### 4.3 `provider` 包(0 处变更) + +当前无任何文件 import `provider`。 + +### 4.4 测试文件(0 处变更) + +`test_rest/`、`test_websocket/` 中的 `from massive import ...` 路径不变,配合 `pythonpath = ["src"]` 生效。 + +## 5. 
config.py 路径修复 + +`processor/us_daily/config.py` 的 `load_config()` 默认参数从硬编码路径改为基于 `__file__` 的相对定位: + +```python +def load_config(config_path: str = None) -> Config: + if config_path is None: + config_path = os.path.join(os.path.dirname(__file__), "config.json") + ... +``` + +`data_dir = "data/us_daily"` 保持不变(相对于项目根目录)。 + +## 6. CLAUDE.md 更新 + +- 去掉 Poetry/Makefile 命令 +- 新命令:`pip install -e ".[all,dev]"`、`pytest`、`black src/ tests/`、`mypy src/` +- 补充 `src/` layout、`provider/`、`processor/` 架构说明 +- `python -m processor.us_daily` 作为 processor 运行入口 + +## 7. Delete & Cleanup + +| 操作 | 目标 | +|------|------| +| 删除 | `Makefile`、`poetry.lock` | +| 保留不动 | `.massive/`、`docs/`、`examples/`、`data/`、`logs/`、`README.md` | + +## 8. Not In Scope + +- `provider/` 内部重组织(保持扁平结构不变) +- `processor/` 功能扩展(仅重命名占位) +- examples 路径更新(`from massive import` 不变) +- 测试框架迁移(保留 unittest,增加 pytest 支持) diff --git a/.claude/plans/20250423-project-restructure-plan.md b/.claude/plans/20250423-project-restructure-plan.md new file mode 100644 index 00000000..98a309a4 --- /dev/null +++ b/.claude/plans/20250423-project-restructure-plan.md @@ -0,0 +1,534 @@ +# Project Restructure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Restructure the project from Poetry-based layout to standard `src/` layout with PEP 621, rename `data_provider` to `processor`, and normalize dependencies in `pyproject.toml`. + +**Architecture:** Move `massive/`, `provider/`, `data_provider/` into `src/` (renaming `data_provider` to `processor`). Move `test_rest/` and `test_websocket/` into `tests/`. Rewrite `pyproject.toml` from `[tool.poetry]` to PEP 621 + setuptools. Fix all `data_provider` → `processor` imports. Delete `Makefile` and `poetry.lock`. 
+ +**Tech Stack:** Python 3.9+, setuptools, pytest + +**Design Doc:** `.claude/plans/20250423-project-restructure-design.md` + +--- + +### Task 1: Create `src/` directory and move packages + +**Files:** +- Create: `src/` directory +- Move: `massive/` → `src/massive/` +- Move: `provider/` → `src/provider/` +- Move: `data_provider/` → `src/processor/` (rename) + +- [ ] **Step 1: Create src directory** + +```bash +mkdir -p src +``` + +- [ ] **Step 2: Move massive/ into src/** + +```bash +git mv massive src/massive +``` + +- [ ] **Step 3: Move provider/ into src/** + +```bash +git mv provider src/provider +``` + +- [ ] **Step 4: Move data_provider/ to src/processor/ (rename)** + +```bash +git mv data_provider src/processor +``` + +- [ ] **Step 5: Verify directory structure** + +```bash +ls src/ +``` + +Expected: `massive processor provider` + +```bash +ls src/processor/us_daily/ +``` + +Expected: `__init__.py __main__.py agg_fetcher.py config.json config.py storage.py ticker_filter.py` + +- [ ] **Step 6: Commit** + +```bash +git add -A +git commit -m "refactor: move massive, provider, data_provider into src/ layout + +Rename data_provider to processor." 
+``` + +--- + +### Task 2: Move test directories into tests/ + +**Files:** +- Move: `test_rest/` → `tests/test_rest/` +- Move: `test_websocket/` → `tests/test_websocket/` +- Keep: `tests/test_us_daily/` (already in place) + +- [ ] **Step 1: Move test_rest/ into tests/** + +```bash +git mv test_rest tests/test_rest +``` + +- [ ] **Step 2: Move test_websocket/ into tests/** + +```bash +git mv test_websocket tests/test_websocket +``` + +- [ ] **Step 3: Verify structure** + +```bash +ls tests/ +``` + +Expected: `__init__.py test_rest test_us_daily test_websocket` + +- [ ] **Step 4: Commit** + +```bash +git add -A +git commit -m "refactor: move test_rest and test_websocket into tests/" +``` + +--- + +### Task 3: Fix `data_provider` → `processor` imports in source files + +**Files:** +- Modify: `src/processor/us_daily/__main__.py` (lines 8-16) +- Modify: `src/processor/us_daily/ticker_filter.py` (line 5) +- Modify: `src/processor/us_daily/agg_fetcher.py` (lines 7-12) + +- [ ] **Step 1: Fix imports in `__main__.py`** + +In `src/processor/us_daily/__main__.py`, replace all `data_provider` with `processor`: + +```python +# Line 8-16: change from +from data_provider.us_daily.config import load_config +from data_provider.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from data_provider.us_daily.ticker_filter import filter_top_tickers +from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + +# to +from processor.us_daily.config import load_config +from processor.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from processor.us_daily.ticker_filter import filter_top_tickers +from processor.us_daily.agg_fetcher import fetch_ticker_aggs +``` + +- [ ] **Step 2: Fix imports in `ticker_filter.py`** + +In `src/processor/us_daily/ticker_filter.py`, line 5: + +```python +# change from +from data_provider.us_daily.config import Config + +# to +from processor.us_daily.config import 
Config +``` + +- [ ] **Step 3: Fix imports in `agg_fetcher.py`** + +In `src/processor/us_daily/agg_fetcher.py`, lines 7-12: + +```python +# change from +from data_provider.us_daily.config import Config +from data_provider.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +# to +from processor.us_daily.config import Config +from processor.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) +``` + +- [ ] **Step 4: Commit** + +```bash +git add src/processor/ +git commit -m "refactor: update data_provider imports to processor in source files" +``` + +--- + +### Task 4: Fix `data_provider` → `processor` imports in test files + +**Files:** +- Modify: `tests/test_us_daily/test_agg_fetcher.py` (all `data_provider` refs including `patch()` paths) +- Modify: `tests/test_us_daily/test_config.py` (all `data_provider` refs) +- Modify: `tests/test_us_daily/test_storage.py` (all `data_provider` refs) +- Modify: `tests/test_us_daily/test_ticker_filter.py` (all `data_provider` refs including `patch()` paths) + +- [ ] **Step 1: Fix `test_agg_fetcher.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_agg_fetcher.py`. This covers: +- `from data_provider.us_daily.agg_fetcher import ...` (lines 12, 18, 25, 32, 39, 46, 56, 63, 77, 107, 151, 193) +- `from data_provider.us_daily.config import Config` (lines 78, 108, 152, 194) +- `patch("data_provider.us_daily.agg_fetcher....)` (lines 54, 61, 95, 98, 100, 131, 134, 136, 181, 184, 186, 208, 211, 213) + +All become `processor.us_daily.*`. + +- [ ] **Step 2: Fix `test_config.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_config.py`. This covers: +- `from data_provider.us_daily.config import Config` (line 9) +- `from data_provider.us_daily.config import load_config` (lines 20, 37) + +- [ ] **Step 3: Fix `test_storage.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_storage.py`. 
This covers: +- `from data_provider.us_daily.storage import ...` (lines 16, 22, 28, 37, 44) + +- [ ] **Step 4: Fix `test_ticker_filter.py`** + +Global replace `data_provider` → `processor` in `tests/test_us_daily/test_ticker_filter.py`. This covers: +- `from data_provider.us_daily.ticker_filter import filter_top_tickers` (lines 22, 54) +- `from data_provider.us_daily.config import Config` (lines 23, 56) +- `patch("data_provider.us_daily.ticker_filter.EXCHANGES", ...)` (lines 45, 69, 101) +- `patch("data_provider.us_daily.ticker_filter.time.sleep")` (lines 46, 70, 102) + +- [ ] **Step 5: Commit** + +```bash +git add tests/test_us_daily/ +git commit -m "refactor: update data_provider imports to processor in test files" +``` + +--- + +### Task 5: Fix `config.py` path to use `__file__`-relative lookup + +**Files:** +- Modify: `src/processor/us_daily/config.py` (line 16) + +- [ ] **Step 1: Update `load_config` default path** + +In `src/processor/us_daily/config.py`, change: + +```python +# from +def load_config(config_path: str = "data_provider/us_daily/config.json") -> Config: + config = Config() + if os.path.exists(config_path): + +# to +def load_config(config_path: str = None) -> Config: + config = Config() + if config_path is None: + config_path = os.path.join(os.path.dirname(__file__), "config.json") + if os.path.exists(config_path): +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/processor/us_daily/config.py +git commit -m "fix: use __file__-relative path for config.json lookup" +``` + +--- + +### Task 6: Rewrite `pyproject.toml` + +**Files:** +- Modify: `pyproject.toml` (full rewrite) + +- [ ] **Step 1: Replace pyproject.toml content** + +Replace the entire `pyproject.toml` with: + +```toml +[project] +name = "massive" +version = "0.0.0" +description = "Official Massive (formerly Polygon.io) REST and Websocket client." 
+requires-python = ">=3.9" +license = {text = "MIT"} + +dependencies = [ + "urllib3>=1.26.9", + "websockets>=14.0", + "certifi>=2022.5.18,<2027.0.0", + "pandas", +] + +[project.optional-dependencies] +efinance = ["efinance"] +akshare = ["akshare"] +tushare = ["tushare"] +pytdx = ["pytdx"] +baostock = ["baostock"] +yfinance = ["yfinance"] +longbridge = ["longbridge-openapi"] +all = [ + "efinance", + "akshare", + "tushare", + "pytdx", + "baostock", + "yfinance", + "longbridge-openapi", +] +dev = [ + "black>=24.8.0", + "mypy>=1.19", + "types-urllib3>=1.26.25", + "types-certifi>=2021.10.8", + "types-setuptools>=81.0.0", + "pook>=2.1.4", + "orjson>=3.11.5", + "pytest", +] +docs = [ + "Sphinx>=7.4.7", + "sphinx-rtd-theme>=3.1.0", + "sphinx-autodoc-typehints>=2.3.0", +] + +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] + +[tool.black] +line-length = 88 + +[tool.mypy] +python_version = "3.9" +``` + +- [ ] **Step 2: Commit** + +```bash +git add pyproject.toml +git commit -m "refactor: rewrite pyproject.toml from Poetry to PEP 621 + setuptools" +``` + +--- + +### Task 7: Delete Makefile and poetry.lock + +**Files:** +- Delete: `Makefile` +- Delete: `poetry.lock` + +- [ ] **Step 1: Delete Makefile** + +```bash +git rm Makefile +``` + +- [ ] **Step 2: Delete poetry.lock** + +```bash +git rm poetry.lock +``` + +- [ ] **Step 3: Commit** + +```bash +git add -A +git commit -m "chore: remove Makefile and poetry.lock" +``` + +--- + +### Task 8: Update CLAUDE.md + +**Files:** +- Modify: `CLAUDE.md` + +- [ ] **Step 1: Update Development Commands section** + +Replace the Development Commands section with: + +````markdown +## Development Commands + +```bash +# Install dependencies (core + all data sources + dev tools) +pip install -e ".[all,dev]" + +# Run all tests +pytest + +# Run specific test directory +pytest 
tests/test_rest/ +pytest tests/test_websocket/ +pytest tests/test_us_daily/ + +# Run a single test file +pytest tests/test_rest/test_aggs.py + +# Run a single test method +pytest tests/test_rest/test_aggs.py::TestAggs::test_list_aggs + +# Code formatting (auto-fix) +black src/ tests/ examples/ + +# Static type checking +mypy src/ + +# Run US daily data processor +python -m processor.us_daily + +# Regenerate REST API spec from OpenAPI +python .massive/rest.py + +# Update WebSocket API spec +curl https://api.massive.com/specs/websocket.json > .massive/websocket.json +``` +```` + +- [ ] **Step 2: Update Architecture section** + +Replace the Architecture section with: + +````markdown +## Architecture + +### Project Layout + +Standard `src/` layout with three top-level packages: + +- `src/massive/` — REST and WebSocket SDK client library +- `src/provider/` — Multi-source data fetcher layer with automatic failover +- `src/processor/` — Data collection and processing pipelines + +### Client Structure + +`RESTClient` (in `massive/rest/__init__.py`) uses multiple inheritance to compose domain-specific client mixins (AggsClient, TradesClient, QuotesClient, etc.) on top of `BaseClient` (`massive/rest/base.py`). Each mixin lives in its own file under `massive/rest/` and handles one API domain. + +`WebSocketClient` (`massive/websocket/__init__.py`) is a standalone async client using the `websockets` library with auto-reconnect support. + +### Provider Layer + +`DataFetcherManager` (in `provider/base.py`) orchestrates multiple data source fetchers (efinance, akshare, tushare, pytdx, baostock, yfinance, longbridge) with automatic priority-based failover. Each fetcher extends `BaseFetcher` and implements source-specific data retrieval. + +### Processor + +`processor/us_daily/` fetches US stock daily OHLCV data via the Massive REST API. Run with `python -m processor.us_daily`. 
+ +### Models + +- REST models: `massive/rest/models/` — one file per domain, using the custom `@modelclass` decorator (from `massive/modelclass.py`) which wraps `@dataclass` with flexible init that accepts positional or keyword args. +- WebSocket models: `massive/websocket/models/` + +### API Spec Codegen + +`.massive/rest.py` generates REST client code from `.massive/rest.json` (OpenAPI spec). `.massive/websocket.json` is the WebSocket spec. + +### Tests + +- `tests/test_rest/` — uses `pook` for HTTP mocking, with mock responses in `tests/test_rest/mocks/` +- `tests/test_websocket/` — has its own mock WebSocket server in `mock_server.py` +- `tests/test_us_daily/` — unit tests for the US daily processor +- Test base classes: `tests/test_rest/base.py` and `tests/test_websocket/base_ws.py` + +### Key Conventions + +- API key via `MASSIVE_API_KEY` env var or constructor parameter +- Base URL: `https://api.massive.com` +- Auth header: `Authorization: Bearer ` +- Python 3.9+ required +- Formatting: `black`; type checking: `mypy` +```` + +- [ ] **Step 3: Commit** + +```bash +git add CLAUDE.md +git commit -m "docs: update CLAUDE.md for new project structure" +``` + +--- + +### Task 9: Verify everything works + +- [ ] **Step 1: Install in editable mode** + +```bash +pip install -e ".[dev]" +``` + +Expected: Installs successfully with no errors. + +- [ ] **Step 2: Run us_daily tests** + +```bash +pytest tests/test_us_daily/ -v +``` + +Expected: All tests pass. Specifically: +- `test_config.py` — 3 tests pass +- `test_storage.py` — 5 tests pass +- `test_agg_fetcher.py` — 8 tests pass (4 classes) +- `test_ticker_filter.py` — 3 tests pass + +- [ ] **Step 3: Run REST tests** + +```bash +pytest tests/test_rest/ -v +``` + +Expected: All tests pass. + +- [ ] **Step 4: Run WebSocket tests** + +```bash +pytest tests/test_websocket/ -v +``` + +Expected: All tests pass. 
+ +- [ ] **Step 5: Verify import works** + +```bash +python -c "from massive import RESTClient; print('massive OK')" +python -c "from processor.us_daily.config import Config; print('processor OK')" +python -c "from provider.base import DataFetcherManager; print('provider OK')" +``` + +Expected: All three print their "OK" message. + +- [ ] **Step 6: If any failures, fix and commit** + +Address any import errors or test failures discovered in steps 1-5, then commit fixes. diff --git a/.claude/plans/2026-04-22-top-usstock-daily-data-design.md b/.claude/plans/2026-04-22-top-usstock-daily-data-design.md new file mode 100644 index 00000000..ac48e12c --- /dev/null +++ b/.claude/plans/2026-04-22-top-usstock-daily-data-design.md @@ -0,0 +1,223 @@ +# 头部美股日K数据采集 — 设计文档 + +## 概述 + +构建一个数据采集模块,获取纳斯达克、纽交所、NYSE Arca 交易所中市值 >= 50亿美金的头部公司,按月采集自 2020 年以来的日 K 线数据,支持增量更新。 + +## 模块结构 + +``` +project/us_daily/ +├── __init__.py +├── __main__.py # 入口:加载配置 → 筛选 ticker → 逐个抓取 +├── config.py # Config dataclass + 默认值 + 配置文件加载 +├── ticker_filter.py # 调用 list_tickers + get_ticker_details 筛选头部公司 +├── agg_fetcher.py # 按月调用 list_aggs,含增量判断和重试逻辑 +└── storage.py # JSON 文件读写,路径管理 + +data/us_daily/ +├── top_tickers.json # 筛选出的头部公司列表 +└── {TICKER}/ # 每个 ticker 一个目录 + ├── 2020-01.json + ├── 2020-02.json + └── ... + +logs/ +└── us_daily.log # 运行日志 +``` + +## 执行流程 + +``` +1. 加载配置(project/us_daily/config.json) +2. 初始化 RESTClient +3. 是否刷新 ticker 列表? + ├── refresh_tickers=true 或 top_tickers.json 不存在 → 调用 API 筛选 → 写入 top_tickers.json + └── refresh_tickers=false 且文件存在 → 读取 top_tickers.json +4. 遍历每个 ticker: + 4.1 创建 ticker 目录(如不存在) + 4.2 生成 start_date 到当前月的月份列表 + 4.3 对每个月份: + - 文件已存在 且 不是当前月 → 跳过 + - 文件已存在 且 是当前月 → 重新请求并覆盖 + - 文件不存在 → 请求并写入 + - 每次 API 请求后 sleep request_interval 秒 +5. 
输出汇总(完成数、失败数及详情) +``` + +## 配置模块 + +### Config dataclass + +```python +@dataclass +class Config: + refresh_tickers: bool = False # 是否刷新头部公司列表 + market_cap_min: float = 5e9 # 市值阈值(美元) + start_date: str = "2020-01" # 数据起始年月 + request_interval: int = 20 # API 请求间隔(秒) + data_dir: str = "data/us_daily" # 数据存储路径 + max_retries: int = 3 # 请求失败重试次数 +``` + +### 配置文件 + +路径:`project/us_daily/config.json`,不存在则使用默认值。 + +```json +{ + "refresh_tickers": true, + "market_cap_min": 5000000000, + "start_date": "2020-01", + "request_interval": 20, + "data_dir": "data/us_daily", + "max_retries": 3 +} +``` + +## Ticker 筛选模块 + +### ticker_filter.py + +**流程:** + +1. 调用 `client.list_tickers(market="stocks", exchange=exchange, active=True, limit=1000)` 遍历三个交易所: + - `XNAS`(纳斯达克) + - `XNYS`(纽约证券交易所) + - `ARCX`(NYSE Arca) +2. 对每个 ticker 调用 `client.get_ticker_details(ticker)` 获取 `market_cap` +3. 过滤 `market_cap >= config.market_cap_min` +4. 每次 API 请求后 sleep `config.request_interval` 秒 +5. 结果写入 `data/us_daily/top_tickers.json` + +### top_tickers.json 格式 + +```json +{ + "updated_at": "2026-04-22", + "market_cap_min": 5000000000, + "tickers": [ + {"ticker": "AAPL", "name": "Apple Inc.", "market_cap": 3200000000000, "exchange": "XNAS"}, + {"ticker": "MSFT", "name": "Microsoft Corporation", "market_cap": 2800000000000, "exchange": "XNAS"} + ] +} +``` + +## 数据抓取模块 + +### agg_fetcher.py + +**核心逻辑:** + +```python +def fetch_ticker_aggs(client, ticker, config): + months = generate_months(config.start_date, current_month()) + for month in months: + file_path = get_month_file_path(config.data_dir, ticker, month) + + # 增量判断 + if file_exists(file_path) and not is_current_month(month): + continue # 历史月份已有数据,跳过 + + # 请求数据(带重试) + aggs = fetch_with_retry(client, ticker, month, config.max_retries) + + # 写入文件 + save_month_data(file_path, aggs) + + sleep(config.request_interval) +``` + +**月份范围:** `generate_months("2020-01", "2026-04")` → `["2020-01", "2020-02", ..., "2026-04"]` + +**API 调用:** 
`client.list_aggs(ticker, 1, "day", from_=月初, to=月末, adjusted=True, sort="asc")` + +**重试逻辑:** 最多 `max_retries` 次,每次重试前 sleep `request_interval`。仍然失败则记录日志,跳过该月份继续。 + +### 月数据文件格式 + +`data/us_daily/{TICKER}/{YYYY-MM}.json`: + +```json +{ + "ticker": "AAPL", + "month": "2020-01", + "fetched_at": "2026-04-22T10:30:00", + "data": [ + { + "open": 74.06, + "high": 75.15, + "low": 73.80, + "close": 74.36, + "volume": 108872000, + "vwap": 74.53, + "timestamp": 1577854800000, + "transactions": 480012 + } + ] +} +``` + +## 存储模块 + +### storage.py + +**核心函数:** + +- `get_tickers_file_path(data_dir)` → `data/us_daily/top_tickers.json` +- `get_month_file_path(data_dir, ticker, month)` → `data/us_daily/AAPL/2020-01.json` +- `save_json(path, data)` — 写入 JSON,自动创建父目录 +- `load_json(path)` — 读取 JSON +- `file_exists(path)` — 判断文件是否存在 + +## 入口模块 + +### __main__.py + +```python +def main(): + # 1. 加载配置 + config = load_config() + + # 2. 初始化日志(输出到 logs/us_daily.log + stdout) + setup_logging() + + # 3. 初始化 REST 客户端 + client = RESTClient() + + # 4. 获取 ticker 列表 + if config.refresh_tickers or not tickers_file_exists(config): + tickers = filter_top_tickers(client, config) + save_tickers(config, tickers) + else: + tickers = load_tickers(config) + + # 5. 逐个抓取日K数据 + failed = [] + for i, ticker_info in enumerate(tickers): + logger.info(f"[{i+1}/{len(tickers)}] 开始处理 {ticker_info['ticker']}") + result = fetch_ticker_aggs(client, ticker_info["ticker"], config) + if result.failures: + failed.extend(result.failures) + + # 6. 
输出汇总 + logger.info(f"完成:{len(tickers)} 只股票") + if failed: + logger.warning(f"失败:{len(failed)} 个月份") + for f in failed: + logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") +``` + +**运行方式:** `python -m project.us_daily` + +## 日志 + +- 使用 Python `logging` 模块 +- 同时输出到 `logs/us_daily.log` 和 stdout +- 格式:`2026-04-22 10:30:00 [INFO] [3/150] AAPL - 2020-01 fetched` +- 包含进度信息,便于监控长时间运行 + +## 限流 + +- 每次 API 请求后 sleep `config.request_interval` 秒(默认 20s) +- 适用于 list_tickers 分页、get_ticker_details、list_aggs 所有请求 diff --git a/.claude/plans/2026-04-22-top-usstock-daily-data-plan.md b/.claude/plans/2026-04-22-top-usstock-daily-data-plan.md new file mode 100644 index 00000000..ed0c1b51 --- /dev/null +++ b/.claude/plans/2026-04-22-top-usstock-daily-data-plan.md @@ -0,0 +1,997 @@ +# 头部美股日K数据采集 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** 构建 `project/us_daily` 模块,采集主要美股交易所中市值 >= 50 亿美金公司的日 K 线数据(2020 年至今),按月存储为 JSON 文件,支持增量更新。 + +**Architecture:** 单进程流水线:加载配置 → 筛选 ticker → 按月抓取日K → JSON 存储。通过检查文件是否存在实现增量更新。每次 API 请求后 sleep 20s 满足限流要求。 + +**Tech Stack:** Python 3.9+, `massive` SDK(RESTClient, list_tickers, get_ticker_details, list_aggs),标准库 json/logging/dataclasses/calendar/datetime + +--- + +## File Structure + +| File | Responsibility | +|------|---------------| +| `project/__init__.py` | 空,使 project 成为 package | +| `project/us_daily/__init__.py` | 空,使 us_daily 成为 package | +| `project/us_daily/config.py` | Config dataclass + load_config() | +| `project/us_daily/storage.py` | JSON 读写、路径计算、文件存在判断 | +| `project/us_daily/ticker_filter.py` | 遍历交易所获取 ticker、查详情过滤市值 | +| `project/us_daily/agg_fetcher.py` | 按月获取日K数据、增量判断、重试逻辑 | +| `project/us_daily/__main__.py` | 入口:配置加载 → ticker 筛选 → 数据抓取 → 汇总 | +| `project/us_daily/config.json` | 默认配置文件 | +| `tests/test_us_daily/test_config.py` | config 模块测试 | +| `tests/test_us_daily/test_storage.py` | storage 模块测试 | +| `tests/test_us_daily/test_ticker_filter.py` | ticker_filter 模块测试 | +| `tests/test_us_daily/test_agg_fetcher.py` | agg_fetcher 模块测试 | + +--- + +### Task 1: Config 模块 + +**Files:** +- Create: `project/__init__.py` +- Create: `project/us_daily/__init__.py` +- Create: `project/us_daily/config.py` +- Create: `project/us_daily/config.json` +- Create: `tests/test_us_daily/__init__.py` +- Create: `tests/test_us_daily/test_config.py` + +- [ ] **Step 1: Write the failing test for Config defaults and load_config** + +Create `tests/test_us_daily/__init__.py` (empty) and `tests/test_us_daily/test_config.py`: + +```python +import unittest +import json +import os +import tempfile + + +class TestConfig(unittest.TestCase): + def test_default_config(self): + from project.us_daily.config import Config + + config = Config() + self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.market_cap_min, 5e9) + self.assertEqual(config.start_date, 
"2020-01") + self.assertEqual(config.request_interval, 20) + self.assertEqual(config.data_dir, "data/us_daily") + self.assertEqual(config.max_retries, 3) + + def test_load_config_from_file(self): + from project.us_daily.config import load_config + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as f: + json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f) + tmp_path = f.name + + try: + config = load_config(tmp_path) + self.assertEqual(config.refresh_tickers, True) + self.assertEqual(config.market_cap_min, 1e10) + # defaults preserved for unspecified fields + self.assertEqual(config.start_date, "2020-01") + self.assertEqual(config.request_interval, 20) + finally: + os.unlink(tmp_path) + + def test_load_config_missing_file_uses_defaults(self): + from project.us_daily.config import load_config + + config = load_config("/nonexistent/path/config.json") + self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.market_cap_min, 5e9) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `poetry run python -m pytest tests/test_us_daily/test_config.py -v` +Expected: FAIL — ModuleNotFoundError for `project.us_daily.config` + +- [ ] **Step 3: Create package files and implement Config** + +Create `project/__init__.py` (empty): + +```python +``` + +Create `project/us_daily/__init__.py` (empty): + +```python +``` + +Create `project/us_daily/config.py`: + +```python +import json +import os +from dataclasses import dataclass + + +@dataclass +class Config: + refresh_tickers: bool = False + market_cap_min: float = 5e9 + start_date: str = "2020-01" + request_interval: int = 20 + data_dir: str = "data/us_daily" + max_retries: int = 3 + + +def load_config(config_path: str = "project/us_daily/config.json") -> Config: + config = Config() + if os.path.exists(config_path): + with open(config_path, "r") as f: + data = json.load(f) + for key, value in data.items(): + if 
hasattr(config, key): + setattr(config, key, value) + return config +``` + +Create `project/us_daily/config.json`: + +```json +{ + "refresh_tickers": false, + "market_cap_min": 5000000000, + "start_date": "2020-01", + "request_interval": 20, + "data_dir": "data/us_daily", + "max_retries": 3 +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `poetry run python -m pytest tests/test_us_daily/test_config.py -v` +Expected: 3 tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add project/__init__.py project/us_daily/__init__.py project/us_daily/config.py project/us_daily/config.json tests/test_us_daily/__init__.py tests/test_us_daily/test_config.py +git commit -m "feat: add config module for us_daily data fetcher" +``` + +--- + +### Task 2: Storage 模块 + +**Files:** +- Create: `project/us_daily/storage.py` +- Create: `tests/test_us_daily/test_storage.py` + +- [ ] **Step 1: Write the failing tests for storage functions** + +Create `tests/test_us_daily/test_storage.py`: + +```python +import unittest +import json +import os +import tempfile +import shutil + + +class TestStorage(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_get_tickers_file_path(self): + from project.us_daily.storage import get_tickers_file_path + + result = get_tickers_file_path("data/us_daily") + self.assertEqual(result, "data/us_daily/top_tickers.json") + + def test_get_month_file_path(self): + from project.us_daily.storage import get_month_file_path + + result = get_month_file_path("data/us_daily", "AAPL", "2020-01") + self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") + + def test_save_and_load_json(self): + from project.us_daily.storage import save_json, load_json + + file_path = os.path.join(self.test_dir, "sub", "test.json") + data = {"key": "value", "num": 42} + save_json(file_path, data) + loaded = load_json(file_path) + self.assertEqual(loaded, data) + + def 
test_save_json_creates_parent_dirs(self): + from project.us_daily.storage import save_json + + file_path = os.path.join(self.test_dir, "a", "b", "c", "test.json") + save_json(file_path, {"x": 1}) + self.assertTrue(os.path.exists(file_path)) + + def test_file_exists(self): + from project.us_daily.storage import file_exists + + existing = os.path.join(self.test_dir, "exists.json") + with open(existing, "w") as f: + f.write("{}") + + self.assertTrue(file_exists(existing)) + self.assertFalse(file_exists(os.path.join(self.test_dir, "nope.json"))) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `poetry run python -m pytest tests/test_us_daily/test_storage.py -v` +Expected: FAIL — ModuleNotFoundError for `project.us_daily.storage` + +- [ ] **Step 3: Implement storage module** + +Create `project/us_daily/storage.py`: + +```python +import json +import os + + +def get_tickers_file_path(data_dir: str) -> str: + return os.path.join(data_dir, "top_tickers.json") + + +def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: + return os.path.join(data_dir, ticker, f"{month}.json") + + +def save_json(path: str, data: dict) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + +def load_json(path: str) -> dict: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def file_exists(path: str) -> bool: + return os.path.isfile(path) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `poetry run python -m pytest tests/test_us_daily/test_storage.py -v` +Expected: 5 tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add project/us_daily/storage.py tests/test_us_daily/test_storage.py +git commit -m "feat: add storage module for JSON file I/O and path management" +``` + +--- + +### Task 3: Ticker Filter 模块 + +**Files:** +- Create: `project/us_daily/ticker_filter.py` +- 
Create: `tests/test_us_daily/test_ticker_filter.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/test_us_daily/test_ticker_filter.py`: + +```python +import unittest +from unittest.mock import MagicMock, patch, call +from dataclasses import dataclass + + +class TestTickerFilter(unittest.TestCase): + def _make_ticker(self, ticker_str, exchange): + t = MagicMock() + t.ticker = ticker_str + t.primary_exchange = exchange + return t + + def _make_details(self, ticker_str, name, market_cap, exchange): + d = MagicMock() + d.ticker = ticker_str + d.name = name + d.market_cap = market_cap + d.primary_exchange = exchange + return d + + def test_filter_top_tickers_filters_by_market_cap(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + # list_tickers returns different tickers per exchange + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL", "XNAS"), + self._make_ticker("TINY", "XNAS"), + ]) + + # get_ticker_details: AAPL has large cap, TINY does not + def mock_details(ticker): + if ticker == "AAPL": + return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") + elif ticker == "TINY": + return self._make_details("TINY", "Tiny Corp", 1e9, "XNAS") + + client.get_ticker_details.side_effect = mock_details + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + tickers = [t["ticker"] for t in result] + self.assertIn("AAPL", tickers) + self.assertNotIn("TINY", tickers) + + def test_filter_top_tickers_includes_required_fields(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + 
self._make_ticker("MSFT", "XNYS"), + ]) + client.get_ticker_details.return_value = self._make_details( + "MSFT", "Microsoft Corporation", 2.8e12, "XNYS" + ) + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + self.assertEqual(len(result), 1) + entry = result[0] + self.assertEqual(entry["ticker"], "MSFT") + self.assertEqual(entry["name"], "Microsoft Corporation") + self.assertEqual(entry["market_cap"], 2.8e12) + self.assertEqual(entry["exchange"], "XNYS") + + def test_filter_skips_ticker_on_details_error(self): + from project.us_daily.ticker_filter import filter_top_tickers + from project.us_daily.config import Config + + config = Config(market_cap_min=5e9, request_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL", "XNAS"), + self._make_ticker("AAPL", "XNAS"), + ]) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") + + client.get_ticker_details.side_effect = mock_details + + with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("project.us_daily.ticker_filter.time.sleep"): + result = filter_top_tickers(client, config) + + tickers = [t["ticker"] for t in result] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `poetry run python -m pytest tests/test_us_daily/test_ticker_filter.py -v` +Expected: FAIL — ModuleNotFoundError for `project.us_daily.ticker_filter` + +- [ ] **Step 3: Implement ticker_filter module** + +Create `project/us_daily/ticker_filter.py`: + +```python +import logging +import time +from typing import List + +from project.us_daily.config import Config + +logger = logging.getLogger("us_daily") + +EXCHANGES = ["XNAS", 
"XNYS", "ARCX"] + + +def filter_top_tickers(client, config: Config) -> List[dict]: + result = [] + for exchange in EXCHANGES: + logger.info(f"Fetching tickers for exchange: {exchange}") + try: + tickers = client.list_tickers( + market="stocks", + exchange=exchange, + active=True, + limit=1000, + ) + except Exception as e: + logger.error(f"Failed to list tickers for {exchange}: {e}") + continue + + time.sleep(config.request_interval) + + for ticker_obj in tickers: + ticker_str = ticker_obj.ticker + try: + details = client.get_ticker_details(ticker_str) + time.sleep(config.request_interval) + except Exception as e: + logger.warning( + f"Failed to get details for {ticker_str}: {e}" + ) + continue + + if details.market_cap is None: + logger.debug(f"{ticker_str}: no market_cap data, skipping") + continue + + if details.market_cap >= config.market_cap_min: + entry = { + "ticker": details.ticker, + "name": details.name, + "market_cap": details.market_cap, + "exchange": details.primary_exchange, + } + result.append(entry) + logger.info( + f" {details.ticker}: market_cap={details.market_cap:.0f} ✓" + ) + else: + logger.debug( + f" {ticker_str}: market_cap={details.market_cap:.0f} < {config.market_cap_min:.0f}, skipping" + ) + + logger.info(f"Total top tickers found: {len(result)}") + return result +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `poetry run python -m pytest tests/test_us_daily/test_ticker_filter.py -v` +Expected: 3 tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add project/us_daily/ticker_filter.py tests/test_us_daily/test_ticker_filter.py +git commit -m "feat: add ticker_filter module to select top US stocks by market cap" +``` + +--- + +### Task 4: Agg Fetcher 模块 + +**Files:** +- Create: `project/us_daily/agg_fetcher.py` +- Create: `tests/test_us_daily/test_agg_fetcher.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/test_us_daily/test_agg_fetcher.py`: + +```python +import unittest +from unittest.mock import 
MagicMock, patch, call +import os +import tempfile +import shutil +import json +from datetime import date + + +class TestGenerateMonths(unittest.TestCase): + def test_generate_months_basic(self): + from project.us_daily.agg_fetcher import generate_months + + result = generate_months("2020-01", "2020-04") + self.assertEqual(result, ["2020-01", "2020-02", "2020-03", "2020-04"]) + + def test_generate_months_cross_year(self): + from project.us_daily.agg_fetcher import generate_months + + result = generate_months("2023-11", "2024-02") + self.assertEqual(result, ["2023-11", "2023-12", "2024-01", "2024-02"]) + + def test_generate_months_single(self): + from project.us_daily.agg_fetcher import generate_months + + result = generate_months("2024-06", "2024-06") + self.assertEqual(result, ["2024-06"]) + + +class TestMonthBounds(unittest.TestCase): + def test_month_bounds_january(self): + from project.us_daily.agg_fetcher import get_month_bounds + + start, end = get_month_bounds("2020-01") + self.assertEqual(start, "2020-01-01") + self.assertEqual(end, "2020-01-31") + + def test_month_bounds_february_leap(self): + from project.us_daily.agg_fetcher import get_month_bounds + + start, end = get_month_bounds("2024-02") + self.assertEqual(start, "2024-02-01") + self.assertEqual(end, "2024-02-29") + + def test_month_bounds_february_non_leap(self): + from project.us_daily.agg_fetcher import get_month_bounds + + start, end = get_month_bounds("2023-02") + self.assertEqual(start, "2023-02-01") + self.assertEqual(end, "2023-02-28") + + +class TestIsCurrentMonth(unittest.TestCase): + @patch("project.us_daily.agg_fetcher.date") + def test_is_current_month_true(self, mock_date): + from project.us_daily.agg_fetcher import is_current_month + + mock_date.today.return_value = date(2026, 4, 22) + self.assertTrue(is_current_month("2026-04")) + + @patch("project.us_daily.agg_fetcher.date") + def test_is_current_month_false(self, mock_date): + from project.us_daily.agg_fetcher import 
is_current_month + + mock_date.today.return_value = date(2026, 4, 22) + self.assertFalse(is_current_month("2026-03")) + + +class TestFetchTickerAggs(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_skips_existing_historical_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + ) + + # Create existing file for 2020-01 + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2020-01", "data": []}, f) + + client = MagicMock() + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Should not have called list_aggs since file exists and not current month + client.list_aggs.assert_not_called() + self.assertEqual(result["failures"], []) + + def test_fetches_missing_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + ) + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000.0 + agg1.vwap = 74.53 + agg1.timestamp = 1577854800000 + agg1.transactions = 480012 + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with 
patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Verify file was created + file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["ticker"], "AAPL") + self.assertEqual(data["month"], "2020-01") + self.assertEqual(len(data["data"]), 1) + self.assertEqual(data["data"][0]["open"], 74.06) + self.assertEqual(result["failures"], []) + + def test_refreshes_current_month(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2026-04", + data_dir=self.test_dir, + request_interval=0, + ) + + # Create existing file for current month + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) + + agg1 = MagicMock() + agg1.open = 200.0 + agg1.high = 210.0 + agg1.low = 195.0 + agg1.close = 205.0 + agg1.volume = 50000000.0 + agg1.vwap = 203.0 + agg1.timestamp = 1714348800000 + agg1.transactions = 300000 + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2026-04"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=True): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + # Should have called list_aggs even though file exists + client.list_aggs.assert_called_once() + self.assertEqual(result["failures"], []) + + def test_records_failure_after_retries(self): + from project.us_daily.agg_fetcher import fetch_ticker_aggs + from project.us_daily.config import Config + + config = Config( + start_date="2020-01", + data_dir=self.test_dir, + request_interval=0, + max_retries=2, + ) 
+ + client = MagicMock() + client.list_aggs.side_effect = Exception("API timeout") + + with patch("project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"]): + with patch("project.us_daily.agg_fetcher.is_current_month", return_value=False): + with patch("project.us_daily.agg_fetcher.time.sleep"): + result = fetch_ticker_aggs(client, "AAPL", config) + + self.assertEqual(len(result["failures"]), 1) + self.assertEqual(result["failures"][0]["ticker"], "AAPL") + self.assertEqual(result["failures"][0]["month"], "2020-01") + self.assertIn("API timeout", result["failures"][0]["error"]) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `poetry run python -m pytest tests/test_us_daily/test_agg_fetcher.py -v` +Expected: FAIL — ModuleNotFoundError for `project.us_daily.agg_fetcher` + +- [ ] **Step 3: Implement agg_fetcher module** + +Create `project/us_daily/agg_fetcher.py`: + +```python +import calendar +import logging +import time +from datetime import date, datetime +from typing import List, Tuple + +from project.us_daily.config import Config +from project.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +logger = logging.getLogger("us_daily") + + +def generate_months(start: str, end: str) -> List[str]: + start_year, start_month = int(start[:4]), int(start[5:7]) + end_year, end_month = int(end[:4]), int(end[5:7]) + + months = [] + year, month = start_year, start_month + while (year, month) <= (end_year, end_month): + months.append(f"{year:04d}-{month:02d}") + month += 1 + if month > 12: + month = 1 + year += 1 + return months + + +def get_month_bounds(month: str) -> Tuple[str, str]: + year, mon = int(month[:4]), int(month[5:7]) + last_day = calendar.monthrange(year, mon)[1] + return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" + + +def is_current_month(month: str) -> bool: + today = date.today() + return month == 
f"{today.year:04d}-{today.month:02d}" + + +def current_month() -> str: + today = date.today() + return f"{today.year:04d}-{today.month:02d}" + + +def fetch_ticker_aggs(client, ticker: str, config: Config) -> dict: + months = generate_months(config.start_date, current_month()) + failures = [] + + for month in months: + file_path = get_month_file_path(config.data_dir, ticker, month) + + if file_exists(file_path) and not is_current_month(month): + logger.debug(f" {ticker} {month}: exists, skipping") + continue + + start_date, end_date = get_month_bounds(month) + aggs = None + + for attempt in range(1, config.max_retries + 1): + try: + aggs_iter = client.list_aggs( + ticker, + 1, + "day", + from_=start_date, + to=end_date, + adjusted=True, + sort="asc", + ) + aggs = list(aggs_iter) + break + except Exception as e: + logger.warning( + f" {ticker} {month}: attempt {attempt}/{config.max_retries} failed: {e}" + ) + if attempt < config.max_retries: + time.sleep(config.request_interval) + + if aggs is None: + failures.append({ + "ticker": ticker, + "month": month, + "error": str(e), + }) + logger.error(f" {ticker} {month}: all retries failed, skipping") + continue + + data = { + "ticker": ticker, + "month": month, + "fetched_at": datetime.now().isoformat(timespec="seconds"), + "data": [ + { + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + "vwap": a.vwap, + "timestamp": a.timestamp, + "transactions": a.transactions, + } + for a in aggs + ], + } + save_json(file_path, data) + logger.info(f" {ticker} {month}: fetched {len(aggs)} bars") + time.sleep(config.request_interval) + + return {"failures": failures} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `poetry run python -m pytest tests/test_us_daily/test_agg_fetcher.py -v` +Expected: 8 tests PASS (4 utility + 4 integration) + +- [ ] **Step 5: Commit** + +```bash +git add project/us_daily/agg_fetcher.py tests/test_us_daily/test_agg_fetcher.py +git commit -m "feat: 
add agg_fetcher module for incremental daily bar data collection" +``` + +--- + +### Task 5: 入口模块 (__main__.py) + +**Files:** +- Create: `project/us_daily/__main__.py` + +- [ ] **Step 1: Implement __main__.py** + +Create `project/us_daily/__main__.py`: + +```python +import logging +import os +import sys +from datetime import datetime + +from massive import RESTClient + +from project.us_daily.config import load_config +from project.us_daily.storage import ( + get_tickers_file_path, + file_exists, + save_json, + load_json, +) +from project.us_daily.ticker_filter import filter_top_tickers +from project.us_daily.agg_fetcher import fetch_ticker_aggs + + +def setup_logging(): + os.makedirs("logs", exist_ok=True) + logger = logging.getLogger("us_daily") + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def main(): + logger = setup_logging() + config = load_config() + + logger.info("=== US Daily Data Fetcher Started ===") + logger.info(f"Config: {config}") + + client = RESTClient() + + # Step 1: Get ticker list + tickers_path = get_tickers_file_path(config.data_dir) + if config.refresh_tickers or not file_exists(tickers_path): + logger.info("Filtering top tickers from API...") + tickers = filter_top_tickers(client, config) + save_json(tickers_path, { + "updated_at": datetime.now().strftime("%Y-%m-%d"), + "market_cap_min": config.market_cap_min, + "tickers": tickers, + }) + logger.info(f"Saved {len(tickers)} tickers to {tickers_path}") + else: + data = load_json(tickers_path) + tickers = 
data["tickers"] + logger.info( + f"Loaded {len(tickers)} tickers from {tickers_path} " + f"(updated: {data.get('updated_at', 'unknown')})" + ) + + # Step 2: Fetch agg data for each ticker + all_failures = [] + total = len(tickers) + for i, ticker_info in enumerate(tickers): + ticker = ticker_info["ticker"] + logger.info(f"[{i + 1}/{total}] Processing {ticker}") + result = fetch_ticker_aggs(client, ticker, config) + if result["failures"]: + all_failures.extend(result["failures"]) + + # Step 3: Summary + logger.info("=== Summary ===") + logger.info(f"Total tickers: {total}") + if all_failures: + logger.warning(f"Failed months: {len(all_failures)}") + for f in all_failures: + logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") + else: + logger.info("All data fetched successfully") + logger.info("=== Done ===") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Verify it can be invoked (dry run)** + +Run: `poetry run python -m project.us_daily --help 2>&1 || echo "Module loads OK (no --help support expected)"` + +This just checks the module can be imported without errors. Actual execution requires a valid API key and would hit the real API. 
+ +- [ ] **Step 3: Commit** + +```bash +git add project/us_daily/__main__.py +git commit -m "feat: add __main__.py entry point for us_daily data fetcher" +``` + +--- + +### Task 6: Run all tests and final verification + +**Files:** None (verification only) + +- [ ] **Step 1: Run the full test suite** + +Run: `poetry run python -m pytest tests/test_us_daily/ -v` +Expected: All tests PASS (3 config + 5 storage + 3 ticker_filter + 8 agg_fetcher = 19 tests) + +- [ ] **Step 2: Run type check** + +Run: `poetry run mypy project/` +Expected: No errors (or only notes about the massive library types) + +- [ ] **Step 3: Run style check** + +Run: `make style` +Expected: Files formatted + +- [ ] **Step 4: Final commit if style changes** + +```bash +git add -A project/ tests/test_us_daily/ +git commit -m "style: format us_daily module with black" +``` diff --git a/.claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp b/.claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp new file mode 100644 index 0000000000000000000000000000000000000000..221a1f672f8bdb5b8cf688321df73c2be2218c2e GIT binary patch literal 4096 zcmYc?2=nw+u+%eT00IF92EKDIGx_-{7#vj@8Pc;dK%xWzP+dx@Z(_1jetx!YKvBM4 zPJVJ?4yLv`phErR{FGFEM{iI4+{EJI%(7J7Ymi=U3Z|)}BBLQN8Uh4EptK}S*Mc|H*vQZTq*hr;Q9)QJ ll%Tdz1*0J_8UmvsFd71*Aut*OqaiRF0;3@?8Un*71OS+tD5(Gd literal 0 HcmV?d00001 diff --git a/.claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md b/.claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md new file mode 100644 index 00000000..0f370bc8 --- /dev/null +++ b/.claude/prds/20250422-TOP_USSTOCK_DAILY_DATA.md @@ -0,0 +1,18 @@ +# 需求描述 + +搜集头部公司的日级别交易数据(massive/rest/models/aggs.py:Agg) +1、获取纳斯达克、道琼斯、标普500 市值>=50亿美金的公司列表,存储到data/us_daily_data目录下 +2、针对每个股票,按月获取从2020年开始的日级别数据,每个股票有一个单独的文件夹,每月有一份数据(存储到data/us_daily_data) + - 如果已经存储给定月份的股票数据,当不是当前月份,则更新,否则不用重新请求 + +# 限制 +一次请求后,sleep 20s + + +# 相关文件: +- rest模块:./massive/rest +- 数据存储:./data/us_daily +- 代码目录:./project + +# 要求 +design和plan文档写到.claude/plans目录 diff --git a/.claude/prds/20250423-OPT_CODE.md 
b/.claude/prds/20250423-OPT_CODE.md new file mode 100644 index 00000000..c4fa9816 --- /dev/null +++ b/.claude/prds/20250423-OPT_CODE.md @@ -0,0 +1,14 @@ +# 需求描述 + +## 一、目录修改 +data_provider 目录 修改成 processor 目录 + +## 二、引入更多获取股票数据的源头 +阅读 ./provider,调整包结构 + +## 三、修改项目结构 +1、不再使用poetry方式,test相关内容可以不要 +2、不要Makefile方式 + +# 要求 +design和plan文档写到.claude/plans目录 diff --git a/.claude/prds/20250423-OPT_DATA_PROVIDER.md b/.claude/prds/20250423-OPT_DATA_PROVIDER.md new file mode 100644 index 00000000..52ef3b08 --- /dev/null +++ b/.claude/prds/20250423-OPT_DATA_PROVIDER.md @@ -0,0 +1,46 @@ +# 需求描述 + +## 一、目录修改 +data_provider 目录 修改成 processor 目录 + +## 二、逻辑优化 +将data_provider升级成两部分: + +1、获取股票列表 +功能描述:给定交易所(纳斯达克、道琼斯、标普500),获取对应交易所所有股票,并获取股票detail信息,不再做市值过滤,存储TickerDetails所有字段 +文件地址:./data/us_list + +2、引入更多获取股票数据的源头 +阅读 https://github.com/ZhuLinsen/daily_stock_analysis,将data_provider的内容迁移过来,存储为provider + +3、获取股票天级数据 +* 功能描述:给定股票列表文件和时间区间,获取股票文件对应的天级别数据 + - 每个股票一个文件夹,放在./data/us_daily + - 每个股票中的数据按月存储 + - 如果目录中的月份已经存在,当不是当前月份,则更新,否则不用重新请求 +* 除了原来的massive获取方式, + + +相关限制:一次请求后,sleep 12s + + +将data_provider的逻辑分两部分 +step1:使用massive-com包(不用支持多家获取),获取纳斯达克、道琼斯、标普500三个交易所的所有股票代码,并获取detail信息,但不再做市值过滤,字段丰富一些,直接使用massive-com中的TickerDetails,每个交易所存一份 +step2: 读取ticker_details文件,获取给定时间区间的天级别数据,按月存储,保持原来的存储方式 + +1、参考/Users/gjh/code/AKI/massive-com/data_provider,在本项目中实现同等功能 + - 依赖 git 上 的 massive-com/client-python 包 +2、将daily_stock_analysis中的数据获取方法,同步到./src/data_provider/sdk中 +3、由于massive-com的api限速比较厉害,us_daily实现可以优先使用sdk的数据获取方法 + +## 相关文件: +- 数据存储:./data/us_daily +- 代码目录:./src/data_provider +- 参考代码:/Users/gjh/code/AKI/massive-com/data_provider + +## 引入依赖 +- https://github.com/ZhuLinsen/daily_stock_analysis +- https://github.com/massive-com/client-python + +# 要求 +design和plan文档写到.claude/plans目录 diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..10b731c5 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 
+/httpRequests/ diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/massive-com.iml b/.idea/massive-com.iml new file mode 100644 index 00000000..07abf202 --- /dev/null +++ b/.idea/massive-com.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..db8786c0 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..e2e44839 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..35eb1ddf --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..02727fe7 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,70 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Official Python client library for the Massive (formerly Polygon.io) REST and WebSocket APIs. Provides market data access for stocks, options, forex, crypto, and more. Published as the `massive` package on PyPI. 
+ +## Development Commands + +```bash +# Install dependencies +poetry install + +# Run all tests +make test + +# Run only REST or WebSocket tests +make test_rest +make test_websocket + +# Run a single test file +poetry run python -m unittest test_rest/test_aggs.py + +# Run a single test method +poetry run python -m unittest test_rest.test_aggs.TestAggs.test_list_aggs + +# Code formatting (auto-fix) +make style + +# Static type checking +poetry run mypy massive test_* examples + +# Both style + static checks +make lint + +# Regenerate REST API spec from OpenAPI +make rest-spec +``` + +## Architecture + +### Client Structure + +`RESTClient` (in `massive/rest/__init__.py`) uses multiple inheritance to compose domain-specific client mixins (AggsClient, TradesClient, QuotesClient, etc.) on top of `BaseClient` (`massive/rest/base.py`). Each mixin lives in its own file under `massive/rest/` and handles one API domain. + +`WebSocketClient` (`massive/websocket/__init__.py`) is a standalone async client using the `websockets` library with auto-reconnect support. + +### Models + +- REST models: `massive/rest/models/` — one file per domain, using the custom `@modelclass` decorator (from `massive/modelclass.py`) which wraps `@dataclass` with flexible init that accepts positional or keyword args. +- WebSocket models: `massive/websocket/models/` + +### API Spec Codegen + +`.massive/rest.py` generates REST client code from `.massive/rest.json` (OpenAPI spec). `.massive/websocket.json` is the WebSocket spec. Use `make rest-spec` / `make ws-spec` to update specs from the API. 
+ +### Tests + +- `test_rest/` — uses `pook` for HTTP mocking, with mock responses in `test_rest/mocks/` +- `test_websocket/` — has its own mock WebSocket server in `mock_server.py` +- Test base classes: `test_rest/base.py` and `test_websocket/base_ws.py` + +### Key Conventions + +- API key via `MASSIVE_API_KEY` env var or constructor parameter +- Base URL: `https://api.massive.com` +- Auth header: `Authorization: Bearer ` +- Python 3.9+ required +- Formatting: `black`; type checking: `mypy` diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 96f1555d..00000000 --- a/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2018 The Python Packaging Authority - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..ee704b09 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,570 @@ +================================================================================ +Massive Python Client — 代码架构与逻辑文档 +================================================================================ + +项目概述 +-------- +Massive(原 Polygon.io)官方 Python 客户端库,提供 REST 和 WebSocket 两种方式 +访问股票、期权、外汇、加密货币、期货、指数等金融市场数据。发布为 PyPI 包 `massive`, +要求 Python 3.9+。 + + +================================================================================ +一、顶层目录结构 +================================================================================ + +massive-com/ +├── massive/ # 库源码 +│ ├── __init__.py # 公共 API 导出: RESTClient, WebSocketClient, exceptions +│ ├── modelclass.py # @modelclass 装饰器(自定义 dataclass 封装) +│ ├── exceptions.py # AuthError, BadResponse +│ ├── logging.py # 统一日志工具 get_logger() +│ ├── rest/ # REST 客户端 +│ │ ├── __init__.py # RESTClient(多重继承组合所有 domain mixin) +│ │ ├── base.py # BaseClient: HTTP 请求、分页、重试、参数转换 +│ │ ├── aggs.py # AggsClient — 聚合K线 +│ │ ├── trades.py # TradesClient — 逐笔成交 +│ │ ├── quotes.py # QuotesClient — 报价/NBBO +│ │ ├── snapshot.py # SnapshotClient — 快照 +│ │ ├── reference.py # 参考数据: Markets/Tickers/Splits/Dividends/... 
+│ │ ├── indicators.py # IndicatorsClient — 技术指标 SMA/EMA/RSI/MACD +│ │ ├── financials.py # FinancialsClient — 财务报表 +│ │ ├── benzinga.py # BenzingaClient — 研报/评级 +│ │ ├── economy.py # EconomyClient — 宏观经济 +│ │ ├── etf_global.py # EtfGlobalClient — ETF 分析 +│ │ ├── futures.py # FuturesClient — 期货 +│ │ ├── tmx.py # TmxClient — 多伦多交易所 +│ │ ├── summaries.py # SummariesClient — 摘要 +│ │ ├── vX.py # VXClient — 旧版 vX 端点 +│ │ └── models/ # REST 数据模型 +│ │ ├── __init__.py # 统一导出所有模型 +│ │ ├── common.py # 公共枚举: Sort, Market, Timeframe 等 +│ │ ├── request.py # RequestOptionBuilder(Launchpad 边缘头构建器) +│ │ ├── aggs.py # Agg, GroupedDailyAgg, DailyOpenCloseAgg, PreviousCloseAgg +│ │ ├── trades.py # Trade, LastTrade, CryptoTrade +│ │ └── ... # 各领域模型文件(与 rest/ 下 mixin 一一对应) +│ └── websocket/ # WebSocket 客户端 +│ ├── __init__.py # WebSocketClient: 异步连接、认证、订阅、重连 +│ └── models/ +│ ├── __init__.py # MARKET_EVENT_MAP 注册表 + parse() 解析器 +│ ├── common.py # Feed, Market, EventType 枚举 +│ └── models.py # 消息模型: EquityTrade, CryptoQuote, Level2Book 等 +├── test_rest/ # REST 单元测试(pook HTTP mock) +│ ├── base.py # BaseTest: 自动加载 mocks/ 下 JSON 文件注册 mock +│ ├── mocks/ # Mock 响应 JSON 文件(目录结构映射 URL 路径) +│ └── test_*.py # 各领域测试 +├── test_websocket/ # WebSocket 单元测试 +│ ├── base_ws.py # BaseTest: IsolatedAsyncioTestCase + mock server +│ ├── mock_server.py # 内置 mock WebSocket 服务器 +│ └── test_conn.py # 连接/认证/订阅测试 +├── examples/ # 使用示例 +│ ├── rest/ # REST 示例脚本 +│ └── websocket/ # WebSocket 示例脚本 +├── .massive/ # OpenAPI 规范与代码生成 +│ ├── rest.json # REST OpenAPI 规范文件 +│ ├── rest.py # 从 api.massive.com/openapi 拉取规范的脚本 +│ └── websocket.json # WebSocket 规范文件 +├── docs/ # Sphinx 文档 +├── pyproject.toml # Poetry 项目配置 +├── Makefile # 开发命令入口 +└── poetry.lock # 依赖锁文件 + + +================================================================================ +二、核心架构:REST 客户端 +================================================================================ + +2.1 多重继承 Mixin 组合模式 +---------------------------- + +RESTClient 通过多重继承将 19 个领域 
Mixin 组合为一个统一客户端: + + class RESTClient( + AggsClient, # 聚合K线 /v2/aggs + FuturesClient, # 期货 /v1/futures + FinancialsClient, # 财报 /vX/reference/financials + BenzingaClient, # 研报 /v1/meta/symbols + EconomyClient, # 宏观 /v1/economy + EtfGlobalClient, # ETF /v1/etf + TmxClient, # TMX /v1/tmx + TradesClient, # 成交 /v3/trades + QuotesClient, # 报价 /v3/quotes + SnapshotClient, # 快照 /v3/snapshot + MarketsClient, # 市场 /v3/reference/markets + TickersClient, # 标的 /v3/reference/tickers + SplitsClient, # 拆股 /v3/reference/splits + DividendsClient, # 分红 /v3/reference/dividends + ConditionsClient, # 条件码 /v3/reference/conditions + ExchangesClient, # 交易所 /v3/reference/exchanges + ContractsClient, # 合约 /v3/reference/options/contracts + IndicatorsClient, # 技术指标 /v1/indicators + SummariesClient, # 摘要 /v3/summaries + ) + + 构造函数将所有参数传递给 BaseClient.__init__(), + 并额外实例化 self.vx = VXClient(...) 用于旧版端点。 + +优点: + - 每个领域独立文件,职责单一 + - 新增 API 领域只需添加 Mixin + 模型,在 RESTClient 继承链中注册 + - 各 Mixin 可独立测试 + + +2.2 BaseClient — HTTP 基础设施 +------------------------------- + +所在文件: massive/rest/base.py + +BaseClient 是所有 REST Mixin 的共同基类,封装全部 HTTP 通信逻辑。 + +初始化流程: + 1. 验证 API key(缺失则抛出 AuthError) + 2. 构建默认请求头: Authorization: Bearer , Accept-Encoding: gzip, User-Agent + 3. 创建 urllib3.PoolManager: + - SSL 证书验证(certifi) + - 重试策略: 默认 3 次,指数退避(因子 0.1),针对 [413,429,499,500,502,503,504] + - 可配置连接池数量、超时时间 + 4. 
初始化可选的自定义 JSON 编解码器 + +核心方法: + + _get(path, params, result_key, deserializer, raw, options) + │ 执行 GET 请求到 BASE + path + │ params 作为查询参数 + │ raw=True 时返回原始 HTTPResponse + │ 否则解析 JSON,提取 result_key 对应的字段 + └→ 用 deserializer 函数将每条数据转换为模型对象 + + _get_params(fn, caller_locals, datetime_res="nanos") + │ 参数转换引擎: 将 Python 函数参数自动映射为 API 查询参数 + │ 处理规则: + │ - Enum → 取 .value + │ - bool → 小写字符串 "true"/"false" + │ - datetime → 按精度转换为 Unix 时间戳 + │ - 下划线后缀 → 点号(如 timestamp_lt → timestamp.lt) + │ - any_of 后缀 → 逗号拼接列表 + └→ 返回可直接用于请求的 dict + + _paginate(path, params, raw, deserializer, result_key, options) + │ 分页入口 + │ raw=True → 返回单页原始响应 + └→ raw=False → 返回 _paginate_iter() 生成器 + + _paginate_iter(path, params, deserializer, result_key, options) + │ 分页迭代生成器 + │ while 循环: + │ 1. 发送请求获取一页数据 + │ 2. 对 result_key 下每条记录调用 deserializer → yield 模型对象 + │ 3. 检查响应中的 next_url + │ 4. 有 next_url 且 pagination=True → 解析 URL 继续请求 + └→ 无 next_url → 结束 + + +2.3 领域 Mixin 方法模式 +------------------------ + +所有 Mixin 方法遵循统一模式: + + def list_xxx(self, ticker, param1, ..., params=None, raw=False, options=None): + url = f"/v3/some/endpoint/{ticker}" + return self._paginate( + path=url, + params=self._get_params(self.list_xxx, locals()), + raw=raw, + deserializer=SomeModel.from_dict, + result_key="results", + options=options, + ) + + def get_xxx(self, ticker, ..., params=None, raw=False, options=None): + url = f"/v2/some/endpoint/{ticker}" + return self._get( + path=url, + params=self._get_params(self.get_xxx, locals()), + result_key="results", + deserializer=SomeModel.from_dict, + raw=raw, + options=options, + ) + +方法命名约定: + - list_xxx() → 分页接口,返回 Iterator[Model] + - get_xxx() → 单次请求,返回 Model 或 List[Model] + +参数命名约定: + - params: Optional[dict] — 额外查询参数 + - raw: bool — True 时跳过反序列化,返回原始 HTTP 响应 + - options: RequestOptionBuilder — 自定义请求头(Launchpad 边缘场景) + + +================================================================================ +三、核心架构:WebSocket 客户端 
+================================================================================ + +所在文件: massive/websocket/__init__.py + +3.1 连接与认证流程 +------------------- + + 客户端实例化 + ↓ + WebSocketClient(api_key, feed, market, subscriptions=["T.*"]) + ↓ 存储 scheduled_subs = {"T.*"} + ↓ + client.run(callback) — 同步入口,内部调用 asyncio.run(connect()) + ↓ + connect() — 异步主循环 + ↓ + 建立 WebSocket 连接 → wss://socket.massive.com/{market} + ↓ + 接收 welcome 消息 + ↓ + 发送认证: {"action": "auth", "params": ""} + ↓ + 接收认证响应(失败则抛出 AuthError) + ↓ + 进入主消息循环 + +3.2 订阅管理 +------------- + +WebSocketClient 维护两个集合: + - subs: 当前已向服务器发送的订阅 + - scheduled_subs: 用户期望的订阅集 + +每次循环迭代检查 schedule_resub 标志: + 若 True → 计算差集: + 新增 = scheduled_subs - subs → 发送 {"action": "subscribe", "params": "T.*,..."} + 移除 = subs - scheduled_subs → 发送 {"action": "unsubscribe", "params": "..."} + 更新 subs = scheduled_subs.copy() + +通配符处理: + 订阅 "T.*" 时自动移除已有的 "T.AAPL", "T.MSFT" 等具体订阅 + +用户可在运行时动态调用: + client.subscribe("Q.AAPL") # 添加订阅 + client.unsubscribe("T.*") # 取消订阅 + client.unsubscribe_all() # 清空所有订阅 + +3.3 消息处理 +------------- + + 服务器推送消息(JSON 数组) + ↓ + raw=False 路径: + ↓ + parse(msg_list, logger, market) + ↓ 遍历每条消息 + 查找 MARKET_EVENT_MAP[(market, event_type)] + ↓ 得到对应模型类 + Model.from_dict(msg) → 模型实例 + ↓ + 返回 List[Model] 给用户 callback + + raw=True 路径: + ↓ + 直接将原始 str/bytes 传给用户 callback + +3.4 重连机制 +------------- + + - 默认最大重连 5 次(可配置 max_reconnects) + - ConnectionClosedError 触发重连: 递增计数器 → 重设 schedule_resub → 重建连接 + - 超过最大次数 → 抛出最后一个异常 + - ConnectionClosedOK → 正常退出不重连 + +3.5 WebSocket 消息模型注册表 +----------------------------- + +所在文件: massive/websocket/models/__init__.py + +MARKET_EVENT_MAP 是一个嵌套字典,键为 (Market, EventType),值为模型类: + + MARKET_EVENT_MAP = { + Market.Stocks: { + "T": EquityTrade, # 逐笔成交 + "Q": EquityQuote, # NBBO 报价 + "A": EquityAgg, # 秒级聚合 + "AM": EquityAgg, # 分钟级聚合 + "LULD": LimitUpLimitDown, + "NOI": Imbalance, + ... + }, + Market.Crypto: { + "XT": CryptoTrade, + "XQ": CryptoQuote, + "XA": CurrencyAgg, + "XL2": Level2Book, + ... 
+ }, + ... + } + +新增事件类型只需: 定义模型类 + 在 MARKET_EVENT_MAP 中注册。 + + +================================================================================ +四、模型系统 +================================================================================ + +4.1 @modelclass 装饰器 +----------------------- + +所在文件: massive/modelclass.py + + @modelclass + class Agg: + open: Optional[float] = None + high: Optional[float] = None + ... + +@modelclass 在标准 @dataclass 基础上: + - 重写 __init__: 同时支持位置参数和关键字参数 + - 位置参数按类属性声明顺序映射 + - 允许混合使用: Agg(1.0, 2.0, close=3.0) + +4.2 from_dict() 反序列化 +-------------------------- + +每个模型类定义 @staticmethod from_dict(d) 方法: + + @staticmethod + def from_dict(d): + return Agg( + d.get("o", None), # API 简写 "o" → open + d.get("h", None), # "h" → high + d.get("l", None), # "l" → low + d.get("c", None), # "c" → close + d.get("v", None), # "v" → volume + d.get("t", None), # "t" → timestamp + ... + ) + +此设计将 API 响应的缩写键名与 Python 的可读属性名解耦。 + +4.3 公共枚举 +------------- + +所在文件: massive/rest/models/common.py + + Sort / Order — 排序方向 (ASC, DESC) + Market / AssetClass — 市场/资产类型 + Locale — 地区 (US, GLOBAL) + Timeframe — 时间框架 (ANNUAL, QUARTERLY) + SeriesType — 序列类型 (OPEN, CLOSE, HIGH, LOW) + Direction — 涨跌排行 (GAINERS, LOSERS) + DividendType — 股息类型 + DataType / SIP — 数据源类型 + 等等 + +4.4 RequestOptionBuilder +------------------------- + +所在文件: massive/rest/models/request.py + +用于 Launchpad 多租户场景,构建边缘请求头: + + options = RequestOptionBuilder( + edge_id="user123", + edge_ip_address="192.168.1.1", + edge_user="agent-string" + ) + client.list_trades("AAPL", options=options) + +生成的头部: + X-Massive-Edge-ID: user123 + X-Massive-Edge-IP-Address: 192.168.1.1 + X-Massive-Edge-User-Agent: agent-string + + +================================================================================ +五、异常与日志 +================================================================================ + +5.1 异常体系 +------------- + +所在文件: massive/exceptions.py + + AuthError — API key 为空或认证失败 + BadResponse — API 返回非 200 状态码 + +5.2 日志 
+--------- + +所在文件: massive/logging.py + + get_logger(name) → logging.Logger + - 输出到 stdout + - 格式: "%(asctime)s %(name)s %(levelname)s: %(message)s" + + verbose=True → 设置 DEBUG 级别 + trace=True → 额外打印请求 URL 和响应头(API key 已脱敏) + + +================================================================================ +六、OpenAPI 规范与代码生成 +================================================================================ + +所在文件: .massive/ + + rest.json — REST API OpenAPI 规范(从 api.massive.com/openapi 拉取) + rest.py — 拉取脚本: make rest-spec + websocket.json — WebSocket API 规范: make ws-spec + +REST 客户端代码(Mixin + 模型)需与 rest.json 规范保持同步。 +新增/变更 API 端点时: + 1. make rest-spec 更新规范 + 2. 按规范新增或修改 Mixin 方法和模型类 + + +================================================================================ +七、测试体系 +================================================================================ + +7.1 REST 测试 +-------------- + +所在目录: test_rest/ + +基类 BaseTest (test_rest/base.py): + - 继承 unittest.TestCase + - 使用 pook 库拦截 HTTP 请求 + - 自动扫描 test_rest/mocks/ 目录,将 JSON 文件注册为 mock 响应 + - mock 文件路径映射 URL 路径(特殊字符替换: ? 
→ &, : → ;) + - setUpClass() 创建共享 RESTClient 实例 + +运行: + make test_rest + poetry run python -m unittest test_rest.test_aggs + +7.2 WebSocket 测试 +------------------- + +所在目录: test_websocket/ + +基类 BaseTest (test_websocket/base_ws.py): + - 继承 unittest.IsolatedAsyncioTestCase(异步测试支持) + - 内置 mock WebSocket 服务器 (mock_server.py) + - expectResponse() 预设期望消息 + - expectProcessor() 断言收到的消息与期望匹配 + +运行: + make test_websocket + poetry run python -m unittest test_websocket.test_conn + + +================================================================================ +八、完整数据流示例 +================================================================================ + +8.1 REST 分页请求流程 +---------------------- + +用户代码: + for trade in client.list_trades("AAPL", limit=100): + process(trade) + +内部流程: + + TradesClient.list_trades("AAPL", limit=100) + │ + ├→ url = "/v3/trades/AAPL" + ├→ params = _get_params() → {"limit": 100} + └→ _paginate(url, params, deserializer=Trade.from_dict, result_key="results") + │ + └→ _paginate_iter() [生成器] + │ + ├→ _get(url, params, raw=True) → HTTPResponse + │ │ + │ ├→ urllib3.PoolManager.request("GET", BASE+url, fields=params) + │ ├→ 自动重试(指数退避,最多 3 次) + │ └→ 返回 HTTPResponse + │ + ├→ 解析 JSON → {"results": [...], "next_url": "..."} + │ + ├→ for item in results: + │ Trade.from_dict(item) → yield Trade 对象 + │ + ├→ 检查 next_url + │ 有 → 解析新 URL 和参数 → 继续循环 + │ 无 → 生成器结束 + │ + └→ 用户逐个接收 Trade 对象(惰性加载,按需翻页) + +8.2 WebSocket 实时数据流程 +--------------------------- + +用户代码: + def handle(msgs): + for m in msgs: + print(m) + client = WebSocketClient(subscriptions=["T.*"]) + client.run(handle) + +内部流程: + + asyncio.run(connect(handle)) + │ + ├→ 建立 wss://socket.massive.com/stocks 连接 + │ + ├→ 接收 welcome → 发送 auth → 接收 auth 确认 + │ + ├→ 检查 schedule_resub=True + │ └→ 发送 {"action": "subscribe", "params": "T.*"} + │ + └→ 消息循环(永久运行): + │ + ├→ ws.recv(timeout=1s) + │ 超时 → 继续循环 + │ 收到数据 → 解析 JSON + │ + ├→ parse(msg_list, logger, Market.Stocks) + │ │ + │ ├→ msg["ev"] = "T" (trade 事件) + │ ├→ 
MARKET_EVENT_MAP[Stocks]["T"] → EquityTrade + │ └→ EquityTrade.from_dict(msg) → 模型实例 + │ + ├→ await handle([EquityTrade, ...]) + │ + └→ 异常处理: + ConnectionClosedError → 重连(最多 5 次) + ConnectionClosedOK → 正常退出 + + +================================================================================ +九、扩展指南 +================================================================================ + +新增 REST API 领域: + 1. 在 massive/rest/models/ 下创建模型文件,定义 @modelclass + from_dict() + 2. 在 massive/rest/ 下创建 Mixin 文件,继承 BaseClient,实现方法 + 3. 在 massive/rest/__init__.py 的 RESTClient 继承链中加入新 Mixin + 4. 在 massive/rest/models/__init__.py 中导出新模型 + 5. 在 test_rest/ 下添加测试和 mock 数据 + +新增 WebSocket 事件类型: + 1. 在 massive/websocket/models/models.py 定义消息模型 + 2. 在 massive/websocket/models/__init__.py 的 MARKET_EVENT_MAP 中注册 + 3. 在 test_websocket/ 下添加测试 + +自定义 JSON 编解码器: + client = RESTClient(custom_json=orjson) + — 自定义编解码器需提供 loads() 和 dumps() 方法 + +Launchpad 边缘请求: + opts = RequestOptionBuilder(edge_id="uid", edge_ip_address="1.2.3.4") + client.list_trades("AAPL", options=opts) + + +================================================================================ +十、关键设计决策总结 +================================================================================ + + 1. Mixin 多重继承 — 领域隔离,组合灵活,避免深层继承链 + 2. @modelclass 装饰器 — 在 dataclass 基础上支持位置参数,简化 from_dict() 调用 + 3. 参数自动转换 (_get_params) — 利用 inspect 反射将函数签名直接映射为 API 参数 + 4. 生成器分页 — 惰性加载,用户无需关心分页细节,内存友好 + 5. 异步 WebSocket + 同步包装 — connect() 原生 async,run() 提供便捷同步入口 + 6. 事件注册表 (MARKET_EVENT_MAP) — 解耦消息路由与模型定义,扩展性好 + 7. 
pook HTTP mock — 测试不依赖真实 API,mock 文件按 URL 路径组织 diff --git a/docs/requirements.txt b/docs/requirements.txt index e8c712fd..97d8d2fb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ sphinx-autodoc-typehints~=1.19.2 -websockets~=10.3 +websockets>=14.0 diff --git a/project/__init__.py b/project/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/project/us_daily/__init__.py b/project/us_daily/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/project/us_daily/__main__.py b/project/us_daily/__main__.py deleted file mode 100644 index fd9df063..00000000 --- a/project/us_daily/__main__.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging -import os -import sys -from datetime import datetime - -from massive import RESTClient - -from project.us_daily.config import load_config -from project.us_daily.storage import ( - get_tickers_file_path, - file_exists, - save_json, - load_json, -) -from project.us_daily.ticker_filter import filter_top_tickers -from project.us_daily.agg_fetcher import fetch_ticker_aggs - - -def setup_logging(): - os.makedirs("logs", exist_ok=True) - logger = logging.getLogger("us_daily") - logger.setLevel(logging.DEBUG) - - formatter = logging.Formatter( - "%(asctime)s [%(levelname)s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - ) - - file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(formatter) - - stream_handler = logging.StreamHandler(sys.stdout) - stream_handler.setLevel(logging.INFO) - stream_handler.setFormatter(formatter) - - logger.addHandler(file_handler) - logger.addHandler(stream_handler) - - return logger - - -def main(): - logger = setup_logging() - config = load_config() - - logger.info("=== US Daily Data Fetcher Started ===") - logger.info(f"Config: {config}") - - client = RESTClient() - - # Step 1: Get ticker list - tickers_path = get_tickers_file_path(config.data_dir) - if 
config.refresh_tickers or not file_exists(tickers_path): - logger.info("Filtering top tickers from API...") - tickers = filter_top_tickers(client, config) - save_json( - tickers_path, - { - "updated_at": datetime.now().strftime("%Y-%m-%d"), - "market_cap_min": config.market_cap_min, - "tickers": tickers, - }, - ) - logger.info(f"Saved {len(tickers)} tickers to {tickers_path}") - else: - data = load_json(tickers_path) - tickers = data["tickers"] - logger.info( - f"Loaded {len(tickers)} tickers from {tickers_path} " - f"(updated: {data.get('updated_at', 'unknown')})" - ) - - # Step 2: Fetch agg data for each ticker - all_failures = [] - total = len(tickers) - for i, ticker_info in enumerate(tickers): - ticker = ticker_info["ticker"] - logger.info(f"[{i + 1}/{total}] Processing {ticker}") - result = fetch_ticker_aggs(client, ticker, config) - if result["failures"]: - all_failures.extend(result["failures"]) - - # Step 3: Summary - logger.info("=== Summary ===") - logger.info(f"Total tickers: {total}") - if all_failures: - logger.warning(f"Failed months: {len(all_failures)}") - for f in all_failures: - logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") - else: - logger.info("All data fetched successfully") - logger.info("=== Done ===") - - -if __name__ == "__main__": - main() diff --git a/project/us_daily/agg_fetcher.py b/project/us_daily/agg_fetcher.py deleted file mode 100644 index 5d2e5e6d..00000000 --- a/project/us_daily/agg_fetcher.py +++ /dev/null @@ -1,117 +0,0 @@ -import calendar -import logging -import time -from datetime import date, datetime -from typing import List, Tuple - -from project.us_daily.config import Config -from project.us_daily.storage import ( - get_month_file_path, - file_exists, - save_json, -) - -logger = logging.getLogger("us_daily") - - -def generate_months(start: str, end: str) -> List[str]: - start_year, start_month = int(start[:4]), int(start[5:7]) - end_year, end_month = int(end[:4]), int(end[5:7]) - - months = [] - year, 
month = start_year, start_month - while (year, month) <= (end_year, end_month): - months.append(f"{year:04d}-{month:02d}") - month += 1 - if month > 12: - month = 1 - year += 1 - return months - - -def get_month_bounds(month: str) -> Tuple[str, str]: - year, mon = int(month[:4]), int(month[5:7]) - last_day = calendar.monthrange(year, mon)[1] - return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" - - -def is_current_month(month: str) -> bool: - today = date.today() - return month == f"{today.year:04d}-{today.month:02d}" - - -def current_month() -> str: - today = date.today() - return f"{today.year:04d}-{today.month:02d}" - - -def fetch_ticker_aggs(client, ticker: str, config: Config) -> dict: - months = generate_months(config.start_date, current_month()) - failures = [] - - for month in months: - file_path = get_month_file_path(config.data_dir, ticker, month) - - if file_exists(file_path) and not is_current_month(month): - logger.debug(f" {ticker} {month}: exists, skipping") - continue - - start_date, end_date = get_month_bounds(month) - aggs = None - last_error = None - - for attempt in range(1, config.max_retries + 1): - try: - aggs_iter = client.list_aggs( - ticker, - 1, - "day", - from_=start_date, - to=end_date, - adjusted=True, - sort="asc", - ) - aggs = list(aggs_iter) - break - except Exception as e: - last_error = e - logger.warning( - f" {ticker} {month}: attempt {attempt}/{config.max_retries} failed: {e}" - ) - if attempt < config.max_retries: - time.sleep(config.request_interval) - - if aggs is None: - failures.append( - { - "ticker": ticker, - "month": month, - "error": str(last_error), - } - ) - logger.error(f" {ticker} {month}: all retries failed, skipping") - continue - - data = { - "ticker": ticker, - "month": month, - "fetched_at": datetime.now().isoformat(timespec="seconds"), - "data": [ - { - "open": a.open, - "high": a.high, - "low": a.low, - "close": a.close, - "volume": a.volume, - "vwap": a.vwap, - "timestamp": 
a.timestamp, - "transactions": a.transactions, - } - for a in aggs - ], - } - save_json(file_path, data) - logger.info(f" {ticker} {month}: fetched {len(aggs)} bars") - time.sleep(config.request_interval) - - return {"failures": failures} diff --git a/project/us_daily/config.json b/project/us_daily/config.json deleted file mode 100644 index e7ff2381..00000000 --- a/project/us_daily/config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "refresh_tickers": false, - "market_cap_min": 5000000000, - "start_date": "2020-01", - "request_interval": 20, - "data_dir": "data/us_daily", - "max_retries": 3 -} diff --git a/project/us_daily/config.py b/project/us_daily/config.py deleted file mode 100644 index c08bfbb2..00000000 --- a/project/us_daily/config.py +++ /dev/null @@ -1,24 +0,0 @@ -import json -import os -from dataclasses import dataclass - - -@dataclass -class Config: - refresh_tickers: bool = False - market_cap_min: float = 5e9 - start_date: str = "2020-01" - request_interval: int = 20 - data_dir: str = "data/us_daily" - max_retries: int = 3 - - -def load_config(config_path: str = "project/us_daily/config.json") -> Config: - config = Config() - if os.path.exists(config_path): - with open(config_path, "r") as f: - data = json.load(f) - for key, value in data.items(): - if hasattr(config, key): - setattr(config, key, value) - return config diff --git a/project/us_daily/storage.py b/project/us_daily/storage.py deleted file mode 100644 index af80505b..00000000 --- a/project/us_daily/storage.py +++ /dev/null @@ -1,25 +0,0 @@ -import json -import os - - -def get_tickers_file_path(data_dir: str) -> str: - return os.path.join(data_dir, "top_tickers.json") - - -def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: - return os.path.join(data_dir, ticker, f"{month}.json") - - -def save_json(path: str, data: dict) -> None: - os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, "w", encoding="utf-8") as f: - json.dump(data, f, ensure_ascii=False, indent=2) 
- - -def load_json(path: str) -> dict: - with open(path, "r", encoding="utf-8") as f: - return json.load(f) - - -def file_exists(path: str) -> bool: - return os.path.isfile(path) diff --git a/project/us_daily/ticker_filter.py b/project/us_daily/ticker_filter.py deleted file mode 100644 index 733904d2..00000000 --- a/project/us_daily/ticker_filter.py +++ /dev/null @@ -1,59 +0,0 @@ -import logging -import time -from typing import List - -from project.us_daily.config import Config - -logger = logging.getLogger("us_daily") - -EXCHANGES = ["XNAS", "XNYS", "ARCX"] - - -def filter_top_tickers(client, config: Config) -> List[dict]: - result = [] - for exchange in EXCHANGES: - logger.info(f"Fetching tickers for exchange: {exchange}") - try: - tickers = client.list_tickers( - market="stocks", - exchange=exchange, - active=True, - limit=1000, - ) - except Exception as e: - logger.error(f"Failed to list tickers for {exchange}: {e}") - continue - - time.sleep(config.request_interval) - - for ticker_obj in tickers: - ticker_str = ticker_obj.ticker - try: - details = client.get_ticker_details(ticker_str) - time.sleep(config.request_interval) - except Exception as e: - logger.warning(f"Failed to get details for {ticker_str}: {e}") - continue - - if details.market_cap is None: - logger.debug(f"{ticker_str}: no market_cap data, skipping") - continue - - if details.market_cap >= config.market_cap_min: - entry = { - "ticker": details.ticker, - "name": details.name, - "market_cap": details.market_cap, - "exchange": details.primary_exchange, - } - result.append(entry) - logger.info( - f" {details.ticker}: market_cap={details.market_cap:.0f} included" - ) - else: - logger.debug( - f" {ticker_str}: market_cap={details.market_cap:.0f} < {config.market_cap_min:.0f}, skipping" - ) - - logger.info(f"Total top tickers found: {len(result)}") - return result diff --git a/src/massive/rest/base.py b/src/massive/rest/base.py index 3349d7ef..288798e3 100644 --- a/src/massive/rest/base.py +++ 
b/src/massive/rest/base.py @@ -2,6 +2,7 @@ import json import urllib3 import inspect +import time from urllib3.util.retry import Retry from enum import Enum from typing import Optional, Any, Dict @@ -241,6 +242,7 @@ def _paginate_iter( if parsed.query: path += "?" + parsed.query params = {} + time.sleep(12) def _paginate( self, diff --git a/test_rest/base.py b/tests/test_rest/base.py similarity index 100% rename from test_rest/base.py rename to tests/test_rest/base.py diff --git a/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json b/tests/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json similarity index 100% rename from test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json rename to tests/test_rest/mocks/v1/conversion/AUD/USD&amount=100&precision=2.json diff --git a/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false×tamp.lte=1478393873000×tamp.gte=1477972800000.json b/tests/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false×tamp.lte=1478393873000×tamp.gte=1477972800000.json similarity index 100% rename from test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false×tamp.lte=1478393873000×tamp.gte=1477972800000.json rename to tests/test_rest/mocks/v1/indicators/ema/AAPL&window=5&adjusted=false×tamp.lte=1478393873000×tamp.gte=1477972800000.json diff --git a/test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20×tamp.gt=2022-08-09.json b/tests/test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20×tamp.gt=2022-08-09.json similarity index 100% rename from test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20×tamp.gt=2022-08-09.json rename to tests/test_rest/mocks/v1/indicators/macd/SPY&signal_window=10&long_window=20×tamp.gt=2022-08-09.json diff --git a/test_rest/mocks/v1/indicators/rsi/AAPL&window=20×pan=minute&adjusted=true×tamp.gt=2022-08-18.json 
b/tests/test_rest/mocks/v1/indicators/rsi/AAPL&window=20×pan=minute&adjusted=true×tamp.gt=2022-08-18.json similarity index 100% rename from test_rest/mocks/v1/indicators/rsi/AAPL&window=20×pan=minute&adjusted=true×tamp.gt=2022-08-18.json rename to tests/test_rest/mocks/v1/indicators/rsi/AAPL&window=20×pan=minute&adjusted=true×tamp.gt=2022-08-18.json diff --git a/test_rest/mocks/v1/indicators/sma/AAPL&window=30×pan=quarter×tamp=1483958600&expand_underlying=true.json b/tests/test_rest/mocks/v1/indicators/sma/AAPL&window=30×pan=quarter×tamp=1483958600&expand_underlying=true.json similarity index 100% rename from test_rest/mocks/v1/indicators/sma/AAPL&window=30×pan=quarter×tamp=1483958600&expand_underlying=true.json rename to tests/test_rest/mocks/v1/indicators/sma/AAPL&window=30×pan=quarter×tamp=1483958600&expand_underlying=true.json diff --git a/test_rest/mocks/v1/last/crypto/BTC/USD.json b/tests/test_rest/mocks/v1/last/crypto/BTC/USD.json similarity index 100% rename from test_rest/mocks/v1/last/crypto/BTC/USD.json rename to tests/test_rest/mocks/v1/last/crypto/BTC/USD.json diff --git a/test_rest/mocks/v1/last_quote/currencies/AUD/USD.json b/tests/test_rest/mocks/v1/last_quote/currencies/AUD/USD.json similarity index 100% rename from test_rest/mocks/v1/last_quote/currencies/AUD/USD.json rename to tests/test_rest/mocks/v1/last_quote/currencies/AUD/USD.json diff --git a/test_rest/mocks/v1/marketstatus/now.json b/tests/test_rest/mocks/v1/marketstatus/now.json similarity index 100% rename from test_rest/mocks/v1/marketstatus/now.json rename to tests/test_rest/mocks/v1/marketstatus/now.json diff --git a/test_rest/mocks/v1/marketstatus/upcoming.json b/tests/test_rest/mocks/v1/marketstatus/upcoming.json similarity index 100% rename from test_rest/mocks/v1/marketstatus/upcoming.json rename to tests/test_rest/mocks/v1/marketstatus/upcoming.json diff --git a/test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json 
b/tests/test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json similarity index 100% rename from test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json rename to tests/test_rest/mocks/v1/open-close/AAPL/2005-04-01&adjusted=true.json diff --git a/test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json b/tests/test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json similarity index 100% rename from test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json rename to tests/test_rest/mocks/v1/summaries&ticker.any_of=NCLH%2CO%3ANCLH221014C00005000%2CC%3AEURUSD%2CX%3ABTCUSD%2CAPx.json diff --git a/test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json b/tests/test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json similarity index 100% rename from test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json rename to tests/test_rest/mocks/v2/aggs/grouped/locale/us/market/stocks/2005-04-04&adjusted=true.json diff --git a/test_rest/mocks/v2/aggs/ticker/AAPL/prev.json b/tests/test_rest/mocks/v2/aggs/ticker/AAPL/prev.json similarity index 100% rename from test_rest/mocks/v2/aggs/ticker/AAPL/prev.json rename to tests/test_rest/mocks/v2/aggs/ticker/AAPL/prev.json diff --git a/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json b/tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json similarity index 100% rename from test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json rename to tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-01/2005-04-04.json diff --git a/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json b/tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json similarity index 
100% rename from test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json rename to tests/test_rest/mocks/v2/aggs/ticker/AAPL/range/1/day/2005-04-02/2005-04-04.json diff --git a/test_rest/mocks/v2/last/nbbo/AAPL.json b/tests/test_rest/mocks/v2/last/nbbo/AAPL.json similarity index 100% rename from test_rest/mocks/v2/last/nbbo/AAPL.json rename to tests/test_rest/mocks/v2/last/nbbo/AAPL.json diff --git a/test_rest/mocks/v2/last/trade/AAPL.json b/tests/test_rest/mocks/v2/last/trade/AAPL.json similarity index 100% rename from test_rest/mocks/v2/last/trade/AAPL.json rename to tests/test_rest/mocks/v2/last/trade/AAPL.json diff --git a/test_rest/mocks/v2/reference/news&ticker=NFLX.json b/tests/test_rest/mocks/v2/reference/news&ticker=NFLX.json similarity index 100% rename from test_rest/mocks/v2/reference/news&ticker=NFLX.json rename to tests/test_rest/mocks/v2/reference/news&ticker=NFLX.json diff --git a/test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json b/tests/test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json rename to tests/test_rest/mocks/v2/snapshot/locale/global/markets/crypto/tickers/X;BTCUSD/book.json diff --git a/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json b/tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json rename to tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/gainers.json diff --git a/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json b/tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json rename to 
tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/AAPL.json diff --git a/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json b/tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json similarity index 100% rename from test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json rename to tests/test_rest/mocks/v2/snapshot/locale/us/markets/stocks/tickers/index.json diff --git a/test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json b/tests/test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json similarity index 100% rename from test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json rename to tests/test_rest/mocks/v3/quotes/AAPL&cursor=YXA9MTkyODgxNjYmYXM9JmxpbWl0PTEwJm9yZGVyPWRlc2Mmc29ydD10aW1lc3RhbXAmdGltZXN0YW1wLmx0ZT0yMDIyLTA1LTEwVDE0JTNBMTElM0ExMi42OTA2NjExODla.json diff --git a/test_rest/mocks/v3/quotes/AAPL.json b/tests/test_rest/mocks/v3/quotes/AAPL.json similarity index 100% rename from test_rest/mocks/v3/quotes/AAPL.json rename to tests/test_rest/mocks/v3/quotes/AAPL.json diff --git a/test_rest/mocks/v3/reference/conditions&asset_class=stocks.json b/tests/test_rest/mocks/v3/reference/conditions&asset_class=stocks.json similarity index 100% rename from test_rest/mocks/v3/reference/conditions&asset_class=stocks.json rename to tests/test_rest/mocks/v3/reference/conditions&asset_class=stocks.json diff --git a/test_rest/mocks/v3/reference/dividends.json b/tests/test_rest/mocks/v3/reference/dividends.json similarity index 100% rename from test_rest/mocks/v3/reference/dividends.json rename to tests/test_rest/mocks/v3/reference/dividends.json 
diff --git a/test_rest/mocks/v3/reference/exchanges.json b/tests/test_rest/mocks/v3/reference/exchanges.json similarity index 100% rename from test_rest/mocks/v3/reference/exchanges.json rename to tests/test_rest/mocks/v3/reference/exchanges.json diff --git a/test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json b/tests/test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json similarity index 100% rename from test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json rename to tests/test_rest/mocks/v3/reference/options/contracts&cursor=YXA9JTdCJTIySUQlMjIlM0ElMjIy.json diff --git a/test_rest/mocks/v3/reference/options/contracts.json b/tests/test_rest/mocks/v3/reference/options/contracts.json similarity index 100% rename from test_rest/mocks/v3/reference/options/contracts.json rename to tests/test_rest/mocks/v3/reference/options/contracts.json diff --git a/test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json b/tests/test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json similarity index 100% rename from test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json rename to tests/test_rest/mocks/v3/reference/options/contracts/OEVRI240119C00002500.json diff --git a/test_rest/mocks/v3/reference/splits.json b/tests/test_rest/mocks/v3/reference/splits.json similarity index 100% rename from test_rest/mocks/v3/reference/splits.json rename to tests/test_rest/mocks/v3/reference/splits.json diff --git a/test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json 
b/tests/test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json rename to tests/test_rest/mocks/v3/reference/tickers&cursor=YWN0aXZlPXRydWUmZGF0ZT0yMDIyLTA0LTI3JmxpbWl0PTImb3JkZXI9YXNjJnBhZ2VfbWFya2VyPUFBJTdDZjEyMmJjYmY4YWQwNzRmZmJlMTZmNjkxOWQ0ZDc3NjZlMzA3MWNmNmU1Nzg3OGE0OGU1NjQ1YzQyM2U3NzJhOSZzb3J0PXRpY2tlcg.json diff --git a/test_rest/mocks/v3/reference/tickers.json b/tests/test_rest/mocks/v3/reference/tickers.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers.json rename to tests/test_rest/mocks/v3/reference/tickers.json diff --git a/test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json b/tests/test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json rename to tests/test_rest/mocks/v3/reference/tickers/AAPL&date=2020-10-01.json diff --git a/test_rest/mocks/v3/reference/tickers/AAPL.json b/tests/test_rest/mocks/v3/reference/tickers/AAPL.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers/AAPL.json rename to tests/test_rest/mocks/v3/reference/tickers/AAPL.json diff --git a/test_rest/mocks/v3/reference/tickers/types.json b/tests/test_rest/mocks/v3/reference/tickers/types.json similarity index 100% rename from test_rest/mocks/v3/reference/tickers/types.json rename to tests/test_rest/mocks/v3/reference/tickers/types.json diff --git a/test_rest/mocks/v3/snapshot.json b/tests/test_rest/mocks/v3/snapshot.json similarity index 100% rename from 
test_rest/mocks/v3/snapshot.json rename to tests/test_rest/mocks/v3/snapshot.json diff --git a/test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json b/tests/test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json similarity index 100% rename from test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json rename to tests/test_rest/mocks/v3/snapshot/indices&ticker.any_of=SPX%2CAPx%2CAPy.json diff --git a/test_rest/mocks/v3/snapshot/options/AAPL.json b/tests/test_rest/mocks/v3/snapshot/options/AAPL.json similarity index 100% rename from test_rest/mocks/v3/snapshot/options/AAPL.json rename to tests/test_rest/mocks/v3/snapshot/options/AAPL.json diff --git a/test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json b/tests/test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json similarity index 100% rename from test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json rename to tests/test_rest/mocks/v3/snapshot/options/AAPL/O;AAPL230616C00150000.json diff --git a/test_rest/mocks/v3/trades/AAPL&limit=2.json b/tests/test_rest/mocks/v3/trades/AAPL&limit=2.json similarity index 100% rename from test_rest/mocks/v3/trades/AAPL&limit=2.json rename to tests/test_rest/mocks/v3/trades/AAPL&limit=2.json diff --git a/test_rest/mocks/vX/reference/financials.json b/tests/test_rest/mocks/vX/reference/financials.json similarity index 100% rename from test_rest/mocks/vX/reference/financials.json rename to tests/test_rest/mocks/vX/reference/financials.json diff --git a/test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json b/tests/test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json similarity index 100% rename from test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json rename to tests/test_rest/mocks/vX/reference/tickers/META/events&types=ticker_change.json diff --git a/test_rest/models/test_requests.py b/tests/test_rest/models/test_requests.py similarity 
index 100% rename from test_rest/models/test_requests.py rename to tests/test_rest/models/test_requests.py diff --git a/test_rest/test_aggs.py b/tests/test_rest/test_aggs.py similarity index 100% rename from test_rest/test_aggs.py rename to tests/test_rest/test_aggs.py diff --git a/test_rest/test_conditions.py b/tests/test_rest/test_conditions.py similarity index 100% rename from test_rest/test_conditions.py rename to tests/test_rest/test_conditions.py diff --git a/test_rest/test_contracts.py b/tests/test_rest/test_contracts.py similarity index 100% rename from test_rest/test_contracts.py rename to tests/test_rest/test_contracts.py diff --git a/test_rest/test_dividends.py b/tests/test_rest/test_dividends.py similarity index 100% rename from test_rest/test_dividends.py rename to tests/test_rest/test_dividends.py diff --git a/test_rest/test_exchanges.py b/tests/test_rest/test_exchanges.py similarity index 100% rename from test_rest/test_exchanges.py rename to tests/test_rest/test_exchanges.py diff --git a/test_rest/test_indicators.py b/tests/test_rest/test_indicators.py similarity index 100% rename from test_rest/test_indicators.py rename to tests/test_rest/test_indicators.py diff --git a/test_rest/test_markets.py b/tests/test_rest/test_markets.py similarity index 100% rename from test_rest/test_markets.py rename to tests/test_rest/test_markets.py diff --git a/test_rest/test_modelclass.py b/tests/test_rest/test_modelclass.py similarity index 100% rename from test_rest/test_modelclass.py rename to tests/test_rest/test_modelclass.py diff --git a/test_rest/test_quotes.py b/tests/test_rest/test_quotes.py similarity index 100% rename from test_rest/test_quotes.py rename to tests/test_rest/test_quotes.py diff --git a/test_rest/test_snapshots.py b/tests/test_rest/test_snapshots.py similarity index 100% rename from test_rest/test_snapshots.py rename to tests/test_rest/test_snapshots.py diff --git a/test_rest/test_splits.py b/tests/test_rest/test_splits.py similarity index 
100% rename from test_rest/test_splits.py rename to tests/test_rest/test_splits.py diff --git a/test_rest/test_summaries.py b/tests/test_rest/test_summaries.py similarity index 100% rename from test_rest/test_summaries.py rename to tests/test_rest/test_summaries.py diff --git a/test_rest/test_tickers.py b/tests/test_rest/test_tickers.py similarity index 100% rename from test_rest/test_tickers.py rename to tests/test_rest/test_tickers.py diff --git a/test_rest/test_trades.py b/tests/test_rest/test_trades.py similarity index 100% rename from test_rest/test_trades.py rename to tests/test_rest/test_trades.py diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py index cb7b1b92..6375ecf9 100644 --- a/tests/test_us_daily/test_agg_fetcher.py +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -9,19 +9,19 @@ class TestGenerateMonths(unittest.TestCase): def test_generate_months_basic(self): - from project.us_daily.agg_fetcher import generate_months + from data_provider.us_daily.agg_fetcher import generate_months result = generate_months("2020-01", "2020-04") self.assertEqual(result, ["2020-01", "2020-02", "2020-03", "2020-04"]) def test_generate_months_cross_year(self): - from project.us_daily.agg_fetcher import generate_months + from data_provider.us_daily.agg_fetcher import generate_months result = generate_months("2023-11", "2024-02") self.assertEqual(result, ["2023-11", "2023-12", "2024-01", "2024-02"]) def test_generate_months_single(self): - from project.us_daily.agg_fetcher import generate_months + from data_provider.us_daily.agg_fetcher import generate_months result = generate_months("2024-06", "2024-06") self.assertEqual(result, ["2024-06"]) @@ -29,21 +29,21 @@ def test_generate_months_single(self): class TestMonthBounds(unittest.TestCase): def test_month_bounds_january(self): - from project.us_daily.agg_fetcher import get_month_bounds + from data_provider.us_daily.agg_fetcher import get_month_bounds start, end = 
get_month_bounds("2020-01") self.assertEqual(start, "2020-01-01") self.assertEqual(end, "2020-01-31") def test_month_bounds_february_leap(self): - from project.us_daily.agg_fetcher import get_month_bounds + from data_provider.us_daily.agg_fetcher import get_month_bounds start, end = get_month_bounds("2024-02") self.assertEqual(start, "2024-02-01") self.assertEqual(end, "2024-02-29") def test_month_bounds_february_non_leap(self): - from project.us_daily.agg_fetcher import get_month_bounds + from data_provider.us_daily.agg_fetcher import get_month_bounds start, end = get_month_bounds("2023-02") self.assertEqual(start, "2023-02-01") @@ -51,16 +51,16 @@ def test_month_bounds_february_non_leap(self): class TestIsCurrentMonth(unittest.TestCase): - @patch("project.us_daily.agg_fetcher.date") + @patch("data_provider.us_daily.agg_fetcher.date") def test_is_current_month_true(self, mock_date): - from project.us_daily.agg_fetcher import is_current_month + from data_provider.us_daily.agg_fetcher import is_current_month mock_date.today.return_value = date(2026, 4, 22) self.assertTrue(is_current_month("2026-04")) - @patch("project.us_daily.agg_fetcher.date") + @patch("data_provider.us_daily.agg_fetcher.date") def test_is_current_month_false(self, mock_date): - from project.us_daily.agg_fetcher import is_current_month + from data_provider.us_daily.agg_fetcher import is_current_month mock_date.today.return_value = date(2026, 4, 22) self.assertFalse(is_current_month("2026-03")) @@ -74,8 +74,8 @@ def tearDown(self): shutil.rmtree(self.test_dir) def test_skips_existing_historical_month(self): - from project.us_daily.agg_fetcher import fetch_ticker_aggs - from project.us_daily.config import Config + from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + from data_provider.us_daily.config import Config config = Config( start_date="2020-01", @@ -92,12 +92,12 @@ def test_skips_existing_historical_month(self): client = MagicMock() with patch( - 
"project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] ): with patch( - "project.us_daily.agg_fetcher.is_current_month", return_value=False + "data_provider.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("project.us_daily.agg_fetcher.time.sleep"): + with patch("data_provider.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) # Should not have called list_aggs since file exists and not current month @@ -105,8 +105,8 @@ def test_skips_existing_historical_month(self): self.assertEqual(result["failures"], []) def test_fetches_missing_month(self): - from project.us_daily.agg_fetcher import fetch_ticker_aggs - from project.us_daily.config import Config + from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + from data_provider.us_daily.config import Config config = Config( start_date="2020-01", @@ -128,12 +128,12 @@ def test_fetches_missing_month(self): client.list_aggs.return_value = iter([agg1]) with patch( - "project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] ): with patch( - "project.us_daily.agg_fetcher.is_current_month", return_value=False + "data_provider.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("project.us_daily.agg_fetcher.time.sleep"): + with patch("data_provider.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) # Verify file was created @@ -149,8 +149,8 @@ def test_fetches_missing_month(self): self.assertEqual(result["failures"], []) def test_refreshes_current_month(self): - from project.us_daily.agg_fetcher import fetch_ticker_aggs - from project.us_daily.config import Config + from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + from data_provider.us_daily.config import Config config = Config( 
start_date="2026-04", @@ -178,12 +178,12 @@ def test_refreshes_current_month(self): client.list_aggs.return_value = iter([agg1]) with patch( - "project.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] + "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] ): with patch( - "project.us_daily.agg_fetcher.is_current_month", return_value=True + "data_provider.us_daily.agg_fetcher.is_current_month", return_value=True ): - with patch("project.us_daily.agg_fetcher.time.sleep"): + with patch("data_provider.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) # Should have called list_aggs even though file exists @@ -191,8 +191,8 @@ def test_refreshes_current_month(self): self.assertEqual(result["failures"], []) def test_records_failure_after_retries(self): - from project.us_daily.agg_fetcher import fetch_ticker_aggs - from project.us_daily.config import Config + from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs + from data_provider.us_daily.config import Config config = Config( start_date="2020-01", @@ -205,12 +205,12 @@ def test_records_failure_after_retries(self): client.list_aggs.side_effect = Exception("API timeout") with patch( - "project.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] ): with patch( - "project.us_daily.agg_fetcher.is_current_month", return_value=False + "data_provider.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("project.us_daily.agg_fetcher.time.sleep"): + with patch("data_provider.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) self.assertEqual(len(result["failures"]), 1) diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index 9458d194..ee1212e4 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -6,18 +6,18 @@ class 
TestConfig(unittest.TestCase): def test_default_config(self): - from project.us_daily.config import Config + from data_provider.us_daily.config import Config config = Config() self.assertEqual(config.refresh_tickers, False) self.assertEqual(config.market_cap_min, 5e9) - self.assertEqual(config.start_date, "2020-01") - self.assertEqual(config.request_interval, 20) + self.assertEqual(config.start_date, "2026-01") + self.assertEqual(config.request_interval, 12) self.assertEqual(config.data_dir, "data/us_daily") self.assertEqual(config.max_retries, 3) def test_load_config_from_file(self): - from project.us_daily.config import load_config + from data_provider.us_daily.config import load_config with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f) @@ -28,13 +28,13 @@ def test_load_config_from_file(self): self.assertEqual(config.refresh_tickers, True) self.assertEqual(config.market_cap_min, 1e10) # defaults preserved for unspecified fields - self.assertEqual(config.start_date, "2020-01") - self.assertEqual(config.request_interval, 20) + self.assertEqual(config.start_date, "2026-01") + self.assertEqual(config.request_interval, 12) finally: os.unlink(tmp_path) def test_load_config_missing_file_uses_defaults(self): - from project.us_daily.config import load_config + from data_provider.us_daily.config import load_config config = load_config("/nonexistent/path/config.json") self.assertEqual(config.refresh_tickers, False) diff --git a/tests/test_us_daily/test_storage.py b/tests/test_us_daily/test_storage.py index 11a7f207..5e4bcf4a 100644 --- a/tests/test_us_daily/test_storage.py +++ b/tests/test_us_daily/test_storage.py @@ -13,19 +13,19 @@ def tearDown(self): shutil.rmtree(self.test_dir) def test_get_tickers_file_path(self): - from project.us_daily.storage import get_tickers_file_path + from data_provider.us_daily.storage import get_tickers_file_path result = 
get_tickers_file_path("data/us_daily") self.assertEqual(result, "data/us_daily/top_tickers.json") def test_get_month_file_path(self): - from project.us_daily.storage import get_month_file_path + from data_provider.us_daily.storage import get_month_file_path result = get_month_file_path("data/us_daily", "AAPL", "2020-01") self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") def test_save_and_load_json(self): - from project.us_daily.storage import save_json, load_json + from data_provider.us_daily.storage import save_json, load_json file_path = os.path.join(self.test_dir, "sub", "test.json") data = {"key": "value", "num": 42} @@ -34,14 +34,14 @@ def test_save_and_load_json(self): self.assertEqual(loaded, data) def test_save_json_creates_parent_dirs(self): - from project.us_daily.storage import save_json + from data_provider.us_daily.storage import save_json file_path = os.path.join(self.test_dir, "a", "b", "c", "test.json") save_json(file_path, {"x": 1}) self.assertTrue(os.path.exists(file_path)) def test_file_exists(self): - from project.us_daily.storage import file_exists + from data_provider.us_daily.storage import file_exists existing = os.path.join(self.test_dir, "exists.json") with open(existing, "w") as f: diff --git a/tests/test_us_daily/test_ticker_filter.py b/tests/test_us_daily/test_ticker_filter.py index 4e2b48bb..6a652961 100644 --- a/tests/test_us_daily/test_ticker_filter.py +++ b/tests/test_us_daily/test_ticker_filter.py @@ -19,8 +19,8 @@ def _make_details(self, ticker_str, name, market_cap, exchange): return d def test_filter_top_tickers_filters_by_market_cap(self): - from project.us_daily.ticker_filter import filter_top_tickers - from project.us_daily.config import Config + from data_provider.us_daily.ticker_filter import filter_top_tickers + from data_provider.us_daily.config import Config config = Config(market_cap_min=5e9, request_interval=0) @@ -42,8 +42,8 @@ def mock_details(ticker): client.get_ticker_details.side_effect = mock_details - 
with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): - with patch("project.us_daily.ticker_filter.time.sleep"): + with patch("data_provider.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("data_provider.us_daily.ticker_filter.time.sleep"): result = filter_top_tickers(client, config) tickers = [t["ticker"] for t in result] @@ -51,8 +51,8 @@ def mock_details(ticker): self.assertNotIn("TINY", tickers) def test_filter_top_tickers_includes_required_fields(self): - from project.us_daily.ticker_filter import filter_top_tickers - from project.us_daily.config import Config + from data_provider.us_daily.ticker_filter import filter_top_tickers + from data_provider.us_daily.config import Config config = Config(market_cap_min=5e9, request_interval=0) @@ -66,8 +66,8 @@ def test_filter_top_tickers_includes_required_fields(self): "MSFT", "Microsoft Corporation", 2.8e12, "XNYS" ) - with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): - with patch("project.us_daily.ticker_filter.time.sleep"): + with patch("data_provider.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): + with patch("data_provider.us_daily.ticker_filter.time.sleep"): result = filter_top_tickers(client, config) self.assertEqual(len(result), 1) @@ -78,8 +78,8 @@ def test_filter_top_tickers_includes_required_fields(self): self.assertEqual(entry["exchange"], "XNYS") def test_filter_skips_ticker_on_details_error(self): - from project.us_daily.ticker_filter import filter_top_tickers - from project.us_daily.config import Config + from data_provider.us_daily.ticker_filter import filter_top_tickers + from data_provider.us_daily.config import Config config = Config(market_cap_min=5e9, request_interval=0) @@ -98,8 +98,8 @@ def mock_details(ticker): client.get_ticker_details.side_effect = mock_details - with patch("project.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): - with patch("project.us_daily.ticker_filter.time.sleep"): + with patch("data_provider.us_daily.ticker_filter.EXCHANGES", 
["XNAS"]): + with patch("data_provider.us_daily.ticker_filter.time.sleep"): result = filter_top_tickers(client, config) tickers = [t["ticker"] for t in result] diff --git a/test_websocket/base_ws.py b/tests/test_websocket/base_ws.py similarity index 100% rename from test_websocket/base_ws.py rename to tests/test_websocket/base_ws.py diff --git a/test_websocket/mock_server.py b/tests/test_websocket/mock_server.py similarity index 100% rename from test_websocket/mock_server.py rename to tests/test_websocket/mock_server.py diff --git a/test_websocket/test_conn.py b/tests/test_websocket/test_conn.py similarity index 100% rename from test_websocket/test_conn.py rename to tests/test_websocket/test_conn.py From 1314b667569efde56bf6837a5d8c9bcd679a2bdf Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:51:52 +0800 Subject: [PATCH 10/43] refactor: update data_provider imports to processor in source files --- src/processor/us_daily/__main__.py | 8 ++++---- src/processor/us_daily/agg_fetcher.py | 4 ++-- src/processor/us_daily/ticker_filter.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py index 9e68c5f7..56d9deac 100644 --- a/src/processor/us_daily/__main__.py +++ b/src/processor/us_daily/__main__.py @@ -5,15 +5,15 @@ from massive import RESTClient -from data_provider.us_daily.config import load_config -from data_provider.us_daily.storage import ( +from processor.us_daily.config import load_config +from processor.us_daily.storage import ( get_tickers_file_path, file_exists, save_json, load_json, ) -from data_provider.us_daily.ticker_filter import filter_top_tickers -from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs +from processor.us_daily.ticker_filter import filter_top_tickers +from processor.us_daily.agg_fetcher import fetch_ticker_aggs def setup_logging(): diff --git 
a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py index 656c0820..cebf8d53 100644 --- a/src/processor/us_daily/agg_fetcher.py +++ b/src/processor/us_daily/agg_fetcher.py @@ -4,8 +4,8 @@ from datetime import date, datetime from typing import List, Tuple -from data_provider.us_daily.config import Config -from data_provider.us_daily.storage import ( +from processor.us_daily.config import Config +from processor.us_daily.storage import ( get_month_file_path, file_exists, save_json, diff --git a/src/processor/us_daily/ticker_filter.py b/src/processor/us_daily/ticker_filter.py index 1056d134..23647e93 100644 --- a/src/processor/us_daily/ticker_filter.py +++ b/src/processor/us_daily/ticker_filter.py @@ -2,7 +2,7 @@ import time from typing import List -from data_provider.us_daily.config import Config +from processor.us_daily.config import Config logger = logging.getLogger("us_daily") From e9c35e85a2d8e0c91a97b310e176a3373d7ab89a Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:52:08 +0800 Subject: [PATCH 11/43] refactor: update data_provider imports to processor in test files --- tests/test_us_daily/test_agg_fetcher.py | 60 +++++++++++------------ tests/test_us_daily/test_config.py | 6 +-- tests/test_us_daily/test_storage.py | 10 ++-- tests/test_us_daily/test_ticker_filter.py | 24 ++++----- 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py index 6375ecf9..cb6823e7 100644 --- a/tests/test_us_daily/test_agg_fetcher.py +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -9,19 +9,19 @@ class TestGenerateMonths(unittest.TestCase): def test_generate_months_basic(self): - from data_provider.us_daily.agg_fetcher import generate_months + from processor.us_daily.agg_fetcher import generate_months result = generate_months("2020-01", "2020-04") self.assertEqual(result, ["2020-01", "2020-02", 
"2020-03", "2020-04"]) def test_generate_months_cross_year(self): - from data_provider.us_daily.agg_fetcher import generate_months + from processor.us_daily.agg_fetcher import generate_months result = generate_months("2023-11", "2024-02") self.assertEqual(result, ["2023-11", "2023-12", "2024-01", "2024-02"]) def test_generate_months_single(self): - from data_provider.us_daily.agg_fetcher import generate_months + from processor.us_daily.agg_fetcher import generate_months result = generate_months("2024-06", "2024-06") self.assertEqual(result, ["2024-06"]) @@ -29,21 +29,21 @@ def test_generate_months_single(self): class TestMonthBounds(unittest.TestCase): def test_month_bounds_january(self): - from data_provider.us_daily.agg_fetcher import get_month_bounds + from processor.us_daily.agg_fetcher import get_month_bounds start, end = get_month_bounds("2020-01") self.assertEqual(start, "2020-01-01") self.assertEqual(end, "2020-01-31") def test_month_bounds_february_leap(self): - from data_provider.us_daily.agg_fetcher import get_month_bounds + from processor.us_daily.agg_fetcher import get_month_bounds start, end = get_month_bounds("2024-02") self.assertEqual(start, "2024-02-01") self.assertEqual(end, "2024-02-29") def test_month_bounds_february_non_leap(self): - from data_provider.us_daily.agg_fetcher import get_month_bounds + from processor.us_daily.agg_fetcher import get_month_bounds start, end = get_month_bounds("2023-02") self.assertEqual(start, "2023-02-01") @@ -51,16 +51,16 @@ def test_month_bounds_february_non_leap(self): class TestIsCurrentMonth(unittest.TestCase): - @patch("data_provider.us_daily.agg_fetcher.date") + @patch("processor.us_daily.agg_fetcher.date") def test_is_current_month_true(self, mock_date): - from data_provider.us_daily.agg_fetcher import is_current_month + from processor.us_daily.agg_fetcher import is_current_month mock_date.today.return_value = date(2026, 4, 22) self.assertTrue(is_current_month("2026-04")) - 
@patch("data_provider.us_daily.agg_fetcher.date") + @patch("processor.us_daily.agg_fetcher.date") def test_is_current_month_false(self, mock_date): - from data_provider.us_daily.agg_fetcher import is_current_month + from processor.us_daily.agg_fetcher import is_current_month mock_date.today.return_value = date(2026, 4, 22) self.assertFalse(is_current_month("2026-03")) @@ -74,8 +74,8 @@ def tearDown(self): shutil.rmtree(self.test_dir) def test_skips_existing_historical_month(self): - from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs - from data_provider.us_daily.config import Config + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config config = Config( start_date="2020-01", @@ -92,12 +92,12 @@ def test_skips_existing_historical_month(self): client = MagicMock() with patch( - "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] ): with patch( - "data_provider.us_daily.agg_fetcher.is_current_month", return_value=False + "processor.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("data_provider.us_daily.agg_fetcher.time.sleep"): + with patch("processor.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) # Should not have called list_aggs since file exists and not current month @@ -105,8 +105,8 @@ def test_skips_existing_historical_month(self): self.assertEqual(result["failures"], []) def test_fetches_missing_month(self): - from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs - from data_provider.us_daily.config import Config + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config config = Config( start_date="2020-01", @@ -128,12 +128,12 @@ def test_fetches_missing_month(self): client.list_aggs.return_value = iter([agg1]) with patch( - 
"data_provider.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] ): with patch( - "data_provider.us_daily.agg_fetcher.is_current_month", return_value=False + "processor.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("data_provider.us_daily.agg_fetcher.time.sleep"): + with patch("processor.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) # Verify file was created @@ -149,8 +149,8 @@ def test_fetches_missing_month(self): self.assertEqual(result["failures"], []) def test_refreshes_current_month(self): - from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs - from data_provider.us_daily.config import Config + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config config = Config( start_date="2026-04", @@ -178,12 +178,12 @@ def test_refreshes_current_month(self): client.list_aggs.return_value = iter([agg1]) with patch( - "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] + "processor.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] ): with patch( - "data_provider.us_daily.agg_fetcher.is_current_month", return_value=True + "processor.us_daily.agg_fetcher.is_current_month", return_value=True ): - with patch("data_provider.us_daily.agg_fetcher.time.sleep"): + with patch("processor.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) # Should have called list_aggs even though file exists @@ -191,8 +191,8 @@ def test_refreshes_current_month(self): self.assertEqual(result["failures"], []) def test_records_failure_after_retries(self): - from data_provider.us_daily.agg_fetcher import fetch_ticker_aggs - from data_provider.us_daily.config import Config + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config config = Config( 
start_date="2020-01", @@ -205,12 +205,12 @@ def test_records_failure_after_retries(self): client.list_aggs.side_effect = Exception("API timeout") with patch( - "data_provider.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] ): with patch( - "data_provider.us_daily.agg_fetcher.is_current_month", return_value=False + "processor.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("data_provider.us_daily.agg_fetcher.time.sleep"): + with patch("processor.us_daily.agg_fetcher.time.sleep"): result = fetch_ticker_aggs(client, "AAPL", config) self.assertEqual(len(result["failures"]), 1) diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index ee1212e4..369a3de2 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -6,7 +6,7 @@ class TestConfig(unittest.TestCase): def test_default_config(self): - from data_provider.us_daily.config import Config + from processor.us_daily.config import Config config = Config() self.assertEqual(config.refresh_tickers, False) @@ -17,7 +17,7 @@ def test_default_config(self): self.assertEqual(config.max_retries, 3) def test_load_config_from_file(self): - from data_provider.us_daily.config import load_config + from processor.us_daily.config import load_config with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f) @@ -34,7 +34,7 @@ def test_load_config_from_file(self): os.unlink(tmp_path) def test_load_config_missing_file_uses_defaults(self): - from data_provider.us_daily.config import load_config + from processor.us_daily.config import load_config config = load_config("/nonexistent/path/config.json") self.assertEqual(config.refresh_tickers, False) diff --git a/tests/test_us_daily/test_storage.py b/tests/test_us_daily/test_storage.py index 5e4bcf4a..1d595598 100644 --- 
a/tests/test_us_daily/test_storage.py +++ b/tests/test_us_daily/test_storage.py @@ -13,19 +13,19 @@ def tearDown(self): shutil.rmtree(self.test_dir) def test_get_tickers_file_path(self): - from data_provider.us_daily.storage import get_tickers_file_path + from processor.us_daily.storage import get_tickers_file_path result = get_tickers_file_path("data/us_daily") self.assertEqual(result, "data/us_daily/top_tickers.json") def test_get_month_file_path(self): - from data_provider.us_daily.storage import get_month_file_path + from processor.us_daily.storage import get_month_file_path result = get_month_file_path("data/us_daily", "AAPL", "2020-01") self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") def test_save_and_load_json(self): - from data_provider.us_daily.storage import save_json, load_json + from processor.us_daily.storage import save_json, load_json file_path = os.path.join(self.test_dir, "sub", "test.json") data = {"key": "value", "num": 42} @@ -34,14 +34,14 @@ def test_save_and_load_json(self): self.assertEqual(loaded, data) def test_save_json_creates_parent_dirs(self): - from data_provider.us_daily.storage import save_json + from processor.us_daily.storage import save_json file_path = os.path.join(self.test_dir, "a", "b", "c", "test.json") save_json(file_path, {"x": 1}) self.assertTrue(os.path.exists(file_path)) def test_file_exists(self): - from data_provider.us_daily.storage import file_exists + from processor.us_daily.storage import file_exists existing = os.path.join(self.test_dir, "exists.json") with open(existing, "w") as f: diff --git a/tests/test_us_daily/test_ticker_filter.py b/tests/test_us_daily/test_ticker_filter.py index 6a652961..284a9579 100644 --- a/tests/test_us_daily/test_ticker_filter.py +++ b/tests/test_us_daily/test_ticker_filter.py @@ -19,8 +19,8 @@ def _make_details(self, ticker_str, name, market_cap, exchange): return d def test_filter_top_tickers_filters_by_market_cap(self): - from data_provider.us_daily.ticker_filter import 
filter_top_tickers - from data_provider.us_daily.config import Config + from processor.us_daily.ticker_filter import filter_top_tickers + from processor.us_daily.config import Config config = Config(market_cap_min=5e9, request_interval=0) @@ -42,8 +42,8 @@ def mock_details(ticker): client.get_ticker_details.side_effect = mock_details - with patch("data_provider.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): - with patch("data_provider.us_daily.ticker_filter.time.sleep"): + with patch("processor.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("processor.us_daily.ticker_filter.time.sleep"): result = filter_top_tickers(client, config) tickers = [t["ticker"] for t in result] @@ -51,8 +51,8 @@ def mock_details(ticker): self.assertNotIn("TINY", tickers) def test_filter_top_tickers_includes_required_fields(self): - from data_provider.us_daily.ticker_filter import filter_top_tickers - from data_provider.us_daily.config import Config + from processor.us_daily.ticker_filter import filter_top_tickers + from processor.us_daily.config import Config config = Config(market_cap_min=5e9, request_interval=0) @@ -66,8 +66,8 @@ def test_filter_top_tickers_includes_required_fields(self): "MSFT", "Microsoft Corporation", 2.8e12, "XNYS" ) - with patch("data_provider.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): - with patch("data_provider.us_daily.ticker_filter.time.sleep"): + with patch("processor.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): + with patch("processor.us_daily.ticker_filter.time.sleep"): result = filter_top_tickers(client, config) self.assertEqual(len(result), 1) @@ -78,8 +78,8 @@ def test_filter_top_tickers_includes_required_fields(self): self.assertEqual(entry["exchange"], "XNYS") def test_filter_skips_ticker_on_details_error(self): - from data_provider.us_daily.ticker_filter import filter_top_tickers - from data_provider.us_daily.config import Config + from processor.us_daily.ticker_filter import filter_top_tickers + from processor.us_daily.config import 
Config config = Config(market_cap_min=5e9, request_interval=0) @@ -98,8 +98,8 @@ def mock_details(ticker): client.get_ticker_details.side_effect = mock_details - with patch("data_provider.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): - with patch("data_provider.us_daily.ticker_filter.time.sleep"): + with patch("processor.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): + with patch("processor.us_daily.ticker_filter.time.sleep"): result = filter_top_tickers(client, config) tickers = [t["ticker"] for t in result] From f22168169d2cab9fa2a48bc0550a385e5011e364 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:52:16 +0800 Subject: [PATCH 12/43] fix: use __file__-relative path for config.json lookup --- src/processor/us_daily/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py index e4cea829..6316a455 100644 --- a/src/processor/us_daily/config.py +++ b/src/processor/us_daily/config.py @@ -13,8 +13,10 @@ class Config: max_retries: int = 3 -def load_config(config_path: str = "data_provider/us_daily/config.json") -> Config: +def load_config(config_path: str = None) -> Config: config = Config() + if config_path is None: + config_path = os.path.join(os.path.dirname(__file__), "config.json") if os.path.exists(config_path): with open(config_path, "r") as f: data = json.load(f) From 020e39a74f444ba01dc184d985b1a839e2257a01 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:53:13 +0800 Subject: [PATCH 13/43] refactor: rewrite pyproject.toml from Poetry to PEP 621 + setuptools --- pyproject.toml | 94 +++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 058444af..534fc16e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,47 +1,63 @@ -[tool.poetry] 
+[project] name = "massive" version = "0.0.0" description = "Official Massive (formerly Polygon.io) REST and Websocket client." -authors = ["massive.com"] -license = "MIT" -homepage = "https://massive.com" -repository = "https://github.com/massive-com/client-python" -documentation = "https://massive.com/docs" -keywords = [ - "massive", - "free", - "rest", - "stock", - "market", - "data", - "api", - "massive.com", - "websocket", - "client" +requires-python = ">=3.9" +license = {text = "MIT"} + +dependencies = [ + "urllib3>=1.26.9", + "websockets>=14.0", + "certifi>=2022.5.18,<2027.0.0", + "pandas", +] + +[project.optional-dependencies] +efinance = ["efinance"] +akshare = ["akshare"] +tushare = ["tushare"] +pytdx = ["pytdx"] +baostock = ["baostock"] +yfinance = ["yfinance"] +longbridge = ["longbridge-openapi"] +all = [ + "efinance", + "akshare", + "tushare", + "pytdx", + "baostock", + "yfinance", + "longbridge-openapi", +] +dev = [ + "black>=24.8.0", + "mypy>=1.19", + "types-urllib3>=1.26.25", + "types-certifi>=2021.10.8", + "types-setuptools>=81.0.0", + "pook>=2.1.4", + "orjson>=3.11.5", + "pytest", ] -packages = [ - { include = "massive" } +docs = [ + "Sphinx>=7.4.7", + "sphinx-rtd-theme>=3.1.0", + "sphinx-autodoc-typehints>=2.3.0", ] -[tool.poetry.dependencies] -python = "^3.9" -urllib3 = ">=1.26.9" -websockets = ">=14.0" -certifi = ">=2022.5.18,<2027.0.0" +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" -[tool.poetry.dev-dependencies] -black = "^24.8.0" -mypy = "^1.19" -types-urllib3 = "^1.26.25" -Sphinx = "^7.4.7" -sphinx-rtd-theme = "^3.1.0" -# keep this in sync with docs/requirements.txt for readthedocs.org -sphinx-autodoc-typehints = "^2.3.0" -types-certifi = "^2021.10.8" -types-setuptools = "^81.0.0" -pook = "^2.1.4" -orjson = "^3.11.5" +[tool.setuptools.packages.find] +where = ["src"] -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" +[tool.pytest.ini_options] +testpaths = 
["tests"] +pythonpath = ["src"] + +[tool.black] +line-length = 88 + +[tool.mypy] +python_version = "3.9" From a1db12d0bde2058b25b4f45ed1b4b4395c4bdcc4 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:53:19 +0800 Subject: [PATCH 14/43] chore: remove Makefile and poetry.lock --- .../prds/.20250423-OPT_DATA_PROVIDER.md.swp | Bin 4096 -> 12288 bytes Makefile | 49 - poetry.lock | 1120 ----------------- 3 files changed, 1169 deletions(-) delete mode 100644 Makefile delete mode 100644 poetry.lock diff --git a/.claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp b/.claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp index 221a1f672f8bdb5b8cf688321df73c2be2218c2e..1ab15263874d9964053a5a047311dd727e47e487 100644 GIT binary patch literal 12288 zcmeI2O>7%Q6vw9s2@14D^o|xH4pfRaaj66q1%k?vGn)a<2P^K z{N8(GS>B@^NA~TPUa@r&Jf0?G>vwhUwyhtLt`|0uo)3G?(FZn`RAnV&m4Z> zG+`hJ2m*qDARq_`0)l`bAP5Ko{~ZG1;3o13w)041OD{IAo9?+b-inJLAP5Kof`A|( z2nYg#fFK|U2m*qDARq|*3kf*!kGlRCA?F`Q@c93K_4ofT+X(p?x(n5yDwKonY$fCy z=xZnoy#_rB-QGe-0P2F?fL@1W==Y}x`3?FN`Vm@ul8`;ntI(Y%2)PJ#L;7Yy81w^l z6-q$wLHnUS&}Qf-#CRL}9x~&78+sP{1MzRi^)9a8LgFC^2m*qDARq|*zX-TJinqs4 zPpLtl)6d37*xY$K+;VV=#)`P>uJvbp6wj?c=2Q6%dss)&XY#Z*_As`jO)@S-*_8o8 z+NIubIH>HF~?y6 zCmc#R;OcWY{Z8Kr#jB9EHi=fp=ul2unl)~Wo13Aj0V6V~RnF_@qAS&D`LN;%De`@9 zd~`d*PDd~lIPP_OLYU4ca-f)Qla-JQYp*wAV?~z8(}hVoFmD-|26DgRu!1zB(q~F^ z=o$(@tK5VIAv^CzqWb6+t(3ZZWD~uXTgESeZ`D-gxb=4WopV5;= znks0OoK~HrLouzktdGts?mo=sff&hKG&Rn~7umHjV#h+XINS=CR$6E|2g}j4$`*2F zU2WX3jI}V8NRk9T7HA2jB59?Q68uR`8B+OkV2uzZ>5AYLq6=ve}$zZ)F^yX*Ff7-HF`td^AD8 zu2-rtt&~8T*l59Yke#Oan>>(}Y8>k{f-0j#3dYhB9r_HI>e17Bx{49JX6HuORDM@S zhgOPfrHfiQ%O+2=c*IO#Ji;<@V_;6t&9BzEN*5dc`S!KtITkHoA+0uyNa%2aMyF|M zfJUmu@(8PBO}+~gEPq2Q6=^}$N)fF*gF$`fI!op@?1`7oA&c-{4j9I)kZO(%cuYT) z8nFtnsb>&%JyJ22KBqB+ldTWV;|0DM8q|1Wpbyv$^sE^SyF6zUZsB~Q=_E!JpT`rF zI_(PKjRjL0(Tuq|7vD(AX-iod!HMS-=Y#PoOejV~)qB7&wg=&DZG*c+0b0A4y=42i{nTBWfh8 
zssEY_t)*!MWO)tK>>}1-qe}?NW}QWU9W^^7-U=+0GL{j}G_P~M9*s>HbMp-+r_|=+ zheOVXTQ+P8i?lpy*7^Fru+EfbbN)GR8vJUsPC1$>(%Hzm)6nNt!lY$vh zG&c>kqFnRP+zn .massive/websocket.json - -test_rest: - poetry run python -m unittest discover -s test_rest - -test_websocket: - poetry run python -m unittest discover -s test_websocket - -## Run the unit tests -test: test_rest test_websocket diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index 8cb653a5..00000000 --- a/poetry.lock +++ /dev/null @@ -1,1120 +0,0 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. - -[[package]] -name = "alabaster" -version = "0.7.16" -description = "A light, configurable Sphinx theme" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, - {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, -] - -[[package]] -name = "attrs" -version = "22.1.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, - {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, -] - -[package.extras] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] -docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", 
"pytest-mypy-plugins", "zope.interface"] -tests-no-zope = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] - -[[package]] -name = "babel" -version = "2.17.0" -description = "Internationalization utilities" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, - {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, -] - -[package.extras] -dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] - -[[package]] -name = "black" -version = "24.8.0" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "black-24.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6"}, - {file = "black-24.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb"}, - {file = "black-24.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42"}, - {file = "black-24.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a"}, - {file = "black-24.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1"}, - {file = "black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af"}, - {file = 
"black-24.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4"}, - {file = "black-24.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af"}, - {file = "black-24.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368"}, - {file = "black-24.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed"}, - {file = "black-24.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018"}, - {file = "black-24.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2"}, - {file = "black-24.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd"}, - {file = "black-24.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2"}, - {file = "black-24.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e"}, - {file = "black-24.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920"}, - {file = "black-24.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c"}, - {file = "black-24.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e"}, - {file = "black-24.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47"}, - {file = "black-24.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb"}, - {file = "black-24.8.0-py3-none-any.whl", hash = "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed"}, - {file = "black-24.8.0.tar.gz", hash = "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "certifi" -version = "2026.2.25" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.7" -groups = ["main", "dev"] -files = [ - {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"}, - {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, -] - -[[package]] -name = "charset-normalizer" -version = "2.1.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-optional = false -python-versions = ">=3.6.0" -groups = ["dev"] -files = [ - {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, - {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, -] - -[package.extras] -unicode-backport = ["unicodedata2"] - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." 
-optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev"] -markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "docutils" -version = "0.21.2" -description = "Docutils -- Python Documentation Utilities" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, - {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, -] - -[[package]] -name = "furl" -version = "2.1.3" -description = "URL manipulation made simple." -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "furl-2.1.3-py2.py3-none-any.whl", hash = "sha256:9ab425062c4217f9802508e45feb4a83e54324273ac4b202f1850363309666c0"}, - {file = "furl-2.1.3.tar.gz", hash = "sha256:5a6188fe2666c484a12159c18be97a1977a71d632ef5bb867ef15f54af39cc4e"}, -] - -[package.dependencies] -orderedmultidict = ">=1.0.1" -six = ">=1.8.0" - -[[package]] -name = "idna" -version = "3.7" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, -] - -[[package]] -name = "imagesize" -version = "1.4.1" -description = "Getting image size from png/jpeg/jpeg2000/gif file" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, 
!=3.2.*, !=3.3.*" -groups = ["dev"] -files = [ - {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, - {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, -] - -[[package]] -name = "importlib-metadata" -version = "8.7.0" -description = "Read metadata from Python packages" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "python_version == \"3.9\"" -files = [ - {file = "importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd"}, - {file = "importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000"}, -] - -[package.dependencies] -zipp = ">=3.20" - -[package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] -perf = ["ipython"] -test = ["flufl.flake8", "importlib_resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] -type = ["pytest-mypy"] - -[[package]] -name = "jinja2" -version = "3.1.6" -description = "A very fast and expressive template engine." 
-optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, - {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "jsonschema" -version = "4.17.1" -description = "An implementation of JSON Schema validation for Python" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "jsonschema-4.17.1-py3-none-any.whl", hash = "sha256:410ef23dcdbca4eaedc08b850079179883c2ed09378bd1f760d4af4aacfa28d7"}, - {file = "jsonschema-4.17.1.tar.gz", hash = "sha256:05b2d22c83640cde0b7e0aa329ca7754fbd98ea66ad8ae24aa61328dfe057fa3"}, -] - -[package.dependencies] -attrs = ">=17.4.0" -pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" - -[package.extras] -format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] - -[[package]] -name = "librt" -version = "0.7.2" -description = "Mypyc runtime library" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "platform_python_implementation != \"PyPy\"" -files = [ - {file = "librt-0.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0090f146caa593f47e641307bd0bef778b76629b1d7a5bec95d3a83ed49d49de"}, - {file = "librt-0.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c44321bc013cf4b41169e463a2c441412497cea44dbf79eee0ccad8104d05b7b"}, - {file = "librt-0.7.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8913d92224da3e0ef54e40cdc36f1c0789f375349aa36f7fd44c89dfda1e6d24"}, - 
{file = "librt-0.7.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f968b951f0713b15ad56090c5499bc63e4718e7636d698e1e1fc2eb66c855f97"}, - {file = "librt-0.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e8801d41dcfbb76407daa5e35e69ebe7b0fc826b7c63d462cbbab530b5672b"}, - {file = "librt-0.7.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9672ee71a08c5b1cb5bb92fc5cc07f88c947716ff3c6b8c3bc0f57ee7ddc12fa"}, - {file = "librt-0.7.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9786b621b5c7e6e2aaab0cacf118c1c3af5f70b9c0e3fe614734b1d9fbc37cd3"}, - {file = "librt-0.7.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:332bd6505e345c0d92ad5ede7419bdd2c96ad7526681be5feb2bb26667819c4f"}, - {file = "librt-0.7.2-cp310-cp310-win32.whl", hash = "sha256:0ca4ff852be76094074bede6fcd1fc75374962ec365aceb396fa7aa3bc733c12"}, - {file = "librt-0.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:dd2b75815270534c62e203ee5755ae1f66540ce4ee08432d4b1e623ddb2fa175"}, - {file = "librt-0.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f8f02d40621f55c659ff1ed7ea91320f8bc16e75fe67f822445cd0e9b5fa1d1"}, - {file = "librt-0.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0bc8425c7f9e9bfc16fae651b56b402b11e25c223a90353fb71fa47ed3e1c048"}, - {file = "librt-0.7.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f9a8a6e3cea9c01d2d9c55cf81ab68580b10d01c54b82cab89e85ba036e1d272"}, - {file = "librt-0.7.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de0aceb7d19f6dd4aa6594be45f82af19c74bd0fcf2fa2d42c116d25826f1625"}, - {file = "librt-0.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d29bb29aba2a849ea8255744655b359ce420ab55018c31a9b58c103415e47918"}, - {file = "librt-0.7.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:f172088974eac0101ecbe460d89411c945fa57601e4fc3dc461e718991322e00"}, - {file = "librt-0.7.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab4ca61a3b774d3b1886b26f7cc295e75a42ebc26c7a1a04e11c427e5313922f"}, - {file = "librt-0.7.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d891fb657a14d8d77e3b565332e064fbcd67741e99043634e5b7cbded88d9d8e"}, - {file = "librt-0.7.2-cp311-cp311-win32.whl", hash = "sha256:2272e1a4752ad0b9f59793f63ffce06178fbe15a1fd4d2d8ad9ea2fe026d9912"}, - {file = "librt-0.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:eab548b8c771a1846d328a01e83c14ed0414853bf9a91fe7c692f74de513238f"}, - {file = "librt-0.7.2-cp311-cp311-win_arm64.whl", hash = "sha256:0259a726416369e22306177be3404cc29b88fc806d31100802c816fd29f58873"}, - {file = "librt-0.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18d56630bd5793ca860f148cfa6d79a81b3d9c7d5544991c906a8f412eecce63"}, - {file = "librt-0.7.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4076beec27478116ff276731daf676ecd03ceae03fabdefdca400f7e837f477a"}, - {file = "librt-0.7.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7698a3b75f0aa004fa089410b44088628851b3c62c9044822c61a8367fc8caea"}, - {file = "librt-0.7.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e706fdfef8692ee82ac5464c822800d99b436511a9bba402a88e878751b342a9"}, - {file = "librt-0.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39d2b8df134910a2c58d91fbf50cd6ea0b815a50fcdf45de1e21af0a10fcb606"}, - {file = "librt-0.7.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:035c5f2f4bd96326f4528ce48bd60ed19ae35f0c000540971aa597a441e83509"}, - {file = "librt-0.7.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:14798167e2be3cb8202c9617d90d5e4b2b50a92a9c30f8aceb672e12cf26abbf"}, - {file = "librt-0.7.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:f6b564c8e9e768fe79651d626917b4b3d10b3d587779eda2231e235b64caab41"}, - {file = "librt-0.7.2-cp312-cp312-win32.whl", hash = "sha256:605c7bbc94aa30288d33d2ade86d3a70c939efa01f3e64d98d72a72466d43161"}, - {file = "librt-0.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:a48f4c5d3d12eced3462d135ecfe0c4e2a143e64161a471b3f3c1491330fcd74"}, - {file = "librt-0.7.2-cp312-cp312-win_arm64.whl", hash = "sha256:0cbe93690e07c9d4ac76bed107e1be8a612dd6fbc94e21a17a5cff002f5f55d5"}, - {file = "librt-0.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b8fdc5e6eb9698ed66bb652f18fa637853fd03b016864bed098f1a28a8d129d"}, - {file = "librt-0.7.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:66d0f0de87033ab7e54f48bd46c042d047ecc3d4e4d5b7b1071e934f34d97054"}, - {file = "librt-0.7.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9da65ed19f6c7c4bbebd7acb37d4dbb95943792b51a74bc96d35673270853e16"}, - {file = "librt-0.7.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eeb76e18c2adac6bcc709ba7f728acca2d42baf0c7a3b9eba392bab84d591961"}, - {file = "librt-0.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b5d5f8f617fc3db80864f7353f43db69d9282bf9cd74c7e6cf5be1a7e5d5a83f"}, - {file = "librt-0.7.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cae1b429f9077254622d7d12ade5d04a6b326b2ff456d032fa3fa653ef994979"}, - {file = "librt-0.7.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:edd20b266055b41ccee667b9373b3eff9d77b8e0890fd26a469c89ef48b29bf0"}, - {file = "librt-0.7.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cf748211b5782fb9e85945d7ffdef9587bf303344e2ad3e65dee55b44b1c8ac1"}, - {file = "librt-0.7.2-cp313-cp313-win32.whl", hash = "sha256:c4fefe752dcf30564b031e85e6cbc70d82685e52fbbfffc6fab275a47b5c3de7"}, - {file = "librt-0.7.2-cp313-cp313-win_amd64.whl", hash = 
"sha256:65cd928b7e0c1142235e54e4b615a0a7f4ad046d1d4cbdd454c311bafca97aed"}, - {file = "librt-0.7.2-cp313-cp313-win_arm64.whl", hash = "sha256:10d6d5d52026e44ddd0f638e822a5d451df0d5b6701cb5112362a3a9f4b00229"}, - {file = "librt-0.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0baabd8daa4339f6cbffada3c66795722c37880ce768de83c7cba379d469ee3b"}, - {file = "librt-0.7.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:462d9672a4ade935d78c70713847bcba643bf4d94c013fdf29ea5f153bb15922"}, - {file = "librt-0.7.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838b16343fc4ed6869edb3ed9dc89c4bc9b113b1c6028592bede4a93ad360aa4"}, - {file = "librt-0.7.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b6ee74bfa7055c07e0acb56226efd49687488486db8fcfdea5da4cf25323a91"}, - {file = "librt-0.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5e3502a543b9b3f906f6d4e88582b7ba13320897e19c60d7c098fa9fda1611f"}, - {file = "librt-0.7.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cb0f330d6af5bcfba339690694bf7c4aedabfa3dd40b17212a2b94a417962ccf"}, - {file = "librt-0.7.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:610a25e8239836fe8eff92628602db13dca5d867e868503239c37f3809b3ce9a"}, - {file = "librt-0.7.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98854ffd0dd6fd64b8a4be4973593746038152e6c239251de908b5a176d8f64a"}, - {file = "librt-0.7.2-cp314-cp314-win32.whl", hash = "sha256:879f789b22e9534df279a6cd3af12d26f8fd96785c47db0d2508304cfc6fd7d9"}, - {file = "librt-0.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:cba3ee432767960ce1e908c67c1fa136831c25ac3705e1e188e63ddaf1b46a06"}, - {file = "librt-0.7.2-cp314-cp314-win_arm64.whl", hash = "sha256:d775e5de996105c9a85136c18bce94204f57021af77a913644e8f9b17733a917"}, - {file = "librt-0.7.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:7fecc4dcc74e0c97ca36435048e3392ee6aa2ae3e77c285394192f9ad1e1a283"}, - {file = "librt-0.7.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d643941996b678699fed64271d02439fe23d31d8dee45f0e0b02c81ee77a4d79"}, - {file = "librt-0.7.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dcefbd09a5db038693d22adc1962111d4c2df0b838fde2f3a61fceec9953b9c5"}, - {file = "librt-0.7.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11445c0460d4522c5959f7770015cdcd7dd025ac2c095c50b78e27878f9cab15"}, - {file = "librt-0.7.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c10ae62472a03dc8db52d5dca4a9af5d0935899cf8c550565a39645bf7735d87"}, - {file = "librt-0.7.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a38575adf344ca7423bfb10c3a7b5df066dfbe9b95e8b35f1f79eb84e4b38cad"}, - {file = "librt-0.7.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:2dcae85482674912bdd9dc98c6a236a9698c2c13ee53366a996851e3460da26a"}, - {file = "librt-0.7.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f02f9a7a8b720ae3c46b4df736a71d2ef07b59f3149180ad1e1eba7fccabaadf"}, - {file = "librt-0.7.2-cp314-cp314t-win32.whl", hash = "sha256:062de7065ec0d060f0541602a16bed566c4b948aa1d8466c483bb949e27e0ef7"}, - {file = "librt-0.7.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fb6a190f76a687b034362e610c4990306ad0d913e98a8e588dcec91486797869"}, - {file = "librt-0.7.2-cp314-cp314t-win_arm64.whl", hash = "sha256:35e1c435ee1e24ba2b018172a3ed1caed5275168a016e560e695057acd532add"}, - {file = "librt-0.7.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9da7e00662b75fe2245f6c709c1a2c3b84e3c70aef0c088d3d25cfcfb6ec13c6"}, - {file = "librt-0.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6d412f959d485db6ad02a7b4685f7cbd2d6e80530d95e1add553bc4278c415c5"}, - {file = "librt-0.7.2-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash 
= "sha256:c52990838b821f0fc86a40d244360426550312dac55da984a0878947d62598b6"}, - {file = "librt-0.7.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e53979afcf7bcc6c4a7d31d61e88feb83ed9f3f00407a835df3283ff450eac"}, - {file = "librt-0.7.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca151523e0ea06015e070ccc5fdea0dc374f292fba62e65ba315aef241296c93"}, - {file = "librt-0.7.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:025871d474b48eae232562d575ee7a82fa69ac12b0aa9c9138c27900178fd8ca"}, - {file = "librt-0.7.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:13b72520500fae1e6d10fb10a8972858a10ed4b6edb7e800f1d11b385803a868"}, - {file = "librt-0.7.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5292bf3d6756301ff72e578703837afebc6660b235cf338ab9a1074cb3b988fd"}, - {file = "librt-0.7.2-cp39-cp39-win32.whl", hash = "sha256:22a7a751590444a90e3a663205caa582da4a2e6cdbb7515ae579ca7b95e015ae"}, - {file = "librt-0.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:fd612a78cd330c0371d2b918bf73aeb976f2c031562c4b571e0100069626b390"}, - {file = "librt-0.7.2.tar.gz", hash = "sha256:48aa0f311bdf90ec9a63e3669b6aff04967f24f2f67fe9372c570a21dc9ae873"}, -] - -[[package]] -name = "MarkupSafe" -version = "2.1.1" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, - {file = 
"MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = 
"sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, - {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, -] - -[[package]] -name = "mypy" -version = "1.19.1" -description = "Optional static typing for Python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec"}, - {file = "mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b"}, - {file = "mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6"}, - {file = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"}, - {file = "mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1"}, - {file = "mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac"}, - {file = "mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288"}, - {file = "mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab"}, - {file = "mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6"}, - {file = 
"mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331"}, - {file = "mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925"}, - {file = "mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042"}, - {file = "mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1"}, - {file = "mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e"}, - {file = "mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2"}, - {file = "mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8"}, - {file = "mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a"}, - {file = "mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13"}, - {file = "mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250"}, - {file = "mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b"}, - {file = "mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e"}, - {file = "mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef"}, - {file = "mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75"}, - {file = "mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd"}, - {file = "mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1"}, - {file = "mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718"}, - {file = "mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b"}, - {file = "mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045"}, - {file = "mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957"}, - {file = "mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f"}, - {file = "mypy-1.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bcfc336a03a1aaa26dfce9fff3e287a3ba99872a157561cbfcebe67c13308e3"}, - {file = "mypy-1.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b7951a701c07ea584c4fe327834b92a30825514c868b1f69c30445093fdd9d5a"}, - {file = "mypy-1.19.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b13cfdd6c87fc3efb69ea4ec18ef79c74c3f98b4e5498ca9b85ab3b2c2329a67"}, - {file = "mypy-1.19.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f28f99c824ecebcdaa2e55d82953e38ff60ee5ec938476796636b86afa3956e"}, - {file = 
"mypy-1.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c608937067d2fc5a4dd1a5ce92fd9e1398691b8c5d012d66e1ddd430e9244376"}, - {file = "mypy-1.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:409088884802d511ee52ca067707b90c883426bd95514e8cfda8281dc2effe24"}, - {file = "mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247"}, - {file = "mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba"}, -] - -[package.dependencies] -librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""} -mypy_extensions = ">=1.0.0" -pathspec = ">=0.9.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing_extensions = ">=4.6.0" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -faster-cache = ["orjson"] -install-types = ["pip"] -mypyc = ["setuptools (>=50)"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." 
-optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, -] - -[[package]] -name = "orderedmultidict" -version = "1.0.1" -description = "Ordered Multivalue Dictionary" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "orderedmultidict-1.0.1-py2.py3-none-any.whl", hash = "sha256:43c839a17ee3cdd62234c47deca1a8508a3f2ca1d0678a3bf791c87cf84adbf3"}, - {file = "orderedmultidict-1.0.1.tar.gz", hash = "sha256:04070bbb5e87291cc9bfa51df413677faf2141c73c61d2a5f7b26bea3cd882ad"}, -] - -[package.dependencies] -six = ">=1.8.0" - -[[package]] -name = "orjson" -version = "3.11.5" -description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "orjson-3.11.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:df9eadb2a6386d5ea2bfd81309c505e125cfc9ba2b1b99a97e60985b0b3665d1"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccc70da619744467d8f1f49a8cadae5ec7bbe054e5232d95f92ed8737f8c5870"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:073aab025294c2f6fc0807201c76fdaed86f8fc4be52c440fb78fbb759a1ac09"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:835f26fa24ba0bb8c53ae2a9328d1706135b74ec653ed933869b74b6909e63fd"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667c132f1f3651c14522a119e4dd631fad98761fa960c55e8e7430bb2a1ba4ac"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:42e8961196af655bb5e63ce6c60d25e8798cd4dfbc04f4203457fa3869322c2e"}, - {file = "orjson-3.11.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75412ca06e20904c19170f8a24486c4e6c7887dea591ba18a1ab572f1300ee9f"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6af8680328c69e15324b5af3ae38abbfcf9cbec37b5346ebfd52339c3d7e8a18"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a86fe4ff4ea523eac8f4b57fdac319faf037d3c1be12405e6a7e86b3fbc4756a"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e607b49b1a106ee2086633167033afbd63f76f2999e9236f638b06b112b24ea7"}, - {file = "orjson-3.11.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7339f41c244d0eea251637727f016b3d20050636695bc78345cce9029b189401"}, - {file = "orjson-3.11.5-cp310-cp310-win32.whl", hash = "sha256:8be318da8413cdbbce77b8c5fac8d13f6eb0f0db41b30bb598631412619572e8"}, - {file = "orjson-3.11.5-cp310-cp310-win_amd64.whl", hash = "sha256:b9f86d69ae822cabc2a0f6c099b43e8733dda788405cba2665595b7e8dd8d167"}, - {file = "orjson-3.11.5-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9c8494625ad60a923af6b2b0bd74107146efe9b55099e20d7740d995f338fcd8"}, - {file = "orjson-3.11.5-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:7bb2ce0b82bc9fd1168a513ddae7a857994b780b2945a8c51db4ab1c4b751ebc"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67394d3becd50b954c4ecd24ac90b5051ee7c903d167459f93e77fc6f5b4c968"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:298d2451f375e5f17b897794bcc3e7b821c0f32b4788b9bcae47ada24d7f3cf7"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa5e4244063db8e1d87e0f54c3f7522f14b2dc937e65d5241ef0076a096409fd"}, - {file = 
"orjson-3.11.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1db2088b490761976c1b2e956d5d4e6409f3732e9d79cfa69f876c5248d1baf9"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2ed66358f32c24e10ceea518e16eb3549e34f33a9d51f99ce23b0251776a1ef"}, - {file = "orjson-3.11.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2021afda46c1ed64d74b555065dbd4c2558d510d8cec5ea6a53001b3e5e82a9"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b42ffbed9128e547a1647a3e50bc88ab28ae9daa61713962e0d3dd35e820c125"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:8d5f16195bb671a5dd3d1dbea758918bada8f6cc27de72bd64adfbd748770814"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c0e5d9f7a0227df2927d343a6e3859bebf9208b427c79bd31949abcc2fa32fa5"}, - {file = "orjson-3.11.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:23d04c4543e78f724c4dfe656b3791b5f98e4c9253e13b2636f1af5d90e4a880"}, - {file = "orjson-3.11.5-cp311-cp311-win32.whl", hash = "sha256:c404603df4865f8e0afe981aa3c4b62b406e6d06049564d58934860b62b7f91d"}, - {file = "orjson-3.11.5-cp311-cp311-win_amd64.whl", hash = "sha256:9645ef655735a74da4990c24ffbd6894828fbfa117bc97c1edd98c282ecb52e1"}, - {file = "orjson-3.11.5-cp311-cp311-win_arm64.whl", hash = "sha256:1cbf2735722623fcdee8e712cbaaab9e372bbcb0c7924ad711b261c2eccf4a5c"}, - {file = "orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d"}, - {file = "orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f"}, - {file 
= "orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa"}, - {file = "orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3"}, - {file = "orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca"}, - {file = "orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98"}, - {file = "orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875"}, - {file = "orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe"}, - {file = "orjson-3.11.5-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:3b01799262081a4c47c035dd77c1301d40f568f77cc7ec1bb7db5d63b0a01629"}, - {file = "orjson-3.11.5-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:61de247948108484779f57a9f406e4c84d636fa5a59e411e6352484985e8a7c3"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:894aea2e63d4f24a7f04a1908307c738d0dce992e9249e744b8f4e8dd9197f39"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ddc21521598dbe369d83d4d40338e23d4101dad21dae0e79fa20465dbace019f"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cce16ae2f5fb2c53c3eafdd1706cb7b6530a67cc1c17abe8ec747f5cd7c0c51"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e46c762d9f0e1cfb4ccc8515de7f349abbc95b59cb5a2bd68df5973fdef913f8"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7345c759276b798ccd6d77a87136029e71e66a8bbf2d2755cbdde1d82e78706"}, - {file = "orjson-3.11.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75bc2e59e6a2ac1dd28901d07115abdebc4563b5b07dd612bf64260a201b1c7f"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:54aae9b654554c3b4edd61896b978568c6daa16af96fa4681c9b5babd469f863"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:4bdd8d164a871c4ec773f9de0f6fe8769c2d6727879c37a9666ba4183b7f8228"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a261fef929bcf98a60713bf5e95ad067cea16ae345d9a35034e73c3990e927d2"}, - {file = "orjson-3.11.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c028a394c766693c5c9909dec76b24f37e6a1b91999e8d0c0d5feecbe93c3e05"}, - {file = "orjson-3.11.5-cp313-cp313-win32.whl", hash = "sha256:2cc79aaad1dfabe1bd2d50ee09814a1253164b3da4c00a78c458d82d04b3bdef"}, - {file = 
"orjson-3.11.5-cp313-cp313-win_amd64.whl", hash = "sha256:ff7877d376add4e16b274e35a3f58b7f37b362abf4aa31863dadacdd20e3a583"}, - {file = "orjson-3.11.5-cp313-cp313-win_arm64.whl", hash = "sha256:59ac72ea775c88b163ba8d21b0177628bd015c5dd060647bbab6e22da3aad287"}, - {file = "orjson-3.11.5-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e446a8ea0a4c366ceafc7d97067bfd55292969143b57e3c846d87fc701e797a0"}, - {file = "orjson-3.11.5-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:53deb5addae9c22bbe3739298f5f2196afa881ea75944e7720681c7080909a81"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82cd00d49d6063d2b8791da5d4f9d20539c5951f965e45ccf4e96d33505ce68f"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3fd15f9fc8c203aeceff4fda211157fad114dde66e92e24097b3647a08f4ee9e"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9df95000fbe6777bf9820ae82ab7578e8662051bb5f83d71a28992f539d2cda7"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92a8d676748fca47ade5bc3da7430ed7767afe51b2f8100e3cd65e151c0eaceb"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa0f513be38b40234c77975e68805506cad5d57b3dfd8fe3baa7f4f4051e15b4"}, - {file = "orjson-3.11.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1863e75b92891f553b7922ce4ee10ed06db061e104f2b7815de80cdcb135ad"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d4be86b58e9ea262617b8ca6251a2f0d63cc132a6da4b5fcc8e0a4128782c829"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:b923c1c13fa02084eb38c9c065afd860a5cff58026813319a06949c3af5732ac"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_i686.whl", hash = 
"sha256:1b6bd351202b2cd987f35a13b5e16471cf4d952b42a73c391cc537974c43ef6d"}, - {file = "orjson-3.11.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bb150d529637d541e6af06bbe3d02f5498d628b7f98267ff87647584293ab439"}, - {file = "orjson-3.11.5-cp314-cp314-win32.whl", hash = "sha256:9cc1e55c884921434a84a0c3dd2699eb9f92e7b441d7f53f3941079ec6ce7499"}, - {file = "orjson-3.11.5-cp314-cp314-win_amd64.whl", hash = "sha256:a4f3cb2d874e03bc7767c8f88adaa1a9a05cecea3712649c3b58589ec7317310"}, - {file = "orjson-3.11.5-cp314-cp314-win_arm64.whl", hash = "sha256:38b22f476c351f9a1c43e5b07d8b5a02eb24a6ab8e75f700f7d479d4568346a5"}, - {file = "orjson-3.11.5-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1b280e2d2d284a6713b0cfec7b08918ebe57df23e3f76b27586197afca3cb1e9"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c8d8a112b274fae8c5f0f01954cb0480137072c271f3f4958127b010dfefaec"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f0a2ae6f09ac7bd47d2d5a5305c1d9ed08ac057cda55bb0a49fa506f0d2da00"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0d87bd1896faac0d10b4f849016db81a63e4ec5df38757ffae84d45ab38aa71"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:801a821e8e6099b8c459ac7540b3c32dba6013437c57fdcaec205b169754f38c"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69a0f6ac618c98c74b7fbc8c0172ba86f9e01dbf9f62aa0b1776c2231a7bffe5"}, - {file = "orjson-3.11.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea7339bdd22e6f1060c55ac31b6a755d86a5b2ad3657f2669ec243f8e3b2bdb"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4dad582bc93cef8f26513e12771e76385a7e6187fd713157e971c784112aad56"}, - {file = 
"orjson-3.11.5-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:0522003e9f7fba91982e83a97fec0708f5a714c96c4209db7104e6b9d132f111"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:7403851e430a478440ecc1258bcbacbfbd8175f9ac1e39031a7121dd0de05ff8"}, - {file = "orjson-3.11.5-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5f691263425d3177977c8d1dd896cde7b98d93cbf390b2544a090675e83a6a0a"}, - {file = "orjson-3.11.5-cp39-cp39-win32.whl", hash = "sha256:61026196a1c4b968e1b1e540563e277843082e9e97d78afa03eb89315af531f1"}, - {file = "orjson-3.11.5-cp39-cp39-win_amd64.whl", hash = "sha256:09b94b947ac08586af635ef922d69dc9bc63321527a3a04647f4986a73f4bd30"}, - {file = "orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5"}, -] - -[[package]] -name = "packaging" -version = "23.1" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, - {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, -] - -[[package]] -name = "pathspec" -version = "0.10.2" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "pathspec-0.10.2-py3-none-any.whl", hash = "sha256:88c2606f2c1e818b978540f73ecc908e13999c6c3a383daf3705652ae79807a5"}, - {file = "pathspec-0.10.2.tar.gz", hash = "sha256:8f6bf73e5758fd365ef5d58ce09ac7c27d2833a8d7da51712eac6e27e35141b0"}, -] - -[[package]] -name = "platformdirs" -version = "2.5.4" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "platformdirs-2.5.4-py3-none-any.whl", hash = "sha256:af0276409f9a02373d540bf8480021a048711d572745aef4b7842dad245eba10"}, - {file = "platformdirs-2.5.4.tar.gz", hash = "sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7"}, -] - -[package.extras] -docs = ["furo (>=2022.9.29)", "proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.4)"] -test = ["appdirs (==1.4.4)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] - -[[package]] -name = "pook" -version = "2.1.4" -description = "HTTP traffic mocking and expectations made easy" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pook-2.1.4-py3-none-any.whl", hash = "sha256:3f273ab189874dd775a15c3fa1b1bf89f28b001d2619c5f909e4d3f7df66d36e"}, - {file = "pook-2.1.4.tar.gz", hash = "sha256:2bcbc7d58d1d88b6f2da98c711f5391d5f690292bdd5ff2ccda927576500937a"}, -] - -[package.dependencies] -furl = ">=0.5.6" -jsonschema = ">=2.5.1" -xmltodict = ">=0.11.0" - -[[package]] -name = "pygments" -version = "2.20.0" -description = "Pygments is a syntax highlighting package written in Python." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, - {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, -] - -[package.extras] -windows-terminal = ["colorama (>=0.4.6)"] - -[[package]] -name = "pyrsistent" -version = "0.19.2" -description = "Persistent/Functional/Immutable data structures" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "pyrsistent-0.19.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d6982b5a0237e1b7d876b60265564648a69b14017f3b5f908c5be2de3f9abb7a"}, - {file = "pyrsistent-0.19.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:187d5730b0507d9285a96fca9716310d572e5464cadd19f22b63a6976254d77a"}, - {file = "pyrsistent-0.19.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:055ab45d5911d7cae397dc418808d8802fb95262751872c841c170b0dbf51eed"}, - {file = "pyrsistent-0.19.2-cp310-cp310-win32.whl", hash = "sha256:456cb30ca8bff00596519f2c53e42c245c09e1a4543945703acd4312949bfd41"}, - {file = "pyrsistent-0.19.2-cp310-cp310-win_amd64.whl", hash = "sha256:b39725209e06759217d1ac5fcdb510e98670af9e37223985f330b611f62e7425"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2aede922a488861de0ad00c7630a6e2d57e8023e4be72d9d7147a9fcd2d30712"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879b4c2f4d41585c42df4d7654ddffff1239dc4065bc88b745f0341828b83e78"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43bec251bbd10e3cb58ced80609c5c1eb238da9ca78b964aea410fb820d00d6"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-win32.whl", hash = 
"sha256:d690b18ac4b3e3cab73b0b7aa7dbe65978a172ff94970ff98d82f2031f8971c2"}, - {file = "pyrsistent-0.19.2-cp37-cp37m-win_amd64.whl", hash = "sha256:3ba4134a3ff0fc7ad225b6b457d1309f4698108fb6b35532d015dca8f5abed73"}, - {file = "pyrsistent-0.19.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a178209e2df710e3f142cbd05313ba0c5ebed0a55d78d9945ac7a4e09d923308"}, - {file = "pyrsistent-0.19.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e371b844cec09d8dc424d940e54bba8f67a03ebea20ff7b7b0d56f526c71d584"}, - {file = "pyrsistent-0.19.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111156137b2e71f3a9936baf27cb322e8024dac3dc54ec7fb9f0bcf3249e68bb"}, - {file = "pyrsistent-0.19.2-cp38-cp38-win32.whl", hash = "sha256:e5d8f84d81e3729c3b506657dddfe46e8ba9c330bf1858ee33108f8bb2adb38a"}, - {file = "pyrsistent-0.19.2-cp38-cp38-win_amd64.whl", hash = "sha256:9cd3e9978d12b5d99cbdc727a3022da0430ad007dacf33d0bf554b96427f33ab"}, - {file = "pyrsistent-0.19.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f1258f4e6c42ad0b20f9cfcc3ada5bd6b83374516cd01c0960e3cb75fdca6770"}, - {file = "pyrsistent-0.19.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21455e2b16000440e896ab99e8304617151981ed40c29e9507ef1c2e4314ee95"}, - {file = "pyrsistent-0.19.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfd880614c6237243ff53a0539f1cb26987a6dc8ac6e66e0c5a40617296a045e"}, - {file = "pyrsistent-0.19.2-cp39-cp39-win32.whl", hash = "sha256:71d332b0320642b3261e9fee47ab9e65872c2bd90260e5d225dabeed93cbd42b"}, - {file = "pyrsistent-0.19.2-cp39-cp39-win_amd64.whl", hash = "sha256:dec3eac7549869365fe263831f576c8457f6c833937c68542d08fde73457d291"}, - {file = "pyrsistent-0.19.2-py3-none-any.whl", hash = "sha256:ea6b79a02a28550c98b6ca9c35b9f492beaa54d7c5c9e9949555893c8a9234d0"}, - {file = "pyrsistent-0.19.2.tar.gz", hash = 
"sha256:bfa0351be89c9fcbcb8c9879b826f4353be10f58f8a677efab0c017bf7137ec2"}, -] - -[[package]] -name = "requests" -version = "2.32.4" -description = "Python HTTP for Humans." -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, - {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset_normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["dev"] -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "snowballstemmer" -version = "2.2.0" -description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
-optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, - {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, -] - -[[package]] -name = "sphinx" -version = "7.4.7" -description = "Python documentation generator" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"}, - {file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"}, -] - -[package.dependencies] -alabaster = ">=0.7.14,<0.8.0" -babel = ">=2.13" -colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""} -docutils = ">=0.20,<0.22" -imagesize = ">=1.3" -importlib-metadata = {version = ">=6.0", markers = "python_version < \"3.10\""} -Jinja2 = ">=3.1" -packaging = ">=23.0" -Pygments = ">=2.17" -requests = ">=2.30.0" -snowballstemmer = ">=2.2" -sphinxcontrib-applehelp = "*" -sphinxcontrib-devhelp = "*" -sphinxcontrib-htmlhelp = ">=2.0.0" -sphinxcontrib-jsmath = "*" -sphinxcontrib-qthelp = "*" -sphinxcontrib-serializinghtml = ">=1.1.9" -tomli = {version = ">=2", markers = "python_version < \"3.11\""} - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"] -test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] - -[[package]] -name = "sphinx-autodoc-typehints" -version = "2.3.0" -description = "Type hints (PEP 484) support for the Sphinx autodoc extension" -optional = false -python-versions = ">=3.9" -groups 
= ["dev"] -files = [ - {file = "sphinx_autodoc_typehints-2.3.0-py3-none-any.whl", hash = "sha256:3098e2c6d0ba99eacd013eb06861acc9b51c6e595be86ab05c08ee5506ac0c67"}, - {file = "sphinx_autodoc_typehints-2.3.0.tar.gz", hash = "sha256:535c78ed2d6a1bad393ba9f3dfa2602cf424e2631ee207263e07874c38fde084"}, -] - -[package.dependencies] -sphinx = ">=7.3.5" - -[package.extras] -docs = ["furo (>=2024.1.29)"] -numpy = ["nptyping (>=2.5)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.4.4)", "defusedxml (>=0.7.1)", "diff-cover (>=9)", "pytest (>=8.1.1)", "pytest-cov (>=5)", "sphobjinv (>=2.3.1)", "typing-extensions (>=4.11)"] - -[[package]] -name = "sphinx-rtd-theme" -version = "3.1.0" -description = "Read the Docs theme for Sphinx" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl", hash = "sha256:1785824ae8e6632060490f67cf3a72d404a85d2d9fc26bce3619944de5682b89"}, - {file = "sphinx_rtd_theme-3.1.0.tar.gz", hash = "sha256:b44276f2c276e909239a4f6c955aa667aaafeb78597923b1c60babc76db78e4c"}, -] - -[package.dependencies] -docutils = ">0.18,<0.23" -sphinx = ">=6,<10" -sphinxcontrib-jquery = ">=4,<5" - -[package.extras] -dev = ["bump2version", "transifex-client", "twine", "wheel"] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.2" -description = "sphinxcontrib-applehelp is a sphinx extension which outputs Apple help books" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-applehelp-1.0.2.tar.gz", hash = "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"}, - {file = "sphinxcontrib_applehelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -description = "sphinxcontrib-devhelp is a sphinx extension which 
outputs Devhelp document." -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"}, - {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.0" -description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" -optional = false -python-versions = ">=3.6" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-htmlhelp-2.0.0.tar.gz", hash = "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2"}, - {file = "sphinxcontrib_htmlhelp-2.0.0-py2.py3-none-any.whl", hash = "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["html5lib", "pytest"] - -[[package]] -name = "sphinxcontrib-jquery" -version = "4.1" -description = "Extension to include jQuery on newer Sphinx releases" -optional = false -python-versions = ">=2.7" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a"}, - {file = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl", hash = "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae"}, -] - -[package.dependencies] -Sphinx = ">=1.8" - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -description = "A sphinx extension which renders display math in HTML via JavaScript" -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, - {file = 
"sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, -] - -[package.extras] -test = ["flake8", "mypy", "pytest"] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." -optional = false -python-versions = ">=3.5" -groups = ["dev"] -files = [ - {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"}, - {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"}, -] - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "2.0.0" -description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"}, - {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"}, -] - -[package.extras] -lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] -standalone = ["Sphinx (>=5)"] -test = ["pytest"] - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version < \"3.11\"" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] - -[[package]] -name = "types-certifi" -version = "2021.10.8.3" -description = 
"Typing stubs for certifi" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f"}, - {file = "types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a"}, -] - -[[package]] -name = "types-setuptools" -version = "81.0.0.20260209" -description = "Typing stubs for setuptools" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "types_setuptools-81.0.0.20260209-py3-none-any.whl", hash = "sha256:4facf71e3f953f8f5ac0020cd6c1b5e493aaff0183e85830bc34870b6abf8475"}, - {file = "types_setuptools-81.0.0.20260209.tar.gz", hash = "sha256:2c2eb64499b41b672c387f6f45678a28d20a143a81b45a5c77acbfd4da0df3e1"}, -] - -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, - {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[[package]] -name = "urllib3" -version = "2.6.3" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.9" -groups = ["main", "dev"] -files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, -] - -[package.extras] -brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] - -[[package]] -name = "websockets" -version = "15.0.1" -description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, - {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, - {file = "websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf"}, - {file = "websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9"}, - {file = "websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c"}, - {file = "websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256"}, - {file = "websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57"}, - {file = "websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792"}, - {file = "websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8"}, - {file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3"}, - 
{file = "websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf"}, - {file = "websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85"}, - {file = "websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665"}, - {file = "websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5"}, - {file = "websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4"}, - {file = "websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597"}, - {file = "websockets-15.0.1-cp312-cp312-win32.whl", hash = 
"sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9"}, - {file = "websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675"}, - {file = "websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f"}, - {file = "websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d"}, - {file = "websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4"}, - {file = "websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa"}, - {file = "websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561"}, - {file = 
"websockets-15.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5f4c04ead5aed67c8a1a20491d54cdfba5884507a48dd798ecaf13c74c4489f5"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abdc0c6c8c648b4805c5eacd131910d2a7f6455dfd3becab248ef108e89ab16a"}, - {file = "websockets-15.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a625e06551975f4b7ea7102bc43895b90742746797e2e14b70ed61c43a90f09b"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d591f8de75824cbb7acad4e05d2d710484f15f29d4a915092675ad3456f11770"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47819cea040f31d670cc8d324bb6435c6f133b8c7a19ec3d61634e62f8d8f9eb"}, - {file = "websockets-15.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac017dd64572e5c3bd01939121e4d16cf30e5d7e110a119399cf3133b63ad054"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4a9fac8e469d04ce6c25bb2610dc535235bd4aa14996b4e6dbebf5e007eba5ee"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363c6f671b761efcb30608d24925a382497c12c506b51661883c3e22337265ed"}, - {file = "websockets-15.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2034693ad3097d5355bfdacfffcbd3ef5694f9718ab7f29c29689a9eae841880"}, - {file = "websockets-15.0.1-cp39-cp39-win32.whl", hash = "sha256:3b1ac0d3e594bf121308112697cf4b32be538fb1444468fb0a6ae4feebc83411"}, - {file = "websockets-15.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7643a03db5c95c799b89b31c036d5f27eeb4d259c798e878d6937d71832b1e4"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04"}, - {file = "websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7f493881579c90fc262d9cdbaa05a6b54b3811c2f300766748db79f098db9940"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:47b099e1f4fbc95b701b6e85768e1fcdaf1630f3cbe4765fa216596f12310e2e"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f2b6de947f8c757db2db9c71527933ad0019737ec374a8a6be9a956786aaf9"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d08eb4c2b7d6c41da6ca0600c077e93f5adcfd979cd777d747e9ee624556da4b"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b826973a4a2ae47ba357e4e82fa44a463b8f168e1ca775ac64521442b19e87f"}, - {file = "websockets-15.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:21c1fa28a6a7e3cbdc171c694398b6df4744613ce9b36b1a498e816787e28123"}, - {file = "websockets-15.0.1-py3-none-any.whl", hash = 
"sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f"}, - {file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"}, -] - -[[package]] -name = "xmltodict" -version = "0.13.0" -description = "Makes working with XML feel like you are working with JSON" -optional = false -python-versions = ">=3.4" -groups = ["dev"] -files = [ - {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, - {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, -] - -[[package]] -name = "zipp" -version = "3.23.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -markers = "python_version == \"3.9\"" -files = [ - {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, - {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, -] - -[package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] -cover = ["pytest-cov"] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] -type = ["pytest-mypy"] - -[metadata] -lock-version = "2.1" -python-versions = "^3.9" -content-hash = "859fb753010770932bb13116107b08bf52ef64a130954852216b405cc219fc21" From 11c180a5295b30dd38f9b70f8ef8a7b7f05147ac Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:54:26 +0800 Subject: [PATCH 15/43] docs: update CLAUDE.md for new project 
structure --- CLAUDE.md | 55 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 02727fe7..0a8d8793 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,43 +9,63 @@ Official Python client library for the Massive (formerly Polygon.io) REST and We ## Development Commands ```bash -# Install dependencies -poetry install +# Install dependencies (core + all data sources + dev tools) +pip install -e ".[all,dev]" # Run all tests -make test +pytest -# Run only REST or WebSocket tests -make test_rest -make test_websocket +# Run specific test directory +pytest tests/test_rest/ +pytest tests/test_websocket/ +pytest tests/test_us_daily/ # Run a single test file -poetry run python -m unittest test_rest/test_aggs.py +pytest tests/test_rest/test_aggs.py # Run a single test method -poetry run python -m unittest test_rest.test_aggs.TestAggs.test_list_aggs +pytest tests/test_rest/test_aggs.py::TestAggs::test_list_aggs # Code formatting (auto-fix) -make style +black src/ tests/ examples/ # Static type checking -poetry run mypy massive test_* examples +mypy src/ -# Both style + static checks -make lint +# Run US daily data processor +python -m processor.us_daily # Regenerate REST API spec from OpenAPI -make rest-spec +python .massive/rest.py + +# Update WebSocket API spec +curl https://api.massive.com/specs/websocket.json > .massive/websocket.json ``` ## Architecture +### Project Layout + +Standard `src/` layout with three top-level packages: + +- `src/massive/` — REST and WebSocket SDK client library +- `src/provider/` — Multi-source data fetcher layer with automatic failover +- `src/processor/` — Data collection and processing pipelines + ### Client Structure `RESTClient` (in `massive/rest/__init__.py`) uses multiple inheritance to compose domain-specific client mixins (AggsClient, TradesClient, QuotesClient, etc.) on top of `BaseClient` (`massive/rest/base.py`). 
Each mixin lives in its own file under `massive/rest/` and handles one API domain. `WebSocketClient` (`massive/websocket/__init__.py`) is a standalone async client using the `websockets` library with auto-reconnect support. +### Provider Layer + +`DataFetcherManager` (in `provider/base.py`) orchestrates multiple data source fetchers (efinance, akshare, tushare, pytdx, baostock, yfinance, longbridge) with automatic priority-based failover. Each fetcher extends `BaseFetcher` and implements source-specific data retrieval. + +### Processor + +`processor/us_daily/` fetches US stock daily OHLCV data via the Massive REST API. Run with `python -m processor.us_daily`. + ### Models - REST models: `massive/rest/models/` — one file per domain, using the custom `@modelclass` decorator (from `massive/modelclass.py`) which wraps `@dataclass` with flexible init that accepts positional or keyword args. @@ -53,13 +73,14 @@ make rest-spec ### API Spec Codegen -`.massive/rest.py` generates REST client code from `.massive/rest.json` (OpenAPI spec). `.massive/websocket.json` is the WebSocket spec. Use `make rest-spec` / `make ws-spec` to update specs from the API. +`.massive/rest.py` generates REST client code from `.massive/rest.json` (OpenAPI spec). `.massive/websocket.json` is the WebSocket spec. 
### Tests -- `test_rest/` — uses `pook` for HTTP mocking, with mock responses in `test_rest/mocks/` -- `test_websocket/` — has its own mock WebSocket server in `mock_server.py` -- Test base classes: `test_rest/base.py` and `test_websocket/base_ws.py` +- `tests/test_rest/` — uses `pook` for HTTP mocking, with mock responses in `tests/test_rest/mocks/` +- `tests/test_websocket/` — has its own mock WebSocket server in `mock_server.py` +- `tests/test_us_daily/` — unit tests for the US daily processor +- Test base classes: `tests/test_rest/base.py` and `tests/test_websocket/base_ws.py` ### Key Conventions From ac0cd962946fbe13bb11b4445083cb9542ec3bd5 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:18:15 +0800 Subject: [PATCH 16/43] feat: add provider/_data/stock_mapping module --- src/provider/_data/__init__.py | 4 + src/provider/_data/stock_mapping.py | 139 ++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/provider/_data/__init__.py create mode 100644 src/provider/_data/stock_mapping.py diff --git a/src/provider/_data/__init__.py b/src/provider/_data/__init__.py new file mode 100644 index 00000000..d699b040 --- /dev/null +++ b/src/provider/_data/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +from provider._data.stock_mapping import STOCK_NAME_MAP + +__all__ = ["STOCK_NAME_MAP"] diff --git a/src/provider/_data/stock_mapping.py b/src/provider/_data/stock_mapping.py new file mode 100644 index 00000000..211811bf --- /dev/null +++ b/src/provider/_data/stock_mapping.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +""" +=================================== +股票代码与名称映射 +=================================== + +Shared stock code -> name mapping, used by analyzer, data_provider, and name_to_code_resolver. 
+""" + +# Stock code -> name mapping (common stocks) +STOCK_NAME_MAP = { + # === A-shares === + "600519": "贵州茅台", + "000001": "平安银行", + "300750": "宁德时代", + "002594": "比亚迪", + "600036": "招商银行", + "601318": "中国平安", + "000858": "五粮液", + "600276": "恒瑞医药", + "601012": "隆基绿能", + "002475": "立讯精密", + "300059": "东方财富", + "002415": "海康威视", + "600900": "长江电力", + "601166": "兴业银行", + "600028": "中国石化", + "600030": "中信证券", + "600031": "三一重工", + "600050": "中国联通", + "600104": "上汽集团", + "600111": "北方稀土", + "600150": "中国船舶", + "600309": "万华化学", + "600406": "国电南瑞", + "600690": "海尔智家", + "600760": "中航沈飞", + "600809": "山西汾酒", + "600887": "伊利股份", + "600930": "华电新能", + "601088": "中国神华", + "601127": "赛力斯", + "601211": "国泰海通", + "601225": "陕西煤业", + "601288": "农业银行", + "601328": "交通银行", + "601398": "工商银行", + "601601": "中国太保", + "601628": "中国人寿", + "601658": "邮储银行", + "601668": "中国建筑", + "601728": "中国电信", + "601816": "京沪高铁", + "601857": "中国石油", + "601888": "中国中免", + "601899": "紫金矿业", + "601919": "中远海控", + "601985": "中国核电", + "601988": "中国银行", + "603019": "中科曙光", + "603259": "药明康德", + "603501": "豪威集团", + "603993": "洛阳钼业", + "688008": "澜起科技", + "688012": "中微公司", + "688041": "海光信息", + "688111": "金山办公", + "688256": "寒武纪", + "688981": "中芯国际", + # === US stocks === + "AAPL": "苹果", + "TSLA": "特斯拉", + "MSFT": "微软", + "GOOGL": "谷歌A", + "GOOG": "谷歌C", + "AMZN": "亚马逊", + "NVDA": "英伟达", + "META": "Meta", + "AMD": "AMD", + "INTC": "英特尔", + "BABA": "阿里巴巴", + "PDD": "拼多多", + "JD": "京东", + "BIDU": "百度", + "NIO": "蔚来", + "XPEV": "小鹏汽车", + "LI": "理想汽车", + "COIN": "Coinbase", + "MSTR": "MicroStrategy", + # === HK stocks (5-digit) === + "00700": "腾讯控股", + "03690": "美团", + "01810": "小米集团", + "09988": "阿里巴巴", + "09618": "京东集团", + "09888": "百度集团", + "01024": "快手", + "00981": "中芯国际", + "02015": "理想汽车", + "09868": "小鹏汽车", + "00005": "汇丰控股", + "01299": "友邦保险", + "00941": "中国移动", + "00883": "中国海洋石油", +} + + +def is_meaningful_stock_name(name: str | None, stock_code: str) -> bool: + """Return whether a stock name is 
useful for display or caching.""" + if not name: + return False + + normalized_name = str(name).strip() + if not normalized_name: + return False + + normalized_code = (stock_code or "").strip().upper() + if normalized_name.upper() == normalized_code: + return False + + if normalized_name.startswith("股票"): + return False + + placeholder_values = { + "N/A", + "NA", + "NONE", + "NULL", + "--", + "-", + "UNKNOWN", + "TICKER", + } + if normalized_name.upper() in placeholder_values: + return False + + return True From 79fd1c4ebb4d6651eecbb642bb99e58aa2fa2226 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:18:36 +0800 Subject: [PATCH 17/43] feat: add provider/_data/stock_index_loader module --- src/provider/_data/stock_index_loader.py | 136 +++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 src/provider/_data/stock_index_loader.py diff --git a/src/provider/_data/stock_index_loader.py b/src/provider/_data/stock_index_loader.py new file mode 100644 index 00000000..61685f1d --- /dev/null +++ b/src/provider/_data/stock_index_loader.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import json +import logging +from pathlib import Path +from threading import RLock +from typing import Dict, Iterable + +from provider._data.stock_mapping import is_meaningful_stock_name + +logger = logging.getLogger(__name__) + +_STOCK_INDEX_FILENAME = "stocks.index.json" +_STOCK_INDEX_CACHE: Dict[str, str] | None = None +_STOCK_INDEX_CACHE_LOCK = RLock() + + +def get_stock_index_candidate_paths() -> tuple[Path, ...]: + """Return the supported locations for the generated stock index.""" + repo_root = Path(__file__).resolve().parents[2] + return ( + repo_root / "apps" / "dsa-web" / "public" / _STOCK_INDEX_FILENAME, + repo_root / "static" / _STOCK_INDEX_FILENAME, + ) + + +def _add_lookup_key(keys: set[str], value: str) -> None: + candidate = str(value or "").strip() + if 
not candidate: + return + keys.add(candidate) + keys.add(candidate.upper()) + + +def _build_lookup_keys(canonical_code: str, display_code: str) -> Iterable[str]: + keys: set[str] = set() + _add_lookup_key(keys, canonical_code) + _add_lookup_key(keys, display_code) + + canonical_upper = str(canonical_code or "").strip().upper() + display_upper = str(display_code or "").strip().upper() + + if "." in canonical_upper: + base, suffix = canonical_upper.rsplit(".", 1) + if suffix in {"SH", "SZ", "SS", "BJ"} and base.isdigit(): + _add_lookup_key(keys, base) + elif suffix == "HK" and base.isdigit() and 1 <= len(base) <= 5: + digits = base.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + for candidate in (canonical_upper, display_upper): + if candidate.startswith("HK"): + digits = candidate[2:] + if digits.isdigit() and 1 <= len(digits) <= 5: + digits = digits.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + return keys + + +def _load_stock_index_file(index_path: Path) -> Dict[str, str]: + with index_path.open("r", encoding="utf-8") as fh: + raw_items = json.load(fh) + + if not isinstance(raw_items, list): + raise ValueError( + f"Unexpected {_STOCK_INDEX_FILENAME} payload type: {type(raw_items).__name__}" + ) + + stock_name_map: Dict[str, str] = {} + for item in raw_items: + if not isinstance(item, list) or len(item) < 3: + continue + + canonical_code, display_code, name_zh = item[0], item[1], item[2] + if not is_meaningful_stock_name(name_zh, str(display_code or canonical_code or "")): + continue + + for key in _build_lookup_keys(str(canonical_code or ""), str(display_code or "")): + stock_name_map[key] = str(name_zh).strip() + + return stock_name_map + + +def get_stock_name_index_map() -> Dict[str, str]: + """Lazily load and cache the generated stock-name index.""" + global _STOCK_INDEX_CACHE + + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + with _STOCK_INDEX_CACHE_LOCK: + if 
_STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + for candidate_path in get_stock_index_candidate_paths(): + if not candidate_path.is_file(): + continue + + try: + _STOCK_INDEX_CACHE = _load_stock_index_file(candidate_path) + logger.debug( + "[股票名称] 已加载前端股票索引映射: %s (%d 条)", + candidate_path, + len(_STOCK_INDEX_CACHE), + ) + return _STOCK_INDEX_CACHE + except (OSError, TypeError, ValueError) as exc: + logger.debug("[股票名称] 读取股票索引失败 %s: %s", candidate_path, exc) + + _STOCK_INDEX_CACHE = {} + return _STOCK_INDEX_CACHE + + +def get_index_stock_name(stock_code: str) -> str | None: + """Resolve a stock name from the generated frontend stock index.""" + code = str(stock_code or "").strip() + if not code: + return None + + stock_name_map = get_stock_name_index_map() + for key in _build_lookup_keys(code, code): + name = stock_name_map.get(key) + if is_meaningful_stock_name(name, code): + return name + + return None + + +def _clear_stock_index_cache_for_tests() -> None: + global _STOCK_INDEX_CACHE + with _STOCK_INDEX_CACHE_LOCK: + _STOCK_INDEX_CACHE = None From 8ed2cf922347e72066e4155192d7a0e6e4a89ffe Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:18:53 +0800 Subject: [PATCH 18/43] feat: add provider/_config module with slim Config singleton --- src/provider/_config.py | 111 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 src/provider/_config.py diff --git a/src/provider/_config.py b/src/provider/_config.py new file mode 100644 index 00000000..f50f24e6 --- /dev/null +++ b/src/provider/_config.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +""" +Slim configuration singleton for provider module. + +Reads configuration from environment variables. Only includes attributes +actually used by provider fetchers. 
+""" + +import os +from dataclasses import dataclass +from threading import Lock +from typing import Optional + + +# --------------------------------------------------------------------------- +# normalize_report_language (extracted from src/report_language.py) +# --------------------------------------------------------------------------- + +SUPPORTED_REPORT_LANGUAGES = ("zh", "en") + +_REPORT_LANGUAGE_ALIASES = { + "zh-cn": "zh", "zh_cn": "zh", "zh-hans": "zh", "zh_hans": "zh", + "zh-tw": "zh", "zh_tw": "zh", "cn": "zh", "chinese": "zh", + "english": "en", "en-us": "en", "en_us": "en", "en-gb": "en", "en_gb": "en", +} + + +def normalize_report_language(value: Optional[str], default: str = "zh") -> str: + """Normalize report language to a supported short code.""" + candidate = (value or default).strip().lower().replace(" ", "_") + candidate = _REPORT_LANGUAGE_ALIASES.get(candidate, candidate) + return candidate if candidate in SUPPORTED_REPORT_LANGUAGES else default + + +# --------------------------------------------------------------------------- +# Config singleton +# --------------------------------------------------------------------------- + +@dataclass +class Config: + # Tushare + tushare_token: str = "" + # Longbridge + longbridge_app_key: str = "" + longbridge_app_secret: str = "" + longbridge_access_token: str = "" + # TickFlow + tickflow_api_key: str = "" + # Feature toggles + enable_eastmoney_patch: bool = True + enable_realtime_quote: bool = True + enable_chip_distribution: bool = True + enable_fundamental_pipeline: bool = True + prefetch_realtime_quotes: bool = True + # Realtime source priority + realtime_source_priority: str = "tencent,akshare,efinance" + # Fundamental pipeline + fundamental_fetch_timeout_seconds: float = 30.0 + fundamental_stage_timeout_seconds: float = 60.0 + fundamental_cache_ttl_seconds: int = 3600 + fundamental_cache_max_entries: int = 256 + fundamental_retry_max: int = 2 + + +_instance: Optional[Config] = None +_lock = Lock() + 
+ +def _env_bool(key: str, default: str = "true") -> bool: + return os.environ.get(key, default).lower() != "false" + + +def get_config() -> Config: + """Return the global Config singleton, creating it on first call.""" + global _instance + if _instance is not None: + return _instance + with _lock: + if _instance is not None: + return _instance + _instance = Config( + tushare_token=os.environ.get("TUSHARE_TOKEN", ""), + longbridge_app_key=os.environ.get("LONGBRIDGE_APP_KEY", ""), + longbridge_app_secret=os.environ.get("LONGBRIDGE_APP_SECRET", ""), + longbridge_access_token=os.environ.get("LONGBRIDGE_ACCESS_TOKEN", ""), + tickflow_api_key=os.environ.get("TICKFLOW_API_KEY", ""), + enable_eastmoney_patch=_env_bool("ENABLE_EASTMONEY_PATCH"), + enable_realtime_quote=_env_bool("ENABLE_REALTIME_QUOTE"), + enable_chip_distribution=_env_bool("ENABLE_CHIP_DISTRIBUTION"), + enable_fundamental_pipeline=_env_bool("ENABLE_FUNDAMENTAL_PIPELINE"), + prefetch_realtime_quotes=_env_bool("PREFETCH_REALTIME_QUOTES"), + realtime_source_priority=os.environ.get( + "REALTIME_SOURCE_PRIORITY", "tencent,akshare,efinance" + ), + fundamental_fetch_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_FETCH_TIMEOUT_SECONDS", "30") + ), + fundamental_stage_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_STAGE_TIMEOUT_SECONDS", "60") + ), + fundamental_cache_ttl_seconds=int( + os.environ.get("FUNDAMENTAL_CACHE_TTL_SECONDS", "3600") + ), + fundamental_cache_max_entries=int( + os.environ.get("FUNDAMENTAL_CACHE_MAX_ENTRIES", "256") + ), + fundamental_retry_max=int( + os.environ.get("FUNDAMENTAL_RETRY_MAX", "2") + ), + ) + return _instance From 3080e5dc0719f51766f0a0c37bbb9d8ca7b40162 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:20:08 +0800 Subject: [PATCH 19/43] refactor: update base.py imports from src.* to provider._* --- src/provider/base.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) 
diff --git a/src/provider/base.py b/src/provider/base.py index 1c6bb06c..4db74208 100644 --- a/src/provider/base.py +++ b/src/provider/base.py @@ -24,8 +24,8 @@ import pandas as pd import numpy as np -from src.data.stock_index_loader import get_index_stock_name -from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +from provider._data.stock_index_loader import get_index_stock_name +from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name from .fundamental_adapter import AkshareFundamentalAdapter # 配置日志 @@ -561,7 +561,7 @@ def _cache_stock_name(self, stock_code: str, name: Optional[str]) -> Optional[st def _get_tickflow_fetcher(self): """Lazily create a TickFlow fetcher for market-review-only calls.""" - from src.config import get_config + from provider._config import get_config config = get_config() api_key = (getattr(config, "tickflow_api_key", None) or "").strip() @@ -1063,7 +1063,7 @@ def prefetch_realtime_quotes(self, stock_codes: List[str]) -> int: # Normalize all codes stock_codes = [normalize_stock_code(c) for c in stock_codes] - from src.config import get_config + from provider._config import get_config config = get_config() @@ -1148,7 +1148,7 @@ def get_realtime_quote(self, stock_code: str, *, log_final_failure: bool = True) from .akshare_fetcher import _is_us_code from .us_index_mapping import is_us_index_code - from src.config import get_config + from provider._config import get_config config = get_config() @@ -1397,7 +1397,7 @@ def get_chip_distribution(self, stock_code: str): stock_code = normalize_stock_code(stock_code) from .realtime_types import get_chip_circuit_breaker - from src.config import get_config + from provider._config import get_config config = get_config() @@ -1748,7 +1748,7 @@ def _run_with_retry( return None, last_error, total_cost_ms def _get_fundamental_config(self): - from src.config import get_config + from provider._config import get_config return get_config() @staticmethod @@ 
-1970,7 +1970,7 @@ def get_fundamental_context( """ Aggregate fundamental blocks with fail-open semantics. """ - from src.config import get_config + from provider._config import get_config config = get_config() if not config.enable_fundamental_pipeline: @@ -2267,7 +2267,7 @@ def _consume_budget(consumed_ms: int) -> None: def get_capital_flow_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: """资金流向块(fail-open)。""" - from src.config import get_config + from provider._config import get_config config = get_config() stock_code = normalize_stock_code(stock_code) @@ -2331,7 +2331,7 @@ def get_capital_flow_context(self, stock_code: str, budget_seconds: Optional[flo def get_dragon_tiger_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: """龙虎榜块(fail-open)。""" - from src.config import get_config + from provider._config import get_config config = get_config() stock_code = normalize_stock_code(stock_code) @@ -2381,7 +2381,7 @@ def get_dragon_tiger_context(self, stock_code: str, budget_seconds: Optional[flo def get_board_context(self, stock_code: str, budget_seconds: Optional[float] = None) -> Dict[str, Any]: """板块榜单块(fail-open)。""" - from src.config import get_config + from provider._config import get_config config = get_config() stock_code = normalize_stock_code(stock_code) From 35f2b9f311682d4a5e4aee89c56df618ff02da9c Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:20:26 +0800 Subject: [PATCH 20/43] refactor: update fetcher imports from src.* to provider._* --- src/provider/akshare_fetcher.py | 2 +- src/provider/efinance_fetcher.py | 2 +- src/provider/longbridge_fetcher.py | 6 +++--- src/provider/tushare_fetcher.py | 2 +- src/provider/yfinance_fetcher.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/provider/akshare_fetcher.py b/src/provider/akshare_fetcher.py index f7aa984e..265b9d6e 100644 --- 
a/src/provider/akshare_fetcher.py +++ b/src/provider/akshare_fetcher.py @@ -42,7 +42,7 @@ ) from patch.eastmoney_patch import eastmoney_patch -from src.config import get_config +from provider._config import get_config from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS, is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code from .realtime_types import ( UnifiedRealtimeQuote, ChipDistribution, RealtimeSource, diff --git a/src/provider/efinance_fetcher.py b/src/provider/efinance_fetcher.py index 70048c0b..88018023 100644 --- a/src/provider/efinance_fetcher.py +++ b/src/provider/efinance_fetcher.py @@ -52,7 +52,7 @@ _EF_CALL_TIMEOUT = 30 from patch.eastmoney_patch import eastmoney_patch -from src.config import get_config +from provider._config import get_config from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS,is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code from .realtime_types import ( UnifiedRealtimeQuote, RealtimeSource, diff --git a/src/provider/longbridge_fetcher.py b/src/provider/longbridge_fetcher.py index 5da10f16..0fa111c4 100644 --- a/src/provider/longbridge_fetcher.py +++ b/src/provider/longbridge_fetcher.py @@ -162,7 +162,7 @@ def _longbridge_config_kwargs() -> Dict[str, Any]: if "language" in params: try: - from src.report_language import normalize_report_language + from provider._config import normalize_report_language rl = normalize_report_language(os.getenv("REPORT_LANGUAGE"), default="zh") if rl == "zh": @@ -290,7 +290,7 @@ def _is_available(self) -> bool: if self._available is not None: return self._available try: - from src.config import get_config + from provider._config import get_config config = get_config() has_creds = bool( config.longbridge_app_key @@ -323,7 +323,7 @@ def _get_ctx(self): # ── 2. 
Ensure credentials are available in env ── try: - from src.config import get_config + from provider._config import get_config app_config = get_config() app_key = app_config.longbridge_app_key app_secret = app_config.longbridge_app_secret diff --git a/src/provider/tushare_fetcher.py b/src/provider/tushare_fetcher.py index ba679bf9..4e5fed43 100644 --- a/src/provider/tushare_fetcher.py +++ b/src/provider/tushare_fetcher.py @@ -33,7 +33,7 @@ from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS,is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code, _is_hk_market from .realtime_types import UnifiedRealtimeQuote, ChipDistribution -from src.config import get_config +from provider._config import get_config import os from zoneinfo import ZoneInfo diff --git a/src/provider/yfinance_fetcher.py b/src/provider/yfinance_fetcher.py index e9aefde5..4ba41b6b 100644 --- a/src/provider/yfinance_fetcher.py +++ b/src/provider/yfinance_fetcher.py @@ -37,7 +37,7 @@ # 可选导入本地股票映射补丁,若缺失则使用空字典兜底 try: - from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name + from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name except (ImportError, ModuleNotFoundError): STOCK_NAME_MAP = {} From f032ac2eb93ec30ae37972bc7d9b7d76cb230282 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:31:38 +0800 Subject: [PATCH 21/43] feat: add provider/_patch/eastmoney_patch module and update deps - Copy eastmoney_patch.py from reference repo into provider/_patch/ - Update imports in efinance_fetcher.py and akshare_fetcher.py - Add requests, tenacity, fake-useragent to core dependencies --- pyproject.toml | 3 + src/provider/_patch/__init__.py | 0 src/provider/_patch/eastmoney_patch.py | 182 +++++++++++++++++++++++++ src/provider/akshare_fetcher.py | 2 +- src/provider/efinance_fetcher.py | 2 +- 5 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 
src/provider/_patch/__init__.py create mode 100644 src/provider/_patch/eastmoney_patch.py diff --git a/pyproject.toml b/pyproject.toml index 534fc16e..8b0220a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,9 @@ dependencies = [ "websockets>=14.0", "certifi>=2022.5.18,<2027.0.0", "pandas", + "requests", + "tenacity", + "fake-useragent", ] [project.optional-dependencies] diff --git a/src/provider/_patch/__init__.py b/src/provider/_patch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/provider/_patch/eastmoney_patch.py b/src/provider/_patch/eastmoney_patch.py new file mode 100644 index 00000000..b0e08107 --- /dev/null +++ b/src/provider/_patch/eastmoney_patch.py @@ -0,0 +1,182 @@ +import hashlib +import random +import secrets +import threading +import time +import requests +import json +import uuid +import logging +from fake_useragent import UserAgent + +logger = logging.getLogger(__name__) + +original_request = requests.Session.request + +ua = UserAgent() + + +class AuthCache: + def __init__(self): + self.data = None + self.expire_at = 0 + self.lock = threading.Lock() + self.ttl = 20 + + +_cache = AuthCache() + + +class PatchSign: + def __init__(self): + self.patched = False + + def set_patch(self, patched): + self.patched = patched + + def is_patched(self): + return self.patched + + +_patch_sign = PatchSign() + + +def _get_nid(user_agent): + """ + 获取东方财富的 NID 授权令牌 + + Args: + user_agent (str): 用户代理字符串,用于模拟不同的浏览器访问 + + Returns: + str: 返回获取到的 NID 授权令牌,如果获取失败则返回 None + + 功能说明: + 该函数通过向东方财富的授权接口发送请求来获取 NID 令牌, + 用于后续的数据访问授权。函数实现了缓存机制来避免频繁请求。 + """ + now = time.time() + # 检查缓存是否有效,避免重复请求 + if _cache.data and now < _cache.expire_at: + return _cache.data + # 使用线程锁确保并发安全 + with _cache.lock: + try: + def generate_uuid_md5(): + """ + 生成 UUID 并对其进行 MD5 哈希处理 + :return: MD5 哈希值(32位十六进制字符串) + """ + # 生成 UUID + unique_id = str(uuid.uuid4()) + # 对 UUID 进行 MD5 哈希 + md5_hash = hashlib.md5(unique_id.encode('utf-8')).hexdigest() + return 
md5_hash + + def generate_st_nvi(): + """ + 生成 st_nvi 值的方法 + :return: 返回生成的 st_nvi 值 + """ + HASH_LENGTH = 4 # 截取哈希值的前几位 + + def generate_random_string(length=21): + """ + 生成指定长度的随机字符串 + :param length: 字符串长度,默认为 21 + :return: 随机字符串 + """ + charset = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict" + return ''.join(secrets.choice(charset) for _ in range(length)) + + def sha256(input_str): + """ + 计算 SHA-256 哈希值 + :param input_str: 输入字符串 + :return: 哈希值(十六进制) + """ + return hashlib.sha256(input_str.encode('utf-8')).hexdigest() + + random_str = generate_random_string() + hash_prefix = sha256(random_str)[:HASH_LENGTH] + return random_str + hash_prefix + + url = "https://anonflow2.eastmoney.com/backend/api/webreport" + # 随机选择屏幕分辨率,增加请求的真实性 + screen_resolution = random.choice(['1920X1080', '2560X1440', '3840X2160']) + payload = json.dumps({ + "osPlatform": "Windows", + "sourceType": "WEB", + "osversion": "Windows 10.0", + "language": "zh-CN", + "timezone": "Asia/Shanghai", + "webDeviceInfo": { + "screenResolution": screen_resolution, + "userAgent": user_agent, + "canvasKey": generate_uuid_md5(), + "webglKey": generate_uuid_md5(), + "fontKey": generate_uuid_md5(), + "audioKey": generate_uuid_md5() + } + }) + headers = { + 'Cookie': f'st_nvi={generate_st_nvi()}', + 'Content-Type': 'application/json' + } + # 增加超时,防止无限等待 + response = requests.request("POST", url, headers=headers, data=payload, timeout=30) + response.raise_for_status() # 对 4xx/5xx 响应抛出 HTTPError + + data = response.json() + nid = data['data']['nid'] + + _cache.data = nid + _cache.expire_at = now + _cache.ttl + return nid + except requests.exceptions.RequestException as e: + logger.warning(f"请求东方财富授权接口失败: {e}") + _cache.data = None + # 该接口请求失败时,方案可能已失效,后续大概率会继续失败,因无法成功获取,下次会继续请求,设置较长过期时间,可避免频繁请求 + _cache.expire_at = now + 5 * 60 + return None + except (KeyError, json.JSONDecodeError) as e: + logger.warning(f"解析东方财富授权接口响应失败: {e}") + _cache.data = None + # 
该接口请求失败时,方案可能已失效,后续大概率会继续失败,因无法成功获取,下次会继续请求,设置较长过期时间,可避免频繁请求 + _cache.expire_at = now + 5 * 60 + return None + + +def eastmoney_patch(): + if _patch_sign.is_patched(): + return + + def patched_request(self, method, url, **kwargs): + # 排除非目标域名 + is_target = any( + d in (url or "") + for d in [ + "fund.eastmoney.com", + "push2.eastmoney.com", + "push2his.eastmoney.com", + ] + ) + if not is_target: + return original_request(self, method, url, **kwargs) + # 获取一个随机的 User-Agent + user_agent = ua.random + # 处理 Headers:确保不破坏业务代码传入的 headers + headers = kwargs.get("headers", {}) + headers["User-Agent"] = user_agent + nid = _get_nid(user_agent) + if nid: + headers["Cookie"] = f"nid18={nid}" + kwargs["headers"] = headers + # 随机休眠,降低被封风险 + sleep_time = random.uniform(1, 4) + time.sleep(sleep_time) + return original_request(self, method, url, **kwargs) + + # 全局替换 Session 的 request 入口 + requests.Session.request = patched_request + _patch_sign.set_patch(True) diff --git a/src/provider/akshare_fetcher.py b/src/provider/akshare_fetcher.py index 265b9d6e..f86133d0 100644 --- a/src/provider/akshare_fetcher.py +++ b/src/provider/akshare_fetcher.py @@ -41,7 +41,7 @@ before_sleep_log, ) -from patch.eastmoney_patch import eastmoney_patch +from provider._patch.eastmoney_patch import eastmoney_patch from provider._config import get_config from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS, is_bse_code, is_st_stock, is_kc_cy_stock, normalize_stock_code from .realtime_types import ( diff --git a/src/provider/efinance_fetcher.py b/src/provider/efinance_fetcher.py index 88018023..48c4e255 100644 --- a/src/provider/efinance_fetcher.py +++ b/src/provider/efinance_fetcher.py @@ -51,7 +51,7 @@ ) _EF_CALL_TIMEOUT = 30 -from patch.eastmoney_patch import eastmoney_patch +from provider._patch.eastmoney_patch import eastmoney_patch from provider._config import get_config from .base import BaseFetcher, DataFetchError, RateLimitError, STANDARD_COLUMNS,is_bse_code, is_st_stock, 
is_kc_cy_stock, normalize_stock_code from .realtime_types import ( From f2abc9959c81dd6f049eb3af2fbe3a4dcbaa366a Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:36:38 +0800 Subject: [PATCH 22/43] docs: add US data provider optimization design spec Co-Authored-By: Claude Opus 4.6 (1M context) --- .../2026-04-23-opt-data-provider-design.md | 216 ++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 .claude/plans/2026-04-23-opt-data-provider-design.md diff --git a/.claude/plans/2026-04-23-opt-data-provider-design.md b/.claude/plans/2026-04-23-opt-data-provider-design.md new file mode 100644 index 00000000..7a11b2f3 --- /dev/null +++ b/.claude/plans/2026-04-23-opt-data-provider-design.md @@ -0,0 +1,216 @@ +# US Data Provider 优化设计 + +**Date:** 2026-04-23 +**Scope:** 重构 `processor/us_daily`,升级股票列表获取和天级数据获取逻辑 + +--- + +## 1. 目标 + +将 `processor/us_daily` 升级为两部分: + +1. **股票列表获取** — 按交易所(NASDAQ, NYSE, ARCA)全量获取所有上市股票及 TickerDetails 全部字段,不做市值过滤,固定使用 massive API +2. **天级数据获取** — 支持 akshare > yfinance > massive 三数据源优先级 failover,归一化到统一列存储,每个数据源独立配置请求间隔 + +## 2. 架构 + +### 2.1 目录结构 + +``` +processor/us_daily/ +├── __init__.py +├── __main__.py # 入口,编排两步流程 +├── config.py # 配置(数据源优先级、各源间隔等) +├── storage.py # 文件 I/O +├── ticker_lister.py # 新文件,替代 ticker_filter.py +├── sources/ # 新目录:数据源抽象 + 实现 +│ ├── __init__.py # 导出 SourceManager +│ ├── base.py # BaseSource 接口 +│ ├── manager.py # SourceManager(failover 编排) +│ ├── akshare_source.py +│ ├── yfinance_source.py +│ └── massive_source.py +└── agg_fetcher.py # 改造:调用 SourceManager +``` + +### 2.2 数据流 + +1. `__main__.py` 加载 config → 初始化 `RESTClient` + `SourceManager` +2. Step 1:`ticker_lister.py` 用 `RESTClient` 从 massive API 按交易所获取全量股票 + TickerDetails,存到 `./data/us_list/` +3. Step 2:`agg_fetcher.py` 遍历股票列表,按月调用 `SourceManager.fetch_daily()` 获取天级数据,归一化后存到 `./data/us_daily//.json` + +## 3. 
数据源抽象与 Failover + +### 3.1 BaseSource 接口 + +```python +class BaseSource(ABC): + name: str # "akshare" / "yfinance" / "massive" + request_interval: float # 从 config 读取,每次请求后 sleep + + @abstractmethod + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + """返回归一化后的 DataFrame,列为 STANDARD_COLUMNS""" + ... +``` + +### 3.2 STANDARD_COLUMNS + +```python +STANDARD_COLUMNS = ["date", "open", "high", "low", "close", "volume"] +``` + +只保留所有数据源都能提供的 6 列。 + +### 3.3 SourceManager + +```python +class SourceManager: + def __init__(self, sources: List[BaseSource]): + self.sources = sources # 已按优先级排序 + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> Tuple[pd.DataFrame, str]: + """依次尝试各 source,返回 (df, source_name),全部失败则抛异常""" + for source in self.sources: + try: + df = source.fetch_daily(ticker, start_date, end_date) + if not df.empty: + time.sleep(source.request_interval) + return df, source.name + except Exception as e: + logger.warning(f"{source.name} failed for {ticker}: {e}") + continue + raise FetchError(f"All sources failed for {ticker}") +``` + +### 3.4 三个实现 + +| Source | 库调用 | ticker 转换 | 请求间隔默认值 | +|--------|--------|------------|--------------| +| AkshareSource | `ak.stock_us_daily(symbol=ticker)` | 直接用 ticker | 2s | +| YfinanceSource | `yf.download(ticker, start, end)` | 直接用 ticker | 1s | +| MassiveSource | `client.list_aggs(ticker, ...)` | 直接用 ticker | 12s | + +## 4. 股票列表获取(ticker_lister.py) + +### 4.1 交易所映射 + +```python +EXCHANGES = { + "nasdaq": "XNAS", + "nyse": "XNYS", + "arca": "ARCX", +} +``` + +### 4.2 流程 + +1. 遍历配置的交易所列表(默认全部三个) +2. 对每个交易所调用 `client.list_tickers(market="stocks", exchange=ex, active=True, limit=1000)` 获取所有 ticker +3. 对每个 ticker 调用 `client.get_ticker_details(ticker)` 获取完整详情 +4. 每次请求后 sleep `config.massive_interval`(12s) +5. 按交易所分别存储 +6. 
支持断点续传:如果交易所文件已存在,加载其中已有的 tickers 列表作为已完成集合,只对不在集合中的 ticker 调用 `get_ticker_details`,完成后覆盖写入整个文件 + +### 4.3 存储结构 + +``` +data/us_list/ +├── nasdaq.json +├── nyse.json +└── arca.json +``` + +文件格式: + +```json +{ + "updated_at": "2026-04-23", + "exchange": "XNAS", + "count": 3500, + "tickers": [ + { + "ticker": "AAPL", + "name": "Apple Inc", + "market_cap": 3.2e12, + "description": "...", + "sic_code": "3571", + "total_employees": 164000, + "list_date": "1980-12-12", + "share_class_shares_outstanding": 15500000000 + } + ] +} +``` + +## 5. 天级数据获取与存储(agg_fetcher.py) + +### 5.1 流程 + +1. 从 `./data/us_list/` 加载股票列表(合并所有交易所) +2. 对每个 ticker,生成月份列表(`config.start_date` 到当前月) +3. 对每个月份: + - 文件已存在且不是当前月 → 跳过 + - 文件已存在且是当前月 → 重新获取 + - 文件不存在 → 获取 +4. 调用 `source_manager.fetch_daily(ticker, month_start, month_end)` +5. 归一化后存储 + +### 5.2 存储格式 + +```json +{ + "ticker": "AAPL", + "month": "2026-04", + "source": "akshare", + "fetched_at": "2026-04-23T10:30:45", + "data": [ + { + "date": "2026-04-01", + "open": 150.5, + "high": 152.1, + "low": 150.0, + "close": 151.8, + "volume": 45000000 + } + ] +} +``` + +### 5.3 错误处理 + +所有数据源都失败时,记录到 failures 列表,继续处理下一个 ticker,最后汇总输出失败报告。 + +## 6. 配置(config.py) + +```python +@dataclass +class Config: + # --- 股票列表 --- + refresh_tickers: bool = False + exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"]) + + # --- 天级数据 --- + start_date: str = "2026-01" + data_source_priority: List[str] = field(default_factory=lambda: ["akshare", "yfinance", "massive"]) + + # --- 各数据源请求间隔(秒)--- + akshare_interval: float = 2.0 + yfinance_interval: float = 1.0 + massive_interval: float = 12.0 + + # --- 路径 --- + list_dir: str = "data/us_list" + daily_dir: str = "data/us_daily" + + # --- 重试 --- + max_retries: int = 3 +``` + +删除的配置项:`market_cap_min`(不再做市值过滤)。 + +## 7. 
旧文件处理 + +- **删除:** `ticker_filter.py` — 被 `ticker_lister.py` 替代 +- **改造:** `__main__.py`、`agg_fetcher.py`、`config.py`、`storage.py` +- **新增:** `ticker_lister.py`、`sources/` 目录及其下 6 个文件 From 2a60b6013969c09b985fe7a27464ae65246f3b2a Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:44:10 +0800 Subject: [PATCH 23/43] docs: add US data provider optimization implementation plan Co-Authored-By: Claude Opus 4.6 (1M context) --- .../2026-04-23-opt-data-provider-plan.md | 1589 +++++++++++++++++ 1 file changed, 1589 insertions(+) create mode 100644 .claude/plans/2026-04-23-opt-data-provider-plan.md diff --git a/.claude/plans/2026-04-23-opt-data-provider-plan.md b/.claude/plans/2026-04-23-opt-data-provider-plan.md new file mode 100644 index 00000000..7dc617bd --- /dev/null +++ b/.claude/plans/2026-04-23-opt-data-provider-plan.md @@ -0,0 +1,1589 @@ +# US Data Provider Optimization Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Refactor `processor/us_daily` to support full stock listing by exchange (no market-cap filter) and multi-source daily data fetching with akshare > yfinance > massive failover. + +**Architecture:** New `sources/` sub-package with `BaseSource` abstract class, three implementations (AkshareSource, YfinanceSource, MassiveSource), and a `SourceManager` for priority-based failover. `ticker_lister.py` replaces `ticker_filter.py` for full-exchange listing. Config updated with per-source intervals and source priority. 
+ +**Tech Stack:** Python 3.9+, dataclasses, akshare, yfinance, massive REST client, pandas + +**Design Doc:** `.claude/plans/2026-04-23-opt-data-provider-design.md` + +--- + +### Task 1: Update Config + +**Files:** +- Modify: `src/processor/us_daily/config.py` +- Modify: `tests/test_us_daily/test_config.py` + +- [ ] **Step 1: Write failing tests for new Config fields** + +In `tests/test_us_daily/test_config.py`, replace the `TestConfig` class with: + +```python +class TestConfig(unittest.TestCase): + def test_default_config(self): + from processor.us_daily.config import Config + + config = Config() + self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.start_date, "2026-01") + self.assertEqual(config.max_retries, 3) + self.assertEqual(config.exchanges, ["nasdaq", "nyse", "arca"]) + self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"]) + self.assertEqual(config.akshare_interval, 2.0) + self.assertEqual(config.yfinance_interval, 1.0) + self.assertEqual(config.massive_interval, 12.0) + self.assertEqual(config.list_dir, "data/us_list") + self.assertEqual(config.daily_dir, "data/us_daily") + + def test_load_config_from_file(self): + from processor.us_daily.config import load_config + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump({ + "refresh_tickers": True, + "akshare_interval": 3.0, + "exchanges": ["nasdaq"], + }, f) + tmp_path = f.name + + try: + config = load_config(tmp_path) + self.assertEqual(config.refresh_tickers, True) + self.assertEqual(config.akshare_interval, 3.0) + self.assertEqual(config.exchanges, ["nasdaq"]) + # defaults preserved for unspecified fields + self.assertEqual(config.start_date, "2026-01") + self.assertEqual(config.massive_interval, 12.0) + finally: + os.unlink(tmp_path) + + def test_load_config_missing_file_uses_defaults(self): + from processor.us_daily.config import load_config + + config = load_config("/nonexistent/path/config.json") + 
self.assertEqual(config.refresh_tickers, False) + self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"]) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_config.py -v` +Expected: FAIL — `Config` does not have `exchanges`, `data_source_priority`, etc. + +- [ ] **Step 3: Update Config dataclass** + +Replace `src/processor/us_daily/config.py` with: + +```python +import json +import os +from dataclasses import dataclass, field +from typing import List + + +@dataclass +class Config: + refresh_tickers: bool = False + exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"]) + start_date: str = "2026-01" + data_source_priority: List[str] = field( + default_factory=lambda: ["akshare", "yfinance", "massive"] + ) + akshare_interval: float = 2.0 + yfinance_interval: float = 1.0 + massive_interval: float = 12.0 + list_dir: str = "data/us_list" + daily_dir: str = "data/us_daily" + max_retries: int = 3 + + +def load_config(config_path: str = None) -> Config: + config = Config() + if config_path is None: + config_path = os.path.join(os.path.dirname(__file__), "config.json") + if os.path.exists(config_path): + with open(config_path, "r") as f: + data = json.load(f) + for key, value in data.items(): + if hasattr(config, key): + setattr(config, key, value) + return config +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_config.py -v` +Expected: All 3 tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/config.py tests/test_us_daily/test_config.py +git commit -m "refactor: update us_daily Config with multi-source fields" +``` + +--- + +### Task 2: Update Storage helpers + +**Files:** +- Modify: `src/processor/us_daily/storage.py` +- Modify: `tests/test_us_daily/test_storage.py` + +- [ ] **Step 1: Write failing tests for new storage helpers** + +Add new test methods to `TestStorage` in `tests/test_us_daily/test_storage.py`: + +```python + def test_get_list_file_path(self): + from processor.us_daily.storage import get_list_file_path + + result = get_list_file_path("data/us_list", "nasdaq") + self.assertEqual(result, "data/us_list/nasdaq.json") + + def test_get_month_file_path_daily_dir(self): + from processor.us_daily.storage import get_month_file_path + + result = get_month_file_path("data/us_daily", "AAPL", "2020-01") + self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") +``` + +- [ ] **Step 2: Run tests to verify new test fails** + +Run: `pytest tests/test_us_daily/test_storage.py::TestStorage::test_get_list_file_path -v` +Expected: FAIL — `get_list_file_path` does not exist. + +- [ ] **Step 3: Add get_list_file_path to storage.py** + +In `src/processor/us_daily/storage.py`, add after the `get_tickers_file_path` function: + +```python +def get_list_file_path(list_dir: str, exchange: str) -> str: + return os.path.join(list_dir, f"{exchange}.json") +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_storage.py -v` +Expected: All tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/storage.py tests/test_us_daily/test_storage.py +git commit -m "feat: add get_list_file_path to storage helpers" +``` + +--- + +### Task 3: Create BaseSource and SourceManager + +**Files:** +- Create: `src/processor/us_daily/sources/__init__.py` +- Create: `src/processor/us_daily/sources/base.py` +- Create: `src/processor/us_daily/sources/manager.py` +- Create: `tests/test_us_daily/test_sources/__init__.py` +- Create: `tests/test_us_daily/test_sources/test_manager.py` + +- [ ] **Step 1: Write failing tests for SourceManager** + +Create `tests/test_us_daily/test_sources/__init__.py` (empty file). + +Create `tests/test_us_daily/test_sources/test_manager.py`: + +```python +import unittest +from unittest.mock import MagicMock, patch +import pandas as pd + + +class TestSourceManager(unittest.TestCase): + def _make_source(self, name, data=None, error=None): + """Create a mock source that returns data or raises error.""" + from processor.us_daily.sources.base import BaseSource + + source = MagicMock(spec=BaseSource) + source.name = name + source.request_interval = 0.0 + if error: + source.fetch_daily.side_effect = error + elif data is not None: + source.fetch_daily.return_value = data + else: + source.fetch_daily.return_value = pd.DataFrame() + return source + + def test_returns_first_successful_source(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=df) + s2 = self._make_source("source2", data=df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source1") + s1.fetch_daily.assert_called_once_with("AAPL", "2020-01-01", "2020-01-31") + s2.fetch_daily.assert_not_called() + + def test_falls_back_on_failure(self): + 
from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", error=Exception("API down")) + s2 = self._make_source("source2", data=df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source2") + + def test_falls_back_on_empty_dataframe(self): + from processor.us_daily.sources.manager import SourceManager + + empty_df = pd.DataFrame() + good_df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=empty_df) + s2 = self._make_source("source2", data=good_df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source2") + + def test_raises_when_all_fail(self): + from processor.us_daily.sources.manager import SourceManager, FetchError + + s1 = self._make_source("source1", error=Exception("fail1")) + s2 = self._make_source("source2", error=Exception("fail2")) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + with self.assertRaises(FetchError): + manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + def test_sleeps_after_successful_fetch(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=df) + s1.request_interval = 5.0 + + manager = SourceManager([s1]) + with patch("processor.us_daily.sources.manager.time.sleep") as mock_sleep: + manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + mock_sleep.assert_called_once_with(5.0) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to 
verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_manager.py -v` +Expected: FAIL — modules do not exist. + +- [ ] **Step 3: Create sources package with BaseSource** + +Create `src/processor/us_daily/sources/__init__.py`: + +```python +from processor.us_daily.sources.manager import SourceManager, FetchError + +__all__ = ["SourceManager", "FetchError"] +``` + +Create `src/processor/us_daily/sources/base.py`: + +```python +from abc import ABC, abstractmethod + +import pandas as pd + +STANDARD_COLUMNS = ["date", "open", "high", "low", "close", "volume"] + + +class BaseSource(ABC): + name: str + request_interval: float + + @abstractmethod + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch daily OHLCV data for a US stock ticker. + + Returns a DataFrame with columns matching STANDARD_COLUMNS. + Raises on unrecoverable errors. Returns empty DataFrame if no data. + """ + ... +``` + +- [ ] **Step 4: Create SourceManager** + +Create `src/processor/us_daily/sources/manager.py`: + +```python +import logging +import time +from typing import List, Tuple + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource + +logger = logging.getLogger("us_daily") + + +class FetchError(Exception): + """Raised when all data sources fail.""" + pass + + +class SourceManager: + def __init__(self, sources: List[BaseSource]): + self.sources = sources + + def fetch_daily( + self, ticker: str, start_date: str, end_date: str + ) -> Tuple[pd.DataFrame, str]: + """Try each source in priority order. Return (df, source_name). + + Raises FetchError if all sources fail or return empty data. 
+ """ + errors = [] + for source in self.sources: + try: + df = source.fetch_daily(ticker, start_date, end_date) + if df is not None and not df.empty: + time.sleep(source.request_interval) + return df, source.name + else: + logger.debug( + f"{source.name} returned empty data for {ticker}" + ) + except Exception as e: + logger.warning(f"{source.name} failed for {ticker}: {e}") + errors.append(f"{source.name}: {e}") + continue + raise FetchError( + f"All sources failed for {ticker}: {'; '.join(errors)}" + ) +``` + +- [ ] **Step 5: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_sources/test_manager.py -v` +Expected: All 5 tests PASS. + +- [ ] **Step 6: Commit** + +```bash +git add src/processor/us_daily/sources/ tests/test_us_daily/test_sources/ +git commit -m "feat: add BaseSource interface and SourceManager with failover" +``` + +--- + +### Task 4: Implement AkshareSource + +**Files:** +- Create: `src/processor/us_daily/sources/akshare_source.py` +- Create: `tests/test_us_daily/test_sources/test_akshare_source.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_sources/test_akshare_source.py`: + +```python +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd + + +class TestAkshareSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.akshare_source import AkshareSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2020-01-02", "2020-01-03"]), + "open": [74.06, 75.0], + "high": [75.15, 76.0], + "low": [73.80, 74.5], + "close": [74.36, 75.5], + "volume": [108872000, 98000000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + 
self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_filters_by_date(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2019-12-31", "2020-01-02", "2020-02-01"]), + "open": [70.0, 74.06, 80.0], + "high": [71.0, 75.15, 81.0], + "low": [69.0, 73.80, 79.0], + "close": [70.5, 74.36, 80.5], + "volume": [100000, 108872000, 90000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_with_correct_symbol(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + mock_ak.stock_us_daily.assert_called_once_with(symbol="AAPL", adjust="qfq") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_akshare_source.py -v` +Expected: FAIL — module does not exist. 
+ +- [ ] **Step 3: Implement AkshareSource** + +Create `src/processor/us_daily/sources/akshare_source.py`: + +```python +import logging + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class AkshareSource(BaseSource): + name = "akshare" + + def __init__(self, request_interval: float = 2.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + import akshare as ak + + symbol = ticker.strip().upper() + logger.debug(f"[akshare] fetching {symbol} {start_date}~{end_date}") + + df = ak.stock_us_daily(symbol=symbol, adjust="qfq") + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df["date"] = pd.to_datetime(df["date"]) + start_dt = pd.to_datetime(start_date) + end_dt = pd.to_datetime(end_date) + df = df[(df["date"] >= start_dt) & (df["date"] <= end_dt)] + + if df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df["date"] = df["date"].dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_sources/test_akshare_source.py -v` +Expected: All 4 tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/sources/akshare_source.py tests/test_us_daily/test_sources/test_akshare_source.py +git commit -m "feat: add AkshareSource for US daily data" +``` + +--- + +### Task 5: Implement YfinanceSource + +**Files:** +- Create: `src/processor/us_daily/sources/yfinance_source.py` +- Create: `tests/test_us_daily/test_sources/test_yfinance_source.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_sources/test_yfinance_source.py`: + +```python +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd + + +class TestYfinanceSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame( + { + "Open": [74.06, 75.0], + "High": [75.15, 76.0], + "Low": [73.80, 74.5], + "Close": [74.36, 75.5], + "Volume": [108872000, 98000000], + }, + index=pd.to_datetime(["2020-01-02", "2020-01-03"]), + ) + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = raw_df + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_passes_correct_params(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + 
mock_yf.Ticker.assert_called_once_with("AAPL") + mock_ticker.history.assert_called_once_with(start="2020-01-01", end="2020-01-31") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_yfinance_source.py -v` +Expected: FAIL — module does not exist. + +- [ ] **Step 3: Implement YfinanceSource** + +Create `src/processor/us_daily/sources/yfinance_source.py`: + +```python +import logging + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class YfinanceSource(BaseSource): + name = "yfinance" + + def __init__(self, request_interval: float = 1.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + import yfinance as yf + + symbol = ticker.strip().upper() + logger.debug(f"[yfinance] fetching {symbol} {start_date}~{end_date}") + + t = yf.Ticker(symbol) + df = t.history(start=start_date, end=end_date) + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df = df.reset_index() + df = df.rename(columns={ + "Date": "date", + "Open": "open", + "High": "high", + "Low": "low", + "Close": "close", + "Volume": "volume", + }) + + df["date"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df +``` + +- [ ] **Step 4: Run tests to verify they pass** 
+ +Run: `pytest tests/test_us_daily/test_sources/test_yfinance_source.py -v` +Expected: All 3 tests PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/sources/yfinance_source.py tests/test_us_daily/test_sources/test_yfinance_source.py +git commit -m "feat: add YfinanceSource for US daily data" +``` + +--- + +### Task 6: Implement MassiveSource + +**Files:** +- Create: `src/processor/us_daily/sources/massive_source.py` +- Create: `tests/test_us_daily/test_sources/test_massive_source.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_sources/test_massive_source.py`: + +```python +import unittest +from unittest.mock import MagicMock +import pandas as pd + + +class TestMassiveSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.massive_source import MassiveSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000 + agg1.timestamp = 1577944800000 # 2020-01-02 UTC + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["close"], 74.36) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_client_correctly(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + client.list_aggs.assert_called_once_with( + "AAPL", 1, "day", + from_="2020-01-01", to="2020-01-31", + adjusted=True, sort="asc", + ) + + 
def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_sources/test_massive_source.py -v` +Expected: FAIL — module does not exist. + +- [ ] **Step 3: Implement MassiveSource** + +Create `src/processor/us_daily/sources/massive_source.py`: + +```python +import logging +from datetime import datetime, timezone + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class MassiveSource(BaseSource): + name = "massive" + + def __init__(self, client, request_interval: float = 12.0): + self.client = client + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + logger.debug(f"[massive] fetching {ticker} {start_date}~{end_date}") + + aggs = list( + self.client.list_aggs( + ticker, 1, "day", + from_=start_date, to=end_date, + adjusted=True, sort="asc", + ) + ) + + if not aggs: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + rows = [] + for a in aggs: + dt = datetime.fromtimestamp(a.timestamp / 1000, tz=timezone.utc) + rows.append({ + "date": dt.strftime("%Y-%m-%d"), + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + }) + + df = pd.DataFrame(rows, columns=STANDARD_COLUMNS) + return df +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `pytest tests/test_us_daily/test_sources/test_massive_source.py -v` +Expected: All 3 tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/sources/massive_source.py tests/test_us_daily/test_sources/test_massive_source.py +git commit -m "feat: add MassiveSource for US daily data" +``` + +--- + +### Task 7: Create ticker_lister.py + +**Files:** +- Create: `src/processor/us_daily/ticker_lister.py` +- Create: `tests/test_us_daily/test_ticker_lister.py` + +- [ ] **Step 1: Write failing tests** + +Create `tests/test_us_daily/test_ticker_lister.py`: + +```python +import unittest +from unittest.mock import MagicMock, patch, call +import os +import tempfile +import shutil +import json + + +class TestTickerLister(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_ticker(self, ticker_str): + t = MagicMock() + t.ticker = ticker_str + return t + + def _make_details(self, **kwargs): + """Create a mock TickerDetails with all fields as attributes.""" + d = MagicMock() + for k, v in kwargs.items(): + setattr(d, k, v) + # Simulate __dict__ for serialization + d.__dict__ = kwargs + return d + + def test_list_tickers_for_exchange(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + return {"AAPL": details_aapl, "MSFT": details_msft}[ticker] + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + 
file_path = os.path.join(self.test_dir, "nasdaq.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + + self.assertEqual(data["exchange"], "XNAS") + self.assertEqual(data["count"], 2) + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertIn("MSFT", tickers) + + def test_resume_skips_existing_tickers(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + # Pre-populate file with AAPL already fetched + file_path = os.path.join(self.test_dir, "nasdaq.json") + existing_data = { + "updated_at": "2026-04-22", + "exchange": "XNAS", + "count": 1, + "tickers": [ + {"ticker": "AAPL", "name": "Apple Inc", "market_cap": 3e12}, + ], + } + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, "w") as f: + json.dump(existing_data, f) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + client.get_ticker_details.return_value = details_msft + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + # Should only call get_ticker_details for MSFT (AAPL already exists) + client.get_ticker_details.assert_called_once_with("MSFT") + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["count"], 2) + + def test_skips_ticker_on_details_error(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL"), + 
self._make_ticker("AAPL"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return details_aapl + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + file_path = os.path.join(self.test_dir, "nasdaq.json") + with open(file_path) as f: + data = json.load(f) + + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_ticker_lister.py -v` +Expected: FAIL — module does not exist. + +- [ ] **Step 3: Implement ticker_lister.py** + +Create `src/processor/us_daily/ticker_lister.py`: + +```python +import logging +import time +from datetime import date +from typing import Dict, List + +from processor.us_daily.config import Config +from processor.us_daily.storage import get_list_file_path, save_json, load_json, file_exists + +logger = logging.getLogger("us_daily") + +EXCHANGES: Dict[str, str] = { + "nasdaq": "XNAS", + "nyse": "XNYS", + "arca": "ARCX", +} + + +def _details_to_dict(details) -> dict: + """Convert a TickerDetails object to a plain dict, dropping None values.""" + result = {} + for key, value in vars(details).items(): + if key.startswith("_"): + continue + if value is None: + continue + # Handle nested objects with their own __dict__ + if hasattr(value, "__dict__") and not isinstance(value, (str, int, float, bool)): + value = {k: v for k, v in vars(value).items() if not k.startswith("_") and v is not None} + result[key] = value + return result + + +def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> List[dict]: + """Fetch all tickers for an exchange and 
save to file. + + Supports resume: if the output file already exists, previously fetched + tickers are kept and only missing ones are fetched. + """ + exchange_code = EXCHANGES[exchange_name] + file_path = get_list_file_path(config.list_dir, exchange_name) + + # Load existing tickers for resume + existing_tickers: Dict[str, dict] = {} + if file_exists(file_path): + data = load_json(file_path) + for t in data.get("tickers", []): + existing_tickers[t["ticker"]] = t + logger.info( + f"[{exchange_name}] Resuming: {len(existing_tickers)} tickers already fetched" + ) + + # Get full ticker list from API + logger.info(f"[{exchange_name}] Listing tickers for {exchange_code}") + try: + ticker_objs = list( + client.list_tickers( + market="stocks", exchange=exchange_code, active=True, limit=1000 + ) + ) + except Exception as e: + logger.error(f"[{exchange_name}] Failed to list tickers: {e}") + return list(existing_tickers.values()) + + time.sleep(config.massive_interval) + logger.info(f"[{exchange_name}] Found {len(ticker_objs)} tickers") + + # Fetch details for new tickers only + for i, ticker_obj in enumerate(ticker_objs): + ticker_str = ticker_obj.ticker + if ticker_str in existing_tickers: + continue + + try: + details = client.get_ticker_details(ticker_str) + entry = _details_to_dict(details) + existing_tickers[ticker_str] = entry + logger.info( + f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" + ) + except Exception as e: + logger.warning( + f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: {e}" + ) + + time.sleep(config.massive_interval) + + # Save result + tickers_list = list(existing_tickers.values()) + save_json(file_path, { + "updated_at": date.today().strftime("%Y-%m-%d"), + "exchange": exchange_code, + "count": len(tickers_list), + "tickers": tickers_list, + }) + + logger.info(f"[{exchange_name}] Saved {len(tickers_list)} tickers to {file_path}") + return tickers_list +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: 
`pytest tests/test_us_daily/test_ticker_lister.py -v` +Expected: All 3 tests PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/ticker_lister.py tests/test_us_daily/test_ticker_lister.py +git commit -m "feat: add ticker_lister with full exchange listing and resume support" +``` + +--- + +### Task 8: Refactor agg_fetcher.py to use SourceManager + +**Files:** +- Modify: `src/processor/us_daily/agg_fetcher.py` +- Modify: `tests/test_us_daily/test_agg_fetcher.py` + +- [ ] **Step 1: Update tests for new agg_fetcher interface** + +Replace `TestFetchTickerAggs` class in `tests/test_us_daily/test_agg_fetcher.py` (keep `TestGenerateMonths`, `TestMonthBounds`, `TestIsCurrentMonth` unchanged): + +```python +class TestFetchTickerAggs(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_manager(self, df=None, source_name="akshare", error=None): + from processor.us_daily.sources.manager import SourceManager + + manager = MagicMock(spec=SourceManager) + if error: + manager.fetch_daily.side_effect = error + else: + manager.fetch_daily.return_value = (df, source_name) + return manager + + def test_skips_existing_historical_month(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + + config = Config(start_date="2020-01", daily_dir=self.test_dir) + + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2020-01", "data": []}, f) + + manager = self._make_manager() + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + manager.fetch_daily.assert_not_called() + 
self.assertEqual(result["failures"], []) + + def test_fetches_missing_month(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + import pandas as pd + + config = Config(start_date="2020-01", daily_dir=self.test_dir) + + df = pd.DataFrame({ + "date": ["2020-01-02"], + "open": [74.06], + "high": [75.15], + "low": [73.80], + "close": [74.36], + "volume": [108872000], + }) + manager = self._make_manager(df=df, source_name="akshare") + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["ticker"], "AAPL") + self.assertEqual(data["month"], "2020-01") + self.assertEqual(data["source"], "akshare") + self.assertEqual(len(data["data"]), 1) + self.assertEqual(data["data"][0]["close"], 74.36) + self.assertEqual(result["failures"], []) + + def test_refreshes_current_month(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + import pandas as pd + + config = Config(start_date="2026-04", daily_dir=self.test_dir) + + ticker_dir = os.path.join(self.test_dir, "AAPL") + os.makedirs(ticker_dir) + with open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: + json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) + + df = pd.DataFrame({ + "date": ["2026-04-01"], + "open": [200.0], + "high": [210.0], + "low": [195.0], + "close": [205.0], + "volume": [50000000], + }) + manager = self._make_manager(df=df, source_name="yfinance") + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] + ): + with patch( + 
"processor.us_daily.agg_fetcher.is_current_month", return_value=True + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + manager.fetch_daily.assert_called_once() + self.assertEqual(result["failures"], []) + + def test_records_failure_when_all_sources_fail(self): + from processor.us_daily.agg_fetcher import fetch_ticker_aggs + from processor.us_daily.config import Config + from processor.us_daily.sources.manager import FetchError + + config = Config(start_date="2020-01", daily_dir=self.test_dir, max_retries=2) + + manager = self._make_manager( + error=FetchError("All sources failed for AAPL") + ) + + with patch( + "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + ): + with patch( + "processor.us_daily.agg_fetcher.is_current_month", return_value=False + ): + result = fetch_ticker_aggs(manager, "AAPL", config) + + self.assertEqual(len(result["failures"]), 1) + self.assertEqual(result["failures"][0]["ticker"], "AAPL") + self.assertEqual(result["failures"][0]["month"], "2020-01") +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest tests/test_us_daily/test_agg_fetcher.py::TestFetchTickerAggs -v` +Expected: FAIL — `fetch_ticker_aggs` still expects `client` as first arg, not `manager`. 
+ +- [ ] **Step 3: Rewrite agg_fetcher.py to use SourceManager** + +Replace `src/processor/us_daily/agg_fetcher.py` with: + +```python +import calendar +import logging +from datetime import date, datetime +from typing import List, Tuple + +from processor.us_daily.config import Config +from processor.us_daily.sources.manager import FetchError +from processor.us_daily.storage import ( + get_month_file_path, + file_exists, + save_json, +) + +logger = logging.getLogger("us_daily") + + +def generate_months(start: str, end: str) -> List[str]: + start_year, start_month = int(start[:4]), int(start[5:7]) + end_year, end_month = int(end[:4]), int(end[5:7]) + + months = [] + year, month = start_year, start_month + while (year, month) <= (end_year, end_month): + months.append(f"{year:04d}-{month:02d}") + month += 1 + if month > 12: + month = 1 + year += 1 + return months + + +def get_month_bounds(month: str) -> Tuple[str, str]: + year, mon = int(month[:4]), int(month[5:7]) + last_day = calendar.monthrange(year, mon)[1] + return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" + + +def is_current_month(month: str) -> bool: + today = date.today() + return month == f"{today.year:04d}-{today.month:02d}" + + +def current_month() -> str: + today = date.today() + return f"{today.year:04d}-{today.month:02d}" + + +def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: + """Fetch monthly OHLCV data for a ticker using SourceManager. + + Args: + source_manager: SourceManager instance with failover sources. + ticker: Stock ticker symbol (e.g. "AAPL"). + config: Config with daily_dir, start_date, max_retries. + + Returns: + Dict with "failures" list of failed months. 
+ """ + months = generate_months(config.start_date, current_month()) + failures = [] + + for month in months: + file_path = get_month_file_path(config.daily_dir, ticker, month) + + if file_exists(file_path) and not is_current_month(month): + logger.debug(f" {ticker} {month}: exists, skipping") + continue + + start_date, end_date = get_month_bounds(month) + + try: + df, source_name = source_manager.fetch_daily(ticker, start_date, end_date) + except FetchError as e: + failures.append({ + "ticker": ticker, + "month": month, + "error": str(e), + }) + logger.error(f" {ticker} {month}: {e}") + continue + + data = { + "ticker": ticker, + "month": month, + "source": source_name, + "fetched_at": datetime.now().isoformat(timespec="seconds"), + "data": df.to_dict(orient="records"), + } + save_json(file_path, data) + logger.info(f" {ticker} {month}: fetched {len(df)} bars from {source_name}") + + return {"failures": failures} +``` + +- [ ] **Step 4: Run all agg_fetcher tests** + +Run: `pytest tests/test_us_daily/test_agg_fetcher.py -v` +Expected: All tests PASS (including `TestGenerateMonths`, `TestMonthBounds`, `TestIsCurrentMonth`, and the updated `TestFetchTickerAggs`). 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/processor/us_daily/agg_fetcher.py tests/test_us_daily/test_agg_fetcher.py +git commit -m "refactor: update agg_fetcher to use SourceManager with failover" +``` + +--- + +### Task 9: Update __main__.py and delete ticker_filter.py + +**Files:** +- Modify: `src/processor/us_daily/__main__.py` +- Delete: `src/processor/us_daily/ticker_filter.py` +- Delete: `tests/test_us_daily/test_ticker_filter.py` + +- [ ] **Step 1: Rewrite __main__.py** + +Replace `src/processor/us_daily/__main__.py` with: + +```python +import logging +import os +import sys + +from massive import RESTClient + +from processor.us_daily.config import load_config +from processor.us_daily.ticker_lister import list_tickers_for_exchange, EXCHANGES +from processor.us_daily.agg_fetcher import fetch_ticker_aggs +from processor.us_daily.sources.akshare_source import AkshareSource +from processor.us_daily.sources.yfinance_source import YfinanceSource +from processor.us_daily.sources.massive_source import MassiveSource +from processor.us_daily.sources.manager import SourceManager +from processor.us_daily.storage import get_list_file_path, load_json, file_exists + + +SOURCE_CLASSES = { + "akshare": AkshareSource, + "yfinance": YfinanceSource, + "massive": MassiveSource, +} + + +def setup_logging(): + os.makedirs("logs", exist_ok=True) + logger = logging.getLogger("us_daily") + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + file_handler = logging.FileHandler("logs/us_daily.log", encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def build_source_manager(config, client) -> SourceManager: + 
"""Build SourceManager from config priority list.""" + interval_map = { + "akshare": config.akshare_interval, + "yfinance": config.yfinance_interval, + "massive": config.massive_interval, + } + sources = [] + for name in config.data_source_priority: + cls = SOURCE_CLASSES.get(name) + if cls is None: + continue + if name == "massive": + sources.append(cls(client=client, request_interval=interval_map[name])) + else: + sources.append(cls(request_interval=interval_map[name])) + return SourceManager(sources) + + +def load_all_tickers(config) -> list: + """Load tickers from all exchange files in list_dir.""" + all_tickers = [] + seen = set() + for exchange_name in config.exchanges: + file_path = get_list_file_path(config.list_dir, exchange_name) + if not file_exists(file_path): + continue + data = load_json(file_path) + for t in data.get("tickers", []): + ticker = t["ticker"] + if ticker not in seen: + seen.add(ticker) + all_tickers.append(t) + return all_tickers + + +def main(): + logger = setup_logging() + config = load_config() + + logger.info("=== US Daily Data Fetcher Started ===") + logger.info(f"Config: {config}") + + client = RESTClient() + + # Step 1: Fetch ticker lists per exchange + if config.refresh_tickers or any( + not file_exists(get_list_file_path(config.list_dir, ex)) + for ex in config.exchanges + ): + for exchange_name in config.exchanges: + if exchange_name not in EXCHANGES: + logger.warning(f"Unknown exchange: {exchange_name}, skipping") + continue + logger.info(f"Fetching ticker list for {exchange_name}...") + list_tickers_for_exchange(client, exchange_name, config) + + # Load all tickers + tickers = load_all_tickers(config) + logger.info(f"Total tickers loaded: {len(tickers)}") + + # Step 2: Fetch daily data + source_manager = build_source_manager(config, client) + + all_failures = [] + total = len(tickers) + for i, ticker_info in enumerate(tickers): + ticker = ticker_info["ticker"] + logger.info(f"[{i + 1}/{total}] Processing {ticker}") + result = 
fetch_ticker_aggs(source_manager, ticker, config) + if result["failures"]: + all_failures.extend(result["failures"]) + + # Step 3: Summary + logger.info("=== Summary ===") + logger.info(f"Total tickers: {total}") + if all_failures: + logger.warning(f"Failed months: {len(all_failures)}") + for f in all_failures: + logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") + else: + logger.info("All data fetched successfully") + logger.info("=== Done ===") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Delete old ticker_filter.py and its tests** + +```bash +git rm src/processor/us_daily/ticker_filter.py +git rm tests/test_us_daily/test_ticker_filter.py +``` + +- [ ] **Step 3: Run full test suite to verify nothing is broken** + +Run: `pytest tests/test_us_daily/ -v` +Expected: All tests PASS. No imports reference `ticker_filter`. + +- [ ] **Step 4: Commit** + +```bash +git add src/processor/us_daily/__main__.py +git commit -m "refactor: update __main__.py with SourceManager and remove ticker_filter" +``` + +--- + +### Task 10: Run full test suite and verify + +- [ ] **Step 1: Run all us_daily tests** + +```bash +pytest tests/test_us_daily/ -v +``` + +Expected: All tests PASS. 
 +
+- [ ] **Step 2: Run import smoke test**
+
+```bash
+python -c "
+from processor.us_daily.config import Config, load_config
+from processor.us_daily.sources import SourceManager, FetchError
+from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS
+from processor.us_daily.sources.akshare_source import AkshareSource
+from processor.us_daily.sources.yfinance_source import YfinanceSource
+from processor.us_daily.sources.massive_source import MassiveSource
+from processor.us_daily.ticker_lister import list_tickers_for_exchange, EXCHANGES
+from processor.us_daily.agg_fetcher import fetch_ticker_aggs
+print('All imports OK')
+print(f'STANDARD_COLUMNS: {STANDARD_COLUMNS}')
+print(f'EXCHANGES: {EXCHANGES}')
+print(f'Default config: {Config()}')
+"
+```
+
+Expected: `All imports OK` with correct values printed.
+
+- [ ] **Step 3: Verify no remaining references to deleted code**
+
+```bash
+grep -r "ticker_filter\|market_cap_min" src/processor/us_daily/ --include="*.py"
+```
+
+Expected: No references to `ticker_filter` or `market_cap_min`. (Do NOT grep for `request_interval`, `data_dir`, or `top_tickers`: `request_interval` legitimately remains as the per-source constructor parameter in `sources/`, `data_dir` remains as a parameter name in `storage.py`, and `top_tickers` remains in `storage.get_tickers_file_path` until that helper is removed.)
+ +- [ ] **Step 4: Commit if any fixups needed** + +If any issues found in steps 1-3, fix them and commit: + +```bash +git add -A src/processor/us_daily/ tests/test_us_daily/ +git commit -m "fix: resolve remaining issues from us_daily refactor" +``` From 8e5349493a0a27f98a475389e391a1aefe6fdf0a Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:47:12 +0800 Subject: [PATCH 24/43] feat: add get_list_file_path to storage helpers Co-Authored-By: Claude Sonnet 4.6 --- src/processor/us_daily/storage.py | 4 ++++ tests/test_us_daily/test_storage.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/processor/us_daily/storage.py b/src/processor/us_daily/storage.py index af80505b..32c6a6ab 100644 --- a/src/processor/us_daily/storage.py +++ b/src/processor/us_daily/storage.py @@ -6,6 +6,10 @@ def get_tickers_file_path(data_dir: str) -> str: return os.path.join(data_dir, "top_tickers.json") +def get_list_file_path(list_dir: str, exchange: str) -> str: + return os.path.join(list_dir, f"{exchange}.json") + + def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: return os.path.join(data_dir, ticker, f"{month}.json") diff --git a/tests/test_us_daily/test_storage.py b/tests/test_us_daily/test_storage.py index 1d595598..06a8cd22 100644 --- a/tests/test_us_daily/test_storage.py +++ b/tests/test_us_daily/test_storage.py @@ -50,6 +50,18 @@ def test_file_exists(self): self.assertTrue(file_exists(existing)) self.assertFalse(file_exists(os.path.join(self.test_dir, "nope.json"))) + def test_get_list_file_path(self): + from processor.us_daily.storage import get_list_file_path + + result = get_list_file_path("data/us_list", "nasdaq") + self.assertEqual(result, "data/us_list/nasdaq.json") + + def test_get_month_file_path_daily_dir(self): + from processor.us_daily.storage import get_month_file_path + + result = get_month_file_path("data/us_daily", "AAPL", "2020-01") + 
self.assertEqual(result, "data/us_daily/AAPL/2020-01.json") + if __name__ == "__main__": unittest.main() From cda5dba597180bd5692809cbb09ac70777f39240 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:47:14 +0800 Subject: [PATCH 25/43] refactor: update us_daily Config with multi-source fields Replace market_cap_min/request_interval/data_dir with multi-source support: exchanges list, per-source intervals (akshare/yfinance/massive), separate list_dir/daily_dir, and data_source_priority for priority-based failover. Co-Authored-By: Claude Sonnet 4.6 --- src/processor/us_daily/config.py | 15 +++++++++++---- tests/test_us_daily/test_config.py | 23 ++++++++++++++++------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py index 6316a455..97fa63a3 100644 --- a/src/processor/us_daily/config.py +++ b/src/processor/us_daily/config.py @@ -1,15 +1,22 @@ import json import os -from dataclasses import dataclass +from dataclasses import dataclass, field +from typing import List @dataclass class Config: refresh_tickers: bool = False - market_cap_min: float = 5e9 + exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"]) start_date: str = "2026-01" - request_interval: int = 12 - data_dir: str = "data/us_daily" + data_source_priority: List[str] = field( + default_factory=lambda: ["akshare", "yfinance", "massive"] + ) + akshare_interval: float = 2.0 + yfinance_interval: float = 1.0 + massive_interval: float = 12.0 + list_dir: str = "data/us_list" + daily_dir: str = "data/us_daily" max_retries: int = 3 diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index 369a3de2..3c31812a 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -10,26 +10,35 @@ def test_default_config(self): config = Config() self.assertEqual(config.refresh_tickers, False) - 
self.assertEqual(config.market_cap_min, 5e9) self.assertEqual(config.start_date, "2026-01") - self.assertEqual(config.request_interval, 12) - self.assertEqual(config.data_dir, "data/us_daily") self.assertEqual(config.max_retries, 3) + self.assertEqual(config.exchanges, ["nasdaq", "nyse", "arca"]) + self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"]) + self.assertEqual(config.akshare_interval, 2.0) + self.assertEqual(config.yfinance_interval, 1.0) + self.assertEqual(config.massive_interval, 12.0) + self.assertEqual(config.list_dir, "data/us_list") + self.assertEqual(config.daily_dir, "data/us_daily") def test_load_config_from_file(self): from processor.us_daily.config import load_config with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: - json.dump({"refresh_tickers": True, "market_cap_min": 1e10}, f) + json.dump({ + "refresh_tickers": True, + "akshare_interval": 3.0, + "exchanges": ["nasdaq"], + }, f) tmp_path = f.name try: config = load_config(tmp_path) self.assertEqual(config.refresh_tickers, True) - self.assertEqual(config.market_cap_min, 1e10) + self.assertEqual(config.akshare_interval, 3.0) + self.assertEqual(config.exchanges, ["nasdaq"]) # defaults preserved for unspecified fields self.assertEqual(config.start_date, "2026-01") - self.assertEqual(config.request_interval, 12) + self.assertEqual(config.massive_interval, 12.0) finally: os.unlink(tmp_path) @@ -38,7 +47,7 @@ def test_load_config_missing_file_uses_defaults(self): config = load_config("/nonexistent/path/config.json") self.assertEqual(config.refresh_tickers, False) - self.assertEqual(config.market_cap_min, 5e9) + self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"]) if __name__ == "__main__": From 92948ce424fe2f8828c718a0bb551a4d27ca67b4 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:49:03 +0800 Subject: [PATCH 26/43] feat: add BaseSource 
interface and SourceManager with failover Co-Authored-By: Claude Sonnet 4.6 --- src/processor/us_daily/sources/__init__.py | 3 + src/processor/us_daily/sources/base.py | 19 ++++ src/processor/us_daily/sources/manager.py | 45 ++++++++++ tests/test_us_daily/test_sources/__init__.py | 0 .../test_sources/test_manager.py | 90 +++++++++++++++++++ 5 files changed, 157 insertions(+) create mode 100644 src/processor/us_daily/sources/__init__.py create mode 100644 src/processor/us_daily/sources/base.py create mode 100644 src/processor/us_daily/sources/manager.py create mode 100644 tests/test_us_daily/test_sources/__init__.py create mode 100644 tests/test_us_daily/test_sources/test_manager.py diff --git a/src/processor/us_daily/sources/__init__.py b/src/processor/us_daily/sources/__init__.py new file mode 100644 index 00000000..6dd559d0 --- /dev/null +++ b/src/processor/us_daily/sources/__init__.py @@ -0,0 +1,3 @@ +from processor.us_daily.sources.manager import SourceManager, FetchError + +__all__ = ["SourceManager", "FetchError"] diff --git a/src/processor/us_daily/sources/base.py b/src/processor/us_daily/sources/base.py new file mode 100644 index 00000000..19560d6f --- /dev/null +++ b/src/processor/us_daily/sources/base.py @@ -0,0 +1,19 @@ +from abc import ABC, abstractmethod + +import pandas as pd + +STANDARD_COLUMNS = ["date", "open", "high", "low", "close", "volume"] + + +class BaseSource(ABC): + name: str + request_interval: float + + @abstractmethod + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch daily OHLCV data for a US stock ticker. + + Returns a DataFrame with columns matching STANDARD_COLUMNS. + Raises on unrecoverable errors. Returns empty DataFrame if no data. + """ + ... 
diff --git a/src/processor/us_daily/sources/manager.py b/src/processor/us_daily/sources/manager.py new file mode 100644 index 00000000..7ce7fe57 --- /dev/null +++ b/src/processor/us_daily/sources/manager.py @@ -0,0 +1,45 @@ +import logging +import time +from typing import List, Tuple + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource + +logger = logging.getLogger("us_daily") + + +class FetchError(Exception): + """Raised when all data sources fail.""" + pass + + +class SourceManager: + def __init__(self, sources: List[BaseSource]): + self.sources = sources + + def fetch_daily( + self, ticker: str, start_date: str, end_date: str + ) -> Tuple[pd.DataFrame, str]: + """Try each source in priority order. Return (df, source_name). + + Raises FetchError if all sources fail or return empty data. + """ + errors = [] + for source in self.sources: + try: + df = source.fetch_daily(ticker, start_date, end_date) + if df is not None and not df.empty: + time.sleep(source.request_interval) + return df, source.name + else: + logger.debug( + f"{source.name} returned empty data for {ticker}" + ) + except Exception as e: + logger.warning(f"{source.name} failed for {ticker}: {e}") + errors.append(f"{source.name}: {e}") + continue + raise FetchError( + f"All sources failed for {ticker}: {'; '.join(errors)}" + ) diff --git a/tests/test_us_daily/test_sources/__init__.py b/tests/test_us_daily/test_sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_us_daily/test_sources/test_manager.py b/tests/test_us_daily/test_sources/test_manager.py new file mode 100644 index 00000000..8ac8221a --- /dev/null +++ b/tests/test_us_daily/test_sources/test_manager.py @@ -0,0 +1,90 @@ +import unittest +from unittest.mock import MagicMock, patch +import pandas as pd + + +class TestSourceManager(unittest.TestCase): + def _make_source(self, name, data=None, error=None): + """Create a mock source that returns data or raises error.""" + from 
processor.us_daily.sources.base import BaseSource + + source = MagicMock(spec=BaseSource) + source.name = name + source.request_interval = 0.0 + if error: + source.fetch_daily.side_effect = error + elif data is not None: + source.fetch_daily.return_value = data + else: + source.fetch_daily.return_value = pd.DataFrame() + return source + + def test_returns_first_successful_source(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=df) + s2 = self._make_source("source2", data=df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source1") + s1.fetch_daily.assert_called_once_with("AAPL", "2020-01-01", "2020-01-31") + s2.fetch_daily.assert_not_called() + + def test_falls_back_on_failure(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", error=Exception("API down")) + s2 = self._make_source("source2", data=df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, "source2") + + def test_falls_back_on_empty_dataframe(self): + from processor.us_daily.sources.manager import SourceManager + + empty_df = pd.DataFrame() + good_df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=empty_df) + s2 = self._make_source("source2", data=good_df) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + result_df, source_name = manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(source_name, 
"source2") + + def test_raises_when_all_fail(self): + from processor.us_daily.sources.manager import SourceManager, FetchError + + s1 = self._make_source("source1", error=Exception("fail1")) + s2 = self._make_source("source2", error=Exception("fail2")) + + manager = SourceManager([s1, s2]) + with patch("processor.us_daily.sources.manager.time.sleep"): + with self.assertRaises(FetchError): + manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + def test_sleeps_after_successful_fetch(self): + from processor.us_daily.sources.manager import SourceManager + + df = pd.DataFrame({"date": ["2020-01-02"], "close": [100.0]}) + s1 = self._make_source("source1", data=df) + s1.request_interval = 5.0 + + manager = SourceManager([s1]) + with patch("processor.us_daily.sources.manager.time.sleep") as mock_sleep: + manager.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + mock_sleep.assert_called_once_with(5.0) + + +if __name__ == "__main__": + unittest.main() From 0f6ffa0b21e63abe153ff9eb8fe6e1df5c4db75d Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:50:39 +0800 Subject: [PATCH 27/43] feat: add AkshareSource for US daily data Co-Authored-By: Claude Sonnet 4.6 --- .../us_daily/sources/akshare_source.py | 43 +++++++++++ .../test_sources/test_akshare_source.py | 71 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 src/processor/us_daily/sources/akshare_source.py create mode 100644 tests/test_us_daily/test_sources/test_akshare_source.py diff --git a/src/processor/us_daily/sources/akshare_source.py b/src/processor/us_daily/sources/akshare_source.py new file mode 100644 index 00000000..86bce45e --- /dev/null +++ b/src/processor/us_daily/sources/akshare_source.py @@ -0,0 +1,43 @@ +import logging + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + +try: + import akshare as ak +except ImportError: # 
pragma: no cover + ak = None # type: ignore[assignment] + + +class AkshareSource(BaseSource): + name = "akshare" + + def __init__(self, request_interval: float = 2.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + if ak is None: + raise ImportError("akshare is not installed") + + symbol = ticker.strip().upper() + logger.debug(f"[akshare] fetching {symbol} {start_date}~{end_date}") + + df = ak.stock_us_daily(symbol=symbol, adjust="qfq") + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df["date"] = pd.to_datetime(df["date"]) + start_dt = pd.to_datetime(start_date) + end_dt = pd.to_datetime(end_date) + df = df[(df["date"] >= start_dt) & (df["date"] <= end_dt)] + + if df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df["date"] = df["date"].dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df diff --git a/tests/test_us_daily/test_sources/test_akshare_source.py b/tests/test_us_daily/test_sources/test_akshare_source.py new file mode 100644 index 00000000..73738e2b --- /dev/null +++ b/tests/test_us_daily/test_sources/test_akshare_source.py @@ -0,0 +1,71 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd + + +class TestAkshareSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.akshare_source import AkshareSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2020-01-02", "2020-01-03"]), + "open": [74.06, 75.0], + "high": [75.15, 76.0], + "low": [73.80, 74.5], + "close": [74.36, 75.5], + "volume": [108872000, 98000000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", 
"2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_filters_by_date(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + raw_df = pd.DataFrame({ + "date": pd.to_datetime(["2019-12-31", "2020-01-02", "2020-02-01"]), + "open": [70.0, 74.06, 80.0], + "high": [71.0, 75.15, 81.0], + "low": [69.0, 73.80, 79.0], + "close": [70.5, 74.36, 80.5], + "volume": [100000, 108872000, 90000], + }) + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = raw_df + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_with_correct_symbol(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + mock_ak.stock_us_daily.assert_called_once_with(symbol="AAPL", adjust="qfq") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.akshare_source import AkshareSource + + with patch("processor.us_daily.sources.akshare_source.ak") as mock_ak: + mock_ak.stock_us_daily.return_value = pd.DataFrame() + source = AkshareSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() From 0187e4fd048b6e1bc3b9208bfcba70c83afc4a51 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:50:49 +0800 Subject: [PATCH 28/43] feat: add MassiveSource 
for US daily data Co-Authored-By: Claude Sonnet 4.6 --- .../us_daily/sources/massive_source.py | 45 ++++++++++++++ .../test_sources/test_massive_source.py | 58 +++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 src/processor/us_daily/sources/massive_source.py create mode 100644 tests/test_us_daily/test_sources/test_massive_source.py diff --git a/src/processor/us_daily/sources/massive_source.py b/src/processor/us_daily/sources/massive_source.py new file mode 100644 index 00000000..1c45299c --- /dev/null +++ b/src/processor/us_daily/sources/massive_source.py @@ -0,0 +1,45 @@ +import logging +from datetime import datetime, timezone + +import pandas as pd + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class MassiveSource(BaseSource): + name = "massive" + + def __init__(self, client, request_interval: float = 12.0): + self.client = client + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + logger.debug(f"[massive] fetching {ticker} {start_date}~{end_date}") + + aggs = list( + self.client.list_aggs( + ticker, 1, "day", + from_=start_date, to=end_date, + adjusted=True, sort="asc", + ) + ) + + if not aggs: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + rows = [] + for a in aggs: + dt = datetime.fromtimestamp(a.timestamp / 1000, tz=timezone.utc) + rows.append({ + "date": dt.strftime("%Y-%m-%d"), + "open": a.open, + "high": a.high, + "low": a.low, + "close": a.close, + "volume": a.volume, + }) + + df = pd.DataFrame(rows, columns=STANDARD_COLUMNS) + return df diff --git a/tests/test_us_daily/test_sources/test_massive_source.py b/tests/test_us_daily/test_sources/test_massive_source.py new file mode 100644 index 00000000..749ffe6f --- /dev/null +++ b/tests/test_us_daily/test_sources/test_massive_source.py @@ -0,0 +1,58 @@ +import unittest +from unittest.mock import MagicMock +import pandas as pd 
+ + +class TestMassiveSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.massive_source import MassiveSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + agg1 = MagicMock() + agg1.open = 74.06 + agg1.high = 75.15 + agg1.low = 73.80 + agg1.close = 74.36 + agg1.volume = 108872000 + agg1.timestamp = 1577944800000 # 2020-01-02 UTC + + client = MagicMock() + client.list_aggs.return_value = iter([agg1]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["close"], 74.36) + self.assertEqual(result.iloc[0]["date"], "2020-01-02") + + def test_fetch_daily_calls_client_correctly(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + client.list_aggs.assert_called_once_with( + "AAPL", 1, "day", + from_="2020-01-01", to="2020-01-31", + adjusted=True, sort="asc", + ) + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.massive_source import MassiveSource + + client = MagicMock() + client.list_aggs.return_value = iter([]) + + source = MassiveSource(client=client, request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == "__main__": + unittest.main() From cb5781fd64147aba02334716733ed37f310575ef Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:51:01 +0800 Subject: [PATCH 29/43] feat: add YfinanceSource for US daily data Co-Authored-By: Claude Sonnet 4.6 --- 
.../us_daily/sources/yfinance_source.py | 44 ++++++++++++++ .../test_sources/test_yfinance_source.py | 60 +++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 src/processor/us_daily/sources/yfinance_source.py create mode 100644 tests/test_us_daily/test_sources/test_yfinance_source.py diff --git a/src/processor/us_daily/sources/yfinance_source.py b/src/processor/us_daily/sources/yfinance_source.py new file mode 100644 index 00000000..d5061705 --- /dev/null +++ b/src/processor/us_daily/sources/yfinance_source.py @@ -0,0 +1,44 @@ +import logging + +import pandas as pd + +try: + import yfinance as yf +except ImportError: # pragma: no cover + yf = None # type: ignore[assignment] + +from processor.us_daily.sources.base import BaseSource, STANDARD_COLUMNS + +logger = logging.getLogger("us_daily") + + +class YfinanceSource(BaseSource): + name = "yfinance" + + def __init__(self, request_interval: float = 1.0): + self.request_interval = request_interval + + def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + symbol = ticker.strip().upper() + logger.debug(f"[yfinance] fetching {symbol} {start_date}~{end_date}") + + t = yf.Ticker(symbol) + df = t.history(start=start_date, end=end_date) + + if df is None or df.empty: + return pd.DataFrame(columns=STANDARD_COLUMNS) + + df.index.name = "Date" + df = df.reset_index() + df = df.rename(columns={ + "Date": "date", + "Open": "open", + "High": "high", + "Low": "low", + "Close": "close", + "Volume": "volume", + }) + + df["date"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d") + df = df[STANDARD_COLUMNS].reset_index(drop=True) + return df diff --git a/tests/test_us_daily/test_sources/test_yfinance_source.py b/tests/test_us_daily/test_sources/test_yfinance_source.py new file mode 100644 index 00000000..a264e9aa --- /dev/null +++ b/tests/test_us_daily/test_sources/test_yfinance_source.py @@ -0,0 +1,60 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas 
as pd + + +class TestYfinanceSource(unittest.TestCase): + def test_fetch_daily_returns_standard_columns(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + from processor.us_daily.sources.base import STANDARD_COLUMNS + + raw_df = pd.DataFrame( + { + "Open": [74.06, 75.0], + "High": [75.15, 76.0], + "Low": [73.80, 74.5], + "Close": [74.36, 75.5], + "Volume": [108872000, 98000000], + }, + index=pd.to_datetime(["2020-01-02", "2020-01-03"]), + ) + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = raw_df + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + self.assertEqual(len(result), 2) + self.assertEqual(result.iloc[0]["close"], 74.36) + + def test_fetch_daily_passes_correct_params(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + source.fetch_daily("aapl", "2020-01-01", "2020-01-31") + + mock_yf.Ticker.assert_called_once_with("AAPL") + mock_ticker.history.assert_called_once_with(start="2020-01-01", end="2020-01-31") + + def test_fetch_daily_returns_empty_on_no_data(self): + from processor.us_daily.sources.yfinance_source import YfinanceSource + + with patch("processor.us_daily.sources.yfinance_source.yf") as mock_yf: + mock_ticker = MagicMock() + mock_ticker.history.return_value = pd.DataFrame() + mock_yf.Ticker.return_value = mock_ticker + source = YfinanceSource(request_interval=0.0) + result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") + + self.assertTrue(result.empty) + + +if __name__ == 
"__main__": + unittest.main() From 1a97bde7a059ae0c7d8ae04b011cec5e24a06244 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:53:09 +0800 Subject: [PATCH 30/43] feat: add ticker_lister with full exchange listing and resume support Co-Authored-By: Claude Sonnet 4.6 --- src/processor/us_daily/ticker_lister.py | 96 ++++++++++++++ tests/test_us_daily/test_ticker_lister.py | 146 ++++++++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 src/processor/us_daily/ticker_lister.py create mode 100644 tests/test_us_daily/test_ticker_lister.py diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py new file mode 100644 index 00000000..9046957c --- /dev/null +++ b/src/processor/us_daily/ticker_lister.py @@ -0,0 +1,96 @@ +import logging +import time +from datetime import date +from typing import Dict, List + +from processor.us_daily.config import Config +from processor.us_daily.storage import get_list_file_path, save_json, load_json, file_exists + +logger = logging.getLogger("us_daily") + +EXCHANGES: Dict[str, str] = { + "nasdaq": "XNAS", + "nyse": "XNYS", + "arca": "ARCX", +} + + +def _details_to_dict(details) -> dict: + """Convert a TickerDetails object to a plain dict, dropping None values.""" + result = {} + for key, value in vars(details).items(): + if key.startswith("_"): + continue + if value is None: + continue + if hasattr(value, "__dict__") and not isinstance(value, (str, int, float, bool)): + value = {k: v for k, v in vars(value).items() if not k.startswith("_") and v is not None} + result[key] = value + return result + + +def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> List[dict]: + """Fetch all tickers for an exchange and save to file. + + Supports resume: if the output file already exists, previously fetched + tickers are kept and only missing ones are fetched. 
+ """ + exchange_code = EXCHANGES[exchange_name] + file_path = get_list_file_path(config.list_dir, exchange_name) + + # Load existing tickers for resume + existing_tickers: Dict[str, dict] = {} + if file_exists(file_path): + data = load_json(file_path) + for t in data.get("tickers", []): + existing_tickers[t["ticker"]] = t + logger.info( + f"[{exchange_name}] Resuming: {len(existing_tickers)} tickers already fetched" + ) + + # Get full ticker list from API + logger.info(f"[{exchange_name}] Listing tickers for {exchange_code}") + try: + ticker_objs = list( + client.list_tickers( + market="stocks", exchange=exchange_code, active=True, limit=1000 + ) + ) + except Exception as e: + logger.error(f"[{exchange_name}] Failed to list tickers: {e}") + return list(existing_tickers.values()) + + time.sleep(config.massive_interval) + logger.info(f"[{exchange_name}] Found {len(ticker_objs)} tickers") + + # Fetch details for new tickers only + for i, ticker_obj in enumerate(ticker_objs): + ticker_str = ticker_obj.ticker + if ticker_str in existing_tickers: + continue + + try: + details = client.get_ticker_details(ticker_str) + entry = _details_to_dict(details) + existing_tickers[ticker_str] = entry + logger.info( + f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" + ) + except Exception as e: + logger.warning( + f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: {e}" + ) + + time.sleep(config.massive_interval) + + # Save result + tickers_list = list(existing_tickers.values()) + save_json(file_path, { + "updated_at": date.today().strftime("%Y-%m-%d"), + "exchange": exchange_code, + "count": len(tickers_list), + "tickers": tickers_list, + }) + + logger.info(f"[{exchange_name}] Saved {len(tickers_list)} tickers to {file_path}") + return tickers_list diff --git a/tests/test_us_daily/test_ticker_lister.py b/tests/test_us_daily/test_ticker_lister.py new file mode 100644 index 00000000..3ef467db --- /dev/null +++ b/tests/test_us_daily/test_ticker_lister.py 
@@ -0,0 +1,146 @@ +import unittest +from unittest.mock import MagicMock, patch, call +from types import SimpleNamespace +import os +import tempfile +import shutil +import json + + +class TestTickerLister(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _make_ticker(self, ticker_str): + t = MagicMock() + t.ticker = ticker_str + return t + + def _make_details(self, **kwargs): + """Create a SimpleNamespace TickerDetails with all fields as attributes.""" + return SimpleNamespace(**kwargs) + + def test_list_tickers_for_exchange(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + return {"AAPL": details_aapl, "MSFT": details_msft}[ticker] + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + file_path = os.path.join(self.test_dir, "nasdaq.json") + self.assertTrue(os.path.exists(file_path)) + + with open(file_path) as f: + data = json.load(f) + + self.assertEqual(data["exchange"], "XNAS") + self.assertEqual(data["count"], 2) + tickers = [t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertIn("MSFT", tickers) + + def test_resume_skips_existing_tickers(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = 
Config(list_dir=self.test_dir, massive_interval=0) + + # Pre-populate file with AAPL already fetched + file_path = os.path.join(self.test_dir, "nasdaq.json") + existing_data = { + "updated_at": "2026-04-22", + "exchange": "XNAS", + "count": 1, + "tickers": [ + {"ticker": "AAPL", "name": "Apple Inc", "market_cap": 3e12}, + ], + } + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, "w") as f: + json.dump(existing_data, f) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("AAPL"), + self._make_ticker("MSFT"), + ]) + + details_msft = self._make_details( + ticker="MSFT", name="Microsoft", market_cap=2.8e12, + primary_exchange="XNAS", + ) + client.get_ticker_details.return_value = details_msft + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + # Should only call get_ticker_details for MSFT (AAPL already exists) + client.get_ticker_details.assert_called_once_with("MSFT") + + with open(file_path) as f: + data = json.load(f) + self.assertEqual(data["count"], 2) + + def test_skips_ticker_on_details_error(self): + from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.config import Config + + config = Config(list_dir=self.test_dir, massive_interval=0) + + client = MagicMock() + client.list_tickers.return_value = iter([ + self._make_ticker("FAIL"), + self._make_ticker("AAPL"), + ]) + + details_aapl = self._make_details( + ticker="AAPL", name="Apple Inc", market_cap=3e12, + primary_exchange="XNAS", + ) + + def mock_details(ticker): + if ticker == "FAIL": + raise Exception("API error") + return details_aapl + + client.get_ticker_details.side_effect = mock_details + + with patch("processor.us_daily.ticker_lister.time.sleep"): + list_tickers_for_exchange(client, "nasdaq", config) + + file_path = os.path.join(self.test_dir, "nasdaq.json") + with open(file_path) as f: + data = json.load(f) + + tickers = 
[t["ticker"] for t in data["tickers"]] + self.assertIn("AAPL", tickers) + self.assertNotIn("FAIL", tickers) + + +if __name__ == "__main__": + unittest.main() From f4f3a68a894306d059d8443c014abdc13cbf3013 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:53:48 +0800 Subject: [PATCH 31/43] refactor: update agg_fetcher to use SourceManager with failover Replace direct massive REST client calls with SourceManager.fetch_daily, catch FetchError instead of generic exceptions, use config.daily_dir, add source field to saved JSON, and remove inline retry/sleep logic. Co-Authored-By: Claude Sonnet 4.6 --- src/processor/us_daily/agg_fetcher.py | 77 ++++++---------- tests/test_us_daily/test_agg_fetcher.py | 114 ++++++++++-------------- 2 files changed, 74 insertions(+), 117 deletions(-) diff --git a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py index cebf8d53..d1d97bba 100644 --- a/src/processor/us_daily/agg_fetcher.py +++ b/src/processor/us_daily/agg_fetcher.py @@ -1,10 +1,10 @@ import calendar import logging -import time from datetime import date, datetime from typing import List, Tuple from processor.us_daily.config import Config +from processor.us_daily.sources.manager import FetchError from processor.us_daily.storage import ( get_month_file_path, file_exists, @@ -45,73 +45,48 @@ def current_month() -> str: return f"{today.year:04d}-{today.month:02d}" -def fetch_ticker_aggs(client, ticker: str, config: Config) -> dict: +def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: + """Fetch monthly OHLCV data for a ticker using SourceManager. + + Args: + source_manager: SourceManager instance with failover sources. + ticker: Stock ticker symbol (e.g. "AAPL"). + config: Config with daily_dir, start_date, max_retries. + + Returns: + Dict with "failures" list of failed months. 
+ """ months = generate_months(config.start_date, current_month()) failures = [] for month in months: - file_path = get_month_file_path(config.data_dir, ticker, month) + file_path = get_month_file_path(config.daily_dir, ticker, month) if file_exists(file_path) and not is_current_month(month): logger.debug(f" {ticker} {month}: exists, skipping") continue start_date, end_date = get_month_bounds(month) - aggs = None - last_error = None - - for attempt in range(1, config.max_retries + 1): - try: - aggs_iter = client.list_aggs( - ticker, - 1, - "day", - from_=start_date, - to=end_date, - adjusted=True, - sort="asc", - ) - aggs = list(aggs_iter) - break - except Exception as e: - last_error = e - logger.warning( - f" {ticker} {month}: attempt {attempt}/{config.max_retries} failed: {e}" - ) - if attempt < config.max_retries: - time.sleep(config.request_interval) - - if aggs is None: - failures.append( - { - "ticker": ticker, - "month": month, - "error": str(last_error), - } - ) - logger.error(f" {ticker} {month}: all retries failed, skipping") + + try: + df, source_name = source_manager.fetch_daily(ticker, start_date, end_date) + except FetchError as e: + failures.append({ + "ticker": ticker, + "month": month, + "error": str(e), + }) + logger.error(f" {ticker} {month}: {e}") continue data = { "ticker": ticker, "month": month, + "source": source_name, "fetched_at": datetime.now().isoformat(timespec="seconds"), - "data": [ - { - "open": a.open, - "high": a.high, - "low": a.low, - "close": a.close, - "volume": a.volume, - "vwap": a.vwap, - "timestamp": a.timestamp, - "transactions": a.transactions, - } - for a in aggs - ], + "data": df.to_dict(orient="records"), } save_json(file_path, data) - logger.info(f" {ticker} {month}: fetched {len(aggs)} bars") - time.sleep(config.request_interval) + logger.info(f" {ticker} {month}: fetched {len(df)} bars from {source_name}") return {"failures": failures} diff --git a/tests/test_us_daily/test_agg_fetcher.py 
b/tests/test_us_daily/test_agg_fetcher.py index cb6823e7..9d2a7f03 100644 --- a/tests/test_us_daily/test_agg_fetcher.py +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -73,23 +73,28 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.test_dir) + def _make_manager(self, df=None, source_name="akshare", error=None): + from processor.us_daily.sources.manager import SourceManager + + manager = MagicMock(spec=SourceManager) + if error: + manager.fetch_daily.side_effect = error + else: + manager.fetch_daily.return_value = (df, source_name) + return manager + def test_skips_existing_historical_month(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config - config = Config( - start_date="2020-01", - data_dir=self.test_dir, - request_interval=0, - ) + config = Config(start_date="2020-01", daily_dir=self.test_dir) - # Create existing file for 2020-01 ticker_dir = os.path.join(self.test_dir, "AAPL") os.makedirs(ticker_dir) with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f: json.dump({"ticker": "AAPL", "month": "2020-01", "data": []}, f) - client = MagicMock() + manager = self._make_manager() with patch( "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] @@ -97,35 +102,27 @@ def test_skips_existing_historical_month(self): with patch( "processor.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("processor.us_daily.agg_fetcher.time.sleep"): - result = fetch_ticker_aggs(client, "AAPL", config) + result = fetch_ticker_aggs(manager, "AAPL", config) - # Should not have called list_aggs since file exists and not current month - client.list_aggs.assert_not_called() + manager.fetch_daily.assert_not_called() self.assertEqual(result["failures"], []) def test_fetches_missing_month(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config + import pandas as pd - config = Config( - start_date="2020-01", - 
data_dir=self.test_dir, - request_interval=0, - ) - - agg1 = MagicMock() - agg1.open = 74.06 - agg1.high = 75.15 - agg1.low = 73.80 - agg1.close = 74.36 - agg1.volume = 108872000.0 - agg1.vwap = 74.53 - agg1.timestamp = 1577854800000 - agg1.transactions = 480012 + config = Config(start_date="2020-01", daily_dir=self.test_dir) - client = MagicMock() - client.list_aggs.return_value = iter([agg1]) + df = pd.DataFrame({ + "date": ["2020-01-02"], + "open": [74.06], + "high": [75.15], + "low": [73.80], + "close": [74.36], + "volume": [108872000], + }) + manager = self._make_manager(df=df, source_name="akshare") with patch( "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] @@ -133,10 +130,8 @@ def test_fetches_missing_month(self): with patch( "processor.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("processor.us_daily.agg_fetcher.time.sleep"): - result = fetch_ticker_aggs(client, "AAPL", config) + result = fetch_ticker_aggs(manager, "AAPL", config) - # Verify file was created file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") self.assertTrue(os.path.exists(file_path)) @@ -144,38 +139,32 @@ def test_fetches_missing_month(self): data = json.load(f) self.assertEqual(data["ticker"], "AAPL") self.assertEqual(data["month"], "2020-01") + self.assertEqual(data["source"], "akshare") self.assertEqual(len(data["data"]), 1) - self.assertEqual(data["data"][0]["open"], 74.06) + self.assertEqual(data["data"][0]["close"], 74.36) self.assertEqual(result["failures"], []) def test_refreshes_current_month(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config + import pandas as pd - config = Config( - start_date="2026-04", - data_dir=self.test_dir, - request_interval=0, - ) + config = Config(start_date="2026-04", daily_dir=self.test_dir) - # Create existing file for current month ticker_dir = os.path.join(self.test_dir, "AAPL") os.makedirs(ticker_dir) with 
open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) - agg1 = MagicMock() - agg1.open = 200.0 - agg1.high = 210.0 - agg1.low = 195.0 - agg1.close = 205.0 - agg1.volume = 50000000.0 - agg1.vwap = 203.0 - agg1.timestamp = 1714348800000 - agg1.transactions = 300000 - - client = MagicMock() - client.list_aggs.return_value = iter([agg1]) + df = pd.DataFrame({ + "date": ["2026-04-01"], + "open": [200.0], + "high": [210.0], + "low": [195.0], + "close": [205.0], + "volume": [50000000], + }) + manager = self._make_manager(df=df, source_name="yfinance") with patch( "processor.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] @@ -183,26 +172,21 @@ def test_refreshes_current_month(self): with patch( "processor.us_daily.agg_fetcher.is_current_month", return_value=True ): - with patch("processor.us_daily.agg_fetcher.time.sleep"): - result = fetch_ticker_aggs(client, "AAPL", config) + result = fetch_ticker_aggs(manager, "AAPL", config) - # Should have called list_aggs even though file exists - client.list_aggs.assert_called_once() + manager.fetch_daily.assert_called_once() self.assertEqual(result["failures"], []) - def test_records_failure_after_retries(self): + def test_records_failure_when_all_sources_fail(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config + from processor.us_daily.sources.manager import FetchError - config = Config( - start_date="2020-01", - data_dir=self.test_dir, - request_interval=0, - max_retries=2, - ) + config = Config(start_date="2020-01", daily_dir=self.test_dir, max_retries=2) - client = MagicMock() - client.list_aggs.side_effect = Exception("API timeout") + manager = self._make_manager( + error=FetchError("All sources failed for AAPL") + ) with patch( "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] @@ -210,13 +194,11 @@ def test_records_failure_after_retries(self): with patch( 
"processor.us_daily.agg_fetcher.is_current_month", return_value=False ): - with patch("processor.us_daily.agg_fetcher.time.sleep"): - result = fetch_ticker_aggs(client, "AAPL", config) + result = fetch_ticker_aggs(manager, "AAPL", config) self.assertEqual(len(result["failures"]), 1) self.assertEqual(result["failures"][0]["ticker"], "AAPL") self.assertEqual(result["failures"][0]["month"], "2020-01") - self.assertIn("API timeout", result["failures"][0]["error"]) if __name__ == "__main__": From ec7b7ef8d5145d77a0e72cc72c6f85bf927f1586 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:55:15 +0800 Subject: [PATCH 32/43] refactor: update __main__.py with SourceManager and remove ticker_filter Co-Authored-By: Claude Sonnet 4.6 --- src/processor/us_daily/__main__.py | 102 +++++++++++++------- src/processor/us_daily/ticker_filter.py | 61 ------------ tests/test_us_daily/test_ticker_filter.py | 111 ---------------------- 3 files changed, 68 insertions(+), 206 deletions(-) delete mode 100644 src/processor/us_daily/ticker_filter.py delete mode 100644 tests/test_us_daily/test_ticker_filter.py diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py index 56d9deac..1d4f43b3 100644 --- a/src/processor/us_daily/__main__.py +++ b/src/processor/us_daily/__main__.py @@ -1,19 +1,24 @@ import logging import os import sys -from datetime import datetime from massive import RESTClient from processor.us_daily.config import load_config -from processor.us_daily.storage import ( - get_tickers_file_path, - file_exists, - save_json, - load_json, -) -from processor.us_daily.ticker_filter import filter_top_tickers +from processor.us_daily.ticker_lister import list_tickers_for_exchange, EXCHANGES from processor.us_daily.agg_fetcher import fetch_ticker_aggs +from processor.us_daily.sources.akshare_source import AkshareSource +from processor.us_daily.sources.yfinance_source import YfinanceSource +from 
processor.us_daily.sources.massive_source import MassiveSource +from processor.us_daily.sources.manager import SourceManager +from processor.us_daily.storage import get_list_file_path, load_json, file_exists + + +SOURCE_CLASSES = { + "akshare": AkshareSource, + "yfinance": YfinanceSource, + "massive": MassiveSource, +} def setup_logging(): @@ -40,6 +45,42 @@ def setup_logging(): return logger +def build_source_manager(config, client) -> SourceManager: + """Build SourceManager from config priority list.""" + interval_map = { + "akshare": config.akshare_interval, + "yfinance": config.yfinance_interval, + "massive": config.massive_interval, + } + sources = [] + for name in config.data_source_priority: + cls = SOURCE_CLASSES.get(name) + if cls is None: + continue + if name == "massive": + sources.append(cls(client=client, request_interval=interval_map[name])) + else: + sources.append(cls(request_interval=interval_map[name])) + return SourceManager(sources) + + +def load_all_tickers(config) -> list: + """Load tickers from all exchange files in list_dir.""" + all_tickers = [] + seen = set() + for exchange_name in config.exchanges: + file_path = get_list_file_path(config.list_dir, exchange_name) + if not file_exists(file_path): + continue + data = load_json(file_path) + for t in data.get("tickers", []): + ticker = t["ticker"] + if ticker not in seen: + seen.add(ticker) + all_tickers.append(t) + return all_tickers + + def main(): logger = setup_logging() config = load_config() @@ -49,38 +90,31 @@ def main(): client = RESTClient() - # Step 1: Get ticker list - tickers_path = get_tickers_file_path(config.data_dir) - if config.refresh_tickers or not file_exists(tickers_path): - logger.info("Filtering top tickers from API...") - tickers = filter_top_tickers(client, config) - save_json( - tickers_path, - { - "updated_at": datetime.now().strftime("%Y-%m-%d"), - "market_cap_min": config.market_cap_min, - "tickers": tickers, - }, - ) - logger.info(f"Saved {len(tickers)} tickers to 
{tickers_path}") - else: - data = load_json(tickers_path) - tickers = data["tickers"] - logger.info( - f"Loaded {len(tickers)} tickers from {tickers_path} " - f"(updated: {data.get('updated_at', 'unknown')})" - ) - - # 先获取ticket - return 0 + # Step 1: Fetch ticker lists per exchange + if config.refresh_tickers or any( + not file_exists(get_list_file_path(config.list_dir, ex)) + for ex in config.exchanges + ): + for exchange_name in config.exchanges: + if exchange_name not in EXCHANGES: + logger.warning(f"Unknown exchange: {exchange_name}, skipping") + continue + logger.info(f"Fetching ticker list for {exchange_name}...") + list_tickers_for_exchange(client, exchange_name, config) + + # Load all tickers + tickers = load_all_tickers(config) + logger.info(f"Total tickers loaded: {len(tickers)}") + + # Step 2: Fetch daily data + source_manager = build_source_manager(config, client) - # Step 2: Fetch agg data for each ticker all_failures = [] total = len(tickers) for i, ticker_info in enumerate(tickers): ticker = ticker_info["ticker"] logger.info(f"[{i + 1}/{total}] Processing {ticker}") - result = fetch_ticker_aggs(client, ticker, config) + result = fetch_ticker_aggs(source_manager, ticker, config) if result["failures"]: all_failures.extend(result["failures"]) diff --git a/src/processor/us_daily/ticker_filter.py b/src/processor/us_daily/ticker_filter.py deleted file mode 100644 index 23647e93..00000000 --- a/src/processor/us_daily/ticker_filter.py +++ /dev/null @@ -1,61 +0,0 @@ -import logging -import time -from typing import List - -from processor.us_daily.config import Config - -logger = logging.getLogger("us_daily") - -EXCHANGES = ["XNAS", "XNYS", "ARCX"] - - -def filter_top_tickers(client, config: Config) -> List[dict]: - result = [] - for exchange in EXCHANGES: - logger.info(f"Fetching tickers for exchange: {exchange}") - try: - tickers = client.list_tickers( - market="stocks", - exchange=exchange, - active=True, - limit=1000, - ) - except Exception as e: - 
logger.error(f"Failed to list tickers for {exchange}: {e}") - continue - - tickers = list(tickers) - logger.info(f"Total tickers found: {len(tickers)}") - time.sleep(config.request_interval) - - for ticker_obj in tickers: - ticker_str = ticker_obj.ticker - try: - details = client.get_ticker_details(ticker_str) - time.sleep(config.request_interval) - except Exception as e: - logger.warning(f"Failed to get details for {ticker_str}: {e}") - continue - - if details.market_cap is None: - logger.debug(f"{ticker_str}: no market_cap data, skipping") - continue - - if details.market_cap >= config.market_cap_min: - entry = { - "ticker": details.ticker, - "name": details.name, - "market_cap": details.market_cap, - "exchange": details.primary_exchange, - } - result.append(entry) - logger.info( - f" {details.ticker}: market_cap={details.market_cap:.0f} included" - ) - else: - logger.debug( - f" {ticker_str}: market_cap={details.market_cap:.0f} < {config.market_cap_min:.0f}, skipping" - ) - - logger.info(f"Total top tickers found: {len(result)}") - return result diff --git a/tests/test_us_daily/test_ticker_filter.py b/tests/test_us_daily/test_ticker_filter.py deleted file mode 100644 index 284a9579..00000000 --- a/tests/test_us_daily/test_ticker_filter.py +++ /dev/null @@ -1,111 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch, call -from dataclasses import dataclass - - -class TestTickerFilter(unittest.TestCase): - def _make_ticker(self, ticker_str, exchange): - t = MagicMock() - t.ticker = ticker_str - t.primary_exchange = exchange - return t - - def _make_details(self, ticker_str, name, market_cap, exchange): - d = MagicMock() - d.ticker = ticker_str - d.name = name - d.market_cap = market_cap - d.primary_exchange = exchange - return d - - def test_filter_top_tickers_filters_by_market_cap(self): - from processor.us_daily.ticker_filter import filter_top_tickers - from processor.us_daily.config import Config - - config = Config(market_cap_min=5e9, 
request_interval=0) - - client = MagicMock() - # list_tickers returns different tickers per exchange - client.list_tickers.return_value = iter( - [ - self._make_ticker("AAPL", "XNAS"), - self._make_ticker("TINY", "XNAS"), - ] - ) - - # get_ticker_details: AAPL has large cap, TINY does not - def mock_details(ticker): - if ticker == "AAPL": - return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") - elif ticker == "TINY": - return self._make_details("TINY", "Tiny Corp", 1e9, "XNAS") - - client.get_ticker_details.side_effect = mock_details - - with patch("processor.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): - with patch("processor.us_daily.ticker_filter.time.sleep"): - result = filter_top_tickers(client, config) - - tickers = [t["ticker"] for t in result] - self.assertIn("AAPL", tickers) - self.assertNotIn("TINY", tickers) - - def test_filter_top_tickers_includes_required_fields(self): - from processor.us_daily.ticker_filter import filter_top_tickers - from processor.us_daily.config import Config - - config = Config(market_cap_min=5e9, request_interval=0) - - client = MagicMock() - client.list_tickers.return_value = iter( - [ - self._make_ticker("MSFT", "XNYS"), - ] - ) - client.get_ticker_details.return_value = self._make_details( - "MSFT", "Microsoft Corporation", 2.8e12, "XNYS" - ) - - with patch("processor.us_daily.ticker_filter.EXCHANGES", ["XNYS"]): - with patch("processor.us_daily.ticker_filter.time.sleep"): - result = filter_top_tickers(client, config) - - self.assertEqual(len(result), 1) - entry = result[0] - self.assertEqual(entry["ticker"], "MSFT") - self.assertEqual(entry["name"], "Microsoft Corporation") - self.assertEqual(entry["market_cap"], 2.8e12) - self.assertEqual(entry["exchange"], "XNYS") - - def test_filter_skips_ticker_on_details_error(self): - from processor.us_daily.ticker_filter import filter_top_tickers - from processor.us_daily.config import Config - - config = Config(market_cap_min=5e9, request_interval=0) - - client = MagicMock() 
- client.list_tickers.return_value = iter( - [ - self._make_ticker("FAIL", "XNAS"), - self._make_ticker("AAPL", "XNAS"), - ] - ) - - def mock_details(ticker): - if ticker == "FAIL": - raise Exception("API error") - return self._make_details("AAPL", "Apple Inc.", 3e12, "XNAS") - - client.get_ticker_details.side_effect = mock_details - - with patch("processor.us_daily.ticker_filter.EXCHANGES", ["XNAS"]): - with patch("processor.us_daily.ticker_filter.time.sleep"): - result = filter_top_tickers(client, config) - - tickers = [t["ticker"] for t in result] - self.assertIn("AAPL", tickers) - self.assertNotIn("FAIL", tickers) - - -if __name__ == "__main__": - unittest.main() From a5cd8a6e4d78c56682660367cd9e6221f50e469e Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:26:00 +0800 Subject: [PATCH 33/43] ci --- .../plans/20250423-provider-deps-design.md | 137 ++++ .claude/plans/20250423-provider-deps-plan.md | 702 ++++++++++++++++++ .../prds/.20250423-OPT_DATA_PROVIDER.md.swp | Bin 12288 -> 0 bytes .claude/prds/20250423-OPT_DATA_PROVIDER.md | 34 +- data/us_list/arca.json | 326 ++++++++ data/us_list/nasdaq.json | 499 +++++++++++++ data/us_list/nyse.json | 635 ++++++++++++++++ pyproject.toml | 4 +- src/processor/us_daily/__main__.py | 26 +- src/processor/us_daily/agg_fetcher.py | 4 +- src/processor/us_daily/config.json | 5 +- src/processor/us_daily/config.py | 7 +- .../us_daily/sources/massive_source.py | 6 +- src/processor/us_daily/ticker_lister.py | 21 +- tests/test_us_daily/test_agg_fetcher.py | 8 +- tests/test_us_daily/test_config.py | 8 +- .../test_sources/test_massive_source.py | 8 +- tests/test_us_daily/test_ticker_lister.py | 6 +- 18 files changed, 2382 insertions(+), 54 deletions(-) create mode 100644 .claude/plans/20250423-provider-deps-design.md create mode 100644 .claude/plans/20250423-provider-deps-plan.md delete mode 100644 .claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp create mode 100644 
data/us_list/arca.json create mode 100644 data/us_list/nasdaq.json create mode 100644 data/us_list/nyse.json diff --git a/.claude/plans/20250423-provider-deps-design.md b/.claude/plans/20250423-provider-deps-design.md new file mode 100644 index 00000000..b23aeffb --- /dev/null +++ b/.claude/plans/20250423-provider-deps-design.md @@ -0,0 +1,137 @@ +# Provider Dependencies Resolution Design + +**Date:** 2025-04-23 +**Scope:** Resolve `from src.*` imports in `provider/` by creating private internal modules + +--- + +## 1. Goal + +`provider/` 模块依赖 3 个来自外部项目 (daily_stock_analysis) 的模块:`src.config`、`src.data`、`src.report_language`。将这些依赖内化为 `provider/` 的私有模块,使 provider 完全自包含。 + +## 2. Reference Repository + +https://github.com/ZhuLinsen/daily_stock_analysis — 原始项目,provider 模块从该项目中提取。 + +## 3. New Files + +### 3.1 `src/provider/_config.py` (~60 行) + +精简的 Config 单例,仅包含 provider 实际使用的 15 个属性,从环境变量读取: + +```python +import os +from dataclasses import dataclass +from threading import Lock + +SUPPORTED_REPORT_LANGUAGES = ("zh", "en") +_REPORT_LANGUAGE_ALIASES = { + "zh-cn": "zh", "zh_cn": "zh", "zh-hans": "zh", "zh_hans": "zh", + "zh-tw": "zh", "zh_tw": "zh", "cn": "zh", "chinese": "zh", + "english": "en", "en-us": "en", "en_us": "en", "en-gb": "en", "en_gb": "en", +} + +def normalize_report_language(value, default="zh"): + candidate = (value or default).strip().lower().replace(" ", "_") + candidate = _REPORT_LANGUAGE_ALIASES.get(candidate, candidate) + return candidate if candidate in SUPPORTED_REPORT_LANGUAGES else default + +@dataclass +class Config: + tushare_token: str = "" + longbridge_app_key: str = "" + longbridge_app_secret: str = "" + longbridge_access_token: str = "" + tickflow_api_key: str = "" + enable_eastmoney_patch: bool = True + enable_realtime_quote: bool = True + enable_chip_distribution: bool = True + enable_fundamental_pipeline: bool = True + prefetch_realtime_quotes: bool = True + realtime_source_priority: str = "tencent,akshare,efinance" + 
fundamental_fetch_timeout_seconds: float = 30.0 + fundamental_stage_timeout_seconds: float = 60.0 + fundamental_cache_ttl_seconds: int = 3600 + fundamental_cache_max_entries: int = 256 + fundamental_retry_max: int = 2 + +_instance = None +_lock = Lock() + +def get_config() -> Config: + global _instance + if _instance is not None: + return _instance + with _lock: + if _instance is not None: + return _instance + def _env_bool(key, default="true"): + return os.environ.get(key, default).lower() != "false" + _instance = Config( + tushare_token=os.environ.get("TUSHARE_TOKEN", ""), + longbridge_app_key=os.environ.get("LONGBRIDGE_APP_KEY", ""), + longbridge_app_secret=os.environ.get("LONGBRIDGE_APP_SECRET", ""), + longbridge_access_token=os.environ.get("LONGBRIDGE_ACCESS_TOKEN", ""), + tickflow_api_key=os.environ.get("TICKFLOW_API_KEY", ""), + enable_eastmoney_patch=_env_bool("ENABLE_EASTMONEY_PATCH"), + enable_realtime_quote=_env_bool("ENABLE_REALTIME_QUOTE"), + enable_chip_distribution=_env_bool("ENABLE_CHIP_DISTRIBUTION"), + enable_fundamental_pipeline=_env_bool("ENABLE_FUNDAMENTAL_PIPELINE"), + prefetch_realtime_quotes=_env_bool("PREFETCH_REALTIME_QUOTES"), + realtime_source_priority=os.environ.get("REALTIME_SOURCE_PRIORITY", "tencent,akshare,efinance"), + fundamental_fetch_timeout_seconds=float(os.environ.get("FUNDAMENTAL_FETCH_TIMEOUT_SECONDS", "30")), + fundamental_stage_timeout_seconds=float(os.environ.get("FUNDAMENTAL_STAGE_TIMEOUT_SECONDS", "60")), + fundamental_cache_ttl_seconds=int(os.environ.get("FUNDAMENTAL_CACHE_TTL_SECONDS", "3600")), + fundamental_cache_max_entries=int(os.environ.get("FUNDAMENTAL_CACHE_MAX_ENTRIES", "256")), + fundamental_retry_max=int(os.environ.get("FUNDAMENTAL_RETRY_MAX", "2")), + ) + return _instance +``` + +### 3.2 `src/provider/_data/stock_mapping.py` + +从参考仓库 `src/data/stock_mapping.py` 完整复制。包含: +- `STOCK_NAME_MAP` — 股票代码→名称映射字典(A 股、美股、港股) +- `is_meaningful_stock_name(name, stock_code)` — 判断股票名是否有效 + +### 3.3 
`src/provider/_data/stock_index_loader.py` + +从参考仓库 `src/data/stock_index_loader.py` 完整复制,仅改一处 import: +```python +# from +from src.data.stock_mapping import is_meaningful_stock_name +# to +from provider._data.stock_mapping import is_meaningful_stock_name +``` + +### 3.4 `src/provider/_data/__init__.py` + +```python +from provider._data.stock_mapping import STOCK_NAME_MAP + +__all__ = ["STOCK_NAME_MAP"] +``` + +## 4. Import Path Changes (~20 处) + +所有变更均为 `from src.*` → `from provider._*` 的机械替换: + +| 文件 | 原 import | 新 import | +|------|-----------|-----------| +| `base.py` (line 27) | `from src.data.stock_index_loader import get_index_stock_name` | `from provider._data.stock_index_loader import get_index_stock_name` | +| `base.py` (line 28) | `from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name` | `from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name` | +| `base.py` (9 处 lazy) | `from src.config import get_config` | `from provider._config import get_config` | +| `efinance_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | +| `akshare_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | +| `tushare_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | +| `yfinance_fetcher.py` | `from src.data.stock_mapping import ...` | `from provider._data.stock_mapping import ...` | +| `longbridge_fetcher.py` | `from src.report_language import normalize_report_language` | `from provider._config import normalize_report_language` | +| `longbridge_fetcher.py` | `from src.config import get_config` | `from provider._config import get_config` | + +## 5. 
Not In Scope + +- `.env` 文件加载(只读 `os.environ`) +- Config 验证逻辑(`ConfigIssue` 系统) +- `report_language.py` 的其他函数 +- provider 功能性测试(当前无测试,不新增) +- `__init__.py` 的 import 清理 diff --git a/.claude/plans/20250423-provider-deps-plan.md b/.claude/plans/20250423-provider-deps-plan.md new file mode 100644 index 00000000..13f23e8c --- /dev/null +++ b/.claude/plans/20250423-provider-deps-plan.md @@ -0,0 +1,702 @@ +# Provider Dependencies Resolution Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Resolve all `from src.*` imports in `provider/` by creating private internal modules (`_config.py`, `_data/`), making provider fully self-contained. + +**Architecture:** Create `src/provider/_config.py` (slim Config singleton + `normalize_report_language`), `src/provider/_data/` (stock mapping copied from reference repo). Then replace all `from src.*` imports with `from provider._*` imports across 6 fetcher files. + +**Tech Stack:** Python 3.9+, dataclasses, os.environ + +**Design Doc:** `.claude/plans/20250423-provider-deps-design.md` + +--- + +### Task 1: Create `_data/stock_mapping.py` + +**Files:** +- Create: `src/provider/_data/__init__.py` +- Create: `src/provider/_data/stock_mapping.py` + +- [ ] **Step 1: Create `_data/` directory** + +```bash +mkdir -p src/provider/_data +``` + +- [ ] **Step 2: Create `_data/__init__.py`** + +Write `src/provider/_data/__init__.py`: + +```python +# -*- coding: utf-8 -*- +from provider._data.stock_mapping import STOCK_NAME_MAP + +__all__ = ["STOCK_NAME_MAP"] +``` + +- [ ] **Step 3: Create `_data/stock_mapping.py`** + +Write `src/provider/_data/stock_mapping.py` — copy the complete file from reference repo (https://github.com/ZhuLinsen/daily_stock_analysis/blob/main/src/data/stock_mapping.py). This file has no external imports. 
It contains: +- `STOCK_NAME_MAP` dict (~90 entries: A-shares, US stocks, HK stocks) +- `is_meaningful_stock_name(name, stock_code)` function + +```python +# -*- coding: utf-8 -*- +from __future__ import annotations + +""" +=================================== +股票代码与名称映射 +=================================== + +Shared stock code -> name mapping, used by analyzer, data_provider, and name_to_code_resolver. +""" + +# Stock code -> name mapping (common stocks) +STOCK_NAME_MAP = { + # === A-shares === + "600519": "贵州茅台", + "000001": "平安银行", + "300750": "宁德时代", + "002594": "比亚迪", + "600036": "招商银行", + "601318": "中国平安", + "000858": "五粮液", + "600276": "恒瑞医药", + "601012": "隆基绿能", + "002475": "立讯精密", + "300059": "东方财富", + "002415": "海康威视", + "600900": "长江电力", + "601166": "兴业银行", + "600028": "中国石化", + "600030": "中信证券", + "600031": "三一重工", + "600050": "中国联通", + "600104": "上汽集团", + "600111": "北方稀土", + "600150": "中国船舶", + "600309": "万华化学", + "600406": "国电南瑞", + "600690": "海尔智家", + "600760": "中航沈飞", + "600809": "山西汾酒", + "600887": "伊利股份", + "600930": "华电新能", + "601088": "中国神华", + "601127": "赛力斯", + "601211": "国泰海通", + "601225": "陕西煤业", + "601288": "农业银行", + "601328": "交通银行", + "601398": "工商银行", + "601601": "中国太保", + "601628": "中国人寿", + "601658": "邮储银行", + "601668": "中国建筑", + "601728": "中国电信", + "601816": "京沪高铁", + "601857": "中国石油", + "601888": "中国中免", + "601899": "紫金矿业", + "601919": "中远海控", + "601985": "中国核电", + "601988": "中国银行", + "603019": "中科曙光", + "603259": "药明康德", + "603501": "豪威集团", + "603993": "洛阳钼业", + "688008": "澜起科技", + "688012": "中微公司", + "688041": "海光信息", + "688111": "金山办公", + "688256": "寒武纪", + "688981": "中芯国际", + # === US stocks === + "AAPL": "苹果", + "TSLA": "特斯拉", + "MSFT": "微软", + "GOOGL": "谷歌A", + "GOOG": "谷歌C", + "AMZN": "亚马逊", + "NVDA": "英伟达", + "META": "Meta", + "AMD": "AMD", + "INTC": "英特尔", + "BABA": "阿里巴巴", + "PDD": "拼多多", + "JD": "京东", + "BIDU": "百度", + "NIO": "蔚来", + "XPEV": "小鹏汽车", + "LI": "理想汽车", + "COIN": "Coinbase", + "MSTR": "MicroStrategy", + # === HK 
stocks (5-digit) === + "00700": "腾讯控股", + "03690": "美团", + "01810": "小米集团", + "09988": "阿里巴巴", + "09618": "京东集团", + "09888": "百度集团", + "01024": "快手", + "00981": "中芯国际", + "02015": "理想汽车", + "09868": "小鹏汽车", + "00005": "汇丰控股", + "01299": "友邦保险", + "00941": "中国移动", + "00883": "中国海洋石油", +} + + +def is_meaningful_stock_name(name: str | None, stock_code: str) -> bool: + """Return whether a stock name is useful for display or caching.""" + if not name: + return False + + normalized_name = str(name).strip() + if not normalized_name: + return False + + normalized_code = (stock_code or "").strip().upper() + if normalized_name.upper() == normalized_code: + return False + + if normalized_name.startswith("股票"): + return False + + placeholder_values = { + "N/A", + "NA", + "NONE", + "NULL", + "--", + "-", + "UNKNOWN", + "TICKER", + } + if normalized_name.upper() in placeholder_values: + return False + + return True +``` + +- [ ] **Step 4: Commit** + +```bash +git add src/provider/_data/ +git commit -m "feat: add provider/_data/stock_mapping module" +``` + +--- + +### Task 2: Create `_data/stock_index_loader.py` + +**Files:** +- Create: `src/provider/_data/stock_index_loader.py` + +- [ ] **Step 1: Create `_data/stock_index_loader.py`** + +Write `src/provider/_data/stock_index_loader.py` — copy from reference repo (https://github.com/ZhuLinsen/daily_stock_analysis/blob/main/src/data/stock_index_loader.py) with ONE import change on line 10: + +```python +# Change from: +from src.data.stock_mapping import is_meaningful_stock_name +# To: +from provider._data.stock_mapping import is_meaningful_stock_name +``` + +Full file content: + +```python +# -*- coding: utf-8 -*- +from __future__ import annotations + +import json +import logging +from pathlib import Path +from threading import RLock +from typing import Dict, Iterable + +from provider._data.stock_mapping import is_meaningful_stock_name + +logger = logging.getLogger(__name__) + +_STOCK_INDEX_FILENAME = "stocks.index.json" 
+_STOCK_INDEX_CACHE: Dict[str, str] | None = None +_STOCK_INDEX_CACHE_LOCK = RLock() + + +def get_stock_index_candidate_paths() -> tuple[Path, ...]: + """Return the supported locations for the generated stock index.""" + repo_root = Path(__file__).resolve().parents[2] + return ( + repo_root / "apps" / "dsa-web" / "public" / _STOCK_INDEX_FILENAME, + repo_root / "static" / _STOCK_INDEX_FILENAME, + ) + + +def _add_lookup_key(keys: set[str], value: str) -> None: + candidate = str(value or "").strip() + if not candidate: + return + keys.add(candidate) + keys.add(candidate.upper()) + + +def _build_lookup_keys(canonical_code: str, display_code: str) -> Iterable[str]: + keys: set[str] = set() + _add_lookup_key(keys, canonical_code) + _add_lookup_key(keys, display_code) + + canonical_upper = str(canonical_code or "").strip().upper() + display_upper = str(display_code or "").strip().upper() + + if "." in canonical_upper: + base, suffix = canonical_upper.rsplit(".", 1) + if suffix in {"SH", "SZ", "SS", "BJ"} and base.isdigit(): + _add_lookup_key(keys, base) + elif suffix == "HK" and base.isdigit() and 1 <= len(base) <= 5: + digits = base.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + for candidate in (canonical_upper, display_upper): + if candidate.startswith("HK"): + digits = candidate[2:] + if digits.isdigit() and 1 <= len(digits) <= 5: + digits = digits.zfill(5) + _add_lookup_key(keys, digits) + _add_lookup_key(keys, f"HK{digits}") + + return keys + + +def _load_stock_index_file(index_path: Path) -> Dict[str, str]: + with index_path.open("r", encoding="utf-8") as fh: + raw_items = json.load(fh) + + if not isinstance(raw_items, list): + raise ValueError( + f"Unexpected {_STOCK_INDEX_FILENAME} payload type: {type(raw_items).__name__}" + ) + + stock_name_map: Dict[str, str] = {} + for item in raw_items: + if not isinstance(item, list) or len(item) < 3: + continue + + canonical_code, display_code, name_zh = item[0], item[1], item[2] + if 
not is_meaningful_stock_name(name_zh, str(display_code or canonical_code or "")): + continue + + for key in _build_lookup_keys(str(canonical_code or ""), str(display_code or "")): + stock_name_map[key] = str(name_zh).strip() + + return stock_name_map + + +def get_stock_name_index_map() -> Dict[str, str]: + """Lazily load and cache the generated stock-name index.""" + global _STOCK_INDEX_CACHE + + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + with _STOCK_INDEX_CACHE_LOCK: + if _STOCK_INDEX_CACHE is not None: + return _STOCK_INDEX_CACHE + + for candidate_path in get_stock_index_candidate_paths(): + if not candidate_path.is_file(): + continue + + try: + _STOCK_INDEX_CACHE = _load_stock_index_file(candidate_path) + logger.debug( + "[股票名称] 已加载前端股票索引映射: %s (%d 条)", + candidate_path, + len(_STOCK_INDEX_CACHE), + ) + return _STOCK_INDEX_CACHE + except (OSError, TypeError, ValueError) as exc: + logger.debug("[股票名称] 读取股票索引失败 %s: %s", candidate_path, exc) + + _STOCK_INDEX_CACHE = {} + return _STOCK_INDEX_CACHE + + +def get_index_stock_name(stock_code: str) -> str | None: + """Resolve a stock name from the generated frontend stock index.""" + code = str(stock_code or "").strip() + if not code: + return None + + stock_name_map = get_stock_name_index_map() + for key in _build_lookup_keys(code, code): + name = stock_name_map.get(key) + if is_meaningful_stock_name(name, code): + return name + + return None + + +def _clear_stock_index_cache_for_tests() -> None: + global _STOCK_INDEX_CACHE + with _STOCK_INDEX_CACHE_LOCK: + _STOCK_INDEX_CACHE = None +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/provider/_data/stock_index_loader.py +git commit -m "feat: add provider/_data/stock_index_loader module" +``` + +--- + +### Task 3: Create `_config.py` + +**Files:** +- Create: `src/provider/_config.py` + +- [ ] **Step 1: Create `_config.py`** + +Write `src/provider/_config.py`: + +```python +# -*- coding: utf-8 -*- +""" +Slim configuration singleton for 
provider module. + +Reads configuration from environment variables. Only includes attributes +actually used by provider fetchers. +""" + +import os +from dataclasses import dataclass +from threading import Lock +from typing import Optional + + +# --------------------------------------------------------------------------- +# normalize_report_language (extracted from src/report_language.py) +# --------------------------------------------------------------------------- + +SUPPORTED_REPORT_LANGUAGES = ("zh", "en") + +_REPORT_LANGUAGE_ALIASES = { + "zh-cn": "zh", "zh_cn": "zh", "zh-hans": "zh", "zh_hans": "zh", + "zh-tw": "zh", "zh_tw": "zh", "cn": "zh", "chinese": "zh", + "english": "en", "en-us": "en", "en_us": "en", "en-gb": "en", "en_gb": "en", +} + + +def normalize_report_language(value: Optional[str], default: str = "zh") -> str: + """Normalize report language to a supported short code.""" + candidate = (value or default).strip().lower().replace(" ", "_") + candidate = _REPORT_LANGUAGE_ALIASES.get(candidate, candidate) + return candidate if candidate in SUPPORTED_REPORT_LANGUAGES else default + + +# --------------------------------------------------------------------------- +# Config singleton +# --------------------------------------------------------------------------- + +@dataclass +class Config: + # Tushare + tushare_token: str = "" + # Longbridge + longbridge_app_key: str = "" + longbridge_app_secret: str = "" + longbridge_access_token: str = "" + # TickFlow + tickflow_api_key: str = "" + # Feature toggles + enable_eastmoney_patch: bool = True + enable_realtime_quote: bool = True + enable_chip_distribution: bool = True + enable_fundamental_pipeline: bool = True + prefetch_realtime_quotes: bool = True + # Realtime source priority + realtime_source_priority: str = "tencent,akshare,efinance" + # Fundamental pipeline + fundamental_fetch_timeout_seconds: float = 30.0 + fundamental_stage_timeout_seconds: float = 60.0 + fundamental_cache_ttl_seconds: int = 3600 + 
fundamental_cache_max_entries: int = 256 + fundamental_retry_max: int = 2 + + +_instance: Optional[Config] = None +_lock = Lock() + + +def _env_bool(key: str, default: str = "true") -> bool: + return os.environ.get(key, default).lower() != "false" + + +def get_config() -> Config: + """Return the global Config singleton, creating it on first call.""" + global _instance + if _instance is not None: + return _instance + with _lock: + if _instance is not None: + return _instance + _instance = Config( + tushare_token=os.environ.get("TUSHARE_TOKEN", ""), + longbridge_app_key=os.environ.get("LONGBRIDGE_APP_KEY", ""), + longbridge_app_secret=os.environ.get("LONGBRIDGE_APP_SECRET", ""), + longbridge_access_token=os.environ.get("LONGBRIDGE_ACCESS_TOKEN", ""), + tickflow_api_key=os.environ.get("TICKFLOW_API_KEY", ""), + enable_eastmoney_patch=_env_bool("ENABLE_EASTMONEY_PATCH"), + enable_realtime_quote=_env_bool("ENABLE_REALTIME_QUOTE"), + enable_chip_distribution=_env_bool("ENABLE_CHIP_DISTRIBUTION"), + enable_fundamental_pipeline=_env_bool("ENABLE_FUNDAMENTAL_PIPELINE"), + prefetch_realtime_quotes=_env_bool("PREFETCH_REALTIME_QUOTES"), + realtime_source_priority=os.environ.get( + "REALTIME_SOURCE_PRIORITY", "tencent,akshare,efinance" + ), + fundamental_fetch_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_FETCH_TIMEOUT_SECONDS", "30") + ), + fundamental_stage_timeout_seconds=float( + os.environ.get("FUNDAMENTAL_STAGE_TIMEOUT_SECONDS", "60") + ), + fundamental_cache_ttl_seconds=int( + os.environ.get("FUNDAMENTAL_CACHE_TTL_SECONDS", "3600") + ), + fundamental_cache_max_entries=int( + os.environ.get("FUNDAMENTAL_CACHE_MAX_ENTRIES", "256") + ), + fundamental_retry_max=int( + os.environ.get("FUNDAMENTAL_RETRY_MAX", "2") + ), + ) + return _instance +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/provider/_config.py +git commit -m "feat: add provider/_config module with slim Config singleton" +``` + +--- + +### Task 4: Update imports in `base.py` + +**Files:** +- 
Modify: `src/provider/base.py` + +- [ ] **Step 1: Replace top-level imports (lines 27-28)** + +In `src/provider/base.py`, replace: + +```python +from src.data.stock_index_loader import get_index_stock_name +from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +``` + +with: + +```python +from provider._data.stock_index_loader import get_index_stock_name +from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +``` + +- [ ] **Step 2: Replace all lazy `from src.config import get_config` (9 occurrences)** + +Global replace in `src/provider/base.py`: + +```python +# from +from src.config import get_config +# to +from provider._config import get_config +``` + +This appears at lines 564, 1066, 1151, 1400, 1751, 1973, 2270, 2334, 2384. + +- [ ] **Step 3: Verify no remaining `from src.` in base.py** + +```bash +grep "from src\." src/provider/base.py +``` + +Expected: no output. + +- [ ] **Step 4: Commit** + +```bash +git add src/provider/base.py +git commit -m "refactor: update base.py imports from src.* to provider._*" +``` + +--- + +### Task 5: Update imports in fetcher files + +**Files:** +- Modify: `src/provider/efinance_fetcher.py` (line 55) +- Modify: `src/provider/akshare_fetcher.py` (line 45) +- Modify: `src/provider/tushare_fetcher.py` (line 36) +- Modify: `src/provider/yfinance_fetcher.py` (lines 40-42) +- Modify: `src/provider/longbridge_fetcher.py` (lines 165, 293, 326) + +- [ ] **Step 1: Fix `efinance_fetcher.py`** + +In `src/provider/efinance_fetcher.py`, line 55, replace: + +```python +from src.config import get_config +``` + +with: + +```python +from provider._config import get_config +``` + +- [ ] **Step 2: Fix `akshare_fetcher.py`** + +In `src/provider/akshare_fetcher.py`, line 45, replace: + +```python +from src.config import get_config +``` + +with: + +```python +from provider._config import get_config +``` + +- [ ] **Step 3: Fix `tushare_fetcher.py`** + +In `src/provider/tushare_fetcher.py`, line 36, 
replace: + +```python +from src.config import get_config +``` + +with: + +```python +from provider._config import get_config +``` + +- [ ] **Step 4: Fix `yfinance_fetcher.py`** + +In `src/provider/yfinance_fetcher.py`, lines 39-42, replace: + +```python +# 可选导入本地股票映射补丁,若缺失则使用空字典兜底 +try: + from src.data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +except (ImportError, ModuleNotFoundError): +``` + +with: + +```python +# 可选导入本地股票映射补丁,若缺失则使用空字典兜底 +try: + from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name +except (ImportError, ModuleNotFoundError): +``` + +- [ ] **Step 5: Fix `longbridge_fetcher.py`** + +Three lazy imports inside try blocks. Replace each occurrence: + +Line 165: +```python +# from +from src.report_language import normalize_report_language +# to +from provider._config import normalize_report_language +``` + +Line 293: +```python +# from +from src.config import get_config +# to +from provider._config import get_config +``` + +Line 326: +```python +# from +from src.config import get_config +# to +from provider._config import get_config +``` + +- [ ] **Step 6: Verify no remaining `from src.` in any provider file** + +```bash +grep -r "from src\." src/provider/ +``` + +Expected: no output. 
+ +- [ ] **Step 7: Commit** + +```bash +git add src/provider/efinance_fetcher.py src/provider/akshare_fetcher.py src/provider/tushare_fetcher.py src/provider/yfinance_fetcher.py src/provider/longbridge_fetcher.py +git commit -m "refactor: update fetcher imports from src.* to provider._*" +``` + +--- + +### Task 6: Verify provider imports work + +- [ ] **Step 1: Test that _config imports cleanly** + +```bash +python -c "from provider._config import get_config, normalize_report_language; c = get_config(); print(f'Config OK: tushare_token={c.tushare_token!r}'); print(f'Lang: {normalize_report_language(\"chinese\")}')" +``` + +Expected: +``` +Config OK: tushare_token='' +Lang: zh +``` + +- [ ] **Step 2: Test that _data imports cleanly** + +```bash +python -c "from provider._data.stock_mapping import STOCK_NAME_MAP, is_meaningful_stock_name; print(f'Mapping OK: {len(STOCK_NAME_MAP)} entries'); print(f'茅台: {is_meaningful_stock_name(\"贵州茅台\", \"600519\")}')" +``` + +Expected: +``` +Mapping OK: 90 entries +茅台: True +``` + +```bash +python -c "from provider._data.stock_index_loader import get_index_stock_name; print(f'Index loader OK: {get_index_stock_name(\"600519\")}')" +``` + +Expected: `Index loader OK: None` (no index file present, graceful fallback) + +- [ ] **Step 3: Test that provider.__init__ imports without error** + +```bash +python -c "from provider import DataFetcherManager; print('provider OK')" +``` + +Expected: `provider OK` (may show warnings about missing optional deps like efinance/akshare, but no ImportError) + +- [ ] **Step 4: If any failures, fix and commit** + +Address any remaining import errors discovered in steps 1-3. 
diff --git a/.claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp b/.claude/prds/.20250423-OPT_DATA_PROVIDER.md.swp deleted file mode 100644 index 1ab15263874d9964053a5a047311dd727e47e487..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2O>7%Q6vw9s2@14D^o|xH4pfRaaj66q1%k?vGn)a<2P^K z{N8(GS>B@^NA~TPUa@r&Jf0?G>vwhUwyhtLt`|0uo)3G?(FZn`RAnV&m4Z> zG+`hJ2m*qDARq_`0)l`bAP5Ko{~ZG1;3o13w)041OD{IAo9?+b-inJLAP5Kof`A|( z2nYg#fFK|U2m*qDARq|*3kf*!kGlRCA?F`Q@c93K_4ofT+X(p?x(n5yDwKonY$fCy z=xZnoy#_rB-QGe-0P2F?fL@1W==Y}x`3?FN`Vm@ul8`;ntI(Y%2)PJ#L;7Yy81w^l z6-q$wLHnUS&}Qf-#CRL}9x~&78+sP{1MzRi^)9a8LgFC^2m*qDARq|*zX-TJinqs4 zPpLtl)6d37*xY$K+;VV=#)`P>uJvbp6wj?c=2Q6%dss)&XY#Z*_As`jO)@S-*_8o8 z+NIubIH>HF~?y6 zCmc#R;OcWY{Z8Kr#jB9EHi=fp=ul2unl)~Wo13Aj0V6V~RnF_@qAS&D`LN;%De`@9 zd~`d*PDd~lIPP_OLYU4ca-f)Qla-JQYp*wAV?~z8(}hVoFmD-|26DgRu!1zB(q~F^ z=o$(@tK5VIAv^CzqWb6+t(3ZZWD~uXTgESeZ`D-gxb=4WopV5;= znks0OoK~HrLouzktdGts?mo=sff&hKG&Rn~7umHjV#h+XINS=CR$6E|2g}j4$`*2F zU2WX3jI}V8NRk9T7HA2jB59?Q68uR`8B+OkV2uzZ>5AYLq6=ve}$zZ)F^yX*Ff7-HF`td^AD8 zu2-rtt&~8T*l59Yke#Oan>>(}Y8>k{f-0j#3dYhB9r_HI>e17Bx{49JX6HuORDM@S zhgOPfrHfiQ%O+2=c*IO#Ji;<@V_;6t&9BzEN*5dc`S!KtITkHoA+0uyNa%2aMyF|M zfJUmu@(8PBO}+~gEPq2Q6=^}$N)fF*gF$`fI!op@?1`7oA&c-{4j9I)kZO(%cuYT) z8nFtnsb>&%JyJ22KBqB+ldTWV;|0DM8q|1Wpbyv$^sE^SyF6zUZsB~Q=_E!JpT`rF zI_(PKjRjL0(Tuq|7vD(AX-iod!HMS-=Y#PoOejV~)qB7&wg=&DZG*c+0b0A4y=42i{nTBWfh8 zssEY_t)*!MWO)tK>>}1-qe}?NW}QWU9W^^7-U=+0GL{j}G_P~M9*s>HbMp-+r_|=+ zheOVXTQ+P8i?lpy*7^Fru+EfbbN)GR8vJUsPC1$>(%Hzm)6nNt!lY$vh zG&c>kqFnRP+zn=24.8.0", diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py index 1d4f43b3..f647ef7f 100644 --- a/src/processor/us_daily/__main__.py +++ b/src/processor/us_daily/__main__.py @@ -65,11 +65,11 @@ def build_source_manager(config, client) -> SourceManager: def load_all_tickers(config) -> list: - """Load tickers from all exchange files in list_dir.""" + """Load tickers from all exchange files in list_data_dir.""" 
all_tickers = [] seen = set() for exchange_name in config.exchanges: - file_path = get_list_file_path(config.list_dir, exchange_name) + file_path = get_list_file_path(config.list_data_dir, exchange_name) if not file_exists(file_path): continue data = load_json(file_path) @@ -92,7 +92,7 @@ def main(): # Step 1: Fetch ticker lists per exchange if config.refresh_tickers or any( - not file_exists(get_list_file_path(config.list_dir, ex)) + not file_exists(get_list_file_path(config.list_data_dir, ex)) for ex in config.exchanges ): for exchange_name in config.exchanges: @@ -109,6 +109,26 @@ def main(): # Step 2: Fetch daily data source_manager = build_source_manager(config, client) + # Filter tickers by market cap + if config.market_cap_min > 0: + filtered = [] + for t in tickers: + market_cap = t.get("market_cap") + if market_cap is not None and market_cap >= config.market_cap_min: + filtered.append(t) + elif market_cap is None: + logger.debug( + f"Skipping {t['ticker']}: market_cap is None" + ) + else: + logger.debug( + f"Skipping {t['ticker']}: market_cap={market_cap:.0f} < {config.market_cap_min:.0f}" + ) + logger.info( + f"Filtered by market_cap >= {config.market_cap_min:.0f}: {len(filtered)}/{len(tickers)} tickers" + ) + tickers = filtered + all_failures = [] total = len(tickers) for i, ticker_info in enumerate(tickers): diff --git a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py index d1d97bba..2135c6e4 100644 --- a/src/processor/us_daily/agg_fetcher.py +++ b/src/processor/us_daily/agg_fetcher.py @@ -51,7 +51,7 @@ def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: Args: source_manager: SourceManager instance with failover sources. ticker: Stock ticker symbol (e.g. "AAPL"). - config: Config with daily_dir, start_date, max_retries. + config: Config with daily_data_dir, start_date, max_retries. Returns: Dict with "failures" list of failed months. 
@@ -60,7 +60,7 @@ def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: failures = [] for month in months: - file_path = get_month_file_path(config.daily_dir, ticker, month) + file_path = get_month_file_path(config.daily_data_dir, ticker, month) if file_exists(file_path) and not is_current_month(month): logger.debug(f" {ticker} {month}: exists, skipping") diff --git a/src/processor/us_daily/config.json b/src/processor/us_daily/config.json index 20d41292..d74b806e 100644 --- a/src/processor/us_daily/config.json +++ b/src/processor/us_daily/config.json @@ -1,8 +1,9 @@ { - "refresh_tickers": false, + "refresh_tickers": true, "market_cap_min": 1000000000, "start_date": "2026-01", "request_interval": 12, - "data_dir": "data/us_daily", + "list_data_dir": "data/us_list", + "daily_data_dir": "data/us_daily", "max_retries": 3 } diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py index 97fa63a3..adff53a2 100644 --- a/src/processor/us_daily/config.py +++ b/src/processor/us_daily/config.py @@ -10,13 +10,14 @@ class Config: exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"]) start_date: str = "2026-01" data_source_priority: List[str] = field( - default_factory=lambda: ["akshare", "yfinance", "massive"] + default_factory=lambda: ["massive", "akshare", "yfinance"] ) + market_cap_min: float = 1_000_000_000 akshare_interval: float = 2.0 yfinance_interval: float = 1.0 massive_interval: float = 12.0 - list_dir: str = "data/us_list" - daily_dir: str = "data/us_daily" + list_data_dir: str = "data/us_list" + daily_data_dir: str = "data/us_daily" max_retries: int = 3 diff --git a/src/processor/us_daily/sources/massive_source.py b/src/processor/us_daily/sources/massive_source.py index 1c45299c..555a0a19 100644 --- a/src/processor/us_daily/sources/massive_source.py +++ b/src/processor/us_daily/sources/massive_source.py @@ -39,7 +39,11 @@ def fetch_daily(self, ticker: str, start_date: str, end_date: str) -> 
pd.DataFra "low": a.low, "close": a.close, "volume": a.volume, + "vwap": a.vwap, + "transactions": a.transactions, + "otc": a.otc, }) - df = pd.DataFrame(rows, columns=STANDARD_COLUMNS) + columns = STANDARD_COLUMNS + ["vwap", "transactions", "otc"] + df = pd.DataFrame(rows, columns=columns) return df diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index 9046957c..9a08ca09 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -36,7 +36,7 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis tickers are kept and only missing ones are fetched. """ exchange_code = EXCHANGES[exchange_name] - file_path = get_list_file_path(config.list_dir, exchange_name) + file_path = get_list_file_path(config.list_data_dir, exchange_name) # Load existing tickers for resume existing_tickers: Dict[str, dict] = {} @@ -64,7 +64,10 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis logger.info(f"[{exchange_name}] Found {len(ticker_objs)} tickers") # Fetch details for new tickers only + new_count = 0 for i, ticker_obj in enumerate(ticker_objs): + if new_count >= 10: + break ticker_str = ticker_obj.ticker if ticker_str in existing_tickers: continue @@ -73,6 +76,7 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis details = client.get_ticker_details(ticker_str) entry = _details_to_dict(details) existing_tickers[ticker_str] = entry + new_count += 1 logger.info( f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" ) @@ -83,7 +87,20 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis time.sleep(config.massive_interval) - # Save result + # Flush to disk every 100 new details to avoid losing progress + if new_count > 0 and new_count % 100 == 0: + tickers_list = list(existing_tickers.values()) + save_json(file_path, { + "updated_at": date.today().strftime("%Y-%m-%d"), 
+ "exchange": exchange_code, + "count": len(tickers_list), + "tickers": tickers_list, + }) + logger.info( + f"[{exchange_name}] Checkpoint: saved {len(tickers_list)} tickers to {file_path}" + ) + + # Final save tickers_list = list(existing_tickers.values()) save_json(file_path, { "updated_at": date.today().strftime("%Y-%m-%d"), diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py index 9d2a7f03..7c49aea2 100644 --- a/tests/test_us_daily/test_agg_fetcher.py +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -87,7 +87,7 @@ def test_skips_existing_historical_month(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config - config = Config(start_date="2020-01", daily_dir=self.test_dir) + config = Config(start_date="2020-01", daily_data_dir=self.test_dir) ticker_dir = os.path.join(self.test_dir, "AAPL") os.makedirs(ticker_dir) @@ -112,7 +112,7 @@ def test_fetches_missing_month(self): from processor.us_daily.config import Config import pandas as pd - config = Config(start_date="2020-01", daily_dir=self.test_dir) + config = Config(start_date="2020-01", daily_data_dir=self.test_dir) df = pd.DataFrame({ "date": ["2020-01-02"], @@ -149,7 +149,7 @@ def test_refreshes_current_month(self): from processor.us_daily.config import Config import pandas as pd - config = Config(start_date="2026-04", daily_dir=self.test_dir) + config = Config(start_date="2026-04", daily_data_dir=self.test_dir) ticker_dir = os.path.join(self.test_dir, "AAPL") os.makedirs(ticker_dir) @@ -182,7 +182,7 @@ def test_records_failure_when_all_sources_fail(self): from processor.us_daily.config import Config from processor.us_daily.sources.manager import FetchError - config = Config(start_date="2020-01", daily_dir=self.test_dir, max_retries=2) + config = Config(start_date="2020-01", daily_data_dir=self.test_dir, max_retries=2) manager = self._make_manager( error=FetchError("All sources failed for AAPL") diff --git 
a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index 3c31812a..172d610b 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -13,12 +13,12 @@ def test_default_config(self): self.assertEqual(config.start_date, "2026-01") self.assertEqual(config.max_retries, 3) self.assertEqual(config.exchanges, ["nasdaq", "nyse", "arca"]) - self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"]) + self.assertEqual(config.data_source_priority, ["massive", "akshare", "yfinance"]) self.assertEqual(config.akshare_interval, 2.0) self.assertEqual(config.yfinance_interval, 1.0) self.assertEqual(config.massive_interval, 12.0) - self.assertEqual(config.list_dir, "data/us_list") - self.assertEqual(config.daily_dir, "data/us_daily") + self.assertEqual(config.list_data_dir, "data/us_list") + self.assertEqual(config.daily_data_dir, "data/us_daily") def test_load_config_from_file(self): from processor.us_daily.config import load_config @@ -47,7 +47,7 @@ def test_load_config_missing_file_uses_defaults(self): config = load_config("/nonexistent/path/config.json") self.assertEqual(config.refresh_tickers, False) - self.assertEqual(config.data_source_priority, ["akshare", "yfinance", "massive"]) + self.assertEqual(config.data_source_priority, ["massive", "akshare", "yfinance"]) if __name__ == "__main__": diff --git a/tests/test_us_daily/test_sources/test_massive_source.py b/tests/test_us_daily/test_sources/test_massive_source.py index 749ffe6f..5bedbe67 100644 --- a/tests/test_us_daily/test_sources/test_massive_source.py +++ b/tests/test_us_daily/test_sources/test_massive_source.py @@ -15,6 +15,9 @@ def test_fetch_daily_returns_standard_columns(self): agg1.close = 74.36 agg1.volume = 108872000 agg1.timestamp = 1577944800000 # 2020-01-02 UTC + agg1.vwap = 74.50 + agg1.transactions = 5000 + agg1.otc = False client = MagicMock() client.list_aggs.return_value = iter([agg1]) @@ -22,10 +25,13 @@ def 
test_fetch_daily_returns_standard_columns(self): source = MassiveSource(client=client, request_interval=0.0) result = source.fetch_daily("AAPL", "2020-01-01", "2020-01-31") - self.assertListEqual(list(result.columns), STANDARD_COLUMNS) + expected_columns = STANDARD_COLUMNS + ["vwap", "transactions", "otc"] + self.assertListEqual(list(result.columns), expected_columns) self.assertEqual(len(result), 1) self.assertEqual(result.iloc[0]["close"], 74.36) self.assertEqual(result.iloc[0]["date"], "2020-01-02") + self.assertEqual(result.iloc[0]["vwap"], 74.50) + self.assertEqual(result.iloc[0]["transactions"], 5000) def test_fetch_daily_calls_client_correctly(self): from processor.us_daily.sources.massive_source import MassiveSource diff --git a/tests/test_us_daily/test_ticker_lister.py b/tests/test_us_daily/test_ticker_lister.py index 3ef467db..a1144414 100644 --- a/tests/test_us_daily/test_ticker_lister.py +++ b/tests/test_us_daily/test_ticker_lister.py @@ -27,7 +27,7 @@ def test_list_tickers_for_exchange(self): from processor.us_daily.ticker_lister import list_tickers_for_exchange from processor.us_daily.config import Config - config = Config(list_dir=self.test_dir, massive_interval=0) + config = Config(list_data_dir=self.test_dir, massive_interval=0) client = MagicMock() client.list_tickers.return_value = iter([ @@ -68,7 +68,7 @@ def test_resume_skips_existing_tickers(self): from processor.us_daily.ticker_lister import list_tickers_for_exchange from processor.us_daily.config import Config - config = Config(list_dir=self.test_dir, massive_interval=0) + config = Config(list_data_dir=self.test_dir, massive_interval=0) # Pre-populate file with AAPL already fetched file_path = os.path.join(self.test_dir, "nasdaq.json") @@ -110,7 +110,7 @@ def test_skips_ticker_on_details_error(self): from processor.us_daily.ticker_lister import list_tickers_for_exchange from processor.us_daily.config import Config - config = Config(list_dir=self.test_dir, massive_interval=0) + config = 
Config(list_data_dir=self.test_dir, massive_interval=0) client = MagicMock() client.list_tickers.return_value = iter([ From 82dbe3d40d8a6ba0a7393eae396842f59a13d727 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:43:50 +0800 Subject: [PATCH 34/43] add .env --- data/us_list/arca.json | 160 +++++++++++++++- data/us_list/nasdaq.json | 241 ++++++++++++++++++++++- data/us_list/nyse.json | 304 +++++++++++++++++++++++++++++- pyproject.toml | 1 + src/massive/rest/__init__.py | 3 + src/massive/websocket/__init__.py | 3 + 6 files changed, 709 insertions(+), 3 deletions(-) diff --git a/data/us_list/arca.json b/data/us_list/arca.json index c735c14f..aeaefb6e 100644 --- a/data/us_list/arca.json +++ b/data/us_list/arca.json @@ -1,7 +1,7 @@ { "updated_at": "2026-04-23", "exchange": "ARCX", - "count": 20, + "count": 30, "tickers": [ { "active": true, @@ -321,6 +321,164 @@ "share_class_shares_outstanding": 6400000, "ticker": "AFIX", "type": "ETF" + }, + { + "active": true, + "cik": "0001137360", + "composite_figi": "BBG000H0B9J8", + "currency_name": "usd", + "ticker_root": "AFK", + "list_date": "2008-07-10", + "locale": "us", + "market": "stocks", + "name": "VanEck Africa Index ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG001T33P37", + "share_class_shares_outstanding": 4450000, + "ticker": "AFK", + "type": "ETF" + }, + { + "active": true, + "cik": "0001667919", + "composite_figi": "BBG00QZ0DQB9", + "currency_name": "usd", + "ticker_root": "AFLG", + "list_date": "2019-12-03", + "locale": "us", + "market": "stocks", + "name": "First Trust Active Factor Large Cap ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG00QZ0DR27", + "share_class_shares_outstanding": 14300002, + "ticker": "AFLG", + "type": "ETF" + }, + { + "active": true, + "cik": "0001667919", + "composite_figi": "BBG00QZ0GVP0", + "currency_name": "usd", + "ticker_root": "AFMC", + "list_date": "2019-12-03", + "locale": "us", + 
"market": "stocks", + "name": "First Trust Active Factor Mid Cap ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG00QZ0GWH7", + "share_class_shares_outstanding": 3900002, + "ticker": "AFMC", + "type": "ETF" + }, + { + "active": true, + "cik": "0001667919", + "composite_figi": "BBG00QZ0H932", + "currency_name": "usd", + "ticker_root": "AFSM", + "list_date": "2019-12-03", + "locale": "us", + "market": "stocks", + "name": "First Trust Active Factor Small Cap ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG00QZ0H9W0", + "share_class_shares_outstanding": 2450002, + "ticker": "AFSM", + "type": "ETF" + }, + { + "active": true, + "cik": "0001100663", + "composite_figi": "BBG000Q123R0", + "currency_name": "usd", + "ticker_root": "AGG", + "list_date": "2003-09-22", + "locale": "us", + "market": "stocks", + "name": "iShares Core U.S. Aggregate Bond ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG001SM1QT8", + "share_class_shares_outstanding": 1365800000, + "ticker": "AGG", + "type": "ETF" + }, + { + "active": true, + "cik": "0001810747", + "composite_figi": "BBG015BC7V75", + "currency_name": "usd", + "ticker_root": "AGGH", + "list_date": "2022-02-14", + "locale": "us", + "market": "stocks", + "name": "Simplify Aggregate Bond ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG015BC7WS0", + "share_class_shares_outstanding": 21325001, + "ticker": "AGGH", + "type": "ETF" + }, + { + "active": true, + "cik": "0001860434", + "composite_figi": "BBG01MCGX1X6", + "currency_name": "usd", + "ticker_root": "AGGS", + "list_date": "2024-05-01", + "locale": "us", + "market": "stocks", + "name": "Harbor Disciplined Bond ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG01MCGX2S0", + "share_class_shares_outstanding": 950000, + "ticker": "AGGS", + "type": "ETF" + }, + { + "active": true, + "cik": "0001350487", + "composite_figi": "BBG009KCY2B7", + "currency_name": "usd", + "ticker_root": "AGGY", + "list_date": "2015-07-09", + 
"locale": "us", + "market": "stocks", + "name": "WisdomTree Yield Enhanced U.S. Aggregate Bond Fund", + "primary_exchange": "ARCX", + "share_class_figi": "BBG009KCY2C6", + "share_class_shares_outstanding": 20300000, + "ticker": "AGGY", + "type": "ETF" + }, + { + "active": true, + "composite_figi": "BBG01WV67GC4", + "currency_name": "usd", + "ticker_root": "AGIQ", + "list_date": "2025-09-02", + "locale": "us", + "market": "stocks", + "name": "SoFi Agentic AI ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG01WV67HD1", + "share_class_shares_outstanding": 450000, + "ticker": "AGIQ", + "type": "ETF" + }, + { + "active": true, + "composite_figi": "BBG010WX25M2", + "currency_name": "usd", + "ticker_root": "AGOX", + "list_date": "2012-09-20", + "locale": "us", + "market": "stocks", + "name": "Adaptive Alpha Opportunities ETF", + "primary_exchange": "ARCX", + "share_class_figi": "BBG010WX26H6", + "share_class_shares_outstanding": 11393738, + "ticker": "AGOX", + "type": "ETF" } ] } \ No newline at end of file diff --git a/data/us_list/nasdaq.json b/data/us_list/nasdaq.json index 55beac5c..25f01ae3 100644 --- a/data/us_list/nasdaq.json +++ b/data/us_list/nasdaq.json @@ -1,7 +1,7 @@ { "updated_at": "2026-04-23", "exchange": "XNAS", - "count": 20, + "count": 30, "tickers": [ { "active": true, @@ -494,6 +494,245 @@ "share_class_shares_outstanding": 1275001, "ticker": "AAPD", "type": "ETF" + }, + { + "active": true, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YXNjZW50YWdlcGhhcm1hLmNvbQ/images/2025-04-04_logo.svg", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YXNjZW50YWdlcGhhcm1hLmNvbQ/images/2025-04-04_icon.jpeg" + }, + "cik": "0002023311", + "composite_figi": "BBG01RJXM9C9", + "currency_name": "usd", + "description": "Ascentage Pharma Group International is a clinical-stage biotechnology company engaged in the development and sales of novel small-scale therapies for cancers, hepatitis B virus, or HBV, 
and certain age-related diseases. It focuses on developing therapies that inhibit protein-protein interactions to restore apoptosis or programmed cell death. The Group has one reportable operating segment, which is discovering, developing, and commercializing therapies to address medical needs in hematological malignancies. The company's geographical segments include the United States and Mainland China.", + "ticker_root": "AAPG", + "homepage_url": "https://www.ascentagepharma.com", + "list_date": "2025-01-24", + "locale": "us", + "market": "stocks", + "market_cap": 2393925349.95, + "name": "Ascentage Pharma Group International American Depository Shares", + "primary_exchange": "XNAS", + "share_class_figi": "BBG01RJXMB89", + "share_class_shares_outstanding": 93331173, + "ticker": "AAPG", + "total_employees": 767, + "type": "ADRC", + "weighted_shares_outstanding": 93330423 + }, + { + "active": true, + "address": { + "address1": "ONE APPLE PARK WAY", + "city": "CUPERTINO", + "state": "CA", + "postal_code": "95014" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YXBwbGUuY29t/images/2025-04-04_logo.svg", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YXBwbGUuY29t/images/2025-04-04_icon.png" + }, + "cik": "0000320193", + "composite_figi": "BBG000B9XRY4", + "currency_name": "usd", + "description": "Apple is among the largest companies in the world, with a broad portfolio of hardware and software products targeted at consumers and businesses. Apple's iPhone makes up a majority of the firm sales, and Apple's other products like Mac, iPad, and Watch are designed around the iPhone as the focal point of an expansive software ecosystem. Apple has progressively worked to add new applications, like streaming video, subscription bundles, and augmented reality. The firm designs its own software and semiconductors while working with subcontractors like Foxconn and TSMC to build its products and chips. 
Slightly less than half of Apple's sales come directly through its flagship stores, with a majority of sales coming indirectly through partnerships and distribution.", + "ticker_root": "AAPL", + "homepage_url": "https://www.apple.com", + "list_date": "1980-12-12", + "locale": "us", + "market": "stocks", + "market_cap": 3907679033800.0, + "name": "Apple Inc.", + "phone_number": "(408) 996-1010", + "primary_exchange": "XNAS", + "share_class_figi": "BBG001S5N8V8", + "share_class_shares_outstanding": 14681140000, + "sic_code": "3571", + "sic_description": "ELECTRONIC COMPUTERS", + "ticker": "AAPL", + "total_employees": 166000, + "type": "CS", + "weighted_shares_outstanding": 14681140000 + }, + { + "active": true, + "composite_figi": "BBG0193YBZ92", + "currency_name": "usd", + "ticker_root": "AAPU", + "list_date": "2022-08-08", + "locale": "us", + "market": "stocks", + "name": "Direxion Shares ETF Trust Direxion Daily AAPL Bull 2X ETF", + "primary_exchange": "XNAS", + "share_class_figi": "BBG0193YC043", + "share_class_shares_outstanding": 5825001, + "ticker": "AAPU", + "type": "ETF" + }, + { + "active": true, + "address": { + "address1": "4370 LA JOLLA VILLAGE DRIVE", + "address2": "SUITE 1050", + "city": "SAN DIEGO", + "state": "CA", + "postal_code": "92122" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWFyZHZhcmt0aGVyYXBldXRpY3MuY29t/images/2025-04-04_logo.png", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YWFyZHZhcmt0aGVyYXBldXRpY3MuY29t/images/2025-04-04_icon.jpeg" + }, + "cik": "0001774857", + "composite_figi": "BBG01223DLB2", + "currency_name": "usd", + "description": "Aardvark Therapeutics Inc is a clinical-stage biopharmaceutical company focused on developing novel, small-molecule therapeutics to activate innate homeostatic pathways for the treatment of metabolic diseases. 
It is focused on on developing selective compounds, targeting Bitter Taste Receptors (TAS2Rs) for hunger-associated conditions. Its product candidate, ARD-101, is an oral gut-restricted small-molecule agonist of certain TAS2Rs expressed in the gut lumen for which have initiated a Phase 3 clinical trial for hyperphagia associated with PWS.", + "ticker_root": "AARD", + "homepage_url": "https://www.aardvarktherapeutics.com", + "list_date": "2025-02-13", + "locale": "us", + "market": "stocks", + "market_cap": 117152140.17, + "name": "Aardvark Therapeutics, Inc. Common Stock", + "phone_number": "(858) 225-7696", + "primary_exchange": "XNAS", + "share_class_figi": "BBG01223DLC1", + "share_class_shares_outstanding": 21816041, + "sic_code": "2834", + "sic_description": "PHARMACEUTICAL PREPARATIONS", + "ticker": "AARD", + "total_employees": 40, + "type": "CS", + "weighted_shares_outstanding": 21816041 + }, + { + "active": true, + "composite_figi": "BBG01W26H9C1", + "currency_name": "usd", + "ticker_root": "AAUS", + "list_date": "2025-07-22", + "locale": "us", + "market": "stocks", + "name": "Alpha Architect US Equity ETF", + "primary_exchange": "XNAS", + "share_class_figi": "BBG01W26HBL6", + "share_class_shares_outstanding": 8817000, + "ticker": "AAUS", + "type": "ETF" + }, + { + "active": true, + "composite_figi": "BBG00GM1DQV1", + "currency_name": "usd", + "ticker_root": "AAVM", + "list_date": "2017-05-02", + "locale": "us", + "market": "stocks", + "name": "EA Series Trust Alpha Architect Global Factor Equity ETF", + "primary_exchange": "XNAS", + "share_class_figi": "BBG00GM1DRK1", + "share_class_shares_outstanding": 660000, + "ticker": "AAVM", + "type": "ETF" + }, + { + "active": true, + "cik": "0001100663", + "composite_figi": "BBG000G6GXC5", + "currency_name": "usd", + "ticker_root": "AAXJ", + "list_date": "2008-08-13", + "locale": "us", + "market": "stocks", + "name": "iShares MSCI All Country Asia ex Japan ETF", + "primary_exchange": "XNAS", + "share_class_figi": 
"BBG001T2V2D8", + "share_class_shares_outstanding": 34200000, + "ticker": "AAXJ", + "type": "ETF" + }, + { + "active": true, + "address": { + "address1": "100 WASHINGTON STREET", + "address2": "SUITE 100", + "city": "RENO", + "state": "NV", + "postal_code": "89503" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmljYW5iYXR0ZXJ5dGVjaG5vbG9neS5jb20/images/2025-04-04_logo.png", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmljYW5iYXR0ZXJ5dGVjaG5vbG9neS5jb20/images/2025-04-04_icon.jpeg" + }, + "cik": "0001576873", + "composite_figi": "BBG004M1KJN5", + "currency_name": "usd", + "description": "American Battery Technology Co is an integrated critical battery minerals company that develops technologies for both primary battery minerals manufacturing and lithium-ion battery recycling. It operates battery recycling facilities that process materials from electric vehicle batteries, stationary battery energy storage systems, and consumer electronics. Additionally, the company advances lithium production projects using proprietary technology to produce battery-grade lithium hydroxide. It generates revenue mainly from its recycling operations and is expanding capacity with strategic partnerships and government grants. 
The company's activities focus on creating a closed-loop battery materials supply chain to support sustainable manufacturing in the United States.", + "ticker_root": "ABAT", + "homepage_url": "https://www.americanbatterytechnology.com", + "list_date": "2015-10-15", + "locale": "us", + "market": "stocks", + "market_cap": 435957600.95, + "name": "American Battery Technology Company Common Stock", + "phone_number": "775-473-4744", + "primary_exchange": "XNAS", + "share_class_figi": "BBG004M1KJP3", + "share_class_shares_outstanding": 131709245, + "sic_code": "1400", + "sic_description": "MINING & QUARRYING OF NONMETALLIC MINERALS (NO FUELS)", + "ticker": "ABAT", + "total_employees": 163, + "type": "CS", + "weighted_shares_outstanding": 131709245 + }, + { + "active": true, + "address": { + "address1": "150 W 4TH AVENUE", + "city": "VANCOUVER", + "state": "A1", + "postal_code": "V5Y 1G6" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWJjZWxsZXJhLmNvbQ/images/2025-04-04_logo.svg", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YWJjZWxsZXJhLmNvbQ/images/2025-04-04_icon.jpeg" + }, + "cik": "0001703057", + "composite_figi": "BBG00LLW2MF2", + "currency_name": "usd", + "description": "AbCellera Biologics Inc is a clinical-stage biotechnology company focused on discovering and developing first-in-class antibody medicines for indications with high unmet medical need. It has built a platform for advancing antibody drug programs that the company believes provides it with a competitive advantage in addressing challenging, high-value targets such as complex transmembrane proteins and novel modalities, including multispecifics and antibody-drug conjugates. 
Organisation's pipeline includes two drug candidates in clinical development, two development candidates in Investigational New Drug (IND/Clinical Trial Application (CTA)-enabling activities, and more than 20 active discovery programs across multiple modalities and indications. ABCL635, ABCL386, ABCL575, and ABCL635.", + "ticker_root": "ABCL", + "homepage_url": "https://www.abcellera.com", + "list_date": "2020-12-11", + "locale": "us", + "market": "stocks", + "market_cap": 1209610343.13, + "name": "AbCellera Biologics Inc. Common Shares", + "phone_number": "(604) 559-9005", + "primary_exchange": "XNAS", + "share_class_figi": "BBG00LLW2MH0", + "share_class_shares_outstanding": 303160487, + "sic_code": "2834", + "sic_description": "PHARMACEUTICAL PREPARATIONS", + "ticker": "ABCL", + "total_employees": 562, + "type": "CS", + "weighted_shares_outstanding": 303160487 + }, + { + "active": true, + "composite_figi": "BBG01KMFQRF2", + "currency_name": "usd", + "ticker_root": "ABCS", + "list_date": "2023-12-18", + "locale": "us", + "market": "stocks", + "name": "Alpha Blue Capital US Small-Mid Cap Dynamic ETF", + "primary_exchange": "XNAS", + "share_class_figi": "BBG01KMFQS88", + "share_class_shares_outstanding": 350000, + "ticker": "ABCS", + "type": "ETF" } ] } \ No newline at end of file diff --git a/data/us_list/nyse.json b/data/us_list/nyse.json index d34d4777..868134ee 100644 --- a/data/us_list/nyse.json +++ b/data/us_list/nyse.json @@ -1,7 +1,7 @@ { "updated_at": "2026-04-23", "exchange": "XNYS", - "count": 20, + "count": 30, "tickers": [ { "active": true, @@ -630,6 +630,308 @@ "total_employees": 6390, "type": "CS", "weighted_shares_outstanding": 49098579 + }, + { + "active": true, + "address": { + "address1": "FOUR CORPORATE DRIVE", + "city": "LAKE ZURICH", + "state": "IL", + "postal_code": "60047" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWNjb2JyYW5kcy5jb20/images/2025-04-04_logo.svg", + "icon_url": 
"https://api.massive.com/v1/reference/company-branding/YWNjb2JyYW5kcy5jb20/images/2025-04-04_icon.png" + }, + "cik": "0000712034", + "composite_figi": "BBG000J06K07", + "currency_name": "usd", + "description": "ACCO Brands Corp is a consumer, technology and business branded products company providing brands and product solutions used in schools, homes and at work. Its brands include At-A-Glance, Barrilito, Buro, Esselte, Five Star, Foroni, GBC, Hilroy, Kensington, Leitz, Mead, PowerA, Quartet, Rapid, Swingline and Tilibra. The Company's product categories include gaming and computer accessories, storage and organization, notebooks, shredding, laminating and binding machines, dry erase boards and do-it-yourself tools. The Company operates through two segments, Americas and International. Americas includes the U.S., Canada, Brazil, Mexico and Chile, and International includes EMEA, Australia, New Zealand and Asia. Its products are sold in the U.S., Europe, Australia, Canada, Brazil and Mexico.", + "ticker_root": "ACCO", + "homepage_url": "https://www.accobrands.com", + "list_date": "1999-11-09", + "locale": "us", + "market": "stocks", + "market_cap": 303528142.26, + "name": "Acco Brands Corporation", + "phone_number": "847-541-9500", + "primary_exchange": "XNYS", + "share_class_figi": "BBG001SPBTK3", + "share_class_shares_outstanding": 92257794, + "sic_code": "2780", + "sic_description": "BLANKBOOKS, LOOSELEAF BINDERS & BOOKBINDG & RELATD WORK", + "ticker": "ACCO", + "total_employees": 4700, + "type": "CS", + "weighted_shares_outstanding": 92257794 + }, + { + "active": true, + "address": { + "address1": "140 TOWER DRIVE", + "city": "BURR RIDGE", + "state": "IL", + "postal_code": "60527" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWNjZWxlbnRlcnRhaW5tZW50LmNvbQ/images/2025-04-04_logo.png", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YWNjZWxlbnRlcnRhaW5tZW50LmNvbQ/images/2025-04-04_icon.jpeg" + }, + 
"cik": "0001698991", + "composite_figi": "BBG00GX221W3", + "currency_name": "usd", + "description": "Accel Entertainment Inc is a distributed gaming and local entertainment operator in the United States. It is engaged in the installation, maintenance, operation, and servicing of gaming terminals and related equipment, redemption devices that disburse winnings and contain automated teller machine (ATM) functionality, and amusement devices in authorized non-casino locations such as restaurants, bars, convenience stores, liquor stores, truck stops, and grocery stores. The Company also operates stand-alone ATMs in gaming and non-gaming locations. It generates revenue from Net gaming, Amusement, Manufacturing, ATM fees, and others.", + "ticker_root": "ACEL", + "homepage_url": "https://www.accelentertainment.com", + "list_date": "2017-08-18", + "locale": "us", + "market": "stocks", + "market_cap": 964195815.0, + "name": "Accel Entertainment, Inc.", + "phone_number": "630-972-2235", + "primary_exchange": "XNYS", + "share_class_figi": "BBG00GX221Z0", + "share_class_shares_outstanding": 81573250, + "sic_code": "7900", + "sic_description": "SERVICES-AMUSEMENT & RECREATION SERVICES", + "ticker": "ACEL", + "total_employees": 1600, + "type": "CS", + "weighted_shares_outstanding": 81573250 + }, + { + "active": true, + "address": { + "address1": "10900 NUCKOLS ROAD", + "address2": "SUITE 400", + "city": "GLEN ALLEN", + "state": "VA", + "postal_code": "23060" + }, + "cik": "0000075252", + "composite_figi": "BBG000CTV5F0", + "currency_name": "usd", + "description": "Accendra Health Inc is a nationwide provider of products, technology, and services that support health beyond the hospital by connecting patients, providers, and insurers. 
Through its brands, Apria and Byram Healthcare, the company delivers disposable medical supplies, integrated home healthcare equipment, and related services that help improve health outcomes and quality of life for individuals with chronic, complex, and acute health conditions. Its offerings span diabetes treatment, home respiratory therapy, and obstructive sleep apnea treatment, along with patient support services. The company also supplies a broad range of home medical equipment and patient care products, including ostomy, wound care, urology, and incontinence solutions.", + "ticker_root": "ACH", + "homepage_url": "https://www.accendrahealth.com", + "list_date": "1973-01-02", + "locale": "us", + "market": "stocks", + "market_cap": 254538263.61, + "name": "Accendra Health, Inc.", + "phone_number": "(804) 277-4304", + "primary_exchange": "XNYS", + "share_class_figi": "BBG001S72KY7", + "share_class_shares_outstanding": 76437917, + "sic_code": "5047", + "sic_description": "WHOLESALE-MEDICAL, DENTAL & HOSPITAL EQUIPMENT & SUPPLIES", + "ticker": "ACH", + "total_employees": 6500, + "type": "CS", + "weighted_shares_outstanding": 76437917 + }, + { + "active": true, + "address": { + "address1": "190 WEST TASMAN DRIVE", + "city": "SAN JOSE", + "state": "CA", + "postal_code": "95134" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJjaGVyLmNvbQ/images/2025-04-04_logo.png" + }, + "cik": "0001824502", + "composite_figi": "BBG00XRTC910", + "currency_name": "usd", + "description": "Archer Aviation Inc advances the benefits of sustainable air mobility. The company is engaged in designing and developing a fully electric vertical takeoff and landing eVTOL aircraft for use in UAM networks. It is creating an electric airline that moves people throughout cities in a quick, safe, sustainable, and cost-effective manner. 
The company is building a platform to deliver aircraft, technologies, and services to customers world-wide across commercial and defense sectors.", + "ticker_root": "ACHR", + "homepage_url": "https://www.archer.com", + "list_date": "2021-09-17", + "locale": "us", + "market": "stocks", + "market_cap": 4454193775.68, + "name": "Archer Aviation Inc.", + "phone_number": "650-272-3233", + "primary_exchange": "XNYS", + "share_class_figi": "BBG00XRTC929", + "share_class_shares_outstanding": 744538832, + "sic_code": "3721", + "sic_description": "AIRCRAFT", + "ticker": "ACHR", + "total_employees": 1660, + "type": "CS", + "weighted_shares_outstanding": 749864272 + }, + { + "active": true, + "address": { + "address1": "190 WEST TASMAN DRIVE", + "city": "SAN JOSE", + "state": "CA", + "postal_code": "95134" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJjaGVyLmNvbQ/images/2025-04-04_logo.png" + }, + "cik": "0001824502", + "composite_figi": "BBG00YGCV1N9", + "currency_name": "usd", + "description": "Archer Aviation Inc advances the benefits of sustainable air mobility. The company is engaged in designing and developing a fully electric vertical takeoff and landing eVTOL aircraft for use in UAM networks. It is creating an electric airline that moves people throughout cities in a quick, safe, sustainable, and cost-effective manner. The company is building a platform to deliver aircraft, technologies, and services to customers world-wide across commercial and defense sectors.", + "ticker_root": "ACHR", + "ticker_suffix": "WS", + "homepage_url": "https://www.archer.com", + "list_date": "2021-09-17", + "locale": "us", + "market": "stocks", + "name": "Archer Aviation Inc. 
Redeemable Warrants, each whole warrant exercisable for one Class A common stock at an exercise price of $11.50", + "phone_number": "650-272-3233", + "primary_exchange": "XNYS", + "sic_code": "3721", + "sic_description": "AIRCRAFT", + "ticker": "ACHR.WS", + "total_employees": 1660, + "type": "WARRANT" + }, + { + "active": true, + "address": { + "address1": "250 PARKCENTER BLVD.", + "city": "BOISE", + "state": "ID", + "postal_code": "83706" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWxiZXJ0c29uc2NvbXBhbmllcy5jb20/images/2025-04-04_logo.svg" + }, + "cik": "0001646972", + "composite_figi": "BBG009KG1750", + "currency_name": "usd", + "description": "Albertsons is the second-largest supermarket operator in the United States with about 2,300 stores across a variety of banners. Around 80% of the firm's sales comes from nonperishable and fresh food, of which 26% comes from its portfolio of private brands. The company operates fuel centers at about 20% of its store locations and pharmacies at 75%. 
Albertsons went public in 2020 following years of ownership under private equity firm Cerberus Capital Management, which still owns about a fourth of the outstanding shares.", + "ticker_root": "ACI", + "homepage_url": "https://www.albertsonscompanies.com", + "list_date": "2020-06-26", + "locale": "us", + "market": "stocks", + "market_cap": 8663084369.1, + "name": "Albertsons Companies, Inc.", + "phone_number": "208-395-6200", + "primary_exchange": "XNYS", + "share_class_figi": "BBG009KG1741", + "share_class_shares_outstanding": 513913121, + "sic_code": "5411", + "sic_description": "RETAIL-GROCERY STORES", + "ticker": "ACI", + "total_employees": 280000, + "type": "CS", + "weighted_shares_outstanding": 506613121 + }, + { + "active": true, + "cik": "0001831313", + "composite_figi": "BBG01QPVFLN7", + "currency_name": "usd", + "ticker_root": "ACLO", + "list_date": "2024-11-15", + "locale": "us", + "market": "stocks", + "name": "TCW AAA CLO ETF", + "primary_exchange": "XNYS", + "share_class_figi": "BBG01QPVFMJ0", + "share_class_shares_outstanding": 9560000, + "ticker": "ACLO", + "type": "ETF" + }, + { + "active": true, + "address": { + "address1": "13355 NOEL ROAD", + "address2": "SUITE 400", + "city": "DALLAS", + "state": "TX", + "postal_code": "75240" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWVjb20uY29t/images/2025-04-04_logo.svg", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YWVjb20uY29t/images/2025-04-04_icon.jpeg" + }, + "cik": "0000868857", + "composite_figi": "BBG000F61RJ8", + "currency_name": "usd", + "description": "Aecom is one of the largest global providers of advisory, design, and engineering services. It serves a broad spectrum of end markets including water, transportation, and environment. Based in Dallas, Aecom employs 51,000. 
The company generated $16.1 billion in sales in fiscal 2025.", + "ticker_root": "ACM", + "homepage_url": "https://www.aecom.com", + "list_date": "2007-05-10", + "locale": "us", + "market": "stocks", + "market_cap": 10984361495.039999, + "name": "Aecom", + "phone_number": "(972) 788-1000", + "primary_exchange": "XNYS", + "share_class_figi": "BBG001SKTTF1", + "share_class_shares_outstanding": 129288624, + "sic_code": "8711", + "sic_description": "SERVICES-ENGINEERING SERVICES", + "ticker": "ACM", + "total_employees": 51000, + "type": "CS", + "weighted_shares_outstanding": 129288624 + }, + { + "active": true, + "address": { + "address1": "1 GRAND CANAL SQUARE", + "address2": "GRAND CANAL HARBOUR", + "city": "DUBLIN", + "state": "L2", + "postal_code": "D2" + }, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWNjZW50dXJlLmNvbQ/images/2025-04-04_logo.svg", + "icon_url": "https://api.massive.com/v1/reference/company-branding/YWNjZW50dXJlLmNvbQ/images/2025-04-04_icon.jpeg" + }, + "cik": "0001467373", + "currency_name": "usd", + "description": "Accenture is a leading IT services firm that provides consulting, system integration, and business process outsourcing to enterprises around the world. Customers of Accenture come from a variety of sectors, including communications, media and technology, financial services, health and public services, consumer products, and resources. 
Accenture is the world's largest professional services company by headcount, with around 800,000 employees in over 120 countries.", + "ticker_root": "ACN", + "homepage_url": "https://www.accenture.com", + "list_date": "2001-07-19", + "locale": "us", + "market": "stocks", + "market_cap": 119362072484.56, + "name": "Accenture PLC", + "phone_number": "353-1-646-2000", + "primary_exchange": "XNYS", + "share_class_shares_outstanding": 665142040, + "sic_code": "7389", + "sic_description": "SERVICES-BUSINESS SERVICES, NEC", + "ticker": "ACN", + "total_employees": 779000, + "type": "CS", + "weighted_shares_outstanding": 613939268 + }, + { + "active": true, + "branding": { + "logo_url": "https://api.massive.com/v1/reference/company-branding/YWJyZG5hY3AuY29t/images/2025-04-04_logo.svg" + }, + "cik": "0001503290", + "composite_figi": "BBG0017VSC04", + "currency_name": "usd", + "description": "abrdn Income Credit Strategies Fund is a diversified, closed-end management investment company. Its investment objective is to seek a high level of current income with a secondary objective of capital appreciation. 
It predominantly invests in debt and loan instruments of issues that operate in a variety of industries and geographic regions.", + "ticker_root": "ACP", + "homepage_url": "http://www.abrdnacp.com", + "list_date": "2011-01-27", + "locale": "us", + "market": "stocks", + "market_cap": 672522834.08, + "name": "abrdn Income Credit Strategies Fund", + "primary_exchange": "XNYS", + "share_class_figi": "BBG001TCSLH4", + "share_class_shares_outstanding": 125470678, + "ticker": "ACP", + "type": "FUND", + "weighted_shares_outstanding": 125470678 } ] } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index abc65901..11d5dc72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "requests", "tenacity", "fake-useragent", + "python-dotenv", ] [project.optional-dependencies] diff --git a/src/massive/rest/__init__.py b/src/massive/rest/__init__.py index 5a00da5a..8fc6c2a7 100644 --- a/src/massive/rest/__init__.py +++ b/src/massive/rest/__init__.py @@ -22,6 +22,9 @@ from .vX import VXClient from typing import Optional, Any import os +from dotenv import load_dotenv + +load_dotenv() BASE = "https://api.massive.com" ENV_KEY = "MASSIVE_API_KEY" diff --git a/src/massive/websocket/__init__.py b/src/massive/websocket/__init__.py index 0d5409cd..ef82ac9c 100644 --- a/src/massive/websocket/__init__.py +++ b/src/massive/websocket/__init__.py @@ -1,4 +1,5 @@ import os +from dotenv import load_dotenv from enum import Enum from typing import Optional, Union, List, Set, Callable, Awaitable, Any import logging @@ -13,6 +14,8 @@ import logging from ..exceptions import AuthError +load_dotenv() + env_key = "MASSIVE_API_KEY" logger = get_logger("WebSocketClient") From 686c5f1077b13edff0be3aaeaf90e4ead768d5fd Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:46:55 +0800 Subject: [PATCH 35/43] all --- src/processor/us_daily/ticker_lister.py | 2 -- 1 file changed, 2 deletions(-) 
diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index 9a08ca09..86c3a12a 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -66,8 +66,6 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis # Fetch details for new tickers only new_count = 0 for i, ticker_obj in enumerate(ticker_objs): - if new_count >= 10: - break ticker_str = ticker_obj.ticker if ticker_str in existing_tickers: continue From bcfc937abebfd27cfbc3a5ab5b459a22d7338026 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:39:15 +0800 Subject: [PATCH 36/43] ci --- src/processor/us_daily/__main__.py | 40 +++++++-------------- src/processor/us_daily/config.py | 1 - src/processor/us_daily/storage.py | 4 --- src/processor/us_daily/ticker_lister.py | 42 +++++++++++------------ tests/test_us_daily/test_config.py | 3 -- tests/test_us_daily/test_storage.py | 6 ---- tests/test_us_daily/test_ticker_lister.py | 27 ++++++++------- 7 files changed, 48 insertions(+), 75 deletions(-) diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py index f647ef7f..2b1cf6af 100644 --- a/src/processor/us_daily/__main__.py +++ b/src/processor/us_daily/__main__.py @@ -5,13 +5,13 @@ from massive import RESTClient from processor.us_daily.config import load_config -from processor.us_daily.ticker_lister import list_tickers_for_exchange, EXCHANGES +from processor.us_daily.ticker_lister import list_all_tickers, _get_tickers_file from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.sources.akshare_source import AkshareSource from processor.us_daily.sources.yfinance_source import YfinanceSource from processor.us_daily.sources.massive_source import MassiveSource from processor.us_daily.sources.manager import SourceManager -from processor.us_daily.storage import get_list_file_path, 
load_json, file_exists +from processor.us_daily.storage import load_json, file_exists SOURCE_CLASSES = { @@ -65,20 +65,12 @@ def build_source_manager(config, client) -> SourceManager: def load_all_tickers(config) -> list: - """Load tickers from all exchange files in list_data_dir.""" - all_tickers = [] - seen = set() - for exchange_name in config.exchanges: - file_path = get_list_file_path(config.list_data_dir, exchange_name) - if not file_exists(file_path): - continue - data = load_json(file_path) - for t in data.get("tickers", []): - ticker = t["ticker"] - if ticker not in seen: - seen.add(ticker) - all_tickers.append(t) - return all_tickers + """Load tickers from the tickers file.""" + file_path = _get_tickers_file(config) + if not file_exists(file_path): + return [] + data = load_json(file_path) + return data.get("tickers", []) def main(): @@ -90,17 +82,11 @@ def main(): client = RESTClient() - # Step 1: Fetch ticker lists per exchange - if config.refresh_tickers or any( - not file_exists(get_list_file_path(config.list_data_dir, ex)) - for ex in config.exchanges - ): - for exchange_name in config.exchanges: - if exchange_name not in EXCHANGES: - logger.warning(f"Unknown exchange: {exchange_name}, skipping") - continue - logger.info(f"Fetching ticker list for {exchange_name}...") - list_tickers_for_exchange(client, exchange_name, config) + # Step 1: Fetch ticker list + tickers_file = _get_tickers_file(config) + if config.refresh_tickers or not file_exists(tickers_file): + logger.info("Fetching ticker list...") + list_all_tickers(client, config) # Load all tickers tickers = load_all_tickers(config) diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py index adff53a2..d6df88da 100644 --- a/src/processor/us_daily/config.py +++ b/src/processor/us_daily/config.py @@ -7,7 +7,6 @@ @dataclass class Config: refresh_tickers: bool = False - exchanges: List[str] = field(default_factory=lambda: ["nasdaq", "nyse", "arca"]) start_date: str = 
"2026-01" data_source_priority: List[str] = field( default_factory=lambda: ["massive", "akshare", "yfinance"] diff --git a/src/processor/us_daily/storage.py b/src/processor/us_daily/storage.py index 32c6a6ab..af80505b 100644 --- a/src/processor/us_daily/storage.py +++ b/src/processor/us_daily/storage.py @@ -6,10 +6,6 @@ def get_tickers_file_path(data_dir: str) -> str: return os.path.join(data_dir, "top_tickers.json") -def get_list_file_path(list_dir: str, exchange: str) -> str: - return os.path.join(list_dir, f"{exchange}.json") - - def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: return os.path.join(data_dir, ticker, f"{month}.json") diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index 86c3a12a..c38187ab 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -4,15 +4,11 @@ from typing import Dict, List from processor.us_daily.config import Config -from processor.us_daily.storage import get_list_file_path, save_json, load_json, file_exists +from processor.us_daily.storage import save_json, load_json, file_exists logger = logging.getLogger("us_daily") -EXCHANGES: Dict[str, str] = { - "nasdaq": "XNAS", - "nyse": "XNYS", - "arca": "ARCX", -} +TICKERS_FILE = "tickers.json" def _details_to_dict(details) -> dict: @@ -29,14 +25,18 @@ def _details_to_dict(details) -> dict: return result -def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> List[dict]: - """Fetch all tickers for an exchange and save to file. +def _get_tickers_file(config: Config) -> str: + import os + return os.path.join(config.list_data_dir, TICKERS_FILE) + + +def list_all_tickers(client, config: Config) -> List[dict]: + """Fetch all US stock tickers and save to file. Supports resume: if the output file already exists, previously fetched tickers are kept and only missing ones are fetched. 
""" - exchange_code = EXCHANGES[exchange_name] - file_path = get_list_file_path(config.list_data_dir, exchange_name) + file_path = _get_tickers_file(config) # Load existing tickers for resume existing_tickers: Dict[str, dict] = {} @@ -45,23 +45,23 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis for t in data.get("tickers", []): existing_tickers[t["ticker"]] = t logger.info( - f"[{exchange_name}] Resuming: {len(existing_tickers)} tickers already fetched" + f"Resuming: {len(existing_tickers)} tickers already fetched" ) - # Get full ticker list from API - logger.info(f"[{exchange_name}] Listing tickers for {exchange_code}") + # Get full ticker list from API (all US stocks, no exchange filter) + logger.info("Listing all US stock tickers") try: ticker_objs = list( client.list_tickers( - market="stocks", exchange=exchange_code, active=True, limit=1000 + market="stocks", active=True, limit=1000 ) ) except Exception as e: - logger.error(f"[{exchange_name}] Failed to list tickers: {e}") + logger.error(f"Failed to list tickers: {e}") return list(existing_tickers.values()) time.sleep(config.massive_interval) - logger.info(f"[{exchange_name}] Found {len(ticker_objs)} tickers") + logger.info(f"Found {len(ticker_objs)} tickers") # Fetch details for new tickers only new_count = 0 @@ -76,11 +76,11 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis existing_tickers[ticker_str] = entry new_count += 1 logger.info( - f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" + f"[{i + 1}/{len(ticker_objs)}] {ticker_str}: OK" ) except Exception as e: logger.warning( - f"[{exchange_name}] [{i + 1}/{len(ticker_objs)}] {ticker_str}: {e}" + f"[{i + 1}/{len(ticker_objs)}] {ticker_str}: {e}" ) time.sleep(config.massive_interval) @@ -90,22 +90,20 @@ def list_tickers_for_exchange(client, exchange_name: str, config: Config) -> Lis tickers_list = list(existing_tickers.values()) save_json(file_path, { "updated_at": 
date.today().strftime("%Y-%m-%d"), - "exchange": exchange_code, "count": len(tickers_list), "tickers": tickers_list, }) logger.info( - f"[{exchange_name}] Checkpoint: saved {len(tickers_list)} tickers to {file_path}" + f"Checkpoint: saved {len(tickers_list)} tickers to {file_path}" ) # Final save tickers_list = list(existing_tickers.values()) save_json(file_path, { "updated_at": date.today().strftime("%Y-%m-%d"), - "exchange": exchange_code, "count": len(tickers_list), "tickers": tickers_list, }) - logger.info(f"[{exchange_name}] Saved {len(tickers_list)} tickers to {file_path}") + logger.info(f"Saved {len(tickers_list)} tickers to {file_path}") return tickers_list diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index 172d610b..514847bc 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -12,7 +12,6 @@ def test_default_config(self): self.assertEqual(config.refresh_tickers, False) self.assertEqual(config.start_date, "2026-01") self.assertEqual(config.max_retries, 3) - self.assertEqual(config.exchanges, ["nasdaq", "nyse", "arca"]) self.assertEqual(config.data_source_priority, ["massive", "akshare", "yfinance"]) self.assertEqual(config.akshare_interval, 2.0) self.assertEqual(config.yfinance_interval, 1.0) @@ -27,7 +26,6 @@ def test_load_config_from_file(self): json.dump({ "refresh_tickers": True, "akshare_interval": 3.0, - "exchanges": ["nasdaq"], }, f) tmp_path = f.name @@ -35,7 +33,6 @@ def test_load_config_from_file(self): config = load_config(tmp_path) self.assertEqual(config.refresh_tickers, True) self.assertEqual(config.akshare_interval, 3.0) - self.assertEqual(config.exchanges, ["nasdaq"]) # defaults preserved for unspecified fields self.assertEqual(config.start_date, "2026-01") self.assertEqual(config.massive_interval, 12.0) diff --git a/tests/test_us_daily/test_storage.py b/tests/test_us_daily/test_storage.py index 06a8cd22..6fdf5e26 100644 --- a/tests/test_us_daily/test_storage.py 
+++ b/tests/test_us_daily/test_storage.py @@ -50,12 +50,6 @@ def test_file_exists(self): self.assertTrue(file_exists(existing)) self.assertFalse(file_exists(os.path.join(self.test_dir, "nope.json"))) - def test_get_list_file_path(self): - from processor.us_daily.storage import get_list_file_path - - result = get_list_file_path("data/us_list", "nasdaq") - self.assertEqual(result, "data/us_list/nasdaq.json") - def test_get_month_file_path_daily_dir(self): from processor.us_daily.storage import get_month_file_path diff --git a/tests/test_us_daily/test_ticker_lister.py b/tests/test_us_daily/test_ticker_lister.py index a1144414..5c30ab26 100644 --- a/tests/test_us_daily/test_ticker_lister.py +++ b/tests/test_us_daily/test_ticker_lister.py @@ -23,8 +23,8 @@ def _make_details(self, **kwargs): """Create a SimpleNamespace TickerDetails with all fields as attributes.""" return SimpleNamespace(**kwargs) - def test_list_tickers_for_exchange(self): - from processor.us_daily.ticker_lister import list_tickers_for_exchange + def test_list_all_tickers(self): + from processor.us_daily.ticker_lister import list_all_tickers from processor.us_daily.config import Config config = Config(list_data_dir=self.test_dir, massive_interval=0) @@ -50,31 +50,34 @@ def mock_details(ticker): client.get_ticker_details.side_effect = mock_details with patch("processor.us_daily.ticker_lister.time.sleep"): - list_tickers_for_exchange(client, "nasdaq", config) + list_all_tickers(client, config) - file_path = os.path.join(self.test_dir, "nasdaq.json") + # Called without exchange filter + client.list_tickers.assert_called_once_with( + market="stocks", active=True, limit=1000 + ) + + file_path = os.path.join(self.test_dir, "tickers.json") self.assertTrue(os.path.exists(file_path)) with open(file_path) as f: data = json.load(f) - self.assertEqual(data["exchange"], "XNAS") self.assertEqual(data["count"], 2) tickers = [t["ticker"] for t in data["tickers"]] self.assertIn("AAPL", tickers) self.assertIn("MSFT", 
tickers) def test_resume_skips_existing_tickers(self): - from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.ticker_lister import list_all_tickers from processor.us_daily.config import Config config = Config(list_data_dir=self.test_dir, massive_interval=0) # Pre-populate file with AAPL already fetched - file_path = os.path.join(self.test_dir, "nasdaq.json") + file_path = os.path.join(self.test_dir, "tickers.json") existing_data = { "updated_at": "2026-04-22", - "exchange": "XNAS", "count": 1, "tickers": [ {"ticker": "AAPL", "name": "Apple Inc", "market_cap": 3e12}, @@ -97,7 +100,7 @@ def test_resume_skips_existing_tickers(self): client.get_ticker_details.return_value = details_msft with patch("processor.us_daily.ticker_lister.time.sleep"): - list_tickers_for_exchange(client, "nasdaq", config) + list_all_tickers(client, config) # Should only call get_ticker_details for MSFT (AAPL already exists) client.get_ticker_details.assert_called_once_with("MSFT") @@ -107,7 +110,7 @@ def test_resume_skips_existing_tickers(self): self.assertEqual(data["count"], 2) def test_skips_ticker_on_details_error(self): - from processor.us_daily.ticker_lister import list_tickers_for_exchange + from processor.us_daily.ticker_lister import list_all_tickers from processor.us_daily.config import Config config = Config(list_data_dir=self.test_dir, massive_interval=0) @@ -131,9 +134,9 @@ def mock_details(ticker): client.get_ticker_details.side_effect = mock_details with patch("processor.us_daily.ticker_lister.time.sleep"): - list_tickers_for_exchange(client, "nasdaq", config) + list_all_tickers(client, config) - file_path = os.path.join(self.test_dir, "nasdaq.json") + file_path = os.path.join(self.test_dir, "tickers.json") with open(file_path) as f: data = json.load(f) From f063433696fc91296f18cac3624985941fdd4d1b Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:44:27 +0800 Subject: 
[PATCH 37/43] ci --- data/us_list/arca.json | 484 ------------ data/us_list/nasdaq.json | 738 ------------------- data/us_list/nyse.json | 937 ------------------------ src/processor/us_daily/ticker_lister.py | 3 + 4 files changed, 3 insertions(+), 2159 deletions(-) delete mode 100644 data/us_list/arca.json delete mode 100644 data/us_list/nasdaq.json delete mode 100644 data/us_list/nyse.json diff --git a/data/us_list/arca.json b/data/us_list/arca.json deleted file mode 100644 index aeaefb6e..00000000 --- a/data/us_list/arca.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "updated_at": "2026-04-23", - "exchange": "ARCX", - "count": 30, - "tickers": [ - { - "active": true, - "cik": "0001776878", - "composite_figi": "BBG01B0JRCS6", - "currency_name": "usd", - "ticker_root": "AAA", - "list_date": "2020-09-08", - "locale": "us", - "market": "stocks", - "name": "Alternative Access First Priority CLO Bond ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01B0JRCT5", - "share_class_shares_outstanding": 1600000, - "ticker": "AAA", - "type": "ETF" - }, - { - "active": true, - "cik": "0001551950", - "composite_figi": "BBG01YZNRVY0", - "currency_name": "usd", - "ticker_root": "AAAC", - "list_date": "2025-12-11", - "locale": "us", - "market": "stocks", - "name": "Columbia AAA CLO ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01YZNRWV1", - "ticker": "AAAC", - "type": "ETF" - }, - { - "active": true, - "cik": "0001199046", - "composite_figi": "BBG00RHHGQY2", - "currency_name": "usd", - "ticker_root": "ABEQ", - "list_date": "2020-01-21", - "locale": "us", - "market": "stocks", - "name": "Absolute Select Value ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00RHHGRP0", - "share_class_shares_outstanding": 3725000, - "ticker": "ABEQ", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01JHFMD75", - "currency_name": "usd", - "ticker_root": "ABNY", - "list_date": "2024-06-24", - "locale": "us", - "market": "stocks", - "name": "YieldMax 
ABNB Option Income Strategy ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01JHFMF34", - "share_class_shares_outstanding": 659981, - "ticker": "ABNY", - "type": "ETS" - }, - { - "active": true, - "cik": "0001415726", - "composite_figi": "BBG01XK51Q67", - "currency_name": "usd", - "ticker_root": "ACEI", - "list_date": "2025-09-24", - "locale": "us", - "market": "stocks", - "name": "Innovator Equity Autocallable Income Strategy ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01XK51R47", - "share_class_shares_outstanding": 1325000, - "ticker": "ACEI", - "type": "ETF" - }, - { - "active": true, - "cik": "0001414040", - "composite_figi": "BBG00L99FCT0", - "currency_name": "usd", - "ticker_root": "ACES", - "list_date": "2018-06-27", - "locale": "us", - "market": "stocks", - "name": "ALPS Clean Energy ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00L99FDK7", - "share_class_shares_outstanding": 3450002, - "ticker": "ACES", - "type": "ETF" - }, - { - "active": true, - "cik": "0001481714", - "composite_figi": "BBG011MFL031", - "currency_name": "usd", - "ticker_root": "ACGR", - "list_date": "2021-06-29", - "locale": "us", - "market": "stocks", - "name": "American Century Large Cap Growth ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG011MFL0Y7", - "share_class_shares_outstanding": 160000, - "ticker": "ACGR", - "type": "ETF" - }, - { - "active": true, - "address": { - "address1": "399 PARK AVE.", - "city": "NEW YORK", - "state": "NY", - "postal_code": "10022" - }, - "cik": "0001838614", - "composite_figi": "BBG01XK52JX2", - "currency_name": "usd", - "ticker_root": "ACII", - "list_date": "2025-09-24", - "locale": "us", - "market": "stocks", - "name": "Innovator Index Autocallable Income Strategy ETF", - "phone_number": "(212) 883-3800", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01XK52KW0", - "share_class_shares_outstanding": 2750000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": 
"ACII", - "type": "ETF" - }, - { - "active": true, - "cik": "0001722388", - "composite_figi": "BBG01WHYZLL7", - "currency_name": "usd", - "ticker_root": "ACKY", - "list_date": "2025-09-08", - "locale": "us", - "market": "stocks", - "name": "VistaShares Target 15 ACKtivist Distribution ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01WHYZMG1", - "share_class_shares_outstanding": 2850000, - "ticker": "ACKY", - "type": "ETF" - }, - { - "active": true, - "cik": "0001481714", - "composite_figi": "BBG00W0JV9Q5", - "currency_name": "usd", - "ticker_root": "ACLC", - "list_date": "2020-07-15", - "locale": "us", - "market": "stocks", - "name": "American Century Large Cap Equity ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00W0JVBH0", - "share_class_shares_outstanding": 3760000, - "ticker": "ACLC", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG0200WVGR7", - "currency_name": "usd", - "ticker_root": "ACTS", - "list_date": "2026-03-18", - "locale": "us", - "market": "stocks", - "name": "FIS Tactical Equity ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG0200WVHP7", - "share_class_shares_outstanding": 330000, - "ticker": "ACTS", - "type": "ETF" - }, - { - "active": true, - "cik": "0001771146", - "composite_figi": "BBG00Y04Q9T7", - "currency_name": "usd", - "ticker_root": "ACVF", - "list_date": "2020-10-28", - "locale": "us", - "market": "stocks", - "name": "American Conservative Values ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00Y04QBN8", - "share_class_shares_outstanding": 2800000, - "ticker": "ACVF", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01TQ0YGP6", - "currency_name": "usd", - "ticker_root": "ACVT", - "list_date": "2025-04-29", - "locale": "us", - "market": "stocks", - "name": "Advent Convertible Bond ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01TQ0YHK9", - "share_class_shares_outstanding": 1200000, - "ticker": "ACVT", - "type": "ETF" - }, - { - 
"active": true, - "cik": "0001329377", - "composite_figi": "BBG020KHJ195", - "currency_name": "usd", - "ticker_root": "ACYN", - "list_date": "2026-02-24", - "locale": "us", - "market": "stocks", - "name": "FT Vest Laddered Autocallable Barrier & Income ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG020KHJ266", - "share_class_shares_outstanding": 7500002, - "ticker": "ACYN", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG0210PT249", - "currency_name": "usd", - "ticker_root": "ADBU", - "list_date": "2026-03-24", - "locale": "us", - "market": "stocks", - "name": "Direxion Daily ADBE Bull 2X ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG0210PT3J1", - "share_class_shares_outstanding": 125001, - "ticker": "ADBU", - "type": "ETS" - }, - { - "active": true, - "composite_figi": "BBG00ZV17TH6", - "currency_name": "usd", - "ticker_root": "ADIV", - "list_date": "2006-03-31", - "locale": "us", - "market": "stocks", - "name": "Guinness Atkinson Asia Pacific Dividend Builder ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00ZV17VD5", - "share_class_shares_outstanding": 2900000, - "ticker": "ADIV", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01BBJQ2C4", - "currency_name": "usd", - "ticker_root": "ADPV", - "list_date": "2022-11-03", - "locale": "us", - "market": "stocks", - "name": "Adaptiv Select ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01BBJQ3B3", - "share_class_shares_outstanding": 3780000, - "ticker": "ADPV", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01JFHH825", - "currency_name": "usd", - "ticker_root": "ADVE", - "list_date": "2023-09-21", - "locale": "us", - "market": "stocks", - "name": "Matthews Asia Dividend Active ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01JFHH923", - "share_class_shares_outstanding": 200000, - "ticker": "ADVE", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01JLGS0C4", - 
"currency_name": "usd", - "ticker_root": "AETH", - "list_date": "2023-09-29", - "locale": "us", - "market": "stocks", - "name": "Bitwise Trendwise Ether and Treasuries Rotation Strategy ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01JLGS178", - "share_class_shares_outstanding": 150004, - "ticker": "AETH", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01QVK6KB9", - "currency_name": "usd", - "ticker_root": "AFIX", - "list_date": "2024-12-04", - "locale": "us", - "market": "stocks", - "name": "Allspring Broad Market Core Bond ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01QVK6L54", - "share_class_shares_outstanding": 6400000, - "ticker": "AFIX", - "type": "ETF" - }, - { - "active": true, - "cik": "0001137360", - "composite_figi": "BBG000H0B9J8", - "currency_name": "usd", - "ticker_root": "AFK", - "list_date": "2008-07-10", - "locale": "us", - "market": "stocks", - "name": "VanEck Africa Index ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG001T33P37", - "share_class_shares_outstanding": 4450000, - "ticker": "AFK", - "type": "ETF" - }, - { - "active": true, - "cik": "0001667919", - "composite_figi": "BBG00QZ0DQB9", - "currency_name": "usd", - "ticker_root": "AFLG", - "list_date": "2019-12-03", - "locale": "us", - "market": "stocks", - "name": "First Trust Active Factor Large Cap ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00QZ0DR27", - "share_class_shares_outstanding": 14300002, - "ticker": "AFLG", - "type": "ETF" - }, - { - "active": true, - "cik": "0001667919", - "composite_figi": "BBG00QZ0GVP0", - "currency_name": "usd", - "ticker_root": "AFMC", - "list_date": "2019-12-03", - "locale": "us", - "market": "stocks", - "name": "First Trust Active Factor Mid Cap ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00QZ0GWH7", - "share_class_shares_outstanding": 3900002, - "ticker": "AFMC", - "type": "ETF" - }, - { - "active": true, - "cik": "0001667919", - "composite_figi": 
"BBG00QZ0H932", - "currency_name": "usd", - "ticker_root": "AFSM", - "list_date": "2019-12-03", - "locale": "us", - "market": "stocks", - "name": "First Trust Active Factor Small Cap ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG00QZ0H9W0", - "share_class_shares_outstanding": 2450002, - "ticker": "AFSM", - "type": "ETF" - }, - { - "active": true, - "cik": "0001100663", - "composite_figi": "BBG000Q123R0", - "currency_name": "usd", - "ticker_root": "AGG", - "list_date": "2003-09-22", - "locale": "us", - "market": "stocks", - "name": "iShares Core U.S. Aggregate Bond ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG001SM1QT8", - "share_class_shares_outstanding": 1365800000, - "ticker": "AGG", - "type": "ETF" - }, - { - "active": true, - "cik": "0001810747", - "composite_figi": "BBG015BC7V75", - "currency_name": "usd", - "ticker_root": "AGGH", - "list_date": "2022-02-14", - "locale": "us", - "market": "stocks", - "name": "Simplify Aggregate Bond ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG015BC7WS0", - "share_class_shares_outstanding": 21325001, - "ticker": "AGGH", - "type": "ETF" - }, - { - "active": true, - "cik": "0001860434", - "composite_figi": "BBG01MCGX1X6", - "currency_name": "usd", - "ticker_root": "AGGS", - "list_date": "2024-05-01", - "locale": "us", - "market": "stocks", - "name": "Harbor Disciplined Bond ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01MCGX2S0", - "share_class_shares_outstanding": 950000, - "ticker": "AGGS", - "type": "ETF" - }, - { - "active": true, - "cik": "0001350487", - "composite_figi": "BBG009KCY2B7", - "currency_name": "usd", - "ticker_root": "AGGY", - "list_date": "2015-07-09", - "locale": "us", - "market": "stocks", - "name": "WisdomTree Yield Enhanced U.S. 
Aggregate Bond Fund", - "primary_exchange": "ARCX", - "share_class_figi": "BBG009KCY2C6", - "share_class_shares_outstanding": 20300000, - "ticker": "AGGY", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01WV67GC4", - "currency_name": "usd", - "ticker_root": "AGIQ", - "list_date": "2025-09-02", - "locale": "us", - "market": "stocks", - "name": "SoFi Agentic AI ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG01WV67HD1", - "share_class_shares_outstanding": 450000, - "ticker": "AGIQ", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG010WX25M2", - "currency_name": "usd", - "ticker_root": "AGOX", - "list_date": "2012-09-20", - "locale": "us", - "market": "stocks", - "name": "Adaptive Alpha Opportunities ETF", - "primary_exchange": "ARCX", - "share_class_figi": "BBG010WX26H6", - "share_class_shares_outstanding": 11393738, - "ticker": "AGOX", - "type": "ETF" - } - ] -} \ No newline at end of file diff --git a/data/us_list/nasdaq.json b/data/us_list/nasdaq.json deleted file mode 100644 index 25f01ae3..00000000 --- a/data/us_list/nasdaq.json +++ /dev/null @@ -1,738 +0,0 @@ -{ - "updated_at": "2026-04-23", - "exchange": "XNAS", - "count": 30, - "tickers": [ - { - "active": true, - "address": { - "address1": "3 COLUMBUS CIRCLE", - "address2": "SUITE 2215", - "city": "NEW YORK", - "state": "NY", - "postal_code": "10019" - }, - "cik": "0002034334", - "currency_name": "usd", - "description": "Artius II Acquisition Inc is a blank check company.", - "ticker_root": "AACB", - "homepage_url": "https://www.artiuscapital.com", - "list_date": "2025-04-07", - "locale": "us", - "market": "stocks", - "market_cap": 287266500.0, - "name": "Artius II Acquisition Inc. 
Class A Ordinary Shares", - "phone_number": "212 309 7668", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 22175000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACB", - "type": "CS", - "weighted_shares_outstanding": 27675000 - }, - { - "active": true, - "address": { - "address1": "3 COLUMBUS CIRCLE", - "address2": "SUITE 2215", - "city": "NEW YORK", - "state": "NY", - "postal_code": "10019" - }, - "cik": "0002034334", - "currency_name": "usd", - "ticker_root": "AACB", - "ticker_suffix": "R", - "list_date": "2025-04-07", - "locale": "us", - "market": "stocks", - "name": "Artius II Acquisition Inc. Rights", - "phone_number": "212 309 7668", - "primary_exchange": "XNAS", - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACBR", - "type": "RIGHT" - }, - { - "active": true, - "address": { - "address1": "3 COLUMBUS CIRCLE", - "address2": "SUITE 2215", - "city": "NEW YORK", - "state": "NY", - "postal_code": "10019" - }, - "cik": "0002034334", - "currency_name": "usd", - "description": "Artius II Acquisition Inc is a blank check company.", - "ticker_root": "AACB", - "ticker_suffix": "U", - "homepage_url": "https://www.artiuscapital.com", - "list_date": "2025-02-13", - "locale": "us", - "market": "stocks", - "name": "Artius II Acquisition Inc. Units", - "phone_number": "212 309 7668", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 20000000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACBU", - "type": "UNIT" - }, - { - "active": true, - "cik": "0001420529", - "composite_figi": "BBG000V2S3P6", - "currency_name": "usd", - "description": "ATA Creativity Global is an international educational services company focused on providing quality learning experiences that cultivate and enhance students' creativity. The operating segments of the company are (i) Overseas art study services (ii) Other educational services and (iii) Other services. 
Majority of its revenue comes from Overseas art study services.", - "ticker_root": "AACG", - "homepage_url": "http://www.atai.net.cn", - "list_date": "2008-01-29", - "locale": "us", - "market": "stocks", - "market_cap": 44751196.88, - "name": "ATA Creativity Global American Depositary Shares", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001T125S9", - "share_class_shares_outstanding": 43029997, - "ticker": "AACG", - "total_employees": 489, - "type": "ADRC", - "weighted_shares_outstanding": 43029997 - }, - { - "active": true, - "address": { - "address1": "89 NEXUS WAY", - "city": "CARMANA BAY", - "postal_code": "KY 1-9009" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJtYWRhYWNxLmNvbQ/images/2024-09-01_logo.png" - }, - "cik": "0002092897", - "currency_name": "usd", - "description": "Armada Acquisition Corp III is a blank check company or special purpose acquisition company (SPAC), formed for the purpose of entering into a merger, share exchange, asset acquisition, share purchase, recapitalization, reorganization or similar business combination with one or more businesses.", - "ticker_root": "AACI", - "homepage_url": "https://www.armadaacq.com", - "list_date": "2026-03-27", - "locale": "us", - "market": "stocks", - "market_cap": 336895356.6, - "name": "Armada Acquisition Corp. 
III Class A Ordinary Share", - "phone_number": "215.543.6886", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 24850000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACI", - "type": "CS", - "weighted_shares_outstanding": 34029834 - }, - { - "active": true, - "address": { - "address1": "89 NEXUS WAY", - "city": "CARMANA BAY", - "postal_code": "KY 1-9009" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJtYWRhYWNxLmNvbQ/images/2024-09-01_logo.png" - }, - "cik": "0002092897", - "currency_name": "usd", - "description": "Armada Acquisition Corp III is a blank check company or special purpose acquisition company (SPAC), formed for the purpose of entering into a merger, share exchange, asset acquisition, share purchase, recapitalization, reorganization or similar business combination with one or more businesses.", - "ticker_root": "AACI", - "ticker_suffix": "U", - "homepage_url": "https://www.armadaacq.com", - "list_date": "2026-02-18", - "locale": "us", - "market": "stocks", - "name": "Armada Acquisition Corp. 
III Units", - "phone_number": "215.543.6886", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 23125000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACIU", - "type": "UNIT" - }, - { - "active": true, - "address": { - "address1": "89 NEXUS WAY", - "city": "CARMANA BAY", - "postal_code": "KY 1-9009" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJtYWRhYWNxLmNvbQ/images/2024-09-01_logo.png" - }, - "cik": "0002092897", - "currency_name": "usd", - "description": "Armada Acquisition Corp III is a blank check company or special purpose acquisition company (SPAC), formed for the purpose of entering into a merger, share exchange, asset acquisition, share purchase, recapitalization, reorganization or similar business combination with one or more businesses.", - "ticker_root": "AACI", - "ticker_suffix": "W", - "homepage_url": "https://www.armadaacq.com", - "list_date": "2026-03-27", - "locale": "us", - "market": "stocks", - "name": "Armada Acquisition Corp. III Warrant", - "phone_number": "215.543.6886", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 12425000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACIW", - "type": "WARRANT" - }, - { - "active": true, - "address": { - "address1": "1700 S. LAMAR BLVD.", - "address2": "SUITE #338", - "city": "AUSTIN", - "state": "TX", - "postal_code": "78704" - }, - "cik": "0002099906", - "currency_name": "usd", - "description": "Abony Acquisition Corp I is a blank check company formed for the purpose of effecting a merger, amalgamation, share exchange, asset acquisition, share purchase, reorganization or similar business combination with one or more businesses.", - "ticker_root": "AACO", - "list_date": "2026-04-13", - "locale": "us", - "market": "stocks", - "market_cap": 309539653.28999996, - "name": "Abony Acquisition Corp. 
I Class A Ordinary Share", - "phone_number": "215.817.0090", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 23000000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACO", - "type": "CS", - "weighted_shares_outstanding": 31361667 - }, - { - "active": true, - "address": { - "address1": "1700 S. LAMAR BLVD.", - "address2": "SUITE #338", - "city": "AUSTIN", - "state": "TX", - "postal_code": "78704" - }, - "cik": "0002099906", - "currency_name": "usd", - "description": "Abony Acquisition Corp I is a blank check company formed for the purpose of effecting a merger, amalgamation, share exchange, asset acquisition, share purchase, reorganization or similar business combination with one or more businesses.", - "ticker_root": "AACO", - "ticker_suffix": "U", - "list_date": "2026-02-19", - "locale": "us", - "market": "stocks", - "name": "Abony Acquisition Corp. I Units", - "phone_number": "215.817.0090", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 20635000, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACOU", - "type": "UNIT" - }, - { - "active": true, - "address": { - "address1": "1700 S. LAMAR BLVD.", - "address2": "SUITE #338", - "city": "AUSTIN", - "state": "TX", - "postal_code": "78704" - }, - "cik": "0002099906", - "currency_name": "usd", - "description": "Abony Acquisition Corp I is a blank check company formed for the purpose of effecting a merger, amalgamation, share exchange, asset acquisition, share purchase, reorganization or similar business combination with one or more businesses.", - "ticker_root": "AACO", - "ticker_suffix": "W", - "list_date": "2026-04-13", - "locale": "us", - "market": "stocks", - "name": "Abony Acquisition Corp. 
I Warrants", - "phone_number": "215.817.0090", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 7666667, - "sic_code": "6770", - "sic_description": "BLANK CHECKS", - "ticker": "AACOW", - "type": "WARRANT" - }, - { - "active": true, - "cik": "0002102123", - "currency_name": "usd", - "ticker_root": "AACP", - "ticker_suffix": "U", - "list_date": "2026-04-07", - "locale": "us", - "market": "stocks", - "name": "Apogee Acquisition Corp Units", - "primary_exchange": "XNAS", - "share_class_shares_outstanding": 25470000, - "ticker": "AACPU", - "type": "UNIT" - }, - { - "active": true, - "cik": "0001408970", - "composite_figi": "BBG000BDYRW6", - "currency_name": "usd", - "ticker_root": "AADR", - "list_date": "2010-07-20", - "locale": "us", - "market": "stocks", - "name": "AdvisorShares Dorsey Wright ADR ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001T9B1Y4", - "share_class_shares_outstanding": 535000, - "ticker": "AADR", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG01YY0GN44", - "currency_name": "usd", - "ticker_root": "AAEQ", - "list_date": "2025-12-09", - "locale": "us", - "market": "stocks", - "name": "Alpha Architect US Equity 2 ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG01YY0GP12", - "share_class_shares_outstanding": 9353000, - "ticker": "AAEQ", - "type": "ETF" - }, - { - "active": true, - "address": { - "address1": "1 SKYVIEW DRIVE", - "city": "FORT WORTH", - "state": "TX", - "postal_code": "76155" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWEuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWEuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0000006201", - "composite_figi": "BBG005P7Q881", - "currency_name": "usd", - "description": "American Airlines is the world's largest airline by aircraft, capacity, and scheduled revenue passenger miles. 
Its major US hubs are Dallas/Fort Worth, Charlotte, Chicago, Los Angeles, Miami, New York, Philadelphia, Phoenix, and Washington, D.C. It generates over 30% of US airline revenue connecting Latin America with destinations in the United States. After completing a major fleet renewal, the company has the youngest average fleet of US legacy carriers.", - "ticker_root": "AAL", - "homepage_url": "https://www.aa.com", - "list_date": "1972-06-01", - "locale": "us", - "market": "stocks", - "market_cap": 7771784824.21, - "name": "American Airlines Group Inc.", - "phone_number": "(682) 278-9000", - "primary_exchange": "XNAS", - "share_class_figi": "BBG005P7Q907", - "share_class_shares_outstanding": 660304573, - "sic_code": "4512", - "sic_description": "AIR TRANSPORTATION, SCHEDULED", - "ticker": "AAL", - "total_employees": 139100, - "type": "CS", - "weighted_shares_outstanding": 660304573 - }, - { - "active": true, - "cik": "0001976322", - "composite_figi": "BBG01VH49P35", - "currency_name": "usd", - "ticker_root": "AALG", - "list_date": "2025-07-10", - "locale": "us", - "market": "stocks", - "name": "Leverage Shares 2X Long AAL Daily ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG01VH49Q06", - "share_class_shares_outstanding": 455000, - "ticker": "AALG", - "type": "ETF" - }, - { - "active": true, - "address": { - "address1": "4370 PEACHTREE RD NE", - "city": "ATLANTA", - "state": "GA", - "postal_code": "30319" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXRsYW0uY29t/images/2025-04-04_logo.png" - }, - "cik": "0000008177", - "composite_figi": "BBG000B9XB24", - "currency_name": "usd", - "description": "Atlantic American Corp operates in specialty markets within the life and health and property and casualty insurance industries. 
The company has two segments: American Southern, It provides property and casualty insurance including bodily injury and property damage liability coverage, uninsured motorist coverage, and physical damage coverage for commercial accounts, and Bankers Fidelity, the company's life and health operations offer a variety of life and supplemental health products including ordinary and term life insurance, Medicare supplement, and other health insurance.", - "ticker_root": "AAME", - "homepage_url": "https://www.atlam.com", - "list_date": "1973-01-02", - "locale": "us", - "market": "stocks", - "market_cap": 50279167.019999996, - "name": "Atlantic American Corp", - "phone_number": "(404) 266-5500", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001S5N8T1", - "share_class_shares_outstanding": 20397228, - "sic_code": "6311", - "sic_description": "LIFE INSURANCE", - "ticker": "AAME", - "total_employees": 156, - "type": "CS", - "weighted_shares_outstanding": 20397228 - }, - { - "active": true, - "address": { - "address1": "13139 JESS PIRTLE BLVD", - "city": "SUGAR LAND", - "state": "TX", - "postal_code": "77478" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YW8taW5jLmNvbQ/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YW8taW5jLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0001158114", - "composite_figi": "BBG000D6VW15", - "currency_name": "usd", - "description": "Applied Optoelectronics Inc is a provider of fiber-optic networking products, for four networking end-markets; internet data center, CATV, telecom, and FTTH. The Company designs and manufactures a wide range of optical communications products at varying levels of integration, from components, subassemblies, and modules to meet turn-key equipment. 
Through direct sales personnel, and manufacturing teams in the United States, China, and Taiwan, the company coordinates with customers to determine product design, qualifications, and performance. The Company has manufacturing and research and development facilities located in the U.S., Taiwan, and China.", - "ticker_root": "AAOI", - "homepage_url": "https://www.ao-inc.com", - "list_date": "2013-09-26", - "locale": "us", - "market": "stocks", - "market_cap": 11695543420.68, - "name": "Applied Optoelectronics, Inc.", - "phone_number": "281-295-1800", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001SG47G4", - "share_class_shares_outstanding": 78943144, - "sic_code": "3674", - "sic_description": "SEMICONDUCTORS & RELATED DEVICES", - "ticker": "AAOI", - "total_employees": 4691, - "type": "CS", - "weighted_shares_outstanding": 77675124 - }, - { - "active": true, - "address": { - "address1": "2425 SOUTH YUKON AVE.", - "city": "TULSA", - "state": "OK", - "postal_code": "74107" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWFvbi5jb20/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWFvbi5jb20/images/2025-04-04_icon.jpeg" - }, - "cik": "0000824142", - "composite_figi": "BBG000C2LZP3", - "currency_name": "usd", - "description": "AAON Inc is a manufacturer of air-conditioning and heating equipment. The products include rooftop units, chillers, packaged outdoor mechanical rooms, air-handling units, makeup air units, energy-recovery units, condensing units, geothermal heat pumps, and self-contained units and coils. The company operates through three segments: AAON Oklahoma, AAON Coil Products, and BASX. 
It generates the majority of its revenue from the AAON Oklahoma segment which derives maximum revenue, engineers, manufactures, and sells, semi-custom, and custom HVAC systems, designs and manufactures control solutions, and sells retail parts to customers through retail part stores and online.", - "ticker_root": "AAON", - "homepage_url": "https://www.aaon.com", - "list_date": "1990-12-01", - "locale": "us", - "market": "stocks", - "market_cap": 7835716986.879999, - "name": "Aaon Inc", - "phone_number": "(918) 583-2266", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001S6CZK0", - "share_class_shares_outstanding": 81589231, - "sic_code": "3585", - "sic_description": "AIR-COND & WARM AIR HEATG EQUIP & COMM & INDL REFRIG EQUIP", - "ticker": "AAON", - "total_employees": 5897, - "type": "CS", - "weighted_shares_outstanding": 81843712 - }, - { - "active": true, - "cik": "0001689873", - "composite_figi": "BBG0193F21N2", - "currency_name": "usd", - "ticker_root": "AAPB", - "list_date": "2022-08-08", - "locale": "us", - "market": "stocks", - "name": "GraniteShares ETF Trust GraniteShares 2x Long AAPL Daily ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG0193F22H7", - "share_class_shares_outstanding": 580001, - "ticker": "AAPB", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG0193YGJ57", - "currency_name": "usd", - "ticker_root": "AAPD", - "list_date": "2022-08-08", - "locale": "us", - "market": "stocks", - "name": "Direxion Shares ETF Trust Direxion Daily AAPL Bear 1X ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG0193YGK09", - "share_class_shares_outstanding": 1275001, - "ticker": "AAPD", - "type": "ETF" - }, - { - "active": true, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXNjZW50YWdlcGhhcm1hLmNvbQ/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXNjZW50YWdlcGhhcm1hLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": 
"0002023311", - "composite_figi": "BBG01RJXM9C9", - "currency_name": "usd", - "description": "Ascentage Pharma Group International is a clinical-stage biotechnology company engaged in the development and sales of novel small-scale therapies for cancers, hepatitis B virus, or HBV, and certain age-related diseases. It focuses on developing therapies that inhibit protein-protein interactions to restore apoptosis or programmed cell death. The Group has one reportable operating segment, which is discovering, developing, and commercializing therapies to address medical needs in hematological malignancies. The company's geographical segments include the United States and Mainland China.", - "ticker_root": "AAPG", - "homepage_url": "https://www.ascentagepharma.com", - "list_date": "2025-01-24", - "locale": "us", - "market": "stocks", - "market_cap": 2393925349.95, - "name": "Ascentage Pharma Group International American Depository Shares", - "primary_exchange": "XNAS", - "share_class_figi": "BBG01RJXMB89", - "share_class_shares_outstanding": 93331173, - "ticker": "AAPG", - "total_employees": 767, - "type": "ADRC", - "weighted_shares_outstanding": 93330423 - }, - { - "active": true, - "address": { - "address1": "ONE APPLE PARK WAY", - "city": "CUPERTINO", - "state": "CA", - "postal_code": "95014" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXBwbGUuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXBwbGUuY29t/images/2025-04-04_icon.png" - }, - "cik": "0000320193", - "composite_figi": "BBG000B9XRY4", - "currency_name": "usd", - "description": "Apple is among the largest companies in the world, with a broad portfolio of hardware and software products targeted at consumers and businesses. Apple's iPhone makes up a majority of the firm sales, and Apple's other products like Mac, iPad, and Watch are designed around the iPhone as the focal point of an expansive software ecosystem. 
Apple has progressively worked to add new applications, like streaming video, subscription bundles, and augmented reality. The firm designs its own software and semiconductors while working with subcontractors like Foxconn and TSMC to build its products and chips. Slightly less than half of Apple's sales come directly through its flagship stores, with a majority of sales coming indirectly through partnerships and distribution.", - "ticker_root": "AAPL", - "homepage_url": "https://www.apple.com", - "list_date": "1980-12-12", - "locale": "us", - "market": "stocks", - "market_cap": 3907679033800.0, - "name": "Apple Inc.", - "phone_number": "(408) 996-1010", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001S5N8V8", - "share_class_shares_outstanding": 14681140000, - "sic_code": "3571", - "sic_description": "ELECTRONIC COMPUTERS", - "ticker": "AAPL", - "total_employees": 166000, - "type": "CS", - "weighted_shares_outstanding": 14681140000 - }, - { - "active": true, - "composite_figi": "BBG0193YBZ92", - "currency_name": "usd", - "ticker_root": "AAPU", - "list_date": "2022-08-08", - "locale": "us", - "market": "stocks", - "name": "Direxion Shares ETF Trust Direxion Daily AAPL Bull 2X ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG0193YC043", - "share_class_shares_outstanding": 5825001, - "ticker": "AAPU", - "type": "ETF" - }, - { - "active": true, - "address": { - "address1": "4370 LA JOLLA VILLAGE DRIVE", - "address2": "SUITE 1050", - "city": "SAN DIEGO", - "state": "CA", - "postal_code": "92122" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWFyZHZhcmt0aGVyYXBldXRpY3MuY29t/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWFyZHZhcmt0aGVyYXBldXRpY3MuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0001774857", - "composite_figi": "BBG01223DLB2", - "currency_name": "usd", - "description": "Aardvark Therapeutics Inc is a clinical-stage biopharmaceutical 
company focused on developing novel, small-molecule therapeutics to activate innate homeostatic pathways for the treatment of metabolic diseases. It is focused on on developing selective compounds, targeting Bitter Taste Receptors (TAS2Rs) for hunger-associated conditions. Its product candidate, ARD-101, is an oral gut-restricted small-molecule agonist of certain TAS2Rs expressed in the gut lumen for which have initiated a Phase 3 clinical trial for hyperphagia associated with PWS.", - "ticker_root": "AARD", - "homepage_url": "https://www.aardvarktherapeutics.com", - "list_date": "2025-02-13", - "locale": "us", - "market": "stocks", - "market_cap": 117152140.17, - "name": "Aardvark Therapeutics, Inc. Common Stock", - "phone_number": "(858) 225-7696", - "primary_exchange": "XNAS", - "share_class_figi": "BBG01223DLC1", - "share_class_shares_outstanding": 21816041, - "sic_code": "2834", - "sic_description": "PHARMACEUTICAL PREPARATIONS", - "ticker": "AARD", - "total_employees": 40, - "type": "CS", - "weighted_shares_outstanding": 21816041 - }, - { - "active": true, - "composite_figi": "BBG01W26H9C1", - "currency_name": "usd", - "ticker_root": "AAUS", - "list_date": "2025-07-22", - "locale": "us", - "market": "stocks", - "name": "Alpha Architect US Equity ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG01W26HBL6", - "share_class_shares_outstanding": 8817000, - "ticker": "AAUS", - "type": "ETF" - }, - { - "active": true, - "composite_figi": "BBG00GM1DQV1", - "currency_name": "usd", - "ticker_root": "AAVM", - "list_date": "2017-05-02", - "locale": "us", - "market": "stocks", - "name": "EA Series Trust Alpha Architect Global Factor Equity ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG00GM1DRK1", - "share_class_shares_outstanding": 660000, - "ticker": "AAVM", - "type": "ETF" - }, - { - "active": true, - "cik": "0001100663", - "composite_figi": "BBG000G6GXC5", - "currency_name": "usd", - "ticker_root": "AAXJ", - "list_date": "2008-08-13", - 
"locale": "us", - "market": "stocks", - "name": "iShares MSCI All Country Asia ex Japan ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG001T2V2D8", - "share_class_shares_outstanding": 34200000, - "ticker": "AAXJ", - "type": "ETF" - }, - { - "active": true, - "address": { - "address1": "100 WASHINGTON STREET", - "address2": "SUITE 100", - "city": "RENO", - "state": "NV", - "postal_code": "89503" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmljYW5iYXR0ZXJ5dGVjaG5vbG9neS5jb20/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmljYW5iYXR0ZXJ5dGVjaG5vbG9neS5jb20/images/2025-04-04_icon.jpeg" - }, - "cik": "0001576873", - "composite_figi": "BBG004M1KJN5", - "currency_name": "usd", - "description": "American Battery Technology Co is an integrated critical battery minerals company that develops technologies for both primary battery minerals manufacturing and lithium-ion battery recycling. It operates battery recycling facilities that process materials from electric vehicle batteries, stationary battery energy storage systems, and consumer electronics. Additionally, the company advances lithium production projects using proprietary technology to produce battery-grade lithium hydroxide. It generates revenue mainly from its recycling operations and is expanding capacity with strategic partnerships and government grants. 
The company's activities focus on creating a closed-loop battery materials supply chain to support sustainable manufacturing in the United States.", - "ticker_root": "ABAT", - "homepage_url": "https://www.americanbatterytechnology.com", - "list_date": "2015-10-15", - "locale": "us", - "market": "stocks", - "market_cap": 435957600.95, - "name": "American Battery Technology Company Common Stock", - "phone_number": "775-473-4744", - "primary_exchange": "XNAS", - "share_class_figi": "BBG004M1KJP3", - "share_class_shares_outstanding": 131709245, - "sic_code": "1400", - "sic_description": "MINING & QUARRYING OF NONMETALLIC MINERALS (NO FUELS)", - "ticker": "ABAT", - "total_employees": 163, - "type": "CS", - "weighted_shares_outstanding": 131709245 - }, - { - "active": true, - "address": { - "address1": "150 W 4TH AVENUE", - "city": "VANCOUVER", - "state": "A1", - "postal_code": "V5Y 1G6" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWJjZWxsZXJhLmNvbQ/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWJjZWxsZXJhLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0001703057", - "composite_figi": "BBG00LLW2MF2", - "currency_name": "usd", - "description": "AbCellera Biologics Inc is a clinical-stage biotechnology company focused on discovering and developing first-in-class antibody medicines for indications with high unmet medical need. It has built a platform for advancing antibody drug programs that the company believes provides it with a competitive advantage in addressing challenging, high-value targets such as complex transmembrane proteins and novel modalities, including multispecifics and antibody-drug conjugates. 
Organisation's pipeline includes two drug candidates in clinical development, two development candidates in Investigational New Drug (IND/Clinical Trial Application (CTA)-enabling activities, and more than 20 active discovery programs across multiple modalities and indications. ABCL635, ABCL386, ABCL575, and ABCL635.", - "ticker_root": "ABCL", - "homepage_url": "https://www.abcellera.com", - "list_date": "2020-12-11", - "locale": "us", - "market": "stocks", - "market_cap": 1209610343.13, - "name": "AbCellera Biologics Inc. Common Shares", - "phone_number": "(604) 559-9005", - "primary_exchange": "XNAS", - "share_class_figi": "BBG00LLW2MH0", - "share_class_shares_outstanding": 303160487, - "sic_code": "2834", - "sic_description": "PHARMACEUTICAL PREPARATIONS", - "ticker": "ABCL", - "total_employees": 562, - "type": "CS", - "weighted_shares_outstanding": 303160487 - }, - { - "active": true, - "composite_figi": "BBG01KMFQRF2", - "currency_name": "usd", - "ticker_root": "ABCS", - "list_date": "2023-12-18", - "locale": "us", - "market": "stocks", - "name": "Alpha Blue Capital US Small-Mid Cap Dynamic ETF", - "primary_exchange": "XNAS", - "share_class_figi": "BBG01KMFQS88", - "share_class_shares_outstanding": 350000, - "ticker": "ABCS", - "type": "ETF" - } - ] -} \ No newline at end of file diff --git a/data/us_list/nyse.json b/data/us_list/nyse.json deleted file mode 100644 index 868134ee..00000000 --- a/data/us_list/nyse.json +++ /dev/null @@ -1,937 +0,0 @@ -{ - "updated_at": "2026-04-23", - "exchange": "XNYS", - "count": 30, - "tickers": [ - { - "active": true, - "address": { - "address1": "5301 STEVENS CREEK BLVD", - "city": "SANTA CLARA", - "state": "CA", - "postal_code": "95051" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWdpbGVudC5jb20/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWdpbGVudC5jb20/images/2025-04-04_icon.jpeg" - }, - "cik": "0001090872", - 
"composite_figi": "BBG000C2V3D6", - "currency_name": "usd", - "description": "Originally spun out of Hewlett-Packard in 1999, Agilent has evolved into a leading life science and diagnostic firm. Today, Agilent's measurement technologies serve a broad base of customers with its three operating segments: life science and diagnostics, cross lab operations consisting of consumables and services, and applied end markets. Over half of its sales are generated from the biopharmaceutical, chemical, and advanced materials end markets, which we view as the stickiest end markets, but it also supports clinical lab, environmental, forensics, food, academic, and government-related organizations. The company is geographically diverse, with operations in the US and China representing the largest country concentrations.", - "ticker_root": "A", - "homepage_url": "https://www.agilent.com", - "list_date": "1999-11-18", - "locale": "us", - "market": "stocks", - "market_cap": 34502916882.53, - "name": "Agilent Technologies Inc.", - "phone_number": "(408) 345-8886", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001SCTQY4", - "share_class_shares_outstanding": 282602317, - "sic_code": "3826", - "sic_description": "LABORATORY ANALYTICAL INSTRUMENTS", - "ticker": "A", - "total_employees": 18100, - "type": "CS", - "weighted_shares_outstanding": 282602317 - }, - { - "active": true, - "address": { - "address1": "201 ISABELLA STREET", - "address2": "SUITE 500", - "city": "PITTSBURGH", - "state": "PA", - "postal_code": "15212" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWxjb2EuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWxjb2EuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0001675149", - "composite_figi": "BBG00B3T3HD3", - "currency_name": "usd", - "description": "Alcoa is a vertically integrated aluminum company whose operations include bauxite mining, alumina refining, and 
manufacturing primary aluminum. It is one of the world's largest bauxite miners and alumina refiners by production volume, but sits outside the top-10 aluminum producers, a list dominated by Chinese companies. Profits are closely tied to prevailing commodity prices along the aluminum supply chain.Alcoa was the first mass producer of aluminum, launching the world-changing Hall-Heroult smelting process in the 1880s, making aluminum affordable. It listed as a public company in 1925. In 2016, Alcoa spun off its automotive and aerospace metal parts segment to focus on mining, smelting, and refining. It bought the 40% unowned balance of AWAC in mid-2024.", - "ticker_root": "AA", - "homepage_url": "https://www.alcoa.com", - "list_date": "2016-10-18", - "locale": "us", - "market": "stocks", - "market_cap": 17718373052.800003, - "name": "Alcoa Corporation", - "phone_number": "412-315-2900", - "primary_exchange": "XNYS", - "share_class_figi": "BBG00B3T3HF1", - "share_class_shares_outstanding": 263862492, - "sic_code": "3334", - "sic_description": "PRIMARY PRODUCTION OF ALUMINUM", - "ticker": "AA", - "total_employees": 14900, - "type": "CS", - "weighted_shares_outstanding": 263862592 - }, - { - "active": true, - "address": { - "address1": "200 STATE STREET", - "address2": "SUITE 601A", - "city": "BOSTON", - "state": "MA", - "postal_code": "02109" - }, - "cik": "0001748824", - "composite_figi": "BBG00P2HLNY3", - "currency_name": "usd", - "description": "Acadian Asset Management Inc is a holding company that operates a systematic investment management business through its subsidiary, that offers institutional investors across the globe access to a diversified array of systematic investment strategies designed to meet a range of risk and return objectives.", - "ticker_root": "AAMI", - "homepage_url": "https://www.acadian-inc.com", - "list_date": "2014-10-09", - "locale": "us", - "market": "stocks", - "market_cap": 2349830523.04, - "name": "Acadian Asset Management Inc.", - 
"phone_number": "(617) 369-7300", - "primary_exchange": "XNYS", - "share_class_figi": "BBG00P2HLNZ2", - "share_class_shares_outstanding": 35722568, - "sic_code": "6282", - "sic_description": "INVESTMENT ADVICE", - "ticker": "AAMI", - "total_employees": 396, - "type": "CS", - "weighted_shares_outstanding": 35722568 - }, - { - "active": true, - "address": { - "address1": "4200 SIX FORKS ROAD", - "city": "RALEIGH", - "state": "NC", - "postal_code": "27609" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWR2YW5jZWF1dG9wYXJ0cy5jb20/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWR2YW5jZWF1dG9wYXJ0cy5jb20/images/2025-04-04_icon.jpeg" - }, - "cik": "0001158449", - "composite_figi": "BBG000F7RCJ1", - "currency_name": "usd", - "description": "Advance Auto Parts is a leading auto-parts retailer in North America with more than 4,000 store and branch locations. About half of the firm's sales are geared toward the professional channel, with the remaining sales in the do-it-yourself market. Through its vast store footprint and distribution network, Advance manages thousands of stock-keeping units for various vehicle makes and models. 
The retailer primarily competes on inventory availability and service speed, making the operating efficiency of its hub-and-spoke distribution model critical to meeting customer needs.", - "ticker_root": "AAP", - "homepage_url": "https://www.advanceautoparts.com", - "list_date": "2001-11-29", - "locale": "us", - "market": "stocks", - "market_cap": 3543134385.8399997, - "name": "ADVANCE AUTO PARTS INC", - "phone_number": "(540) 362-4911", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001SD2SB2", - "share_class_shares_outstanding": 60329208, - "sic_code": "5531", - "sic_description": "RETAIL-AUTO & HOME SUPPLY STORES", - "ticker": "AAP", - "total_employees": 54007, - "type": "CS", - "weighted_shares_outstanding": 60329208 - }, - { - "active": true, - "address": { - "address1": "3420 CARMEL MOUNTAIN ROAD", - "address2": "SUITE 100", - "city": "SAN DIEGO", - "state": "CA", - "postal_code": "92121" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmljYW5hc3NldHN0cnVzdC5jb20/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmljYW5hc3NldHN0cnVzdC5jb20/images/2025-04-04_icon.jpeg" - }, - "cik": "0001500217", - "composite_figi": "BBG00161BCR0", - "currency_name": "usd", - "description": "American Assets Trust Inc is a self-administered real estate investment trust based in the United States. The company invests in, operates, and develops retail, office, residential, and mixed-use properties. Properties are predominantly located in South California, Northern California, Oregon, Washington, and Hawaii. American Assets operates through four segments based on property type: retail; office; mixed-use, which consists of retail and hotel components; and multifamily, which includes the company's apartment properties. 
The retail and office segments collectively contribute the majority of the total revenue.", - "ticker_root": "AAT", - "homepage_url": "https://www.americanassetstrust.com", - "list_date": "2011-01-13", - "locale": "us", - "market": "stocks", - "market_cap": 1270792375.2, - "name": "AMERICAN ASSETS TRUST, INC.", - "phone_number": "(858) 350-2600", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001TCBJS5", - "share_class_shares_outstanding": 61390936, - "sic_code": "6798", - "sic_description": "REAL ESTATE INVESTMENT TRUSTS", - "ticker": "AAT", - "total_employees": 232, - "type": "CS", - "weighted_shares_outstanding": 61390936 - }, - { - "active": true, - "cik": "0001993344", - "composite_figi": "BBG01K0WWT59", - "currency_name": "usd", - "description": "Allied Gold Corp is a company focused on gold mining activities in Africa. Allied has three mines and several development and exploration projects in Africa, where it has operating experience. Operations are located in Cote d'Ivoire, Mali, and Ethiopia. 
The company expands and optimizes initiatives at existing operating mines, develops new mines, advances its exploration properties, and targets other consolidation opportunities, with a focus on Africa.", - "ticker_root": "AAUC", - "homepage_url": "https://www.alliedgold.com", - "list_date": "2023-11-02", - "locale": "us", - "market": "stocks", - "market_cap": 4014352392.81, - "name": "Allied Gold Corporation", - "primary_exchange": "XNYS", - "share_class_figi": "BBG01J3PPZ58", - "share_class_shares_outstanding": 125881229, - "ticker": "AAUC", - "total_employees": 2095, - "type": "CS", - "weighted_shares_outstanding": 125881229 - }, - { - "active": true, - "address": { - "address1": "501 COMMERCE STREET", - "city": "NASHVILLE", - "state": "TN", - "postal_code": "37203" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWxsaWFuY2ViZXJuc3RlaW4uY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWxsaWFuY2ViZXJuc3RlaW4uY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0000825313", - "composite_figi": "BBG000B9WM03", - "currency_name": "usd", - "description": "AllianceBernstein provides investment management services to institutional (41% of assets under management), retail (41%), and private (18%) clients through products that includes mutual funds, hedge funds, and separately managed accounts. 
At the end of February 2026, the company had $880.0 billion in managed assets, composed primarily of fixed-income (37% of AUM) and equity (40%) strategies, with other investments (made up of asset allocation services and certain other alternative investments) accounting for the remainder.", - "ticker_root": "AB", - "homepage_url": "https://www.alliancebernstein.com", - "list_date": "1988-04-14", - "locale": "us", - "market": "stocks", - "market_cap": 3590784719.97, - "name": "AllianceBernstein Holding, L.P.", - "phone_number": "(615) 622-0000", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001S5N9S0", - "share_class_shares_outstanding": 92284367, - "sic_code": "6282", - "sic_description": "INVESTMENT ADVICE", - "ticker": "AB", - "total_employees": 4468, - "type": "CS", - "weighted_shares_outstanding": 92284367 - }, - { - "active": true, - "address": { - "address1": "1 NORTH WAUKEGAN ROAD", - "city": "NORTH CHICAGO", - "state": "IL", - "postal_code": "60064" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWJidmllLmNvbQ/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWJidmllLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0001551152", - "composite_figi": "BBG0025Y4RY4", - "currency_name": "usd", - "description": "AbbVie is a pharmaceutical firm with a strong exposure to immunology (with Humira, Skyrizi, and Rinvoq) and oncology (with Imbruvica and Venclexta). The company was spun off from Abbott in early 2013. The 2020 acquisition of Allergan added several new products and drugs in aesthetics, including Botox. 
The 2024 acquisitions of Cerevel (neuroscience) and ImmunoGen (oncology) help supplement AbbVie's portfolio.", - "ticker_root": "ABBV", - "homepage_url": "https://www.abbvie.com", - "list_date": "2012-12-10", - "locale": "us", - "market": "stocks", - "market_cap": 362808538770.24, - "name": "ABBVIE INC.", - "phone_number": "(847) 932-7900", - "primary_exchange": "XNYS", - "share_class_figi": "BBG0025Y4RZ3", - "share_class_shares_outstanding": 1768169012, - "sic_code": "2834", - "sic_description": "PHARMACEUTICAL PREPARATIONS", - "ticker": "ABBV", - "total_employees": 57000, - "type": "CS", - "weighted_shares_outstanding": 1768762377 - }, - { - "active": true, - "address": { - "address1": "3490 PIEDMONT RD", - "address2": "SUITE 1550", - "city": "ATLANTA", - "state": "GA", - "postal_code": "30305" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YW1lcmlzYmFuay5jb20/images/2025-04-04_logo.svg" - }, - "cik": "0000351569", - "composite_figi": "BBG000CDY3H5", - "currency_name": "usd", - "description": "Ameris Bancorp is a bank holding company that operates through Ameris Bank, its subsidiary. The company operates branches in Georgia, Alabama, Florida, and South Carolina. It offers traditional banking services such as business banking, personal banking, checking, savings, mobile banking, and others. The bank is organized into four segments: the Banking Division, the Retail Mortgage Division, the Warehouse Lending Division and the Premium Finance Division. The company generates majority of its revenue from the banking division. 
The company intends to acquire banks in its geographic region to increase its market share and expand beyond its current market.", - "ticker_root": "ABCB", - "homepage_url": "https://www.amerisbank.com", - "list_date": "1994-05-19", - "locale": "us", - "market": "stocks", - "market_cap": 5765318654.55, - "name": "Ameris Bancorp", - "phone_number": "(404) 639-6500", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001S80PX7", - "share_class_shares_outstanding": 68269019, - "sic_code": "6022", - "sic_description": "STATE COMMERCIAL BANKS", - "ticker": "ABCB", - "total_employees": 2673, - "type": "CS", - "weighted_shares_outstanding": 68269019 - }, - { - "active": true, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YW1iZXYuY29tLmJy/images/2024-07-01_logo.svg" - }, - "cik": "0001565025", - "composite_figi": "BBG000BN5VZ4", - "currency_name": "usd", - "description": "Ambev is the largest brewer in Latin America and the Caribbean and is Anheuser-Busch InBev's subsidiary in the region. It produces, distributes, and sells beer and PepsiCo products in Brazil and other Latin American countries and owns Argentina's largest brewer, Quinsa. Ambev was formed in 1999 through the merger of Brazil's two largest beverage companies, Brahma and Antarctica. 
In 2004, Ambev combined with Canadian brewer Labatt, giving AB InBev a controlling interest of 62%.", - "ticker_root": "ABEV", - "homepage_url": "https://www.ambev.com.br", - "list_date": "1996-08-01", - "locale": "us", - "market": "stocks", - "market_cap": 47881850702.799995, - "name": "AMBEV S.A.", - "primary_exchange": "XNYS", - "share_class_figi": "BBG005KLVT74", - "share_class_shares_outstanding": 15763664889, - "ticker": "ABEV", - "total_employees": 39606, - "type": "ADRC", - "weighted_shares_outstanding": 15596694040 - }, - { - "active": true, - "address": { - "address1": "6655 PEACHTREE DUNWOODY ROAD", - "city": "ATLANTA", - "state": "GA", - "postal_code": "30328" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXNidXJ5YXV0by5jb20/images/2025-04-04_logo.svg" - }, - "cik": "0001144980", - "composite_figi": "BBG000BKDWB5", - "currency_name": "usd", - "description": "Asbury Automotive Group is a regional collection of automobile dealerships that went public in March 2002. The company operates 171 new-vehicle stores and 39 collision centers. Over 70% of new-vehicle revenue is from luxury and import brands. Asbury also offers third-party financing and insurance products and its own F&I products via Total Care Auto. Asbury operates in 15 states (mostly in Rocky Mountain states, Texas, the Northeast, and Southeast). Asbury store brands include Herb Chambers in the Northeast, McDavid and Park Place in Texas, Koons in the Washington, D.C. area, and the Larry H. Miller brand in the Western US. Asbury generated about $18 billion of revenue in 2025 and is based in the Atlanta area. 
The firm targets at least $30 billion of revenue sometime around 2030.", - "ticker_root": "ABG", - "homepage_url": "https://www.asburyauto.com", - "list_date": "2002-03-13", - "locale": "us", - "market": "stocks", - "market_cap": 4013317010.79, - "name": "Asbury Automotive Group, Inc.", - "phone_number": "770-418-8200", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001S9B5H5", - "share_class_shares_outstanding": 19295721, - "sic_code": "5500", - "sic_description": "RETAIL-AUTO DEALERS & GASOLINE STATIONS", - "ticker": "ABG", - "total_employees": 15000, - "type": "CS", - "weighted_shares_outstanding": 19295721 - }, - { - "active": true, - "address": { - "address1": "ONE LIBERTY PLAZA", - "address2": "7TH FLOOR", - "city": "NEW YORK", - "state": "NY", - "postal_code": "10006" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWJtLmNvbQ/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWJtLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0000771497", - "composite_figi": "BBG000B9YYH7", - "currency_name": "usd", - "description": "ABM Industries Inc is a provider of integrated facility solutions. It offers its solutions through five segments: Business and Industry, Manufacturing and Distribution, Education, Aviation and Technical solutions. The company derives the majority of its revenue from the business and industry segment, which encompasses janitorial, facilities engineering, and parking services for commercial real estate properties and sports and entertainment venues, hospitals, as well as vehicle maintenance and other services to rental car providers. 
The company mainly operates in the United States of America.", - "ticker_root": "ABM", - "homepage_url": "https://www.abm.com", - "list_date": "1972-06-01", - "locale": "us", - "market": "stocks", - "market_cap": 2356531333.2, - "name": "ABM Industries, Inc.", - "phone_number": "212 297-0200", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001S5N9B8", - "share_class_shares_outstanding": 58532820, - "sic_code": "7340", - "sic_description": "SERVICES-TO DWELLINGS & OTHER BUILDINGS", - "ticker": "ABM", - "total_employees": 113000, - "type": "CS", - "weighted_shares_outstanding": 58532820 - }, - { - "active": true, - "address": { - "address1": "333 EARLE OVINGTON BOULEVARD", - "address2": "SUITE 900", - "city": "UNIONDALE", - "state": "NY", - "postal_code": "11553" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0001253986", - "composite_figi": "BBG000KMVDV1", - "currency_name": "usd", - "description": "Arbor Realty Trust Inc is a specialized real estate finance company. It invests in a diversified portfolio of structured finance assets in the multifamily, SFR, and commercial real estate markets, consisting of bridge and mezzanine loans, including junior participating interests in first mortgages, preferred, and direct equity. In addition, it may also directly acquire real property and invest in real estate-related notes and certain mortgage-related securities. The company has two business segments, Structured Business and Agency Business. 
It generates a majority of its revenue from the Structured Business Segment.", - "ticker_root": "ABR", - "homepage_url": "https://www.arbor.com", - "list_date": "2004-04-07", - "locale": "us", - "market": "stocks", - "market_cap": 1569667416.48, - "name": "Arbor Realty Trust, Inc.", - "phone_number": "516-506-4200", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001SJP3G4", - "share_class_shares_outstanding": 192361203, - "sic_code": "6798", - "sic_description": "REAL ESTATE INVESTMENT TRUSTS", - "ticker": "ABR", - "total_employees": 653, - "type": "CS", - "weighted_shares_outstanding": 192361203 - }, - { - "active": true, - "address": { - "address1": "333 EARLE OVINGTON BOULEVARD", - "address2": "SUITE 900", - "city": "UNIONDALE", - "state": "NY", - "postal_code": "11553" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0001253986", - "currency_name": "usd", - "description": "Arbor Realty Trust Inc is a specialized real estate finance company. It invests in a diversified portfolio of structured finance assets in the multifamily, SFR, and commercial real estate markets, consisting of bridge and mezzanine loans, including junior participating interests in first mortgages, preferred, and direct equity. In addition, it may also directly acquire real property and invest in real estate-related notes and certain mortgage-related securities. The company has two business segments, Structured Business and Agency Business. It generates a majority of its revenue from the Structured Business Segment.", - "ticker_root": "ABR", - "ticker_suffix": "PRD", - "homepage_url": "https://www.arbor.com", - "list_date": "2021-05-25", - "locale": "us", - "market": "stocks", - "name": "Arbor Realty Trust, Inc. 
6.375% Series D Cumulative Redeemable Preferred Stock, Liquidation Preference $25.00 per Share", - "phone_number": "516-506-4200", - "primary_exchange": "XNYS", - "sic_code": "6798", - "sic_description": "REAL ESTATE INVESTMENT TRUSTS", - "ticker": "ABRpD", - "total_employees": 653, - "type": "PFD" - }, - { - "active": true, - "address": { - "address1": "333 EARLE OVINGTON BOULEVARD", - "address2": "SUITE 900", - "city": "UNIONDALE", - "state": "NY", - "postal_code": "11553" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0001253986", - "currency_name": "usd", - "description": "Arbor Realty Trust Inc is a specialized real estate finance company. It invests in a diversified portfolio of structured finance assets in the multifamily, SFR, and commercial real estate markets, consisting of bridge and mezzanine loans, including junior participating interests in first mortgages, preferred, and direct equity. In addition, it may also directly acquire real property and invest in real estate-related notes and certain mortgage-related securities. The company has two business segments, Structured Business and Agency Business. It generates a majority of its revenue from the Structured Business Segment.", - "ticker_root": "ABR", - "ticker_suffix": "PRE", - "homepage_url": "https://www.arbor.com", - "list_date": "2021-08-04", - "locale": "us", - "market": "stocks", - "name": "Arbor Realty Trust, Inc. 
6.25% Series E Cumulative Redeemable Preferred Stock", - "phone_number": "516-506-4200", - "primary_exchange": "XNYS", - "sic_code": "6798", - "sic_description": "REAL ESTATE INVESTMENT TRUSTS", - "ticker": "ABRpE", - "total_employees": 653, - "type": "PFD" - }, - { - "active": true, - "address": { - "address1": "333 EARLE OVINGTON BOULEVARD", - "address2": "SUITE 900", - "city": "UNIONDALE", - "state": "NY", - "postal_code": "11553" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXJib3IuY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0001253986", - "currency_name": "usd", - "description": "Arbor Realty Trust Inc is a specialized real estate finance company. It invests in a diversified portfolio of structured finance assets in the multifamily, SFR, and commercial real estate markets, consisting of bridge and mezzanine loans, including junior participating interests in first mortgages, preferred, and direct equity. In addition, it may also directly acquire real property and invest in real estate-related notes and certain mortgage-related securities. The company has two business segments, Structured Business and Agency Business. It generates a majority of its revenue from the Structured Business Segment.", - "ticker_root": "ABR", - "ticker_suffix": "PRF", - "homepage_url": "https://www.arbor.com", - "list_date": "2021-10-05", - "locale": "us", - "market": "stocks", - "name": "Arbor Realty Trust, Inc. 
6.25% Series F Fixed-to-Floating Rate Cumulative Redeemable Preferred Stock, Liquidation Preference $25.00 per share", - "phone_number": "516-506-4200", - "primary_exchange": "XNYS", - "sic_code": "6798", - "sic_description": "REAL ESTATE INVESTMENT TRUSTS", - "ticker": "ABRpF", - "total_employees": 653, - "type": "PFD" - }, - { - "active": true, - "address": { - "address1": "100 ABBOTT PARK ROAD", - "city": "ABBOTT PARK", - "state": "IL", - "postal_code": "60064-3500" - }, - "cik": "0000001800", - "composite_figi": "BBG000B9ZXB4", - "currency_name": "usd", - "description": "Abbott manufactures and markets cardiovascular and diabetes devices, adult and pediatric nutritional products, diagnostic equipment and testing kits, and branded generic drugs. Products include pacemakers, implantable cardioverter defibrillators, neuromodulation devices, coronary stents, catheters, infant formula, nutritional liquids for adults, continuous glucose monitors, and immunoassays and point-of-care diagnostic equipment. 
Abbott derives roughly 60% of sales outside the United States.", - "ticker_root": "ABT", - "homepage_url": "https://www.abbott.com", - "list_date": "1949-04-19", - "locale": "us", - "market": "stocks", - "market_cap": 161117213853.68, - "name": "Abbott Laboratories", - "phone_number": "(224) 667-6100", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001S5N9M6", - "share_class_shares_outstanding": 1741657324, - "sic_code": "2834", - "sic_description": "PHARMACEUTICAL PREPARATIONS", - "ticker": "ABT", - "total_employees": 115000, - "type": "CS", - "weighted_shares_outstanding": 1737674869 - }, - { - "active": true, - "address": { - "address1": "2101 PARK CENTER DRIVE, SUITE 200", - "city": "ORLANDO", - "state": "FL", - "postal_code": "32835" - }, - "cik": "0001814287", - "composite_figi": "BBG00VY1KB95", - "currency_name": "usd", - "description": "Abacus Global Management Inc is a financial services company specializing in alternative asset management, data-driven wealth solutions, technology innovations, and institutional services. The company operates through four distinct yet complementary divisions: Abacus Life Solutions, Abacus Asset Group, Abacus Intel, and Abacus Wealth Advisors. It operates through three reportable segments, Asset management, Life solutions, and Technology services. It derives the majority revenue from Life solutions segment that generates revenues by buying, selling, and trading policies, and maintaining policies until receipt of death benefits. 
It also generates revenue by originating life insurance policy settlements between investors or buyers, and the sellers, who is often the original policy owner.", - "ticker_root": "ABX", - "homepage_url": "https://www.abacusgm.com", - "list_date": "2023-07-03", - "locale": "us", - "market": "stocks", - "market_cap": 911357943.1999999, - "name": "Abacus Global Management, Inc.", - "phone_number": "(800) 561-4148", - "primary_exchange": "XNYS", - "share_class_figi": "BBG00VY1KBC1", - "share_class_shares_outstanding": 97752855, - "sic_code": "6282", - "sic_description": "INVESTMENT ADVICE", - "ticker": "ABX", - "total_employees": 326, - "type": "CS", - "weighted_shares_outstanding": 96542155 - }, - { - "active": true, - "address": { - "address1": "2101 PARK CENTER DRIVE, SUITE 200", - "city": "ORLANDO", - "state": "FL", - "postal_code": "32835" - }, - "cik": "0001814287", - "currency_name": "usd", - "ticker_root": "ABXL", - "locale": "us", - "market": "stocks", - "name": "Abacus Global Management, Inc. 9.875% Fixed Rate Senior Notes due 2028", - "phone_number": "(800) 561-4148", - "primary_exchange": "XNYS", - "sic_code": "6282", - "sic_description": "INVESTMENT ADVICE", - "ticker": "ABXL", - "type": "SP" - }, - { - "active": true, - "address": { - "address1": "500 N. AKARD SREET", - "address2": "SUITE 400", - "city": "DALLAS", - "state": "TX", - "postal_code": "75201" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJjb3NhLmNvbQ/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YXJjb3NhLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0001739445", - "composite_figi": "BBG00JGMWFM9", - "currency_name": "usd", - "description": "Arcosa Inc is a manufacturer and producer of infrastructure-related products and solutions in the U.S. It operates in three segments: Construction Products, Engineered Structures, and Transportation Products. 
Maximum revenue is generated from the Construction Products segment, which produces aggregates, specialty materials, asphalt, and construction support gear. The Engineered Structures segment mainly manufactures and sells steel and concrete structures for infrastructure businesses, including utility structures for electricity transmission and distribution, structural wind towers, traffic and lighting structures, and telecommunication structures; and the Transportation Products segment builds inland barges, fiberglass covers, winches, marine hardware, and other industrial equipment.", - "ticker_root": "ACA", - "homepage_url": "https://www.arcosa.com", - "list_date": "2018-10-16", - "locale": "us", - "market": "stocks", - "market_cap": 5618350394.97, - "name": "Arcosa, Inc. Common Stock", - "phone_number": "972-942-6500", - "primary_exchange": "XNYS", - "share_class_figi": "BBG00JGMWGB9", - "share_class_shares_outstanding": 49098579, - "sic_code": "3440", - "sic_description": "FABRICATED STRUCTURAL METAL PRODUCTS", - "ticker": "ACA", - "total_employees": 6390, - "type": "CS", - "weighted_shares_outstanding": 49098579 - }, - { - "active": true, - "address": { - "address1": "FOUR CORPORATE DRIVE", - "city": "LAKE ZURICH", - "state": "IL", - "postal_code": "60047" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWNjb2JyYW5kcy5jb20/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWNjb2JyYW5kcy5jb20/images/2025-04-04_icon.png" - }, - "cik": "0000712034", - "composite_figi": "BBG000J06K07", - "currency_name": "usd", - "description": "ACCO Brands Corp is a consumer, technology and business branded products company providing brands and product solutions used in schools, homes and at work. Its brands include At-A-Glance, Barrilito, Buro, Esselte, Five Star, Foroni, GBC, Hilroy, Kensington, Leitz, Mead, PowerA, Quartet, Rapid, Swingline and Tilibra. 
The Company's product categories include gaming and computer accessories, storage and organization, notebooks, shredding, laminating and binding machines, dry erase boards and do-it-yourself tools. The Company operates through two segments, Americas and International. Americas includes the U.S., Canada, Brazil, Mexico and Chile, and International includes EMEA, Australia, New Zealand and Asia. Its products are sold in the U.S., Europe, Australia, Canada, Brazil and Mexico.", - "ticker_root": "ACCO", - "homepage_url": "https://www.accobrands.com", - "list_date": "1999-11-09", - "locale": "us", - "market": "stocks", - "market_cap": 303528142.26, - "name": "Acco Brands Corporation", - "phone_number": "847-541-9500", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001SPBTK3", - "share_class_shares_outstanding": 92257794, - "sic_code": "2780", - "sic_description": "BLANKBOOKS, LOOSELEAF BINDERS & BOOKBINDG & RELATD WORK", - "ticker": "ACCO", - "total_employees": 4700, - "type": "CS", - "weighted_shares_outstanding": 92257794 - }, - { - "active": true, - "address": { - "address1": "140 TOWER DRIVE", - "city": "BURR RIDGE", - "state": "IL", - "postal_code": "60527" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWNjZWxlbnRlcnRhaW5tZW50LmNvbQ/images/2025-04-04_logo.png", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWNjZWxlbnRlcnRhaW5tZW50LmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0001698991", - "composite_figi": "BBG00GX221W3", - "currency_name": "usd", - "description": "Accel Entertainment Inc is a distributed gaming and local entertainment operator in the United States. 
It is engaged in the installation, maintenance, operation, and servicing of gaming terminals and related equipment, redemption devices that disburse winnings and contain automated teller machine (ATM) functionality, and amusement devices in authorized non-casino locations such as restaurants, bars, convenience stores, liquor stores, truck stops, and grocery stores. The Company also operates stand-alone ATMs in gaming and non-gaming locations. It generates revenue from Net gaming, Amusement, Manufacturing, ATM fees, and others.", - "ticker_root": "ACEL", - "homepage_url": "https://www.accelentertainment.com", - "list_date": "2017-08-18", - "locale": "us", - "market": "stocks", - "market_cap": 964195815.0, - "name": "Accel Entertainment, Inc.", - "phone_number": "630-972-2235", - "primary_exchange": "XNYS", - "share_class_figi": "BBG00GX221Z0", - "share_class_shares_outstanding": 81573250, - "sic_code": "7900", - "sic_description": "SERVICES-AMUSEMENT & RECREATION SERVICES", - "ticker": "ACEL", - "total_employees": 1600, - "type": "CS", - "weighted_shares_outstanding": 81573250 - }, - { - "active": true, - "address": { - "address1": "10900 NUCKOLS ROAD", - "address2": "SUITE 400", - "city": "GLEN ALLEN", - "state": "VA", - "postal_code": "23060" - }, - "cik": "0000075252", - "composite_figi": "BBG000CTV5F0", - "currency_name": "usd", - "description": "Accendra Health Inc is a nationwide provider of products, technology, and services that support health beyond the hospital by connecting patients, providers, and insurers. Through its brands, Apria and Byram Healthcare, the company delivers disposable medical supplies, integrated home healthcare equipment, and related services that help improve health outcomes and quality of life for individuals with chronic, complex, and acute health conditions. Its offerings span diabetes treatment, home respiratory therapy, and obstructive sleep apnea treatment, along with patient support services. 
The company also supplies a broad range of home medical equipment and patient care products, including ostomy, wound care, urology, and incontinence solutions.", - "ticker_root": "ACH", - "homepage_url": "https://www.accendrahealth.com", - "list_date": "1973-01-02", - "locale": "us", - "market": "stocks", - "market_cap": 254538263.61, - "name": "Accendra Health, Inc.", - "phone_number": "(804) 277-4304", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001S72KY7", - "share_class_shares_outstanding": 76437917, - "sic_code": "5047", - "sic_description": "WHOLESALE-MEDICAL, DENTAL & HOSPITAL EQUIPMENT & SUPPLIES", - "ticker": "ACH", - "total_employees": 6500, - "type": "CS", - "weighted_shares_outstanding": 76437917 - }, - { - "active": true, - "address": { - "address1": "190 WEST TASMAN DRIVE", - "city": "SAN JOSE", - "state": "CA", - "postal_code": "95134" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJjaGVyLmNvbQ/images/2025-04-04_logo.png" - }, - "cik": "0001824502", - "composite_figi": "BBG00XRTC910", - "currency_name": "usd", - "description": "Archer Aviation Inc advances the benefits of sustainable air mobility. The company is engaged in designing and developing a fully electric vertical takeoff and landing eVTOL aircraft for use in UAM networks. It is creating an electric airline that moves people throughout cities in a quick, safe, sustainable, and cost-effective manner. 
The company is building a platform to deliver aircraft, technologies, and services to customers world-wide across commercial and defense sectors.", - "ticker_root": "ACHR", - "homepage_url": "https://www.archer.com", - "list_date": "2021-09-17", - "locale": "us", - "market": "stocks", - "market_cap": 4454193775.68, - "name": "Archer Aviation Inc.", - "phone_number": "650-272-3233", - "primary_exchange": "XNYS", - "share_class_figi": "BBG00XRTC929", - "share_class_shares_outstanding": 744538832, - "sic_code": "3721", - "sic_description": "AIRCRAFT", - "ticker": "ACHR", - "total_employees": 1660, - "type": "CS", - "weighted_shares_outstanding": 749864272 - }, - { - "active": true, - "address": { - "address1": "190 WEST TASMAN DRIVE", - "city": "SAN JOSE", - "state": "CA", - "postal_code": "95134" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YXJjaGVyLmNvbQ/images/2025-04-04_logo.png" - }, - "cik": "0001824502", - "composite_figi": "BBG00YGCV1N9", - "currency_name": "usd", - "description": "Archer Aviation Inc advances the benefits of sustainable air mobility. The company is engaged in designing and developing a fully electric vertical takeoff and landing eVTOL aircraft for use in UAM networks. It is creating an electric airline that moves people throughout cities in a quick, safe, sustainable, and cost-effective manner. The company is building a platform to deliver aircraft, technologies, and services to customers world-wide across commercial and defense sectors.", - "ticker_root": "ACHR", - "ticker_suffix": "WS", - "homepage_url": "https://www.archer.com", - "list_date": "2021-09-17", - "locale": "us", - "market": "stocks", - "name": "Archer Aviation Inc. 
Redeemable Warrants, each whole warrant exercisable for one Class A common stock at an exercise price of $11.50", - "phone_number": "650-272-3233", - "primary_exchange": "XNYS", - "sic_code": "3721", - "sic_description": "AIRCRAFT", - "ticker": "ACHR.WS", - "total_employees": 1660, - "type": "WARRANT" - }, - { - "active": true, - "address": { - "address1": "250 PARKCENTER BLVD.", - "city": "BOISE", - "state": "ID", - "postal_code": "83706" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWxiZXJ0c29uc2NvbXBhbmllcy5jb20/images/2025-04-04_logo.svg" - }, - "cik": "0001646972", - "composite_figi": "BBG009KG1750", - "currency_name": "usd", - "description": "Albertsons is the second-largest supermarket operator in the United States with about 2,300 stores across a variety of banners. Around 80% of the firm's sales comes from nonperishable and fresh food, of which 26% comes from its portfolio of private brands. The company operates fuel centers at about 20% of its store locations and pharmacies at 75%. 
Albertsons went public in 2020 following years of ownership under private equity firm Cerberus Capital Management, which still owns about a fourth of the outstanding shares.", - "ticker_root": "ACI", - "homepage_url": "https://www.albertsonscompanies.com", - "list_date": "2020-06-26", - "locale": "us", - "market": "stocks", - "market_cap": 8663084369.1, - "name": "Albertsons Companies, Inc.", - "phone_number": "208-395-6200", - "primary_exchange": "XNYS", - "share_class_figi": "BBG009KG1741", - "share_class_shares_outstanding": 513913121, - "sic_code": "5411", - "sic_description": "RETAIL-GROCERY STORES", - "ticker": "ACI", - "total_employees": 280000, - "type": "CS", - "weighted_shares_outstanding": 506613121 - }, - { - "active": true, - "cik": "0001831313", - "composite_figi": "BBG01QPVFLN7", - "currency_name": "usd", - "ticker_root": "ACLO", - "list_date": "2024-11-15", - "locale": "us", - "market": "stocks", - "name": "TCW AAA CLO ETF", - "primary_exchange": "XNYS", - "share_class_figi": "BBG01QPVFMJ0", - "share_class_shares_outstanding": 9560000, - "ticker": "ACLO", - "type": "ETF" - }, - { - "active": true, - "address": { - "address1": "13355 NOEL ROAD", - "address2": "SUITE 400", - "city": "DALLAS", - "state": "TX", - "postal_code": "75240" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWVjb20uY29t/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWVjb20uY29t/images/2025-04-04_icon.jpeg" - }, - "cik": "0000868857", - "composite_figi": "BBG000F61RJ8", - "currency_name": "usd", - "description": "Aecom is one of the largest global providers of advisory, design, and engineering services. It serves a broad spectrum of end markets including water, transportation, and environment. Based in Dallas, Aecom employs 51,000. 
The company generated $16.1 billion in sales in fiscal 2025.", - "ticker_root": "ACM", - "homepage_url": "https://www.aecom.com", - "list_date": "2007-05-10", - "locale": "us", - "market": "stocks", - "market_cap": 10984361495.039999, - "name": "Aecom", - "phone_number": "(972) 788-1000", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001SKTTF1", - "share_class_shares_outstanding": 129288624, - "sic_code": "8711", - "sic_description": "SERVICES-ENGINEERING SERVICES", - "ticker": "ACM", - "total_employees": 51000, - "type": "CS", - "weighted_shares_outstanding": 129288624 - }, - { - "active": true, - "address": { - "address1": "1 GRAND CANAL SQUARE", - "address2": "GRAND CANAL HARBOUR", - "city": "DUBLIN", - "state": "L2", - "postal_code": "D2" - }, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWNjZW50dXJlLmNvbQ/images/2025-04-04_logo.svg", - "icon_url": "https://api.massive.com/v1/reference/company-branding/YWNjZW50dXJlLmNvbQ/images/2025-04-04_icon.jpeg" - }, - "cik": "0001467373", - "currency_name": "usd", - "description": "Accenture is a leading IT services firm that provides consulting, system integration, and business process outsourcing to enterprises around the world. Customers of Accenture come from a variety of sectors, including communications, media and technology, financial services, health and public services, consumer products, and resources. 
Accenture is the world's largest professional services company by headcount, with around 800,000 employees in over 120 countries.", - "ticker_root": "ACN", - "homepage_url": "https://www.accenture.com", - "list_date": "2001-07-19", - "locale": "us", - "market": "stocks", - "market_cap": 119362072484.56, - "name": "Accenture PLC", - "phone_number": "353-1-646-2000", - "primary_exchange": "XNYS", - "share_class_shares_outstanding": 665142040, - "sic_code": "7389", - "sic_description": "SERVICES-BUSINESS SERVICES, NEC", - "ticker": "ACN", - "total_employees": 779000, - "type": "CS", - "weighted_shares_outstanding": 613939268 - }, - { - "active": true, - "branding": { - "logo_url": "https://api.massive.com/v1/reference/company-branding/YWJyZG5hY3AuY29t/images/2025-04-04_logo.svg" - }, - "cik": "0001503290", - "composite_figi": "BBG0017VSC04", - "currency_name": "usd", - "description": "abrdn Income Credit Strategies Fund is a diversified, closed-end management investment company. Its investment objective is to seek a high level of current income with a secondary objective of capital appreciation. 
It predominantly invests in debt and loan instruments of issues that operate in a variety of industries and geographic regions.", - "ticker_root": "ACP", - "homepage_url": "http://www.abrdnacp.com", - "list_date": "2011-01-27", - "locale": "us", - "market": "stocks", - "market_cap": 672522834.08, - "name": "abrdn Income Credit Strategies Fund", - "primary_exchange": "XNYS", - "share_class_figi": "BBG001TCSLH4", - "share_class_shares_outstanding": 125470678, - "ticker": "ACP", - "type": "FUND", - "weighted_shares_outstanding": 125470678 - } - ] -} \ No newline at end of file diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index c38187ab..95c01676 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -66,6 +66,9 @@ def list_all_tickers(client, config: Config) -> List[dict]: # Fetch details for new tickers only new_count = 0 for i, ticker_obj in enumerate(ticker_objs): + if new_count >= 10: + break + ticker_str = ticker_obj.ticker if ticker_str in existing_tickers: continue From 9afb783f66b215cad59e207e174a8e196bfde9f4 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:56:09 +0800 Subject: [PATCH 38/43] ci --- src/massive/rest/reference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/massive/rest/reference.py b/src/massive/rest/reference.py index 06ad037f..12b347b6 100644 --- a/src/massive/rest/reference.py +++ b/src/massive/rest/reference.py @@ -123,7 +123,7 @@ def list_tickers( :return: List of tickers. 
""" url = "/v3/reference/tickers" - + print("DEBUG_11") return self._paginate( path=url, params=self._get_params(self.list_tickers, locals()), From b72267c4853011e3e8e026c6c31f9e6fd80064fc Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:58:17 +0800 Subject: [PATCH 39/43] update ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 300a17c1..971634ab 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,6 @@ target/ #Ipython Notebook .ipynb_checkpoints + +# Data files +data/ From fe777c99fd848270c1824bcf43a421178c2f6d6d Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:00:02 +0800 Subject: [PATCH 40/43] upgrade --- src/processor/us_daily/ticker_lister.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index 95c01676..2e10a368 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -66,8 +66,8 @@ def list_all_tickers(client, config: Config) -> List[dict]: # Fetch details for new tickers only new_count = 0 for i, ticker_obj in enumerate(ticker_objs): - if new_count >= 10: - break + # if new_count >= 10: + # break ticker_str = ticker_obj.ticker if ticker_str in existing_tickers: From 3f25a1092997060ef8aaadf4917f057bc1fdf2de Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:18:49 +0800 Subject: [PATCH 41/43] upgrade --- src/processor/us_daily/agg_fetcher.py | 62 +++++-------- src/processor/us_daily/config.json | 2 +- src/processor/us_daily/config.py | 2 +- src/processor/us_daily/storage.py | 4 + src/processor/us_daily/ticker_lister.py | 6 +- tests/test_us_daily/test_agg_fetcher.py | 110 ++++++++++-------------- tests/test_us_daily/test_config.py | 4 +- 7 files 
changed, 77 insertions(+), 113 deletions(-) diff --git a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py index 2135c6e4..8f66ce76 100644 --- a/src/processor/us_daily/agg_fetcher.py +++ b/src/processor/us_daily/agg_fetcher.py @@ -1,12 +1,11 @@ -import calendar import logging from datetime import date, datetime -from typing import List, Tuple +from typing import List from processor.us_daily.config import Config from processor.us_daily.sources.manager import FetchError from processor.us_daily.storage import ( - get_month_file_path, + get_year_file_path, file_exists, save_json, ) @@ -14,79 +13,60 @@ logger = logging.getLogger("us_daily") -def generate_months(start: str, end: str) -> List[str]: - start_year, start_month = int(start[:4]), int(start[5:7]) - end_year, end_month = int(end[:4]), int(end[5:7]) +def generate_years(start_year: int, end_year: int) -> List[int]: + return list(range(start_year, end_year + 1)) - months = [] - year, month = start_year, start_month - while (year, month) <= (end_year, end_month): - months.append(f"{year:04d}-{month:02d}") - month += 1 - if month > 12: - month = 1 - year += 1 - return months +def get_year_bounds(year: int) -> tuple: + return f"{year}-01-01", f"{year}-12-31" -def get_month_bounds(month: str) -> Tuple[str, str]: - year, mon = int(month[:4]), int(month[5:7]) - last_day = calendar.monthrange(year, mon)[1] - return f"{year:04d}-{mon:02d}-01", f"{year:04d}-{mon:02d}-{last_day:02d}" - -def is_current_month(month: str) -> bool: - today = date.today() - return month == f"{today.year:04d}-{today.month:02d}" - - -def current_month() -> str: - today = date.today() - return f"{today.year:04d}-{today.month:02d}" +def is_current_year(year: int) -> bool: + return year == date.today().year def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: - """Fetch monthly OHLCV data for a ticker using SourceManager. + """Fetch yearly OHLCV data for a ticker using SourceManager. 
Args: source_manager: SourceManager instance with failover sources. ticker: Stock ticker symbol (e.g. "AAPL"). - config: Config with daily_data_dir, start_date, max_retries. + config: Config with daily_data_dir, start_year, max_retries. Returns: - Dict with "failures" list of failed months. + Dict with "failures" list of failed years. """ - months = generate_months(config.start_date, current_month()) + years = generate_years(config.start_year, date.today().year) failures = [] - for month in months: - file_path = get_month_file_path(config.daily_data_dir, ticker, month) + for year in years: + file_path = get_year_file_path(config.daily_data_dir, ticker, year) - if file_exists(file_path) and not is_current_month(month): - logger.debug(f" {ticker} {month}: exists, skipping") + if file_exists(file_path) and not is_current_year(year): + logger.debug(f" {ticker} {year}: exists, skipping") continue - start_date, end_date = get_month_bounds(month) + start_date, end_date = get_year_bounds(year) try: df, source_name = source_manager.fetch_daily(ticker, start_date, end_date) except FetchError as e: failures.append({ "ticker": ticker, - "month": month, + "year": year, "error": str(e), }) - logger.error(f" {ticker} {month}: {e}") + logger.error(f" {ticker} {year}: {e}") continue data = { "ticker": ticker, - "month": month, + "year": year, "source": source_name, "fetched_at": datetime.now().isoformat(timespec="seconds"), "data": df.to_dict(orient="records"), } save_json(file_path, data) - logger.info(f" {ticker} {month}: fetched {len(df)} bars from {source_name}") + logger.info(f" {ticker} {year}: fetched {len(df)} bars from {source_name}") return {"failures": failures} diff --git a/src/processor/us_daily/config.json b/src/processor/us_daily/config.json index d74b806e..69ab49b2 100644 --- a/src/processor/us_daily/config.json +++ b/src/processor/us_daily/config.json @@ -1,7 +1,7 @@ { "refresh_tickers": true, "market_cap_min": 1000000000, - "start_date": "2026-01", + "start_year": 
2024, "request_interval": 12, "list_data_dir": "data/us_list", "daily_data_dir": "data/us_daily", diff --git a/src/processor/us_daily/config.py b/src/processor/us_daily/config.py index d6df88da..851f049b 100644 --- a/src/processor/us_daily/config.py +++ b/src/processor/us_daily/config.py @@ -7,7 +7,7 @@ @dataclass class Config: refresh_tickers: bool = False - start_date: str = "2026-01" + start_year: int = 2024 data_source_priority: List[str] = field( default_factory=lambda: ["massive", "akshare", "yfinance"] ) diff --git a/src/processor/us_daily/storage.py b/src/processor/us_daily/storage.py index af80505b..526a2218 100644 --- a/src/processor/us_daily/storage.py +++ b/src/processor/us_daily/storage.py @@ -10,6 +10,10 @@ def get_month_file_path(data_dir: str, ticker: str, month: str) -> str: return os.path.join(data_dir, ticker, f"{month}.json") +def get_year_file_path(data_dir: str, ticker: str, year: int) -> str: + return os.path.join(data_dir, ticker, f"{year}.json") + + def save_json(path: str, data: dict) -> None: os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w", encoding="utf-8") as f: diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index 2e10a368..fb9882dd 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -66,8 +66,8 @@ def list_all_tickers(client, config: Config) -> List[dict]: # Fetch details for new tickers only new_count = 0 for i, ticker_obj in enumerate(ticker_objs): - # if new_count >= 10: - # break + if new_count >= 10: + break ticker_str = ticker_obj.ticker if ticker_str in existing_tickers: @@ -89,7 +89,7 @@ def list_all_tickers(client, config: Config) -> List[dict]: time.sleep(config.massive_interval) # Flush to disk every 100 new details to avoid losing progress - if new_count > 0 and new_count % 100 == 0: + if new_count > 0 and new_count % 20 == 0: tickers_list = list(existing_tickers.values()) save_json(file_path, { 
"updated_at": date.today().strftime("%Y-%m-%d"), diff --git a/tests/test_us_daily/test_agg_fetcher.py b/tests/test_us_daily/test_agg_fetcher.py index 7c49aea2..0cc42e24 100644 --- a/tests/test_us_daily/test_agg_fetcher.py +++ b/tests/test_us_daily/test_agg_fetcher.py @@ -7,63 +7,43 @@ from datetime import date -class TestGenerateMonths(unittest.TestCase): - def test_generate_months_basic(self): - from processor.us_daily.agg_fetcher import generate_months +class TestGenerateYears(unittest.TestCase): + def test_generate_years_basic(self): + from processor.us_daily.agg_fetcher import generate_years - result = generate_months("2020-01", "2020-04") - self.assertEqual(result, ["2020-01", "2020-02", "2020-03", "2020-04"]) + result = generate_years(2024, 2026) + self.assertEqual(result, [2024, 2025, 2026]) - def test_generate_months_cross_year(self): - from processor.us_daily.agg_fetcher import generate_months + def test_generate_years_single(self): + from processor.us_daily.agg_fetcher import generate_years - result = generate_months("2023-11", "2024-02") - self.assertEqual(result, ["2023-11", "2023-12", "2024-01", "2024-02"]) + result = generate_years(2024, 2024) + self.assertEqual(result, [2024]) - def test_generate_months_single(self): - from processor.us_daily.agg_fetcher import generate_months - result = generate_months("2024-06", "2024-06") - self.assertEqual(result, ["2024-06"]) +class TestYearBounds(unittest.TestCase): + def test_year_bounds(self): + from processor.us_daily.agg_fetcher import get_year_bounds + start, end = get_year_bounds(2024) + self.assertEqual(start, "2024-01-01") + self.assertEqual(end, "2024-12-31") -class TestMonthBounds(unittest.TestCase): - def test_month_bounds_january(self): - from processor.us_daily.agg_fetcher import get_month_bounds - start, end = get_month_bounds("2020-01") - self.assertEqual(start, "2020-01-01") - self.assertEqual(end, "2020-01-31") - - def test_month_bounds_february_leap(self): - from processor.us_daily.agg_fetcher 
import get_month_bounds - - start, end = get_month_bounds("2024-02") - self.assertEqual(start, "2024-02-01") - self.assertEqual(end, "2024-02-29") - - def test_month_bounds_february_non_leap(self): - from processor.us_daily.agg_fetcher import get_month_bounds - - start, end = get_month_bounds("2023-02") - self.assertEqual(start, "2023-02-01") - self.assertEqual(end, "2023-02-28") - - -class TestIsCurrentMonth(unittest.TestCase): +class TestIsCurrentYear(unittest.TestCase): @patch("processor.us_daily.agg_fetcher.date") - def test_is_current_month_true(self, mock_date): - from processor.us_daily.agg_fetcher import is_current_month + def test_is_current_year_true(self, mock_date): + from processor.us_daily.agg_fetcher import is_current_year mock_date.today.return_value = date(2026, 4, 22) - self.assertTrue(is_current_month("2026-04")) + self.assertTrue(is_current_year(2026)) @patch("processor.us_daily.agg_fetcher.date") - def test_is_current_month_false(self, mock_date): - from processor.us_daily.agg_fetcher import is_current_month + def test_is_current_year_false(self, mock_date): + from processor.us_daily.agg_fetcher import is_current_year mock_date.today.return_value = date(2026, 4, 22) - self.assertFalse(is_current_month("2026-03")) + self.assertFalse(is_current_year(2025)) class TestFetchTickerAggs(unittest.TestCase): @@ -83,39 +63,39 @@ def _make_manager(self, df=None, source_name="akshare", error=None): manager.fetch_daily.return_value = (df, source_name) return manager - def test_skips_existing_historical_month(self): + def test_skips_existing_historical_year(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config - config = Config(start_date="2020-01", daily_data_dir=self.test_dir) + config = Config(start_year=2024, daily_data_dir=self.test_dir) ticker_dir = os.path.join(self.test_dir, "AAPL") os.makedirs(ticker_dir) - with open(os.path.join(ticker_dir, "2020-01.json"), "w") as f: - json.dump({"ticker": 
"AAPL", "month": "2020-01", "data": []}, f) + with open(os.path.join(ticker_dir, "2024.json"), "w") as f: + json.dump({"ticker": "AAPL", "year": 2024, "data": []}, f) manager = self._make_manager() with patch( - "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "processor.us_daily.agg_fetcher.generate_years", return_value=[2024] ): with patch( - "processor.us_daily.agg_fetcher.is_current_month", return_value=False + "processor.us_daily.agg_fetcher.is_current_year", return_value=False ): result = fetch_ticker_aggs(manager, "AAPL", config) manager.fetch_daily.assert_not_called() self.assertEqual(result["failures"], []) - def test_fetches_missing_month(self): + def test_fetches_missing_year(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config import pandas as pd - config = Config(start_date="2020-01", daily_data_dir=self.test_dir) + config = Config(start_year=2024, daily_data_dir=self.test_dir) df = pd.DataFrame({ - "date": ["2020-01-02"], + "date": ["2024-01-02"], "open": [74.06], "high": [75.15], "low": [73.80], @@ -125,36 +105,36 @@ def test_fetches_missing_month(self): manager = self._make_manager(df=df, source_name="akshare") with patch( - "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "processor.us_daily.agg_fetcher.generate_years", return_value=[2024] ): with patch( - "processor.us_daily.agg_fetcher.is_current_month", return_value=False + "processor.us_daily.agg_fetcher.is_current_year", return_value=False ): result = fetch_ticker_aggs(manager, "AAPL", config) - file_path = os.path.join(self.test_dir, "AAPL", "2020-01.json") + file_path = os.path.join(self.test_dir, "AAPL", "2024.json") self.assertTrue(os.path.exists(file_path)) with open(file_path) as f: data = json.load(f) self.assertEqual(data["ticker"], "AAPL") - self.assertEqual(data["month"], "2020-01") + self.assertEqual(data["year"], 2024) self.assertEqual(data["source"], "akshare") 
self.assertEqual(len(data["data"]), 1) self.assertEqual(data["data"][0]["close"], 74.36) self.assertEqual(result["failures"], []) - def test_refreshes_current_month(self): + def test_refreshes_current_year(self): from processor.us_daily.agg_fetcher import fetch_ticker_aggs from processor.us_daily.config import Config import pandas as pd - config = Config(start_date="2026-04", daily_data_dir=self.test_dir) + config = Config(start_year=2026, daily_data_dir=self.test_dir) ticker_dir = os.path.join(self.test_dir, "AAPL") os.makedirs(ticker_dir) - with open(os.path.join(ticker_dir, "2026-04.json"), "w") as f: - json.dump({"ticker": "AAPL", "month": "2026-04", "data": []}, f) + with open(os.path.join(ticker_dir, "2026.json"), "w") as f: + json.dump({"ticker": "AAPL", "year": 2026, "data": []}, f) df = pd.DataFrame({ "date": ["2026-04-01"], @@ -167,10 +147,10 @@ def test_refreshes_current_month(self): manager = self._make_manager(df=df, source_name="yfinance") with patch( - "processor.us_daily.agg_fetcher.generate_months", return_value=["2026-04"] + "processor.us_daily.agg_fetcher.generate_years", return_value=[2026] ): with patch( - "processor.us_daily.agg_fetcher.is_current_month", return_value=True + "processor.us_daily.agg_fetcher.is_current_year", return_value=True ): result = fetch_ticker_aggs(manager, "AAPL", config) @@ -182,23 +162,23 @@ def test_records_failure_when_all_sources_fail(self): from processor.us_daily.config import Config from processor.us_daily.sources.manager import FetchError - config = Config(start_date="2020-01", daily_data_dir=self.test_dir, max_retries=2) + config = Config(start_year=2024, daily_data_dir=self.test_dir, max_retries=2) manager = self._make_manager( error=FetchError("All sources failed for AAPL") ) with patch( - "processor.us_daily.agg_fetcher.generate_months", return_value=["2020-01"] + "processor.us_daily.agg_fetcher.generate_years", return_value=[2024] ): with patch( - "processor.us_daily.agg_fetcher.is_current_month", 
return_value=False + "processor.us_daily.agg_fetcher.is_current_year", return_value=False ): result = fetch_ticker_aggs(manager, "AAPL", config) self.assertEqual(len(result["failures"]), 1) self.assertEqual(result["failures"][0]["ticker"], "AAPL") - self.assertEqual(result["failures"][0]["month"], "2020-01") + self.assertEqual(result["failures"][0]["year"], 2024) if __name__ == "__main__": diff --git a/tests/test_us_daily/test_config.py b/tests/test_us_daily/test_config.py index 514847bc..eb80526c 100644 --- a/tests/test_us_daily/test_config.py +++ b/tests/test_us_daily/test_config.py @@ -10,7 +10,7 @@ def test_default_config(self): config = Config() self.assertEqual(config.refresh_tickers, False) - self.assertEqual(config.start_date, "2026-01") + self.assertEqual(config.start_year, 2024) self.assertEqual(config.max_retries, 3) self.assertEqual(config.data_source_priority, ["massive", "akshare", "yfinance"]) self.assertEqual(config.akshare_interval, 2.0) @@ -34,7 +34,7 @@ def test_load_config_from_file(self): self.assertEqual(config.refresh_tickers, True) self.assertEqual(config.akshare_interval, 3.0) # defaults preserved for unspecified fields - self.assertEqual(config.start_date, "2026-01") + self.assertEqual(config.start_year, 2024) self.assertEqual(config.massive_interval, 12.0) finally: os.unlink(tmp_path) From bfef45204e87682d9e0557188c754ce5b87628d8 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:35:25 +0800 Subject: [PATCH 42/43] ci --- src/processor/us_daily/__main__.py | 8 +++++--- src/processor/us_daily/agg_fetcher.py | 5 ++++- src/processor/us_daily/ticker_lister.py | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/processor/us_daily/__main__.py b/src/processor/us_daily/__main__.py index 2b1cf6af..84f5b253 100644 --- a/src/processor/us_daily/__main__.py +++ b/src/processor/us_daily/__main__.py @@ -116,21 +116,23 @@ def main(): tickers = filtered all_failures = 
[] + all_bars = 0 total = len(tickers) for i, ticker_info in enumerate(tickers): ticker = ticker_info["ticker"] logger.info(f"[{i + 1}/{total}] Processing {ticker}") result = fetch_ticker_aggs(source_manager, ticker, config) + all_bars += result["total_bars"] if result["failures"]: all_failures.extend(result["failures"]) # Step 3: Summary logger.info("=== Summary ===") - logger.info(f"Total tickers: {total}") + logger.info(f"Total tickers: {total}, total bars fetched: {all_bars}") if all_failures: - logger.warning(f"Failed months: {len(all_failures)}") + logger.warning(f"Failed years: {len(all_failures)}") for f in all_failures: - logger.warning(f" - {f['ticker']} {f['month']}: {f['error']}") + logger.warning(f" - {f['ticker']} {f['year']}: {f['error']}") else: logger.info("All data fetched successfully") logger.info("=== Done ===") diff --git a/src/processor/us_daily/agg_fetcher.py b/src/processor/us_daily/agg_fetcher.py index 8f66ce76..f30a2b9d 100644 --- a/src/processor/us_daily/agg_fetcher.py +++ b/src/processor/us_daily/agg_fetcher.py @@ -38,6 +38,7 @@ def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: """ years = generate_years(config.start_year, date.today().year) failures = [] + total_bars = 0 for year in years: file_path = get_year_file_path(config.daily_data_dir, ticker, year) @@ -63,10 +64,12 @@ def fetch_ticker_aggs(source_manager, ticker: str, config: Config) -> dict: "ticker": ticker, "year": year, "source": source_name, + "count": len(df), "fetched_at": datetime.now().isoformat(timespec="seconds"), "data": df.to_dict(orient="records"), } save_json(file_path, data) + total_bars += len(df) logger.info(f" {ticker} {year}: fetched {len(df)} bars from {source_name}") - return {"failures": failures} + return {"failures": failures, "total_bars": total_bars} diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index fb9882dd..ca9f05ea 100644 --- a/src/processor/us_daily/ticker_lister.py +++ 
b/src/processor/us_daily/ticker_lister.py @@ -66,7 +66,7 @@ def list_all_tickers(client, config: Config) -> List[dict]: # Fetch details for new tickers only new_count = 0 for i, ticker_obj in enumerate(ticker_objs): - if new_count >= 10: + if new_count >= 6: break ticker_str = ticker_obj.ticker From 88fbdec0b7e0b14cdf10c6435a37338c49e07b09 Mon Sep 17 00:00:00 2001 From: gjh-ustc0311 <79145599+gjh-ustc0311@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:35:49 +0800 Subject: [PATCH 43/43] ci --- src/processor/us_daily/ticker_lister.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/processor/us_daily/ticker_lister.py b/src/processor/us_daily/ticker_lister.py index ca9f05ea..38c19fbe 100644 --- a/src/processor/us_daily/ticker_lister.py +++ b/src/processor/us_daily/ticker_lister.py @@ -66,8 +66,8 @@ def list_all_tickers(client, config: Config) -> List[dict]: # Fetch details for new tickers only new_count = 0 for i, ticker_obj in enumerate(ticker_objs): - if new_count >= 6: - break + # if new_count >= 6: + # break ticker_str = ticker_obj.ticker if ticker_str in existing_tickers: