diff --git a/.gitignore b/.gitignore index b0a557c3..596afcbb 100644 --- a/.gitignore +++ b/.gitignore @@ -202,3 +202,6 @@ test.py # claude code CLAUDE.md +docs/superpowers/ +docs/design/ +.claude/ diff --git a/CLAUDE.md b/AGENTS.md similarity index 79% rename from CLAUDE.md rename to AGENTS.md index e757dbe5..c2954dce 100644 --- a/CLAUDE.md +++ b/AGENTS.md @@ -1,4 +1,4 @@ -# CLAUDE.md +# AGENTS.md This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. @@ -70,6 +70,12 @@ detectors: MyDetector: method_type: new_value_detector auto_config: false # true = auto-discover variables from training data + persist: # optional — omit to disable state saving + path: ./state # base path; detector name is appended automatically + interval_seconds: 300 # save every N seconds + events_until_save: null # also save after N ingested events (null = disabled) + auto_load: false # restore saved state on construction + storage_options: {} # fsspec credentials (S3, Azure, GCS, etc.) events: login_failure: # named event ID (string) or integer EventID instance_label: # arbitrary instance name @@ -149,6 +155,33 @@ class MyDetector(CoreDetector): Same pattern applies for `CoreParser` — implement `parse(input_: LogSchema, output_: ParserSchema) -> bool`. +### Wiring persist support into a new detector + +Detectors that maintain an `EventPersistency` instance must do two things to support the `persist:` config block: + +**1. Call `_register_persistency()` at the end of `__init__`:** + +```python +def __init__(self, name="MyDetector", config=MyDetectorConfig()): + super().__init__(name=name, config=config) + self.persistency = EventPersistency(event_data_class=EventStabilityTracker) + self._register_persistency(self.persistency) # must be last +``` + +**2. 
Preserve `config.persist` across `set_configuration()` rebuilds:** + +`set_configuration()` replaces `self.config` via `from_dict()`, which produces a config with no `persist` key — silently dropping the user's persist settings. Save and restore it: + +```python +def set_configuration(self) -> None: + old_persist = self.config.persist + # ... build config_dict, call from_dict() ... + self.config = MyDetectorConfig.from_dict(config_dict, self.name) + self.config.persist = old_persist +``` + +Omitting either step means a `persist:` block in the YAML is silently ignored with no error. + ## Code Quality Pre-commit hooks enforce: @@ -158,3 +191,9 @@ Pre-commit hooks enforce: - **docformatter** docstring style Python 3.12 is required (see `.python-version`). + + +# Git +NEVER include "Co-Authored-By ..." in your commit or PR messages. + +Design documents (files under `docs/design/`) must NEVER be committed to the repository. diff --git a/docs/auxiliar/persistency.md b/docs/auxiliar/persistency.md index 6adc9342..88ab7e53 100644 --- a/docs/auxiliar/persistency.md +++ b/docs/auxiliar/persistency.md @@ -1,127 +1,197 @@ # Persistency -The persistency module provides event-based state management for detectors. It allows detectors to accumulate, store, and query data across their lifecycle — during training, detection, and auto-configuration. +The persistency module gives a detector a place to remember things about the +events it sees. State is keyed by `EventID` and survives across training, the +detection loop, and (optionally) restarts on disk. -## EventPersistency +This page is structured to read top-to-bottom: first the mental model, then a +quick start, then the API surface. -`EventPersistency` is the main entry point. It manages one storage backend instance per event ID, so each event type maintains its own isolated state. +## Mental model -### Creating an instance +Persistency has three moving parts. 
Understanding what each one does makes the +rest of the page much easier to follow. -```python -from detectmatelibrary.common.persistency import EventPersistency +### 1. Events -persistency = EventPersistency( - event_data_class=MyBackend, # storage backend class (see below) - variable_blacklist=["Content"], # variable names to exclude (optional) - event_data_kwargs={"max_rows": 1000} # extra kwargs forwarded to the backend (optional) -) -``` +Logs are grouped by `EventID`. Two events with the same ID share a template +but have their own variable values. Persistency stores **one independent state +object per event ID**, so an `EventStabilityTracker` for `EventID=4733` does +not interfere with one for `EventID=4624`. -| Parameter | Description | -|---|---| -| `event_data_class` | An `EventDataStructure` subclass that defines how data is stored and queried. | -| `variable_blacklist` | Variable names to exclude from storage. Defaults to `["Content"]`. | -| `event_data_kwargs` | A dictionary of keyword arguments forwarded to the backend constructor. | +### 2. Backends (`EventDataStructure`) + +A backend is the thing that actually stores the per-event state. Persistency +owns the dict `{event_id: backend}`; the backend itself decides *how* data is +kept. + +Two families ship today: + +- **DataFrame backends** (`EventDataFrame`, `ChunkedEventDataFrame`) keep the + raw rows. Use these when a detector needs to scan history. +- **Tracker backends** (`EventStabilityTracker`) keep only derived features + (e.g. "this variable has been constant for the last 10k events"). Use these + when you only need a summary, not the raw history — they cost a fraction of + the memory. + +All backends implement the same four-method contract: `add_data`, `get_data`, +`dump`, `load`. That contract is what `EventPersistency` and +`PersistencySaver` rely on — anything you add later only has to follow it. + +### 3. 
Saver lifecycle (`PersistencySaver`) -### Storing data +`EventPersistency` itself is in-memory. To survive a process restart, the +state has to be written somewhere. `PersistencySaver` wraps an +`EventPersistency` and: + +- writes to disk (or any `fsspec` URI) on two triggers — a wall-clock interval + and an event-count threshold; +- optionally `auto_load`s previously saved state during construction; +- exposes `start()` / `stop()` so the background timer can be torn down + cleanly. `stop()` is idempotent and is called automatically when a + `Component` is used as a context manager. + +In practice a detector never instantiates `PersistencySaver` directly: it sets +a `persist:` block in its config and `CoreDetector` wires the saver up via +[`init_persistency`](../../src/detectmatelibrary/common/persist.py). + +--- + +## Quick start ```python -persistency.ingest_event( - event_id=event_id, - event_template=template, - variables=positional_vars, # optional positional variables - named_variables=named_vars # optional named variables +from detectmatelibrary.utils import persistency + +ep = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker, ) + +ep.ingest_event( + event_id="4624", + event_template="An account was successfully logged on.", + named_variables={"AccountName": "alice", "LogonType": "3"}, +) + +tracker = ep.get_event_data("4624") # or ep["4624"] ``` -Each call appends data to the backend associated with the given `event_id`. If no backend exists for that ID yet, one is created automatically. +That snippet covers the whole in-memory API: pick a backend class, ingest +events, query state. 
-### Retrieving data +--- -```python -# Single event -data = persistency.get_event_data(event_id) +## API reference -# All events -all_data = persistency.get_events_data() # dict[event_id -> backend] +### `EventPersistency` -# Templates -template = persistency.get_event_template(event_id) -all_templates = persistency.get_event_templates() +| Parameter | Description | +|---|---| +| `event_data_class` | An `EventDataStructure` subclass; one instance is created per event ID. | +| `variable_blacklist` | Variable names to skip when ingesting. Defaults to `["Content"]`. | +| `event_data_kwargs` | Extra kwargs forwarded to each backend instance. | -# Bracket access -backend = persistency[event_id] -``` +Common methods: -## Storage backends +```python +ep.ingest_event(event_id, event_template, variables=..., named_variables=...) + +ep.get_event_data(event_id) # backend for a single event +ep.get_events_data() # dict[event_id -> backend] +ep.get_event_template(event_id) +ep.get_event_templates() +ep.get_events_seen() # all event IDs ever ingested +ep[event_id] # alias for get_event_data +``` -The backend determines how ingested data is stored and what queries are available. Choose the backend that fits your detector's needs. +### Available backends -### DataFrame backends +| Class | Use when | +|---|---| +| `persistency.EventDataFrame` | You need history and a Pandas DataFrame is the natural shape. | +| `persistency.ChunkedEventDataFrame` | High-volume / streaming workloads — Polars-backed with row-retention and automatic compaction. | +| `persistency.EventStabilityTracker` | You only care about how variables behave over time (`STATIC` / `STABLE` / `UNSTABLE` / `RANDOM`). Cheapest memory footprint. | -Store raw event data in tabular form. Useful when a detector needs to query or iterate over historical values. +All three are re-exported from the top of the package — `persistency.X` is the +canonical import; the deeply nested submodules are an implementation detail. 
-- **`EventDataFrame`** — Pandas-backed storage. Simple and familiar. -- **`ChunkedEventDataFrame`** — Polars-backed storage with configurable row retention and automatic compaction. Suited for high-volume or streaming workloads. +### Persisting to disk ```python -from detectmatelibrary.common.persistency.event_data_structures.dataframes import ( - EventDataFrame, - ChunkedEventDataFrame, +saver = persistency.PersistencySaver( + ep, + persistency.PersistencySaverConfig( + path="./state/my-detector", + save_interval_seconds=300, + events_until_save=10_000, # save after this many ingests, too + auto_load=False, + storage_options={}, # forwarded to fsspec + ), ) +saver.start() +# ... detector runs ... +saver.stop() # final flush, stops the background timer ``` -### Tracker backends - -Track variable behavior over time rather than storing raw data. Useful when a detector needs to understand how variables evolve (e.g., whether they converge to constant values). Is optimized for space efficiency since only extracted features from the logs are stored. +`PersistencySaver.save()` is thread-safe, and `stop()` is idempotent. The two +save triggers (`save_interval_seconds` and `events_until_save`) are +independent — whichever fires first wins. -- **`EventStabilityTracker`** — Classifies each variable as `STATIC`, `STABLE`, `UNSTABLE`, `RANDOM`, or `INSUFFICIENT_DATA` based on how its values change over time. +#### Restoring state ```python -from detectmatelibrary.common.persistency.event_data_structures.trackers import ( - EventStabilityTracker, +saver = persistency.PersistencySaver( + ep, + persistency.PersistencySaverConfig(path="./state/my-detector", auto_load=True), ) +# ep is now pre-populated from disk ``` -## Usage in detectors +If `auto_load=True` and no saved state exists, the constructor raises +`persistency.PersistencyLoadError` immediately — fail-fast rather than +silently starting empty. -Persistency is **optional**. A detector can function without it. 
When a detector does need to maintain state across events — for example, to learn normal values during training and flag deviations during detection — it can integrate persistency by following this pattern: +### Storage backends (fsspec) -### 1. Initialize in `__init__` +`PersistencySaverConfig.path` accepts any URI fsspec understands: a local path +(`./state`), `s3://bucket/key`, `gs://...`, `az://...`, and so on. Provider +credentials and tuning knobs go in `storage_options`. -Create one or more `EventPersistency` instances with the appropriate backend. +--- -```python -class MyDetector(CoreDetector): - def __init__(self, name="MyDetector", config=MyDetectorConfig()): - super().__init__(name=name, ...) - self.persistency = EventPersistency( - event_data_class=EventStabilityTracker, - ) -``` +## Using persistency inside a detector -### 2. Accumulate state in `train()` +The recommended path: declare `persist:` in the detector's config and let +`CoreDetector._register_persistency` build the saver for you. See +[Saving state (persist)](../detectors.md#saving-state-persist) for the config +schema. -During training, ingest each event so the backend builds up its internal state. +In detector code, the pattern is: ```python -def train(self, input_): - variables = self.get_configured_variables(input_, self.config.events) - self.persistency.ingest_event( - event_id=input_["EventID"], - event_template=input_["template"], - named_variables=variables, - ) -``` +from detectmatelibrary.common.detector import CoreDetector +from detectmatelibrary.utils import persistency -### 3. 
Query state in `detect()` +class MyDetector(CoreDetector): + def __init__(self, name="MyDetector", config=MyDetectorConfig()): + super().__init__(name=name, config=config) + self.persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker, + ) + self._register_persistency(self.persistency) -During detection, query the accumulated state to decide whether the incoming event is anomalous. + def train(self, input_): + self.persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"], + named_variables={...}, + ) -```python -def detect(self, input_, output_): - for event_id, backend in self.persistency.get_events_data().items(): - stored_data = backend.get_data() - # compare input_ against stored_data to produce alerts + def detect(self, input_, output_): + tracker = self.persistency.get_events_data().get(input_["EventID"]) + # compare against tracker to produce alerts ``` + +`_register_persistency` is a one-line wrapper around +[`init_persistency`](../../src/detectmatelibrary/common/persist.py); the helper +honours `config.persist` and returns `None` (so `self.saver` stays `None`) +when persistence is disabled. diff --git a/docs/detectors.md b/docs/detectors.md index 26bd97a8..668b5272 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -219,4 +219,95 @@ def set_configuration(self): When `auto_config` is `False`, steps 1 and 2 are skipped entirely. +### Saving state (persist) + +Detectors can persist their training state to disk (or cloud storage) so it +can be restored in a later session. 
Configure this with a top-level `persist:` +block in the detector config: + +```yaml +detectors: + NewValueDetector: + method_type: new_value_detector + persist: + path: ./state # base path; detector name is appended automatically + interval_seconds: 300 # save every N seconds (default: 300) + events_until_save: null # also save after N ingested events (default: disabled) + auto_load: false # restore saved state on startup (default: false) + storage_options: {} # backend credentials (see below) + events: + ... +``` + +All fields are optional — `persist: {}` uses all defaults. Omitting `persist:` entirely +disables saving (backward compatible). + +The detector name is automatically appended to `path`, so `path: ./state` for a detector +named `NewValueDetector` writes to `./state/NewValueDetector/`. + +#### Fields + +| Field | Type | Default | Description | +|---|---|---|---| +| `path` | `str` | `"./state"` | Base directory or cloud URL. Detector name is appended. | +| `interval_seconds` | `int` | `300` | Background save interval in seconds. | +| `events_until_save` | `int \| null` | `null` | Save after this many ingested events. `null` disables event-count triggering. | +| `auto_load` | `bool` | `false` | Load saved state on construction. Raises `PersistencyLoadError` if no state exists. | +| `storage_options` | `dict` | `{}` | Credentials and options forwarded to [fsspec](https://filesystem-spec.readthedocs.io/). 
| + +#### Storage options examples + +**Local filesystem** — no `storage_options` needed: + +```yaml +persist: + path: ./state +``` + +**S3**: + +```yaml +persist: + path: s3://my-bucket/detector-state + storage_options: + key: AKIAIOSFODNN7EXAMPLE + secret: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + client_kwargs: {region_name: eu-west-1} +``` + +**S3-compatible storage** (MinIO, etc.): + +```yaml +persist: + path: s3://my-bucket/detector-state + storage_options: + endpoint_url: http://minio:9000 + key: minioadmin + secret: minioadmin +``` + +**Azure Blob Storage**: + +```yaml +persist: + path: az://my-container/detector-state + storage_options: + account_name: mystorageaccount + account_key: base64encodedkey== +``` + +**GCS**: + +```yaml +persist: + path: gs://my-bucket/detector-state + storage_options: + project: my-gcp-project + token: /path/to/service-account.json +``` + +In practice, credentials are usually supplied via environment variables +(`AWS_ACCESS_KEY_ID`, etc.) or instance roles — in which case `storage_options` +stays empty or is omitted. 
+ Go back [Index](index.md) diff --git a/docs/detectors/new_value.md b/docs/detectors/new_value.md index 5fa15e51..7aff8617 100644 --- a/docs/detectors/new_value.md +++ b/docs/detectors/new_value.md @@ -20,6 +20,10 @@ detectors: method_type: new_value_detector auto_config: False params: {} + persist: # optional — omit to disable saving + path: ./state + interval_seconds: 300 + auto_load: false events: 1: test: diff --git a/pyproject.toml b/pyproject.toml index 10ccecb4..4e09837c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,9 @@ dependencies = [ "pandas>=2.3.2", "polars>=1.38.1", "detectmateperformance @ git+https://github.com/ait-detectmate/DetectMatePerformance@main", + "msgpack>=1.0.0", + "fsspec>=2024.1.0", + "pyarrow>=24.0.0", ] [dependency-groups] diff --git a/src/detectmatelibrary/common/_config/__init__.py b/src/detectmatelibrary/common/_config/__init__.py index 0ee1ed83..12b86205 100644 --- a/src/detectmatelibrary/common/_config/__init__.py +++ b/src/detectmatelibrary/common/_config/__init__.py @@ -68,6 +68,7 @@ def to_dict(self, method_id: str = random_id()) -> Dict[str, Any]: params = {} events_data = None instances_data = None + persist_data: dict[str, Any] | None = None for field_name, field_value in self: # Skip meta fields @@ -88,6 +89,9 @@ def to_dict(self, method_id: str = random_id()) -> Dict[str, Any]: name: inst.to_dict() for name, inst in field_value.items() } + elif field_name == "persist": + if field_value is not None: + persist_data = field_value.model_dump() else: # All other fields go into params params[field_name] = field_value @@ -104,6 +108,9 @@ def to_dict(self, method_id: str = random_id()) -> Dict[str, Any]: if events_data is not None: result["events"] = events_data + if persist_data is not None: + result["persist"] = persist_data + # Wrap in the component_type and method_id structure return { self.component_type: { diff --git a/src/detectmatelibrary/common/_config/_compile.py 
b/src/detectmatelibrary/common/_config/_compile.py index 0f01d5d6..0097b634 100644 --- a/src/detectmatelibrary/common/_config/_compile.py +++ b/src/detectmatelibrary/common/_config/_compile.py @@ -61,7 +61,10 @@ def __init__(self, expected_type: str, actual_type: str) -> None: class MissingParamsWarning(UserWarning): def __init__(self) -> None: - super().__init__("'auto_config = False' and no 'params' or 'events' provided. Is that intended?") + super().__init__( + "'auto_config = False' and no 'params', 'events', 'global', or 'persist' provided. " + "Is that intended?" + ) class AutoConfigWarning(UserWarning): @@ -94,8 +97,10 @@ def process(config: Dict[str, Any]) -> Dict[str, Any]: has_params = "params" in config has_events = "events" in config has_instances = "global" in config + has_persist = "persist" in config - if not has_params and not has_events and not has_instances and not config.get("auto_config", False): + no_data = not has_params and not has_events and not has_instances and not has_persist + if no_data and not config.get("auto_config", False): warnings.warn(MissingParamsWarning()) if has_params: diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index fcab12f8..cb127283 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -11,12 +11,25 @@ from tools.logging import logger, setup_logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Protocol setup_logging() +class _Stoppable(Protocol): + """Structural type for objects ``Component`` will stop on context-manager + exit. + + Decouples ``Component`` from any concrete saver implementation: a subclass + may assign anything with a ``stop()`` method to ``self.saver`` (today the + only such type is ``PersistencySaver``) without ``common.core`` having to + import the persistency package. This preserves the dependency direction + detectmate -> persistency, not the reverse. + """ + def stop(self) -> None: ... 
+ + class TrainBuffer: def __init__(self) -> None: self.buffer: list[BaseSchema | list[BaseSchema]] = [] @@ -53,6 +66,7 @@ def __init__( config: CoreConfig = CoreConfig(), ) -> None: self.name, self.type_, self.config = name, type_, config + self.saver: _Stoppable | None = None def __repr__(self) -> str: return f"<{self.type_}> {self.name}: {self.config}" @@ -84,6 +98,13 @@ def get_config(self) -> Dict[str, Any]: def update_config(self, new_config: Dict[str, Any]) -> None: self.config.update_config(new_config) + def __enter__(self) -> "Component": + return self + + def __exit__(self, *_: Any) -> None: + if self.saver is not None: + self.saver.stop() + class CoreComponent(Component): """Base class for all components in the system.""" diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 2be71535..aca671a5 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -3,12 +3,15 @@ from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode from detectmatelibrary.utils.aux import get_timestamp -from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils import persistency +from detectmatelibrary.common.persist import init_persistency + +from pydantic import BaseModel, ConfigDict from detectmatelibrary.schemas import ParserSchema, DetectorSchema from typing_extensions import override -from typing import Dict, List, Optional, Any +from typing import Dict, List, Optional, Any, cast from detectmatelibrary.utils.time_format_handler import TimeFormatHandler from tools.logging import logger @@ -17,6 +20,16 @@ _time_handler = TimeFormatHandler() +class PersistConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + path: str = "./state" + interval_seconds: int = 300 + events_until_save: int | None = None + auto_load: bool = False + storage_options: dict[str, Any] = {} + + def _extract_timestamp( input_: 
List[ParserSchema] | ParserSchema ) -> List[int]: @@ -94,7 +107,7 @@ def get_global_variables( def validate_config_coverage( detector_name: str, config_events: EventsConfig | dict[str, Any], - persistency: EventPersistency, + event_persistency: persistency.EventPersistency, ) -> None: """Log warnings when configured EventIDs or variables have no training data. @@ -112,8 +125,8 @@ def validate_config_coverage( if not config_ids: return - events_seen = persistency.get_events_seen() - events_with_data = set(persistency.get_events_data().keys()) + events_seen = event_persistency.get_events_seen() + events_with_data = set(event_persistency.get_events_data().keys()) for event_id in config_ids: if event_id not in events_seen: @@ -138,6 +151,7 @@ class CoreDetectorConfig(CoreConfig): auto_config: bool = True events: EventsConfig | dict[str, Any] = {} global_instances: Dict[str, _EventInstance] = {} + persist: PersistConfig | None = None class CoreDetector(CoreComponent): @@ -160,6 +174,11 @@ def __init__( output_schema=DetectorSchema, ) + def _register_persistency(self, event_persistency: persistency.EventPersistency) -> None: + self.saver = init_persistency( + self.name, cast(CoreDetectorConfig, self.config), event_persistency + ) + @override def run( self, input_: List[ParserSchema] | ParserSchema, output_: DetectorSchema # type: ignore diff --git a/src/detectmatelibrary/common/persist.py b/src/detectmatelibrary/common/persist.py new file mode 100644 index 00000000..2e2d9710 --- /dev/null +++ b/src/detectmatelibrary/common/persist.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from detectmatelibrary.utils import persistency + +if TYPE_CHECKING: + from detectmatelibrary.common.detector import CoreDetectorConfig + + +def init_persistency( + name: str, + config: "CoreDetectorConfig", + event_persistency: persistency.EventPersistency, +) -> persistency.PersistencySaver | None: + """Build and start a PersistencySaver for 
`event_persistency`, or None if + disabled.""" + if config.persist is None: + return None + p = config.persist + saver = persistency.PersistencySaver( + event_persistency, + persistency.PersistencySaverConfig( + path=f"{p.path}/{name}", + save_interval_seconds=p.interval_seconds, + events_until_save=p.events_until_save, + auto_load=p.auto_load, + storage_options=p.storage_options, + ), + ) + saver.start() + return saver diff --git a/src/detectmatelibrary/detectors/new_event_detector.py b/src/detectmatelibrary/detectors/new_event_detector.py index 39097f70..8932f4a7 100644 --- a/src/detectmatelibrary/detectors/new_event_detector.py +++ b/src/detectmatelibrary/detectors/new_event_detector.py @@ -1,11 +1,8 @@ from detectmatelibrary.common._config._compile import generate_detector_config from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables, \ get_global_variables -from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( - EventStabilityTracker -) +from detectmatelibrary.utils import persistency from detectmatelibrary.constants import GLOBAL_EVENT_ID -from detectmatelibrary.utils.persistency.event_persistency import EventPersistency from detectmatelibrary.utils.data_buffer import BufferMode from detectmatelibrary.schemas import ParserSchema, DetectorSchema @@ -28,13 +25,14 @@ def __init__( super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) self.config: NewEventDetectorConfig - self.persistency = EventPersistency( - event_data_class=EventStabilityTracker, + self.persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker, ) # auto config checks if individual variables are stable to select combos from - self.auto_conf_persistency = EventPersistency( - event_data_class=EventStabilityTracker + self.auto_conf_persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker ) + 
self._register_persistency(self.persistency) def train(self, input_: ParserSchema) -> None: # type: ignore """Train the detector by learning values from the input data.""" @@ -87,6 +85,7 @@ def configure(self, input_: ParserSchema) -> None: # type: ignore ) def set_configuration(self) -> None: + old_persist = self.config.persist config_dict = generate_detector_config( variable_selection={}, detector_name=self.name, @@ -94,3 +93,4 @@ def set_configuration(self) -> None: ) # Update the config object from the dictionary instead of replacing it self.config = NewEventDetectorConfig.from_dict(config_dict, self.name) + self.config.persist = old_persist diff --git a/src/detectmatelibrary/detectors/new_value_combo_detector.py b/src/detectmatelibrary/detectors/new_value_combo_detector.py index 5f5a7814..731b58e6 100644 --- a/src/detectmatelibrary/detectors/new_value_combo_detector.py +++ b/src/detectmatelibrary/detectors/new_value_combo_detector.py @@ -10,10 +10,7 @@ ) from detectmatelibrary.utils.data_buffer import BufferMode -from detectmatelibrary.utils.persistency.event_data_structures.trackers import ( - EventStabilityTracker -) -from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils import persistency from detectmatelibrary.schemas import ParserSchema, DetectorSchema from detectmatelibrary.constants import GLOBAL_EVENT_ID @@ -74,19 +71,20 @@ def __init__( super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) self.config = cast(NewValueComboDetectorConfig, self.config) - self.persistency = EventPersistency( - event_data_class=EventStabilityTracker, + self.persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker, event_data_kwargs={"converter_function": get_combo} ) # auto config checks if individual variables are stable to select combos from - self.auto_conf_persistency = EventPersistency( - event_data_class=EventStabilityTracker + 
self.auto_conf_persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker ) - self.auto_conf_persistency_combos = EventPersistency( - event_data_class=EventStabilityTracker, + self.auto_conf_persistency_combos = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker, event_data_kwargs={"converter_function": get_all_possible_combos} ) self.inputs: list[ParserSchema] = [] + self._register_persistency(self.persistency) def train(self, input_: ParserSchema) -> None: # type: ignore config = cast(NewValueComboDetectorConfig, self.config) @@ -183,6 +181,7 @@ def set_configuration(self, max_combo_size: int | None = None) -> None: would explode combinatorially). """ config = cast(NewValueComboDetectorConfig, self.config) + old_persist = config.persist # run WITH auto_conf_persistency variable_combos = {} for event_id, tracker in self.auto_conf_persistency.get_events_data().items(): @@ -228,6 +227,7 @@ def set_configuration(self, max_combo_size: int | None = None) -> None: ) # Update the config object from the dictionary instead of replacing it self.config = NewValueComboDetectorConfig.from_dict(config_dict, self.name) + self.config.persist = old_persist events = self.config.events if isinstance(events, EventsConfig) and not events.events: logger.warning( diff --git a/src/detectmatelibrary/detectors/new_value_detector.py b/src/detectmatelibrary/detectors/new_value_detector.py index b7a051f5..8776d9eb 100644 --- a/src/detectmatelibrary/detectors/new_value_detector.py +++ b/src/detectmatelibrary/detectors/new_value_detector.py @@ -8,10 +8,7 @@ get_global_variables, validate_config_coverage, ) -from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( - EventStabilityTracker -) -from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils import persistency from detectmatelibrary.utils.data_buffer import 
BufferMode from detectmatelibrary.schemas import ParserSchema, DetectorSchema @@ -42,13 +39,14 @@ def __init__( super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) self.config: NewValueDetectorConfig # type narrowing for IDE - self.persistency = EventPersistency( - event_data_class=EventStabilityTracker, + self.persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker, ) # auto config checks if individual variables are stable to select combos from - self.auto_conf_persistency = EventPersistency( - event_data_class=EventStabilityTracker + self.auto_conf_persistency = persistency.EventPersistency( + event_data_class=persistency.EventStabilityTracker ) + self._register_persistency(self.persistency) def train(self, input_: ParserSchema) -> None: # type: ignore """Train the detector by learning values from the input data.""" @@ -134,6 +132,7 @@ def set_configuration(self) -> None: vars_ = stable + static if len(vars_) > 0: variables[event_id] = vars_ + old_persist = self.config.persist config_dict = generate_detector_config( variable_selection=variables, detector_name=self.name, @@ -141,6 +140,7 @@ def set_configuration(self) -> None: ) # Update the config object from the dictionary instead of replacing it self.config = NewValueDetectorConfig.from_dict(config_dict, self.name) + self.config.persist = old_persist events = self.config.events if isinstance(events, EventsConfig) and not events.events: logger.warning( diff --git a/src/detectmatelibrary/utils/persistency/__init__.py b/src/detectmatelibrary/utils/persistency/__init__.py index 5ea37cd7..fff0f391 100644 --- a/src/detectmatelibrary/utils/persistency/__init__.py +++ b/src/detectmatelibrary/utils/persistency/__init__.py @@ -1,5 +1,17 @@ from .event_persistency import EventPersistency +from .persistency_saver import PersistencySaver, PersistencySaverConfig, PersistencyLoadError +from .event_data_structures.base import EventDataStructure +from 
.event_data_structures.dataframes.event_dataframe import EventDataFrame +from .event_data_structures.dataframes.chunked_event_dataframe import ChunkedEventDataFrame +from .event_data_structures.trackers.stability.stability_tracker import EventStabilityTracker __all__ = [ - "EventPersistency" + "EventPersistency", + "PersistencySaver", + "PersistencySaverConfig", + "PersistencyLoadError", + "EventDataStructure", + "EventDataFrame", + "ChunkedEventDataFrame", + "EventStabilityTracker", ] diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/base.py b/src/detectmatelibrary/utils/persistency/event_data_structures/base.py index 8319e339..08e4909b 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/base.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/base.py @@ -24,6 +24,20 @@ def to_data(self, raw_data: Any) -> Any: """Convert raw data into the appropriate data format for storage.""" pass + @abstractmethod + def dump(self) -> bytes: + """Serialize full state to bytes. + + Format is backend-specific. + """ + ... + + @classmethod + @abstractmethod + def load(cls, data: bytes, **kwargs: Any) -> "EventDataStructure": + """Restore state from bytes produced by dump().""" + ... 
+ def get_template(self) -> str: return self.template diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/chunked_event_dataframe.py b/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/chunked_event_dataframe.py index 8b15bff1..53c32306 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/chunked_event_dataframe.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/chunked_event_dataframe.py @@ -1,6 +1,9 @@ +import io +import struct from typing import Any, Dict, List, Optional from dataclasses import dataclass, field +import msgpack import polars as pl from ..base import EventDataStructure @@ -74,6 +77,41 @@ def get_variables(self) -> Any: def to_data(self, raw_data: Dict[str, List[Any]]) -> pl.DataFrame: return pl.DataFrame(raw_data) + def dump(self) -> bytes: + """Serialize to Parquet bytes with a 4-byte + msgpack config header.""" + # Layout: [uint32 BE config_len][msgpack(config)][parquet bytes]. + # msgpack carries the retention/compaction params alongside the + # Parquet payload so load() can rebuild the instance with the same + # policy (Parquet itself only stores the data). + config: bytes = msgpack.packb( + {"max_rows": self.max_rows, "compact_every": self.compact_every}, + use_bin_type=True, + ) + header: bytes = struct.pack(">I", len(config)) + config + buf = io.BytesIO() + self.get_data().write_parquet(buf) + return header + buf.getvalue() + + @classmethod + def load(cls, data: bytes, **kwargs: Any) -> "ChunkedEventDataFrame": + """Restore from Parquet bytes (with config header). + + Note: event_id and template (base dataclass fields) are not restored; + they remain at defaults (-1 and "") as they are managed by EventPersistency. 
+ """ + config_len = struct.unpack(">I", data[:4])[0] + config = msgpack.unpackb(data[4:4 + config_len], raw=False) + parquet_bytes = data[4 + config_len:] + df = pl.read_parquet(io.BytesIO(parquet_bytes)) if parquet_bytes else pl.DataFrame() + instance = cls( + max_rows=config.get("max_rows", 10_000_000), + compact_every=config.get("compact_every", 1000), + ) + if df.height > 0: + instance.chunks = [df] + instance._rows = df.height + return instance + def __repr__(self) -> str: return ( f"ChunkedEventDataFrame(df=..., rows={self._rows}, chunks={len(self.chunks)}, " diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/event_dataframe.py b/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/event_dataframe.py index a823f288..2a024883 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/event_dataframe.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/dataframes/event_dataframe.py @@ -1,5 +1,5 @@ +import io from typing import Any, Dict, List - from dataclasses import dataclass, field import pandas as pd @@ -7,8 +7,6 @@ from ..base import EventDataStructure -# -------- Pandas backend -------- - @dataclass class EventDataFrame(EventDataStructure): """ @@ -35,5 +33,22 @@ def to_data(self, raw_data: Dict[int | str, Any]) -> pd.DataFrame: data = {key: [value] for key, value in raw_data.items()} return pd.DataFrame(data) + def dump(self) -> bytes: + """Serialize DataFrame to Parquet bytes.""" + buf = io.BytesIO() + self.data.to_parquet(buf, engine="pyarrow", index=False) + return buf.getvalue() + + @classmethod + def load(cls, data: bytes, **kwargs: Any) -> "EventDataFrame": + """Restore DataFrame from Parquet bytes. + + Note: event_id and template (base dataclass fields) are not restored; + they remain at defaults (-1 and "") as they are managed by EventPersistency. 
+ """ + instance = cls() + instance.data = pd.read_parquet(io.BytesIO(data), engine="pyarrow") + return instance + def __repr__(self) -> str: return f"EventDataFrame(df=..., rows={len(self.data)}, variables={self.get_variables()})" diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/event_tracker.py b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/event_tracker.py index 7dd474d3..a267062b 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/event_tracker.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/event_tracker.py @@ -1,7 +1,10 @@ """Event data structure that tracks variable behaviors over time/events.""" +import importlib from typing import Any, Callable, Dict, Type +import msgpack + from detectmatelibrary.utils.preview_helpers import format_dict_repr from .multi_tracker import MultiTracker @@ -40,6 +43,62 @@ def to_data(self, raw_data: Dict[str, Any]) -> Any: """Transform raw data into the format expected by the tracker.""" return self.converter_function(raw_data) + def dump(self) -> bytes: + """Serialize full tracker state to MessagePack bytes. + + Note: converter_function is not serialized (lambdas cannot be pickled). + After load(), converter_function is reset to the identity default. + """ + state = { + "single_tracker_type": self.single_tracker_type.__name__, + "single_tracker_module": self.single_tracker_type.__module__, + "multi_tracker_type": self.multi_tracker_type.__name__, + "multi_tracker_module": self.multi_tracker_type.__module__, + "trackers": { + name: tracker.to_state() + for name, tracker in self.multi_tracker.get_trackers().items() + }, + } + result: bytes = msgpack.packb(state, use_bin_type=True) + return result + + @classmethod + def load(cls, data: bytes, **kwargs: Any) -> "EventTracker": + """Restore tracker state from MessagePack bytes. 
+ + Note: event_id and template (base dataclass fields) are not restored; + they remain at defaults (-1 and "") as they are managed by EventPersistency. + """ + def _list_keys_to_tuples(pairs: list[tuple[Any, Any]]) -> dict[Any, Any]: + """Convert list keys (from msgpack) back to tuples so they can be + used as dict keys.""" + result: dict[Any, Any] = {} + for k, v in pairs: + if isinstance(k, list): + k = tuple(k) + result[k] = v + return result + + state = msgpack.unpackb(data, raw=False, strict_map_key=False, + object_pairs_hook=_list_keys_to_tuples) + single_tracker_cls = getattr( + importlib.import_module(state["single_tracker_module"]), + state["single_tracker_type"], + ) + multi_tracker_cls = getattr( + importlib.import_module(state["multi_tracker_module"]), + state["multi_tracker_type"], + ) + instance = cls.__new__(cls) + EventTracker.__init__( + instance, + single_tracker_type=single_tracker_cls, + multi_tracker_type=multi_tracker_cls, + ) + for name, tracker_state in state["trackers"].items(): + instance.multi_tracker.single_trackers[name] = single_tracker_cls.from_state(tracker_state) + return instance + def __repr__(self) -> str: strs = format_dict_repr(self.multi_tracker.get_trackers(), indent="\t") return f"{self.__class__.__name__}(data={{\n\t{strs}\n}})" diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/single_tracker.py b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/single_tracker.py index de6a017c..01808d89 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/single_tracker.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/base/single_tracker.py @@ -1,6 +1,6 @@ """Tracks whether a variable is converging to a constant value.""" -from typing import Any +from typing import Any, Dict from dataclasses import dataclass from abc import ABC, abstractmethod @@ -26,3 +26,15 @@ def classify(self) -> Classification: 
@abstractmethod def __repr__(self) -> str: pass + + @abstractmethod + def to_state(self) -> Dict[str, Any]: + """Serialize tracker state to a plain dict (must be msgpack- + compatible).""" + ... + + @classmethod + @abstractmethod + def from_state(cls, state: Dict[str, Any]) -> "SingleTracker": + """Restore tracker from a state dict produced by to_state().""" + ... diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_classifier.py b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_classifier.py index 5c2237c4..25db8bc2 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_classifier.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_classifier.py @@ -3,7 +3,7 @@ from typing import List import numpy as np -from detectmatelibrary.utils.RLE_list import RLEList +from detectmatelibrary.utils.persistency.rle_list import RLEList class StabilityClassifier: diff --git a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_tracker.py b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_tracker.py index f29a78e1..d4b12359 100644 --- a/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_tracker.py +++ b/src/detectmatelibrary/utils/persistency/event_data_structures/trackers/stability/stability_tracker.py @@ -1,9 +1,9 @@ """Tracks whether a variable is converging to a constant value.""" -from typing import Any, Callable, List, Literal, Set +from typing import Any, Callable, Dict, List, Literal, Set from detectmatelibrary.utils.preview_helpers import list_preview_str -from detectmatelibrary.utils.RLE_list import RLEList +from detectmatelibrary.utils.persistency.rle_list import RLEList from ..base import SingleTracker, MultiTracker, EventTracker, Classification from 
.stability_classifier import StabilityClassifier @@ -58,6 +58,33 @@ def classify(self) -> Classification: reason="No classification matched; variable is unstable" ) + def to_state(self) -> Dict[str, Any]: + """Serialize tracker state to a plain dict (must be msgpack- + compatible).""" + return { + "type": self.__class__.__name__, + "module": self.__class__.__module__, + "min_samples": self.min_samples, + "runs": self.change_series.runs(), + "unique_set": list(self.unique_set), + "segment_thresholds": self.stability_classifier.segment_threshs, + } + + @classmethod + def from_state(cls, state: Dict[str, Any]) -> "SingleStabilityTracker": + """Restore tracker from a state dict produced by to_state().""" + tracker = cls(min_samples=state["min_samples"]) + runs = [(bool(r[0]), int(r[1])) for r in state["runs"]] + tracker.change_series._runs = runs + tracker.change_series._len = sum(count for _, count in runs) + tracker.unique_set = { + tuple(v) if isinstance(v, list) else v for v in state["unique_set"] + } + tracker.stability_classifier = StabilityClassifier( + segment_thresholds=state["segment_thresholds"] + ) + return tracker + def __repr__(self) -> str: # show only part of the series for brevity series_str = list_preview_str(self.change_series) diff --git a/src/detectmatelibrary/utils/persistency/event_persistency.py b/src/detectmatelibrary/utils/persistency/event_persistency.py index 3c6d1789..2ad1b52f 100644 --- a/src/detectmatelibrary/utils/persistency/event_persistency.py +++ b/src/detectmatelibrary/utils/persistency/event_persistency.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Type +from typing import Any, Callable, Dict, List, Optional, Type from .event_data_structures.base import EventDataStructure @@ -31,6 +31,8 @@ def __init__( self.event_data_kwargs = event_data_kwargs or {} self.variable_blacklist = variable_blacklist or [] self.event_templates: Dict[int | str, str] = {} + self._events_since_save: int = 0 + self._on_ingest_callbacks: 
list[Callable[[], None]] = [] def ingest_event( self, @@ -40,6 +42,9 @@ def ingest_event( named_variables: Dict[str, Any] = {} ) -> None: """Ingest event data into the appropriate EventData store.""" + self._events_since_save += 1 + for _cb in self._on_ingest_callbacks: + _cb() self.events_seen.add(event_id) if not variables and not named_variables: return @@ -54,6 +59,14 @@ def ingest_event( data = data_structure.to_data(all_variables) data_structure.add_data(data) + def reset_events_since_save(self) -> None: + """Reset the events-since-save counter after a successful save.""" + self._events_since_save = 0 + + def register_on_ingest(self, callback: Callable[[], None]) -> None: + """Register a callback invoked after every ingest_event call.""" + self._on_ingest_callbacks.append(callback) + def get_events_seen(self) -> set[int | str]: """Retrieve all event IDs observed via ingest_event(), regardless of whether variables were extracted.""" diff --git a/src/detectmatelibrary/utils/persistency/persistency_saver.py b/src/detectmatelibrary/utils/persistency/persistency_saver.py new file mode 100644 index 00000000..521bdf6d --- /dev/null +++ b/src/detectmatelibrary/utils/persistency/persistency_saver.py @@ -0,0 +1,215 @@ +import atexit +import json +import signal +import threading +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Callable + +import fsspec + +from detectmatelibrary.utils.persistency.event_data_structures.base import EventDataStructure +from detectmatelibrary.utils.persistency.event_data_structures.dataframes import ( + EventDataFrame, + ChunkedEventDataFrame, +) +from detectmatelibrary.utils.persistency.event_data_structures.trackers import ( + EventTracker, + EventStabilityTracker, +) +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from tools.logging import logger + +_BACKEND_REGISTRY: dict[str, type[EventDataStructure]] = { + "EventTracker": EventTracker, + 
"EventStabilityTracker": EventStabilityTracker, + "EventDataFrame": EventDataFrame, + "ChunkedEventDataFrame": ChunkedEventDataFrame, +} + +_EXTENSION_MAP: dict[str, str] = { + "EventTracker": "msgpack", + "EventStabilityTracker": "msgpack", + "EventDataFrame": "parquet", + "ChunkedEventDataFrame": "parquet", +} + + +def _coerce_event_id(k: str) -> int | str: + try: + return int(k) + except ValueError: + return k + + +class PersistencyLoadError(Exception): + """Raised when restoring persisted state fails.""" + + +@dataclass +class PersistencySaverConfig: + path: str + save_interval_seconds: int = 300 + events_until_save: int | None = None + auto_load: bool = False + storage_options: dict[str, Any] = field(default_factory=dict) + + +class _SaveTimer(threading.Thread): + """Daemon thread that calls callback every interval seconds.""" + + def __init__(self, interval: float, callback: Callable[[], None]) -> None: + super().__init__(daemon=True) + self._interval = interval + self._callback = callback + self._stop_event = threading.Event() + + def run(self) -> None: + while not self._stop_event.wait(self._interval): + self._callback() + + def stop(self) -> None: + self._stop_event.set() + + +class PersistencySaver: + """Saves and restores EventPersistency state to/from configurable storage + via fsspec.""" + + def __init__(self, persistency: EventPersistency, config: PersistencySaverConfig) -> None: + self._persistency = persistency + self._config = config + self._fs, self._root = fsspec.url_to_fs(config.path, **config.storage_options) + self._lock = threading.Lock() + self._timer: _SaveTimer | None = None + + if config.events_until_save is not None: + persistency.register_on_ingest(self._check_event_count) + + if config.auto_load: + self.load() + + def save(self) -> None: + """Write full EventPersistency state to storage. + + Thread-safe. 
+ """ + with self._lock: + try: + self._fs.makedirs(f"{self._root}/events", exist_ok=True) + event_backends: dict[str, str] = {} + event_extensions: dict[str, str] = {} + + for event_id, data_structure in self._persistency.events_data.items(): + backend_name = type(data_structure).__name__ + ext = _EXTENSION_MAP.get(backend_name, "bin") + event_backends[str(event_id)] = backend_name + event_extensions[str(event_id)] = ext + + file_path = f"{self._root}/events/{event_id}.{ext}" + with self._fs.open(file_path, "wb") as f: + f.write(data_structure.dump()) + + metadata = { + "version": 1, + "saved_at": datetime.now(timezone.utc).isoformat(), + "events_seen": list(self._persistency.events_seen), + "event_templates": { + str(k): v for k, v in self._persistency.event_templates.items() + }, + "event_backends": event_backends, + "event_extensions": event_extensions, + "event_data_kwargs": self._safe_event_data_kwargs(), + } + with self._fs.open(f"{self._root}/metadata.json", "w") as f: + json.dump(metadata, f, indent=2) + + self._persistency.reset_events_since_save() + except Exception as e: + logger.warning(f"PersistencySaver: save failed — {e}") + + def load(self) -> None: + """Restore EventPersistency state from storage. + + Raises PersistencyLoadError on failure. 
+ """ + meta_path = f"{self._root}/metadata.json" + if not self._fs.exists(meta_path): + raise PersistencyLoadError( + f"No saved state found at '{self._config.path}' (metadata.json missing)" + ) + try: + with self._fs.open(meta_path, "r") as f: + metadata = json.load(f) + + self._persistency.events_seen = set(metadata["events_seen"]) + self._persistency.event_templates = { + _coerce_event_id(k): v for k, v in metadata["event_templates"].items() + } + global_kwargs = metadata.get("event_data_kwargs", {}) + + for event_id_str, backend_name in metadata["event_backends"].items(): + event_id = _coerce_event_id(event_id_str) + ext = metadata["event_extensions"][event_id_str] + file_path = f"{self._root}/events/{event_id_str}.{ext}" + with self._fs.open(file_path, "rb") as f: + data = f.read() + if backend_name not in _BACKEND_REGISTRY: + raise PersistencyLoadError( + f"Unknown backend '{backend_name}' — cannot restore event '{event_id}'" + ) + backend_cls = _BACKEND_REGISTRY[backend_name] + self._persistency.events_data[event_id] = backend_cls.load(data, **global_kwargs) + except PersistencyLoadError: + raise + except Exception as e: + raise PersistencyLoadError(f"Failed to restore state: {e}") from e + + def start(self) -> None: + """Start the background save timer and register process-exit hooks.""" + atexit.register(self.stop) + try: + signal.signal(signal.SIGTERM, lambda *_: self.stop()) + except (OSError, ValueError): + pass # not the main thread or signal not available + + self._timer = _SaveTimer( + interval=self._config.save_interval_seconds, + callback=self._tick, + ) + self._timer.start() + + def stop(self) -> None: + """Stop the timer and do a final save.""" + if self._timer is None: + return + self._timer.stop() + self._timer.join(timeout=5.0) + self._timer = None + atexit.unregister(self.stop) + self.save() + + def _safe_event_data_kwargs(self) -> dict[str, Any]: + """Return event_data_kwargs with non-JSON-serializable values + excluded.""" + safe = {} + 
for k, v in self._persistency.event_data_kwargs.items(): + try: + json.dumps(v) + safe[k] = v + except (TypeError, ValueError): + pass + return safe + + def _check_event_count(self) -> None: + """Trigger a save when ingested-event count reaches + events_until_save.""" + if ( + self._config.events_until_save is not None + and self._persistency._events_since_save >= self._config.events_until_save + ): + self.save() + + def _tick(self) -> None: + """Called by the timer thread each interval.""" + self.save() diff --git a/src/detectmatelibrary/utils/RLE_list.py b/src/detectmatelibrary/utils/persistency/rle_list.py similarity index 97% rename from src/detectmatelibrary/utils/RLE_list.py rename to src/detectmatelibrary/utils/persistency/rle_list.py index 8009433e..586e084a 100644 --- a/src/detectmatelibrary/utils/RLE_list.py +++ b/src/detectmatelibrary/utils/persistency/rle_list.py @@ -3,7 +3,7 @@ from typing import Generic, Iterable, Iterator, List, Tuple, TypeVar -from .preview_helpers import list_preview_str +from ..preview_helpers import list_preview_str T = TypeVar("T") diff --git a/tests/test_common/test_core.py b/tests/test_common/test_core.py index bc5f4abe..bffafcfc 100644 --- a/tests/test_common/test_core.py +++ b/tests/test_common/test_core.py @@ -2,7 +2,7 @@ from detectmatelibrary.common.core import CoreConfig, CoreComponent from detectmatelibrary.common._config import BasicConfig -from detectmatelibrary.utils.data_buffer import ArgsBuffer +from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode import detectmatelibrary.schemas._op as op_schemas import detectmatelibrary.schemas as schemas @@ -428,3 +428,22 @@ def test_post_train_called_on_first_detection_item(self) -> None: # subsequent items don't re-trigger it component.process(self._make_log(4)) assert component.post_train_called == 1 + + +class TestCoreComponentContextManager: + def test_can_be_used_as_context_manager(self): + component = CoreComponent(name="test", config=CoreConfig(), 
args_buffer=ArgsBuffer(BufferMode.NO_BUF)) + with component as c: + assert c is component + + def test_exit_calls_saver_stop_when_set(self): + stopped = {"called": False} + + class FakeSaver: + def stop(self): stopped["called"] = True + + component = CoreComponent(name="test", config=CoreConfig(), args_buffer=ArgsBuffer(BufferMode.NO_BUF)) + component.saver = FakeSaver() + with component: + pass + assert stopped["called"] diff --git a/tests/test_common/test_persist_config.py b/tests/test_common/test_persist_config.py new file mode 100644 index 00000000..9b38b3bd --- /dev/null +++ b/tests/test_common/test_persist_config.py @@ -0,0 +1,145 @@ +import warnings + +import fsspec +import pytest +from pydantic import ValidationError + +from detectmatelibrary.common._config._compile import MissingParamsWarning +from detectmatelibrary.common.detector import CoreDetector, CoreDetectorConfig, PersistConfig +from detectmatelibrary.detectors.new_value_detector import NewValueDetectorConfig +from detectmatelibrary.utils.persistency.event_data_structures.trackers import EventStabilityTracker +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency + + +class TestPersistConfig: + def test_defaults(self): + cfg = PersistConfig() + assert cfg.path == "./state" + assert cfg.interval_seconds == 300 + assert cfg.events_until_save is None + assert cfg.auto_load is False + assert cfg.storage_options == {} + + def test_custom_values(self): + cfg = PersistConfig(path="./my-path", interval_seconds=60, auto_load=True) + assert cfg.path == "./my-path" + assert cfg.interval_seconds == 60 + assert cfg.auto_load is True + + def test_events_until_save_accepts_int(self): + cfg = PersistConfig(events_until_save=500) + assert cfg.events_until_save == 500 + + def test_extra_fields_rejected(self): + with pytest.raises(ValidationError): + PersistConfig(unknown_field="value") # type: ignore + + +class TestCoreDetectorConfigPersistField: + def 
test_persist_is_none_by_default(self): + cfg = CoreDetectorConfig() + assert cfg.persist is None + + def test_persist_accepts_persist_config(self): + cfg = CoreDetectorConfig(persist=PersistConfig(path="./custom")) + assert cfg.persist is not None + assert cfg.persist.path == "./custom" + + def test_persist_accepts_none_explicitly(self): + cfg = CoreDetectorConfig(persist=None) + assert cfg.persist is None + + +class TestRegisterPersistency: + def test_noop_when_persist_is_none(self): + det = CoreDetector() + p = EventPersistency(event_data_class=EventStabilityTracker) + det._register_persistency(p) + assert det.saver is None + + def test_creates_saver_when_persist_configured(self): + config = CoreDetectorConfig( + persist=PersistConfig(path="memory://regpersist_create/state") + ) + det = CoreDetector(config=config) + p = EventPersistency(event_data_class=EventStabilityTracker) + det._register_persistency(p) + assert det.saver is not None + det.saver.stop() + + def test_saver_path_includes_detector_name(self): + config = CoreDetectorConfig( + persist=PersistConfig(path="memory://regpersist_path/state") + ) + det = CoreDetector(name="MyDetector", config=config) + p = EventPersistency(event_data_class=EventStabilityTracker) + det._register_persistency(p) + assert det.saver is not None + det.saver.stop() # stop() calls save() as final save + fs = fsspec.filesystem("memory") + assert fs.exists("regpersist_path/state/MyDetector/metadata.json") + + +class TestPersistConfigSerialization: + def test_to_dict_places_persist_at_top_level(self): + config = NewValueDetectorConfig( + auto_config=True, + persist=PersistConfig(path="./my-state", interval_seconds=60), + ) + result = config.to_dict("MyDet") + inner = result["detectors"]["MyDet"] + assert "persist" in inner + assert inner["persist"]["path"] == "./my-state" + assert inner["persist"]["interval_seconds"] == 60 + assert "persist" not in inner.get("params", {}) + + def test_to_dict_omits_persist_when_none(self): + config 
= NewValueDetectorConfig(auto_config=True) + result = config.to_dict("MyDet") + inner = result["detectors"]["MyDet"] + assert "persist" not in inner + + def test_from_dict_loads_persist_block(self): + config_dict = { + "detectors": { + "MyDet": { + "method_type": "new_value_detector", + "auto_config": True, + "persist": {"path": "./my-state", "interval_seconds": 60}, + } + } + } + config = NewValueDetectorConfig.from_dict(config_dict, "MyDet") + assert config.persist is not None + assert config.persist.path == "./my-state" + assert config.persist.interval_seconds == 60 + + def test_roundtrip_yaml_to_pydantic_to_yaml(self): + config_dict = { + "detectors": { + "MyDet": { + "method_type": "new_value_detector", + "auto_config": True, + "persist": {"path": "./my-state"}, + } + } + } + config = NewValueDetectorConfig.from_dict(config_dict, "MyDet") + result = config.to_dict("MyDet") + assert result["detectors"]["MyDet"]["persist"]["path"] == "./my-state" + + def test_no_missing_params_warning_with_persist_only(self): + config_dict = { + "detectors": { + "MyDet": { + "method_type": "new_value_detector", + "auto_config": False, + "persist": {"path": "./state"}, + } + } + } + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + NewValueDetectorConfig.from_dict(config_dict, "MyDet") + missing_params_warnings = [x for x in w if issubclass(x.category, MissingParamsWarning)] + assert len(missing_params_warnings) == 0 diff --git a/tests/test_detectors/test_persist_integration.py b/tests/test_detectors/test_persist_integration.py new file mode 100644 index 00000000..a32767da --- /dev/null +++ b/tests/test_detectors/test_persist_integration.py @@ -0,0 +1,132 @@ +from detectmatelibrary.detectors.new_value_detector import NewValueDetector, NewValueDetectorConfig +from detectmatelibrary.detectors.new_value_combo_detector import ( + NewValueComboDetector, + NewValueComboDetectorConfig, +) +from detectmatelibrary.detectors.new_event_detector import 
NewEventDetector, NewEventDetectorConfig +from detectmatelibrary.common.detector import PersistConfig +from detectmatelibrary.utils.persistency.persistency_saver import PersistencySaver + + +class TestNewValueDetectorPersist: + def test_no_saver_by_default(self): + det = NewValueDetector() + assert det.saver is None + + def test_saver_created_when_persist_configured(self): + config = NewValueDetectorConfig( + auto_config=True, + persist=PersistConfig(path="memory://nvd_saver/state"), + ) + det = NewValueDetector(name="NVD1", config=config) + assert det.saver is not None + det.saver.stop() + + def test_save_and_reload(self): + base_path = "memory://nvd_reload/state" + det_name = "NVD_Reload" + + det1 = NewValueDetector( + name=det_name, + config=NewValueDetectorConfig( + auto_config=False, + persist=PersistConfig(path=base_path), + ), + ) + det1.persistency.ingest_event( + event_id=1, + event_template="login <*>", + named_variables={"user": "alice"}, + ) + assert det1.saver is not None + assert isinstance(det1.saver, PersistencySaver) + det1.saver.save() + det1.saver.stop() + + det2 = NewValueDetector( + name=det_name, + config=NewValueDetectorConfig( + auto_config=False, + persist=PersistConfig(path=base_path, auto_load=True), + ), + ) + assert 1 in det2.persistency.get_events_seen() + assert det2.saver is not None + det2.saver.stop() + + def test_set_configuration_preserves_persist_config(self): + """Persist field must survive an auto-config set_configuration() + rebuild.""" + config = NewValueDetectorConfig( + auto_config=True, + persist=PersistConfig(path="memory://set_config_preserve/state"), + ) + det = NewValueDetector(name="SetConfigTest", config=config) + assert det.config.persist is not None # sanity: persist is set before + det.set_configuration() + assert det.config.persist is not None # must survive config rebuild + assert det.config.persist.path == "memory://set_config_preserve/state" + assert det.saver is not None + det.saver.stop() + + +class 
TestNewValueComboDetectorPersist: + def test_no_saver_by_default(self): + det = NewValueComboDetector() + assert det.saver is None + + def test_saver_created_when_persist_configured(self): + config = NewValueComboDetectorConfig( + auto_config=True, + persist=PersistConfig(path="memory://nvcd_saver/state"), + ) + det = NewValueComboDetector(name="NVCD1", config=config) + assert det.saver is not None + det.saver.stop() + + def test_save_and_reload(self): + base_path = "memory://nvcd_reload/state" + det_name = "NVCD_Reload" + + det1 = NewValueComboDetector( + name=det_name, + config=NewValueComboDetectorConfig( + auto_config=False, + persist=PersistConfig(path=base_path), + ), + ) + det1.persistency.ingest_event( + event_id=1, + event_template="login <*>", + named_variables={"user": "alice"}, + ) + assert det1.saver is not None + assert isinstance(det1.saver, PersistencySaver) + det1.saver.save() + det1.saver.stop() + + det2 = NewValueComboDetector( + name=det_name, + config=NewValueComboDetectorConfig( + auto_config=False, + persist=PersistConfig(path=base_path, auto_load=True), + ), + ) + assert 1 in det2.persistency.get_events_seen() + assert det2.saver is not None + det2.saver.stop() + + +class TestNewEventDetectorPersist: + def test_no_saver_by_default(self): + det = NewEventDetector() + assert det.saver is None + + def test_saver_created_when_persist_configured(self): + config = NewEventDetectorConfig( + auto_config=True, + persist=PersistConfig(path="memory://ned_saver/state"), + ) + det = NewEventDetector(name="NED1", config=config) + assert det.saver is not None + det.saver.stop() diff --git a/tests/test_utils/test_persistency.py b/tests/test_utils/test_persistency.py index 55af32b7..16a829aa 100644 --- a/tests/test_utils/test_persistency.py +++ b/tests/test_utils/test_persistency.py @@ -461,3 +461,26 @@ def test_variable_blacklist_across_backends(self): assert "var_0" in data2.columns # First variable assert "var_1" not in data2.columns # Blacklisted assert 
"timestamp" not in data2.columns # Blacklisted + + +class TestEventPersistencyEventsSinceSave: + def test_events_since_save_starts_at_zero(self): + p = EventPersistency(event_data_class=EventDataFrame) + assert p._events_since_save == 0 + + def test_events_since_save_increments_on_ingest(self): + p = EventPersistency(event_data_class=EventDataFrame) + p.ingest_event(**SAMPLE_EVENT_1) + assert p._events_since_save == 1 + + def test_events_since_save_increments_for_no_variable_event(self): + p = EventPersistency(event_data_class=EventDataFrame) + p.ingest_event(event_id="E999", event_template="no vars") + assert p._events_since_save == 1 + + def test_reset_events_since_save(self): + p = EventPersistency(event_data_class=EventDataFrame) + p.ingest_event(**SAMPLE_EVENT_1) + p.ingest_event(**SAMPLE_EVENT_2) + p.reset_events_since_save() + assert p._events_since_save == 0 diff --git a/tests/test_utils/test_persistency_dump_load.py b/tests/test_utils/test_persistency_dump_load.py new file mode 100644 index 00000000..b1655974 --- /dev/null +++ b/tests/test_utils/test_persistency_dump_load.py @@ -0,0 +1,190 @@ +import pytest +import pandas as pd +import polars as pl +from dataclasses import dataclass + +from detectmatelibrary.utils.persistency.persistency_saver import PersistencyLoadError +from detectmatelibrary.utils.persistency.event_data_structures.base import ( + EventDataStructure, +) +from detectmatelibrary.utils.persistency.event_data_structures.dataframes.event_dataframe import ( + EventDataFrame, +) +from detectmatelibrary.utils.persistency.event_data_structures.dataframes.chunked_event_dataframe import ( + ChunkedEventDataFrame, +) +from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( + SingleStabilityTracker, + EventStabilityTracker, +) + + +def test_persistency_load_error_is_exception(): + err = PersistencyLoadError("test error") + assert isinstance(err, Exception) + assert str(err) == "test error" + + +def 
test_event_data_structure_has_dump_load(): + assert hasattr(EventDataStructure, "dump") + assert hasattr(EventDataStructure, "load") + + +def test_subclass_without_dump_load_cannot_be_instantiated(): + @dataclass + class _Incomplete(EventDataStructure): + def add_data(self, data_object): pass + def get_data(self): pass + def get_variables(self): pass + def to_data(self, raw_data): pass + # intentionally missing dump() and load() + + with pytest.raises(TypeError): + _Incomplete() + + +class TestSingleStabilityTrackerState: + def _make_tracker(self) -> SingleStabilityTracker: + t = SingleStabilityTracker(min_samples=3) + for v in ["a", "b", "a", "c", "a"]: + t.add_value(v) + return t + + def test_round_trip_preserves_change_series(self): + t = self._make_tracker() + state = t.to_state() + t2 = SingleStabilityTracker.from_state(state) + assert list(t2.change_series) == list(t.change_series) + + def test_round_trip_preserves_unique_set(self): + t = self._make_tracker() + state = t.to_state() + t2 = SingleStabilityTracker.from_state(state) + assert t2.unique_set == t.unique_set + + def test_round_trip_preserves_min_samples(self): + t = self._make_tracker() + state = t.to_state() + t2 = SingleStabilityTracker.from_state(state) + assert t2.min_samples == 3 + + def test_round_trip_preserves_classification(self): + t = self._make_tracker() + state = t.to_state() + t2 = SingleStabilityTracker.from_state(state) + assert t2.classify().type == t.classify().type + + def test_state_includes_type_and_module(self): + t = SingleStabilityTracker() + state = t.to_state() + assert state["type"] == "SingleStabilityTracker" + assert "module" in state + + def test_empty_tracker_round_trip(self): + t = SingleStabilityTracker(min_samples=5) + state = t.to_state() + t2 = SingleStabilityTracker.from_state(state) + assert len(t2.change_series) == 0 + assert len(t2.unique_set) == 0 + + +class TestEventTrackerDumpLoad: + def _make_tracker(self) -> EventStabilityTracker: + t = 
EventStabilityTracker() + for i in range(10): + t.add_data({"var_0": f"value_{i % 3}", "var_1": str(i)}) + return t + + def test_dump_returns_bytes(self): + t = self._make_tracker() + assert isinstance(t.dump(), bytes) + + def test_round_trip_restores_tracker_keys(self): + t = self._make_tracker() + t2 = EventStabilityTracker.load(t.dump()) + assert set(t2.get_data().keys()) == set(t.get_data().keys()) + + def test_round_trip_preserves_change_series(self): + t = self._make_tracker() + t2 = EventStabilityTracker.load(t.dump()) + for key in t.get_data(): + original = list(t.get_data()[key].change_series) + restored = list(t2.get_data()[key].change_series) + assert restored == original + + def test_round_trip_preserves_unique_set(self): + t = self._make_tracker() + t2 = EventStabilityTracker.load(t.dump()) + for key in t.get_data(): + assert t2.get_data()[key].unique_set == t.get_data()[key].unique_set + + def test_empty_tracker_round_trip(self): + t = EventStabilityTracker() + t2 = EventStabilityTracker.load(t.dump()) + assert t2.get_data() == {} + + +class TestEventDataFrameDumpLoad: + def _make_edf(self) -> EventDataFrame: + edf = EventDataFrame() + edf.add_data(edf.to_data({"user": "alice", "ip": "192.168.1.1"})) + edf.add_data(edf.to_data({"user": "bob", "ip": "192.168.1.2"})) + return edf + + def test_dump_returns_bytes(self): + assert isinstance(self._make_edf().dump(), bytes) + + def test_round_trip_preserves_rows(self): + edf = self._make_edf() + edf2 = EventDataFrame.load(edf.dump()) + assert len(edf2.get_data()) == 2 + + def test_round_trip_preserves_columns(self): + edf = self._make_edf() + edf2 = EventDataFrame.load(edf.dump()) + assert list(edf2.get_data().columns) == ["user", "ip"] + + def test_round_trip_preserves_values(self): + edf = self._make_edf() + edf2 = EventDataFrame.load(edf.dump()) + assert edf2.get_data()["user"].tolist() == ["alice", "bob"] + + def test_empty_dataframe_round_trip(self): + edf = EventDataFrame() + edf2 = 
EventDataFrame.load(edf.dump()) + assert isinstance(edf2.get_data(), pd.DataFrame) + assert len(edf2.get_data()) == 0 + + +class TestChunkedEventDataFrameDumpLoad: + def _make_cedf(self) -> ChunkedEventDataFrame: + cedf = ChunkedEventDataFrame(max_rows=100, compact_every=1000) + for i in range(5): + cedf.add_data(cedf.to_data({"user": [f"user_{i}"], "val": [i]})) + return cedf + + def test_dump_returns_bytes(self): + assert isinstance(self._make_cedf().dump(), bytes) + + def test_round_trip_preserves_rows(self): + cedf = self._make_cedf() + cedf2 = ChunkedEventDataFrame.load(cedf.dump()) + assert len(cedf2.get_data()) == 5 + + def test_round_trip_preserves_columns(self): + cedf = self._make_cedf() + cedf2 = ChunkedEventDataFrame.load(cedf.dump()) + assert set(cedf2.get_data().columns) == {"user", "val"} + + def test_round_trip_restores_config(self): + cedf = ChunkedEventDataFrame(max_rows=42, compact_every=7) + cedf.add_data(cedf.to_data({"x": [1]})) + cedf2 = ChunkedEventDataFrame.load(cedf.dump()) + assert cedf2.max_rows == 42 + assert cedf2.compact_every == 7 + + def test_empty_round_trip(self): + cedf = ChunkedEventDataFrame() + cedf2 = ChunkedEventDataFrame.load(cedf.dump()) + assert isinstance(cedf2.get_data(), pl.DataFrame) + assert len(cedf2.get_data()) == 0 diff --git a/tests/test_utils/test_persistency_saver.py b/tests/test_utils/test_persistency_saver.py new file mode 100644 index 00000000..f8aad6c7 --- /dev/null +++ b/tests/test_utils/test_persistency_saver.py @@ -0,0 +1,278 @@ +import json +import time +import threading + +import fsspec +import pytest + +from detectmatelibrary.utils.persistency.event_data_structures.dataframes import EventDataFrame +from detectmatelibrary.utils.persistency.event_data_structures.trackers import EventStabilityTracker +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils.persistency.persistency_saver import ( + PersistencySaverConfig, + PersistencyLoadError, + 
PersistencySaver, + _SaveTimer, +) + + +class TestPersistencySaverConfig: + def test_requires_path(self): + cfg = PersistencySaverConfig(path="file:///tmp/test") + assert cfg.path == "file:///tmp/test" + + def test_defaults(self): + cfg = PersistencySaverConfig(path="file:///tmp/test") + assert cfg.save_interval_seconds == 300 + assert cfg.events_until_save is None + assert cfg.auto_load is False + assert cfg.storage_options == {} + + +class TestSaveTimer: + def test_callback_fires_after_interval(self): + fired = threading.Event() + timer = _SaveTimer(interval=0.05, callback=fired.set) + timer.start() + assert fired.wait(timeout=1.0), "callback did not fire" + timer.stop() + timer.join(timeout=1.0) + + def test_stop_prevents_further_callbacks(self): + count = {"n": 0} + + def inc(): + count["n"] += 1 + + timer = _SaveTimer(interval=0.05, callback=inc) + timer.start() + time.sleep(0.12) + timer.stop() + timer.join(timeout=1.0) + captured = count["n"] + time.sleep(0.12) + assert count["n"] == captured # no more fires after stop + + +def _make_persistency_with_data() -> EventPersistency: + p = EventPersistency(event_data_class=EventDataFrame) + p.ingest_event(event_id="E1", event_template="User <*>", variables=["alice"], named_variables={}) + p.ingest_event(event_id="E1", event_template="User <*>", variables=["bob"], named_variables={}) + p.ingest_event(event_id="E2", event_template="Error <*>", variables=["timeout"], named_variables={}) + return p + + +def _memory_saver(path: str = "memory://test/state") -> tuple[PersistencySaver, EventPersistency]: + p = _make_persistency_with_data() + cfg = PersistencySaverConfig(path=path) + saver = PersistencySaver(p, cfg) + return saver, p + + +class TestPersistencySaverSaveLoad: + def test_save_creates_metadata_json(self): + saver, _ = _memory_saver() + saver.save() + fs = fsspec.filesystem("memory") + assert fs.exists("test/state/metadata.json") + + def test_save_creates_event_files(self): + saver, _ = _memory_saver() + 
saver.save() + fs = fsspec.filesystem("memory") + assert fs.exists("test/state/events/E1.parquet") + assert fs.exists("test/state/events/E2.parquet") + + def test_metadata_contains_events_seen(self): + saver, _ = _memory_saver() + saver.save() + fs = fsspec.filesystem("memory") + with fs.open("test/state/metadata.json", "r") as f: + meta = json.load(f) + assert set(meta["events_seen"]) == {"E1", "E2"} + + def test_save_resets_events_since_save(self): + saver, p = _memory_saver() + assert p._events_since_save == 3 + saver.save() + assert p._events_since_save == 0 + + def test_load_restores_events_seen(self): + saver, _ = _memory_saver() + saver.save() + + p2 = EventPersistency(event_data_class=EventDataFrame) + cfg = PersistencySaverConfig(path="memory://test/state") + saver2 = PersistencySaver(p2, cfg) + saver2.load() + assert "E1" in p2.get_events_seen() + assert "E2" in p2.get_events_seen() + + def test_load_restores_event_data(self): + saver, _ = _memory_saver() + saver.save() + + p2 = EventPersistency(event_data_class=EventDataFrame) + cfg = PersistencySaverConfig(path="memory://test/state") + PersistencySaver(p2, cfg).load() + assert len(p2.get_event_data("E1")) == 2 + + def test_load_restores_templates(self): + saver, _ = _memory_saver() + saver.save() + + p2 = EventPersistency(event_data_class=EventDataFrame) + PersistencySaver(p2, PersistencySaverConfig(path="memory://test/state")).load() + assert p2.get_event_template("E1") == "User <*>" + + def test_load_raises_on_missing_path(self): + p = EventPersistency(event_data_class=EventDataFrame) + saver = PersistencySaver(p, PersistencySaverConfig(path="memory://nonexistent/path")) + with pytest.raises(PersistencyLoadError): + saver.load() + + +class TestPersistencySaverTriggers: + def test_timer_triggers_save(self): + p = _make_persistency_with_data() + cfg = PersistencySaverConfig( + path="memory://trigger_test/state", + save_interval_seconds=0, # fire immediately + ) + saver = PersistencySaver(p, cfg) + 
saver.start() + time.sleep(0.15) + saver.stop() + fs = fsspec.filesystem("memory") + assert fs.exists("trigger_test/state/metadata.json") + + def test_timed_save_resets_events_since_save(self): + p = EventPersistency(event_data_class=EventDataFrame) + cfg = PersistencySaverConfig( + path="memory://dirty_test2/state", + save_interval_seconds=0, + ) + saver = PersistencySaver(p, cfg) + saver.start() + + p.ingest_event(event_id="E1", event_template="T", variables=["x"], named_variables={}) + p.ingest_event(event_id="E1", event_template="T", variables=["y"], named_variables={}) + time.sleep(0.15) + saver.stop() + + assert p._events_since_save == 0 # save() was called by the timer, which resets the counter + + def test_stop_does_final_save(self): + p = _make_persistency_with_data() + cfg = PersistencySaverConfig( + path="memory://stop_test/state", + save_interval_seconds=9999, + ) + saver = PersistencySaver(p, cfg) + saver.start() + saver.stop() + fs = fsspec.filesystem("memory") + assert fs.exists("stop_test/state/metadata.json") + + def test_events_until_save_triggers_save(self): + p = EventPersistency(event_data_class=EventDataFrame) + cfg = PersistencySaverConfig( + path="memory://events_count_test/state", + save_interval_seconds=9999, + events_until_save=3, + ) + PersistencySaver(p, cfg) # no start() needed — callback fires on ingest + for i in range(3): + p.ingest_event(event_id="E1", event_template="T", variables=[str(i)], named_variables={}) + fs = fsspec.filesystem("memory") + assert fs.exists("events_count_test/state/metadata.json") + + def test_events_until_save_no_save_before_threshold(self): + p = EventPersistency(event_data_class=EventDataFrame) + cfg = PersistencySaverConfig( + path="memory://events_count_test2/state", + save_interval_seconds=9999, + events_until_save=5, + ) + PersistencySaver(p, cfg) + for i in range(4): + p.ingest_event(event_id="E1", event_template="T", variables=[str(i)], named_variables={}) + fs = fsspec.filesystem("memory") + assert 
not fs.exists("events_count_test2/state/metadata.json") + + def test_events_until_save_resets_counter_and_retrigggers(self): + p = EventPersistency(event_data_class=EventDataFrame) + cfg = PersistencySaverConfig( + path="memory://events_count_test3/state", + save_interval_seconds=9999, + events_until_save=2, + ) + PersistencySaver(p, cfg) + for i in range(4): + p.ingest_event(event_id="E1", event_template="T", variables=[str(i)], named_variables={}) + # counter should be 0 — two saves fired (at event 2 and event 4) + assert p._events_since_save == 0 + + def test_auto_load_on_init(self): + # First: save some state + p1 = _make_persistency_with_data() + PersistencySaver(p1, PersistencySaverConfig(path="memory://autoload/state")).save() + + # Then: create new persistency with auto_load=True + p2 = EventPersistency(event_data_class=EventDataFrame) + PersistencySaver(p2, PersistencySaverConfig(path="memory://autoload/state", auto_load=True)) + assert "E1" in p2.get_events_seen() + + +class TestPersistencySaverIntegration: + def test_full_cycle_dataframe_backend(self): + """Train → save → restore → verify data identical.""" + p1 = EventPersistency(event_data_class=EventDataFrame) + for i in range(20): + p1.ingest_event( + event_id=f"E{i % 3}", + event_template=f"Template {i % 3}", + variables=[f"val_{i}"], + named_variables={}, + ) + + saver1 = PersistencySaver(p1, PersistencySaverConfig(path="memory://integration/df")) + saver1.save() + + p2 = EventPersistency(event_data_class=EventDataFrame) + PersistencySaver(p2, PersistencySaverConfig(path="memory://integration/df")).load() + + assert p2.get_events_seen() == p1.get_events_seen() + assert p2.get_event_templates() == p1.get_event_templates() + for eid in p1.get_events_data(): + original = p1.get_event_data(eid) + restored = p2.get_event_data(eid) + assert len(restored) == len(original) + assert list(restored.columns) == list(original.columns) + assert list(restored["var_0"]) == list(original["var_0"]) + + def 
test_full_cycle_tracker_backend(self): + """Train → save → restore → verify tracker state identical.""" + p1 = EventPersistency(event_data_class=EventStabilityTracker) + for i in range(30): + p1.ingest_event( + event_id="E1", + event_template="Tmpl", + variables=[f"v_{i % 5}"], + named_variables={}, + ) + + saver1 = PersistencySaver(p1, PersistencySaverConfig(path="memory://integration/tracker")) + saver1.save() + + p2 = EventPersistency(event_data_class=EventStabilityTracker) + PersistencySaver(p2, PersistencySaverConfig(path="memory://integration/tracker")).load() + + original_tracker = p1.get_events_data()["E1"] + restored_tracker = p2.get_events_data()["E1"] + + for var_name in original_tracker.get_variables(): + orig = original_tracker.get_data()[var_name] + rest = restored_tracker.get_data()[var_name] + assert list(rest.change_series) == list(orig.change_series) + assert rest.unique_set == orig.unique_set diff --git a/tests/test_utils/test_stability_tracking.py b/tests/test_utils/test_stability_tracking.py index c36b34b3..3da95205 100644 --- a/tests/test_utils/test_stability_tracking.py +++ b/tests/test_utils/test_stability_tracking.py @@ -12,7 +12,7 @@ EventStabilityTracker, Classification, ) -from detectmatelibrary.utils.RLE_list import RLEList +from detectmatelibrary.utils.persistency.rle_list import RLEList class TestStabilityClassifier: diff --git a/uv.lock b/uv.lock index 5d6526f1..176093d5 100644 --- a/uv.lock +++ b/uv.lock @@ -5,9 +5,12 @@ resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version >= '3.14' and sys_platform == 'emscripten'", "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version < '3.14' and sys_platform == 'win32'", - "python_full_version < '3.14' and sys_platform == 'emscripten'", - "python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform 
== 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'emscripten'", + "python_full_version < '3.13' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] [[package]] @@ -225,11 +228,15 @@ version = "0.2.0" source = { editable = "." } dependencies = [ { name = "detectmateperformance" }, + { name = "fsspec" }, + { name = "msgpack" }, { name = "numpy" }, { name = "openai" }, { name = "pandas" }, { name = "polars" }, - { name = "protobuf" }, + { name = "protobuf", version = "6.33.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, + { name = "protobuf", version = "7.34.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, + { name = "pyarrow" }, { name = "pydantic" }, { name = "pyyaml" }, { name = "regex" }, @@ -250,11 +257,14 @@ dev = [ [package.metadata] requires-dist = [ { name = "detectmateperformance", git = "https://github.com/ait-detectmate/DetectMatePerformance?rev=main" }, + { name = "fsspec", specifier = ">=2024.1.0" }, + { name = "msgpack", specifier = ">=1.0.0" }, { name = "numpy", specifier = ">=2.3.2" }, { name = "openai", specifier = ">=2.26.0" }, { name = "pandas", specifier = ">=2.3.2" }, { name = "polars", specifier = ">=1.38.1" }, { name = "protobuf", specifier = ">=6.32.1" }, + { name = "pyarrow", specifier = ">=24.0.0" }, { name = "pydantic", specifier = ">=2.11.7" }, { name = "pyyaml", specifier = ">=6.0.3" }, { name = "regex", specifier = ">=2025.11.3" }, @@ -294,6 +304,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = 
"sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] +[[package]] +name = "fsspec" +version = "2026.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/8d/1c51c094345df128ca4a990d633fe1a0ff28726c9e6b3c41ba65087bba1d/fsspec-2026.4.0.tar.gz", hash = "sha256:301d8ac70ae90ef3ad05dcf94d6c3754a097f9b5fe4667d2787aa359ec7df7e4", size = 312760, upload-time = "2026-04-29T20:42:38.635Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/0c/043d5e551459da400957a1395e0febbf771446ff34291afcbe3d8be2a279/fsspec-2026.4.0-py3-none-any.whl", hash = "sha256:11ef7bb35dab8a394fde6e608221d5cf3e8499401c249bebaeaad760a1a8dec2", size = 203402, upload-time = "2026-04-29T20:42:36.842Z" }, +] + [[package]] name = "ghp-import" version = "2.1.0" @@ -656,6 +675,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/29/744136411e785c4b0b744d5413e56555265939ab3a104c6a4b719dad33fd/mkdocs_get_deps-0.2.2-py3-none-any.whl", hash = "sha256:e7878cbeac04860b8b5e0ca31d3abad3df9411a75a32cde82f8e44b6c16ff650", size = 9555, upload-time = "2026-03-10T02:46:32.256Z" }, ] +[[package]] +name = "msgpack" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/bd/8b0d01c756203fbab65d265859749860682ccd2a59594609aeec3a144efa/msgpack-1.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:70a0dff9d1f8da25179ffcf880e10cf1aad55fdb63cd59c9a49a1b82290062aa", size = 81939, upload-time = "2025-10-08T09:15:01.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/68/ba4f155f793a74c1483d4bdef136e1023f7bcba557f0db4ef3db3c665cf1/msgpack-1.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:446abdd8b94b55c800ac34b102dffd2f6aa0ce643c55dfc017ad89347db3dbdb", size = 85064, upload-time = "2025-10-08T09:15:03.764Z" }, + { url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" }, + { url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" }, + { url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" }, + { url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" }, + { url = "https://files.pythonhosted.org/packages/41/0d/2ddfaa8b7e1cee6c490d46cb0a39742b19e2481600a7a0e96537e9c22f43/msgpack-1.1.2-cp312-cp312-win32.whl", hash = "sha256:1fff3d825d7859ac888b0fbda39a42d59193543920eda9d9bea44d958a878029", size = 65096, upload-time = "2025-10-08T09:15:11.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/ec/d431eb7941fb55a31dd6ca3404d41fbb52d99172df2e7707754488390910/msgpack-1.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1de460f0403172cff81169a30b9a92b260cb809c4cb7e2fc79ae8d0510c78b6b", size = 72708, upload-time = "2025-10-08T09:15:12.554Z" }, + { url = "https://files.pythonhosted.org/packages/c5/31/5b1a1f70eb0e87d1678e9624908f86317787b536060641d6798e3cf70ace/msgpack-1.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:be5980f3ee0e6bd44f3a9e9dea01054f175b50c3e6cdb692bc9424c0bbb8bf69", size = 64119, upload-time = "2025-10-08T09:15:13.589Z" }, + { url = "https://files.pythonhosted.org/packages/6b/31/b46518ecc604d7edf3a4f94cb3bf021fc62aa301f0cb849936968164ef23/msgpack-1.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4efd7b5979ccb539c221a4c4e16aac1a533efc97f3b759bb5a5ac9f6d10383bf", size = 81212, upload-time = "2025-10-08T09:15:14.552Z" }, + { url = "https://files.pythonhosted.org/packages/92/dc/c385f38f2c2433333345a82926c6bfa5ecfff3ef787201614317b58dd8be/msgpack-1.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:42eefe2c3e2af97ed470eec850facbe1b5ad1d6eacdbadc42ec98e7dcf68b4b7", size = 84315, upload-time = "2025-10-08T09:15:15.543Z" }, + { url = "https://files.pythonhosted.org/packages/d3/68/93180dce57f684a61a88a45ed13047558ded2be46f03acb8dec6d7c513af/msgpack-1.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fdf7d83102bf09e7ce3357de96c59b627395352a4024f6e2458501f158bf999", size = 412721, upload-time = "2025-10-08T09:15:16.567Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ba/459f18c16f2b3fc1a1ca871f72f07d70c07bf768ad0a507a698b8052ac58/msgpack-1.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fac4be746328f90caa3cd4bc67e6fe36ca2bf61d5c6eb6d895b6527e3f05071e", size = 424657, upload-time = "2025-10-08T09:15:17.825Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/f8/4398c46863b093252fe67368b44edc6c13b17f4e6b0e4929dbf0bdb13f23/msgpack-1.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fffee09044073e69f2bad787071aeec727183e7580443dfeb8556cbf1978d162", size = 402668, upload-time = "2025-10-08T09:15:19.003Z" }, + { url = "https://files.pythonhosted.org/packages/28/ce/698c1eff75626e4124b4d78e21cca0b4cc90043afb80a507626ea354ab52/msgpack-1.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5928604de9b032bc17f5099496417f113c45bc6bc21b5c6920caf34b3c428794", size = 419040, upload-time = "2025-10-08T09:15:20.183Z" }, + { url = "https://files.pythonhosted.org/packages/67/32/f3cd1667028424fa7001d82e10ee35386eea1408b93d399b09fb0aa7875f/msgpack-1.1.2-cp313-cp313-win32.whl", hash = "sha256:a7787d353595c7c7e145e2331abf8b7ff1e6673a6b974ded96e6d4ec09f00c8c", size = 65037, upload-time = "2025-10-08T09:15:21.416Z" }, + { url = "https://files.pythonhosted.org/packages/74/07/1ed8277f8653c40ebc65985180b007879f6a836c525b3885dcc6448ae6cb/msgpack-1.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:a465f0dceb8e13a487e54c07d04ae3ba131c7c5b95e2612596eafde1dccf64a9", size = 72631, upload-time = "2025-10-08T09:15:22.431Z" }, + { url = "https://files.pythonhosted.org/packages/e5/db/0314e4e2db56ebcf450f277904ffd84a7988b9e5da8d0d61ab2d057df2b6/msgpack-1.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:e69b39f8c0aa5ec24b57737ebee40be647035158f14ed4b40e6f150077e21a84", size = 64118, upload-time = "2025-10-08T09:15:23.402Z" }, + { url = "https://files.pythonhosted.org/packages/22/71/201105712d0a2ff07b7873ed3c220292fb2ea5120603c00c4b634bcdafb3/msgpack-1.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e23ce8d5f7aa6ea6d2a2b326b4ba46c985dbb204523759984430db7114f8aa00", size = 81127, upload-time = "2025-10-08T09:15:24.408Z" }, + { url = "https://files.pythonhosted.org/packages/1b/9f/38ff9e57a2eade7bf9dfee5eae17f39fc0e998658050279cbb14d97d36d9/msgpack-1.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:6c15b7d74c939ebe620dd8e559384be806204d73b4f9356320632d783d1f7939", size = 84981, upload-time = "2025-10-08T09:15:25.812Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a9/3536e385167b88c2cc8f4424c49e28d49a6fc35206d4a8060f136e71f94c/msgpack-1.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99e2cb7b9031568a2a5c73aa077180f93dd2e95b4f8d3b8e14a73ae94a9e667e", size = 411885, upload-time = "2025-10-08T09:15:27.22Z" }, + { url = "https://files.pythonhosted.org/packages/2f/40/dc34d1a8d5f1e51fc64640b62b191684da52ca469da9cd74e84936ffa4a6/msgpack-1.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:180759d89a057eab503cf62eeec0aa61c4ea1200dee709f3a8e9397dbb3b6931", size = 419658, upload-time = "2025-10-08T09:15:28.4Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ef/2b92e286366500a09a67e03496ee8b8ba00562797a52f3c117aa2b29514b/msgpack-1.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:04fb995247a6e83830b62f0b07bf36540c213f6eac8e851166d8d86d83cbd014", size = 403290, upload-time = "2025-10-08T09:15:29.764Z" }, + { url = "https://files.pythonhosted.org/packages/78/90/e0ea7990abea5764e4655b8177aa7c63cdfa89945b6e7641055800f6c16b/msgpack-1.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8e22ab046fa7ede9e36eeb4cfad44d46450f37bb05d5ec482b02868f451c95e2", size = 415234, upload-time = "2025-10-08T09:15:31.022Z" }, + { url = "https://files.pythonhosted.org/packages/72/4e/9390aed5db983a2310818cd7d3ec0aecad45e1f7007e0cda79c79507bb0d/msgpack-1.1.2-cp314-cp314-win32.whl", hash = "sha256:80a0ff7d4abf5fecb995fcf235d4064b9a9a8a40a3ab80999e6ac1e30b702717", size = 66391, upload-time = "2025-10-08T09:15:32.265Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f1/abd09c2ae91228c5f3998dbd7f41353def9eac64253de3c8105efa2082f7/msgpack-1.1.2-cp314-cp314-win_amd64.whl", hash = 
"sha256:9ade919fac6a3e7260b7f64cea89df6bec59104987cbea34d34a2fa15d74310b", size = 73787, upload-time = "2025-10-08T09:15:33.219Z" }, + { url = "https://files.pythonhosted.org/packages/6a/b0/9d9f667ab48b16ad4115c1935d94023b82b3198064cb84a123e97f7466c1/msgpack-1.1.2-cp314-cp314-win_arm64.whl", hash = "sha256:59415c6076b1e30e563eb732e23b994a61c159cec44deaf584e5cc1dd662f2af", size = 66453, upload-time = "2025-10-08T09:15:34.225Z" }, + { url = "https://files.pythonhosted.org/packages/16/67/93f80545eb1792b61a217fa7f06d5e5cb9e0055bed867f43e2b8e012e137/msgpack-1.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:897c478140877e5307760b0ea66e0932738879e7aa68144d9b78ea4c8302a84a", size = 85264, upload-time = "2025-10-08T09:15:35.61Z" }, + { url = "https://files.pythonhosted.org/packages/87/1c/33c8a24959cf193966ef11a6f6a2995a65eb066bd681fd085afd519a57ce/msgpack-1.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a668204fa43e6d02f89dbe79a30b0d67238d9ec4c5bd8a940fc3a004a47b721b", size = 89076, upload-time = "2025-10-08T09:15:36.619Z" }, + { url = "https://files.pythonhosted.org/packages/fc/6b/62e85ff7193663fbea5c0254ef32f0c77134b4059f8da89b958beb7696f3/msgpack-1.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5559d03930d3aa0f3aacb4c42c776af1a2ace2611871c84a75afe436695e6245", size = 435242, upload-time = "2025-10-08T09:15:37.647Z" }, + { url = "https://files.pythonhosted.org/packages/c1/47/5c74ecb4cc277cf09f64e913947871682ffa82b3b93c8dad68083112f412/msgpack-1.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70c5a7a9fea7f036b716191c29047374c10721c389c21e9ffafad04df8c52c90", size = 432509, upload-time = "2025-10-08T09:15:38.794Z" }, + { url = "https://files.pythonhosted.org/packages/24/a4/e98ccdb56dc4e98c929a3f150de1799831c0a800583cde9fa022fa90602d/msgpack-1.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:f2cb069d8b981abc72b41aea1c580ce92d57c673ec61af4c500153a626cb9e20", size = 415957, upload-time = "2025-10-08T09:15:40.238Z" }, + { url = "https://files.pythonhosted.org/packages/da/28/6951f7fb67bc0a4e184a6b38ab71a92d9ba58080b27a77d3e2fb0be5998f/msgpack-1.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d62ce1f483f355f61adb5433ebfd8868c5f078d1a52d042b0a998682b4fa8c27", size = 422910, upload-time = "2025-10-08T09:15:41.505Z" }, + { url = "https://files.pythonhosted.org/packages/f0/03/42106dcded51f0a0b5284d3ce30a671e7bd3f7318d122b2ead66ad289fed/msgpack-1.1.2-cp314-cp314t-win32.whl", hash = "sha256:1d1418482b1ee984625d88aa9585db570180c286d942da463533b238b98b812b", size = 75197, upload-time = "2025-10-08T09:15:42.954Z" }, + { url = "https://files.pythonhosted.org/packages/15/86/d0071e94987f8db59d4eeb386ddc64d0bb9b10820a8d82bcd3e53eeb2da6/msgpack-1.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:5a46bf7e831d09470ad92dff02b8b1ac92175ca36b087f904a0519857c6be3ff", size = 85772, upload-time = "2025-10-08T09:15:43.954Z" }, + { url = "https://files.pythonhosted.org/packages/81/f2/08ace4142eb281c12701fc3b93a10795e4d4dc7f753911d836675050f886/msgpack-1.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d99ef64f349d5ec3293688e91486c5fdb925ed03807f64d98d205d2713c60b46", size = 70868, upload-time = "2025-10-08T09:15:44.959Z" }, +] + [[package]] name = "numpy" version = "2.4.4" @@ -876,10 +939,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/13/24b0288c553dc8d61f44c4d0746fe9bb1e1bd29d1e70571658536e4c0f72/prek-0.3.11-py3-none-win_arm64.whl", hash = "sha256:e4a8f900378a6657c7eb2fc4b12fa5c934edf209d0a24544539842479ec16e0b", size = 5345988, upload-time = "2026-04-27T04:22:50.918Z" }, ] +[[package]] +name = "protobuf" +version = "6.33.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + 
"python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] +sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531, upload-time = "2026-03-18T19:05:00.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739, upload-time = "2026-03-18T19:04:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089, upload-time = "2026-03-18T19:04:50.381Z" }, + { url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737, upload-time = "2026-03-18T19:04:51.866Z" }, + { url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610, upload-time = "2026-03-18T19:04:53.096Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381, upload-time = "2026-03-18T19:04:54.616Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436, upload-time = "2026-03-18T19:04:55.768Z" }, + { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" }, +] + [[package]] name = "protobuf" version = "7.34.1" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.13.*' and sys_platform == 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'emscripten'", + "python_full_version < '3.13' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] sdist = { url = "https://files.pythonhosted.org/packages/6b/6b/a0e95cad1ad7cc3f2c6821fcab91671bd5b78bd42afb357bb4765f29bc41/protobuf-7.34.1.tar.gz", hash = "sha256:9ce42245e704cc5027be797c1db1eb93184d44d1cdd71811fb2d9b25ad541280", size = 454708, upload-time = "2026-03-20T17:34:47.036Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/ec/11/3325d41e6ee15bf1125654301211247b042563bcc898784351252549a8ad/protobuf-7.34.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8b2cc79c4d8f62b293ad9b11ec3aebce9af481fa73e64556969f7345ebf9fc7", size = 429247, upload-time = "2026-03-20T17:34:37.024Z" }, @@ -891,6 +982,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/95/608f665226bca68b736b79e457fded9a2a38c4f4379a4a7614303d9db3bc/protobuf-7.34.1-py3-none-any.whl", hash = 
"sha256:bb3812cd53aefea2b028ef42bd780f5b96407247f20c6ef7c679807e9d188f11", size = 170715, upload-time = "2026-03-20T17:34:45.384Z" }, ] +[[package]] +name = "pyarrow" +version = "24.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261, upload-time = "2026-04-21T10:51:25.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/a9/9686d9f07837f91f775e8932659192e02c74f9d8920524b480b85212cc68/pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810", size = 34981559, upload-time = "2026-04-21T10:47:22.17Z" }, + { url = "https://files.pythonhosted.org/packages/80/b6/0ddf0e9b6ead3474ab087ae598c76b031fc45532bf6a63f3a553440fb258/pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a", size = 36663654, upload-time = "2026-04-21T10:47:28.315Z" }, + { url = "https://files.pythonhosted.org/packages/7c/3b/926382efe8ce27ba729071d3566ade6dfb86bdf112f366000196b2f5780a/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66", size = 45679394, upload-time = "2026-04-21T10:47:34.821Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/829f7d9dfd37c207206081d6dad474d81dde29952401f07f2ba507814818/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb", size = 48863122, upload-time = "2026-04-21T10:47:42.056Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e8/f88ce625fe8babaae64e8db2d417c7653adb3019b08aae85c5ed787dc816/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e", size = 49376032, upload-time = "2026-04-21T10:47:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" }, + { url = "https://files.pythonhosted.org/packages/66/1c/e3e72c8014ad2743ca64a701652c733cc5cbcee15c0463a32a8c55518d9e/pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826", size = 27355660, upload-time = "2026-04-21T10:48:01.718Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" }, + { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" }, + { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = 
"2026-04-21T10:48:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" }, + { url = "https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" }, + { url = "https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" }, + { url = "https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" }, + { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" }, + { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" }, + { url = "https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" }, + { url = "https://files.pythonhosted.org/packages/ad/80/d022a34ff05d2cbedd8ccf841fc1f532ecfa9eb5ed1711b56d0e0ea71fc9/pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838", size = 35007997, upload-time = "2026-04-21T10:49:48.796Z" }, + { url = "https://files.pythonhosted.org/packages/1a/ff/f01485fda6f4e5d441afb8dd5e7681e4db18826c1e271852f5d3957d6a80/pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b", size = 36678720, upload-time = "2026-04-21T10:49:55.858Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/c2/2d2d5fea814237923f71b36495211f20b43a1576f9a4d6da7e751a64ec6f/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795", size = 45741852, upload-time = "2026-04-21T10:50:04.624Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3a/28ba9c1c1ebdbb5f1b94dfebb46f207e52e6a554b7fe4132540fde29a3a0/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26", size = 48889852, upload-time = "2026-04-21T10:50:12.293Z" }, + { url = "https://files.pythonhosted.org/packages/df/51/4a389acfd31dca009f8fb82d7f510bb4130f2b3a8e18cf00194d0687d8ac/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde", size = 49445207, upload-time = "2026-04-21T10:50:20.677Z" }, + { url = "https://files.pythonhosted.org/packages/19/4b/0bab2b23d2ae901b1b9a03c0efd4b2d070256f8ce3fc43f6e58c167b2081/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76", size = 51954117, upload-time = "2026-04-21T10:50:29.14Z" }, + { url = "https://files.pythonhosted.org/packages/29/88/f4e9145da0417b3d2c12035a8492b35ff4a3dbc653e614fcfb51d9dedb38/pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e", size = 28001155, upload-time = "2026-04-21T10:51:22.337Z" }, + { url = "https://files.pythonhosted.org/packages/79/4f/46a49a63f43526da895b1a45bbb51d5baf8e4d77159f8528fc3e5490007f/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05", size = 35250387, upload-time = "2026-04-21T10:50:35.552Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/da/d5e0cd5ef00796922404806d5f00325cdadc3441ce2c13fe7115f2df9a64/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a", size = 36797102, upload-time = "2026-04-21T10:50:42.417Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/5904145b0a593a05236c882933d439b5720f0a145381179063722fbfc123/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072", size = 45745118, upload-time = "2026-04-21T10:50:49.324Z" }, + { url = "https://files.pythonhosted.org/packages/13/d3/cca42fe166d1c6e4d5b80e530b7949104d10e17508a90ae202dac205ce2a/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931", size = 48844765, upload-time = "2026-04-21T10:50:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/b0/49/942c3b79878ba928324d1e17c274ed84581db8c0a749b24bcf4cbdf15bd3/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699", size = 49471890, upload-time = "2026-04-21T10:51:02.439Z" }, + { url = "https://files.pythonhosted.org/packages/76/97/ff71431000a75d84135a1ace5ca4ba11726a231a8007bbb320a4c54075d5/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136", size = 51932250, upload-time = "2026-04-21T10:51:10.576Z" }, + { url = "https://files.pythonhosted.org/packages/51/be/6f79d55816d5c22557cf27533543d5d70dfe692adfbee4b99f2760674f38/pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19", size = 28131282, upload-time = "2026-04-21T10:51:16.815Z" }, +] + [[package]] name = "pybind11" version = "3.0.4"