From 5b81e9d8f47a01c38a7b828f84de11aea869c0b0 Mon Sep 17 00:00:00 2001 From: Ernst Leierzopf Date: Thu, 14 May 2026 20:17:17 +0200 Subject: [PATCH 01/16] first commit for EntropyDetector. --- docs/detectors.md | 1 + docs/detectors/entropy.md | 62 ++++ .../detectors/entropy_detector.py | 147 ++++++++ tests/test_detectors/test_entropy_detector.py | 313 ++++++++++++++++++ 4 files changed, 523 insertions(+) create mode 100644 docs/detectors/entropy.md create mode 100644 src/detectmatelibrary/detectors/entropy_detector.py create mode 100644 tests/test_detectors/test_entropy_detector.py diff --git a/docs/detectors.md b/docs/detectors.md index 26bd97a..486bbf2 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -89,6 +89,7 @@ List of detectors: * [Combo Detector](detectors/combo.md): Detect new combination of variables in the logs. * [New Event](detectors/new_event.md): Detect new events in the variables in the logs. * [Rule Based](detectors/rule_based.md): Detect anomalies based in a set of rules. +* [Entropy](detectors/entropy.md): Detect entropy-based anomalies in the logs. ## Configuration diff --git a/docs/detectors/entropy.md b/docs/detectors/entropy.md new file mode 100644 index 0000000..8ca83be --- /dev/null +++ b/docs/detectors/entropy.md @@ -0,0 +1,62 @@ +# Entropy Detector + +The Entropy Detector raises alerts when a monitored field takes a value that was not observed during training. + +| | Schema | Description | +|------------|----------------------------|--------------------| +| **Input** | [ParserSchema](../schemas.md) | Structured log | +| **Output** | [DetectorSchema](../schemas.md) | Alert / finding | + +## Description + +This detector maintains a lightweight set of observed values per monitored field and emits an alert when a value not present in the set is seen for the first time (subject to configuration). 
class EntropyDetectorConfig(CoreDetectorConfig):
    """Configuration for :class:`EntropyDetector`."""

    # Identifier forwarded as ``method_type`` when a config is auto-generated.
    method_type: str = "entropy_detector"

    # Whether set_configuration() selects variables classified as "STABLE".
    use_stable_vars: bool = True
    # Whether set_configuration() selects variables classified as "STATIC".
    use_static_vars: bool = True


class EntropyDetector(CoreDetector):
    """Detect entropy-based anomalies in log data.

    NOTE(review): the logic visible in this class learns the set of values
    seen per configured variable during training and flags values that are
    not in that set. No entropy computation is performed here yet.
    """

    def __init__(
        self,
        name: str = "EntropyDetector",
        config: "EntropyDetectorConfig | dict | None" = None,
    ) -> None:
        """Create the detector.

        Args:
            name: Detector name; also used to look up the detector section
                when ``config`` is given as a dictionary.
            config: An ``EntropyDetectorConfig``, a configuration dictionary,
                or ``None`` to use a fresh default configuration.
        """
        # Build the default per instance: a default argument would be
        # evaluated once at definition time and shared by all detectors.
        if config is None:
            config = EntropyDetectorConfig()
        elif isinstance(config, dict):
            config = EntropyDetectorConfig.from_dict(config, name)

        super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config)
        self.config: EntropyDetectorConfig  # type narrowing for IDE
        self.persistency = EventPersistency(
            event_data_class=EventStabilityTracker,
        )
        # Separate store fed by configure(); set_configuration() reads the
        # per-variable classification from it to select variables.
        self.auto_conf_persistency = EventPersistency(
            event_data_class=EventStabilityTracker
        )

    def train(self, input_: ParserSchema) -> None:  # type: ignore
        """Train the detector by learning values from the input data."""
        configured_variables = get_configured_variables(input_, self.config.events)
        self.persistency.ingest_event(
            event_id=input_["EventID"],
            event_template=input_["template"],
            named_variables=configured_variables
        )
        if self.config.global_instances:
            global_vars = get_global_variables(input_, self.config.global_instances)
            if global_vars:
                # Global variables are stored under a dedicated event ID.
                self.persistency.ingest_event(
                    event_id=GLOBAL_EVENT_ID,
                    event_template=input_["template"],
                    named_variables=global_vars
                )

    @staticmethod
    def _collect_unknown_values(  # type: ignore
        label: str, event_tracker, observed: dict, alerts: dict[str, str]
    ) -> float:
        """Record an alert for every tracked variable whose value is unseen.

        Args:
            label: Prefix of the alert key (e.g. ``"EventID 1"`` or ``"Global"``).
            event_tracker: Trained tracker exposing ``get_data()``.
            observed: Variable name to value mapping taken from the input.
            alerts: Mapping updated in place with the new alerts.

        Returns:
            The number of unknown values found, as a float score increment.
        """
        score = 0.0
        for var_name, var_tracker in event_tracker.get_data().items():
            value = observed.get(var_name)
            # Variables absent from this input cannot be judged.
            if value is None:
                continue
            if value not in var_tracker.unique_set:
                alerts[f"{label} - {var_name}"] = f"Unknown value: '{value}'"
                score += 1.0
        return score

    def detect(
        self, input_: ParserSchema, output_: DetectorSchema  # type: ignore
    ) -> bool:
        """Flag values of configured variables that were not seen in training.

        Args:
            input_: Parsed log entry to check.
            output_: Detector output; ``score``, ``description`` and
                ``alertsObtain`` are filled in only when something is flagged.

        Returns:
            ``True`` if at least one unknown value was found, else ``False``.
        """
        alerts: dict[str, str] = {}
        configured_variables = get_configured_variables(input_, self.config.events)
        overall_score = 0.0

        current_event_id = input_["EventID"]
        known_events = self.persistency.get_events_data()

        # Events never seen in training have no tracker and are not judged.
        if current_event_id in known_events:
            overall_score += self._collect_unknown_values(
                f"EventID {current_event_id}",
                known_events[current_event_id],
                configured_variables,
                alerts,
            )

        if self.config.global_instances and GLOBAL_EVENT_ID in known_events:
            # train() guards against a falsy result here; mirror that guard
            # so a missing result is treated as "no global variables".
            global_vars = (
                get_global_variables(input_, self.config.global_instances) or {}
            )
            overall_score += self._collect_unknown_values(
                "Global", known_events[GLOBAL_EVENT_ID], global_vars, alerts
            )

        if overall_score > 0:
            output_["score"] = overall_score
            output_["description"] = (
                f"{self.name} detects values not encountered in training as anomalies."
            )
            output_["alertsObtain"].update(alerts)
            return True

        return False

    def configure(self, input_: ParserSchema) -> None:  # type: ignore
        """Feed one parsed log entry into the auto-configuration store."""
        self.auto_conf_persistency.ingest_event(
            event_id=input_["EventID"],
            event_template=input_["template"],
            variables=input_["variables"],
            named_variables=input_["logFormatVariables"],
        )

    @override
    def post_train(self) -> None:
        """Validate config coverage after training unless auto-config is on."""
        if not self.config.auto_config:
            validate_config_coverage(self.name, self.config.events, self.persistency)

    def set_configuration(self) -> None:
        """Generate the detector config from the auto-configuration store.

        For each event, variables classified as "STABLE" and/or "STATIC"
        (depending on ``use_stable_vars`` / ``use_static_vars``) are selected.
        A warning is logged when the resulting configuration has no events.
        """
        variables = {}
        for event_id, tracker in self.auto_conf_persistency.get_events_data().items():
            stable = []
            if self.config.use_stable_vars:
                stable = tracker.get_features_by_classification("STABLE")  # type: ignore
            static = []
            if self.config.use_static_vars:
                static = tracker.get_features_by_classification("STATIC")  # type: ignore
            vars_ = stable + static
            # Events without any selected variable are left out entirely.
            if vars_:
                variables[event_id] = vars_
        config_dict = generate_detector_config(
            variable_selection=variables,
            detector_name=self.name,
            method_type=self.config.method_type,
        )
        # Rebuild the config object from the generated dictionary.
        self.config = EntropyDetectorConfig.from_dict(config_dict, self.name)
        events = self.config.events
        if isinstance(events, EventsConfig) and not events.events:
            logger.warning(
                f"[{self.name}] auto_config=True generated an empty configuration. "
                "No stable variables were found in configure-phase data. "
                "The detector will produce no alerts."
            )
class TestEntropyDetectorInitialization:
    """Test EntropyDetector initialization and configuration."""

    def test_default_initialization(self):
        """Test detector initialization with default parameters."""
        detector = EntropyDetector()

        assert detector.name == "EntropyDetector"
        assert hasattr(detector, 'config')
        assert detector.data_buffer.mode == BufferMode.NO_BUF
        assert detector.input_schema == schemas.ParserSchema
        assert detector.output_schema == schemas.DetectorSchema
        assert hasattr(detector, 'persistency')

    def test_custom_config_initialization(self):
        """Test detector initialization with custom configuration."""
        detector = EntropyDetector(name="CustomInit", config=config)

        assert detector.name == "CustomInit"
        assert hasattr(detector, 'persistency')
        assert isinstance(detector.persistency.events_data, dict)


class TestEntropyDetectorTraining:
    """Test EntropyDetector training functionality."""

    def test_train_multiple_values(self):
        """Test training with multiple different values."""
        detector = EntropyDetector(config=config, name="MultipleDetector")
        # Train with multiple values (only event 1 should be tracked per config)
        for event in range(3):
            for level in ["INFO", "WARNING", "ERROR"]:
                parser_data = schemas.ParserSchema({
                    "parserType": "test",
                    "EventID": event,
                    "template": "test template",
                    "variables": ["0", "assa"],
                    "logID": "1",
                    "parsedLogID": "1",
                    "parserID": "test_parser",
                    "log": "test log message",
                    "logFormatVariables": {"level": level}
                })
                detector.train(parser_data)

        # Only event 1 should be tracked (based on events config)
        assert len(detector.persistency.events_data) == 1
        event_data = detector.persistency.get_event_data(1)
        assert event_data is not None
        # Check the level values
        assert "INFO" in event_data["level"].unique_set
        assert "WARNING" in event_data["level"].unique_set
        assert "ERROR" in event_data["level"].unique_set
        # Check the variable at position 1 (named "test")
        assert "assa" in event_data["test"].unique_set


class TestEntropyDetectorDetection:
    """Test EntropyDetector detection functionality."""

    def test_detect_known_value_no_alert(self):
        """An event ID that was never trained produces no alert.

        NOTE(review): despite the test name, the detection input uses
        EventID 12 and level "CRITICAL", neither of which was trained; the
        absence of an alert comes from the event ID being unknown.
        """
        detector = EntropyDetector(config=config, name="MultipleDetector")

        # Train event 1 with a single value per variable
        train_data = schemas.ParserSchema({
            "parserType": "test",
            "EventID": 1,
            "template": "test template",
            "variables": ["adsasd", "asdasd"],
            "logID": "1",
            "parsedLogID": "1",
            "parserID": "test_parser",
            "log": "test log message",
            "logFormatVariables": {"level": "INFO"}
        })
        detector.train(train_data)

        # Detect on an event ID (12) that was never trained
        test_data = schemas.ParserSchema({
            "parserType": "test",
            "EventID": 12,
            "template": "test template",
            "variables": ["adsasd"],
            "logID": "2",
            "parsedLogID": "2",
            "parserID": "test_parser",
            "log": "test log message",
            "logFormatVariables": {"level": "CRITICAL"}
        })
        output = schemas.DetectorSchema()

        result = detector.detect(test_data, output)

        assert not result
        assert output.score == 0.0

    def test_detect_known_value_alert(self):
        """An unseen header value on a trained event raises one alert.

        NOTE(review): despite the test name, the alert is caused by the
        level "CRITICAL", which was not seen in training (only "INFO" was).
        """
        detector = EntropyDetector(config=config, name="MultipleDetector")

        # Train event 1 with a single value per variable
        train_data = schemas.ParserSchema({
            "parserType": "test",
            "EventID": 1,
            "template": "test template",
            "variables": ["adsasd", "asdasd"],
            "logID": "1",
            "parsedLogID": "1",
            "parserID": "test_parser",
            "log": "test log message",
            "logFormatVariables": {"level": "INFO"}
        })
        detector.train(train_data)

        # Detect on the same event with the same variables but a new level
        test_data = schemas.ParserSchema({
            "parserType": "test",
            "EventID": 1,
            "template": "test template",
            "variables": ["adsasd", "asdasd"],
            "logID": "2",
            "parsedLogID": "2",
            "parserID": "test_parser",
            "log": "test log message",
            "logFormatVariables": {"level": "CRITICAL"}
        })
        output = schemas.DetectorSchema()

        result = detector.detect(test_data, output)

        assert result
        # Exactly one configured variable ("level") carries an unknown value
        assert output.score == 1.0