diff --git a/docs/content/en/latest/pipelines/ldm_extension/_index.md b/docs/content/en/latest/pipelines/ldm_extension/_index.md
index 7435b59b8..2251b01a8 100644
--- a/docs/content/en/latest/pipelines/ldm_extension/_index.md
+++ b/docs/content/en/latest/pipelines/ldm_extension/_index.md
@@ -45,6 +45,8 @@ The custom dataset represents a new dataset appended to the child LDM. It is def
 | dataset_reference_source_column_data_type | [ColumnDataType](#columndatatype) | Column data type. |
 | workspace_data_filter_id | string | ID of the workspace data filter to use. |
 | workspace_data_filter_column_name | string | Name of the column in custom dataset used for filtering. |
+| dataset_description | string \| None | Optional declarative description on the custom dataset. |
+| dataset_tags | string[] \| None | Optional tag list; when omitted, defaults to a single tag derived from the dataset display name. |
 
 #### Validity constraints
 
@@ -63,6 +65,8 @@ The custom fields define the individual fields in the custom datasets defined ab
 | custom_field_type | [CustomFieldType](#customfieldtype) | Indicates whether the field represents an attribute, a date, or a fact. |
 | custom_field_source_column | string | Name of the column in the physical data model. |
 | custom_field_source_column_data_type | [ColumnDataType](#columndatatype) | Data type of the field. |
+| description | string \| None | Optional declarative description on the attribute, fact, or date dataset. |
+| tags | string[] \| None | Optional tag list; when omitted, defaults to a single tag derived from the dataset display name. |
 
 #### Validity constraints
 
@@ -128,6 +132,25 @@ ldm_extension_manager.process(
 
 ```
 
+### Merging into an existing child workspace LDM
+
+By default, `process` **replaces** the child workspace LDM with the declarative fragment built from your inputs. Any prior custom datasets or date instances that aren't in the current call are lost.
+
+Set `merge_into_existing_ldm=True` to switch to an **append/update** behavior: `process` loads the current workspace LDM first, replaces any dataset or date instance whose `id` matches one in your input, and keeps the rest of the model as is (including previously uploaded custom extensions).
+
+Optional cleanup: when `remove_managed_datasets_missing_from_input=True` and `management_tag` is set, datasets that carry that tag but are **not** in the current `process` call are removed from the merged LDM before the upload. This lets tools such as BCA reliably delete their own obsolete custom datasets without touching anything else.
+
+```python
+ldm_extension_manager.process(
+    custom_datasets=custom_dataset_definitions,
+    custom_fields=custom_field_definitions,
+    check_relations=False,
+    merge_into_existing_ldm=True,
+    remove_managed_datasets_missing_from_input=True,
+    management_tag="bca_tooling_managed",
+)
+```
+
 ## Example
 
 Here is a complete example of extending a child workspace's LDM:
diff --git a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py
index d6f8c2b02..6f43a037c 100644
--- a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py
+++ b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py
@@ -5,6 +5,8 @@ into objects defined in the GoodData Python SDK.
""" +import copy + from gooddata_sdk.catalog.identifier import ( CatalogDatasetWorkspaceDataFilterIdentifier, CatalogGrainIdentifier, @@ -36,11 +38,26 @@ from gooddata_pipelines.ldm_extension.models.custom_data_object import ( ColumnDataType, CustomDataset, + CustomDatasetDefinition, CustomFieldDefinition, CustomFieldType, ) +def _effective_field_tags( + dataset_name: str, custom_field: CustomFieldDefinition +) -> list[str]: + if custom_field.tags is not None: + return list(custom_field.tags) + return [dataset_name] + + +def _effective_dataset_tags(definition: CustomDatasetDefinition) -> list[str]: + if definition.dataset_tags is not None: + return list(definition.dataset_tags) + return [definition.dataset_name] + + class LdmExtensionDataProcessor: """Create GoodData LDM from validated custom datasets and fields.""" @@ -77,7 +94,8 @@ def _attribute_from_field( source_column=custom_field.custom_field_source_column, labels=[], source_column_data_type=custom_field.custom_field_source_column_data_type.value, - tags=[dataset_name], + description=custom_field.description, + tags=_effective_field_tags(dataset_name, custom_field), ) @staticmethod @@ -91,7 +109,8 @@ def _fact_from_field( title=custom_field.custom_field_name, source_column=custom_field.custom_field_source_column, source_column_data_type=custom_field.custom_field_source_column_data_type.value, - tags=[dataset_name], + description=custom_field.description, + tags=_effective_field_tags(dataset_name, custom_field), ) def _date_from_field( @@ -109,7 +128,8 @@ def _date_from_field( title_pattern="%titleBase - %granularityTitle", ), granularities=self.DATE_GRANULARITIES, - tags=[dataset_name], + description=custom_field.description, + tags=_effective_field_tags(dataset_name, custom_field), ) @staticmethod @@ -258,7 +278,7 @@ def datasets_to_ldm( ), ] + date_references, - description=None, + description=dataset.definition.dataset_description, attributes=attributes, facts=facts, data_source_table_id=dataset_source_table_id, @@ -278,7 +298,7 @@ def datasets_to_ldm( filter_column_data_type=ColumnDataType.STRING.value, ) ], - tags=[dataset.definition.dataset_name], + tags=_effective_dataset_tags(dataset.definition), ) ) @@ -287,3 +307,60 @@ def datasets_to_ldm( datasets=declarative_datasets, date_instances=date_instances ) return CatalogDeclarativeModel(ldm=ldm) + + def merge_custom_ldm_into_existing( + self, + existing: CatalogDeclarativeModel, + custom_datasets: dict[DatasetId, CustomDataset], + *, + remove_managed_datasets_missing_from_input: bool = False, + management_tag: str | None = None, + ) -> CatalogDeclarativeModel: + """Merge datasets produced from ``custom_datasets`` into an existing declarative LDM. + + Custom datasets and date instances that share an ``id`` with the fragment replace + their previous definitions. When ``remove_managed_datasets_missing_from_input`` is + set, datasets that carry ``management_tag`` but are absent from the incoming + fragment are removed first (typical for tooling-owned extension datasets). + + Any other pre-existing LDM objects (previously uploaded extensions whose ids + are not in the incoming fragment) are preserved unchanged. 
+ """ + fragment = self.datasets_to_ldm(custom_datasets) + fragment_ldm = fragment.ldm or CatalogDeclarativeLdm( + datasets=[], date_instances=[] + ) + + result = copy.deepcopy(existing) + result_ldm = result.ldm or CatalogDeclarativeLdm( + datasets=[], date_instances=[] + ) + result.ldm = result_ldm + + incoming_dataset_ids = {d.id for d in fragment_ldm.datasets} + incoming_date_ids = {d.id for d in fragment_ldm.date_instances} + + datasets = list(result_ldm.datasets) + if remove_managed_datasets_missing_from_input and management_tag: + datasets = [ + d + for d in datasets + if not ( + d.tags + and management_tag in d.tags + and d.id not in incoming_dataset_ids + ) + ] + datasets = [d for d in datasets if d.id not in incoming_dataset_ids] + datasets.extend(fragment_ldm.datasets) + result_ldm.datasets = datasets + + date_instances = [ + d + for d in result_ldm.date_instances + if d.id not in incoming_date_ids + ] + date_instances.extend(fragment_ldm.date_instances) + result_ldm.date_instances = date_instances + + return result diff --git a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py index f08f017e2..cd5d797f0 100644 --- a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py +++ b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/ldm_extension_manager.py @@ -3,6 +3,9 @@ from pathlib import Path +from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import ( + CatalogDeclarativeModel, +) from gooddata_sdk.sdk import GoodDataSdk from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content @@ -147,9 +150,35 @@ def _new_ldm_does_not_invalidate_relations( # If the set of new invalid relations is a subset of the current one, return set_new_invalid_relations.issubset(set_current_invalid_relations) + def _ldm_payload_for_workspace( + self, + workspace_id: str, + datasets: dict[DatasetId, CustomDataset], + *, + merge_into_existing_ldm: bool, + remove_managed_datasets_missing_from_input: bool, + management_tag: str | None, + ) -> CatalogDeclarativeModel: + """Build the declarative LDM payload to upload for one workspace.""" + if not merge_into_existing_ldm: + return self._processor.datasets_to_ldm(datasets) + current = self._sdk.catalog_workspace_content.get_declarative_ldm( + workspace_id + ) + return self._processor.merge_custom_ldm_into_existing( + current, + datasets, + remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input, + management_tag=management_tag, + ) + def _process_with_relations_check( self, validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]], + *, + merge_into_existing_ldm: bool = False, + remove_managed_datasets_missing_from_input: bool = False, + management_tag: str | None = None, ) -> None: """Check whether relations of analytical objects are valid before and after updating the LDM in the GoodData workspace. @@ -173,7 +202,13 @@ def _process_with_relations_check( # Put the LDM with custom datasets into the GoodData workspace. 
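+            # _ldm_payload_for_workspace returns either the plain extension
+            # fragment or, when merging, the fragment folded into the current
+            # workspace LDM.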
self._sdk.catalog_workspace_content.put_declarative_ldm(
                 workspace_id=workspace_id,
-                ldm=self._processor.datasets_to_ldm(datasets),
+                ldm=self._ldm_payload_for_workspace(
+                    workspace_id,
+                    datasets,
+                    merge_into_existing_ldm=merge_into_existing_ldm,
+                    remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+                    management_tag=management_tag,
+                ),
             )
 
             # Get a set of objects with invalid relations from the new workspace state
@@ -232,13 +267,23 @@ def _log_diff_invalid_relations(
     def _process_without_relations_check(
         self,
         validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
+        *,
+        merge_into_existing_ldm: bool = False,
+        remove_managed_datasets_missing_from_input: bool = False,
+        management_tag: str | None = None,
     ) -> None:
         """Update the LDM in the GoodData workspace without checking relations."""
         for workspace_id, datasets in validated_data.items():
             # Put the LDM with custom datasets into the GoodData workspace.
             self._sdk.catalog_workspace_content.put_declarative_ldm(
                 workspace_id=workspace_id,
-                ldm=self._processor.datasets_to_ldm(datasets),
+                ldm=self._ldm_payload_for_workspace(
+                    workspace_id,
+                    datasets,
+                    merge_into_existing_ldm=merge_into_existing_ldm,
+                    remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
+                    management_tag=management_tag,
+                ),
             )
 
             self._log_success_message(workspace_id)
@@ -251,6 +296,9 @@ def process(
         custom_datasets: list[CustomDatasetDefinition],
         custom_fields: list[CustomFieldDefinition],
         check_relations: bool = True,
+        merge_into_existing_ldm: bool = False,
+        remove_managed_datasets_missing_from_input: bool = False,
+        management_tag: str | None = None,
     ) -> None:
         """Create custom datasets and fields in GoodData workspaces.
 
@@ -266,6 +314,14 @@ after updating the LDM. If the number of invalid relations increases,
                 the LDM will be reverted to its previous state. If False, the check is
                 skipped and the LDM is updated directly. Defaults to True.
+            merge_into_existing_ldm (bool): When True, load the workspace LDM first and merge
+                the generated custom datasets and date instances into it instead of uploading
+                only the extension fragment. Defaults to False for backward compatibility.
+            remove_managed_datasets_missing_from_input (bool): When ``merge_into_existing_ldm``
+                is True, remove existing datasets that contain ``management_tag`` but whose
+                dataset id is not present in this ``process`` call (tooling cleanup).
+            management_tag (str | None): Tag value used with
+                ``remove_managed_datasets_missing_from_input``.
 
         Raises:
             ValueError: If there are validation errors in the dataset or field definitions.
@@ -278,6 +334,16 @@
 
         if check_relations:
             # Process the validated data with relations check.
- self._process_with_relations_check(validated_data) + self._process_with_relations_check( + validated_data, + merge_into_existing_ldm=merge_into_existing_ldm, + remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input, + management_tag=management_tag, + ) else: - self._process_without_relations_check(validated_data) + self._process_without_relations_check( + validated_data, + merge_into_existing_ldm=merge_into_existing_ldm, + remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input, + management_tag=management_tag, + ) diff --git a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py index b241d5e34..9c0dae3a4 100644 --- a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py +++ b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py @@ -7,7 +7,7 @@ from enum import Enum -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, Field, model_validator class CustomFieldType(str, Enum): @@ -42,6 +42,14 @@ class CustomFieldDefinition(BaseModel): custom_field_type: CustomFieldType custom_field_source_column: str custom_field_source_column_data_type: ColumnDataType + description: str | None = Field( + default=None, + description="Declarative description on the attribute, fact, or date dataset.", + ) + tags: list[str] | None = Field( + default=None, + description="If set, replaces the default tag list (dataset display name only).", + ) @model_validator(mode="after") def check_ids_not_equal(self) -> "CustomFieldDefinition": @@ -68,6 +76,14 @@ class CustomDatasetDefinition(BaseModel): dataset_reference_source_column_data_type: ColumnDataType workspace_data_filter_id: str workspace_data_filter_column_name: str + dataset_description: str | None = Field( + default=None, + description="Declarative description on the custom dataset.", + ) + dataset_tags: list[str] | None = Field( + default=None, + description="If set, replaces the default tag list (dataset display name only).", + ) @model_validator(mode="after") def check_source(self) -> "CustomDatasetDefinition": diff --git a/packages/gooddata-pipelines/tests/test_ldm_extension/conftest.py b/packages/gooddata-pipelines/tests/test_ldm_extension/conftest.py new file mode 100644 index 000000000..86754a5dc --- /dev/null +++ b/packages/gooddata-pipelines/tests/test_ldm_extension/conftest.py @@ -0,0 +1,84 @@ +# (C) 2025 GoodData Corporation +import pytest + +from gooddata_pipelines.ldm_extension.models.custom_data_object import ( + ColumnDataType, + CustomDataset, + CustomDatasetDefinition, + CustomFieldDefinition, + CustomFieldType, +) + + +@pytest.fixture +def mock_custom_field_attribute(): + return CustomFieldDefinition( + workspace_id="workspace1", + dataset_id="ds1", + custom_field_id="attr1", + custom_field_name="Attribute 1", + custom_field_type=CustomFieldType.ATTRIBUTE, + custom_field_source_column="col_attr1", + custom_field_source_column_data_type=ColumnDataType.STRING, + ) + + +@pytest.fixture +def mock_custom_field_fact(): + return CustomFieldDefinition( + workspace_id="workspace1", + dataset_id="ds1", + custom_field_id="fact1", + custom_field_name="Fact 1", + custom_field_type=CustomFieldType.FACT, + custom_field_source_column="col_fact1", + custom_field_source_column_data_type=ColumnDataType.INT, + ) + + +@pytest.fixture +def 
mock_custom_field_date(): + return CustomFieldDefinition( + workspace_id="workspace1", + dataset_id="ds1", + custom_field_id="date1", + custom_field_name="Date 1", + custom_field_type=CustomFieldType.DATE, + custom_field_source_column="col_date1", + custom_field_source_column_data_type=ColumnDataType.DATE, + ) + + +@pytest.fixture +def mock_dataset_definition(): + return CustomDatasetDefinition( + workspace_id="workspace1", + dataset_id="ds1", + dataset_name="Dataset 1", + dataset_source_table="table1", + dataset_datasource_id="ds_source", + dataset_source_sql=None, + parent_dataset_reference="parent_ds", + parent_dataset_reference_attribute_id="parent_attr", + dataset_reference_source_column="ref_col", + dataset_reference_source_column_data_type=ColumnDataType.STRING, + workspace_data_filter_id="wdf1", + workspace_data_filter_column_name="col1", + ) + + +@pytest.fixture +def mock_custom_dataset( + mock_dataset_definition, + mock_custom_field_attribute, + mock_custom_field_fact, + mock_custom_field_date, +): + return CustomDataset( + definition=mock_dataset_definition, + custom_fields=[ + mock_custom_field_attribute, + mock_custom_field_fact, + mock_custom_field_date, + ], + ) diff --git a/packages/gooddata-pipelines/tests/test_ldm_extension/test_input_processor.py b/packages/gooddata-pipelines/tests/test_ldm_extension/test_input_processor.py index 851e903fa..8c50cd571 100644 --- a/packages/gooddata-pipelines/tests/test_ldm_extension/test_input_processor.py +++ b/packages/gooddata-pipelines/tests/test_ldm_extension/test_input_processor.py @@ -1,21 +1,29 @@ # (C) 2025 GoodData Corporation -import pytest - from gooddata_pipelines.ldm_extension.input_processor import ( LdmExtensionDataProcessor, ) from gooddata_pipelines.ldm_extension.models.custom_data_object import ( ColumnDataType, CustomDataset, - CustomDatasetDefinition, CustomFieldDefinition, CustomFieldType, ) -@pytest.fixture -def mock_custom_field_attribute(): - return CustomFieldDefinition( +def test_attribute_from_field(mock_custom_field_attribute): + attr = LdmExtensionDataProcessor._attribute_from_field( + "dataset_name", mock_custom_field_attribute + ) + assert attr.id == "attr1" + assert attr.title == "Attribute 1" + assert attr.source_column == "col_attr1" + assert attr.source_column_data_type == ColumnDataType.STRING.value + assert attr.tags == ["dataset_name"] + assert attr.description is None + + +def test_attribute_from_field_custom_tags_and_description(): + field = CustomFieldDefinition( workspace_id="workspace1", dataset_id="ds1", custom_field_id="attr1", @@ -23,79 +31,14 @@ def mock_custom_field_attribute(): custom_field_type=CustomFieldType.ATTRIBUTE, custom_field_source_column="col_attr1", custom_field_source_column_data_type=ColumnDataType.STRING, + tags=["t1", "t2"], + description="Attr desc", ) - - -@pytest.fixture -def mock_custom_field_fact(): - return CustomFieldDefinition( - workspace_id="workspace1", - dataset_id="ds1", - custom_field_id="fact1", - custom_field_name="Fact 1", - custom_field_type=CustomFieldType.FACT, - custom_field_source_column="col_fact1", - custom_field_source_column_data_type=ColumnDataType.INT, - ) - - -@pytest.fixture -def mock_custom_field_date(): - return CustomFieldDefinition( - workspace_id="workspace1", - dataset_id="ds1", - custom_field_id="date1", - custom_field_name="Date 1", - custom_field_type=CustomFieldType.DATE, - custom_field_source_column="col_date1", - custom_field_source_column_data_type=ColumnDataType.DATE, - ) - - -@pytest.fixture -def mock_dataset_definition(): - 
return CustomDatasetDefinition( - workspace_id="workspace1", - dataset_id="ds1", - dataset_name="Dataset 1", - dataset_source_table="table1", - dataset_datasource_id="ds_source", - dataset_source_sql=None, - parent_dataset_reference="parent_ds", - parent_dataset_reference_attribute_id="parent_attr", - dataset_reference_source_column="ref_col", - dataset_reference_source_column_data_type=ColumnDataType.STRING, - workspace_data_filter_id="wdf1", - workspace_data_filter_column_name="col1", - ) - - -@pytest.fixture -def mock_custom_dataset( - mock_dataset_definition, - mock_custom_field_attribute, - mock_custom_field_fact, - mock_custom_field_date, -): - return CustomDataset( - definition=mock_dataset_definition, - custom_fields=[ - mock_custom_field_attribute, - mock_custom_field_fact, - mock_custom_field_date, - ], - ) - - -def test_attribute_from_field(mock_custom_field_attribute): attr = LdmExtensionDataProcessor._attribute_from_field( - "dataset_name", mock_custom_field_attribute + "dataset_name", field ) - assert attr.id == "attr1" - assert attr.title == "Attribute 1" - assert attr.source_column == "col_attr1" - assert attr.source_column_data_type == ColumnDataType.STRING.value - assert attr.tags == ["dataset_name"] + assert attr.tags == ["t1", "t2"] + assert attr.description == "Attr desc" def test_fact_from_field(mock_custom_field_fact): @@ -149,6 +92,20 @@ def test_get_sources_sql_only(mock_dataset_definition): assert sql.statement == "SELECT * FROM foo" +def test_datasets_to_ldm_dataset_tags_and_description(mock_dataset_definition): + mock_dataset_definition.dataset_tags = ["managed", "extra"] + mock_dataset_definition.dataset_description = "DS desc" + mock_dataset_definition.dataset_source_sql = "SELECT 1" + mock_dataset_definition.dataset_source_table = None + ds = CustomDataset(definition=mock_dataset_definition, custom_fields=[]) + processor = LdmExtensionDataProcessor() + model = processor.datasets_to_ldm({"ds1": ds}) + d = model.ldm.datasets[0] + assert d.description == "DS desc" + assert d.tags == ["managed", "extra"] + assert d.sql is not None + + def test_datasets_to_ldm(mock_custom_dataset): print(mock_custom_dataset) processor = LdmExtensionDataProcessor() diff --git a/packages/gooddata-pipelines/tests/test_ldm_extension/test_ldm_extension_manager.py b/packages/gooddata-pipelines/tests/test_ldm_extension/test_ldm_extension_manager.py index 5fc6cc087..376ad4f2b 100644 --- a/packages/gooddata-pipelines/tests/test_ldm_extension/test_ldm_extension_manager.py +++ b/packages/gooddata-pipelines/tests/test_ldm_extension/test_ldm_extension_manager.py @@ -1,7 +1,23 @@ # (C) 2025 GoodData Corporation +from unittest.mock import MagicMock + import pytest from pytest_mock import MockerFixture +from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.dataset.dataset import ( + CatalogDeclarativeDataset, +) +from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import ( + CatalogDeclarativeLdm, + CatalogDeclarativeModel, +) + +from gooddata_pipelines.ldm_extension.input_processor import ( + LdmExtensionDataProcessor, +) +from gooddata_pipelines.ldm_extension.input_validator import ( + LdmExtensionDataValidator, +) from gooddata_pipelines.ldm_extension.ldm_extension_manager import ( LdmExtensionManager, ) @@ -48,7 +64,6 @@ def test_relations_check_success( manager, validated_data, mocker: MockerFixture ): """Relation check passes, workspace layout not reverted.""" - # Setup mocks mocker.patch.object( manager._sdk.catalog_workspace, 
"get_declarative_workspace", @@ -85,7 +100,6 @@ def test_relations_check_success( manager._sdk.catalog_workspace, "put_declarative_workspace" ) - # Should print "Workspace workspace_1 LDM updated." and not revert manager._process_with_relations_check(validated_data) manager._sdk.catalog_workspace_content.put_declarative_ldm.assert_called_once() manager._sdk.catalog_workspace.put_declarative_workspace.assert_not_called() @@ -95,7 +109,6 @@ def test_relations_check_failure_and_revert( manager, validated_data, capsys, mocker: MockerFixture ): """Relation check fails, workspace layout is reverted.""" - # Setup mocks mocker.patch.object(manager._api, "get_workspace_layout") obj1 = make_analytical_object("a", "A", "type", False) obj2 = make_analytical_object("b", "B", "type", False) @@ -122,7 +135,6 @@ def test_relations_check_failure_and_revert( manager._process_with_relations_check(validated_data) - # Should revert and print info about invalid relations manager._sdk.catalog_workspace.put_declarative_workspace.assert_called_once() out = capsys.readouterr().out assert ( @@ -136,7 +148,6 @@ def test_relations_check_fewer_invalid_relations( manager, validated_data, mocker: MockerFixture ): """Fewer invalid relations after LDM update, no revert needed.""" - # Setup mocks obj1 = make_analytical_object("a", "A", "type", False) mocker.patch.object( manager._sdk.catalog_workspace, @@ -192,3 +203,193 @@ def test_log_diff_invalid_relations(manager, capsys): assert "b (type) B" in captured_output assert "d (type) D" in captured_output assert "c (type) C" not in captured_output + + +def _bare_manager(sdk_mock: MagicMock) -> LdmExtensionManager: + """Build a manager with a real ``LdmExtensionDataProcessor`` and a mocked SDK. + + Used by tests that assert the real merge/payload logic end-to-end. Tests that + only need to check dispatch or logging use the ``manager`` fixture above, + which mocks the processor as well. 
+ """ + bare = object.__new__(LdmExtensionManager) + bare._processor = LdmExtensionDataProcessor() + bare._validator = LdmExtensionDataValidator() + bare._sdk = sdk_mock + bare.logger = MagicMock() + return bare + + +def test_ldm_payload_without_merge_returns_fragment_only(mock_custom_dataset): + sdk_mock = MagicMock() + bare = _bare_manager(sdk_mock) + + payload = bare._ldm_payload_for_workspace( + "workspace1", + {"ds1": mock_custom_dataset}, + merge_into_existing_ldm=False, + remove_managed_datasets_missing_from_input=False, + management_tag=None, + ) + + sdk_mock.catalog_workspace_content.get_declarative_ldm.assert_not_called() + assert payload.ldm is not None + assert [d.id for d in payload.ldm.datasets] == ["ds1"] + + +def test_ldm_payload_merges_with_existing_ldm(mock_custom_dataset): + inherited = CatalogDeclarativeDataset( + id="parent_only", + title="Parent", + grain=[], + references=[], + ) + existing = CatalogDeclarativeModel( + ldm=CatalogDeclarativeLdm(datasets=[inherited], date_instances=[]) + ) + sdk_mock = MagicMock() + sdk_mock.catalog_workspace_content.get_declarative_ldm.return_value = ( + existing + ) + bare = _bare_manager(sdk_mock) + + payload = bare._ldm_payload_for_workspace( + "workspace1", + {"ds1": mock_custom_dataset}, + merge_into_existing_ldm=True, + remove_managed_datasets_missing_from_input=False, + management_tag=None, + ) + + sdk_mock.catalog_workspace_content.get_declarative_ldm.assert_called_once_with( + "workspace1" + ) + assert payload.ldm is not None + assert {d.id for d in payload.ldm.datasets} == {"parent_only", "ds1"} + + +def test_ldm_payload_merge_forwards_cleanup_flags(mock_custom_dataset): + managed_old = CatalogDeclarativeDataset( + id="managed_old", + title="Old", + grain=[], + references=[], + tags=["bca_tooling_managed"], + ) + existing = CatalogDeclarativeModel( + ldm=CatalogDeclarativeLdm(datasets=[managed_old], date_instances=[]) + ) + sdk_mock = MagicMock() + sdk_mock.catalog_workspace_content.get_declarative_ldm.return_value = ( + existing + ) + bare = _bare_manager(sdk_mock) + + payload = bare._ldm_payload_for_workspace( + "workspace1", + {"ds1": mock_custom_dataset}, + merge_into_existing_ldm=True, + remove_managed_datasets_missing_from_input=True, + management_tag="bca_tooling_managed", + ) + + assert payload.ldm is not None + assert [d.id for d in payload.ldm.datasets] == ["ds1"] + + +def test_process_without_relations_check_forwards_merge_kwargs( + mock_custom_dataset, +): + existing = CatalogDeclarativeModel( + ldm=CatalogDeclarativeLdm(datasets=[], date_instances=[]) + ) + sdk_mock = MagicMock() + sdk_mock.catalog_workspace_content.get_declarative_ldm.return_value = ( + existing + ) + bare = _bare_manager(sdk_mock) + + bare._process_without_relations_check( + {"workspace1": {"ds1": mock_custom_dataset}}, + merge_into_existing_ldm=True, + remove_managed_datasets_missing_from_input=False, + management_tag=None, + ) + + sdk_mock.catalog_workspace_content.get_declarative_ldm.assert_called_once_with( + "workspace1" + ) + put_call = sdk_mock.catalog_workspace_content.put_declarative_ldm + put_call.assert_called_once() + kwargs = put_call.call_args.kwargs + assert kwargs["workspace_id"] == "workspace1" + assert [d.id for d in kwargs["ldm"].ldm.datasets] == ["ds1"] + + +def test_process_with_relations_check_happy_path(mock_custom_dataset): + sdk_mock = MagicMock() + bare = _bare_manager(sdk_mock) + bare._get_objects_with_invalid_relations = MagicMock(return_value=[]) + + bare._process_with_relations_check( + {"workspace1": {"ds1": 
mock_custom_dataset}}, + merge_into_existing_ldm=False, + remove_managed_datasets_missing_from_input=False, + management_tag=None, + ) + + sdk_mock.catalog_workspace.get_declarative_workspace.assert_called_once_with( + "workspace1" + ) + put_call = sdk_mock.catalog_workspace_content.put_declarative_ldm + put_call.assert_called_once() + assert put_call.call_args.kwargs["workspace_id"] == "workspace1" + sdk_mock.catalog_workspace.put_declarative_workspace.assert_not_called() + + +def test_process_dispatches_with_relations_check_by_default(): + sdk_mock = MagicMock() + bare = _bare_manager(sdk_mock) + bare._validator = MagicMock() + bare._validator.validate.return_value = {"workspace1": {}} + bare._process_with_relations_check = MagicMock() + bare._process_without_relations_check = MagicMock() + + bare.process( + custom_datasets=[], + custom_fields=[], + merge_into_existing_ldm=True, + remove_managed_datasets_missing_from_input=True, + management_tag="bca_tooling_managed", + ) + + bare._process_with_relations_check.assert_called_once_with( + {"workspace1": {}}, + merge_into_existing_ldm=True, + remove_managed_datasets_missing_from_input=True, + management_tag="bca_tooling_managed", + ) + bare._process_without_relations_check.assert_not_called() + + +def test_process_skips_relations_check_when_flag_is_false(): + sdk_mock = MagicMock() + bare = _bare_manager(sdk_mock) + bare._validator = MagicMock() + bare._validator.validate.return_value = {"workspace1": {}} + bare._process_with_relations_check = MagicMock() + bare._process_without_relations_check = MagicMock() + + bare.process( + custom_datasets=[], + custom_fields=[], + check_relations=False, + ) + + bare._process_without_relations_check.assert_called_once_with( + {"workspace1": {}}, + merge_into_existing_ldm=False, + remove_managed_datasets_missing_from_input=False, + management_tag=None, + ) + bare._process_with_relations_check.assert_not_called() diff --git a/packages/gooddata-pipelines/tests/test_ldm_extension/test_merge_ldm.py b/packages/gooddata-pipelines/tests/test_ldm_extension/test_merge_ldm.py new file mode 100644 index 000000000..45d0777e6 --- /dev/null +++ b/packages/gooddata-pipelines/tests/test_ldm_extension/test_merge_ldm.py @@ -0,0 +1,84 @@ +# (C) 2025 GoodData Corporation +from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.dataset.dataset import ( + CatalogDeclarativeDataset, +) +from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import ( + CatalogDeclarativeLdm, + CatalogDeclarativeModel, +) + +from gooddata_pipelines.ldm_extension.input_processor import ( + LdmExtensionDataProcessor, +) +from gooddata_pipelines.ldm_extension.models.custom_data_object import ( + ColumnDataType, + CustomDataset, + CustomDatasetDefinition, +) + + +def test_merge_into_empty_ldm(mock_custom_dataset): + processor = LdmExtensionDataProcessor() + empty = CatalogDeclarativeModel( + ldm=CatalogDeclarativeLdm(datasets=[], date_instances=[]) + ) + merged = processor.merge_custom_ldm_into_existing( + empty, {"ds1": mock_custom_dataset} + ) + assert len(merged.ldm.datasets) == 1 + assert merged.ldm.datasets[0].id == "ds1" + assert len(merged.ldm.date_instances) == 1 + + +def test_merge_preserves_other_datasets(mock_custom_dataset): + inherited = CatalogDeclarativeDataset( + id="parent_only", + title="Parent DS", + grain=[], + references=[], + ) + existing = CatalogDeclarativeModel( + ldm=CatalogDeclarativeLdm(datasets=[inherited], date_instances=[]) + ) + processor = LdmExtensionDataProcessor() + 
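+    # "parent_only" has no matching id in the incoming fragment, so the merge
+    # must keep it alongside the newly generated "ds1" dataset.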
merged = processor.merge_custom_ldm_into_existing( + existing, {"ds1": mock_custom_dataset} + ) + ids = {d.id for d in merged.ldm.datasets} + assert ids == {"parent_only", "ds1"} + + +def test_merge_removes_managed_dataset_not_in_input(): + managed = CatalogDeclarativeDataset( + id="managed_old", + title="Old", + grain=[], + references=[], + tags=["bca_tooling_managed"], + ) + existing = CatalogDeclarativeModel( + ldm=CatalogDeclarativeLdm(datasets=[managed], date_instances=[]) + ) + definition = CustomDatasetDefinition( + workspace_id="workspace1", + dataset_id="managed_new", + dataset_name="Dataset New", + dataset_datasource_id="dsrc1", + dataset_source_table="table1", + dataset_source_sql=None, + parent_dataset_reference="parent_ds", + parent_dataset_reference_attribute_id="parent_attr", + dataset_reference_source_column="ref_col", + dataset_reference_source_column_data_type=ColumnDataType.STRING, + workspace_data_filter_id="wdf1", + workspace_data_filter_column_name="col1", + ) + incoming = CustomDataset(definition=definition, custom_fields=[]) + processor = LdmExtensionDataProcessor() + merged = processor.merge_custom_ldm_into_existing( + existing, + {"managed_new": incoming}, + remove_managed_datasets_missing_from_input=True, + management_tag="bca_tooling_managed", + ) + assert [d.id for d in merged.ldm.datasets] == ["managed_new"]
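+
+
+# A minimal sketch (added illustration) of the replacement path: a dataset
+# whose id matches an incoming one is swapped for the fragment's freshly
+# generated definition rather than duplicated.
+def test_merge_replaces_dataset_with_matching_id(mock_custom_dataset):
+    stale = CatalogDeclarativeDataset(
+        id="ds1",
+        title="Stale Dataset 1",
+        grain=[],
+        references=[],
+    )
+    existing = CatalogDeclarativeModel(
+        ldm=CatalogDeclarativeLdm(datasets=[stale], date_instances=[])
+    )
+    processor = LdmExtensionDataProcessor()
+    merged = processor.merge_custom_ldm_into_existing(
+        existing, {"ds1": mock_custom_dataset}
+    )
+    # Exactly one "ds1" remains, and it is no longer the stale definition.
+    assert [d.id for d in merged.ldm.datasets] == ["ds1"]
+    assert merged.ldm.datasets[0].title != "Stale Dataset 1"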