From b4350d98bba1c8cf3d93bd2a112a450fef81cdd1 Mon Sep 17 00:00:00 2001 From: jzhaoqwa Date: Wed, 22 Apr 2026 15:45:47 -0700 Subject: [PATCH 1/4] fix(tuner): pass through full OutputDataConfig from ModelTrainer HyperparameterTuner._build_training_job_definition was reconstructing a new OutputDataConfig with only s3_output_path, silently dropping kms_key_id, compression_type, and other fields set on the ModelTrainer. Pass model_trainer.output_data_config directly to preserve all fields. Also update _create_mock_model_trainer in tests to use a real OutputDataConfig instead of MagicMock, and add a test verifying kms_key_id and compression_type are preserved through the tuning job definition. X-AI-Prompt: Check if HyperparameterTuner passes OutputDataConfig.kms_key_id from ModelTrainer and fix the gap X-AI-Tool: Kiro --- sagemaker-train/src/sagemaker/train/tuner.py | 11 ++--- .../tests/unit/train/test_tuner.py | 42 ++++++++++++++++++- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/sagemaker-train/src/sagemaker/train/tuner.py b/sagemaker-train/src/sagemaker/train/tuner.py index 5613331209..66a80a1836 100644 --- a/sagemaker-train/src/sagemaker/train/tuner.py +++ b/sagemaker-train/src/sagemaker/train/tuner.py @@ -1472,13 +1472,10 @@ def _build_training_job_definition(self, inputs): if not any(c.channel_name == channel.channel_name for c in input_data_config): input_data_config.append(channel) - # Build output data config - output_config = OutputDataConfig( - s3_output_path=( - model_trainer.output_data_config.s3_output_path - if model_trainer.output_data_config - else None - ) + # Pass through the full OutputDataConfig from ModelTrainer so that + # kms_key_id, compression_type, and any other fields are preserved. 
+ output_config = model_trainer.output_data_config or OutputDataConfig( + s3_output_path=None ) # Build resource config diff --git a/sagemaker-train/tests/unit/train/test_tuner.py b/sagemaker-train/tests/unit/train/test_tuner.py index 057ca364ed..c1b2b69087 100644 --- a/sagemaker-train/tests/unit/train/test_tuner.py +++ b/sagemaker-train/tests/unit/train/test_tuner.py @@ -54,8 +54,9 @@ def _create_mock_model_trainer(with_internal_channels=False, with_spot_training= trainer.training_image = "test-image:latest" trainer.training_input_mode = "File" trainer.role = "arn:aws:iam::123456789012:role/SageMakerRole" - trainer.output_data_config = MagicMock() - trainer.output_data_config.s3_output_path = "s3://bucket/output" + from sagemaker.core.shapes import OutputDataConfig + + trainer.output_data_config = OutputDataConfig(s3_output_path="s3://bucket/output") trainer.compute = MagicMock() trainer.compute.instance_type = "ml.m5.xlarge" trainer.compute.instance_count = 1 @@ -666,3 +667,40 @@ def test_build_training_job_definition_with_empty_environment(self): assert definition.environment == {}, ( "Empty dict environment should be passed through as-is" ) + + def test_build_training_job_definition_passes_through_output_data_config(self): + """Test that _build_training_job_definition passes through the full OutputDataConfig. + + This verifies that fields like kms_key_id and compression_type from + ModelTrainer.output_data_config are preserved in the tuning job definition, + rather than only copying s3_output_path. 
+ """
+ from sagemaker.core.shapes import OutputDataConfig
+
+ mock_trainer = _create_mock_model_trainer()
+ mock_trainer.output_data_config = OutputDataConfig(
+ s3_output_path="s3://bucket/output",
+ kms_key_id="arn:aws:kms:us-west-2:123456789012:key/abc123",
+ compression_type="NONE",
+ )
+
+ tuner = HyperparameterTuner(
+ model_trainer=mock_trainer,
+ objective_metric_name="accuracy",
+ hyperparameter_ranges=_create_single_hp_range(),
+ )
+
+ definition = tuner._build_training_job_definition(None)
+
+ assert definition.output_data_config is mock_trainer.output_data_config, (
+ "output_data_config should be the same object from ModelTrainer"
+ )
+ assert definition.output_data_config.kms_key_id == (
+ "arn:aws:kms:us-west-2:123456789012:key/abc123"
+ ), "kms_key_id should be preserved"
+ assert definition.output_data_config.compression_type == "NONE", (
+ "compression_type should be preserved"
+ )
+ assert definition.output_data_config.s3_output_path == "s3://bucket/output", (
+ "s3_output_path should be preserved"
+ ) From 5666f4a1f8ad27d6eff60bf78173d9f1ecf83f3a Mon Sep 17 00:00:00 2001 From: jzhaoqwa Date: Wed, 22 Apr 2026 15:46:23 -0700 Subject: [PATCH 2/4] Delete file: Resolved template parameters: {'role_arn.md --- ...esolved template parameters: {'role_arn.md | 172 ------------------ 1 file changed, 172 deletions(-) delete mode 100644 sagemaker-train/src/sagemaker/train/evaluate/Resolved template parameters: {'role_arn.md diff --git a/sagemaker-train/src/sagemaker/train/evaluate/Resolved template parameters: {'role_arn.md b/sagemaker-train/src/sagemaker/train/evaluate/Resolved template parameters: {'role_arn.md deleted file mode 100644 index 9c1077203e..0000000000 --- a/sagemaker-train/src/sagemaker/train/evaluate/Resolved template parameters: {'role_arn.md +++ /dev/null @@ -1,172 +0,0 @@ -Resolved template parameters: {'role_arn': base_evaluator.py:757 - 'arn:aws:iam::634683118556:role/service-role/AmazonSageMaker-Exe - cutionRole-20251116T174807', 
'mlflow_resource_arn': - 'arn:aws:sagemaker:us-east-1:634683118556:mlflow-app/app-DA25Q2S - 35KHZ', 'mlflow_experiment_name': None, 'mlflow_run_name': None, - 'model_package_group_arn': - 'arn:aws:sagemaker:us-east-1:634683118556:model-package-group/tm - p-humanlike-llama32-rlaif', 'source_model_package_arn': None, - 'base_model_arn': - 'arn:aws:sagemaker:us-east-1:aws:hub-content/SageMakerPublicHub/ - Model/meta-textgeneration-llama-3-2-1b-instruct/1.25.0', - 's3_output_path': - 's3://sagemaker-us-east-1-634683118556/tmp-humanlike-llama32-rla - if/eval', 'dataset_artifact_arn': - 'arn:aws:sagemaker:us-east-1:634683118556:artifact/c3c6611071894 - bad6a7f0925a729b02e', 'action_arn_prefix': - 'arn:aws:sagemaker:us-east-1:634683118556:action', - 'dataset_uri': - 'arn:aws:sagemaker:us-east-1:634683118556:hub-content/CKO4ACGI3U - RQBOO74C9JPLUMQNG02M2I4CIM9M931SQHE0625A30/DataSet/tmp-humanlike - -rlaif-eval/0.0.1', 'judge_model_id': - 'anthropic.claude-3-5-sonnet-20240620-v1:0', 'llmaj_metrics': - '[]', 'custom_metrics_s3_path': - 's3://sagemaker-us-east-1-634683118556/tmp-humanlike-llama32-rla - if/eval/evaluationinputs/eval-meta-1517aa3320251202-011237/custo - m-metrics.json', 'max_new_tokens': '8192', 'temperature': '0', - 'top_k': '-1', 'top_p': '1.0', 'pipeline_name': - 'SagemakerModelEvaluationType2-llmaj', 'evaluate_base_model': - True} - INFO Rendered pipeline definition: base_evaluator.py:766 - { - "Version": "2020-12-01", - "Metadata": {}, - "MlflowConfig": { - "MlflowResourceArn": - "arn:aws:sagemaker:us-east-1:634683118556:mlflow-app/app-DA25Q2S - 35KHZ" - }, - "Parameters": [], - "Steps": [ - { - "Name": "EvaluateBaseInferenceModel", - "Type": "Training", - "Arguments": { - "TrainingJobName": "BaseInference", - "RoleArn": - "arn:aws:iam::634683118556:role/service-role/AmazonSageMaker-Exe - cutionRole-20251116T174807", - "ServerlessJobConfig": { - "BaseModelArn": - "arn:aws:sagemaker:us-east-1:aws:hub-content/SageMakerPublicHub/ - 
Model/meta-textgeneration-llama-3-2-1b-instruct/1.25.0", - "AcceptEula": true, - "JobType": "Evaluation", - "EvaluationType": "BenchmarkEvaluation" - }, - "StoppingCondition": { - "MaxRuntimeInSeconds": 86400 - }, - "HyperParameters": { - "name": "BaseInference", - "task": "inference_only" - }, - "OutputDataConfig": { - "S3OutputPath": - "s3://sagemaker-us-east-1-634683118556/tmp-humanlike-llama32-rla - if/eval", - "CompressionType": "NONE" - }, - "InputDataConfig": [ - { - "ChannelName": "train", - "DataSource": { - "DatasetSource": { - "DatasetArn": - "arn:aws:sagemaker:us-east-1:634683118556:hub-content/CKO4ACGI3U - RQBOO74C9JPLUMQNG02M2I4CIM9M931SQHE0625A30/DataSet/tmp-humanlike - -rlaif-eval/0.0.1" - } - } - } - ] - } - }, - { - "Name": "EvaluateBaseModelMetrics", - "Type": "Training", - "DependsOn": [ - "EvaluateBaseInferenceModel" - ], - "Arguments": { - "TrainingJobName": { - "Std:Join": { - "On": "-", - "Values": [ - "base-llmaj-eval", - { - "Get": "Execution.PipelineExecutionId" - } - ] - } - }, - "RoleArn": - "arn:aws:iam::634683118556:role/service-role/AmazonSageMaker-Exe - cutionRole-20251116T174807", - "ServerlessJobConfig": { - "BaseModelArn": - "arn:aws:sagemaker:us-east-1:aws:hub-content/SageMakerPublicHub/ - Model/meta-textgeneration-llama-3-2-1b-instruct/1.25.0", - "AcceptEula": true, - "JobType": "Evaluation", - "EvaluationType": "LLMAJEvaluation" - }, - "StoppingCondition": { - "MaxRuntimeInSeconds": 86400 - }, - "HyperParameters": { - "name": { - "Std:Join": { - "On": "-", - "Values": [ - "base-llmaj-eval", - { - "Get": "Execution.PipelineExecutionId" - } - ] - } - }, - "judge_model_id": - "anthropic.claude-3-5-sonnet-20240620-v1:0", - "inference_data_s3_path": { - "Std:Join": { - "On": "", - "Values": [ - { - "Get": - "Steps.EvaluateBaseInferenceModel.OutputDataConfig.S3OutputPath" - }, - "/", - { - "Get": - "Steps.EvaluateBaseInferenceModel.TrainingJobName" - }, - "/output/output/", - "BaseInference", - "/eval_results/inference_output.jsonl" 
- ] - } - }, - "output_path": - "s3://sagemaker-us-east-1-634683118556/tmp-humanlike-llama32-rla - if/eval", - "llmaj_metrics": "[]", - "custom_metrics_s3_path": - "s3://sagemaker-us-east-1-634683118556/tmp-humanlike-llama32-rla - if/eval/evaluationinputs/eval-meta-1517aa3320251202-011237/custo - m-metrics.json", - "max_new_tokens": "8192", - "temperature": "0", - "top_k": "-1", - "top_p": "1.0" - }, - "OutputDataConfig": { - "S3OutputPath": - "s3://sagemaker-us-east-1-634683118556/tmp-humanlike-llama32-rla - if/eval", - "CompressionType": "NONE" - } - } - } - ] - } \ No newline at end of file From 419176fba769aa1f5e8e1ccccfa39149e2ad790a Mon Sep 17 00:00:00 2001 From: jzhaoqwa Date: Wed, 22 Apr 2026 17:07:59 -0700 Subject: [PATCH 3/4] fix unit tests --- .../tests/unit/train/test_tuner_driver_channels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py b/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py index d7bdb18686..678cdfac14 100644 --- a/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py +++ b/sagemaker-train/tests/unit/train/test_tuner_driver_channels.py @@ -38,6 +38,7 @@ DataSource, S3DataSource, VpcConfig, + OutputDataConfig, ) from sagemaker.core.utils.utils import Unassigned @@ -76,8 +77,7 @@ def _mock_model_trainer(**overrides): trainer.training_image = "test-image:latest" trainer.training_input_mode = "File" trainer.role = "arn:aws:iam::123456789012:role/SageMakerRole" - trainer.output_data_config = MagicMock() - trainer.output_data_config.s3_output_path = "s3://bucket/output" + trainer.output_data_config = OutputDataConfig(s3_output_path="s3://bucket/output") trainer.compute = MagicMock() trainer.compute.instance_type = "ml.m5.xlarge" trainer.compute.instance_count = 1 From d496b0ef418b953b825e3a12328c8175c03c69d2 Mon Sep 17 00:00:00 2001 From: jzhaoqwa Date: Wed, 22 Apr 2026 17:29:46 -0700 Subject: [PATCH 4/4] Update compute instance type --- 
.../tests/integ/jumpstart/test_jumpstart_train.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sagemaker-train/tests/integ/jumpstart/test_jumpstart_train.py b/sagemaker-train/tests/integ/jumpstart/test_jumpstart_train.py index c42520b984..2ee33f760c 100644 --- a/sagemaker-train/tests/integ/jumpstart/test_jumpstart_train.py +++ b/sagemaker-train/tests/integ/jumpstart/test_jumpstart_train.py @@ -17,6 +17,7 @@ from sagemaker.core.jumpstart import JumpStartConfig from sagemaker.train import ModelTrainer +from sagemaker.train.configs import Compute @pytest.mark.parametrize( @@ -27,6 +28,9 @@ "hyperparameters": { "epochs": 1, # Set to 1 for testing purposes }, + # Override default instance type; the model's default + # (ml.p3.2xlarge) is deprecated. + "compute": Compute(instance_type="ml.g5.xlarge"), }, {"model_id": "xgboost-classification-model"}, {"model_id": "catboost-regression-model"}, @@ -47,5 +51,6 @@ def test_jumpstart_train(test_case): jumpstart, base_job_name=test_case["model_id"], hyperparameters=test_case.get("hyperparameters", {}), + compute=test_case.get("compute"), ) model_trainer.train()