amazon-sagemaker-examples
amazon-sagemaker-examples copied to clipboard
[Bug Report] linear_learner_mnist_with_file_system_data_source notebook errors on subnet and security_group_id value type
Link to the notebook Add the link to the notebook.
Describe the bug: When run, the following code fails with a ParamValidationError indicating an invalid value type for subnets and security_group_ids.
linear = sagemaker.estimator.Estimator(
container,
role,
subnets=subnets,
security_group_ids=security_groups_ids,
train_instance_count=1,
train_instance_type="ml.c4.xlarge",
output_path=output_location,
sagemaker_session=sess,
)
The solution to this error is to pass subnets and security_group_ids as lists rather than plain strings. See the corrected code here:
linear = sagemaker.estimator.Estimator(
container,
role,
subnets=[subnets],
security_group_ids=[security_groups_ids],
train_instance_count=1,
train_instance_type="ml.c4.xlarge",
output_path=output_location,
sagemaker_session=sess,
)
To reproduce: Run the above notebook in a SageMaker Studio Notebook (Python 3 (Data Science) kernel). Here are the SDK versions:
Name: sagemaker
Version: 1.72.1
---
Name: boto3
Version: 1.24.62
---
Name: botocore
Version: 1.27.62
Even after upgrading to SageMaker Python SDK v2, you get the same error, and additionally warnings about the train_* parameters (train_instance_count, train_instance_type).
Logs
Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.
---------------------------------------------------------------------------
ParamValidationError Traceback (most recent call last)
<ipython-input-49-8fc2f5ab22f6> in <module>
17 linear.set_hyperparameters(feature_dim=784, predictor_type="binary_classifier", mini_batch_size=200)
18
---> 19 linear.fit({"train": file_system_input})
/opt/conda/lib/python3.7/site-packages/sagemaker/estimator.py in fit(self, inputs, wait, logs, job_name, experiment_config)
494 self._prepare_for_training(job_name=job_name)
495
--> 496 self.latest_training_job = _TrainingJob.start_new(self, inputs, experiment_config)
497 self.jobs.append(self.latest_training_job)
498 if wait:
/opt/conda/lib/python3.7/site-packages/sagemaker/estimator.py in start_new(cls, estimator, inputs, experiment_config)
1089 train_args["enable_sagemaker_metrics"] = estimator.enable_sagemaker_metrics
1090
-> 1091 estimator.sagemaker_session.train(**train_args)
1092
1093 return cls(estimator.sagemaker_session, estimator._current_job_name)
/opt/conda/lib/python3.7/site-packages/sagemaker/session.py in train(self, input_mode, input_config, role, job_name, output_config, resource_config, vpc_config, hyperparameters, stop_condition, tags, metric_definitions, enable_network_isolation, image, algorithm_arn, encrypt_inter_container_traffic, train_use_spot_instances, checkpoint_s3_uri, checkpoint_local_path, experiment_config, debugger_rule_configs, debugger_hook_config, tensorboard_output_config, enable_sagemaker_metrics)
588 LOGGER.info("Creating training-job with name: %s", job_name)
589 LOGGER.debug("train request: %s", json.dumps(train_request, indent=4))
--> 590 self.sagemaker_client.create_training_job(**train_request)
591
592 def process(
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
510 )
511 # The "self" in this scope is referring to the BaseClient.
--> 512 return self._make_api_call(operation_name, kwargs)
513
514 _api_call.__name__ = str(py_operation_name)
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
881 }
882 request_dict = self._convert_to_request_dict(
--> 883 api_params, operation_model, context=request_context
884 )
885 resolve_checksum_context(request_dict, operation_model, api_params)
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _convert_to_request_dict(self, api_params, operation_model, context)
942 )
943 request_dict = self._serializer.serialize_to_request(
--> 944 api_params, operation_model
945 )
946 if not self._client_config.inject_host_prefix:
/opt/conda/lib/python3.7/site-packages/botocore/validate.py in serialize_to_request(self, parameters, operation_model)
379 )
380 if report.has_errors():
--> 381 raise ParamValidationError(report=report.generate_report())
382 return self._serializer.serialize_to_request(
383 parameters, operation_model
ParamValidationError: Parameter validation failed:
Invalid type for parameter VpcConfig.Subnets, value: subnet-0af64674c564b501b, type: <class 'str'>, valid types: <class 'list'>, <class 'tuple'>
Invalid type for parameter VpcConfig.SecurityGroupIds, value: sg-0377a8692191789a7, type: <class 'str'>, valid types: <class 'list'>, <class 'tuple'>