sagemaker-python-sdk
sagemaker-python-sdk copied to clipboard
Sagemaker: AttributeError: 'LocalSagemakerClient' object has no attribute 'create_feature_group'
To reproduce/Code Snippet:
from sagemaker.feature_store.feature_group import FeatureGroup from time import gmtime, strftime, sleep from random import randint import boto3 import sagemaker import pandas as pd import numpy as np import logging import random import time import subprocess import sys import importlib import pprint from sagemaker.local import LocalSession from sagemaker import get_execution_role
logger = logging.getLogger('name') logger.setLevel(logging.DEBUG) logger.addHandler(logging.StreamHandler())
if sagemaker.version < '2.48.1': subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sagemaker==2.48.1']) importlib.reload(sagemaker)
if boto3.version < '1.24.23': subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'boto3==1.24.23']) importlib.reload(boto3)
logger.info(f'Using SageMaker version: {sagemaker.version}') logger.info(f'Using Pandas version: {pd.version}') logger.info(f'Using boto3 version: {boto3.version}') pretty_printer = pprint.PrettyPrinter(indent=4)
sagemaker_role = 'arn:aws:iam::xxxxxxxxxxxxx:role/service-role/AmazonSageMaker-ExecutionRole-2022XXXXXXXXXX' sagemaker_session = LocalSession() sagemaker_session.config = {'local': {'local_code': True}}
'''
Sagemaker Role
try: role = sagemaker.get_execution_role() except ValueError: iam = boto3.client('iam') role = iam.get_role(RoleName='AmazonSageMaker-ExecutionRole-2022XXXXXXXX')['Role']['Arn'] '''
default_bucket = sagemaker_session.default_bucket() logger.info(f'Default S3 bucket = {default_bucket}') prefix = 'sagemaker-feature-store' region = sagemaker_session.boto_region_name
boto_session = boto3.Session(region_name=region) sagemaker_runtime = boto_session.client(service_name='sagemaker', region_name=region) featurestore_runtime = boto_session.client(service_name='sagemaker-featurestore-runtime', region_name=region) s3 = boto_session.resource('s3')
customers_df = pd.read_csv('/Users/akram/AKRAM_CODE_FOLDER/ML/Washington_ML/serverless-machine-learning/AWS_SAGEMAKER_FEATURE_GROUP/customers.csv') customers_df.head(5) customers_df.dtypes
current_time_sec = int(round(time.time())) customers_df['event_time'] = pd.Series([current_time_sec] * len(customers_df), dtype="float64") customers_df.head(5)
customers_df['customer_id'] = customers_df['customer_id'].astype('string') current_timestamp = strftime('%m-%d-%H-%M', gmtime()) customers_feature_group_name = f'fs-customers-{current_timestamp}' logger.info(f'Feature group name = {customers_feature_group_name}')
customers_feature_group = FeatureGroup(name=customers_feature_group_name, sagemaker_session=sagemaker_session) customers_feature_group.load_feature_definitions(data_frame=customers_df)
def wait_for_feature_group_creation_complete(feature_group): status = feature_group.describe().get('FeatureGroupStatus') print(f'Initial status: {status}') while status == 'Creating': logger.info(f'Waiting for feature group: {feature_group.name} to be created ...') time.sleep(5) status = feature_group.describe().get('FeatureGroupStatus') if status != 'Created': raise SystemExit(f'Failed to create feature group {feature_group.name}: {status}') logger.info(f'FeatureGroup {feature_group.name} was successfully created.')
customers_feature_group.create(s3_uri=f's3://{default_bucket}/{prefix}', ==========> Line of error as per Traceback record_identifier_name='customer_id', event_time_feature_name='event_time', role_arn=sagemaker_role, enable_online_store=True)
wait_for_feature_group_creation_complete(customers_feature_group) describe_feature_group_result = sagemaker_runtime.describe_feature_group( FeatureGroupName=customers_feature_group_name) pretty_printer.pprint(describe_feature_group_result)
logger.info(f'Ingesting data into feature group: {customers_feature_group.name} ') customers_feature_group.ingest(data_frame=customers_df, max_workers=3, wait=True) customer_id = f'C{randint(1, 500)}' logger.info(f'customer_id={customer_id}')
feature_record = featurestore_runtime.get_record(FeatureGroupName=customers_feature_group_name, RecordIdentifierValueAsString=customer_id) feature_record
customers_query = customers_feature_group.athena_query() customers_table = customers_query.table_name
output_location = f's3://{default_bucket}/{prefix}/query_results/' query_string = f'SELECT * FROM "{customers_table}" limit 10'
customers_query.run(query_string=query_string,output_location=output_location) customers_query.wait() athena_df = customers_query.as_dataframe() athena_df.head()
sagemaker_runtime.update_feature_group( FeatureGroupName=customers_feature_group_name, FeatureAdditions=[ {"FeatureName": "has_kids", "FeatureType": "Integral"} ])
time.sleep(60)
describe_feature_group_result = sagemaker_runtime.describe_feature_group( FeatureGroupName=customers_feature_group_name ) pretty_printer.pprint(describe_feature_group_result)
customers_query.run(query_string=query_string,output_location=output_location) customers_query.wait() athena_df_update = customers_query.as_dataframe() athena_df_update.head()
customers_df.drop(['event_time'],axis=1) customers_df['has_kids'] =np.random.randint(0, 2, customers_df.shape[0]) customers_df.dtypes
customers_df['event_time'] = pd.Series([current_time_sec] * len(customers_df), dtype="float64") customers_df.head(10)
logger.info(f'Ingesting data into feature group: {customers_feature_group.name} ...') customers_feature_group.ingest(data_frame=customers_df, max_workers=3, wait=True) logger.info(f'{len(customers_df)} customer records ingested into feature group: {customers_feature_group.name}')
get_record_result = featurestore_runtime.get_record( FeatureGroupName=customers_feature_group_name, RecordIdentifierValueAsString=customer_id ) pretty_printer.pprint(get_record_result)
customers_query.run(query_string=query_string,output_location=output_location) customers_query.wait() athena_df_update = customers_query.as_dataframe() athena_df_update.head()
############### describe_feature_group_result = sagemaker_runtime.describe_feature_group( FeatureGroupName=customers_feature_group_name ) pretty_printer.pprint(describe_feature_group_result)
s3_config = describe_feature_group_result['OfflineStoreConfig']['S3StorageConfig'] s3_uri = s3_config['ResolvedOutputS3Uri'] full_prefix = '/'.join(s3_uri.split('/')[3:]) logger.info(full_prefix)
bucket = s3.Bucket(default_bucket) offline_objects = bucket.objects.filter(Prefix=full_prefix) offline_objects.delete()
customers_feature_group.delete()
Expected behavior: SageMaker Feature Group Creation API should work correctly
Screenshots or logs AttributeError: 'LocalSagemakerClient' object has no attribute 'create_feature_group'
System information A description of your system. Please provide:
(serverless-machine-learning) akram@ISHERIFF-M-RBNA AWS_SAGEMAKER_FEATURE_GROUP % pip list | grep sagemaker sagemaker 2.109.0 (serverless-machine-learning) akram@ISHERIFF-M-RBNA AWS_SAGEMAKER_FEATURE_GROUP %
SageMaker Python SDK version: 2.109.0 Python version: 3.9 CPU or GPU: CPU Custom Docker image (Y/N): N