sagemaker-python-sdk icon indicating copy to clipboard operation
sagemaker-python-sdk copied to clipboard

Sagemaker: AttributeError: 'LocalSagemakerClient' object has no attribute 'create_feature_group'

Open akramIOT opened this issue 1 year ago • 0 comments

To reproduce/Code Snippet:

from sagemaker.feature_store.feature_group import FeatureGroup from time import gmtime, strftime, sleep from random import randint import boto3 import sagemaker import pandas as pd import numpy as np import logging import random import time import subprocess import sys import importlib import pprint from sagemaker.local import LocalSession from sagemaker import get_execution_role

# Send this script's log records (DEBUG and above) to a stream handler.
logger = logging.getLogger('name')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())


def _version_tuple(version):
    """Return the leading numeric components of a dotted version string.

    Plain string comparison is lexicographic, so '2.109.0' < '2.48.1' is
    True; comparing tuples of ints orders releases numerically instead.
    Parsing stops at the first non-numeric component (e.g. 'dev0').
    """
    numbers = []
    for piece in version.split('.'):
        if not piece.isdigit():
            break
        numbers.append(int(piece))
    return tuple(numbers)


# Install the pinned release only when the installed one is numerically older.
if _version_tuple(sagemaker.__version__) < _version_tuple('2.48.1'):
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sagemaker==2.48.1'])
    importlib.reload(sagemaker)

if _version_tuple(boto3.__version__) < _version_tuple('1.24.23'):
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'boto3==1.24.23'])
    importlib.reload(boto3)

logger.info(f'Using SageMaker version: {sagemaker.__version__}')
logger.info(f'Using Pandas version: {pd.__version__}')
logger.info(f'Using boto3 version: {boto3.__version__}')
pretty_printer = pprint.PrettyPrinter(indent=4)

# Execution role ARN (account id and role suffix redacted) that is later
# passed as role_arn when the feature group is created.
sagemaker_role = 'arn:aws:iam::xxxxxxxxxxxxx:role/service-role/AmazonSageMaker-ExecutionRole-2022XXXXXXXXXX'

# NOTE(review): the AttributeError reported for this script names
# 'LocalSagemakerClient', which suggests the local-mode session created here
# is what lacks create_feature_group -- confirm whether a regular
# sagemaker.Session() is needed for Feature Store calls.
sagemaker_session = LocalSession()
sagemaker_session.config = {'local': {'local_code': True}}

'''
# Sagemaker Role
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='AmazonSageMaker-ExecutionRole-2022XXXXXXXX')['Role']['Arn']
'''

# Bucket and region are taken from the SageMaker session; the prefix is the
# key prefix used for everything this script writes to S3.
default_bucket = sagemaker_session.default_bucket()
logger.info(f'Default S3 bucket = {default_bucket}')
prefix = 'sagemaker-feature-store'
region = sagemaker_session.boto_region_name

# boto3 handles, all created for the same region as the SageMaker session.
boto_session = boto3.Session(region_name=region)
sagemaker_runtime = boto_session.client(
    service_name='sagemaker',
    region_name=region,
)
featurestore_runtime = boto_session.client(
    service_name='sagemaker-featurestore-runtime',
    region_name=region,
)
s3 = boto_session.resource('s3')

# Load the customer records from a local CSV file.
customers_df = pd.read_csv(
    '/Users/akram/AKRAM_CODE_FOLDER/ML/Washington_ML/serverless-machine-learning/AWS_SAGEMAKER_FEATURE_GROUP/customers.csv'
)
# Bare expressions: evaluated and discarded when run as a script.
customers_df.head(5)
customers_df.dtypes

# Stamp every row with the same event time (current epoch seconds, as float64).
current_time_sec = int(round(time.time()))
customers_df['event_time'] = pd.Series(
    [current_time_sec] * len(customers_df),
    dtype="float64",
)
customers_df.head(5)

# Cast the identifier column to pandas' 'string' dtype and derive a
# timestamp-suffixed name for the feature group.
customers_df['customer_id'] = customers_df['customer_id'].astype('string')
current_timestamp = strftime('%m-%d-%H-%M', gmtime())
customers_feature_group_name = f'fs-customers-{current_timestamp}'
logger.info(f'Feature group name = {customers_feature_group_name}')

# Build the FeatureGroup handle and load feature definitions from the frame.
customers_feature_group = FeatureGroup(
    name=customers_feature_group_name,
    sagemaker_session=sagemaker_session,
)
customers_feature_group.load_feature_definitions(data_frame=customers_df)

def wait_for_feature_group_creation_complete(feature_group, poll_interval_sec=5, timeout_sec=None):
    """Block until *feature_group* is no longer in the 'Creating' status.

    Args:
        feature_group: Object exposing ``describe()`` (whose result has a
            ``.get('FeatureGroupStatus')`` lookup) and a ``name`` attribute.
        poll_interval_sec: Seconds to sleep between status checks.
        timeout_sec: Maximum seconds to keep waiting while the status is
            'Creating'. ``None`` (the default) waits indefinitely.

    Raises:
        SystemExit: If the status after waiting is anything other than
            'Created', or if ``timeout_sec`` elapses while still 'Creating'.
    """
    deadline = None if timeout_sec is None else time.monotonic() + timeout_sec
    status = feature_group.describe().get('FeatureGroupStatus')
    print(f'Initial status: {status}')
    while status == 'Creating':
        # Bail out instead of polling forever when a deadline was requested.
        if deadline is not None and time.monotonic() >= deadline:
            raise SystemExit(
                f'Timed out waiting for feature group {feature_group.name}: {status}'
            )
        logger.info(f'Waiting for feature group: {feature_group.name} to be created ...')
        time.sleep(poll_interval_sec)
        status = feature_group.describe().get('FeatureGroupStatus')
    if status != 'Created':
        raise SystemExit(f'Failed to create feature group {feature_group.name}: {status}')
    logger.info(f'FeatureGroup {feature_group.name} was successfully created.')

# ==========> Line of error as per Traceback: the create() call below.
customers_feature_group.create(
    s3_uri=f's3://{default_bucket}/{prefix}',
    record_identifier_name='customer_id',
    event_time_feature_name='event_time',
    role_arn=sagemaker_role,
    enable_online_store=True,
)

# Wait for creation to finish, then print the resulting description.
wait_for_feature_group_creation_complete(customers_feature_group)
describe_feature_group_result = sagemaker_runtime.describe_feature_group(
    FeatureGroupName=customers_feature_group_name,
)
pretty_printer.pprint(describe_feature_group_result)

# Push the whole frame into the feature group and wait for ingestion.
logger.info(f'Ingesting data into feature group: {customers_feature_group.name} ')
customers_feature_group.ingest(
    data_frame=customers_df,
    max_workers=3,
    wait=True,
)

# Pick a random identifier of the form 'C<1..500>' and fetch that record
# through the feature store runtime client.
customer_id = f'C{randint(1, 500)}'
logger.info(f'customer_id={customer_id}')

feature_record = featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=customer_id,
)
# Bare expression: evaluated and discarded when run as a script.
feature_record

# Athena query handle for the feature group and the table it targets.
customers_query = customers_feature_group.athena_query()
customers_table = customers_query.table_name

# Query results are written under the script's S3 prefix.
output_location = f's3://{default_bucket}/{prefix}/query_results/'
query_string = f'SELECT * FROM "{customers_table}" limit 10'

# Run the sample query, wait for it, and load the result as a DataFrame.
customers_query.run(
    query_string=query_string,
    output_location=output_location,
)
customers_query.wait()
athena_df = customers_query.as_dataframe()
athena_df.head()

# Add an Integral 'has_kids' feature to the existing feature group.
sagemaker_runtime.update_feature_group(
    FeatureGroupName=customers_feature_group_name,
    FeatureAdditions=[
        {"FeatureName": "has_kids", "FeatureType": "Integral"},
    ],
)

# Fixed one-minute pause before reading the description back.
time.sleep(60)

describe_feature_group_result = sagemaker_runtime.describe_feature_group(
    FeatureGroupName=customers_feature_group_name,
)
pretty_printer.pprint(describe_feature_group_result)

# Re-run the same sample query after the feature addition.
customers_query.run(
    query_string=query_string,
    output_location=output_location,
)
customers_query.wait()
athena_df_update = customers_query.as_dataframe()
athena_df_update.head()

# DataFrame.drop returns a new frame; assign it back so the column is
# actually removed before being re-created below.
customers_df = customers_df.drop(['event_time'], axis=1)

# Populate the new feature with a random 0/1 value per row.
customers_df['has_kids'] = np.random.randint(0, 2, customers_df.shape[0])
customers_df.dtypes

# Re-create the event time column (same epoch-seconds value as before).
customers_df['event_time'] = pd.Series(
    [current_time_sec] * len(customers_df),
    dtype="float64",
)
customers_df.head(10)

# Ingest the updated frame (now carrying 'has_kids') and wait for completion.
logger.info(f'Ingesting data into feature group: {customers_feature_group.name} ...')
customers_feature_group.ingest(
    data_frame=customers_df,
    max_workers=3,
    wait=True,
)
logger.info(f'{len(customers_df)} customer records ingested into feature group: {customers_feature_group.name}')

# Fetch the previously chosen customer's record again and print it.
get_record_result = featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=customer_id,
)
pretty_printer.pprint(get_record_result)

# Run the sample query once more against the offline store table.
customers_query.run(
    query_string=query_string,
    output_location=output_location,
)
customers_query.wait()
athena_df_update = customers_query.as_dataframe()
athena_df_update.head()

###############
# Cleanup: look up the offline store location, delete the objects under it,
# then delete the feature group itself.
describe_feature_group_result = sagemaker_runtime.describe_feature_group(
    FeatureGroupName=customers_feature_group_name,
)
pretty_printer.pprint(describe_feature_group_result)

s3_config = describe_feature_group_result['OfflineStoreConfig']['S3StorageConfig']
s3_uri = s3_config['ResolvedOutputS3Uri']
# Keep everything after the third '/'-separated piece of the URI, i.e. the
# key prefix that follows the bucket name in an 's3://bucket/...' URI.
full_prefix = '/'.join(s3_uri.split('/')[3:])
logger.info(full_prefix)

bucket = s3.Bucket(default_bucket)
offline_objects = bucket.objects.filter(Prefix=full_prefix)
offline_objects.delete()

customers_feature_group.delete()

Expected behavior: SageMaker Feature Group Creation API should work correctly

Screenshots or logs: AttributeError: 'LocalSagemakerClient' object has no attribute 'create_feature_group'

System information A description of your system. Please provide:

(serverless-machine-learning) akram@ISHERIFF-M-RBNA AWS_SAGEMAKER_FEATURE_GROUP % pip list | grep sagemaker
sagemaker 2.109.0
(serverless-machine-learning) akram@ISHERIFF-M-RBNA AWS_SAGEMAKER_FEATURE_GROUP %

- SageMaker Python SDK version: 2.109.0
- Python version: 3.9
- CPU or GPU: CPU
- Custom Docker image (Y/N): N

akramIOT avatar Sep 15 '22 00:09 akramIOT