scalecast icon indicating copy to clipboard operation
scalecast copied to clipboard

Data size impacting tune_test_forecast() and find_optimal_transformation()

Open raedbsili1991 opened this issue 2 years ago • 3 comments

Hello,

In an attempt to deploy automatic forecasting on different datasets (the ultimate goal is to find the optimal model automatically for each input time series), I noticed that the two functions tune_test_forecast() and find_optimal_transformation() run into a "shape" error:

ValueError: Found array with 0 sample(s) (shape=(0, 45)) while a minimum of 1 is required by MinMaxScaler.

Or does it maybe have to do with add_ar_terms()?

In the case of the following dataset, I noticed that the problem occurs mainly in f.auto_forecast().

However, the find_statistical_transformation() works well.

Dataset attached.

CODE:

# Build the Forecaster object and configure validation/test splits.
# The statements below were collapsed onto a single line by the issue
# renderer; they are restored here one statement per line.
forecast_months_horizon = 18  # Select number of months to be forecasted in the future
performance_metric = "mae"
data = df
f = Forecaster(
    y=data['Monthly_Ordered quantity _ basic U_M'],  # required
    current_dates=data['first_day_of_month'],  # required
    future_dates=forecast_months_horizon,
    cis=False,  # choose whether or not to evaluate confidence intervals for all models
    metrics=['mae', 'r2', 'rmse', 'mape'],  # the metrics to evaluate when testing/tuning models
)

f.add_metric(custom_metric_3)

f.set_validation_metric(performance_metric)
# NOTE(review): the `*` operators were stripped by markdown rendering
# (`len(f.y).2` is not valid Python); reconstructed as 20% / 25% of the
# series length. `number_months_validation0` / `number_months_test0` are kept
# as written -- confirm they are variable names and not `name*0`.
f.set_validation_length(int(len(f.y) * .2) + number_months_validation0)
f.set_test_length(int(len(f.y) * .25) + number_months_test0)

def forecaster_0(f):
    """Tune, cross-validate and forecast a fixed set of models on ``f``.

    Registers ``StackingRegressor`` and ``AdaBoostRegressor`` as the
    estimators 'stacking' and 'adaboost', adds the COVID-19 regressor and the
    custom metric, then loops over 'lasso', 'gbt', 'ridge' and 'elasticnet'.
    Each model is saved under the name ``<model>_0``. Finally an auto-ARIMA
    model is fitted.

    Relies on the module-level names ``custom_metric_3`` and
    ``performance_metric``.

    Args:
        f: the Forecaster object to operate on (modified in place).
    """
    f.add_sklearn_estimator(StackingRegressor, called='stacking')
    f.add_sklearn_estimator(AdaBoostRegressor, called='adaboost')
    f.add_covid19_regressor()

    f.add_metric(custom_metric_3)
    f.set_validation_metric(performance_metric)

    models = ('lasso', 'gbt', 'ridge', 'elasticnet')
    for m in tqdm(models):
        f.drop_all_Xvars()
        f.set_estimator(m)
        f.auto_Xvar_select(estimator=m, irr_cycles=[12], cross_validate=True)
        #f.determine_best_series_length(estimator =m, monitor='ValidationMetricValue' ,cross_validate=True, dynamic_tuning = 18)
        f.tune()  # by default, will pull the grid with the same name as the estimator (mlr will pull the mlr grid, etc.)
        f.cross_validate(k=5, verbose=True, dynamic_tuning=True)
        f.auto_forecast(call_me=m + '_0')
        f.restore_series_length()

    # NOTE(review): indentation was lost in the paste; this call is assumed to
    # belong to the function body -- confirm against the original script.
    auto_arima(f, m=12)  # saves a model called auto_arima

def forecaster_1(f):
    """Add AR, trend and seasonal regressors to ``f``, then fit two models.

    Adds autoregressive terms, a time trend and several seasonal regressors,
    then tunes, cross-validates (k=2) and forecasts with 'lasso' and
    'xgboost'. Finally fits an auto-ARIMA model and adds the COVID-19
    regressor.

    Args:
        f: the Forecaster object to operate on (modified in place).
    """
    #f.eval_cis() # tell the object to build confidence intervals for all models
    # NOTE(review): add_ar_terms presumably takes the *number* of lags, so this
    # loop would be equivalent to a single f.add_ar_terms(10) -- confirm
    # against the scalecast docs before simplifying.
    for i in range(11):
        f.add_ar_terms(i)

    # NOTE(review): presumably adds seasonal lags at 12 and 24 periods; every
    # lag drops leading observations, which on a short monthly series combined
    # with the 20%/25% validation/test splits may leave 0 training samples
    # (the reported MinMaxScaler ValueError) -- verify.
    f.add_AR_terms((2, 12))
    f.add_time_trend()

    f.add_seasonal_regressors('month', 'quarter', 'week', 'dayofyear', raw=False, sincos=True)
    f.add_seasonal_regressors('dayofweek', 'is_leap_year', 'week', raw=False, dummy=True, drop_first=True)
    f.add_seasonal_regressors('year')

    #f.add_sklearn_estimator(StackingRegressor,called='stacking')
    #f.add_sklearn_estimator(AdaBoostRegressor,called='adaboost')

    models = ('lasso', 'xgboost')
    # f.tune_test_forecast(models, dynamic_testing=True,
    #                      cross_validate= True, summary_stats = True, dynamic_tuning=True,verbose=True)
    #f.tune_test_forecast(models, suffix = "_1")
    for m in tqdm(models):
        f.set_estimator(m)
        f.tune(dynamic_tuning=True)
        f.cross_validate(k=2, dynamic_tuning=True, verbose=True)
        f.auto_forecast()

    # NOTE(review): indentation was lost in the paste; these two calls are
    # assumed to belong to the function body -- confirm against the original.
    auto_arima(f, m=12)  # saves a model called auto_arima
    f.add_covid19_regressor()

def Plot_Analysis(f):
    """Plot the ACF, PACF and seasonal decomposition of the series in ``f``.

    Args:
        f: the Forecaster object whose series is analysed.
    """
    print("Plotting AutoCorrelation & Seasonal Decomposition Graph")

    f.plot_acf()
    plt.title("ACF")

    f.plot_pacf()
    plt.title("PACF")

    f.seasonal_decompose().plot()
    plt.title("Seasonal Decompose")
    plt.show()

def Plot_Forecasts(f):
    """Plot fitted values and forecasts for every model evaluated on ``f``.

    Both plots are ordered by the 'TestSetMAE' metric. Between the two plots,
    ``plot_test_export_summaries(f)`` is called and its result is stored in a
    local variable.

    Args:
        f: the Forecaster object holding the evaluated models.
    """
    f.plot_fitted(order_by='TestSetMAE')  # plot fitted values of all models ordered by test-set MAE
    plt.title('fitted_results results', size=16)
    plt.show()
    df_models = plot_test_export_summaries(f)

    f.plot(order_by='TestSetMAE')
    plt.title('Forecasting results', size=16)
    plt.show()

#transformer, reverter = find_statistical_transformation(f)

# Driver: build the models with forecaster_1, then plot the results.
# (The two calls were fused onto one line in the original paste.)
forecaster_1(f)
Plot_Forecasts(f)

df_A0430151.xlsx

raedbsili1991 avatar Aug 04 '23 14:08 raedbsili1991