etna
etna copied to clipboard
POC: caching of duplicated fold runs for backtest and StackingEnsemble via joblib.Memory
IMPORTANT: Please do not create a Pull Request without creating an issue first.
Before submitting (must do checklist)
- [ ] Did you read the contribution guide?
- [ ] Did you update the docs? We use NumPy format for all the methods and classes.
- [ ] Did you write any new necessary tests?
- [ ] Did you update the CHANGELOG?
Type of Change
- [ ] Examples / docs / tutorials / contributors update
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] Improvement (non-breaking change which improves an existing feature)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
Proposed Changes
Related Issue
Closing issues
from etna.datasets import TSDataset, generate_ar_df
from etna.pipeline import Pipeline
from etna.ensembles import StackingEnsemble
from etna.models import CatBoostModelMultiSegment, LinearPerSegmentModel, NaiveModel
from etna.transforms import StandardScalerTransform, LagTransform, SegmentEncoderTransform, TimeSeriesImputerTransform, DateFlagsTransform
from etna.metrics import SMAPE, MAE
from etna.analysis import plot_backtest, plot_forecast
from copy import deepcopy
import warnings
# Proof-of-concept driver: builds a synthetic dataset, runs a backtest over a
# StackingEnsemble of two NaiveModel pipelines, and prints the elapsed time,
# the value stored under 'counter' in a shelve file, and the mean MAE.

# Suppress all warnings so the output is limited to the three prints below.
warnings.filterwarnings("ignore")

# Settings that the script repeats in several calls.
HORIZON = 7
N_FOLDS = 2
JOBLIB_PARAMS = dict(verbose=0, backend="multiprocessing", mmap_mode="c")

# Synthetic data: 20 segments, 500 periods each, starting on 2021-01-01.
df = generate_ar_df(periods=500, start_time="2021-01-01", n_segments=20)
ts = TSDataset(
    TSDataset.to_dataset(df[["target", "segment", "timestamp"]]),
    freq="D",
)

# Two pipelines that differ only in the argument given to NaiveModel (7 vs 1).
pipe_naive_one = Pipeline(model=NaiveModel(7), horizon=HORIZON)
pipe_naive_two = Pipeline(model=NaiveModel(1), horizon=HORIZON)

stack_pipe = StackingEnsemble(
    pipelines=[pipe_naive_one, pipe_naive_two],
    n_jobs=1,
    # Each call receives its own copy of the joblib settings.
    joblib_params=dict(JOBLIB_PARAMS),
    n_folds=N_FOLDS,
)
stack_pipe_copy = deepcopy(stack_pipe)

import time
import shelve

# Reset the persistent counter before the measured run.
# NOTE(review): presumably the library code under test increments this value
# on every real fold run -- that code is not visible here; confirm.
with shelve.open('counter') as db:
    db['counter'] = 0

start = time.monotonic()
backtest_result = stack_pipe.backtest(
    ts,
    n_jobs=1,
    n_folds=N_FOLDS,
    metrics=[MAE()],
    joblib_params=dict(JOBLIB_PARAMS),
)
# backtest returns three values; only the first (metrics) is used below.
metrics, _, _ = backtest_result
elapsed = time.monotonic() - start
print(elapsed)

# Report the counter value as it stands after the backtest.
with shelve.open('counter') as db:
    print(db['counter'])

print(metrics.MAE.mean())
Run with the cache enabled: `ETNA_CACHE=1 python script.py`
🚀 Deployed on https://deploy-preview-655--etna-docs.netlify.app