timesfm icon indicating copy to clipboard operation
timesfm copied to clipboard

Is there any certified case that timesfm could predict the trend of a stock?

Open JackeyLee007 opened this issue 1 year ago • 2 comments

I tried it with China's A-share 000001 and found that it was not accurate. Is there anything that I used improperly?

The blue-grey line is real data, and the red line is predicted value. I used the value from 20210101 to 20210430 to predict the value of the next 32 days.

stock_trend

import timesfm
import os
import numpy as np
import akshare as ak
import pandas as pd

# Forecast 32 days of closing prices for A-share 000001 with TimesFM, using
# the daily bars from 2021-01-01 to 2021-04-30 as context.
stHist = ak.stock_zh_a_hist('000001', 'daily', '20210101', '20210430')
# forecast_on_df needs the columns unique_id / ds / <value_name>.
# '日期' is the trade date and '收盘' is the closing price in the akshare frame.
stHist = stHist.rename(columns={'日期': 'ds', '收盘': 'value'})
stHist['ds'] = pd.to_datetime(stHist['ds'])
# unique_id identifies a time series, not a row: forecast_on_df groups rows by
# it. Giving every row its own id (as range(1, len + 1) did) makes the model
# see many one-point series, so the whole history must share a single id.
stHist['unique_id'] = '000001'
print(stHist)

tfm = timesfm.TimesFm(
    context_len=128,
    horizon_len=32,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cpu',
)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# NOTE(review): freq='D' produces calendar-day future timestamps, while the
# input only contains trading days -- confirm this is the intended calendar.
forecast_df = tfm.forecast_on_df(
    inputs=stHist,
    freq='D',
    value_name='value',
    num_jobs=1,
)

JackeyLee007 avatar Jul 04 '24 01:07 JackeyLee007

I made some modifications to predict only the next day's price. I run the inference every day instead of predicting multiple days ahead. So far its accuracy is 67% (14/21) for up-or-down predictions, which I find very satisfactory. At least my investment performance increased a lot. You may need to adjust the parameters in the code (shift_d, discard_th) every day for better accuracy.

113232

Following code works in Google Colab

! git clone https://github.com/google-research/timesfm.git
%cd timesfm
!pip install -e .
!pip install utilsforecast
!pip install transformers accelerate bitsandbytes
!huggingface-cli login --token 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
#!pip install --upgrade numpy
!pip install numpy

import datetime
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import timesfm
from timesfm import TimesFm

# Walk-forward back-test: for each of the last `pred_len` trading days, feed
# TimesFM the `context_len` observations that precede that day, take the
# one-step-ahead forecast, and plot the forecasts against the actual prices.

# Parameters that usually need tuning between runs.
# Per the original author: raising the flat-suppression threshold improves
# accuracy but shortens the usable horizon (more forecasts are discarded).
date_y = 2024  # year of the last observation
date_m = 7  # month
date_d = 11  # day
# Extra plotting offset in days. It is 0 because every forecast is stamped
# with the date it actually predicts; the earlier hand-tuned shift was only
# compensating for forecasts being laid out on the wrong dates.
shift_d = 0
discard_th = 0.00  # flat-suppression threshold: forecasts that move by this percentage or less are dropped
codelist = ["2558.T"]  # ticker symbol(s)
ohlc = "Adj Close"  # which price column to forecast

# Latest prices, entered by hand (only needed when the download lacks them).
date_str1 = "2024-7-10"
date_str2 = "2024-7-11"
prices1 = [24240.0, 24285.0, 24225.0, 25870.0]
prices2 = [24430.0, 24500.0, 24420.0, 26130.0]
bk_str = "gpu"  # Backend

ohlc2i = {"Open": 0, "High": 1, "Low": 2, "Adj Close": 3}
cur_price1 = prices1[ohlc2i[ohlc]]
cur_price2 = prices2[ohlc2i[ohlc]]

start = datetime.date(2020, 1, 1)
end_t = datetime.date(date_y, date_m, date_d)  # or datetime.date.today()

# Download once; the same frame used to be fetched twice.
raw_prices = yf.download(codelist, start=start, end=end_t)
print(raw_prices)

data_train = raw_prices[ohlc].dropna()  # drop missing values
# NOTE(review): some yfinance versions appear to return one column per ticker
# here; reduce that case to a plain Series. Confirm against the installed version.
if isinstance(data_train, pd.DataFrame):
    data_train = data_train.iloc[:, 0]

if data_train.empty:
    raise ValueError("データが空です。期間を変更して再度試してください。")

# Insert the manual prices under real timestamps (not strings) so the index
# stays a sorted DatetimeIndex.
data_train.loc[pd.Timestamp(date_str1)] = cur_price1
data_train.loc[pd.Timestamp(date_str2)] = cur_price2
data_train = data_train.sort_index()
print(data_train)

# The plotted "actual" series is the same data as the training series.
data_all = data_train.copy()

context_len = 512  # context length, in observations
horizon_len = 1  # forecast horizon, in steps

if len(data_train) < context_len:
    raise ValueError(f"データの長さがコンテキスト長({context_len})より短いです。")

# Initialise TimesFM and load the checkpoint.
tfm = TimesFm(
    context_len=context_len,
    horizon_len=horizon_len,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend=bk_str,
)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

frequency_input = [0]  # frequency category passed to TimesFM (0 = high-frequency data)

pred_len = 60  # number of one-step forecasts to produce, newest first
values = data_train.to_numpy()
n_obs = len(values)
# The forecast made from the full history targets a day with no observation
# yet; stamp it with the next business day (exchange holidays are ignored).
next_day = data_train.index[-1] + pd.offsets.BDay(1)

kept_dates = []
kept_forecasts = []
prev_pf = None
for i in range(min(pred_len, n_obs)):
    # The context is the half-open positional window [begin_pos, end_pos), so
    # it always ends on a trading day and never repeats over a weekend.
    end_pos = n_obs - i
    begin_pos = max(0, end_pos - context_len)
    target_date = next_day if i == 0 else data_train.index[end_pos]

    point_forecast, _ = tfm.forecast(
        [values[begin_pos:end_pos]],
        freq=frequency_input,
    )
    # First (only) input series, first horizon step, as a plain float.
    pf = float(np.asarray(point_forecast)[0][0])

    # Flat suppression: the six most recent forecasts are always kept; older
    # ones are dropped unless they differ from the previously kept forecast
    # by more than discard_th percent.
    if i > 5 and abs(100 * (pf - prev_pf) / prev_pf) <= discard_th:
        continue
    kept_dates.append(target_date)
    kept_forecasts.append(pf)
    prev_pf = pf

# Forecasts were collected newest-first; put them in chronological order.
concat_pf = np.array(kept_forecasts[::-1])
print("concat_pf", concat_pf, concat_pf.shape)

# Show the forecasts on the dates they predict.
forecast_dates = pd.DatetimeIndex(kept_dates[::-1]) + pd.Timedelta(days=shift_d)
forecast_series = pd.Series(concat_pf, index=forecast_dates)
print("forecast_series", forecast_series)

plt.figure(figsize=(14, 7))
plt.plot(data_all.index, data_all.values, label="Actual Prices")
plt.plot(forecast_series.index, forecast_series.values, label="Forecasted Prices")
start_date = datetime.date(2024, 4, 15)
end_date = datetime.date(2024, 11, 15)
plt.xlim(start_date, end_date)
plt.ylim(22000, 27000)
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()

Arata-Nakajima avatar Jul 11 '24 02:07 Arata-Nakajima

I made some modifications to predict only the next day's price. I run the inference every day instead of predicting multiple days ahead. So far its accuracy is 67% (14/21) for up-or-down predictions, which I find very satisfactory. At least my investment performance increased a lot. You may need to adjust the parameters in the code (shift_d, discard_th) every day for better accuracy.

113232

Following code works in Google Colab

! git clone https://github.com/google-research/timesfm.git
%cd timesfm
!pip install -e .
!pip install utilsforecast
!pip install transformers accelerate bitsandbytes
!huggingface-cli login --token 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
#!pip install --upgrade numpy
!pip install numpy

import datetime
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import timesfm
from timesfm import TimesFm

# Walk-forward back-test: for each of the last `pred_len` trading days, feed
# TimesFM the `context_len` observations that precede that day, take the
# one-step-ahead forecast, and plot the forecasts against the actual prices.

# Parameters that usually need tuning between runs.
# Per the original author: raising the flat-suppression threshold improves
# accuracy but shortens the usable horizon (more forecasts are discarded).
date_y = 2024  # year of the last observation
date_m = 7  # month
date_d = 11  # day
# Extra plotting offset in days. It is 0 because every forecast is stamped
# with the date it actually predicts; the earlier hand-tuned shift was only
# compensating for forecasts being laid out on the wrong dates.
shift_d = 0
discard_th = 0.00  # flat-suppression threshold: forecasts that move by this percentage or less are dropped
codelist = ["2558.T"]  # ticker symbol(s)
ohlc = "Adj Close"  # which price column to forecast

# Latest prices, entered by hand (only needed when the download lacks them).
date_str1 = "2024-7-10"
date_str2 = "2024-7-11"
prices1 = [24240.0, 24285.0, 24225.0, 25870.0]
prices2 = [24430.0, 24500.0, 24420.0, 26130.0]
bk_str = "gpu"  # Backend

ohlc2i = {"Open": 0, "High": 1, "Low": 2, "Adj Close": 3}
cur_price1 = prices1[ohlc2i[ohlc]]
cur_price2 = prices2[ohlc2i[ohlc]]

start = datetime.date(2020, 1, 1)
end_t = datetime.date(date_y, date_m, date_d)  # or datetime.date.today()

# Download once; the same frame used to be fetched twice.
raw_prices = yf.download(codelist, start=start, end=end_t)
print(raw_prices)

data_train = raw_prices[ohlc].dropna()  # drop missing values
# NOTE(review): some yfinance versions appear to return one column per ticker
# here; reduce that case to a plain Series. Confirm against the installed version.
if isinstance(data_train, pd.DataFrame):
    data_train = data_train.iloc[:, 0]

if data_train.empty:
    raise ValueError("データが空です。期間を変更して再度試してください。")

# Insert the manual prices under real timestamps (not strings) so the index
# stays a sorted DatetimeIndex.
data_train.loc[pd.Timestamp(date_str1)] = cur_price1
data_train.loc[pd.Timestamp(date_str2)] = cur_price2
data_train = data_train.sort_index()
print(data_train)

# The plotted "actual" series is the same data as the training series.
data_all = data_train.copy()

context_len = 512  # context length, in observations
horizon_len = 1  # forecast horizon, in steps

if len(data_train) < context_len:
    raise ValueError(f"データの長さがコンテキスト長({context_len})より短いです。")

# Initialise TimesFM and load the checkpoint.
tfm = TimesFm(
    context_len=context_len,
    horizon_len=horizon_len,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend=bk_str,
)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

frequency_input = [0]  # frequency category passed to TimesFM (0 = high-frequency data)

pred_len = 60  # number of one-step forecasts to produce, newest first
values = data_train.to_numpy()
n_obs = len(values)
# The forecast made from the full history targets a day with no observation
# yet; stamp it with the next business day (exchange holidays are ignored).
next_day = data_train.index[-1] + pd.offsets.BDay(1)

kept_dates = []
kept_forecasts = []
prev_pf = None
for i in range(min(pred_len, n_obs)):
    # The context is the half-open positional window [begin_pos, end_pos), so
    # it always ends on a trading day and never repeats over a weekend.
    end_pos = n_obs - i
    begin_pos = max(0, end_pos - context_len)
    target_date = next_day if i == 0 else data_train.index[end_pos]

    point_forecast, _ = tfm.forecast(
        [values[begin_pos:end_pos]],
        freq=frequency_input,
    )
    # First (only) input series, first horizon step, as a plain float.
    pf = float(np.asarray(point_forecast)[0][0])

    # Flat suppression: the six most recent forecasts are always kept; older
    # ones are dropped unless they differ from the previously kept forecast
    # by more than discard_th percent.
    if i > 5 and abs(100 * (pf - prev_pf) / prev_pf) <= discard_th:
        continue
    kept_dates.append(target_date)
    kept_forecasts.append(pf)
    prev_pf = pf

# Forecasts were collected newest-first; put them in chronological order.
concat_pf = np.array(kept_forecasts[::-1])
print("concat_pf", concat_pf, concat_pf.shape)

# Show the forecasts on the dates they predict.
forecast_dates = pd.DatetimeIndex(kept_dates[::-1]) + pd.Timedelta(days=shift_d)
forecast_series = pd.Series(concat_pf, index=forecast_dates)
print("forecast_series", forecast_series)

plt.figure(figsize=(14, 7))
plt.plot(data_all.index, data_all.values, label="Actual Prices")
plt.plot(forecast_series.index, forecast_series.values, label="Forecasted Prices")
start_date = datetime.date(2024, 4, 15)
end_date = datetime.date(2024, 11, 15)
plt.xlim(start_date, end_date)
plt.ylim(22000, 27000)
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()

该代码存在问题,日期没对上,很多情况没考虑到。比如一开始预测的结果不可能和真实值差这么多,所谓的 67%准确率只是随机的结果罢了, 预测股票还是会有其它深度学习模型常用的滞后性。后面我再试试协变量的效果

zhongtianda avatar May 14 '25 13:05 zhongtianda