# Load the ETF50 constituent list; STOCK_ID is read as str so leading
# zeros in ticker codes are preserved.
etf50_df = pd.read_csv("data/ETF50.csv", dtype={"STOCK_ID": str})
etf50_id = etf50_df["STOCK_ID"]
# Daily price history for the selected stock over 2011-2021.
data_df = load_stock(stock_index, start_year=2011, end_year=2021)
data_df
从之前的实验我们可以知道,比起直接使用原始资料,
将资料转换成数值间的差值可以获得更好的效果,
最后使用 Z-Score Normalize 避免特征间的数值差异影响输出结果。
# Feature engineering: difference the raw prices, then z-score normalize
# so scale differences between features don't dominate the model.
# FIX: take a copy — the original `norm_df = data_df` aliased the raw
# DataFrame, so the in-place .loc assignments below silently mutated
# data_df, which the train/test split further down still reads.
norm_df = data_df.copy()
# Open/High/Low become offsets from the same day's (raw) Close.
norm_df.loc[:, "Open"] = norm_df["Open"] - norm_df["Close"]
norm_df.loc[:, "High"] = norm_df["High"] - norm_df["Close"]
norm_df.loc[:, "Low"] = norm_df["Low"] - norm_df["Close"]
# Close is differenced last, because the three lines above need the raw Close.
norm_df.loc[:, "Close"] = norm_df["Close"] - norm_df["Close"].shift(1)
# Z-score normalize each column, then drop the first row (NaN from shift).
norm_df = (norm_df - norm_df.mean()) / norm_df.std()
norm_df = norm_df.dropna()
norm_df
一样记得别把未来的资料带进训练集了。
# Split by date so the test period lies strictly after the training period
# (no look-ahead leakage between the two sets).
train_mask = (data_df.index > "2014-01-01") & (data_df.index < "2018-12-31")
test_mask = (data_df.index > "2019-01-01") & (data_df.index < "2019-12-31")
# FIX: X_train was built from test_mask (copy-paste bug), so the model
# trained on the test period and X_train/y_train lengths disagreed.
X_train = data_df[train_mask]
# Target is the NEXT day's close (shift(-1)), masked to the same rows.
y_train = data_df["Close"].shift(-1)[train_mask]
X_test = data_df[test_mask]
y_test = data_df["Close"].shift(-1)[test_mask]
# NOTE(review): features are taken from data_df, not the normalized
# norm_df computed above — confirm which one the model should train on.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
如果是在 4 年前,我们会需要写一堆 pandas 或 numpy 程序,
把资料转换成 LSTM 使用的时间序列格式;然而现在(2021 年)只要使用现成的函式就好了。
# Number of past trading days each training sample sees.
watch_days = 10
# Batches of shape (128, watch_days, 5): 5 features per day, with the
# next-day Close (y_train) as the target.
data_gen = TimeseriesGenerator(
    X_train, y_train, length=watch_days, sampling_rate=1, batch_size=128
)
# Single-layer LSTM regressor producing one value (predicted next Close).
model = Sequential()
model.add(LSTM(100, activation="relu", input_shape=(watch_days, 5)))
model.add(Dense(1))
model.compile(optimizer="adam", loss="mse")
# Fit the model. FIX: Model.fit accepts generators/Sequence objects
# directly; fit_generator is deprecated and removed in TF 2.x.
# NOTE(review): steps_per_epoch=1 trains on only the FIRST batch each
# epoch — confirm this is intentional rather than len(data_gen).
model.fit(data_gen, steps_per_epoch=1, epochs=500, verbose=0)
<<: [DAY 28] 用google sheet 做简易UI介面(3/3)
“There is an infinite amount of hope in the unive...
点开Project Settings的other,把Scripting Backend改为IL2CP...
前面我们自己写了登入登出及注册,但其实laravel有提供我们身份验证的套件,这些工具包会自动提供我...
Youtube连结:https://bit.ly/2MbU9cR 前阵子在社群广为流传的议题「轰都...
前言 在上一篇文章中我们简单的介绍到如何使用 React.createElement(),并搭配 R...