Type 2: Absolute Time-Series
For these datasets, we can treat the variables of all classes together and avoid trying to learn class-independent effects. In this sense, each of these datasets can essentially be viewed as a single time-series dataset.
Possible actions:
|
import pandas as pd
from synthesized import MetaExtractor, HighDimSynthesizer
# Load the Bitcoin price history from CSV and print the raw table.
df = pd.read_csv("bitcoin_price.csv")
print(df)
Date Open High Low Close Volume Market Cap 0 Jul 31, 2017 2763.24 2889.62 2720.61 2875.34 8.605750e+08 45535800000 1 Jul 30, 2017 2724.39 2758.53 2644.85 2757.18 7.059430e+08 44890700000 2 Jul 29, 2017 2807.02 2808.76 2692.80 2726.45 8.037460e+08 46246700000 3 Jul 28, 2017 2679.73 2897.45 2679.73 2809.01 1.380100e+09 44144400000 4 Jul 27, 2017 2538.71 2693.32 2529.34 2671.78 7.891040e+08 41816500000 ... ... ... ... ... ... ... 1551 May 02, 2013 116.38 125.60 92.28 105.21 NaN 1292190000 1552 May 01, 2013 139.00 139.89 107.72 116.99 NaN 1542820000 1553 Apr 30, 2013 144.00 146.93 134.05 139.00 NaN 1597780000 1554 Apr 29, 2013 134.44 147.49 134.00 144.54 NaN 1491160000 1555 Apr 28, 2013 135.30 135.98 132.10 134.21 NaN 1500520000 [1556 rows x 7 columns]
# Index by parsed dates, order the rows chronologically, then restore "Date"
# as a column and keep everything from positional row 248 onwards.
dated = df.set_index("Date")
dated.index = pd.to_datetime(dated.index)
df = dated.sort_index().reset_index().iloc[248:]

# Columns that are replaced by their first differences.
price_columns = ["Open", "High", "Low", "Close", "Market Cap"]

# The first row has no predecessor, so its NaN difference is filled with the
# starting absolute value of the column.
first_differences = {
    f"d_{name.replace(' ', '_')}": df[name].diff().fillna(df[name].iloc[0])
    for name in price_columns
}

dfb = (
    df.assign(**first_differences)
    .drop(columns=price_columns)
    .reset_index(drop=True)
)
print(dfb)
Date Volume d_Open d_High d_Low d_Close d_Market_Cap 0 2016-01-15 1.533510e+08 430.25 430.25 364.33 364.33 6.489870e+09 1 2016-01-16 1.203520e+08 -65.18 -39.69 -9.42 23.21 -9.820800e+08 2 2016-01-17 4.531960e+07 22.08 0.40 25.18 -5.24 3.344800e+08 3 2016-01-18 5.440390e+07 -5.42 -2.86 -3.42 4.87 -8.033000e+07 4 2016-01-19 4.681980e+07 5.30 -0.37 2.30 -7.02 8.157000e+07 ... ... ... ... ... ... ... 559 2017-07-27 7.891040e+08 -39.06 82.56 78.54 142.33 -6.385000e+08 560 2017-07-28 1.380100e+09 141.02 204.13 150.39 137.23 2.327900e+09 561 2017-07-29 8.037460e+08 127.29 -88.69 13.07 -82.56 2.102300e+09 562 2017-07-30 7.059430e+08 -82.63 -50.23 -47.95 30.73 -1.356000e+09 563 2017-07-31 8.605750e+08 38.85 131.09 75.76 118.16 6.451000e+08 [564 rows x 7 columns]
# Extract the metadata of the differenced frame; both the model and the
# synthesizer below are constructed from it.
df_meta = MetaExtractor.extract(dfb)
from synthesized.model import DataFrameModel
# Fit a DataFrameModel on the differenced data and plot the result.
fitted_model = DataFrameModel(df_meta).fit(dfb)
fitted_model.plot()

# Train the synthesizer on the differenced data, then request as many
# synthetic rows as the training frame contains.
synth = HighDimSynthesizer(df_meta)
synth.learn(df_train=dfb)
n_rows = len(dfb)
df_synth = synth.synthesize(num_rows=n_rows)
# Put the synthetic rows in date order and restore "Date" as a column.
by_date = df_synth.set_index("Date").sort_index()
df_synth = by_date.reset_index()
# Rebuild an absolute "High" series by cumulatively summing the synthetic
# differences. When dfb was built, the first d_High entry was filled with the
# starting price rather than a difference, so a cumulative sum over the
# differenced column is the inverse of that preprocessing step.
# The differenced column is dropped once it has been integrated.
df3_synth = df_synth.assign(
    High=df_synth["d_High"].cumsum()
).drop(
    columns=["d_High"]
).sort_index()