binder

示例#

|在Colab中打开|

[1]:
# Optional: uncomment the two lines below to install the packages this
# notebook imports (for example on a fresh Colab or Binder runtime).
# !pip install fracdiff
# !pip install matplotlib pandas pandas_datareader seaborn statsmodels
[2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader
import seaborn
import statsmodels.tsa.stattools as stattools
from fracdiff import Fracdiff, FracdiffStat, fdiff
[3]:
# Apply seaborn's "white" style before any figure is drawn.
seaborn.set_style("white")

系数#

[4]:
from fracdiff.base import fdiff_coef

# Second argument handed to fdiff_coef for every curve
# (presumably the number of coefficients returned -- confirm in the fracdiff docs).
n_terms = 6
# (lowest d, highest d) shown in the left and in the right panel.
d_ranges = [(0.0, 1.0), (1.0, 2.0)]

fig, axes = plt.subplots(1, len(d_ranges), figsize=(24, 6))

# Both panels share the same recipe, so draw them in one loop on explicit
# Axes objects instead of repeating the pyplot calls for each panel.
for ax, (d_min, d_max) in zip(axes, d_ranges):
    ax.set_title(
        f"Coefficients of fractional differentiation for d={d_min:.1f}-{d_max:.1f}"
    )
    # Five evenly spaced orders per panel, both end points included.
    for d in np.linspace(d_min, d_max, 5):
        ax.plot(fdiff_coef(d, n_terms), label=f"d={d:.2f}")
    ax.legend()

plt.show()
../../../_images/examples_transformation_fracdiff_example_prado_6_0.png

S&P 500#

[5]:
def fetch_yahoo(ticker, begin="1998-01-01", end="2020-09-30"):
    """Return the "Adj Close" column read for ``ticker`` from the "yahoo" source.

    ``ticker``, ``begin`` and ``end`` are forwarded unchanged to
    ``pandas_datareader.data.DataReader``; the result is a pandas.Series.
    """
    frame = pandas_datareader.data.DataReader(ticker, "yahoo", begin, end)
    return frame["Adj Close"]


def fetch_fred(ticker, begin="1998-01-01", end="2020-09-30"):
    """Return the first column read for ``ticker`` from the "fred" source.

    ``ticker``, ``begin`` and ``end`` are forwarded unchanged to
    ``pandas_datareader.data.DataReader``; the result is a pandas.Series.
    """
    frame = pandas_datareader.data.DataReader(ticker, "fred", begin, end)
    return frame.iloc[:, 0]
[6]:
# Fetch the "^GSPC" series (the S&P 500 of this section) with the default
# date range of fetch_yahoo, then preview its first rows.
spx = fetch_yahoo("^GSPC")
spx.head()
[6]:
Date
1997-12-31    970.429993
1998-01-02    975.039978
1998-01-05    977.070007
1998-01-06    966.580017
1998-01-07    964.000000
Name: Adj Close, dtype: float64
[7]:
# Number of observations in the fetched series.
spx.shape
[7]:
(5725,)

绘制分数差分#

[8]:
# Orders of differentiation to compare: 0.1, 0.2, ..., 0.9.
orders = np.linspace(0.1, 0.9, 9)

# One stacked panel per order; the panel count follows ``orders`` so the two
# cannot drift apart. Vertical spacing is set explicitly -- calling
# tight_layout() before any axes exist would have no effect.
fig, axes = plt.subplots(len(orders), 1, figsize=(24, 24))
fig.subplots_adjust(hspace=0.4)

for ax, d in zip(axes, orders):
    diff = fdiff(spx, d, mode="valid")
    # Label the returned values with the trailing dates of spx, so the code
    # still works when fdiff returns fewer points than it was given.
    diff = pd.Series(diff, index=spx.index[-diff.size :])
    ax.set_title(f"S&P 500, {d:.1f}th differentiated")
    ax.plot(diff, linewidth=0.4)

plt.show()
../../../_images/examples_transformation_fracdiff_example_prado_12_0.png

fracdiff 的平稳性#

[9]:
def adfstat(d):
    """Return the ADF test statistic of ``spx`` differentiated with order ``d``.

    The statistic is the first element of the ``stattools.adfuller`` result.
    """
    differentiated = fdiff(spx, d, mode="valid")
    return stattools.adfuller(differentiated)[0]


def correlation(d):
    """Return the correlation between ``spx`` and its ``d``-th order differentiation.

    Only the last ``diff.size`` values of ``spx`` are used so that both inputs
    of ``np.corrcoef`` have the same length.
    """
    differentiated = fdiff(spx, d, mode="valid")
    spx_tail = spx[-differentiated.size :]
    return np.corrcoef(spx_tail, differentiated)[0, 1]


# Orders of differentiation to scan.
ds = np.linspace(0.0, 1.0, 10)

# Plain comprehensions instead of np.vectorize: without ``otypes`` np.vectorize
# calls the function one extra time on the first element just to infer the
# output dtype, which here means a redundant ADF test / differentiation.
stats = np.array([adfstat(d) for d in ds])
corrs = np.array([correlation(d) for d in ds])

# Critical values of the ADF test: element 4 of the adfuller result is a
# mapping keyed by significance level ("5%" is used below).
crit = stattools.adfuller(spx)[4]

# Two y-axes on one figure: ADF statistic on the left, correlation on the right.
fig, ax_stat = plt.subplots(figsize=(24, 8))
ax_corr = ax_stat.twinx()

ax_stat.plot(ds, stats, color="blue", label="ADF statistics (left)")
ax_corr.plot(ds, corrs, color="orange", label="correlation (right)")
ax_stat.axhline(y=crit["5%"], linestyle="--", color="k", label="5% critical value")

ax_stat.set_title("Stationarity and memory of fractionally differentiated S&P 500")
# A figure-level legend collects the labelled artists of both axes.
fig.legend()
plt.show()
../../../_images/examples_transformation_fracdiff_example_prado_14_0.png

在保留记忆的同时进行差分#

[10]:
# Reshape the price series into a single-column 2-D array for FracdiffStat.
X = spx.values.reshape(-1, 1)

fs = FracdiffStat(mode="valid")
Xdiff = fs.fit_transform(X)

# Flatten the transformed column once and reuse it for both diagnostics.
flat_diff = Xdiff.reshape(-1)
# Element 1 of the adfuller result is the p-value.
pvalue = stattools.adfuller(flat_diff)[1]
# Compare against the trailing part of X that has the same length as Xdiff.
corr = np.corrcoef(X[-Xdiff.size :, 0], flat_diff)[0, 1]

report = [
    f"* Order: {fs.d_[0]:.2f}",
    f"* ADF p-value: {100 * pvalue:.2f} %",
    f"* Correlation with the original time-series: {corr:.2f}",
]
print("\n".join(report))
* Order: 0.84
* ADF p-value: 3.41 %
* Correlation with the original time-series: 0.66
[11]:
# Put the differentiated values back on the trailing dates of the original series.
spx_diff = pd.Series(Xdiff.reshape(-1), index=spx.index[-Xdiff.size :])

fig, ax_s = plt.subplots(figsize=(24, 6))
ax_s.set_title("S&P 500 and its differentiation preserving memory")
# Second y-axis so the two series can use different scales.
ax_d = ax_s.twinx()

plot_s = ax_s.plot(spx, color="blue", linewidth=0.4, label="S&P 500 (left)")
diff_label = f"S&P 500, {fs.d_[0]:.2f} th diff (right)"
plot_d = ax_d.plot(spx_diff, color="orange", linewidth=0.4, label=diff_label)

# The lines live on different axes; merge their handles so that a single
# legend lists both of them.
plots = [*plot_s, *plot_d]
ax_s.legend(plots, [line.get_label() for line in plots], loc=0)
plt.show()
../../../_images/examples_transformation_fracdiff_example_prado_17_0.png

其他金融数据#

[12]:
# (column name, ticker) pairs fetched through fetch_yahoo.
nt_yahoo = [
    ("S&P 500", "^GSPC"),
    ("Nikkei 225", "^N225"),
    ("Shanghai Comp", "^SSEC"),
    ("US 10y", "^TNX"),
    ("Apple", "AAPL"),
]
# (column name, ticker) pairs fetched through fetch_fred.
nt_fred = [
    ("USD/JPY", "DEXJPUS"),
    ("Gold", "GOLDPMGBD228NLBM"),
    ("Crude Oil", "DCOILWTICO"),
]

dfy = pd.DataFrame({name: fetch_yahoo(ticker) for name, ticker in nt_yahoo})
dff = pd.DataFrame({name: fetch_fred(ticker) for name, ticker in nt_fred})

# Join both sources on their date index, carry the last known value forward
# over gaps, and keep the rows from 1998-01-05 onwards.
# ``.ffill()`` replaces ``fillna(method="ffill")``, whose ``method`` argument
# is deprecated in recent pandas releases.
prices = pd.concat([dfy, dff], axis=1).ffill().loc["1998-01-05":]
[13]:
# Display the combined price table (pandas abbreviates the middle rows).
prices
[13]:
S&P 500 Nikkei 225 Shanghai Comp US 10y Apple USD/JPY Gold Crude Oil
1998-01-05 977.070007 14956.839844 1220.473022 5.498 0.122484 133.99 284.40 16.95
1998-01-06 966.580017 14896.400391 1233.619995 5.473 0.146113 133.88 282.80 16.64
1998-01-07 964.000000 15028.169922 1244.069946 5.527 0.135022 131.70 281.60 16.91
1998-01-08 956.049988 15019.179688 1237.162964 5.465 0.140326 132.49 281.65 17.01
1998-01-09 927.690002 14995.099609 1239.901001 5.379 0.140326 131.52 278.70 16.65
... ... ... ... ... ... ... ... ...
2020-09-24 3246.590088 23087.820312 3331.521973 0.666 108.220001 105.42 1861.75 40.11
2020-09-25 3298.459961 23204.619141 3331.521973 0.659 112.279999 105.59 1859.70 40.06
2020-09-28 3351.600098 23511.619141 3331.521973 0.663 114.959999 105.50 1864.30 40.47
2020-09-29 3335.469971 23539.099609 3331.521973 0.645 114.089996 105.68 1883.95 39.03
2020-09-30 3363.000000 23185.119141 3331.521973 0.677 115.809998 105.58 1886.90 40.05

5933 rows × 8 columns

[14]:
def stats(X):
    """Return a list with the ADF test statistic of each column of ``X``.

    ``X`` is indexed as ``X[:, i]``, i.e. it is treated as a 2-D array whose
    columns are separate series.
    """
    adf_per_column = []
    for col_idx in range(X.shape[1]):
        column = X[:, col_idx]
        adf_per_column.append(stattools.adfuller(column)[0])
    return adf_per_column


# Orders of differentiation to scan: 0.0, 0.1, ..., 1.0.
ds = np.linspace(0.0, 1.0, 11)

# One row of ADF statistics (one value per asset) for every order d.
price_matrix = prices.values
adf_rows = []
for d in ds:
    differentiated = Fracdiff(d, mode="valid").fit_transform(price_matrix)
    adf_rows.append(stats(differentiated))

df_stats = pd.DataFrame(adf_rows, index=ds, columns=prices.columns)

df_stats
[14]:
S&P 500 Nikkei 225 Shanghai Comp US 10y Apple USD/JPY Gold Crude Oil
0.0 0.968324 -1.170435 -2.116867 -1.153880 4.951773 -2.178117 0.049131 -2.434859
0.1 0.921352 -1.159694 -2.157641 -1.191706 4.862630 -2.132538 0.028313 -2.412764
0.2 0.802339 -1.311269 -2.128508 -1.240243 4.745854 -2.145354 -0.028585 -2.369947
0.3 0.717212 -1.359395 -2.250318 -1.268093 4.592151 -2.120619 -0.055576 -2.324401
0.4 0.565597 -1.313894 -2.257933 -1.430662 4.374133 -2.204098 -0.117716 -2.297110
0.5 0.290000 -1.587270 -2.328407 -1.673761 4.006742 -2.392610 -0.172077 -2.330552
0.6 -0.022086 -1.551456 -2.549346 -1.959220 3.325249 -2.481928 -0.410898 -2.502265
0.7 -0.728352 -2.142965 -2.883885 -2.162136 2.125019 -3.043593 -0.847778 -2.952382
0.8 -1.865212 -3.419540 -4.061263 -3.273391 0.207860 -4.235373 -1.633992 -4.159692
0.9 -4.699825 -6.702866 -7.249331 -6.317477 -3.184654 -7.890231 -3.769678 -7.355233
1.0 -14.739458 -79.308045 -13.078267 -56.638622 -12.503627 -20.568982 -14.873720 -12.198547
[15]:
# Element 4 of the adfuller result holds the critical values keyed by
# significance level; the "5%" entry is drawn as a reference line.
crit = stattools.adfuller(prices["S&P 500"].values)[4]

ax = df_stats.plot(figsize=(24, 8), ylim=(-30, 5))
ax.axhline(y=crit["5%"], linestyle="--", color="gray")
ax.set_title("ADF statistics of fractionally differentiated prices")
plt.show()
../../../_images/examples_transformation_fracdiff_example_prado_22_0.png
[ ]:


使用 nbsphinx 生成。Jupyter 笔记本可以在这里找到。