Research entry
Bitcoin Price Time Series Analysis
2021 · Academic Archive
ARIMA and SARIMA modelling of Bitcoin price data, including stationarity testing with Augmented Dickey-Fuller, ACF/PACF analysis, and price forecasting.
Python Time Series ARIMA statsmodels pandas
Overview
A time series analysis project on historical Bitcoin price data (OHLCV format). The goal was to model and forecast Bitcoin closing prices using classical time series methods — stationarity testing, differencing, ACF/PACF analysis, and ARIMA/SARIMA modelling.
Original Script (2021)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pandas.plotting import lag_plot
import pmdarima as pm
# Load Bitcoin OHLCV data; the 'Date' column is parsed to datetimes on read
bitcoin = pd.read_csv("coin_Bitcoin.csv", parse_dates=['Date'])
# Fill missing values: zero for Volume/Marketcap, and carry the last known
# value forward for the Open/High/Low/Close price columns
# NOTE(review): fillna(method='ffill') is deprecated in newer pandas
bitcoin['Volume'].fillna(value=0, inplace=True)
bitcoin['Marketcap'].fillna(value=0, inplace=True)
bitcoin['Open'].fillna(method='ffill', inplace=True)
bitcoin['High'].fillna(method='ffill', inplace=True)
bitcoin['Low'].fillna(method='ffill', inplace=True)
bitcoin['Close'].fillna(method='ffill', inplace=True)
# Set date index, then drop the now-redundant 'Date' column together with
# the 'SNo', 'Name' and 'Symbol' columns
bitcoin.index = bitcoin['Date']
bitcoin = bitcoin.drop(columns=['Date', 'SNo', 'Name', 'Symbol'])
# Stationarity test (Augmented Dickey-Fuller) on the undifferenced closing
# price; result[0] is printed as the test statistic, result[1] as the p-value
result = adfuller(bitcoin['Close'].dropna())
print(f'ADF Statistic: {result[0]:.4f}')
print(f'p-value: {result[1]:.4f}')
# First difference to achieve stationarity
# (the first row has no predecessor, hence the dropna() calls below)
bitcoin['Close_diff'] = bitcoin['Close'].diff()
# ACF and PACF plots of the differenced series, up to 40 lags
plot_acf(bitcoin['Close_diff'].dropna(), lags=40)
plot_pacf(bitcoin['Close_diff'].dropna(), lags=40)
plt.show()
# Lag plot of the undifferenced closing price
lag_plot(bitcoin['Close'])
plt.show()
# ARIMA model with order (1, 1, 1), fitted on the full series (no hold-out)
# NOTE: `result` is rebound here and no longer holds the ADF output from above
# NOTE(review): ARIMA comes from the legacy statsmodels.tsa.arima_model import
model = ARIMA(bitcoin['Close'], order=(1, 1, 1))
result = model.fit()
print(result.summary())
# SARIMA with auto-selection via pmdarima (stepwise search, seasonal period 12)
# NOTE(review): m=12 presumes a 12-observation seasonal cycle — confirm this
# matches the sampling frequency of the CSV
auto_model = pm.auto_arima(
bitcoin['Close'],
seasonal=True,
m=12,
stepwise=True,
suppress_warnings=True
)
print(auto_model.summary())
# OLS regression baseline: closing price regressed on a constant plus the
# row position 0..len-1, i.e. a straight-line trend over observation order
X = sm.add_constant(range(len(bitcoin)))
ols = sm.OLS(bitcoin['Close'], X).fit()
bitcoin['OLS_predicted'] = ols.predict(X)
# Overlay the actual closing price and the fitted OLS trend line
plt.plot(bitcoin['Close'], label='Actual')
plt.plot(bitcoin['OLS_predicted'], label='OLS Trend')
plt.legend()
plt.show()
What It Did
- Loaded and cleaned Bitcoin historical OHLCV data
- Tested stationarity using Augmented Dickey-Fuller test
- Applied first differencing and log-differencing to achieve stationarity
- Plotted ACF/PACF to identify ARMA orders
- Fitted ARIMA(1,1,1) and auto-selected SARIMA via `pmdarima`
- Compared against OLS linear trend baseline
Issues in the Original
- Used deprecated `statsmodels.tsa.arima_model.ARIMA` (removed in statsmodels 0.14)
- `fillna(method='ffill')` deprecated in newer pandas — should use `ffill()`
- No train/test split — model evaluated on training data only
- No forecast confidence intervals plotted
- Hardcoded file paths
Interactive App
Adjust the train/test split, ARIMA order (p, d, q), and confidence interval level — the forecast and residual chart update instantly. Hosted on Railway.
Modern Rewrite
Key improvements over the original:
- Modern ARIMA API — `from statsmodels.tsa.arima.model import ARIMA` replaces the deprecated `statsmodels.tsa.arima_model.ARIMA` (removed in statsmodels 0.14)
- `ffill()` instead of `fillna(method='ffill')` — pandas deprecated the `method=` kwarg on `fillna`
- Proper train/test split — train on 2013–2018, evaluate on 2019 onwards; original fitted and evaluated on the same data
- Forecast confidence intervals — `result.get_forecast(steps=n).summary_frame(alpha=0.10)` gives 90% CI bounds plotted as a shaded ribbon
- RMSE, MAE, MAPE reported — original computed these but didn’t use them to compare models
- Plotly HTML outputs — all plots saved as self-contained HTML; no `plt.show()` blocking calls
- `pathlib` for portable file paths
"""
Bitcoin Time Series Analysis — Modern Rewrite (2024)
Original: Time_Series_Bitcoin.py (2021)
Dependencies (uv):
uv add numpy "pandas>=2.2" statsmodels plotly scikit-learn
Usage:
uv run bitcoin_timeseries_modern.py
"""
from pathlib import Path
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA # modern API (statsmodels >= 0.13)
from statsmodels.tsa.stattools import adfuller
# Filesystem layout: the input CSV and the output folder sit next to this script.
DATA_DIR: Path = Path(__file__).parent
CSV_FILE: Path = DATA_DIR.joinpath("coin_Bitcoin.csv")
OUT_DIR: Path = DATA_DIR.joinpath("output")

# Date labels bounding the training window (end) and the test window (start).
TRAIN_END: str = "2018-12-31"
TEST_START: str = "2019-01-01"
def load_bitcoin(path: Path) -> pd.Series:
"""Load and clean Bitcoin CSV. Returns monthly resampled close prices."""
df = pd.read_csv(path, parse_dates=["Date"]).set_index("Date").sort_index()
df[["Open", "High", "Low", "Close"]] = df[["Open", "High", "Low", "Close"]].ffill()
df[["Volume", "Marketcap"]] = df[["Volume", "Marketcap"]].fillna(0)
return df["Close"].resample("ME").mean()
def adf_test(series: pd.Series, label: str) -> None:
    """Print the Augmented Dickey-Fuller statistic, p-value and a verdict.

    Missing values are dropped before testing. The series is reported as
    stationary when the p-value is below 0.05.

    Args:
        series: Observations to test.
        label: Short name shown in the printed line.
    """
    outcome = adfuller(series.dropna())
    stat, p = outcome[0], outcome[1]
    if p < 0.05:
        verdict = 'stationary ✓'
    else:
        verdict = 'NOT stationary ✗'
    print(f" ADF [{label}]: stat={stat:.4f}, p={p:.4f} → "
          f"{verdict}")
def make_stationary(series: pd.Series) -> pd.Series:
    """Return the first difference of the natural log of ``series``.

    Taking logs before differencing turns an exponential trend into
    period-over-period log returns. Entries that come out as NaN (always
    the first one, which has no predecessor) are dropped.
    """
    log_level = np.log(series)
    log_return = log_level.diff()
    return log_return.dropna()
def fit_arima(train: pd.Series, order: tuple[int, int, int]):
    """Build an ARIMA model of the given order on ``train`` and fit it.

    Args:
        train: Training observations.
        order: The ``order`` argument passed straight through to ``ARIMA``.

    Returns:
        Whatever ``ARIMA(...).fit()`` returns (the fitted results object).
    """
    model = ARIMA(train, order=order)
    fitted = model.fit()
    return fitted
def forecast(result, n_periods: int, test_index: pd.DatetimeIndex) -> pd.DataFrame:
    """Forecast ``n_periods`` steps ahead and label the rows with ``test_index``.

    Args:
        result: Fitted model exposing ``get_forecast(steps=...)``.
        n_periods: Number of steps to forecast.
        test_index: Index assigned to the forecast rows; its length has to
            match the number of forecast rows.

    Returns:
        A frame with the point forecast (``mean``) and the bounds of the
        interval requested with ``alpha=0.10`` (``mean_ci_lower``,
        ``mean_ci_upper``).
    """
    prediction = result.get_forecast(steps=n_periods)
    frame = prediction.summary_frame(alpha=0.10)
    frame.index = test_index
    wanted = ["mean", "mean_ci_lower", "mean_ci_upper"]
    return frame[wanted]
def evaluate(actual: pd.Series, predicted: pd.Series) -> None:
    """Print RMSE, MAE, and MAPE for a forecast.

    MAPE divides each error by the corresponding actual value, so it is
    undefined wherever the actual value is exactly zero. Those points are
    left out of the MAPE average (RMSE and MAE still use every point); if
    no non-zero actual remains, MAPE is reported as ``nan``.

    Args:
        actual: Observed values.
        predicted: Forecast values, in the same order as ``actual``.
    """
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)

    # Work on plain arrays so the comparison is positional and index labels
    # on the two inputs cannot trigger pandas alignment.
    observed = np.asarray(actual, dtype=float)
    forecasted = np.asarray(predicted, dtype=float)

    # Guard the division: a zero actual would otherwise yield inf/NaN and
    # drown out every other point in the mean.
    nonzero = observed != 0
    if nonzero.any():
        relative_error = np.abs(
            (observed[nonzero] - forecasted[nonzero]) / observed[nonzero]
        )
        mape = relative_error.mean() * 100
    else:
        mape = float("nan")

    print(f" RMSE={rmse:.4f} MAE={mae:.4f} MAPE={mape:.2f}%")
def plot_forecast(train: pd.Series, test: pd.Series, fc: pd.DataFrame, out_dir: Path) -> None:
    """Write ``forecast.html``: train, actual and forecast lines plus a CI ribbon.

    Args:
        train: Training series (drawn as "Train").
        test: Held-out series (drawn as "Actual").
        fc: Frame with ``mean``, ``mean_ci_lower`` and ``mean_ci_upper`` columns.
        out_dir: Directory that receives ``forecast.html``.
    """
    # The ribbon is a closed polygon: walk the upper bound left-to-right,
    # then the lower bound right-to-left, and let "toself" fill the inside.
    ribbon_x = [*fc.index, *fc.index[::-1]]
    ribbon_y = [*fc["mean_ci_upper"], *fc["mean_ci_lower"][::-1]]

    traces = [
        go.Scatter(x=train.index, y=train, name="Train", line=dict(color="#60a5fa")),
        go.Scatter(x=test.index, y=test, name="Actual", line=dict(color="#34d399")),
        go.Scatter(
            x=fc.index,
            y=fc["mean"],
            name="Forecast",
            line=dict(color="#f87171", dash="dot"),
        ),
        go.Scatter(
            x=ribbon_x,
            y=ribbon_y,
            fill="toself",
            fillcolor="rgba(248,113,113,0.15)",
            line=dict(color="rgba(0,0,0,0)"),
            name="90% CI",
        ),
    ]

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(
        title="ARIMA(1,0,0) Forecast — Log-Differenced Monthly Bitcoin Price",
        xaxis_title="Date",
        yaxis_title="Log return",
        template="plotly_dark",
    )
    target = out_dir / "forecast.html"
    fig.write_html(target)
def main() -> None:
    """Run the pipeline: load, test stationarity, split, fit, forecast, score, plot."""
    OUT_DIR.mkdir(exist_ok=True)

    # Stationarity is checked before and after the log-difference transform.
    prices = load_bitcoin(CSV_FILE)
    adf_test(prices, "raw monthly close")

    returns = make_stationary(prices)
    adf_test(returns, "log-differenced")

    # Label-based slices on the date index: everything up to TRAIN_END is
    # used for fitting, everything from TEST_START onwards for evaluation.
    train, test = returns.loc[:TRAIN_END], returns.loc[TEST_START:]

    fitted = fit_arima(train, order=(1, 0, 0))
    fc = forecast(fitted, n_periods=len(test), test_index=test.index)

    evaluate(test, fc["mean"])
    plot_forecast(train, test, fc, OUT_DIR)


if __name__ == "__main__":
    main()