Bitcoin Price Time Series Analysis

Overview

A time series analysis project on historical Bitcoin price data (OHLCV format). The goal was to model and forecast Bitcoin closing prices using classical time series methods — stationarity testing, differencing, ACF/PACF analysis, and ARIMA/SARIMA modelling.

Original Script (2021)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pandas.plotting import lag_plot
import pmdarima as pm

# Load Bitcoin OHLCV data.
# The file name is a bare relative path, so it is resolved against the
# current working directory rather than the script's location.
bitcoin = pd.read_csv("coin_Bitcoin.csv", parse_dates=['Date'])

# Fill missing values:
#   - Volume / Marketcap gaps are replaced with 0
#   - Open / High / Low / Close gaps carry the previous row's value forward
bitcoin['Volume'].fillna(value=0, inplace=True)
bitcoin['Marketcap'].fillna(value=0, inplace=True)
bitcoin['Open'].fillna(method='ffill', inplace=True)
bitcoin['High'].fillna(method='ffill', inplace=True)
bitcoin['Low'].fillna(method='ffill', inplace=True)
bitcoin['Close'].fillna(method='ffill', inplace=True)

# Set date index, then drop the now-redundant Date column together with
# the SNo / Name / Symbol columns, which are not used below.
bitcoin.index = bitcoin['Date']
bitcoin = bitcoin.drop(columns=['Date', 'SNo', 'Name', 'Symbol'])

# Stationarity test (Augmented Dickey-Fuller) on the raw closing price.
# result[0] is the test statistic and result[1] the p-value.
result = adfuller(bitcoin['Close'].dropna())
print(f'ADF Statistic: {result[0]:.4f}')
print(f'p-value: {result[1]:.4f}')

# First difference to achieve stationarity.
# The first row has no predecessor, so Close_diff starts with NaN; that is
# why the plotting calls below apply dropna().
bitcoin['Close_diff'] = bitcoin['Close'].diff()

# ACF and PACF plots of the differenced series (40 lags each).
# Each call draws its own figure; the single plt.show() displays both.
plot_acf(bitcoin['Close_diff'].dropna(), lags=40)
plot_pacf(bitcoin['Close_diff'].dropna(), lags=40)
plt.show()

# Lag plot of the raw (undifferenced) closing price
lag_plot(bitcoin['Close'])
plt.show()

# ARIMA model.
# order=(1, 1, 1) has d=1, so the model differences internally and is given
# the raw Close series rather than Close_diff.
# NOTE: `result` is rebound here, replacing the ADF output stored above.
model = ARIMA(bitcoin['Close'], order=(1, 1, 1))
result = model.fit()
print(result.summary())

# SARIMA with auto-selection (stepwise search via pmdarima).
# NOTE(review): m=12 sets the seasonal period to 12 observations; confirm
# this matches the sampling frequency of the rows in the CSV.
auto_model = pm.auto_arima(
    bitcoin['Close'],
    seasonal=True,
    m=12,
    stepwise=True,
    suppress_warnings=True
)
print(auto_model.summary())

# OLS regression baseline: regress Close on a constant plus a 0..n-1
# integer time index to obtain a straight-line trend.
X = sm.add_constant(range(len(bitcoin)))
ols = sm.OLS(bitcoin['Close'], X).fit()
# In-sample fitted values (the same X used for fitting is used to predict).
bitcoin['OLS_predicted'] = ols.predict(X)

# Overlay the fitted linear trend on the actual closing price
plt.plot(bitcoin['Close'], label='Actual')
plt.plot(bitcoin['OLS_predicted'], label='OLS Trend')
plt.legend()
plt.show()

What It Did

Loaded and cleaned Bitcoin historical OHLCV data
Tested stationarity using Augmented Dickey-Fuller test
Applied first differencing and log-differencing to achieve stationarity
Plotted ACF/PACF to identify ARMA orders
Fitted ARIMA(1,1,1) and auto-selected SARIMA via pmdarima
Compared against OLS linear trend baseline

Issues in the Original

Used deprecated statsmodels.tsa.arima_model.ARIMA (removed in statsmodels 0.13)
fillna(method='ffill') deprecated in newer pandas — should use ffill()
No train/test split — model evaluated on training data only
No forecast confidence intervals plotted
Hardcoded file paths

Interactive App

Open Live Dashboard →

Adjust the train/test split, ARIMA order (p, d, q), and confidence interval level — the forecast and residual chart update instantly. Hosted on Railway.

Modern Rewrite

Download Script

Key improvements over the original:

Modern ARIMA API — from statsmodels.tsa.arima.model import ARIMA replaces the deprecated statsmodels.tsa.arima_model.ARIMA (removed in statsmodels 0.13)
ffill() instead of fillna(method='ffill') — pandas deprecated the method= kwarg on fillna
Proper train/test split — train on 2013–2018, evaluate on 2019 onwards; original fitted and evaluated on the same data
Forecast confidence intervals — result.get_forecast(steps=n).summary_frame(alpha=0.10) gives 90% CI bounds plotted as a shaded ribbon
RMSE, MAE, MAPE reported — original computed these but didn’t use them to compare models
Plotly HTML outputs — all plots saved as self-contained HTML; no plt.show() blocking calls
pathlib for portable file paths

"""
Bitcoin Time Series Analysis — Modern Rewrite (2024)
Original: Time_Series_Bitcoin.py (2021)

Dependencies (uv):
    uv add numpy pandas statsmodels plotly scikit-learn

Usage:
    uv run bitcoin_timeseries_modern.py
"""

from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA  # modern API (statsmodels >= 0.13)
from statsmodels.tsa.stattools import adfuller

# All paths are anchored to the directory containing this script, so the
# script behaves the same regardless of the current working directory.
DATA_DIR = Path(__file__).parent
CSV_FILE = DATA_DIR / "coin_Bitcoin.csv"  # input CSV, expected next to the script
OUT_DIR = DATA_DIR / "output"  # created by main() if it does not exist

# Train/test boundary. main() uses these as date-label slices on the
# log-differenced monthly series: train = [:TRAIN_END], test = [TEST_START:].
TRAIN_END = "2018-12-31"
TEST_START = "2019-01-01"


def load_bitcoin(path: Path) -> pd.Series:
    """Load and clean Bitcoin CSV. Returns monthly resampled close prices."""
    df = pd.read_csv(path, parse_dates=["Date"]).set_index("Date").sort_index()
    df[["Open", "High", "Low", "Close"]] = df[["Open", "High", "Low", "Close"]].ffill()
    df[["Volume", "Marketcap"]] = df[["Volume", "Marketcap"]].fillna(0)
    return df["Close"].resample("ME").mean()


def adf_test(series: pd.Series, label: str) -> None:
    """Print the Augmented Dickey-Fuller statistic, p-value and a verdict.

    Missing values are dropped before testing. The series is reported as
    stationary when the p-value is below 0.05.

    Args:
        series: Series to test.
        label: Short description of the series, echoed in the printed line.
    """
    adf_output = adfuller(series.dropna())
    # Only the first two entries (test statistic, p-value) are reported.
    stat, p = adf_output[0], adf_output[1]
    verdict = "stationary ✓" if p < 0.05 else "NOT stationary ✗"
    print(f"  ADF [{label}]: stat={stat:.4f}, p={p:.4f} → {verdict}")


def make_stationary(series: pd.Series) -> pd.Series:
    """Return the log-differenced series (period-over-period log returns).

    Taking logs before differencing targets an exponential trend and
    level-dependent variance. Rows whose difference is undefined (the first
    row, and any row adjacent to a missing value) are dropped.
    """
    log_prices = np.log(series)
    log_returns = log_prices.diff()
    return log_returns.dropna()


def fit_arima(train: pd.Series, order: tuple[int, int, int]):
    """Build an ARIMA model of the given (p, d, q) order and fit it.

    Args:
        train: Training series passed to the model as-is.
        order: The (p, d, q) order handed to ``ARIMA``.

    Returns:
        The fitted results object returned by ``ARIMA(...).fit()``.
    """
    model = ARIMA(train, order=order)
    return model.fit()


def forecast(
    result,
    n_periods: int,
    test_index: pd.DatetimeIndex,
    *,
    alpha: float = 0.10,
) -> pd.DataFrame:
    """Return the out-of-sample point forecast with a confidence interval.

    Args:
        result: Fitted model results exposing ``get_forecast(steps=...)``,
            whose return value provides ``summary_frame(alpha=...)``.
        n_periods: Number of steps to forecast.
        test_index: Index assigned to the forecast rows; its length must
            equal ``n_periods``.
        alpha: Significance level of the interval. The interval covers
            ``1 - alpha``; the default of 0.10 gives a 90% interval.

    Returns:
        DataFrame indexed by ``test_index`` with the columns ``mean``,
        ``mean_ci_lower`` and ``mean_ci_upper``.

    Raises:
        ValueError: If ``alpha`` is not strictly between 0 and 1.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be between 0 and 1 (exclusive), got {alpha!r}")

    fc = result.get_forecast(steps=n_periods).summary_frame(alpha=alpha)
    # Re-label the rows so the forecast lines up with the held-out
    # observations it is compared against.
    fc.index = test_index
    return fc[["mean", "mean_ci_lower", "mean_ci_upper"]]


def evaluate(actual: pd.Series, predicted: pd.Series) -> None:
    """Print RMSE, MAE and MAPE of ``predicted`` against ``actual``.

    Args:
        actual: Observed values.
        predicted: Forecast values, in the same order as ``actual``.
    """
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)

    # Work on the raw arrays so the subtraction is positional rather than
    # aligned on index labels. MAPE divides by the observed values, so it
    # is not finite when an observation is exactly zero.
    observed = actual.values
    relative_error = (observed - predicted.values) / observed
    mape = np.mean(np.abs(relative_error)) * 100

    print(f"  RMSE={rmse:.4f}  MAE={mae:.4f}  MAPE={mape:.2f}%")


def plot_forecast(train: pd.Series, test: pd.Series, fc: pd.DataFrame, out_dir: Path) -> None:
    """Write an HTML chart of train, actual and forecast with a CI ribbon.

    Args:
        train: Training portion of the series.
        test: Held-out portion of the series.
        fc: Forecast frame with ``mean``, ``mean_ci_lower`` and
            ``mean_ci_upper`` columns.
        out_dir: Directory that receives ``forecast.html``.
    """
    # The ribbon is one closed polygon: the upper bound traced left to
    # right, followed by the lower bound traced right to left.
    ribbon_x = list(fc.index) + list(fc.index[::-1])
    ribbon_y = list(fc["mean_ci_upper"]) + list(fc["mean_ci_lower"][::-1])

    traces = [
        go.Scatter(x=train.index, y=train, name="Train", line={"color": "#60a5fa"}),
        go.Scatter(x=test.index, y=test, name="Actual", line={"color": "#34d399"}),
        go.Scatter(
            x=fc.index,
            y=fc["mean"],
            name="Forecast",
            line={"color": "#f87171", "dash": "dot"},
        ),
        go.Scatter(
            x=ribbon_x,
            y=ribbon_y,
            fill="toself",
            fillcolor="rgba(248,113,113,0.15)",
            line={"color": "rgba(0,0,0,0)"},  # fully transparent outline
            name="90% CI",
        ),
    ]

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(
        title="ARIMA(1,0,0) Forecast — Log-Differenced Monthly Bitcoin Price",
        xaxis_title="Date",
        yaxis_title="Log return",
        template="plotly_dark",
    )
    fig.write_html(out_dir / "forecast.html")


def main() -> None:
    """Run the pipeline: load, test stationarity, fit, forecast, evaluate, plot."""
    OUT_DIR.mkdir(exist_ok=True)

    # Stationarity is checked before and after the log-difference transform.
    monthly_close = load_bitcoin(CSV_FILE)
    adf_test(monthly_close, "raw monthly close")

    log_returns = make_stationary(monthly_close)
    adf_test(log_returns, "log-differenced")

    # Chronological split: everything up to TRAIN_END trains the model,
    # everything from TEST_START onward is held out.
    train, test = log_returns.loc[:TRAIN_END], log_returns.loc[TEST_START:]

    fitted = fit_arima(train, order=(1, 0, 0))
    fc = forecast(fitted, n_periods=len(test), test_index=test.index)
    evaluate(test, fc["mean"])
    plot_forecast(train, test, fc, OUT_DIR)


if __name__ == "__main__":
    main()