from sklearn.metrics import roc_auc_score
if ell_path.is_file():
    # Walk-forward (expanding-window) evaluation: every model is trained on
    # all samples with ts in [boundaries[0], test_start) and scored on the
    # next slice [test_start, test_end), so the test data always lies
    # strictly after the training data on the ts axis.
    boundaries = [1, 10, 20, 30, 40, 50]
    wf_results = []
    for test_start, test_end in zip(boundaries[1:], boundaries[2:]):
        train_mask = (ts >= boundaries[0]) & (ts < test_start)
        test_mask = (ts >= test_start) & (ts < test_end)
        n_train, n_test = int(train_mask.sum()), int(test_mask.sum())
        # Skip windows that are too small to fit or score meaningfully.
        if n_train < 50 or n_test < 50:
            continue
        X_tr_e, y_tr_e = X_ell[train_mask], y_ell[train_mask]
        X_te_e, y_te_e = X_ell[test_mask], y_ell[test_mask]
        # ROC AUC is undefined when the test labels contain a single class,
        # and a model fitted on a single class may not expose a second
        # predict_proba column, so such windows are skipped as well.
        if np.unique(y_tr_e).size < 2 or np.unique(y_te_e).size < 2:
            continue
        # NOTE(review): pipe_ell is refitted in place, so after the loop it
        # holds the fit from the last evaluated window -- clone it first if
        # an earlier fit is still needed downstream.
        pipe_ell.fit(X_tr_e, y_tr_e)
        # Column 1 of predict_proba is used as the positive-class score
        # (assumes binary labels with the positive class sorted last).
        auc_wf = roc_auc_score(y_te_e, pipe_ell.predict_proba(X_te_e)[:, 1])
        # Mean of the labels; presumably labels are 0/1 with 1 = illicit.
        illicit_rate = y_te_e.mean()
        wf_results.append({
            "window": f"t={test_start}–{test_end - 1}",
            "AUC": auc_wf,
            "illicit_rate": illicit_rate,
            "n_test": n_test,
        })

    windows = [r["window"] for r in wf_results]
    aucs = [r["AUC"] for r in wf_results]
    rates = [r["illicit_rate"] for r in wf_results]
    # Shuffled cross-validation baseline, computed once and reused below.
    mean_sh = auc_ell.mean()

    if wf_results:
        colour_auc, colour_rate = "#1f77b4", "#d62728"
        fig, ax1 = plt.subplots(figsize=(11, 5.5))

        # Left axis: walk-forward AUC per test window.
        ax1.bar(windows, aucs, color=colour_auc, alpha=0.75, label="Walk-forward AUC")
        ax1.set_ylabel("AUC", color=colour_auc, fontsize=13)
        # Keep the original 0.75 floor when everything fits above it;
        # otherwise lower the floor so weak windows are not clipped away.
        lowest = min(min(aucs), mean_sh)
        y_floor = 0.75 if lowest > 0.75 else max(0.0, lowest - 0.05)
        ax1.set_ylim(y_floor, 1.0)
        ax1.tick_params(axis="y", labelcolor=colour_auc, labelsize=12)
        ax1.tick_params(axis="x", labelsize=12)

        # Right axis: illicit rate of each test window.
        ax2 = ax1.twinx()
        ax2.plot(windows, rates, "o--", color=colour_rate, linewidth=2.5,
                 markersize=8, label="Illicit rate")
        ax2.set_ylabel("Illicit rate in test window", color=colour_rate, fontsize=13)
        ax2.tick_params(axis="y", labelcolor=colour_rate, labelsize=12)

        # Horizontal reference line at the shuffled-CV mean AUC.
        ax1.axhline(mean_sh, color=colour_auc, linestyle=":", linewidth=2, alpha=0.6,
                    label=f"Shuffled CV: {mean_sh:.3f}")
        ax1.set_title("Elliptic: walk-forward AUC vs shuffled baseline", fontsize=15, fontweight="bold")
        ax1.legend(loc="lower left", fontsize=11)
        ax2.legend(loc="upper right", fontsize=11)
        plt.tight_layout()
        plt.show()
        mean_wf = np.mean(aucs)
    else:
        # Every window was skipped: avoid np.mean([]) (RuntimeWarning) and
        # an empty figure, but keep mean_wf defined for the summary line.
        print("No walk-forward window had enough samples and both classes; skipping plot.")
        mean_wf = np.nan

    print(f"Walk-forward: {mean_wf:.3f} | Shuffled CV: {mean_sh:.3f} | Bias gap: {mean_sh - mean_wf:+.3f}")