A retailer forecasts customer churn using Bayesian survival analysis to optimize retention campaigns.
import pandas as pd
import pymc as pm
import arviz as az
# Load the Online Retail transactions and derive, for every purchase occasion,
# the time in days until the same customer's next purchase (the survival time).
url = "https://raw.githubusercontent.com/RameenShahid/Online-Retail-Dataset-UCI/master/data.csv"
df = pd.read_csv(url, encoding='ISO-8859-1')
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
# Keep identifiable customers and drop non-positive quantities.
df = df.dropna(subset=['CustomerID']).query('Quantity > 0')

# Collapse to one row per (customer, timestamp) and order chronologically.
# Without the sort, shift(-1) pairs a row with whatever row happens to follow
# it in the file; without the de-duplication, rows sharing a timestamp for the
# same customer yield spurious zero-length gaps.
df = (
    df.sort_values(['CustomerID', 'InvoiceDate'])
    .drop_duplicates(subset=['CustomerID', 'InvoiceDate'])
    .reset_index(drop=True)
)

# Compute time-to-next-purchase
df['NextPurchase'] = df.groupby('CustomerID')['InvoiceDate'].shift(-1)

# A customer's last recorded purchase has no successor: it is right-censored.
df['Censored'] = df['NextPurchase'].isna().astype(int)

# For censored rows the survival time is the follow-up actually observed
# (last purchase -> latest timestamp in the data), not an arbitrary 365 days.
observation_end = df['InvoiceDate'].max()
elapsed = df['NextPurchase'].fillna(observation_end) - df['InvoiceDate']

# Fractional days: whole-day truncation would turn same-day repeat purchases
# into 0, which is a degenerate value for a Weibull likelihood.
df['TimeToNext'] = elapsed.dt.total_seconds() / 86400.0
# Weibull survival model of time-to-next-purchase with one (shape, scale) pair
# per country and right-censoring for purchases with no observed successor.

# Restrict to the most frequent countries so that every row maps to one of the
# per-country parameters.
# NOTE(review): assumes df carries a 'Country' column — it is not referenced
# anywhere else in this snippet; confirm against the dataset.
top_countries = df['Country'].value_counts().nlargest(5).index
n_countries = len(top_countries)

# Zero-length durations carry no information when censored and are degenerate
# for a Weibull density when observed, so they are excluded.
model_df = df[df['Country'].isin(top_countries) & (df['TimeToNext'] > 0)]

country_idx = pd.Categorical(model_df['Country'], categories=top_countries).codes.astype(int)
times = model_df['TimeToNext'].to_numpy(dtype=float)
censored = model_df['Censored'].to_numpy()
is_event = censored == 0
is_censored = ~is_event

with pm.Model() as survival_model:
    mu_alpha = pm.Normal('mu_alpha', mu=2, sigma=1)
    # Weibull shape and scale must be strictly positive; an unbounded Normal
    # prior lets the sampler propose invalid (negative) values.
    alpha = pm.TruncatedNormal('alpha', mu=mu_alpha, sigma=1, lower=0, shape=n_countries)  # Top 5 countries
    # NOTE(review): the scale prior is centred on 2 (days); consider rescaling
    # `times` or widening this prior if typical gaps are much longer.
    beta = pm.TruncatedNormal('beta', mu=2, sigma=1, lower=0, shape=n_countries)

    # Observed repeat purchases contribute the Weibull density.
    t = pm.Weibull(
        't',
        alpha=alpha[country_idx[is_event]],
        beta=beta[country_idx[is_event]],
        observed=times[is_event],
    )

    # Censored rows contribute the log-survival function,
    # log S(t) = -(t / beta) ** alpha, rather than being discarded.
    censored_alpha = alpha[country_idx[is_censored]]
    censored_beta = beta[country_idx[is_censored]]
    pm.Potential(
        't_censored',
        (-((times[is_censored] / censored_beta) ** censored_alpha)).sum(),
    )

    # Sampling has to happen inside the model context.
    trace = pm.sample(1000)
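Insights: The fitted survival curves vary by country, and frequent buyers have longer lifetimes (note that the model shown above only includes country-level parameters; the frequent-buyer finding requires a purchase-frequency covariate). The full code is available on GitHub.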