= 2
dim = DataLoaders.from_dsets([], []) # Empty train and validation datasets
dls = XResAttn(dim, n_class=1, stem_szs=(32,), conv_blocks=[1, 1, 1],
model =[128, 256, 512], pos_enc=False,
block_szs=4, dim_ff=512, nhead_enc=8, linear_layers=[])
n_encoder_layers
model.to(default_device())= Learner(dls, model, loss_func=L1LossFlat(), model_dir=MODEL_PATH) learn_exp
Benchmark: anomalous diffusion
The main goal is to characterize anomalous diffusion processes that switch between diffusive states without any kind of prior konwledge. In this tutorial, we focus on inferring the anomalous diffusion exponent \(\alpha\) at every time step, which naturally highlights potential changes in the behaviour along the trajectories. This allows us to obtain a deeper understanding of the underlying physical systems that drive the dynamics.
In the following analysis, we provide a thorough characterization of the model performance under various conditions and we show how to reproduce some figures of our paper.
This tutorial is very similar to the Benchmark: Brownian motion. Thus, we skip some extended explanations here and we refer to this one instead.
Load the model
First of all, we need to load a trained model with which to perform the analysis.
We refer to the model training tutorial for details about how to train and save your models.
To load the models within a Leaner
, we need to provide some data loaders. However, we will use the model to study various scenarios, so we provide an empty dataloader and load the different data sets as we need them.
Now we can load the trained model weights.
f'xresattn_exp_{dim}d_1_to_4_cp')
learn_exp.load(eval(); learn_exp.model.
It is very important to define the model exactly with the same layers as we did during training. Otherwise, we won’t be able to load the trained parameters!
Pointwise prediction
We study the overall performance of the method over anomalous diffusion trajectories that present changes both in the underlying anomalous diffusion model, and the anomalous diffusion exponent \(\alpha\). We look at the pointwise prediction error and study how the length of the segments within the trajectory affect it.
We compare our approach with a reference method based on the time-averaged mean squared displacement (TA-MSD) (see baselines). However, since this method cannot account for any heterogeneity in the trajectories, we provide it with the pre-segmented trajectories. This results in a massive advantage and, thus, we also pass the pre-cut segments through our model.
With this segment-wise analysis, we can easily study the performance of all the methods as function of the segment lengths.
Generate the data
To evaluate the different methods, we need a proper test set. We can generate one in the same way that we genereate the train set in the model training tutorial.
Skip the data generation if you already have a test set!
# OPTIONAL: create the test set.
# Don't need to run this cell if it already exists.
= 12500
n_per_set = 200
max_t = 2
dim = [1, 2, 3, 4]
cps = partial(create_andi_segmentation_dataset,
ds_fun =max_t, dim=dim, noise=[0.], save=False)
max_t= [ds_fun(n_per_set, n_change_points=n_cp) for n_cp in cps]
datasets = combine_datasets(datasets)
dataset = f"{min(cps)}_to_{max(cps)}"
n_change = DATA_PATH/get_andids_fname(n_change, max_t, dim, "test")
save_path dataset.to_pickle(save_path)
Load the test set.
# Skip this cell if you just generated it.
= "1_to_4"
n_change = "test"
name = load_dataset(n_change=n_change, dim=dim, name=name) ds
Get the predictions
We now perform the predictions of the trajectories store them in the dataframe to process them later. First, we compute the full-trajectory predictions. Then, we proceed with the segment-wise analysis. We split the trajectories and their predictions by the true changepoints. Then, we perform the prediction especifically for each segment with both the model and the TA-MSD method. We use the TA-MSD 2-10 to perform the evaluation.
Let’s define a prediction function and a normalization function to simplify the code.
def predict_norm_sample(model, x):
"Get the `model` prediction normalizing a single sample `x`."
= x.unsqueeze(0).to(default_device())
xb = normalize(xb)[0]
xb_norm return to_detach(model(xb_norm).squeeze())
def normalize(trajs):
"Normalize a batch of trajectories."
= trajs.shape
bs, _, dim -= trajs.mean(1, keepdim=True)
trajs = trajs[:, 1:] - trajs[:, :-1]
displ = displ.std(1, keepdim=True)
std = torch.where(std == 0., torch.ones_like(std), std)
std /= std
displ = displ.cumsum(1)
trajs_norm = torch.cat([torch.zeros(bs, 1, dim, device=trajs.device),
trajs_norm =1)
trajs_norm], axisreturn trajs_norm, std
Now we can proceed to process the trajectories. In the following piece of code, we process every trajectory (outer loop). Then, we split it into segments with constant properties (inner loop), and we compute the mean absolute error (MAE) and the mean relative error with the three aforementioned approaches.
= 50000 # Subsample 50k trajectories
max_samples = []
segment_data
for i, row in tqdm(ds[:max_samples].iterrows()):
= row.x, row.y_exp.squeeze()
x, y = row.y_mod.squeeze(), row.cp
y_mod, cps
# Predict over full trajectory
= predict_norm_sample(learn_exp.model, x.T)
pred
# Segment trajectory with true changepoints
= split_tensor(x.T, cps), split_tensor(pred, cps)
split_x, split_pred = split_tensor(y, cps), split_tensor(y_mod, cps)
split_y, split_model = zip(split_x, split_y, split_model, split_pred)
splits for j, (seg_x, seg_y, seg_model, pred_cut) in enumerate(splits):
# Prediction over full trajectory cut with true changepoints
= mean_absolute_error(pred_cut, seg_y)
mae
# Add noise to flat CTRW segments for numerical stability
if (seg_x - seg_x[0]).allclose(tensor(0.)):
+= 0.1*torch.randn_like(seg_x)
seg_x
# Prediction over segment
= predict_norm_sample(learn_exp.model, seg_x - seg_x[0])
pred_segment = mean_absolute_error(pred_segment, seg_y)
mae_segment
# Prediction over segment with TA-MSD
= anomalous_exponent_tamsd(seg_x)
pred_tamsd = mean_absolute_error(pred_tamsd, seg_y[0])
mae_tamsd
'sample': i, 'segment_idx': j,
segment_data.append({'length': len(seg_y), 'x': seg_x, 'y': seg_y,
'model': seg_model[0], 'pred_cut': pred_cut,
'pred_segment': pred_segment,
'pred_tamsd': pred_tamsd, 'mae': mae,
'mae_segment': mae_segment,
'mae_tamsd': mae_tamsd})
= pd.DataFrame.from_records(segment_data) ds
Unlike in the normal diffusion case, we encounter some numerical instabilities with the methods when we provide them with the split segments. We purge all the broken predictions to proceed with the analysis.
= ds.mae.apply(lambda x: torch.isnan(x).item())
mask_full = ds.mae_tamsd.apply(lambda x: torch.isnan(x).item())
mask_tamsd = ds.mae_segment.apply(lambda x: torch.isnan(x).item())
mask_seg = (mask_full | mask_tamsd | mask_seg)
mask = ds[~mask] ds
We can save all the data for further posterior processing.
/'segment_analysis_alpha_test.pkl') ds.to_pickle(DATA_PATH
Prediction error
Let’s quantify the model performances by computing their MAE over full trajectories and trajectory segments.
= ds.length.unique().astype(int)
lengths
lengths.sort()
= ['mae', 'mae_segment', 'mae_tamsd']
metrics = {m: {'mean': [], 'sem': [], 'global': None}
metric_by_length for m in metrics}
for m in metrics:
= [getattr(ds, m)[ds.length == l].mean()
means for l in lengths]
= [getattr(ds, m)[ds.length == l].sem()
sems for l in lengths]
'mean'] = np.array(means)
metric_by_length[m]['sem'] = np.array(sems)
metric_by_length[m]['global'] = (
metric_by_length[m][getattr(ds, m)*ds.length).sum() /
(sum()
ds.length. )
Save this for later.
= "mae_segment_length"
name = (FIG_PATH/name).with_suffix(".pkl")
data_path
with open(data_path, 'wb') as f:
=pickle.HIGHEST_PROTOCOL) pickle.dump(metric_by_length, f, protocol
Let’s see the overall MAE for each method.
Code
print(f"STEP: {metric_by_length['mae']['global']:.3f}")
print(f"STEP + segments: {metric_by_length['mae_segment']['global']:.3f}")
print(f"TA-MSD + segments: {metric_by_length['mae_tmsd']['global']:.3f}")
STEP: 0.271
STEP + segments: 0.275
TA-MSD + segments: 0.368
STEP outperforms the TA-MSD baseline, reducing the overall error by more than \(25\%\). The advantage is significantly larger in short segments, as we see below.
Code
= plt.figure(figsize=(1.5*fig_size, fig_size))
fig = np.arange(10, 191)
lengths = ['STEP', 'STEP + segments', 'TA-MSD + segments']
labels for i, m in enumerate(metric_by_length.keys()):
= metric_by_length[m]['mean'], metric_by_length[m]['sem']
mean, sem = labels[i//2]# if i%2 == 0 else None
label ='-', color=colors[i//2], label=label,
plt.plot(lengths, mean, linestyle=10 if i==0 else -1)
zorder- sem, mean + sem, color=colors[i//2], alpha=0.3)
plt.fill_between(lengths, mean =alpha_grid)
plt.grid(alpha=11)
plt.legend(fontsize=14)
plt.tick_params(labelsize0.1, 0.75])
plt.ylim(["Segment length", fontsize=16)
plt.xlabel(r"$|\alpha_{true} - \alpha_{pred}|$", fontsize=16); plt.ylabel(
In this case, the TA-MSD-based method does never reach the performance level of STEP, even in very long segments. This is in contrast to the Brownian motion benchmark, where the TA-MSD method attains a similar performance for the longest segments.
Additionally, in this case, feeding STEP with segments harms the performance in the shortest segments. As we have mentioned previously, doing this causes some numerical issues with the methods (both STEP and the TA-MSD). Regardless, the pre-segmented trajectories are never available in real scenarios.
Error by anomalous diffusion model
The anomalous diffusion models that we consider describe very different behaviours. The deviation from normal diffusion has different sources in each of them and, therefore, their anomalous diffusion exponent \(\alpha\) depends on different phenomena.
Hence, it is reasonable to expect the underlying anomalous diffusion model of each segment to play a key role in the characterization. Furthermore, every model has a different range for \(\alpha\):
- Annealed transit time (ATTM) with \(\alpha\in\left[0.05, 1\right]\).
- Continuous time random walk (CTRW) with \(\alpha\in\left[0.05, 1\right]\).
- Fractional Brownian motion (FBM) with \(\alpha\in\left[0.05, 1.95\right]\).
- Lévy Walk (LW) with \(\alpha\in\left[1.05, 2\right]\).
- Scaled Brownian motion (SBM) with \(\alpha\in\left[0.05, 2\right]\).
The wider the \(\alpha\) range, the larger the errors can become. Let’s look at the MAE for each method.
= [ds[(ds.model == m)].mae.mean()
mae_by_model for m in MODEL_DATA.keys()]
Code
= np.arange(5)
x =colors[0], width=0.6)
plt.bar(x, mae_by_model, color='y', alpha=alpha_grid)
plt.grid(axis'MAE', fontsize=16)
plt.ylabel('name'].upper() for n in x])
plt.xticks(x, [MODEL_DATA[n][='x', labelsize=16)
plt.tick_params(axis='y', labelsize=14) plt.tick_params(axis
CTRW, FBM and LW have similar MAE. Surprisingly, FBM holds the lowest error while having one of the largest ranges for \(\alpha\). Then, ATTM and SBM are clearly the hardest models to characterize, as their MAE is between \(50\%\) to \(100\%\) larger than the other methods.
In SBM, as we will see in the next tutorial, \(\alpha\) is related to the ageing of the diffusion coefficient. This means that we need long segments to correctly characterize \(\alpha\).
SBM was also found to be the hardest model to characterize in the AnDi Challenge.
Let’s bring this study further to get a better idea about the sources of errors in all the models, such as ATTM.
Error by diffusion model and localization noise
Let’s study the behaviour of STEP for each diffusion model in more detail and, at the same time, study its resilience to localization noise.
# Optional: load the test set if it is not in memory
= load_dataset(n_change="1_to_4", dim=dim, name="test") ds
To make the computation more bearable, we work with a fraction of our full test set.
= ds.sample(frac=0.25, random_state=0).reset_index(drop=True) ds
'y'] = ds['y_exp'].apply(torch.squeeze)
ds['model'] = ds['y_mod'].apply(torch.squeeze)
ds[= ds.drop(columns=['y_mod', 'y_exp', 'models', 'exps', 'noise']) ds
Process the trajectories
Similar to the Brownian motion analysis, we will process the trajectories at various noise levels.
Here, we sample \(\sigma_{\text{noise}}\in[10^{-5}, 10^2]\) uniformly in log space.
= 128
noise_samples = 2, -5
noise_max, noise_min = noise_max - noise_min
noise_range = torch.rand(ds.shape[0], noise_samples)*noise_range + noise_min noise_traj
As in the previous cases, we can compare the predictions of STEP with the TA-MSD baseline.
def predict(model, x):
"Get prediction of `model` on batch `x`."
return to_detach(model(x.to(default_device()))).squeeze()
= (ds.shape[0], max_t, noise_traj.shape[1])
shape = torch.zeros(shape)
pred_by_noise = torch.zeros(shape)
pred_tamsd_by_noise
for i, (x, cps) in tqdm(ds[['x','cp']].iterrows()):
= torch.randn(noise_samples, *x.T.shape)*10**noise_traj[i]
noise = x.T.unsqueeze(0) + noise.unsqueeze(-1).unsqueeze(-1)
noisy_x = normalize(noisy_x)
noisy_x, std = split_tensor(noisy_x.transpose(1, 0), cps)
split_x
= predict(learn_exp.model, noisy_x).T
pred_by_noise[i]
= []
pred_tamsd for seg_x in split_x:
= normalize(seg_x.transpose(1, 0))
seg_x, _ = torch.ones(seg_x.shape[1])
ones *anomalous_exponent_tamsd(s)
pred_tamsd.append(torch.stack([onesfor s in seg_x]))
= torch.cat(pred_tamsd, axis=-1).T
pred_tamsd_by_noise[i] = dict(zip(['full', 'tamsd'],
predictions [pred_by_noise, pred_tamsd_by_noise]))
With the predictions, we can compute the error performed at every time step as a function of \(\sigma_{\text{noise}}\) and the diffusion model independently.
Therefore, we start by obtaining the diffusion model and noise of every trajectory at every time step.
= torch.stack([t for t in ds['y'].values])
y = torch.stack([t for t in ds['model'].values])
model = noise_traj.unsqueeze(1) + torch.zeros_like(y.unsqueeze(-1))
rel_noise = model.unsqueeze(-1) + torch.zeros_like(rel_noise) model
Now we can compute the pointwise errors of the full test set with random noise.
= {k: y.unsqueeze(-1) - p for k, p in predictions.items()}
errors = torch.linspace(rel_noise.min(), rel_noise.max(), 100) bins_noise
Overall performance
We can get a good idea about the error sources by looking at the predicted vs true \(\alpha\) values for each anomalous diffusion model.
For this, we need a target tensor with the same shape as our predictions for all noise levels.
= y.unsqueeze(-1) + torch.zeros_like(model)
y_ext y_ext.shape
torch.Size([49994, 200, 128])
And it is useful to have a mapping between model names and their integer index.
= {v['name']: k for k, v in MODEL_DATA.items()}
model_keys model_keys
{'attm': 0, 'ctrw': 1, 'fbm': 2, 'lw': 3, 'sbm': 4}
Now we can define the different noise intervals at which we wish to study the models. We consider \(\sigma_{\text{noise}}\in\left\{[10^{-5}, 10^{-4}], [10^{-2}, 10^{-1}], [10^{-1}, 10^{0}]\right\}\). The first interval is a quasy-noiseless regime, the second is moderate noise and the third is very high noise.
= {}
pred_vs_true_by_model = [(-5, -4), (-2, -1), (-1, 0)]
noise_ranges = (0, 2, 41) bins
Let’s compute the 2D histograms of \(\alpha_{\text{pred}}\) vs \(\alpha_{\text{true}}\).
= predictions['full']
preds_full for m in tqdm(list(model_keys.keys()) + ['all']):
= (model == model_keys[m] if m != 'all'
mask_model else torch.ones_like(model, dtype=bool))
= []
model_histograms for (low, high) in noise_ranges:
= (low <= rel_noise) & (rel_noise < high)
mask_noise = mask_model & mask_noise
mask = np.histogram2d(y_ext[mask].numpy(),
hist, true_e, pred_e
preds_full[mask].numpy(),=[np.linspace(*bins)+1e-3,
bins*bins[:2], 41)])
np.linspace(
model_histograms.append(hist)= {'hist': model_histograms,
pred_vs_true_by_model[m] 'true_edges': true_e,
'pred_edges': pred_e}
Save them for later.
= "pred_vs_true_models"
fig_name = (pred_vs_true_by_model, noise_ranges, bins_by_model)
plot_data with open(FIG_PATH/f'{fig_name}.pkl', 'wb') as f:
=pickle.HIGHEST_PROTOCOL) pickle.dump(plot_data, f, protocol
Code
= plt.subplots(3, 5, figsize=(6*fig_size, 3*fig_size),
fig, axes =True)
constrained_layout= (np.arange(0, 41, 20), [0, 1, 2])
ticks = 16
ticksize = 20
fontsize = ["$[10^{-5}, 10^{-4})$",
noise_ranges "$[10^{-2}, 10^{-1})$",
"$[10^{-1}, 10^{0})$"]
= {'attm': [0.9, 0.9, 0.9],
vmax 'ctrw': [0.8, 0.9, 0.8],
'fbm': [0.3, 0.3, 0.5],
'lw': [0.6, 0.6, 0.6],
'sbm': [0.9, 0.9, 1]}
for m, data in pred_vs_true_by_model.items():
if m == 'all': continue
= axes[:, model_keys[m]]
m_axes = data['hist']
histfor i, ax in enumerate(m_axes):
= hist[i].shape
shape /hist[i].max(), vmin=0, vmax=vmax[m][i],
ax.pcolor(hist[i].T=cmap_hist1, rasterized=True)
cmap
if i == 0:
=fontsize)
ax.set_title(m.upper(), fontsize=False)
ax.tick_params(labeltopif i == 1:
=False)
ax.tick_params(labeltop
if i == 2:
*ticks)
ax.set_xticks(r'$\alpha_{true}$', fontsize=fontsize)
ax.set_xlabel(=False, labelbottom=True)
ax.tick_params(labeltopelse:
ax.set_xticks([])
if model_keys[m] == 0:
r'$\alpha_{pred}$', fontsize=fontsize)
ax.set_ylabel(*ticks)
ax.set_yticks(else:
ax.set_yticks([])
=ticksize);
ax.tick_params(labelsize
if m == 'sbm':
for ax, noise in zip(m_axes, noise_ranges):
42, 20, fr"$\sigma_{{noise}} \in$" + noise, fontsize=fontsize) ax.text(
As expected, we observe a very nice \(\alpha_{\text{pred}}\) vs \(\alpha_{\text{true}}\) relationship for FBM and LW segments.
There is a clear tendency for STEP to predict \(\alpha\approx0.8\) for SBM segments which is enhanced with noise. This is indicative that the model struggles to identify any clear behaviour and defaults to a quasi Brownian motion prediction.
In CTRW segments we find a similar pattern, where the model predicts \(\alpha\approx0.25\), corresponding to nearly immobile particles. CTRW trajectories are characterized by immobilization with jumps at random times. In short segments, it is very likely that no jumps are shown and, thus, the model sees a static particle.
To a lesser extent, we observe a tendnency for ATTM trajectories to predict \(\alpha\approx1\). ATTM trajectories are characterized by Brownian motion segments with random diffusion coefficients that change at random times. In short ATTM segments, it is very likely that there are no diffusion coefficient changes. Therefore, STEP only sees a pure Brownian motion segment for which the prediction of \(\alpha\approx1\) is correct.
MAE as function of \(\sigma_{\text{noise}}\)
Now we can look at the MAE of STEP and the TA-MSD baseline as function of the localization noise.
= [], [], [], []
noise_err, noise_err_std, x_err, counts for low, high in tqdm(zip(bins_noise[:-1], bins_noise[1:])):
= (rel_noise >= low) & (rel_noise <= high)
mask + high)/2)
x.append((low float().sum())
counts.append(mask.& ~err.isnan()].abs().mean()
noise_err.append(tensor([err[mask for err in errors.values()]))
& ~err.isnan()].abs().std()
noise_err_std.append(tensor([err[mask for err in errors.values()]))
= torch.stack(noise_err)
noise_err = torch.stack(noise_err_std)
noise_err_std = torch.stack(x_err)
x_err = torch.stack(counts) counts
We can go a step further and compute the MAE for each diffusion model separately and as function of the noise.
= {k: errors['full'][model == i] for k, i in model_keys.items()} errors_model
= {}
mae_model_by_noise for m, i in model_keys.items():
= []
mae = rel_noise[model == i]
noise_model for low, high in tqdm(zip(bins_noise[:-1], bins_noise[1:])):
= (noise_model >= low) & (noise_model <= high)
mask abs().mean())
mae.append(errors_model[m][mask].= mae mae_model_by_noise[m]
Code
= plt.subplots(1, 2, figsize=(2.5*fig_size, fig_size), constrained_layout=True)
fig, ax
= ['STEP', 'TA-MSD + segments']
labels = [0, 3]
idx = noise_err_std/counts.sqrt().unsqueeze(-1)
sem for i, (err, s) in enumerate(zip(noise_err.T[idx], sem.T[idx])):
0].semilogx(10**x_err, err, '-', linewidth=3., color=colors[2*i], label=labels[i])
ax[0].fill_between(10**x_err, err - s, err + s, color=colors[2*i], alpha=0.3)
ax[0].legend(fontsize=14)
ax[0].set_xlabel(r"$\sigma_{noise}$", fontsize=16)
ax[0].set_ylabel("MAE", fontsize=16)
ax[0].set_xlim([2e-5, 50])
ax[
= bins_noise[1] - bins_noise[0]
dx = bins_noise[:-1] + dx
x_noise_model for i, (m, mae) in enumerate(mae_model_by_noise.items()):
1].semilogx(10**x_noise_model, mae, linewidth=2, color=colors[i], label=m.upper())
ax[1].set_xlim([10**(-3.5), 6])
ax[1].legend(fontsize=14)
ax[1].set_xlabel(r"$\sigma_{noise}$", fontsize=16)
ax[1].set_yticklabels([])
ax[
for a in ax:
=14)
a.tick_params(labelsize=alpha_grid) a.grid(alpha
Let’s save the data of the figures to process them later.
= "noise_analysis_alpha"
fig_name = (FIG_PATH/fig_name).with_suffix(".pkl")
plot_path = (noise_err, noise_err_std, x, counts)
plot_data with open(plot_path, 'wb') as f:
=pickle.HIGHEST_PROTOCOL)
pickle.dump(plot_data, f, protocol
= "noise_analysis_mae_models"
fig_name = (FIG_PATH/fig_name).with_suffix(".pkl")
plot_path = (mae_model_by_noise, bins_noise)
plot_data with open(plot_path, 'wb') as f:
=pickle.HIGHEST_PROTOCOL) pickle.dump(plot_data, f, protocol
Changepoint detection
In the Brownian motion benchmark, we show a few ways to detect the position at which the trajectory behaviour changes. Here, we perform an analogous analysis to detect changes in the anomalous diffusion exponent \(\alpha\).
We compare the results obtained by feeding the STEP predictions to the kernel changepoint detection algorithm provided by ruptures, with running ruptures on top of an \(\alpha\) estimation with the TA-MSD over a sliding window along the trajectory.
Generate the data
In order to obtain a better insight about the strengths of the methods, we test them in trajectories with one single changepoint. Furthermore, we restrict ourselves to FBM trajectories, which has been the anomalous diffusion model for which our methods provided the best results.
In this way, we study how the changepoint position and difference between \(\alpha\) in consecutive segments affects the performance.
= create_segmentation_dataset(40000, dim=2, models=[2], noise=[0.], save=False) ds
Get the predictions
Let’s define some helping functions to keep our code clean.
def make_batch(df, i, bs, col='x'):
"Return a batch of samples from df."
= [x.transpose(-1, 0) for x in ds.loc[i*bs:(i+1)*bs-1, col]]
samples return torch.stack(samples, dim=0)
def predict_norm(model, x):
"Get the `model` prediction normalizing the batch `x`."
= normalize(x.to(default_device()))[0]
x_norm return to_detach(model(x_norm).squeeze())
Let’s gooo.
= 100
bs = np.ceil(ds.shape[0]/bs).astype(int)
n_batch for i in tqdm(range(n_batch)):
= make_batch(ds, i, bs)
xb = predict_norm(learn_exp.model, xb)
pred *bs:(i+1)*bs-1, 'pred'] = np.array([p for p in pred],
ds.loc[i=object) dtype
Overall performance
Let’s compute the Jaccard index (JI) using the STEP predictions and the \(\alpha\) estimation with the TA-MSD.
We need to define a function to compute \(\alpha\) along the trajectory with a sliding window.
def swin_alpha_tamsd(x, win_size=10):
"Computes the anomalous exponent with a sliding window over x."
= []
alphas for i in range(len(x) - win_size):
= anomalous_exponent_tamsd(x[i:i + win_size],
alpha =np.arange(2, win_size//2))
t_lag
alphas.append(alpha)return np.array(alphas)
To ease the code readability, we define a function to combine two dictionaries together (adhoc for our purposes).
def merge_dict(dict_1, dict_2):
= {}
merge for k, v1 in dict_1.items():
= dict_2.get(k, [])
v2 if isinstance(v1, Iterable) and isinstance(v2, Iterable):
= [*v1, *v2]
merge[k] elif isinstance(v1, Iterable):
= [*v1, v2]
merge[k] elif isinstance(v2, Iterable):
= [v1, *v2]
merge[k] else:
= [v1, v2]
merge[k] return merge
Now we process every trajectory and retrieve the changepoints with ruptures using both inputs.
= {'pred': 2., 'tamsd': 8.}
method_pen = 30
win_size = list(method_pen.keys())
methods = 20
threshold
= {m: {} for m in methods}
metrics_method for i, row in tqdm(ds.iterrows()):
= normalize(row.x.T.unsqueeze(0))[0]
traj = row.pred.numpy(), row.cp.numpy()
pred, true_cp = swin_alpha_tamsd(traj.squeeze(), win_size=win_size)
alphas_tamsd
for m in methods:
= (pred if m == 'pred'
seg_data else alphas_tamsd if m == 'tamsd'
else None)
= ruptures_cp(seg_data, pen=method_pen[m], min_size=5)
pred_cp if m == 'tamsd':
# Add half window size to the prediction from TA-MSD
= [p + win_size//2 for p in pred_cp]
pred_cp = evaluate_cp_prediction(true_cp, pred_cp,
metrics =threshold)
changepoint_threshold= merge_dict(metrics, metrics_method[m])
metrics_method[m]
for k, v in metrics.items():
if metrics['tp']:
f'cp_{m}_{k}'] = v[0] if k == 'sq_error' else v
ds.loc[i, else:
f'cp_{m}_{k}'] = -1 if k == 'sq_error' else v ds.loc[i,
Now we can compute the overall JI and MSE for both approaches over the whole test set.
= {}, {}
method_mse, method_j_idx for m, v in metrics_method.items():
= np.mean(v['sq_error'])
method_mse[m] = np.sum(v['tp']), np.sum(v['fp']), np.sum(v['fn'])
tp, fp, fn = jaccard_index(tp, fp, fn) method_j_idx[m]
# code-fold: true
print("Jaccard index:")
for m, ji in method_j_idx.items():
print(f" {m}: {ji:.3f}")
print("MSE:")
for m, ji in method_mse.items():
print(f" {m}: {ji:.3f}")
Jaccard index:
pred: 0.515
tamsd: 0.297
MSE:
pred: 35.011
tamsd: 71.276
Running the changepoint detection algorithm over the STEP predictions provides a massive advantage with respect to the sliding window TA-MSD baseline. We reduce the changepoint prediction error by about \(30\%\) and we cut the MSE by half.
However, the changepoint detection task for \(\alpha\) proves to be significantly more challenging than for \(D\), where we achieve mean JI over 0.8, as we saw in the Brownian motion benchmark.
Difference between segments
Just like we did for the diffusion coefficient, we can look at how the jaccard index behaves as function of the difference between consecutive segments.
We start by computing the difference between consecutive \(\alpha\) in each trajectory.
'cp'] = ds.cp.apply(lambda x: x[0].item())
ds['alpha_diff'] = ds.exps.apply(lambda x: round((x[1] - x[0]).item(), 2)) ds[
Now we can compute the metrics.
= np.unique(ds.alpha_diff)
differences = {m: [] for m in methods}
j_idx_by_diff for diff in differences:
= ds.alpha_diff == diff
mask for m in methods:
= ds.loc[mask, f'cp_{m}_tp'].sum()
tp = ds.loc[mask, f'cp_{m}_fp'].sum()
fp = ds.loc[mask, f'cp_{m}_fn'].sum()
fn j_idx_by_diff[m].append(jaccard_index(tp, fp, fn))
Code
= {'pred': colors[0], 'displ': colors_light[3], 'tamsd': colors[2]}
method_color = {'pred': 'KCPD + STEP', 'displ': 'KCPD + displ', 'tamsd': 'KCPD + TA-MSD'}
method_label = {'pred': 'solid', 'displ': 'dashed', 'tamsd': 'dashed'}
method_ls
= plt.figure(figsize=(1.4*fig_size, fig_size))
fig for m in methods:
=linewidth, label=method_label[m],
plt.plot(differences, j_idx_by_diff[m], linewidth=method_color[m], linestyle=method_ls[m])
color=14)
plt.legend(fontsize=alpha_grid)
plt.grid(alpha-2.2, 2.2])
plt.xlim([-2, -1, 0, 1, 2])
plt.xticks([-0.05, 1.05])
plt.ylim([0., 0.5, 1.])
plt.yticks([r"$\alpha_2 - \alpha_1$", fontsize=16)
plt.xlabel("Jaccard index", fontsize=16)
plt.ylabel(=14) plt.tick_params(labelsize
Change point position
= np.arange(10, 200, 10)
bins_cp = {m: [] for m in methods}
j_idx_by_position for low, high in zip(bins_cp[:-1], bins_cp[1:]):
= (low <= ds.cp) & (ds.cp < high)
mask for m in methods:
= ds.loc[mask, f'cp_{m}_tp'].sum()
tp = ds.loc[mask, f'cp_{m}_fp'].sum()
fp = ds.loc[mask, f'cp_{m}_fn'].sum()
fn j_idx_by_position[m].append(jaccard_index(tp, fp, fn))
Code
= plt.figure(figsize=(1.4*fig_size, fig_size))
fig for m in methods:
=linewidth, label=method_label[m],
plt.plot(j_idx_by_position[m], linewidth=method_color[m], linestyle=method_ls[m])
color= np.array([0, 9, 17])
x_tick_idx
plt.xticks(x_tick_idx, f'{low}-{high}' for low, high in zip(bins_cp[x_tick_idx], bins_cp[x_tick_idx+1])])
[=alpha_grid)
plt.grid(alpha=14)
plt.legend(fontsize-0.05, 1.05])
plt.ylim([0., 0.5, 1.])
plt.yticks(["Changepoint position", fontsize=16)
plt.xlabel(=14) plt.tick_params(labelsize