Searchlight classification of movie segments#

In this analysis, we classify which segment (5 TRs) of the movie the participant was watching for each searchlight (20 mm radius).

Preparations#

Import Python packages#

import os
import numpy as np
from scipy.stats import ttest_rel, rankdata, pearsonr, spearmanr
import neuroboros as nb
from joblib import Parallel, delayed, cpu_count, parallel_backend

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

Other settings#

dsets = ['raiders', 'budapest']
ico = 32
spaces = [f'{a}-ico{b}' for a in ['fsavg', 'fslr', 'onavg'] for b in [ico]]
train_dur, test_dur = '1st-half', '2nd-half'
radius = 20
align_radius = 20
size = 5
colors = np.array(sns.color_palette('tab10'))
np.set_printoptions(4, linewidth=200)
if os.path.exists('Arial.ttf'):
    from matplotlib import font_manager
    font_manager.fontManager.addfont('Arial.ttf')
mpl.rcParams['font.family'] = 'Arial'

Load results#

# cache file to avoid excessive I/O
summary_fn = f'sl_clf/summary_ico{ico}_size{size}.pkl'
if not os.path.exists(summary_fn):
    results = {}
    for dset_name in dsets:
        dset = nb.dataset(dset_name)
        sids = dset.subjects
        for align in ['surf', 'procr', 'ridge']:
            for space in spaces:
                folder = f'sl_clf/{dset_name}/{space}_{align}_{align_radius}mm_'\
                    f'{train_dur}_{test_dur}_{size}TRs'
                aa = []
                for sid in sids:
                    a = [np.load(f'{folder}/{sid}_{lr}h_{radius}mm.npy')
                         for lr in 'lr']
                    a = np.concatenate(a)
                    aa.append(a)
                aa = np.array(aa)
                results[dset_name, space, align] = aa
    nb.save(summary_fn, results)
results = nb.load(summary_fn)

Accuracy of movie time point classification#

  • Two datasets (top: Raiders, bottom: Budapest)

  • Three alignment methods (surface alignment, Procrustes hyperalignment, warp hyperalignment)

  • Bars denote average accuracy across participants and searchlights

  • Grey lines denote individual participants (average accuracy across searchlights)

fig, axs = plt.subplots(2, 3, figsize=[_/2.54 for _ in (12, 8)], dpi=200)
pct1, pct2, pp = [], [], []
for ii, dset_name in enumerate(['raiders', 'budapest']):
    for jj, align in enumerate(['surf', 'procr', 'ridge']):
        ax = axs[ii, jj]
        ys = []
        for i, space in enumerate(spaces):
            a = results[dset_name, space, align].mean(axis=1)
            m = a.mean(axis=0)
            c = colors[i]
            ax.bar(i, m, facecolor=c, edgecolor=c*0.5, ecolor=c*0.5)
            ys.append(a)
        ax.set_xticks(np.arange(3), labels=spaces)

        ys = np.array(ys)
        for y in ys.T:
            ax.plot(np.arange(3), y, 'k-', alpha=0.3, markersize=1)
        t, p = ttest_rel(ys[2], ys[0])
        pp.append(p)
        assert p < 0.005
        t, p = ttest_rel(ys[2], ys[1])
        pp.append(p)
        assert p < 0.005
        pct1.append(np.mean(ys[0] < ys[2]))
        pct2.append(np.mean(ys[1] < ys[2]))

        max_ = int(np.ceil(ys.max() * 20))
        yy = np.arange(max_ + 1) * 0.05
        ax.set_yticks(yy, labels=[f'{_*100:.0f}%' for _ in yy])
        ax.set_xlim([-0.6, 2.6])
        ax.tick_params(axis='both', pad=0, length=2, labelsize=5)
        ax.set_ylabel('Accuracy', size=6, labelpad=1)
        ax.set_xlabel('Surface space', size=6, labelpad=1)
        title = {'procr': 'Procrustes hyperalignment',
                 'ridge': 'Warp hyperalignment',
                 'surf': 'Surface alignment'}[align]
        ax.set_title(f'{title}', size=6, pad=2)
    ax.annotate(dset_name.capitalize(), (0.005, 0.99 - ii * 0.5),
                xycoords='figure fraction', size=6, va='top')
print(np.mean(pct1), np.mean(pct2), np.max(pp))

fig.subplots_adjust(
    left=0.08, right=0.99, top=0.94, bottom=0.06, wspace=0.3, hspace=0.4)
plt.savefig('sl_clf_bar.png', dpi=300, transparent=True)
plt.show()
1.0 1.0 4.803615811053366e-07
../_images/6cca40463950f43c2a470e91256819b9fd9615339dcdbcdf1c2842ec478644b9.png

Difference of classification accuracy between template spaces#

  • The difference accuracy between onavg and fsavg (blue bars), and between onavg and fslr (orange bars)

  • The differences are statistically significant (p < 1e-4) across different datasets and alignment methods

fig, axs = plt.subplots(
    2, 3, figsize=[_/2.54 for _ in (12, 8)], dpi=200)
pct1, pct2, pp = [], [], []
for ii, dset_name in enumerate(dsets):
    for jj, align in enumerate(['surf', 'procr', 'ridge']):
        ax = axs[ii, jj]
        max_ = []
        onavg = results[dset_name, spaces[-1], align].mean(axis=1)
        for kk, space in enumerate(spaces[:2]):
            y = onavg - results[dset_name, space, align].mean(axis=1)
            m = y.mean(axis=0)
            se = y.std(axis=0, ddof=1) / np.sqrt(y.shape[0])
            c = colors[kk]
            t, p = ttest_rel(onavg, results[dset_name, space, align][:, -1])
            dof = len(onavg) - 1
            label = f'{spaces[-1].split("-")[0]}$ - ${spaces[kk].split("-")[0]}'\
                    f'\n$t$({dof}) = {t:.2f}'
            ax.bar(kk, m, yerr=se, color=c, ec=c*0.5, width=0.7, label=label)
            max_.append(m + se)
            assert p < 1e-4
        max_ = int(np.ceil(np.max(max_) * 1.1 * 1000))

        ax.set_xticks([])
        ax.set_xlim(np.array([-0.6, 1.6]))
        yy = np.arange(0, max_ + 1, 5)
        ax.set_yticks(yy  * 0.001, labels=[f'{_*0.1:.1f}%' for _ in yy])
        ax.set_ylabel('Accuracy difference', size=6, labelpad=1)
        ax.tick_params(axis='both', pad=0, length=2, labelsize=5)

        title = {'procr': 'Procrustes hyperalignment',
                 'ridge': 'Warp hyperalignment',
                 'surf': 'Surface alignment',
                }[align]
        ax.set_title(f'{title}', size=6, pad=2)
        ax.legend(fontsize=6, fancybox=True, loc='lower right')
    axs[ii, 0].annotate(
        dset_name.capitalize(), (-0.28, 1.15),
        xycoords='axes fraction', size=6, va='top', ha='left')
fig.subplots_adjust(
    left=0.08, right=0.99, top=0.93, bottom=0.02, wspace=0.3, hspace=0.4)
plt.savefig('sl_clf_bar_diff.png', dpi=300, transparent=True)
plt.show()
../_images/08f7ddfe4e9a409f8331a92af0df9bfcdfb20fd985c6606752c217b63118202f.png

Which brain region has the most improvement#

Upsample the data#

  • To compare the results based on different template spaces, we upsample the statistics to onavg-ico128 space

upsampled = {}
for ii, dset_name in enumerate(['raiders', 'budapest']):
    for jj, align in enumerate(['surf', 'procr', 'ridge']):
        for space in spaces:
            a = results[dset_name, space, align].mean(axis=0)
            center_space = space.split('-')[0] + '-ico32'
            xfms = [nb.mapping(lr, center_space, 'onavg-ico128', mask=True)
                    for lr in 'lr']
            nv1 = xfms[0].shape[0]
            a = np.concatenate([a[:nv1] @ xfms[0], a[nv1:] @ xfms[1]])
            upsampled[dset_name, space, align] = a
upsampled_ivd = {}
for space in spaces:
    center_space = space.split('-')[0] + '-ico32'
    ivd_lr = []
    for lr in 'lr':
        coords, faces = nb.geometry('midthickness', lr, center_space)
        surf = nb.Surface(coords, faces)
        distances = nb.distances(lr, center_space)
        mask = nb.mask(lr, space=center_space)
        ivd = []
        for i, nbrs in enumerate(surf.neighbors):
            if mask[i]:
                ivd.append(distances[i, nbrs].mean())
        ivd_lr.append(np.array(ivd))

    values = []
    for lr, ivd in zip('lr', ivd_lr):
        sls = nb.sls(lr, radius, space=space)
        avg_ivd = np.array([ivd[sl].mean() for sl in sls])

        xfm = nb.mapping(lr, center_space, 'onavg-ico128', mask=True)
        values.append(avg_ivd @ xfm)
    upsampled_ivd[space] = np.concatenate(values)

Correlations with improvement in accuracy#

  • Regions that were previously undersampled are more likely to have more improvement after switching to onavg (pearsonr(c, d) in the code below).

  • Regions that have higher accuracies based on traditional templates are more likely to have more improvement after switching to onavg (pearsonr(a, d) in the code below).

for space in spaces[:2]:
    for ii, dset_name in enumerate(['raiders', 'budapest']):
        for jj, align in enumerate(['surf', 'procr', 'ridge']):
            a = upsampled[dset_name, space, align]
            b = upsampled[dset_name, spaces[-1], align]
            c = upsampled_ivd[space]
            d = b - a
            print(f'{space:11s}, {dset_name:8s}, {align:5s}, '
                  f'{pearsonr(c, d)[0]:7.4f}, {pearsonr(a, d)[0]:7.4f}')
fsavg-ico32, raiders , surf , -0.0902, -0.0007
fsavg-ico32, raiders , procr,  0.2385,  0.4536
fsavg-ico32, raiders , ridge,  0.4063,  0.4944
fsavg-ico32, budapest, surf , -0.1182, -0.0443
fsavg-ico32, budapest, procr,  0.2056,  0.3686
fsavg-ico32, budapest, ridge,  0.4501,  0.3976
fslr-ico32 , raiders , surf , -0.1038,  0.0102
fslr-ico32 , raiders , procr,  0.2697,  0.4402
fslr-ico32 , raiders , ridge,  0.4472,  0.5102
fslr-ico32 , budapest, surf , -0.0832,  0.0718
fslr-ico32 , budapest, procr,  0.2601,  0.3916
fslr-ico32 , budapest, ridge,  0.4820,  0.4256

Scatter plots#

nc = 10
colors2 = np.concatenate(
    [sns.color_palette("mako", nc)[1:],
     [[0.9, 0.9, 0.9]],
     sns.color_palette("rocket", nc)[1:][::-1]],
    axis=0)
cmap = mpl.colors.LinearSegmentedColormap.from_list('mako_rocket', colors2)

In the figure below, red dots are searchlights that have larger inter-vertex distance based on traditional templates, and blue dots are those that lave smaller inter-vertex distance. Dots above the diagonal are searchlights that improve after switching to onavg, and red dots appear more often above the diagonal.

with sns.axes_style('darkgrid'):
    fig, axs = plt.subplots(2, 3, figsize=[_/2.54 for _ in (12, 8)], dpi=200)
    for ii, dset_name in enumerate(['raiders', 'budapest']):
        for jj, align in enumerate(['surf', 'procr', 'ridge']):
            aa, bb, cc = [], [], []
            for space in spaces[:2]:
                a = upsampled[dset_name, space, align]
                b = upsampled[dset_name, spaces[-1], align]
                c = upsampled_ivd[space]
                aa.append(a)
                bb.append(b)
                cc.append(c)
            aa = np.concatenate(aa)
            bb = np.concatenate(bb)
            cc = np.concatenate(cc)
            cc = cc - np.median(cc)
            vmax = np.percentile(cc, [99])
            idx = np.argsort(bb - aa)
            aa, bb, cc = aa[idx], bb[idx], cc[idx]
            ax = axs[ii, jj]
            ax.scatter(aa, bb, c=cc, s=0.1, edgecolor='none',
                       cmap=cmap, vmin=-vmax, vmax=vmax)
            lim = [0, max(a.max(), b.max())]
            ticks = np.arange(0, np.ceil(lim[1] * 100) + 1, 10) * 0.01
            labels = [f'{_*100:.0f}%' for _ in ticks]
            ax.set_xticks(ticks, labels=labels)
            ax.set_yticks(ticks, labels=labels)
            ax.set_xlim(lim)
            ax.set_ylim(lim)
            ax.plot(lim, lim, 'k--', alpha=1, lw=0.2)
            ax.tick_params(axis='both', pad=0, length=2, labelsize=5)
            ax.set_ylabel(f'Accuracy ({spaces[-1]})', size=6, labelpad=1)
            ax.set_xlabel(f'Accuracy ({" & ".join(spaces[:-1])})',
                          size=6, labelpad=1)
            title = {'procr': 'Procrustes hyperalignment',
                     'ridge': 'Warp hyperalignment',
                     'surf': 'Surface alignment',
                    }[align]
            ax.set_title(f'{title}', size=6, pad=2)
        axs[ii, 0].annotate(
            dset_name.capitalize(), (-0.28, 1.15),
            xycoords='axes fraction', size=6, va='top', ha='left')
    fig.subplots_adjust(
        left=0.08, right=0.99, top=0.92, bottom=0.06, wspace=0.35, hspace=0.4)
    plt.savefig('sl_clf_scatter.png', dpi=300)
    plt.show()
../_images/ba0fbd394f6694e3ff4ad8e906a7279136be3919e915fd8913d2afedcf29c3d0.png