ERP analysis of ERP CORE data#

Loading Python modules#

import json
import seaborn as sns
from pipeline import group_pipeline
from pipeline.datasets import get_erpcore

Downloading example data#

The pipeline comes with a function to download example data from the ERP CORE dataset. This dataset contains data from 40 participants who completed six different EEG experiments which were specifically designed to elicit seven common ERP components.

For example, in the N400 experiment, participants viewed pairs of prime and target words that were either semantically related (EEG triggers 211 and 212) or unrelated (EEG triggers 221 and 222).

The raw data are stored in the Open Science Framework and more details about the study are in Kappenman et al. (2021).

n400_files = get_erpcore('N400', participants=4)
Hide code cell output
Downloading file 'erpcore/N400/README.txt' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//612e8f7faf610c00b9dfefbc' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/task-N400_events.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//60078961e80d3708e3a57da1' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/participants.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007866eba0109089e8927bb' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/participants.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007866a86541a092914d749' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/dataset_description.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//60078666e80d3708eca5b9a7' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/LICENSE' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007865b86541a092914d70d' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/CHANGES' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//60078658ba010908a2892e11' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007868cba010908a4893e10' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.set' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007869086541a092614c80d' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_events.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007869286541a092614c817' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_electrodes.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6102109c317620027d387c86' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_coordsystem.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210a00c4cba0277bc6539' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.fdt' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//60078689e80d3708eca5ba21' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-001/eeg/sub-001_task-N400_channels.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//60078680ba010908a7895405' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_channels.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//60078695e80d3708eca5ba69' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.fdt' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//6007869de80d3708eaa5a332' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786a186541a091614a872' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.set' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786a586541a092914d822' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_events.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786a986541a092314bcb3' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_electrodes.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210a90c4cba026abccd3c' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-002/eeg/sub-002_task-N400_coordsystem.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210aec7a976029b9e47ca' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_coordsystem.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210c00c4cba026dbcd749' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_electrodes.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210b70c4cba026dbcd730' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_events.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786bde80d3708eca5bb26' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.set' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786b986541a092614c863' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_channels.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786ac86541a092114b8d9' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.fdt' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786b386541a091a14ad60' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786b4ba0109089e89280b' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_events.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786d0ba010908a7895542' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_coordsystem.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210cdc7a976029b9e4854' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_electrodes.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//610210c9c7a976029b9e483d' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.set' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786ceba010908a4893ec8' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_channels.tsv' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786bfe80d3708eca5bb36' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.fdt' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786c886541a092c15a105' to '/home/docs/.cache/hu-neuro-pipeline'.
Downloading file 'erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.json' from 'https://files.de-1.osf.io/v1/resources/29xpq/providers/osfstorage//600786cbba010908a4893ebb' to '/home/docs/.cache/hu-neuro-pipeline'.
def print_dict(d):
    """Pretty-print a dictionary as JSON with an indentation of 4 spaces."""
    formatted = json.dumps(d, indent=4)
    print(formatted)
print_dict(n400_files)
{
    "raw_files": [
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.set",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.set",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.set",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.set"
    ],
    "log_files": [
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_events.tsv",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_events.tsv",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_events.tsv",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_events.tsv"
    ]
}

To save time, we only download and process data from the first four participants. Feel free to re-run the example with more participants by increasing or removing the participants argument.

The paths of the downloaded raw EEG files (.set) and event files (.tsv) can now be fed into the pipeline.

Running the pipeline#

We run a simple pipeline for single-trial ERP analysis with the following steps:

  • Downsampling from 1024 to 256 Hz

  • Applying standard Biosemi montage for channel locations

  • Re-referencing to common average (not shown)

  • Automatic detection and interpolation of bad channels

  • Ocular correction with ICA

  • Bandpass filtering between 0.1 and 30 Hz

  • Segmentation to epochs around stimulus triggers

  • Baseline correction (not shown)

  • Rejecting bad epochs based on peak-to-peak amplitudes > 200 µV (not shown)

  • Computing single trial N400 amplitudes by averaging across the time window and channels of interest

  • Creating by-participant averages for the related and unrelated conditions

# Run the pipeline on all downloaded participants. It returns a dataframe of
# single trial ERP amplitudes, a dataframe of by-participant condition
# averages, and a dictionary of pipeline metadata.
trials, evokeds, config = group_pipeline(

    # Input/output paths
    raw_files=n400_files['raw_files'],
    log_files=n400_files['log_files'],
    output_dir='output',

    # Preprocessing options
    # Downsample from 1024 to 256 Hz
    downsample_sfreq=256.0,
    # Standard Biosemi montage for channel locations
    montage='biosemi64',
    # Automatic detection and interpolation of bad channels
    bad_channels='auto',
    # Ocular correction with ICA
    ica_method='fastica',
    # Bandpass filtering between 0.1 and 30 Hz
    highpass_freq=0.1,
    lowpass_freq=30.0,

    # Epoching options
    # Epochs are created around the triggers for semantically related
    # (211, 212) and unrelated (221, 222) word pairs
    triggers=[211, 212, 221, 222],
    # NOTE(review): presumably drops log file rows with these `value` codes so
    # that the remaining rows match the epoched triggers -- confirm against the
    # pipeline documentation
    skip_log_conditions={'value': [111, 112, 121, 122, 201, 202]},
    # Single trial N400 amplitudes are computed by averaging across the time
    # window (`tmin` to `tmax`, presumably in seconds) and the channels in `roi`
    components={'name': ['N400'],
                'tmin': [0.3],
                'tmax': [0.5],
                'roi': [['Cz', 'CPz']]},

    # Averaging options
    # By-participant averages for the related and unrelated conditions; each
    # key is a condition label and each value a query on the log file columns
    average_by={'related': 'value in [211, 212]',
                'unrelated': 'value in [221, 222]'})
Hide code cell output
=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.fdt
Reading 0 ... 585727  =      0.000 ...   571.999 secs...
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/group.py:125: UserWarning: The default value of `ica_n_components` has changed from `0.99` (i.e., 99% explained variance) to `None` (i.e., extract as many components as possible). To reproduce previous results, explicitly set `ica_n_components=0.99`.
  warn('The default value of `ica_n_components` has changed from ' +
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 29 components
Fitting ICA took 4.1s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (29 components)
    Zeroing out 3 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
Automatically detected bad channels ['C5'] with z_SE > 3.0
Restarting with interpolation of bad channels

=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.fdt
Reading 0 ... 585727  =      0.000 ...   571.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
Setting channel interpolation method to {'eeg': 'spline'}.
Interpolating bad channels.
    Automatic origin fit: head of radius 95.0 mm
Computing interpolation matrix from 29 sensor positions
Interpolating 1 sensors
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 28 components
Fitting ICA took 3.2s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (28 components)
    Zeroing out 3 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
<Epochs |  120 events (all good), -0.5 - 1.49609 s, baseline -0.2 - 0 s, ~16.4 MB, data loaded,
 '211': 30
 '212': 30
 '221': 30
 '222': 30>
Adding metadata with 6 columns
    Rejecting  epoch based on EEG : ['F8']
    Rejecting  epoch based on EEG : ['F8']
    Rejecting  epoch based on EEG : ['F8']
    Rejecting  epoch based on EEG : ['FP1', 'P7', 'Oz', 'CPz', 'FP2', 'P8', 'PO8', 'O2']
    Rejecting  epoch based on EEG : ['F8']
5 bad epochs dropped
Computing single trial ERP amplitudes for 'N400'
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.fdt
Reading 0 ... 510975  =      0.000 ...   498.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 29 components
Fitting ICA took 4.3s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (29 components)
    Zeroing out 2 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
Automatically detected bad channels ['P3'] with z_SE > 3.0
Restarting with interpolation of bad channels

=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.fdt
Reading 0 ... 510975  =      0.000 ...   498.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
Setting channel interpolation method to {'eeg': 'spline'}.
Interpolating bad channels.
    Automatic origin fit: head of radius 95.0 mm
Computing interpolation matrix from 29 sensor positions
Interpolating 1 sensors
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 28 components
Fitting ICA took 3.6s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (28 components)
    Zeroing out 2 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
<Epochs |  120 events (all good), -0.5 - 1.49609 s, baseline -0.2 - 0 s, ~16.4 MB, data loaded,
 '211': 30
 '212': 30
 '221': 30
 '222': 30>
Adding metadata with 6 columns
0 bad epochs dropped
Computing single trial ERP amplitudes for 'N400'
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.fdt
Reading 0 ... 428031  =      0.000 ...   417.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 29 components
Fitting ICA took 2.0s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (29 components)
    Zeroing out 3 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
Automatically detected bad channels ['Oz'] with z_SE > 3.0
Restarting with interpolation of bad channels

=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.fdt
Reading 0 ... 428031  =      0.000 ...   417.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
Setting channel interpolation method to {'eeg': 'spline'}.
Interpolating bad channels.
    Automatic origin fit: head of radius 95.0 mm
Computing interpolation matrix from 29 sensor positions
Interpolating 1 sensors
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 28 components
Fitting ICA took 2.0s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (28 components)
    Zeroing out 3 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
<Epochs |  120 events (all good), -0.5 - 1.49609 s, baseline -0.2 - 0 s, ~16.4 MB, data loaded,
 '211': 30
 '212': 30
 '221': 30
 '222': 30>
Adding metadata with 6 columns
0 bad epochs dropped
Computing single trial ERP amplitudes for 'N400'
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.fdt
Reading 0 ... 634879  =      0.000 ...   619.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 29 components
Fitting ICA took 5.8s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (29 components)
    Zeroing out 2 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
Automatically detected bad channels ['FP1'] with z_SE > 3.0
Restarting with interpolation of bad channels

=== Reading raw data from /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.set ===
Reading /home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.fdt
Reading 0 ... 634879  =      0.000 ...   619.999 secs...
Downsampling from 1024.0 Hz to 256.0 Hz
Adding bipolar channel VEOG (FP1 - VEOG_lower)
Adding bipolar channel HEOG (HEOG_left - HEOG_right)
Loading standard montage biosemi64
Setting channel interpolation method to {'eeg': 'spline'}.
Interpolating bad channels.
    Automatic origin fit: head of radius 95.0 mm
Computing interpolation matrix from 29 sensor positions
Interpolating 1 sensors
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 30 channels (please be patient, this may take a while)
Selecting by non-zero PCA components: 28 components
Fitting ICA took 4.8s.
/home/docs/checkouts/readthedocs.org/user_builds/hu-neuro-pipeline/conda/stable/lib/python3.11/site-packages/pipeline/preprocessing.py:116: FutureWarning: The default for pick_channels will change from ordered=False to ordered=True in 1.5 and this will result in a change of behavior because the resulting channel order will not match. Either use a channel order that matches your instance or pass ordered=False.
  eog_indices, _ = ica.find_bads_eog(
Applying ICA to Raw instance
    Transforming to ICA space (28 components)
    Zeroing out 3 ICA components
    Projecting back using 30 PCA components
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 8449 samples (33.004 s)

Not setting metadata
120 matching events found
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 120 events and 513 original time points ...
0 bad epochs dropped
<Epochs |  120 events (all good), -0.5 - 1.49609 s, baseline -0.2 - 0 s, ~16.4 MB, data loaded,
 '211': 30
 '212': 30
 '221': 30
 '222': 30>
Adding metadata with 6 columns
    Rejecting  epoch based on EEG : ['FP1', 'F7', 'P3', 'PO3', 'Oz', 'Pz', 'CPz', 'FP2', 'Fz', 'F8', 'FCz', 'Cz', 'C4', 'P4', 'PO4', 'O2']
    Rejecting  epoch based on EEG : ['FP1', 'FP2']
2 bad epochs dropped
Computing single trial ERP amplitudes for 'N400'
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
=== Processing group level ===
Identifying common channels ...
Identifying common channels ...

Checking the results#

This pipeline returns three objects: A dataframe of single trial ERP amplitudes, a dataframe of by-participant condition averages, and a dictionary of pipeline metadata.

Single trial amplitudes#

These are basically just the log files, concatenated for all participants, with one added column for each ERP component of interest (here, only the N400). Each value in this column reflects the single trial ERP amplitude, averaged across time points and channels of interest.

Here are the first couple of lines of the dataframe:

trials.head()
participant_id onset duration sample trial_type stim_file value N400
0 sub-001_task-N400_eeg 21.5010 0.2 22018 stimulus NaN 212 5.015664
1 sub-001_task-N400_eeg 24.4658 0.2 25054 stimulus NaN 222 -1.232971
2 sub-001_task-N400_eeg 27.2988 0.2 27955 stimulus NaN 222 2.444319
3 sub-001_task-N400_eeg 30.1475 0.2 30872 stimulus NaN 222 3.753161
4 sub-001_task-N400_eeg 33.0293 0.2 33823 stimulus NaN 212 7.988066

Since we only have four participants and relatively few trials per participant, we can show all the single trial amplitudes in one plot (color-coded by condition):

trials['condition'] = trials['value'].map({211: 'related', 212: 'related',
                                           221: 'unrelated', 222: 'unrelated'})
trials['participant'] = trials['participant_id'].str.extract(r'(sub-\d+)')

_ = sns.swarmplot(data=trials, x='participant', y='N400', hue='condition')
../_images/4b9fca014191436be39fd7cc7153247f714ae70c45332f2ff7f876332fb3fd5b.png

We could also use this dataframe for statistical analysis on the single trial level, e.g., using linear mixed-effects models with the lme4 package in R (see UCAP example) or the statsmodels package in Python.

By-participant averages#

This is one big data frame which, unlike trials, is averaged across trials (i.e., losing any single trial information) but not averaged across time points or channels (i.e., retaining the millisecond-wise ERP waveform at all electrodes).

evokeds.head()
participant_id label query time FP1 F3 F7 FC3 C3 C5 ... Cz C4 C6 P4 P8 P10 PO8 PO4 O2 N400
0 sub-001_task-N400_eeg related value in [211, 212] -0.500000 0.866599 -0.194195 0.199290 -0.829457 0.195871 0.639818 ... -0.014475 1.261090 1.448636 -0.515446 0.454525 -0.204328 -0.529926 -0.542719 -0.838938 0.366025
1 sub-001_task-N400_eeg related value in [211, 212] -0.496094 0.603450 0.470583 -1.205644 -0.206700 0.487827 0.067032 ... 0.350924 1.593126 1.522060 -0.402127 0.777953 -0.092133 -0.029865 -0.409488 -1.039111 0.672487
2 sub-001_task-N400_eeg related value in [211, 212] -0.492188 0.209414 0.945254 -2.550194 0.335628 0.745456 -0.315256 ... 0.619660 1.825658 1.482514 -0.002799 0.979261 -0.067263 0.422345 0.080980 -1.142628 0.916845
3 sub-001_task-N400_eeg related value in [211, 212] -0.488281 -0.124549 1.013611 -3.309687 0.556418 0.846509 -0.418907 ... 0.717804 1.854671 1.346626 0.586631 1.043467 -0.133005 0.804730 0.803144 -1.059672 1.029418
4 sub-001_task-N400_eeg related value in [211, 212] -0.484375 -0.240614 0.597106 -3.219084 0.321459 0.718498 -0.294996 ... 0.631276 1.657058 1.140245 1.204687 1.019894 -0.264767 1.122595 1.572330 -0.760735 0.991035

5 rows × 35 columns

We can use it to display the grand-averaged ERP waveforms for different conditions as a timecourse plot at a single channel or ROI (here for the N400 ROI):

_ = sns.lineplot(data=evokeds, x='time', y='N400', hue='label', errorbar=None)
../_images/c3561fbf8b2e61319e115418408682941cc726df5cf066f34fc257139ea9e367.png

Note that we’re explicitly disabling error bars here because they would be invalid due to the fact that our condition effect (related vs. unrelated) is a within-participant factor. See the UCAP example for how to compute and plot valid within-participant error bars around the grand-averaged evoked waveform.

Pipeline metadata#

This is a dictionary with various metadata about the pipeline run. It contains:

  • The user-specified input arguments

  • The default values for those arguments that were not specified

  • Some descriptive statistics that were computed by the pipeline along the way (e.g., the indices of epochs that were rejected based on peak-to-peak amplitude)

  • The software version of Python, the pipeline and its most important dependency packages

print_dict(config)
{
    "raw_files": [
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_eeg.set",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_eeg.set",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_eeg.set",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_eeg.set"
    ],
    "log_files": [
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-001/eeg/sub-001_task-N400_events.tsv",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-002/eeg/sub-002_task-N400_events.tsv",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-003/eeg/sub-003_task-N400_events.tsv",
        "/home/docs/.cache/hu-neuro-pipeline/erpcore/N400/sub-004/eeg/sub-004_task-N400_events.tsv"
    ],
    "output_dir": "output",
    "clean_dir": null,
    "epochs_dir": null,
    "report_dir": null,
    "to_df": true,
    "downsample_sfreq": 256.0,
    "veog_channels": "auto",
    "heog_channels": "auto",
    "montage": "biosemi64",
    "bad_channels": [
        "auto",
        "auto",
        "auto",
        "auto"
    ],
    "ref_channels": "average",
    "besa_files": [
        null,
        null,
        null,
        null
    ],
    "ica_method": "fastica",
    "ica_n_components": null,
    "highpass_freq": 0.1,
    "lowpass_freq": 30.0,
    "triggers": [
        211,
        212,
        221,
        222
    ],
    "triggers_column": null,
    "epochs_tmin": -0.5,
    "epochs_tmax": 1.5,
    "baseline": [
        -0.2,
        0.0
    ],
    "skip_log_rows": [
        null,
        null,
        null,
        null
    ],
    "skip_log_conditions": {
        "value": [
            111,
            112,
            121,
            122,
            201,
            202
        ]
    },
    "reject_peak_to_peak": 200.0,
    "components": {
        "name": [
            "N400"
        ],
        "tmin": [
            0.3
        ],
        "tmax": [
            0.5
        ],
        "roi": [
            [
                "Cz",
                "CPz"
            ]
        ]
    },
    "average_by": {
        "related": "value in [211, 212]",
        "unrelated": "value in [221, 222]"
    },
    "perform_tfr": false,
    "tfr_subtract_evoked": false,
    "tfr_freqs": [
        4.0,
        5.0,
        6.0,
        7.0,
        8.0,
        9.0,
        10.0,
        11.0,
        12.0,
        13.0,
        14.0,
        15.0,
        16.0,
        17.0,
        18.0,
        19.0,
        20.0,
        21.0,
        22.0,
        23.0,
        24.0,
        25.0,
        26.0,
        27.0,
        28.0,
        29.0,
        30.0,
        31.0,
        32.0,
        33.0,
        34.0,
        35.0,
        36.0,
        37.0,
        38.0,
        39.0,
        40.0
    ],
    "tfr_cycles": [
        2.0,
        2.5,
        3.0,
        3.5,
        4.0,
        4.5,
        5.0,
        5.5,
        6.0,
        6.5,
        7.0,
        7.5,
        8.0,
        8.5,
        9.0,
        9.5,
        10.0,
        10.5,
        11.0,
        11.5,
        12.0,
        12.5,
        13.0,
        13.5,
        14.0,
        14.5,
        15.0,
        15.5,
        16.0,
        16.5,
        17.0,
        17.5,
        18.0,
        18.5,
        19.0,
        19.5,
        20.0
    ],
    "tfr_mode": "percent",
    "tfr_baseline": [
        -0.45,
        -0.05
    ],
    "tfr_components": {
        "name": [],
        "tmin": [],
        "tmax": [],
        "fmin": [],
        "fmax": [],
        "roi": []
    },
    "perm_contrasts": [],
    "perm_tmin": 0.0,
    "perm_tmax": 1.0,
    "perm_channels": null,
    "perm_fmin": null,
    "perm_fmax": null,
    "n_jobs": 1,
    "vhdr_files": null,
    "auto_bad_channels": {
        "sub-001_task-N400_eeg": [
            "C5"
        ],
        "sub-002_task-N400_eeg": [
            "P3"
        ],
        "sub-003_task-N400_eeg": [
            "Oz"
        ],
        "sub-004_task-N400_eeg": [
            "FP1"
        ]
    },
    "auto_rejected_epochs": {
        "sub-001_task-N400_eeg": [
            86,
            89,
            90,
            107,
            117
        ],
        "sub-002_task-N400_eeg": [],
        "sub-003_task-N400_eeg": [],
        "sub-004_task-N400_eeg": [
            84,
            85
        ]
    },
    "auto_ica_n_components": {
        "sub-001_task-N400_eeg": 28,
        "sub-002_task-N400_eeg": 28,
        "sub-003_task-N400_eeg": 28,
        "sub-004_task-N400_eeg": 28
    },
    "auto_ica_bad_components": {
        "sub-001_task-N400_eeg": [
            2,
            1,
            0
        ],
        "sub-002_task-N400_eeg": [
            0,
            4
        ],
        "sub-003_task-N400_eeg": [
            0,
            1,
            15
        ],
        "sub-004_task-N400_eeg": [
            1,
            0,
            5
        ]
    },
    "package_versions": {
        "python": "3.11.8",
        "pipeline": "0.8.6.dev0",
        "mne": "1.6.1",
        "numpy": "1.26.4",
        "pandas": "2.2.2",
        "scikit-learn": "1.4.2"
    }
}