Bedside

Processing utilities for bedside/ICU data

Dataloaders


source

ForecastingDataset


def ForecastingDataset(
    channels, # channels to use
    forecast_window_sec, # forecast window in seconds (outcome occurring within this window); suggested: 5, 10, or 15 minutes (300, 600, or 900 seconds)
    outcome_df, # pandas dataframe containing outcomes for zarr files
    outcome_df_outcome_col, # name of the outcome column in outcome_df
    file_col:str='file_path', # column indicating zarr file path
    y_date_column:str='date', # column indicating date of sample collection
    outcome_df_seconds_since_column:str='Time Stamp (seconds)', # column indicating how many seconds since beginning of waveform
    outcome_df_duration_column:str='event_length', # column indicating duration of outcome in seconds
    sample_df:NoneType=None, # dataframe indicating which indices within each zarr file include a sample
    sample_seq_len_sec:NoneType=None, # if no sample_df, generate sequences of this length in seconds as one sample
    frequency:int=125, # frequency of underlying data
    butterworth_filters:NoneType=None, # dictionary of low-pass, high-pass, and band-pass filter settings to apply to channels
    median_filter_kernel_size:NoneType=None, # size of median filter to perform on channels
    clip_interpolations:NoneType=None, # dictionary of channels:{'phys_range':..., 'percentiles':...} for filtering and interpolation of filtered values
    constant_nan_tolerance:float=0.5, # tolerance for nan values in the data - 0 means no nan allowed, 1 means 100% of nans allowed
    require_all_channels:bool=False, # indicator to require all channels to be present in the sample; if False, returns samples with any of the channels and 0s for the missing channels
    infer_forecast_windows:bool=True, # indicator to require all forecast windows to be present in the sample; if False, returns samples with any of the forecast windows and NAs for the missing forecast windows
    normalize_signals:bool=True, # indicator to normalize signals to 0 mean and unit variance
    sample_frequency_key:str='sampling_frequency'
):

PyTorch Dataset for forecasting tasks using physiological waveform data stored in zarr format. This dataset class handles loading, preprocessing, and preparing samples for forecasting tasks.


source

SelfSupervisedDataset


def SelfSupervisedDataset(
    zarr_files, # zarr files that include samples
    channels, # channels to use
    max_seq_len_sec:NoneType=None, # maximum sequence length (in seconds) to use (this is especially relevant when you are returning both stft and raw ts data to keep them in sync)
    sample_df:NoneType=None, # dataframe indicating which indices within each zarr file include a sample
    sample_seq_len_sec:NoneType=None, # if no sample_df, generate sequences of this length in seconds as one sample
    sample_stride_sec:NoneType=None, # if no sample_df, stride in seconds between the starts of consecutive samples from the same array; if sample_stride_sec == sample_seq_len_sec, there is no overlap
    frequency:int=125, # frequency of underlying data
    butterworth_filters:NoneType=None, # dictionary of low-pass, high-pass, and band-pass filter settings to apply to channels
    median_filter_kernel_size:NoneType=None, # size of median filter to perform on channels
    clip_interpolations:NoneType=None, # dictionary of channels:{'phys_range':..., 'percentiles':...} for filtering and interpolation of filtered values
    constant_nan_tolerance:float=0.2, # tolerance for nan values in the data - 0 means no nan allowed, 1 means 100% of nans allowed
    require_all_channels:bool=True, # indicator to require all channels to be present in the data
    normalize_signals:bool=True, # indicator to normalize signals to 0 mean and unit variance
):

PyTorch Dataset for self-supervised learning tasks using physiological waveform data stored in zarr format. This dataset class handles loading, preprocessing, and preparing samples for self-supervised learning tasks.