Bedside
Processing utilities for bedside/ICU data
Dataloaders
ForecastingDataset
def ForecastingDataset(
channels, # channels to use
forecast_window_sec, # forecast window in seconds (outcome occurring within this window); suggested values are 5, 10, or 15 minutes (i.e. 300, 600, or 900 seconds)
outcome_df, # pandas dataframe containing outcomes for zarr files
outcome_df_outcome_col, # name of the outcome column in outcome_df
file_col:str='file_path', # column indicating zarr file path
y_date_column:str='date', # column indicating date of sample collection
outcome_df_seconds_since_column:str='Time Stamp (seconds)', # column indicating how many seconds since beginning of waveform
outcome_df_duration_column:str='event_length', # column indicating duration of outcome in seconds
sample_df:NoneType=None, # dataframe indicating which indices within each zarr file includes a sample
sample_seq_len_sec:NoneType=None, # if no sample_df, generate sequences of this length in seconds as one sample
frequency:int=125, # frequency of underlying data
butterworth_filters:NoneType=None, # dictionary of low-pass, high-pass, and band-pass filter dictionaries to apply to channels
median_filter_kernel_size:NoneType=None, # size of median filter to perform on channels
clip_interpolations:NoneType=None, # dictionary of channels:{'phys_range':..., 'percentiles':...} for filtering and interpolation of filtered values
constant_nan_tolerance:float=0.5, # tolerance for NaN values in the data: 0 means no NaNs are allowed, 1 means up to 100% NaNs are allowed
require_all_channels:bool=False, # whether to require all channels to be present in the sample; if False, samples with any of the channels are returned, with 0s for the missing channels
infer_forecast_windows:bool=True, # whether to require all forecast windows to be present in the sample; if False, samples with any of the forecast windows are returned, with NAs for the missing forecast windows
normalize_signals:bool=True, # indicator to normalize signals to 0 mean and unit variance
sample_frequency_key:str='sampling_frequency'
):
PyTorch Dataset for forecasting tasks using physiological waveform data stored in zarr format. This dataset class handles loading, preprocessing, and preparing samples for forecasting tasks.
SelfSupervisedDataset
def SelfSupervisedDataset(
zarr_files, # zarr files that include samples
channels, # channels to use
max_seq_len_sec:NoneType=None, # maximum sequence length (in seconds) to use (this is especially relevant when returning both STFT and raw time-series data, to keep them in sync)
sample_df:NoneType=None, # dataframe indicating which indices within each zarr file includes a sample
sample_seq_len_sec:NoneType=None, # if no sample_df, generate sequences of this length in seconds as one sample
sample_stride_sec:NoneType=None, # if no sample_df, stride in seconds between consecutive samples from the same array; if sample_stride_sec == sample_seq_len_sec, there is no overlap
frequency:int=125, # frequency of underlying data
butterworth_filters:NoneType=None, # dictionary of low-pass, high-pass, and band-pass filter dictionaries to apply to channels
median_filter_kernel_size:NoneType=None, # size of median filter to perform on channels
clip_interpolations:NoneType=None, # dictionary of channels:{'phys_range':..., 'percentiles':...} for filtering and interpolation of filtered values
constant_nan_tolerance:float=0.2, # tolerance for NaN values in the data: 0 means no NaNs are allowed, 1 means up to 100% NaNs are allowed
require_all_channels:bool=True, # indicator to require all channels to be present in the data
normalize_signals:bool=True, # indicator to normalize signals to 0 mean and unit variance
):
PyTorch Dataset for self-supervised learning tasks using physiological waveform data stored in zarr format. This dataset class handles loading, preprocessing, and preparing samples for self-supervised learning tasks.