Data Preprocessing

Don't forget, none of this matters

source

calculate_class_weights


def calculate_class_weights(
    dataloader, ignore_index:int=-100, returns_padded_mask:bool=True, return_ratio:bool=True
):

Call self as a function.


source

interpolate_nan_clip


def interpolate_nan_clip(
    x, physiological_range_clip:NoneType=None, percentile_clip:NoneType=None, return_mask_only:bool=False
):

Function to clip outliers based on percentiles or physiological range and then interpolate nearby values


source

calculate_stats_all


def calculate_stats_all(
    zarr_files, channels, sample_wise:bool=True, clip_interpolations:NoneType=None,
    channel_magnitude_multiple:NoneType=None
):

Call self as a function.


source

calculate_stats


def calculate_stats(
    idx, zarr_file, channels, clip_interpolations:NoneType=None, channel_magnitude_multiple:NoneType=None
):

Function to calculate stats on an individual zarr array, including a clip interpolate range


source

calculate_stft_stats_batch


def calculate_stft_stats_batch(
    batch, n_fft:int=256, # number of ffts to perform
    win_length:int=250, # window of ffts
    pad_mode:str='replicate', # padding mode
    center:bool=True, # center stft?
    hop_length:int=125, # hop length?
    normalized:bool=True, decibel_scale:bool=True, return_complex:bool=True
):

Call self as a function.


source

calculate_stft_stats


def calculate_stft_stats(
    idx, zarr_file, # zarr file with arrays
    channels, # channels to perform stft and calculate stats on
    n_fft:int=256, # number of ffts to perform
    win_length:int=250, # window of ffts
    pad_mode:str='replicate', # padding mode
    center:bool=True, # center stft?
    hop_length:int=125, # hop length?
    normalized:bool=True, decibel_scale:bool=True, return_complex:bool=True, clip_interpolations:NoneType=None
):

Function to calculate stft stats on an individual zarr array, including a clip interpolate range


source

calculate_samples_mp


def calculate_samples_mp(
    zarr_files, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
    max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, constant_nan_tolerance:float=1.0,
    require_all_channels:bool=True, min_seq_len_sec:NoneType=None, constant_channels:NoneType=None
):

Multiprocessing function to generate samples


source

calculate_samples


def calculate_samples(
    zarr_file, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
    max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, require_all_channels:bool=True,
    constant_nan_tolerance:float=1.0, min_seq_len_sec:NoneType=None, constant_channels:NoneType=None
):

Function to create a dataframe of samples and their sequence indices


source

check_signal_not_constant


def check_signal_not_constant(
    channel_data, constant_tolerance:float=1.0
):

Function to check if a signal is NOT constant. This works for NaNs and constant values.

# Example: resolve a requested channel spec against the channels that are
# actually available, putting None wherever nothing matches.
#
# A spec can be written as groups of aliases (one inner list per signal) ...
channels = [['ECG', 'ECG (LL-RA)', 'EKG', 'ECG (L-R)'],
            ['EOG(L)', 'EOG-L', 'E1', 'LOC', 'E1-M2', 'E1-AVG'],
            ['EMG', 'cchin_l', 'chin', 'EMG (L-R)', 'EMG (1-2)', 'EMG (1-3)', 'Chin3'],
            ['C4-M1', 'C4_M1', 'EEG', 'EEG3', 'C3-M2', 'C3_M2', 'C4-AVG'],
            ['SaO2', 'SpO2','spo2'],
            ['THOR RES', 'Thor', 'thorax', 'Thoracic', 'Chest'],
            ['ABDO RES', 'abdomen', 'Abdo', 'Abdominal', 'ABD']]
# ... or as a flat list of exact channel names. This second assignment
# replaces the grouped spec above, so the flat form is what gets resolved.
channels = ['ABP', 'II', 'V', 'PLETH','RESP']

avail_channels = ['II']
if all(isinstance(group, list) for group in channels):
    # Grouped spec: keep the first alias of each group that is available.
    updated_channels = [
        next((alias for alias in group if alias in avail_channels), None)
        for group in channels
    ]
else:
    # Flat spec: keep each name only if it is available.
    updated_channels = [
        name if name in avail_channels else None for name in channels
    ]
updated_channels
[None, 'II', None, None, None]

source

check_hypnograms_mp


def check_hypnograms_mp(
    zarr_files, required_stages:list=[0, 1, 2, 3, 4], constant_tolerance:float=1.0
):

Multiprocessing function to check hypnograms


source

check_hypnogram


def check_hypnogram(
    zarr_file, required_stages:list=[0, 1, 2, 3, 4], constant_tolerance:float=1.0
):

Function to check if a hypnogram contains all required stages and is not constant


source

calculate_samples_forecast_mp


def calculate_samples_forecast_mp(
    outcome_df, file_col, outcome_val_col, outcome_time_col, outcome_duration_col, forecast_window_sec, channels,
    frequency, sample_seq_len_sec, channel_quality_functions:NoneType=None, require_all_channels:bool=True,
    infer_forecast_windows:bool=True, sample_frequency_key:str='sampling_frequency'
):

Multiprocessing function to generate samples


source

calculate_samples_forecast


def calculate_samples_forecast(
    zarr_file, outcome_start_times, outcome_durations, outcome_vals, channels, forecast_window_sec, frequency,
    sample_seq_len_sec, require_all_channels:bool=True, channel_quality_functions:NoneType=None,
    infer_forecast_windows:bool=True, sample_frequency_key:str='sampling_frequency'
):

Function to create a dataframe of samples and their sequence indices


source

calculate_samples_forecast_df


def calculate_samples_forecast_df(
    outcome_df, file_col, outcome_val_col, outcome_time_col, outcome_duration_col, channels, forecast_window_sec,
    frequency, sample_seq_len_sec, channel_quality_functions:NoneType=None, require_all_channels:bool=True,
    infer_forecast_windows:bool=False, sample_frequency_key:str='sampling_frequency'
):

Optimized version using pure multiprocessing