Data Preprocessing

oh no

source

calculate_class_weights


def calculate_class_weights(
    dataloader, ignore_index:int=-100, returns_padded_mask:bool=True, return_ratio:bool=True
):

source

interpolate_nan_clip


def interpolate_nan_clip(
    x_in, physiological_range_clip:NoneType=None, percentile_clip:NoneType=None, return_mask_only:bool=False
):

Function to clip outliers based on percentiles or physiological range and then interpolate nearby values


source

calculate_stats_all


def calculate_stats_all(
    zarr_files, channels, sample_wise:bool=True, clip_interpolations:NoneType=None,
    channel_magnitude_multiple:NoneType=None
):

source

calculate_stats


def calculate_stats(
    idx, zarr_file, channels, clip_interpolations:NoneType=None, channel_magnitude_multiple:NoneType=None
):

Function to calculate stats on an individual zarr array, including a clip interpolate range


source

calculate_samples_mp


def calculate_samples_mp(
    zarr_files, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
    max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, nan_tolerance:float=0.0
):

Multiprocessing function to generate samples


source

calculate_samples


def calculate_samples(
    idx, zarr_file, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
    max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, nan_tolerance:float=0.0
):

Function to create a dataframe of samples and their sequence indices