Data Preprocessing

Process your data, they say

source

interpolate_nan_clip


def interpolate_nan_clip(
    x_in, physiological_range_clip:NoneType=None, percentile_clip:NoneType=None, return_mask_only:bool=False
):

Function to clip outliers based on percentiles or a physiological range, and then interpolate nearby values.


source

calculate_samples_mp


def calculate_samples_mp(
    zarr_files, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
    max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, constant_nan_tolerance:float=1.0,
    require_all_channels:bool=True, min_seq_len_sec:NoneType=None, constant_channels:NoneType=None
):

Multiprocessing function to generate samples


source

calculate_samples


def calculate_samples(
    zarr_file, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
    max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, require_all_channels:bool=True,
    constant_nan_tolerance:float=1.0, min_seq_len_sec:NoneType=None, constant_channels:NoneType=None
):

Function to create a dataframe of samples and their sequence indices


source

check_signal_not_constant


def check_signal_not_constant(
    channel_data, constant_tolerance:float=1.0
):

Function to check whether a signal is NOT constant. The check covers both NaNs and constant values.


source

calculate_samples_forecast_mp


def calculate_samples_forecast_mp(
    outcome_df, file_col, outcome_val_col, outcome_time_col, outcome_duration_col, forecast_window_sec, channels,
    frequency, sample_seq_len_sec, constant_nan_tolerance:float=1.0, require_all_channels:bool=True,
    infer_forecast_windows:bool=True, sample_frequency_key:str='sampling_frequency'
):

Multiprocessing function to generate forecast samples


source

calculate_samples_forecast


def calculate_samples_forecast(
    zarr_file, outcome_start_times, outcome_durations, outcome_vals, channels, forecast_window_sec, frequency,
    sample_seq_len_sec, require_all_channels:bool=True, constant_nan_tolerance:float=1.0,
    infer_forecast_windows:bool=True, sample_frequency_key:str='sampling_frequency'
):

Function to create a dataframe of forecast samples and their sequence indices