Data Preprocessing
Process your data, they say
interpolate_nan_clip
def interpolate_nan_clip(
x_in, physiological_range_clip:NoneType=None, percentile_clip:NoneType=None, return_mask_only:bool=False
):
Function to clip outliers based on percentiles or a physiological range, and then interpolate nearby values.
calculate_samples_mp
def calculate_samples_mp(
zarr_files, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, constant_nan_tolerance:float=1.0,
require_all_channels:bool=True, min_seq_len_sec:NoneType=None, constant_channels:NoneType=None
):
Multiprocessing function to generate samples from multiple zarr files.
calculate_samples
def calculate_samples(
zarr_file, channels, frequency, sample_seq_len_sec, stride_sec, start_offset_sec:NoneType=None,
max_seq_len_sec:NoneType=None, include_partial_samples:bool=True, require_all_channels:bool=True,
constant_nan_tolerance:float=1.0, min_seq_len_sec:NoneType=None, constant_channels:NoneType=None
):
Function to create a dataframe of samples and their sequence indices for a single zarr file.
check_signal_not_constant
def check_signal_not_constant(
channel_data, constant_tolerance:float=1.0
):
Function to check whether a signal is NOT constant. Both a signal of NaNs and a signal of constant values are treated as constant.
calculate_samples_forecast_mp
def calculate_samples_forecast_mp(
outcome_df, file_col, outcome_val_col, outcome_time_col, outcome_duration_col, forecast_window_sec, channels,
frequency, sample_seq_len_sec, constant_nan_tolerance:float=1.0, require_all_channels:bool=True,
infer_forecast_windows:bool=True, sample_frequency_key:str='sampling_frequency'
):
Multiprocessing function to generate forecast samples.
calculate_samples_forecast
def calculate_samples_forecast(
zarr_file, outcome_start_times, outcome_durations, outcome_vals, channels, forecast_window_sec, frequency,
sample_seq_len_sec, require_all_channels:bool=True, constant_nan_tolerance:float=1.0,
infer_forecast_windows:bool=True, sample_frequency_key:str='sampling_frequency'
):
Function to create a dataframe of forecast samples and their sequence indices for a single zarr file.