Augmentations

Data augmentations for time-series training. This module covers patching utilities, patch masking, value and shuffle augmentations, and CutMix/Mixup training callbacks.

Patching


source

create_patch


def create_patch(
    xb, # input tensor [bs x n_vars x seq_len]
    patch_len, # length of each patch
    stride, # stride between the starts of consecutive patches
    return_patch_num:bool=False, # whether to also return the number of patches
    constant_pad:bool=False, # right-pad the sequence so trailing samples form a full patch
    constant_pad_value:int=0, # value used for constant padding
    max_seq_len:NoneType=None, # optional maximum sequence length
):

xb: [bs x n_vars x seq_len] returns x: [bs x patch_num x n_vars x patch_len]
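The core of the patching can be sketched with Tensor.unfold. This is a minimal sketch for a 3D input, not the library implementation: it ignores return_patch_num and max_seq_len, assumes seq_len >= patch_len >= stride, and does not handle the 2D [bs x seq_len] inputs the real function also accepts.

import torch
import torch.nn.functional as F

def create_patch_sketch(xb, patch_len, stride, constant_pad=False, constant_pad_value=0):
    # xb: [bs x n_vars x seq_len] -> [bs x patch_num x n_vars x patch_len]
    seq_len = xb.shape[-1]
    if constant_pad:
        # right-pad so the trailing remainder still forms a full patch
        pad = (stride - (seq_len - patch_len) % stride) % stride
        xb = F.pad(xb, (0, pad), mode='constant', value=constant_pad_value)
    # slide a window of patch_len over the time axis: [bs x n_vars x patch_num x patch_len]
    patches = xb.unfold(2, patch_len, stride)
    # move patch_num in front of n_vars to match the documented layout
    return patches.permute(0, 2, 1, 3)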

x = torch.randn(4, 1000)

# test overlapping patches (patch_len > stride) vs non-overlapping patches with constant padding
xb = create_patch(x, patch_len=505, stride=500, constant_pad=False)
xb_rep = create_patch(x, patch_len=500, stride=500, constant_pad=True)
x.shape, xb.shape, xb_rep.shape
(torch.Size([4, 1000]), torch.Size([4, 1, 505]), torch.Size([4, 2, 500]))
x = torch.randn(1,7,1350000)

# test patch_len == stride on a long sequence: floor((1350000-1024)/1024)+1 = 1318 patches unpadded, 1319 with padding
xb = create_patch(x, patch_len=1024, stride=1024, constant_pad=False)
xb_rep = create_patch(x, patch_len=1024, stride=1024, constant_pad=True)
x.shape, xb.shape, xb_rep.shape
(torch.Size([1, 7, 1350000]),
 torch.Size([1, 1318, 7, 1024]),
 torch.Size([1, 1319, 7, 1024]))

source

unpatch


def unpatch(
    x, # patched tensor [bs/None x patch_num x n_vars x patch_len]
    seq_len, # original sequence length to restore
    remove_padding:bool=True, # whether to trim padding added during patching
):

x: [bs/None x patch_num x n_vars x patch_len] returns x: [bs x n_vars x seq_len]

x = torch.randn(1,1,50)

# patch with constant padding (50 is not a multiple of 6), then invert the patching
patched = create_patch(x, patch_len=6, stride=6, constant_pad=True)
x.shape, patched.shape
(torch.Size([1, 1, 50]), torch.Size([1, 9, 1, 6]))

xb = unpatch(patched, seq_len=50, remove_padding=True)
xb.shape
torch.Size([1, 1, 50])
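For non-overlapping patches (patch_len == stride), the inverse is just a permute and reshape; a minimal sketch under that assumption:

def unpatch_sketch(x, seq_len, remove_padding=True):
    # x: [bs x patch_num x n_vars x patch_len] -> [bs x n_vars x patch_num * patch_len]
    out = x.permute(0, 2, 1, 3).reshape(x.shape[0], x.shape[2], -1)
    # optionally trim the padding added during patching
    return out[..., :seq_len] if remove_padding else out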

Patch Masking


source

random_masking


def random_masking(
    xb, # input tensor of patches to mask
    mask_ratio, # ratio of patches to mask
):

source

mask_patches_simple


def mask_patches_simple(
    xb, # input tensor of size 3 or 4 to be masked
    mask_ratio, # ratio of masking of patches
):

Masks a random fraction (mask_ratio) of the patches in xb.

xb: [bs x patch_num x n_vars x patch_len]
padding_mask: [bs x patch_num x 1|num_vars x patch_len]

x = torch.randn(50,16,7,50)
mask_ratio = 0.4

x_new, mask = mask_patches_simple(x,mask_ratio=mask_ratio)
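As a sanity check, the fraction of masked patches should be close to mask_ratio. This assumes mask is a 0/1 tensor with 1 marking masked entries:

# should print a value close to 0.4
print(mask.float().mean())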

Value Augmentations


source

jitter_augmentation


def jitter_augmentation(
    x, # input tensor
    mask_ratio:float=0.05, # fraction of values to perturb
    jitter_ratio:float=0.05, # scale of the added noise
):
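A minimal sketch of the idea, not the library code: assuming jitter_augmentation perturbs a random mask_ratio fraction of values with Gaussian noise scaled by jitter_ratio, and returns the new tensor plus the number of perturbed values:

def jitter_augmentation_sketch(x, mask_ratio=0.05, jitter_ratio=0.05):
    # select a random subset of values to perturb
    mask = torch.rand_like(x) < mask_ratio
    # add small Gaussian noise to the selected values only
    x_new = torch.where(mask, x + jitter_ratio * torch.randn_like(x), x)
    return x_new, mask.sum()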

source

remove_values


def remove_values(
    x, # input tensor
    mask_ratio, # fraction of values to remove
):
## note: each call advances the RNG state, so re-seed before each call when comparing runs
torch.manual_seed(42)
x = torch.randn(4,7,1000)

torch.manual_seed(42)
x_new, n_masks = jitter_augmentation(x)
# fraction of perturbed values; should be close to mask_ratio (0.05)
n_masks / (4 * 7 * 1000)

torch.manual_seed(42)
x_new2, n_masks2 = jitter_augmentation(x)
torch.equal(x_new, x_new2)
True

Shuffle Augmentations


source

shuffle_dim


def shuffle_dim(
    x, # input tensor
    dim:int=1, # dimension to shuffle
    p:float=0.5, # probability of applying the shuffle
):

Randomly shuffles x along dimension dim with probability p. x: [bs x n channels x n patches x patch len]
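A minimal sketch of the behavior (an assumption, not the library code): with probability p, apply one random permutation along dim:

def shuffle_dim_sketch(x, dim=1, p=0.5):
    # with probability p, permute x along dim using a single random permutation
    if torch.rand(1).item() < p:
        perm = torch.randperm(x.shape[dim], device=x.device)
        x = x.index_select(dim, perm)
    return x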


source

reverse_sequence


def reverse_sequence(
    x, # input tensor
    seq_dim:tuple=(-1,), # dimension(s) along which to reverse
    p:float=0.5, # probability of applying the reversal
):
x = torch.randn(4,1,5,5).to('cuda')

# shuffle_dim is stochastic (p=0.5 by default); in this run the permutation was applied
torch.equal(shuffle_dim(x), x)
False
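reverse_sequence itself is not exercised above. Assuming p=1.0 forces the reversal and seq_dim lists the dimensions to flip, a quick check would be:

x = torch.randn(4, 1, 5, 5)
# with p=1.0 the reversal should always be applied
torch.equal(reverse_sequence(x, seq_dim=(-1,), p=1.0), torch.flip(x, dims=(-1,)))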

Callbacks


source

IntraClassCutMix1d


def IntraClassCutMix1d(
    mix_prob:float=0.5, # probability of applying cutmix
    return_y_every_sec:int=30, # segment length in seconds to mix; if one value of y covers 30 seconds of signal, set this to 30
    frequency:int=125, # sampling frequency of the data
    return_sequence_padding_mask:bool=True, # whether to return the sequence padding mask
):

Intra-class CutMix for 1D data (e.g., time-series).

This is a callback that can be used to apply CutMix to the training data. It is used to mix segments within the same class.

x = torch.randn(4,7,90)
x_c = x.clone()
y = torch.randint(0, 5, size=(4,90//30))
xxt = IntraClassCutMix1d(mix_prob=1, frequency=1, return_y_every_sec=30, return_sequence_padding_mask=False)
batch = (x,y)
xxt.on_train_batch_start(None, None, batch, 0)
torch.equal(x_c, batch[0]) == False
True
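The core mixing step can be sketched as follows. This is an illustrative sketch, not the callback's code: it assumes segments of seg_len = return_y_every_sec * frequency samples and swaps same-class segments at each segment position across the batch:

def intra_class_cutmix_sketch(x, y, seg_len, mix_prob=0.5):
    # x: [bs x n_vars x seq_len], y: [bs x n_segments], seq_len == n_segments * seg_len
    x = x.clone()
    for s in range(x.shape[-1] // seg_len):
        if torch.rand(1).item() > mix_prob:
            continue  # leave this segment position unmixed
        for cls in y[:, s].unique():
            idx = (y[:, s] == cls).nonzero().squeeze(-1)
            perm = idx[torch.randperm(len(idx))]
            # swap same-class segments at position s between batch elements
            x[idx, :, s * seg_len:(s + 1) * seg_len] = x[perm, :, s * seg_len:(s + 1) * seg_len]
    return x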

source

IntraClassCutMixBatch


def IntraClassCutMixBatch(
    mix_prob:float=0.5, # probability of applying cutmix
    return_y_every_sec:int=30, # segment length in seconds to mix; if one value of y covers 30 seconds of signal, set this to 30
    frequency:int=125, # sampling frequency of the data
    return_sequence_padding_mask:bool=True, # whether to return the sequence padding mask
    intra_class_only:bool=True, # whether to mix only within same class (True) or across all classes (False)
):

Intra-class CutMix for 1D data (e.g., time-series).

This is a callback that can be used to apply CutMix to the training data. It is used to mix segments within the same class.

This differs from IntraClassCutMix1d in that it mixes same-class segments from anywhere in the batch, rather than only swapping segments at the same position.

x = torch.randn(4,7,90)
x_c = x.clone()
y = torch.randint(0, 5, size=(4,90//30))
xxt = IntraClassCutMixBatch(mix_prob=1, frequency=1, return_y_every_sec=30, return_sequence_padding_mask=False)
batch = (x,y)
batch = xxt.on_train_batch_start(None, None, batch, 0)
torch.equal(x_c, batch[0]) == False
intra-class CutMixBatch is being applied!
True
Aside: this in-place mutation works because, while tuples are immutable, their elements need not be. A callback can therefore modify the objects inside a batch tuple without returning it:

# Create a tuple of two lists
batch = ([1,2,3], [4,5,6])

# Unpack into new names; x and y still reference the same list objects
x, y = batch

# Mutating the lists is visible through the tuple
x[0] = 99
y[0] = 88

print(batch)
([99, 2, 3], [88, 5, 6])

source

MixupCallback


def MixupCallback(
    num_classes, # number of target classes, used to build the soft labels
    mixup_alpha:float=0.4, # alpha parameter for the beta distribution
    return_sequence_padding_mask:bool=True, # whether to return the sequence padding mask
    ignore_index:int=-100, # label value excluded from mixing (e.g. padding)
):

Mixup for 1D data (e.g., time-series).

This callback applies Mixup to the training data, blending both the input data and the labels.

See tsai implementation here: https://github.com/timeseriesAI/tsai/blob/bdff96cc8c4c8ea55bc20d7cffd6a72e402f4cb2/tsai/data/mixed_augmentation.py#L43

Note that Mixup produces non-integer (soft) labels; the loss function must be able to handle them.
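The underlying operation is standard Mixup: sample lam from Beta(mixup_alpha, mixup_alpha), then blend both the inputs and the one-hot labels with the same coefficient. A minimal sketch that ignores the callback's ignore_index and padding-mask handling (labels must be non-negative here):

import torch
import torch.nn.functional as F

def mixup_sketch(x, y, num_classes, mixup_alpha=0.4):
    # sample the mixing coefficient
    lam = torch.distributions.Beta(mixup_alpha, mixup_alpha).sample().item()
    perm = torch.randperm(x.shape[0])
    # blend inputs and soft labels; one_hot adds a trailing class dimension to y
    x_mix = lam * x + (1 - lam) * x[perm]
    y_soft = F.one_hot(y, num_classes).float()
    y_mix = lam * y_soft + (1 - lam) * y_soft[perm]
    return x_mix, y_mix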

x = torch.randn(4,7,90)
x_c = x.clone()
y_og = torch.randint(0, 5, size=(4,90//30))
y_og[1,2] = -100
y_og[2,1] = -100
y_c = y_og.clone()
xxt = MixupCallback(num_classes=5, mixup_alpha=0.4, return_sequence_padding_mask=False)
batch = (x,y_og)
batch = xxt.on_train_batch_start(None, None, batch, 0)
torch.equal(x_c, batch[0]) == False, torch.equal(y_c, batch[1]) == False
Mixup is being applied!
(True, True)