Patch Time Series Transformer

Revolutionary!

source

MaskedAutogressionFeedForward


def MaskedAutogressionFeedForward(
    c_in, # the number of input channels
    patch_len, # the length of the patches (either stft or interval length)
    d_model, # the dimension of the initial linear layers for inputting patches into transformer
    shared_recreation:bool=True, # if True, all channels share one reconstruction projection; otherwise each channel gets its own
):

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes:

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and their parameters will also be converted when you call .to(), etc.

Note: as per the example above, an __init__() call to the parent class must be made before assigning submodules on the child.

training (bool): whether this module is in training or evaluation mode.
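The shared_recreation flag controls whether all channels share one reconstruction projection or each channel has its own. A minimal sketch of that idea, for illustration only (the head here is a plain nn.Linear and the tensor layout is assumed; the library's actual implementation may differ):

import torch
import torch.nn as nn

class SharedOrPerChannelHead(nn.Module):
    # Illustrative head: project patch embeddings back to patch_len values.
    def __init__(self, c_in, patch_len, d_model, shared_recreation=True):
        super().__init__()
        self.shared = shared_recreation
        if shared_recreation:
            self.proj = nn.Linear(d_model, patch_len)  # one projection shared by all channels
        else:
            self.proj = nn.ModuleList(
                nn.Linear(d_model, patch_len) for _ in range(c_in)  # one projection per channel
            )

    def forward(self, z):
        # z: (batch, n_vars, n_patches, d_model) -> (batch, n_vars, n_patches, patch_len)
        if self.shared:
            return self.proj(z)
        return torch.stack([self.proj[i](z[:, i]) for i in range(z.shape[1])], dim=1)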


source

TSTBlock


def TSTBlock(
    d_model, # dimension of patch embeddings
    n_heads, # number of attention heads per layer
    d_ff:int=256, # dimension of the feedforward layer in each transformer layer
    attn_dropout:int=0, # dropout applied to the attention weights
    dropout:float=0.0, # dropout applied to the rest of the block
    bias:bool=True, # whether the linear layers use a bias term
    activation:str='gelu', # activation of the feedforward layer
    pre_norm:bool=False, # apply layer normalization before rather than after attention
    rotary_pes:bool=False # use rotary positional embeddings
):

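The example below exercises MaskedAutogressionFeedForward with nested tensors; a single TSTBlock can also be sketched in isolation on a dense batch of patch embeddings. The forward signature and shape convention here are assumptions (a standard transformer encoder layer mapping (batch, n_patches, d_model) to the same shape), so check the source for the exact interface:

import torch

block = TSTBlock(d_model=128, n_heads=8, d_ff=256, dropout=0.1, pre_norm=True)
patches = torch.randn(4, 32, 128)   # hypothetical (batch, n_patches, d_model) patch embeddings
out = block(patches)                # expected to preserve the input shape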

d_model = 512
n_heads = 8
batch_size = 2
n_vars = 7
max_len = 100

# Create sequences of different lengths
seq_lens = torch.randint(50, max_len, (batch_size,))

# Create input tensors with different sequence lengths
x_list = [torch.randn(n_vars, d_model, length) for length in seq_lens]
x_nested = torch.nested.as_nested_tensor(x_list, layout=torch.jagged)

model = MaskedAutogressionFeedForward(c_in=n_vars, patch_len=max_len, d_model=d_model, shared_recreation=True)
out = model(x_nested)
out.shape
torch.Size([2, j2, 7, 100])

source

PatchTFTSimple


def PatchTFTSimple(
    c_in, # number of input channels/variables
    patch_size, # length of each patch
    patch_stride, # stride between consecutive patches
    num_patches, # number of patches per sequence
    d_model, # dimension of the patch embeddings
    n_heads, # number of attention heads per layer
    d_ff, # dimension of the feedforward layer in each transformer layer
    num_layers, # number of transformer layers
    augmentations:list=['patch_mask', 'jitter_zero_mask', 'channel_masking'], # augmentations applied during pre-training
    mask_ratio:float=0.1, # masking ratio used by the masking augmentations
    shared_embedding:bool=False, # whether channels share the patch embedding
    pretrain_head:bool=True, # attach a reconstruction head for pre-training
    dropout:float=0.0, attn_dropout:float=0.0, # dropout for the block and for the attention weights
    act:str='gelu', # activation of the feedforward layer
    pre_norm:bool=False, # apply layer normalization before rather than after attention
    pe_type:str='tAPE', # positional encoding type (e.g. 'tAPE' or 'rotary')
    qkv_bias:bool=True, # whether the query/key/value projections use a bias term
    init_std:float=0.02, # standard deviation for weight initialization
    tokenizer_type:str='simple', # which patch tokenizer to use
    tokenizer_kwargs:dict={} # extra keyword arguments for the tokenizer
):

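Before the example, a quick sanity check on the patching arithmetic. Assuming the usual sliding-window relation between sequence length, patch_size, and patch_stride (verify against the tokenizer actually configured):

def expected_num_patches(seq_len, patch_size, patch_stride):
    # number of windows of length patch_size taken every patch_stride steps
    return (seq_len - patch_size) // patch_stride + 1

expected_num_patches(100, 10, 10)   # -> 10, matching num_patches=10 in the example below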

d_model = 512
n_heads = 8
batch_size = 2
n_vars = 7
max_len = 100

# Create sequences of different lengths
seq_lens = torch.tensor([100, 50])

# Create input tensors with different sequence lengths
x_list = [torch.randn(n_vars, length) for length in seq_lens]
x_nested = torch.nested.as_nested_tensor(x_list, layout=torch.jagged)
x = torch.randn(batch_size, n_vars, max_len)
# Initialize the PatchTFTSimple model
patch_model = PatchTFTSimple(
    c_in=7, # number of input channels/variables
    num_patches=10, 
    patch_size=10,
    patch_stride=10,
    d_model=512,
    n_heads=1,
    d_ff=512,
    num_layers=2,
    augmentations=['patch_mask','jitter_zero_mask', 'channel_masking'],
    mask_ratio=0.1,
    pretrain_head=True,
    dropout=0.1, 
    attn_dropout=0.1, 
    act="gelu", 
    pre_norm=True,
    pe_type='tAPE',
    qkv_bias=True,
    init_std=0.02,
    tokenizer_type='simple',
    shared_embedding=False,
    tokenizer_kwargs=dict(
        bottleneck_channels=32*7,
    )
)

# Forward pass
z, output, y = patch_model(x_nested)

output.shape, y.shape, z.shape
(torch.Size([2, j11, 7, 10]),
 torch.Size([2, j4, 7, 10]),
 torch.Size([2, 7, 512, j10]))
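All three outputs are nested (jagged) tensors, so their per-sample sizes differ. One way to inspect them is to unbind along the batch dimension and look at the dense constituents:

for z_i, out_i, y_i in zip(z.unbind(), output.unbind(), y.unbind()):
    # per-sample dense shapes; the jagged (patch) dimension can differ across samples and across outputs
    print(z_i.shape, out_i.shape, y_i.shape)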

source

PatchTFTSimpleLightning


def PatchTFTSimpleLightning(
    learning_rate, # learning rate for the optimizer
    train_size, # number of samples in the training set
    batch_size, # training batch size
    n_gpus, # number of GPUs used for training
    metrics:dict={}, # named metric callables evaluated during training
    loss_func:str='mse', # name of the loss function, e.g. 'mse'
    weight_decay:float=0.0, # optimizer weight decay
    epochs:int=100, # number of training epochs
    use_weight_decay_scheduler:bool=False, # schedule the weight decay over training
    final_weight_decay:float=0.4, # weight decay at the end of the schedule
    optimizer_type:str='AdamW', # optimizer class name
    scheduler_type:str='OneCycle', # learning-rate scheduler name
    huber_delta:NoneType=None, # delta for the Huber loss; ignored for other losses
    scheduler_kwargs:dict={}, # extra keyword arguments for the scheduler
    transforms:NoneType=None, # optional input transforms
    **patchmeup_kwargs # keyword arguments forwarded to the wrapped encoder

Hooks to be used in LightningModule.
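loss_func is passed as a string, and huber_delta only matters when the Huber loss is selected. A minimal sketch of how such a mapping could look (an assumption for illustration; the real mapping lives in the source):

import torch.nn as nn

def make_loss(loss_func: str, huber_delta=None):
    # map a loss name to a torch loss module
    if loss_func == 'mse':
        return nn.MSELoss()
    if loss_func == 'huber':
        # fall back to the torch default delta of 1.0 when none is given
        return nn.HuberLoss(delta=1.0 if huber_delta is None else huber_delta)
    raise ValueError(f"unsupported loss_func: {loss_func}")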

encoder_kwargs = dict(c_in=7, # number of input channels/variables
    num_patches=10, 
    patch_size=10,
    patch_stride=10,
    d_model=512,
    n_heads=1,
    d_ff=512,
    num_layers=2,
    augmentations=['patch_mask','jitter_zero_mask', 'channel_masking'],
    mask_ratio=0.1,
    pretrain_head=True,
    dropout=0.1, 
    attn_dropout=0.1, 
    act="gelu", 
    pre_norm=True,
    pe_type='rotary',
    qkv_bias=True,
    init_std=0.02,
    tokenizer_type='simple',
    tokenizer_kwargs=dict(
        bottleneck_channels=32*7,
    ),
    shared_embedding=False)

patch_lightning = PatchTFTSimpleLightning(
    learning_rate=0.001,
    train_size=1000,
    batch_size=10,
    n_gpus=1,
    **encoder_kwargs,
)

# Create sequences of different lengths
seq_lens = torch.randint(50, max_len, (batch_size,))

# Create input tensors with different sequence lengths
x_list = [torch.randn(n_vars, length) for length in seq_lens]
x_nested = torch.nested.as_nested_tensor(x_list, layout=torch.jagged)
x = torch.randn(batch_size, n_vars, max_len)

patch_lightning.training_step((x_nested, x_nested), batch_idx=0)
tensor(1.2333, grad_fn=<DivBackward0>)
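Calling training_step directly is handy for smoke tests; the module can also be driven by a regular Lightning training loop. A sketch, assuming pytorch_lightning is installed; the fixed-length TensorDataset is only a stand-in, since a real pipeline would typically build nested (jagged) batches like the tuple above in a collate_fn:

import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset

# self-supervised setup: the input doubles as the reconstruction target
dataset = TensorDataset(x, x)
loader = DataLoader(dataset, batch_size=2)

trainer = pl.Trainer(max_epochs=1, accelerator="auto", devices=1)
trainer.fit(patch_lightning, train_dataloaders=loader)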