"""
The underlying architecture of the bidirectional LSTM network used in PARROT
.............................................................................
idptools-parrot was developed by the Holehouse lab
Original release ---- 2020
Questions/comments/concerns? Raise an issue on GitHub:
https://github.com/idptools/parrot
Licensed under the MIT license.
"""
import torch
import torch.nn as nn
class BRNN_MtM(nn.Module):
    """A PyTorch many-to-many bidirectional recurrent neural network

    A class containing the PyTorch implementation of a BRNN. The network consists
    of repeating LSTM units in the hidden layers that propagate sequence information
    in both the forward and reverse directions. A final fully connected layer
    aggregates the deepest hidden layers of both directions and produces the
    outputs.

    "Many-to-many" refers to the fact that the network will produce outputs
    corresponding to every item of the input sequence. For example, an input
    sequence of length 10 will produce 10 sequential outputs.

    Attributes
    ----------
    device : str
        String describing where the network is physically stored on the computer.
        Should be either 'cpu' or 'cuda' (GPU). The constructor only records
        this value; it does not move the module, and `forward` does not read it.
    hidden_size : int
        Size of hidden vectors in the network
    num_layers : int
        Number of hidden layers (for each direction) in the network
    num_classes : int
        Number of classes for the machine learning task. If it is a regression
        problem, `num_classes` should be 1. If it is a classification problem,
        it should be the number of classes.
    lstm : PyTorch LSTM object
        The bidirectional LSTM layer(s) of the recurrent neural network.
    fc : PyTorch Linear object
        The fully connected linear layer of the recurrent neural network. Across
        the length of the input sequence, this layer aggregates the output of the
        LSTM nodes from the deepest forward layer and deepest reverse layer and
        returns the output for that residue in the sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, device):
        """
        Parameters
        ----------
        input_size : int
            Length of the input vectors at each timestep
        hidden_size : int
            Size of hidden vectors in the network
        num_layers : int
            Number of hidden layers (for each direction) in the network
        num_classes : int
            Number of classes for the machine learning task. If it is a regression
            problem, `num_classes` should be 1. If it is a classification problem,
            it should be the number of classes.
        device : str
            String describing where the network is physically stored on the computer.
            Should be either 'cpu' or 'cuda' (GPU).
        """
        super().__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(in_features=hidden_size * 2,  # *2 for bidirection
                            out_features=num_classes)

    def forward(self, x):
        """Propagate input sequences through the network to produce outputs

        Parameters
        ----------
        x : 3-dimensional PyTorch tensor
            Input sequence to the network. Should be in the format:
            [batch_dim X sequence_length X input_size]
            Its dtype and device should match those of the network's parameters.

        Returns
        -------
        3-dimensional PyTorch tensor
            Output after propagating the sequences through the network. Will
            be in the format:
            [batch_dim X sequence_length X num_classes]
        """
        # No explicit (h0, c0): nn.LSTM then starts from zero-filled states
        # created with the input's own device and dtype. This is numerically
        # identical to passing zeros, but cannot go out of sync with the
        # module when it is moved via .to()/.cuda()/.double().
        # out: tensor of shape [batch_size, seq_length, hidden_size*2]
        out, _ = self.lstm(x)

        # Decode the hidden state for each time step
        return self.fc(out)
class BRNN_MtO(nn.Module):
    """A PyTorch many-to-one bidirectional recurrent neural network

    A class containing the PyTorch implementation of a BRNN. The network consists
    of repeating LSTM units in the hidden layers that propagate sequence information
    in both the forward and reverse directions. A final fully connected layer
    aggregates the deepest hidden layers of both directions and produces the
    output.

    "Many-to-one" refers to the fact that the network will produce a single output
    for an entire input sequence. For example, an input sequence of length 10 will
    produce only one output.

    Attributes
    ----------
    device : str
        String describing where the network is physically stored on the computer.
        Should be either 'cpu' or 'cuda' (GPU). The constructor only records
        this value; it does not move the module, and `forward` does not read it.
    hidden_size : int
        Size of hidden vectors in the network
    num_layers : int
        Number of hidden layers (for each direction) in the network
    num_classes : int
        Number of classes for the machine learning task. If it is a regression
        problem, `num_classes` should be 1. If it is a classification problem,
        it should be the number of classes.
    lstm : PyTorch LSTM object
        The bidirectional LSTM layer(s) of the recurrent neural network.
    fc : PyTorch Linear object
        The fully connected linear layer of the recurrent neural network. It
        takes the concatenated final hidden states of the deepest forward layer
        and deepest reverse layer and returns a single output for the whole
        sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, device):
        """
        Parameters
        ----------
        input_size : int
            Length of the input vectors at each timestep
        hidden_size : int
            Size of hidden vectors in the network
        num_layers : int
            Number of hidden layers (for each direction) in the network
        num_classes : int
            Number of classes for the machine learning task. If it is a regression
            problem, `num_classes` should be 1. If it is a classification problem,
            it should be the number of classes.
        device : str
            String describing where the network is physically stored on the computer.
            Should be either 'cpu' or 'cuda' (GPU).
        """
        super().__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Stored so the documented `num_classes` attribute actually exists,
        # matching BRNN_MtM.
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(in_features=hidden_size * 2,  # *2 for bidirection
                            out_features=num_classes)

    def forward(self, x):
        """Propagate input sequences through the network to produce outputs

        Parameters
        ----------
        x : 3-dimensional PyTorch tensor
            Input sequence to the network. Should be in the format:
            [batch_dim X sequence_length X input_size]
            Its dtype and device should match those of the network's parameters.

        Returns
        -------
        2-dimensional PyTorch tensor
            Output after propagating the sequences through the network. Will
            be in the format:
            [batch_dim X num_classes]
        """
        # No explicit (h0, c0): nn.LSTM then starts from zero-filled states
        # created with the input's own device and dtype. This is numerically
        # identical to passing zeros, but cannot go out of sync with the
        # module when it is moved via .to()/.cuda()/.double().
        # h_n: tensor of shape [num_layers*2, batch_size, hidden_size]
        _, (h_n, _) = self.lstm(x)

        # The last two entries of h_n are the final hidden states of the
        # deepest layer: [-2] is the forward direction (after seq[n]) and
        # [-1] is the reverse direction (after seq[0]).
        final_outs = torch.cat((h_n[-2], h_n[-1]), dim=-1)

        # Decode the concatenated final hidden states
        return self.fc(final_outs)