Commit 4d66bd6

Merge pull request #81 from choosewhatulike/fixMLP
update MLP

Author: Yunfan Shao
Parents: 1e1fbc0 + 8f60a4f

File tree: 3 files changed (+12, -16 lines)


docs/source/user/quickstart.rst

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ pre-processing data, constructing model and training model.
         self.enc = encoder.Conv(
             in_channels=300, out_channels=100, kernel_size=3)
         self.agg = aggregation.MaxPool()
-        self.dec = decoder.MLP(100, num_classes=num_classes)
+        self.dec = decoder.MLP([100, num_classes])

     def forward(self, x):
         x = self.emb(x)  # [N,L] -> [N,L,C]
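
The quickstart's decoder is now built from a single list of layer sizes, with the last entry as the output dimension, instead of an input size plus a num_classes keyword. A minimal sketch of the updated call, using the class path from the MLP diff below (num_classes is a hypothetical value here):

    # Sketch only: the import path follows fastNLP/modules/decoder/MLP.py as shown below.
    from fastNLP.modules.decoder.MLP import MLP

    num_classes = 5                           # hypothetical value for illustration
    dec = MLP([100, num_classes])             # replaces MLP(100, num_classes=num_classes)
    dec_deeper = MLP([100, 50, num_classes])  # an extra hidden layer is just one more entry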

fastNLP/modules/decoder/MLP.py

Lines changed: 10 additions & 14 deletions
@@ -2,13 +2,15 @@
 import torch.nn as nn
 from fastNLP.modules.utils import initial_parameter
 class MLP(nn.Module):
-    def __init__(self, size_layer, num_class=2, activation='relu' , initial_method = None):
+    def __init__(self, size_layer, activation='relu' , initial_method = None):
         """Multilayer Perceptrons as a decoder

-        Args:
-            size_layer: list of int, define the size of MLP layers
-            num_class: int, num of class in output, should be 2 or the last layer's size
-            activation: str or function, the activation function for hidden layers
+        :param size_layer: list of int, define the size of MLP layers
+        :param activation: str or function, the activation function for hidden layers
+
+        .. note::
+            There is no activation function applying on output layer.
+
         """
         super(MLP, self).__init__()
         self.hiddens = nn.ModuleList()
@@ -19,13 +21,6 @@ def __init__(self, size_layer, num_class=2, activation='relu' , initial_method =
             else:
                 self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i]))

-        if num_class == 2:
-            self.out_active = nn.LogSigmoid()
-        elif num_class == size_layer[-1]:
-            self.out_active = nn.LogSoftmax(dim=1)
-        else:
-            raise ValueError("should set output num_class correctly: {}".format(num_class))
-
         actives = {
             'relu': nn.ReLU(),
             'tanh': nn.Tanh()
@@ -37,17 +32,18 @@ def __init__(self, size_layer, num_class=2, activation='relu' , initial_method =
         else:
             raise ValueError("should set activation correctly: {}".format(activation))
         initial_parameter(self, initial_method )
+
     def forward(self, x):
         for layer in self.hiddens:
             x = self.hidden_active(layer(x))
-        x = self.out_active(self.output(x))
+        x = self.output(x)
         return x



 if __name__ == '__main__':
     net1 = MLP([5,10,5])
-    net2 = MLP([5,10,5], 5)
+    net2 = MLP([5,10,5], 'tanh')
     for net in [net1, net2]:
         x = torch.randn(5, 5)
         y = net(x)
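
As the updated docstring notes, no activation is applied to the output layer, so callers that previously got log-probabilities from the built-in LogSigmoid/LogSoftmax now receive raw scores. A minimal usage sketch under that assumption (the import path mirrors the file above; the final log_softmax is one possible external choice, not part of the class):

    import torch
    import torch.nn.functional as F
    from fastNLP.modules.decoder.MLP import MLP  # path as in the diff above

    # size_layer lists input size, hidden sizes, and output size in order.
    net = MLP([5, 10, 5], activation='tanh')

    x = torch.randn(4, 5)                     # a batch of 4 five-dimensional inputs
    scores = net(x)                           # raw outputs: no activation on the last layer
    log_probs = F.log_softmax(scores, dim=1)  # recover log-probabilities externally if needed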

reproduction/LSTM+self_attention_sentiment_analysis/main.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ def __init__(self, args=None):
         self.embedding = Embedding(len(word2index) ,embeding_size , init_emb= None )
         self.lstm = Lstm(input_size = embeding_size,hidden_size = lstm_hidden_size ,bidirectional = True)
         self.attention = SelfAttention(lstm_hidden_size * 2 ,dim =attention_unit ,num_vec=attention_hops)
-        self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ] ,num_class=class_num ,)
+        self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ])
     def forward(self,x):
         x_emb = self.embedding(x)
         output = self.lstm(x_emb)
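
Because the num_class argument is gone, the class count for this model is simply the last entry of size_layer, and the MLP's output is no longer passed through LogSoftmax. A hedged sketch of what that implies for a training loss (the sizes and tensors below are toy placeholders, not values from this repository):

    import torch
    import torch.nn as nn
    from fastNLP.modules.decoder.MLP import MLP  # same class as in the diff above

    mlp = MLP([8, 4, 3])                   # 3 classes = last entry of size_layer (toy sizes)
    scores = mlp(torch.randn(2, 8))        # raw scores: the MLP no longer applies LogSoftmax
    target = torch.tensor([0, 2])

    # Either use a loss that expects raw scores ...
    loss = nn.CrossEntropyLoss()(scores, target)
    # ... or apply log_softmax yourself and keep an NLLLoss-style setup.
    loss_alt = nn.NLLLoss()(torch.log_softmax(scores, dim=1), target)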
