Add style transfer example

koki0702 · koki0702 · commit cdded76290b1 · 2020-03-19T20:25:10.000+09:00
diff --git a/README.md b/README.md
@@ -67,6 +67,8 @@ DeZeroの他の実装例は[examples](/examples)にあります。
 
 [<img src="https://raw.githubusercontent.com/oreilly-japan/deep-learning-from-scratch-3/images/gan.gif" height="175"/>](/examples/gan.py)[<img src="https://raw.githubusercontent.com/oreilly-japan/deep-learning-from-scratch-3/images/vae.png" height="175"/>](/examples/vae.py)[<img src="https://raw.githubusercontent.com/oreilly-japan/deep-learning-from-scratch-3/images/pythonista.png" height="175"/>](https://github.com/oreilly-japan/deep-learning-from-scratch-3/wiki/DeZero%E3%82%92iPhone%E3%81%A7%E5%8B%95%E3%81%8B%E3%81%99)
 
+[<img src="https://raw.githubusercontent.com/oreilly-japan/deep-learning-from-scratch-3/images/style_transfer.png" height="175"/>](/examples/style_transfer.py)
+
 ## 正誤表
 
 本書の正誤情報は、[:mag_right: 正誤表ページ](../../wiki/Errata)に掲載しています。
diff --git a/examples/style_transfer.py b/examples/style_transfer.py
@@ -0,0 +1,147 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+import dezero
+import dezero.functions as F
+from dezero import Variable
+from dezero.models import VGG16
+
+
+use_gpu = dezero.cuda.gpu_enable
+lr = 5.0
+iterations = 2001
+model_input_size = (224, 224)
+style_weight = 1.0
+content_weight = 1e-4
+total_varitaion_weight = 1e-6
+content_layers = ['conv5_2']
+style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
+content_url = 'https://github.com/oreilly-japan/deep-learning-from-scratch-3/raw/images/zebra.jpg'
+style_url = 'https://raw.githubusercontent.com/jcjohnson/neural-style/master/examples/inputs/starry_night_google.jpg'
+
+
+class VGG16(VGG16):
+    def extract(self, x):
+        c1_1 = F.relu(self.conv1_1(x))
+        c1_2 = F.relu(self.conv1_2(c1_1))
+        p1 = F.average_pooling(c1_2, 2, 2)
+        c2_1 = F.relu(self.conv2_1(p1))
+        c2_2 = F.relu(self.conv2_2(c2_1))
+        p2 = F.average_pooling(c2_2, 2, 2)
+        c3_1 = F.relu(self.conv3_1(p2))
+        c3_2 = F.relu(self.conv3_2(c3_1))
+        c3_3 = F.relu(self.conv3_3(c3_2))
+        p3 = F.average_pooling(c3_3, 2, 2)
+        c4_1 = F.relu(self.conv4_1(p3))
+        c4_2 = F.relu(self.conv4_2(c4_1))
+        c4_3 = F.relu(self.conv4_3(c4_2))
+        p4 = F.average_pooling(c4_3, 2, 2)
+        c5_1 = F.relu(self.conv5_1(p4))
+        c5_2 = F.relu(self.conv5_2(c5_1))
+        c5_3 = F.relu(self.conv5_3(c5_2))
+        return {'conv1_1':c1_1, 'conv1_2':c1_2, 'conv2_1':c2_1, 'conv2_2':c2_2,
+                'conv3_1':c3_1, 'conv3_2':c3_2, 'conv3_3':c3_3, 'conv4_1':c4_1,
+                'conv5_1':c5_1, 'conv5_2':c5_2, 'conv5_3':c5_3}
+
+# Setup for content & style image
+content_path = dezero.utils.get_file(content_url)
+style_path = dezero.utils.get_file(style_url)
+content_img = Image.open(content_path)
+content_size = content_img.size
+style_img = Image.open(style_path)
+content_img = VGG16.preprocess(content_img, size=model_input_size)[np.newaxis]  # preprocess for VGG
+style_img = VGG16.preprocess(style_img, size=model_input_size)[np.newaxis]
+content_img, style_img = Variable(content_img), Variable(style_img)
+
+model = VGG16(pretrained=True)
+#gen_data = np.random.uniform(-20, 20, (1, 3, img_resize[0], img_resize[1])).astype(np.float32)
+gen_data = content_img.data.copy()
+gen_img = dezero.Parameter(gen_data)
+gen_model = dezero.models.Model()
+gen_model.param = gen_img
+optimizer = dezero.optimizers.AdaGrad(lr=lr).setup(gen_model)
+
+if use_gpu:
+    model.to_gpu()
+    gen_img.to_gpu()
+    content_img.to_gpu()
+    style_img.to_gpu()
+
+
+with dezero.no_grad():
+    content_features = model.extract(content_img)
+    style_features = model.extract(style_img)
+
+
+def deprocess_image(x, size=None):
+    if use_gpu:
+        x = dezero.cuda.as_numpy(x)
+    if x.ndim == 4:
+        x = np.squeeze(x)
+    x = x.transpose((1,2,0))
+    x += np.array([103.939, 116.779, 123.68])
+    x = x[:,:,::-1] # BGR -> RGB
+    x = np.clip(x, 0, 255).astype('uint8')
+    img = Image.fromarray(x, mode="RGB")
+    if size:
+        img = img.resize(size)
+    return img
+
+
+def gram_mat(x):
+    N, C, H, W = x.shape
+    features = x.reshape(C, -1)
+    gram = F.matmul(features, features.T)
+    return gram.reshape(1, C, C)
+
+
+def style_loss(style, comb):
+    S = gram_mat(style)
+    C = gram_mat(comb)
+    N, ch, H, W = style.shape
+    return F.mean_squared_error(S, C) / (4 * (ch * W * H)**2)
+
+
+def content_loss(base, comb):
+    return F.mean_squared_error(base, comb) / 2
+
+
+def total_varitaion_loss(x):
+    a = (x[:, :, :-1, :-1] - x[:, :, 1:, : -1]) ** 2
+    b = (x[:, :, :-1, :-1] - x[:, :, : -1, 1:]) ** 2
+    return F.sum(a + b)
+
+
+def loss_func(gen_features, content_features, style_features, gen_img):
+    loss = 0
+    # content loss
+    for layer in content_features:
+        loss += content_weight / len(content_layers) * \
+                content_loss(content_features[layer], gen_features[layer])
+    # style loss
+    for layer in style_features:
+        loss += style_weight / len(style_layers) * \
+                style_loss(style_features[layer], gen_features[layer])
+    # total variation loss
+    loss += total_varitaion_weight * total_varitaion_loss(gen_img)
+    return loss
+
+
+print_interval = 100 if use_gpu else 1
+for i in range(iterations):
+    model.cleargrads()
+    gen_img.cleargrad()
+
+    gen_features = model.extract(gen_img)
+    loss = loss_func(gen_features, content_features, style_features, gen_img)
+    loss.backward()
+    optimizer.update()
+
+    if i % print_interval == 0:
+        print('{} loss: {:.0f}'.format(i, float(loss.data)))
+
+    if i % 100 == 0:
+        img = deprocess_image(gen_img.data, content_size)
+        plt.imshow(np.array(img))
+        plt.show()
+        #img.save("style_transfer_{}.png".format(str(i)))