Add two time-scale update rule (TTUR): scale the discriminator's learning rate by a new `ttur_mult` option (default 2)

lucidrains · lucidrains · commit b1365b838b73 · 2020-07-29T18:20:57.000-07:00
diff --git a/bin/stylegan2_pytorch b/bin/stylegan2_pytorch
@@ -19,6 +19,7 @@ def train_from_folder(
     gradient_accumulate_every = 5,
     num_train_steps = 150000,
     learning_rate = 2e-4,
+    ttur_mult = 2,
     num_workers =  None,
     save_every = 1000,
     generate = False,
@@ -45,6 +46,7 @@ def train_from_folder(
         network_capacity = network_capacity,
         transparent = transparent,
         lr = learning_rate,
+        ttur_mult = ttur_mult,
         num_workers = num_workers,
         save_every = save_every,
         trunc_psi = trunc_psi,
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
   name = 'stylegan2_pytorch',
   packages = find_packages(),
   scripts=['bin/stylegan2_pytorch'],
-  version = '0.17.14',
+  version = '0.18.0',
   license='GPLv3+',
   description = 'StyleGan2 in Pytorch',
   author = 'Phil Wang',
diff --git a/stylegan2_pytorch/stylegan2_pytorch.py b/stylegan2_pytorch/stylegan2_pytorch.py
@@ -534,7 +534,7 @@ def forward(self, x):
         return x.squeeze(), quantize_loss
 
 class StyleGAN2(nn.Module):
-    def __init__(self, image_size, latent_dim = 512, fmap_max = 512, style_depth = 8, network_capacity = 16, transparent = False, fp16 = False, cl_reg = False, steps = 1, lr = 1e-4, fq_layers = [], fq_dict_size = 256, attn_layers = [], no_const = False):
+    def __init__(self, image_size, latent_dim = 512, fmap_max = 512, style_depth = 8, network_capacity = 16, transparent = False, fp16 = False, cl_reg = False, steps = 1, lr = 1e-4, ttur_mult = 2, fq_layers = [], fq_dict_size = 256, attn_layers = [], no_const = False):
         super().__init__()
         self.lr = lr
         self.steps = steps
@@ -563,7 +563,7 @@ def __init__(self, image_size, latent_dim = 512, fmap_max = 512, style_depth = 8
 
         generator_params = list(self.G.parameters()) + list(self.S.parameters())
         self.G_opt = AdamP(generator_params, lr = self.lr, betas=(0.5, 0.9))
-        self.D_opt = AdamP(self.D.parameters(), lr = self.lr, betas=(0.5, 0.9))
+        self.D_opt = AdamP(self.D.parameters(), lr = self.lr * ttur_mult, betas=(0.5, 0.9))
 
         self._init_weights()
         self.reset_parameter_averaging()
@@ -602,7 +602,7 @@ def forward(self, x):
         return x
 
 class Trainer():
-    def __init__(self, name, results_dir, models_dir, image_size, network_capacity, transparent = False, batch_size = 4, mixed_prob = 0.9, gradient_accumulate_every=1, lr = 2e-4, num_workers = None, save_every = 1000, trunc_psi = 0.6, fp16 = False, cl_reg = False, fq_layers = [], fq_dict_size = 256, attn_layers = [], no_const = False, aug_prob = 0., dataset_aug_prob = 0., *args, **kwargs):
+    def __init__(self, name, results_dir, models_dir, image_size, network_capacity, transparent = False, batch_size = 4, mixed_prob = 0.9, gradient_accumulate_every=1, lr = 2e-4, ttur_mult = 2, num_workers = None, save_every = 1000, trunc_psi = 0.6, fp16 = False, cl_reg = False, fq_layers = [], fq_dict_size = 256, attn_layers = [], no_const = False, aug_prob = 0., dataset_aug_prob = 0., *args, **kwargs):
         self.GAN_params = [args, kwargs]
         self.GAN = None
 
@@ -623,6 +623,7 @@ def __init__(self, name, results_dir, models_dir, image_size, network_capacity,
         self.aug_prob = aug_prob
 
         self.lr = lr
+        self.ttur_mult = ttur_mult
         self.batch_size = batch_size
         self.num_workers = num_workers
         self.mixed_prob = mixed_prob
@@ -656,7 +657,7 @@ def __init__(self, name, results_dir, models_dir, image_size, network_capacity,
 
     def init_GAN(self):
         args, kwargs = self.GAN_params
-        self.GAN = StyleGAN2(lr=self.lr, image_size = self.image_size, network_capacity = self.network_capacity, transparent = self.transparent, fq_layers = self.fq_layers, fq_dict_size = self.fq_dict_size, attn_layers = self.attn_layers, fp16 = self.fp16, cl_reg = self.cl_reg, no_const = self.no_const, *args, **kwargs)
+        self.GAN = StyleGAN2(lr = self.lr, ttur_mult = self.ttur_mult, image_size = self.image_size, network_capacity = self.network_capacity, transparent = self.transparent, fq_layers = self.fq_layers, fq_dict_size = self.fq_dict_size, attn_layers = self.attn_layers, fp16 = self.fp16, cl_reg = self.cl_reg, no_const = self.no_const, *args, **kwargs)
 
     def write_config(self):
         self.config_path.write_text(json.dumps(self.config()))