From 0d10e29922a1cb7a25cd2a557aa2fe5e73ca273f Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Sat, 3 Sep 2022 21:34:28 +0900 Subject: [PATCH 1/7] add a patch for #418 that solves the redundant privacy charging in DCGAN example --- examples/dcgan_patch1.py | 355 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 examples/dcgan_patch1.py diff --git a/examples/dcgan_patch1.py b/examples/dcgan_patch1.py new file mode 100644 index 00000000..99218472 --- /dev/null +++ b/examples/dcgan_patch1.py @@ -0,0 +1,355 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Runs DCGAN training with differential privacy. + +""" +from __future__ import print_function + +import argparse +import os +import random + +import torch.backends.cudnn as cudnn +import torch.nn as nn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils +from opacus import PrivacyEngine +from tqdm import tqdm + +parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("--data-root", required=True, help="path to dataset") +parser.add_argument( + "--workers", type=int, help="number of data loading workers", default=2 +) +parser.add_argument("--batch-size", type=int, default=64, help="input batch size") +parser.add_argument( + "--imageSize", + type=int, + default=64, + help="the height / width of the input image to network", +) +parser.add_argument("--nz", type=int, default=100, help="size of the latent z vector") +parser.add_argument("--ngf", type=int, default=128) +parser.add_argument("--ndf", type=int, default=128) +parser.add_argument( + "--epochs", type=int, default=25, help="number of epochs to train for" +) +parser.add_argument( + "--lr", type=float, default=0.0002, help="learning rate, default=0.0002" +) +parser.add_argument( + "--beta1", type=float, default=0.5, help="beta1 for adam. 
default=0.5" +) +parser.add_argument("--ngpu", type=int, default=1, help="number of GPUs to use") +parser.add_argument("--netG", default="", help="path to netG (to continue training)") +parser.add_argument("--netD", default="", help="path to netD (to continue training)") +parser.add_argument( + "--outf", default=".", help="folder to output images and model checkpoints" +) +parser.add_argument("--manualSeed", type=int, help="manual seed") +parser.add_argument( + "--target-digit", + type=int, + default=8, + help="the target digit(0~9) for MNIST training", +) +parser.add_argument( + "--device", + type=str, + default="cuda", + help="GPU ID for this process (default: 'cuda')", +) +parser.add_argument( + "--disable-dp", + action="store_true", + default=False, + help="Disable privacy training and just train with vanilla SGD", +) +parser.add_argument( + "--secure-rng", + action="store_true", + default=False, + help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost", +) +parser.add_argument( + "-r", + "--n-runs", + type=int, + default=1, + metavar="R", + help="number of runs to average on (default: 1)", +) +parser.add_argument( + "--sigma", + type=float, + default=1.0, + metavar="S", + help="Noise multiplier (default 1.0)", +) +parser.add_argument( + "-c", + "--max-per-sample-grad_norm", + type=float, + default=1.0, + metavar="C", + help="Clip per-sample gradients to this norm (default 1.0)", +) +parser.add_argument( + "--delta", + type=float, + default=1e-5, + metavar="D", + help="Target delta (default: 1e-5)", +) + +opt = parser.parse_args() + +try: + os.makedirs(opt.outf) +except OSError: + pass + +if opt.manualSeed is None: + opt.manualSeed = random.randint(1, 10000) +print("Random Seed: ", opt.manualSeed) +random.seed(opt.manualSeed) +torch.manual_seed(opt.manualSeed) + +cudnn.benchmark = True + + +try: + dataset = dset.MNIST( + root=opt.data_root, + download=True, + transform=transforms.Compose( + [ + transforms.Resize(opt.imageSize), + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), + ] + ), + ) + idx = dataset.targets == opt.target_digit + dataset.targets = dataset.targets[idx] + dataset.data = dataset.data[idx] + nc = 1 +except ValueError: + print("Cannot load dataset") + +dataloader = torch.utils.data.DataLoader( + dataset, + num_workers=int(opt.workers), + batch_size=opt.batch_size, +) + +device = torch.device(opt.device) +ngpu = int(opt.ngpu) +nz = int(opt.nz) +ngf = int(opt.ngf) +ndf = int(opt.ndf) + + +# custom weights initialization called on netG and netD +def weights_init(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(0.0, 0.02) + elif classname.find("BatchNorm") != -1: + m.weight.data.normal_(1.0, 0.02) + m.bias.data.fill_(0) + + +class Generator(nn.Module): + def __init__(self, ngpu): + super(Generator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is Z, going into a convolution + nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), + nn.GroupNorm(min(32, ndf * 8), ndf * 8), + nn.ReLU(True), + # state size. (ngf*8) x 4 x 4 + nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 4), ndf * 4), + nn.ReLU(True), + # state size. (ngf*4) x 8 x 8 + nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 2), ndf * 2), + nn.ReLU(True), + # state size. (ngf*2) x 16 x 16 + nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf), ndf), + nn.ReLU(True), + # state size. 
(ngf) x 32 x 32 + nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), + nn.Tanh() + # state size. (nc) x 64 x 64 + ) + + def forward(self, input): + if input.is_cuda and self.ngpu > 1: + output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) + else: + output = self.main(input) + return output + + +netG = Generator(ngpu) +netG = netG.to(device) +netG.apply(weights_init) +if opt.netG != "": + netG.load_state_dict(torch.load(opt.netG)) + + +class Discriminator(nn.Module): + def __init__(self, ngpu): + super(Discriminator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is (nc) x 64 x 64 + nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf) x 32 x 32 + nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 2), ndf * 2), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*2) x 16 x 16 + nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 4), ndf * 4), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*4) x 8 x 8 + nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 8), ndf * 8), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*8) x 4 x 4 + nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), + nn.Sigmoid(), + ) + + def forward(self, input): + if input.is_cuda and self.ngpu > 1: + output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) + else: + output = self.main(input) + + return output.view(-1, 1).squeeze(1) + + +netD = Discriminator(ngpu) +netD = netD.to(device) +netD.apply(weights_init) +if opt.netD != "": + netD.load_state_dict(torch.load(opt.netD)) + +criterion = nn.BCELoss() + +FIXED_NOISE = torch.randn(opt.batch_size, nz, 1, 1, device=device) +REAL_LABEL = 1.0 +FAKE_LABEL = 0.0 + +# setup optimizer +optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) + +if not opt.disable_dp: + privacy_engine = PrivacyEngine(secure_mode=opt.secure_rng) + + netD, optimizerD, dataloader = privacy_engine.make_private( + module=netD, + optimizer=optimizerD, + data_loader=dataloader, + noise_multiplier=opt.sigma, + max_grad_norm=opt.max_per_sample_grad_norm, + ) + +optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) + +for epoch in range(opt.epochs): + data_bar = tqdm(dataloader) + for i, data in enumerate(data_bar, 0): + ############################ + # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) + ########################### + + optimizerD.zero_grad() + + real_data = data[0].to(device) + batch_size = real_data.size(0) + # train with fake + noise = torch.randn(batch_size, nz, 1, 1, device=device) + fake = netG(noise) + label_fake = torch.full((batch_size,), FAKE_LABEL, device=device) + output = netD(fake.detach()) + errD_fake = criterion(output, label_fake) + + # train with real + label_true = torch.full((batch_size,), REAL_LABEL, device=device) + output = netD(real_data) + errD_real = criterion(output, label_true) + + # Note that we clip the gradient for not only real but also fake data. 
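+        # With the two losses summed, a single backward() and a single
+        # optimizerD.step() clip and noise one per-sample gradient per batch,
+        # so the privacy accountant is charged once per batch instead of twice.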
+ errD = errD_fake + errD_real + errD.backward() + optimizerD.step() + optimizerD.zero_grad() + + D_x = output.mean().item() + D_G_z1 = output.mean().item() + + ############################ + # (2) Update G network: maximize log(D(G(z))) + ########################### + optimizerG.zero_grad() + optimizerD.zero_grad() + + label_g = torch.full((batch_size,), REAL_LABEL, device=device) + output_g = netD(fake) + errG = criterion(output_g, label_g) + errG.backward() + D_G_z2 = output.mean().item() + optimizerG.step() + data_bar.set_description( + f"epoch: {epoch}, Loss_D: {errD.item()} " + f"Loss_G: {errG.item()} D(x): {D_x} " + f"D(G(z)): {D_G_z1}/{D_G_z2}" + ) + + if not opt.disable_dp: + epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent( + delta=opt.delta + ) + print( + "(ε = %.2f, δ = %.2f) for α = %.2f" % (epsilon, opt.delta, best_alpha) + ) + + if i % 100 == 0: + vutils.save_image( + real_data, "%s/real_samples.png" % opt.outf, normalize=True + ) + fake = netG(FIXED_NOISE) + vutils.save_image( + fake.detach(), + "%s/fake_samples_epoch_%03d.png" % (opt.outf, epoch), + normalize=True, + ) + + # do checkpointing + torch.save(netG.state_dict(), "%s/netG_epoch_%d.pth" % (opt.outf, epoch)) + torch.save(netD.state_dict(), "%s/netD_epoch_%d.pth" % (opt.outf, epoch)) From 03037f58ca652e8a4dd59ea58674bd1cd121e8d0 Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Sat, 3 Sep 2022 21:46:40 +0900 Subject: [PATCH 2/7] remove duplicated zero_grad --- examples/dcgan_patch1.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/dcgan_patch1.py b/examples/dcgan_patch1.py index 99218472..214c8c7d 100644 --- a/examples/dcgan_patch1.py +++ b/examples/dcgan_patch1.py @@ -308,7 +308,6 @@ def forward(self, input): errD = errD_fake + errD_real errD.backward() optimizerD.step() - optimizerD.zero_grad() D_x = output.mean().item() D_G_z1 = output.mean().item() From f35cb61b97b75bcd3d81b49368cd1fa216c17f6c Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Sat, 3 Sep 2022 21:50:09 +0900 Subject: [PATCH 3/7] add another solution that uses two optimizers; one for fake data and the other for real data --- examples/dcgan_patch2.py | 357 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 examples/dcgan_patch2.py diff --git a/examples/dcgan_patch2.py b/examples/dcgan_patch2.py new file mode 100644 index 00000000..7c51c3a1 --- /dev/null +++ b/examples/dcgan_patch2.py @@ -0,0 +1,357 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Runs DCGAN training with differential privacy. 
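+This variant implements the second fix for the redundant privacy charging
+reported in #418: the discriminator keeps two optimizers, a DP optimizer
+(wrapped by PrivacyEngine) for the real-data step and a plain Adam optimizer
+for the fake-data step, so the accountant only charges the update that
+touches real data.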
+ +""" +from __future__ import print_function + +import argparse +import os +import random + +import torch.backends.cudnn as cudnn +import torch.nn as nn +import torch.optim as optim +import torch.utils.data +import torchvision.datasets as dset +import torchvision.transforms as transforms +import torchvision.utils as vutils +from opacus import PrivacyEngine +from tqdm import tqdm + +parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("--data-root", required=True, help="path to dataset") +parser.add_argument( + "--workers", type=int, help="number of data loading workers", default=2 +) +parser.add_argument("--batch-size", type=int, default=64, help="input batch size") +parser.add_argument( + "--imageSize", + type=int, + default=64, + help="the height / width of the input image to network", +) +parser.add_argument("--nz", type=int, default=100, help="size of the latent z vector") +parser.add_argument("--ngf", type=int, default=128) +parser.add_argument("--ndf", type=int, default=128) +parser.add_argument( + "--epochs", type=int, default=25, help="number of epochs to train for" +) +parser.add_argument( + "--lr", type=float, default=0.0002, help="learning rate, default=0.0002" +) +parser.add_argument( + "--beta1", type=float, default=0.5, help="beta1 for adam. default=0.5" +) +parser.add_argument("--ngpu", type=int, default=1, help="number of GPUs to use") +parser.add_argument("--netG", default="", help="path to netG (to continue training)") +parser.add_argument("--netD", default="", help="path to netD (to continue training)") +parser.add_argument( + "--outf", default=".", help="folder to output images and model checkpoints" +) +parser.add_argument("--manualSeed", type=int, help="manual seed") +parser.add_argument( + "--target-digit", + type=int, + default=8, + help="the target digit(0~9) for MNIST training", +) +parser.add_argument( + "--device", + type=str, + default="cuda", + help="GPU ID for this process (default: 'cuda')", +) +parser.add_argument( + "--disable-dp", + action="store_true", + default=False, + help="Disable privacy training and just train with vanilla SGD", +) +parser.add_argument( + "--secure-rng", + action="store_true", + default=False, + help="Enable Secure RNG to have trustworthy privacy guarantees. 
Comes at a performance cost", +) +parser.add_argument( + "-r", + "--n-runs", + type=int, + default=1, + metavar="R", + help="number of runs to average on (default: 1)", +) +parser.add_argument( + "--sigma", + type=float, + default=1.0, + metavar="S", + help="Noise multiplier (default 1.0)", +) +parser.add_argument( + "-c", + "--max-per-sample-grad_norm", + type=float, + default=1.0, + metavar="C", + help="Clip per-sample gradients to this norm (default 1.0)", +) +parser.add_argument( + "--delta", + type=float, + default=1e-5, + metavar="D", + help="Target delta (default: 1e-5)", +) + +opt = parser.parse_args() + +try: + os.makedirs(opt.outf) +except OSError: + pass + +if opt.manualSeed is None: + opt.manualSeed = random.randint(1, 10000) +print("Random Seed: ", opt.manualSeed) +random.seed(opt.manualSeed) +torch.manual_seed(opt.manualSeed) + +cudnn.benchmark = True + + +try: + dataset = dset.MNIST( + root=opt.data_root, + download=True, + transform=transforms.Compose( + [ + transforms.Resize(opt.imageSize), + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), + ] + ), + ) + idx = dataset.targets == opt.target_digit + dataset.targets = dataset.targets[idx] + dataset.data = dataset.data[idx] + nc = 1 +except ValueError: + print("Cannot load dataset") + +dataloader = torch.utils.data.DataLoader( + dataset, + num_workers=int(opt.workers), + batch_size=opt.batch_size, +) + +device = torch.device(opt.device) +ngpu = int(opt.ngpu) +nz = int(opt.nz) +ngf = int(opt.ngf) +ndf = int(opt.ndf) + + +# custom weights initialization called on netG and netD +def weights_init(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(0.0, 0.02) + elif classname.find("BatchNorm") != -1: + m.weight.data.normal_(1.0, 0.02) + m.bias.data.fill_(0) + + +class Generator(nn.Module): + def __init__(self, ngpu): + super(Generator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is Z, going into a convolution + nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), + nn.GroupNorm(min(32, ndf * 8), ndf * 8), + nn.ReLU(True), + # state size. (ngf*8) x 4 x 4 + nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 4), ndf * 4), + nn.ReLU(True), + # state size. (ngf*4) x 8 x 8 + nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 2), ndf * 2), + nn.ReLU(True), + # state size. (ngf*2) x 16 x 16 + nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf), ndf), + nn.ReLU(True), + # state size. (ngf) x 32 x 32 + nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), + nn.Tanh() + # state size. (nc) x 64 x 64 + ) + + def forward(self, input): + if input.is_cuda and self.ngpu > 1: + output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) + else: + output = self.main(input) + return output + + +netG = Generator(ngpu) +netG = netG.to(device) +netG.apply(weights_init) +if opt.netG != "": + netG.load_state_dict(torch.load(opt.netG)) + + +class Discriminator(nn.Module): + def __init__(self, ngpu): + super(Discriminator, self).__init__() + self.ngpu = ngpu + self.main = nn.Sequential( + # input is (nc) x 64 x 64 + nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf) x 32 x 32 + nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 2), ndf * 2), + nn.LeakyReLU(0.2, inplace=True), + # state size. 
(ndf*2) x 16 x 16 + nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 4), ndf * 4), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*4) x 8 x 8 + nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), + nn.GroupNorm(min(32, ndf * 8), ndf * 8), + nn.LeakyReLU(0.2, inplace=True), + # state size. (ndf*8) x 4 x 4 + nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), + nn.Sigmoid(), + ) + + def forward(self, input): + if input.is_cuda and self.ngpu > 1: + output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) + else: + output = self.main(input) + + return output.view(-1, 1).squeeze(1) + + +netD = Discriminator(ngpu) +netD = netD.to(device) +netD.apply(weights_init) +if opt.netD != "": + netD.load_state_dict(torch.load(opt.netD)) + +criterion = nn.BCELoss() + +FIXED_NOISE = torch.randn(opt.batch_size, nz, 1, 1, device=device) +REAL_LABEL = 1.0 +FAKE_LABEL = 0.0 + +# setup optimizer +optimizerD_fake = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) +optimizerD_real = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) + +if not opt.disable_dp: + privacy_engine = PrivacyEngine(secure_mode=opt.secure_rng) + + netD, optimizerD_real, dataloader = privacy_engine.make_private( + module=netD, + optimizer=optimizerD_real, + data_loader=dataloader, + noise_multiplier=opt.sigma, + max_grad_norm=opt.max_per_sample_grad_norm, + ) + +optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) + +for epoch in range(opt.epochs): + data_bar = tqdm(dataloader) + for i, data in enumerate(data_bar, 0): + ############################ + # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) + ########################### + + optimizerD_fake.zero_grad() + optimizerD_real.zero_grad() + + real_data = data[0].to(device) + batch_size = real_data.size(0) + # train with fake + noise = torch.randn(batch_size, nz, 1, 1, device=device) + fake = netG(noise) + label_fake = torch.full((batch_size,), FAKE_LABEL, device=device) + output = netD(fake.detach()) + errD_fake = criterion(output, label_fake) + errD_fake.backward() + optimizerD_fake.step() + + # train with real + label_true = torch.full((batch_size,), REAL_LABEL, device=device) + output = netD(real_data) + errD_real = criterion(output, label_true) + errD_real.backward() + optimizerD_real.step() + D_x = output.mean().item() + + D_G_z1 = output.mean().item() + errD = errD_real + errD_fake + + ############################ + # (2) Update G network: maximize log(D(G(z))) + ########################### + optimizerG.zero_grad() + optimizerD_fake.zero_grad() + optimizerD_real.zero_grad() + + label_g = torch.full((batch_size,), REAL_LABEL, device=device) + output_g = netD(fake) + errG = criterion(output_g, label_g) + errG.backward() + D_G_z2 = output.mean().item() + optimizerG.step() + data_bar.set_description( + f"epoch: {epoch}, Loss_D: {errD.item()} " + f"Loss_G: {errG.item()} D(x): {D_x} " + f"D(G(z)): {D_G_z1}/{D_G_z2}" + ) + + if not opt.disable_dp: + epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent( + delta=opt.delta + ) + print( + "(ε = %.2f, δ = %.2f) for α = %.2f" % (epsilon, opt.delta, best_alpha) + ) + + if i % 100 == 0: + vutils.save_image( + real_data, "%s/real_samples.png" % opt.outf, normalize=True + ) + fake = netG(FIXED_NOISE) + vutils.save_image( + fake.detach(), + "%s/fake_samples_epoch_%03d.png" % (opt.outf, epoch), + normalize=True, + ) + + # do checkpointing + torch.save(netG.state_dict(), "%s/netG_epoch_%d.pth" % (opt.outf, epoch)) + 
torch.save(netD.state_dict(), "%s/netD_epoch_%d.pth" % (opt.outf, epoch)) From 979a5831e667379e019293a4940bc25978d58d64 Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Mon, 5 Sep 2022 22:16:25 +0900 Subject: [PATCH 4/7] clip gradient for solution 2 --- examples/dcgan_patch2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dcgan_patch2.py b/examples/dcgan_patch2.py index 7c51c3a1..19fedbce 100644 --- a/examples/dcgan_patch2.py +++ b/examples/dcgan_patch2.py @@ -301,6 +301,7 @@ def forward(self, input): output = netD(fake.detach()) errD_fake = criterion(output, label_fake) errD_fake.backward() + torch.nn.utils.clip_grad_norm_(netD.parameters(), opt.max_per_sample_grad_norm) optimizerD_fake.step() # train with real From 54779c000d3d2b959acdec52f22045a84dc50c74 Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Tue, 6 Sep 2022 16:32:45 +0900 Subject: [PATCH 5/7] adopt the first solution --- examples/dcgan.py | 12 +- examples/dcgan_patch1.py | 354 -------------------------------------- examples/dcgan_patch2.py | 358 --------------------------------------- 3 files changed, 5 insertions(+), 719 deletions(-) delete mode 100644 examples/dcgan_patch1.py delete mode 100644 examples/dcgan_patch2.py diff --git a/examples/dcgan.py b/examples/dcgan.py index 81d7134e..214c8c7d 100644 --- a/examples/dcgan.py +++ b/examples/dcgan.py @@ -33,7 +33,6 @@ from opacus import PrivacyEngine from tqdm import tqdm - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--data-root", required=True, help="path to dataset") parser.add_argument( @@ -299,20 +298,19 @@ def forward(self, input): label_fake = torch.full((batch_size,), FAKE_LABEL, device=device) output = netD(fake.detach()) errD_fake = criterion(output, label_fake) - errD_fake.backward() - optimizerD.step() - optimizerD.zero_grad() # train with real label_true = torch.full((batch_size,), REAL_LABEL, device=device) output = netD(real_data) errD_real = criterion(output, label_true) - errD_real.backward() + + # Note that we clip the gradient for not only real but also fake data. + errD = errD_fake + errD_real + errD.backward() optimizerD.step() - D_x = output.mean().item() + D_x = output.mean().item() D_G_z1 = output.mean().item() - errD = errD_real + errD_fake ############################ # (2) Update G network: maximize log(D(G(z))) diff --git a/examples/dcgan_patch1.py b/examples/dcgan_patch1.py deleted file mode 100644 index 214c8c7d..00000000 --- a/examples/dcgan_patch1.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Runs DCGAN training with differential privacy. 
- -""" -from __future__ import print_function - -import argparse -import os -import random - -import torch.backends.cudnn as cudnn -import torch.nn as nn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -from opacus import PrivacyEngine -from tqdm import tqdm - -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("--data-root", required=True, help="path to dataset") -parser.add_argument( - "--workers", type=int, help="number of data loading workers", default=2 -) -parser.add_argument("--batch-size", type=int, default=64, help="input batch size") -parser.add_argument( - "--imageSize", - type=int, - default=64, - help="the height / width of the input image to network", -) -parser.add_argument("--nz", type=int, default=100, help="size of the latent z vector") -parser.add_argument("--ngf", type=int, default=128) -parser.add_argument("--ndf", type=int, default=128) -parser.add_argument( - "--epochs", type=int, default=25, help="number of epochs to train for" -) -parser.add_argument( - "--lr", type=float, default=0.0002, help="learning rate, default=0.0002" -) -parser.add_argument( - "--beta1", type=float, default=0.5, help="beta1 for adam. default=0.5" -) -parser.add_argument("--ngpu", type=int, default=1, help="number of GPUs to use") -parser.add_argument("--netG", default="", help="path to netG (to continue training)") -parser.add_argument("--netD", default="", help="path to netD (to continue training)") -parser.add_argument( - "--outf", default=".", help="folder to output images and model checkpoints" -) -parser.add_argument("--manualSeed", type=int, help="manual seed") -parser.add_argument( - "--target-digit", - type=int, - default=8, - help="the target digit(0~9) for MNIST training", -) -parser.add_argument( - "--device", - type=str, - default="cuda", - help="GPU ID for this process (default: 'cuda')", -) -parser.add_argument( - "--disable-dp", - action="store_true", - default=False, - help="Disable privacy training and just train with vanilla SGD", -) -parser.add_argument( - "--secure-rng", - action="store_true", - default=False, - help="Enable Secure RNG to have trustworthy privacy guarantees. 
Comes at a performance cost", -) -parser.add_argument( - "-r", - "--n-runs", - type=int, - default=1, - metavar="R", - help="number of runs to average on (default: 1)", -) -parser.add_argument( - "--sigma", - type=float, - default=1.0, - metavar="S", - help="Noise multiplier (default 1.0)", -) -parser.add_argument( - "-c", - "--max-per-sample-grad_norm", - type=float, - default=1.0, - metavar="C", - help="Clip per-sample gradients to this norm (default 1.0)", -) -parser.add_argument( - "--delta", - type=float, - default=1e-5, - metavar="D", - help="Target delta (default: 1e-5)", -) - -opt = parser.parse_args() - -try: - os.makedirs(opt.outf) -except OSError: - pass - -if opt.manualSeed is None: - opt.manualSeed = random.randint(1, 10000) -print("Random Seed: ", opt.manualSeed) -random.seed(opt.manualSeed) -torch.manual_seed(opt.manualSeed) - -cudnn.benchmark = True - - -try: - dataset = dset.MNIST( - root=opt.data_root, - download=True, - transform=transforms.Compose( - [ - transforms.Resize(opt.imageSize), - transforms.ToTensor(), - transforms.Normalize((0.5,), (0.5,)), - ] - ), - ) - idx = dataset.targets == opt.target_digit - dataset.targets = dataset.targets[idx] - dataset.data = dataset.data[idx] - nc = 1 -except ValueError: - print("Cannot load dataset") - -dataloader = torch.utils.data.DataLoader( - dataset, - num_workers=int(opt.workers), - batch_size=opt.batch_size, -) - -device = torch.device(opt.device) -ngpu = int(opt.ngpu) -nz = int(opt.nz) -ngf = int(opt.ngf) -ndf = int(opt.ndf) - - -# custom weights initialization called on netG and netD -def weights_init(m): - classname = m.__class__.__name__ - if classname.find("Conv") != -1: - m.weight.data.normal_(0.0, 0.02) - elif classname.find("BatchNorm") != -1: - m.weight.data.normal_(1.0, 0.02) - m.bias.data.fill_(0) - - -class Generator(nn.Module): - def __init__(self, ngpu): - super(Generator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is Z, going into a convolution - nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), - nn.GroupNorm(min(32, ndf * 8), ndf * 8), - nn.ReLU(True), - # state size. (ngf*8) x 4 x 4 - nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 4), ndf * 4), - nn.ReLU(True), - # state size. (ngf*4) x 8 x 8 - nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 2), ndf * 2), - nn.ReLU(True), - # state size. (ngf*2) x 16 x 16 - nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf), ndf), - nn.ReLU(True), - # state size. (ngf) x 32 x 32 - nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), - nn.Tanh() - # state size. (nc) x 64 x 64 - ) - - def forward(self, input): - if input.is_cuda and self.ngpu > 1: - output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) - else: - output = self.main(input) - return output - - -netG = Generator(ngpu) -netG = netG.to(device) -netG.apply(weights_init) -if opt.netG != "": - netG.load_state_dict(torch.load(opt.netG)) - - -class Discriminator(nn.Module): - def __init__(self, ngpu): - super(Discriminator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is (nc) x 64 x 64 - nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf) x 32 x 32 - nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 2), ndf * 2), - nn.LeakyReLU(0.2, inplace=True), - # state size. 
(ndf*2) x 16 x 16 - nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 4), ndf * 4), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*4) x 8 x 8 - nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 8), ndf * 8), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*8) x 4 x 4 - nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), - nn.Sigmoid(), - ) - - def forward(self, input): - if input.is_cuda and self.ngpu > 1: - output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) - else: - output = self.main(input) - - return output.view(-1, 1).squeeze(1) - - -netD = Discriminator(ngpu) -netD = netD.to(device) -netD.apply(weights_init) -if opt.netD != "": - netD.load_state_dict(torch.load(opt.netD)) - -criterion = nn.BCELoss() - -FIXED_NOISE = torch.randn(opt.batch_size, nz, 1, 1, device=device) -REAL_LABEL = 1.0 -FAKE_LABEL = 0.0 - -# setup optimizer -optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) - -if not opt.disable_dp: - privacy_engine = PrivacyEngine(secure_mode=opt.secure_rng) - - netD, optimizerD, dataloader = privacy_engine.make_private( - module=netD, - optimizer=optimizerD, - data_loader=dataloader, - noise_multiplier=opt.sigma, - max_grad_norm=opt.max_per_sample_grad_norm, - ) - -optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) - -for epoch in range(opt.epochs): - data_bar = tqdm(dataloader) - for i, data in enumerate(data_bar, 0): - ############################ - # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) - ########################### - - optimizerD.zero_grad() - - real_data = data[0].to(device) - batch_size = real_data.size(0) - # train with fake - noise = torch.randn(batch_size, nz, 1, 1, device=device) - fake = netG(noise) - label_fake = torch.full((batch_size,), FAKE_LABEL, device=device) - output = netD(fake.detach()) - errD_fake = criterion(output, label_fake) - - # train with real - label_true = torch.full((batch_size,), REAL_LABEL, device=device) - output = netD(real_data) - errD_real = criterion(output, label_true) - - # Note that we clip the gradient for not only real but also fake data. 
- errD = errD_fake + errD_real - errD.backward() - optimizerD.step() - - D_x = output.mean().item() - D_G_z1 = output.mean().item() - - ############################ - # (2) Update G network: maximize log(D(G(z))) - ########################### - optimizerG.zero_grad() - optimizerD.zero_grad() - - label_g = torch.full((batch_size,), REAL_LABEL, device=device) - output_g = netD(fake) - errG = criterion(output_g, label_g) - errG.backward() - D_G_z2 = output.mean().item() - optimizerG.step() - data_bar.set_description( - f"epoch: {epoch}, Loss_D: {errD.item()} " - f"Loss_G: {errG.item()} D(x): {D_x} " - f"D(G(z)): {D_G_z1}/{D_G_z2}" - ) - - if not opt.disable_dp: - epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent( - delta=opt.delta - ) - print( - "(ε = %.2f, δ = %.2f) for α = %.2f" % (epsilon, opt.delta, best_alpha) - ) - - if i % 100 == 0: - vutils.save_image( - real_data, "%s/real_samples.png" % opt.outf, normalize=True - ) - fake = netG(FIXED_NOISE) - vutils.save_image( - fake.detach(), - "%s/fake_samples_epoch_%03d.png" % (opt.outf, epoch), - normalize=True, - ) - - # do checkpointing - torch.save(netG.state_dict(), "%s/netG_epoch_%d.pth" % (opt.outf, epoch)) - torch.save(netD.state_dict(), "%s/netD_epoch_%d.pth" % (opt.outf, epoch)) diff --git a/examples/dcgan_patch2.py b/examples/dcgan_patch2.py deleted file mode 100644 index 19fedbce..00000000 --- a/examples/dcgan_patch2.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Runs DCGAN training with differential privacy. - -""" -from __future__ import print_function - -import argparse -import os -import random - -import torch.backends.cudnn as cudnn -import torch.nn as nn -import torch.optim as optim -import torch.utils.data -import torchvision.datasets as dset -import torchvision.transforms as transforms -import torchvision.utils as vutils -from opacus import PrivacyEngine -from tqdm import tqdm - -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("--data-root", required=True, help="path to dataset") -parser.add_argument( - "--workers", type=int, help="number of data loading workers", default=2 -) -parser.add_argument("--batch-size", type=int, default=64, help="input batch size") -parser.add_argument( - "--imageSize", - type=int, - default=64, - help="the height / width of the input image to network", -) -parser.add_argument("--nz", type=int, default=100, help="size of the latent z vector") -parser.add_argument("--ngf", type=int, default=128) -parser.add_argument("--ndf", type=int, default=128) -parser.add_argument( - "--epochs", type=int, default=25, help="number of epochs to train for" -) -parser.add_argument( - "--lr", type=float, default=0.0002, help="learning rate, default=0.0002" -) -parser.add_argument( - "--beta1", type=float, default=0.5, help="beta1 for adam. 
default=0.5" -) -parser.add_argument("--ngpu", type=int, default=1, help="number of GPUs to use") -parser.add_argument("--netG", default="", help="path to netG (to continue training)") -parser.add_argument("--netD", default="", help="path to netD (to continue training)") -parser.add_argument( - "--outf", default=".", help="folder to output images and model checkpoints" -) -parser.add_argument("--manualSeed", type=int, help="manual seed") -parser.add_argument( - "--target-digit", - type=int, - default=8, - help="the target digit(0~9) for MNIST training", -) -parser.add_argument( - "--device", - type=str, - default="cuda", - help="GPU ID for this process (default: 'cuda')", -) -parser.add_argument( - "--disable-dp", - action="store_true", - default=False, - help="Disable privacy training and just train with vanilla SGD", -) -parser.add_argument( - "--secure-rng", - action="store_true", - default=False, - help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost", -) -parser.add_argument( - "-r", - "--n-runs", - type=int, - default=1, - metavar="R", - help="number of runs to average on (default: 1)", -) -parser.add_argument( - "--sigma", - type=float, - default=1.0, - metavar="S", - help="Noise multiplier (default 1.0)", -) -parser.add_argument( - "-c", - "--max-per-sample-grad_norm", - type=float, - default=1.0, - metavar="C", - help="Clip per-sample gradients to this norm (default 1.0)", -) -parser.add_argument( - "--delta", - type=float, - default=1e-5, - metavar="D", - help="Target delta (default: 1e-5)", -) - -opt = parser.parse_args() - -try: - os.makedirs(opt.outf) -except OSError: - pass - -if opt.manualSeed is None: - opt.manualSeed = random.randint(1, 10000) -print("Random Seed: ", opt.manualSeed) -random.seed(opt.manualSeed) -torch.manual_seed(opt.manualSeed) - -cudnn.benchmark = True - - -try: - dataset = dset.MNIST( - root=opt.data_root, - download=True, - transform=transforms.Compose( - [ - transforms.Resize(opt.imageSize), - transforms.ToTensor(), - transforms.Normalize((0.5,), (0.5,)), - ] - ), - ) - idx = dataset.targets == opt.target_digit - dataset.targets = dataset.targets[idx] - dataset.data = dataset.data[idx] - nc = 1 -except ValueError: - print("Cannot load dataset") - -dataloader = torch.utils.data.DataLoader( - dataset, - num_workers=int(opt.workers), - batch_size=opt.batch_size, -) - -device = torch.device(opt.device) -ngpu = int(opt.ngpu) -nz = int(opt.nz) -ngf = int(opt.ngf) -ndf = int(opt.ndf) - - -# custom weights initialization called on netG and netD -def weights_init(m): - classname = m.__class__.__name__ - if classname.find("Conv") != -1: - m.weight.data.normal_(0.0, 0.02) - elif classname.find("BatchNorm") != -1: - m.weight.data.normal_(1.0, 0.02) - m.bias.data.fill_(0) - - -class Generator(nn.Module): - def __init__(self, ngpu): - super(Generator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is Z, going into a convolution - nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), - nn.GroupNorm(min(32, ndf * 8), ndf * 8), - nn.ReLU(True), - # state size. (ngf*8) x 4 x 4 - nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 4), ndf * 4), - nn.ReLU(True), - # state size. (ngf*4) x 8 x 8 - nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 2), ndf * 2), - nn.ReLU(True), - # state size. (ngf*2) x 16 x 16 - nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf), ndf), - nn.ReLU(True), - # state size. 
(ngf) x 32 x 32 - nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), - nn.Tanh() - # state size. (nc) x 64 x 64 - ) - - def forward(self, input): - if input.is_cuda and self.ngpu > 1: - output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) - else: - output = self.main(input) - return output - - -netG = Generator(ngpu) -netG = netG.to(device) -netG.apply(weights_init) -if opt.netG != "": - netG.load_state_dict(torch.load(opt.netG)) - - -class Discriminator(nn.Module): - def __init__(self, ngpu): - super(Discriminator, self).__init__() - self.ngpu = ngpu - self.main = nn.Sequential( - # input is (nc) x 64 x 64 - nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf) x 32 x 32 - nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 2), ndf * 2), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*2) x 16 x 16 - nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 4), ndf * 4), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*4) x 8 x 8 - nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), - nn.GroupNorm(min(32, ndf * 8), ndf * 8), - nn.LeakyReLU(0.2, inplace=True), - # state size. (ndf*8) x 4 x 4 - nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), - nn.Sigmoid(), - ) - - def forward(self, input): - if input.is_cuda and self.ngpu > 1: - output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) - else: - output = self.main(input) - - return output.view(-1, 1).squeeze(1) - - -netD = Discriminator(ngpu) -netD = netD.to(device) -netD.apply(weights_init) -if opt.netD != "": - netD.load_state_dict(torch.load(opt.netD)) - -criterion = nn.BCELoss() - -FIXED_NOISE = torch.randn(opt.batch_size, nz, 1, 1, device=device) -REAL_LABEL = 1.0 -FAKE_LABEL = 0.0 - -# setup optimizer -optimizerD_fake = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) -optimizerD_real = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) - -if not opt.disable_dp: - privacy_engine = PrivacyEngine(secure_mode=opt.secure_rng) - - netD, optimizerD_real, dataloader = privacy_engine.make_private( - module=netD, - optimizer=optimizerD_real, - data_loader=dataloader, - noise_multiplier=opt.sigma, - max_grad_norm=opt.max_per_sample_grad_norm, - ) - -optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) - -for epoch in range(opt.epochs): - data_bar = tqdm(dataloader) - for i, data in enumerate(data_bar, 0): - ############################ - # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) - ########################### - - optimizerD_fake.zero_grad() - optimizerD_real.zero_grad() - - real_data = data[0].to(device) - batch_size = real_data.size(0) - # train with fake - noise = torch.randn(batch_size, nz, 1, 1, device=device) - fake = netG(noise) - label_fake = torch.full((batch_size,), FAKE_LABEL, device=device) - output = netD(fake.detach()) - errD_fake = criterion(output, label_fake) - errD_fake.backward() - torch.nn.utils.clip_grad_norm_(netD.parameters(), opt.max_per_sample_grad_norm) - optimizerD_fake.step() - - # train with real - label_true = torch.full((batch_size,), REAL_LABEL, device=device) - output = netD(real_data) - errD_real = criterion(output, label_true) - errD_real.backward() - optimizerD_real.step() - D_x = output.mean().item() - - D_G_z1 = output.mean().item() - errD = errD_real + errD_fake - - ############################ - # (2) Update G network: maximize log(D(G(z))) - ########################### - optimizerG.zero_grad() - 
optimizerD_fake.zero_grad() - optimizerD_real.zero_grad() - - label_g = torch.full((batch_size,), REAL_LABEL, device=device) - output_g = netD(fake) - errG = criterion(output_g, label_g) - errG.backward() - D_G_z2 = output.mean().item() - optimizerG.step() - data_bar.set_description( - f"epoch: {epoch}, Loss_D: {errD.item()} " - f"Loss_G: {errG.item()} D(x): {D_x} " - f"D(G(z)): {D_G_z1}/{D_G_z2}" - ) - - if not opt.disable_dp: - epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent( - delta=opt.delta - ) - print( - "(ε = %.2f, δ = %.2f) for α = %.2f" % (epsilon, opt.delta, best_alpha) - ) - - if i % 100 == 0: - vutils.save_image( - real_data, "%s/real_samples.png" % opt.outf, normalize=True - ) - fake = netG(FIXED_NOISE) - vutils.save_image( - fake.detach(), - "%s/fake_samples_epoch_%03d.png" % (opt.outf, epoch), - normalize=True, - ) - - # do checkpointing - torch.save(netG.state_dict(), "%s/netG_epoch_%d.pth" % (opt.outf, epoch)) - torch.save(netD.state_dict(), "%s/netD_epoch_%d.pth" % (opt.outf, epoch)) From a4b52f47f13d797762f3ee6fbb020bd8b38bcbc8 Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Sat, 10 Sep 2022 01:26:28 +0900 Subject: [PATCH 6/7] fix typos --- examples/dcgan.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/dcgan.py b/examples/dcgan.py index 214c8c7d..52cce1ab 100644 --- a/examples/dcgan.py +++ b/examples/dcgan.py @@ -297,11 +297,13 @@ def forward(self, input): fake = netG(noise) label_fake = torch.full((batch_size,), FAKE_LABEL, device=device) output = netD(fake.detach()) + D_G_z1 = output.mean().item() errD_fake = criterion(output, label_fake) # train with real label_true = torch.full((batch_size,), REAL_LABEL, device=device) output = netD(real_data) + D_x = output.mean().item() errD_real = criterion(output, label_true) # Note that we clip the gradient for not only real but also fake data. @@ -309,9 +311,6 @@ def forward(self, input): errD.backward() optimizerD.step() - D_x = output.mean().item() - D_G_z1 = output.mean().item() - ############################ # (2) Update G network: maximize log(D(G(z))) ########################### @@ -322,7 +321,7 @@ def forward(self, input): output_g = netD(fake) errG = criterion(output_g, label_g) errG.backward() - D_G_z2 = output.mean().item() + D_G_z2 = output_g.mean().item() optimizerG.step() data_bar.set_description( f"epoch: {epoch}, Loss_D: {errD.item()} " From 66885598dabbaee7b919233fceda719bd671c84e Mon Sep 17 00:00:00 2001 From: Koukyosyumei Date: Sat, 10 Sep 2022 18:36:37 +0900 Subject: [PATCH 7/7] isort --- examples/dcgan.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dcgan.py b/examples/dcgan.py index 52cce1ab..28180dc7 100644 --- a/examples/dcgan.py +++ b/examples/dcgan.py @@ -30,9 +30,10 @@ import torchvision.datasets as dset import torchvision.transforms as transforms import torchvision.utils as vutils -from opacus import PrivacyEngine from tqdm import tqdm +from opacus import PrivacyEngine + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--data-root", required=True, help="path to dataset") parser.add_argument(