import random
import torch
import numpy as np
import os
import glob
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch import optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm
def same_seeds(seed):
    # Python built-in random module
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Torch
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Disable cuDNN autotuning and force deterministic kernels, trading
    # some speed for reproducibility.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

same_seeds(2021)
class CrypkoDataset(Dataset):
    def __init__(self, fnames, transform):
        self.transform = transform
        self.fnames = fnames
        self.num_samples = len(self.fnames)

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        # 1. Load the image.
        img = torchvision.io.read_image(fname)
        # 2. Resize and normalize the image using torchvision.
        img = self.transform(img)
        return img

    def __len__(self):
        return self.num_samples
def get_dataset(root):
    fnames = glob.glob(os.path.join(root, '*'))
    # 1. Resize the images to (64, 64).
    # 2. Linearly map the pixel values from [0, 1] to [-1, 1].
    compose = [
        transforms.ToPILImage(),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
    transform = transforms.Compose(compose)
    dataset = CrypkoDataset(fnames, transform)
    return dataset
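
# Quick sanity check (a sketch; assumes a local 'faces/' folder of images):
#   dataset = get_dataset('faces')
#   img = dataset[0]  # tensor of shape (3, 64, 64) with values in [-1, 1]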
def weights_init(m):
    # DCGAN-style initialization: conv weights ~ N(0, 0.02),
    # batch-norm weights ~ N(1, 0.02) with zero bias.
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
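
# Shape walkthrough for the Generator below (with the default dim=64):
#   z: (N, 100) --Linear--> (N, 512*4*4) --view--> (N, 512, 4, 4)
#   --3 stride-2 transposed convs--> (N, 64, 32, 32)
#   --final transposed conv + Tanh--> (N, 3, 64, 64), values in [-1, 1].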
class Generator(nn.Module):
    """
    Input shape: (N, in_dim)
    Output shape: (N, 3, 64, 64)
    """
    def __init__(self, in_dim, dim=64):
        super(Generator, self).__init__()

        def dconv_bn_relu(in_dim, out_dim):
            return nn.Sequential(
                nn.ConvTranspose2d(in_dim, out_dim, 5, 2,
                                   padding=2, output_padding=1, bias=False),
                nn.BatchNorm2d(out_dim),
                nn.ReLU()
            )

        self.l1 = nn.Sequential(
            nn.Linear(in_dim, dim * 8 * 4 * 4, bias=False),
            nn.BatchNorm1d(dim * 8 * 4 * 4),
            nn.ReLU()
        )
        self.l2_5 = nn.Sequential(
            dconv_bn_relu(dim * 8, dim * 4),
            dconv_bn_relu(dim * 4, dim * 2),
            dconv_bn_relu(dim * 2, dim),
            nn.ConvTranspose2d(dim, 3, 5, 2, padding=2, output_padding=1),
            nn.Tanh()
        )
        self.apply(weights_init)

    def forward(self, x):
        y = self.l1(x)
        y = y.view(y.size(0), -1, 4, 4)
        y = self.l2_5(y)
        return y
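
# The Discriminator mirrors the Generator: each stride-2 conv halves the
# spatial size (64 -> 32 -> 16 -> 8 -> 4), and the final Conv2d(dim * 8, 1, 4)
# collapses the 4x4 feature map into a single realness score per image.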
class Discriminator(nn.Module):
    """
    Input shape: (N, 3, 64, 64)
    Output shape: (N, )
    """
    def __init__(self, in_dim, dim=64, use_sigmoid=True):
        super(Discriminator, self).__init__()

        def conv_bn_lrelu(in_dim, out_dim):
            return nn.Sequential(
                nn.Conv2d(in_dim, out_dim, 5, 2, 2),
                nn.BatchNorm2d(out_dim),
                nn.LeakyReLU(0.2),
            )

        """ Medium: Remove the last sigmoid layer for WGAN. """
        layers = [
            nn.Conv2d(in_dim, dim, 5, 2, 2),
            nn.LeakyReLU(0.2),
            conv_bn_lrelu(dim, dim * 2),
            conv_bn_lrelu(dim * 2, dim * 4),
            conv_bn_lrelu(dim * 4, dim * 8),
            nn.Conv2d(dim * 8, 1, 4),
        ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())
        self.ls = nn.Sequential(*layers)
        self.apply(weights_init)

    def forward(self, x):
        y = self.ls(x)
        y = y.view(-1)
        return y
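
# Two training recipes are implemented in train() below:
#   "Simple": DCGAN-style training with BCE loss, Adam, and n_critic=1.
#   "Medium": WGAN training with the sigmoid removed from D, RMSprop,
#             n_critic=5 critic updates per generator update, and weight
#             clipping to keep the critic approximately Lipschitz.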
def train(baseline="Simple", show_img=True):
    # Training hyperparameters
    batch_size = 64
    z_sample = Variable(torch.randn(100, z_dim)).cuda()
    lr = 1e-4

    if baseline == "Simple":
        n_epoch = 50
        n_critic = 1  # Update the discriminator once, then the generator once.
    elif baseline == "Medium":
        """ Medium: WGAN, 50 epoch, n_critic=5, clip_value=0.01 """
        n_epoch = 50
        n_critic = 5  # Update the discriminator 5 times, then the generator once.
        clip_value = 0.01

    # Model
    G = Generator(in_dim=z_dim).cuda()
    if baseline == "Simple":
        D = Discriminator(3).cuda()
    elif baseline == "Medium":
        D = Discriminator(3, use_sigmoid=False).cuda()
    G.train()
    D.train()

    # Loss
    criterion = nn.BCELoss()

    # Optimizer
    if baseline == "Simple":
        opt_D = torch.optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.999))
        opt_G = torch.optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))
    elif baseline == "Medium":
        """ Medium: Use RMSprop for WGAN. """
        opt_D = torch.optim.RMSprop(D.parameters(), lr=lr)
        opt_G = torch.optim.RMSprop(G.parameters(), lr=lr)

    # DataLoader
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    steps = 0
    for epoch in range(n_epoch):
        progress_bar = tqdm(dataloader)
        for data in progress_bar:
            imgs = data.cuda()
            bs = imgs.size(0)

            # ============================================
            #  Train D
            # ============================================
            z = Variable(torch.randn(bs, z_dim)).cuda()
            r_imgs = Variable(imgs).cuda()
            f_imgs = G(z)

            if baseline == "Simple":
                # Label
                r_label = torch.ones(bs).cuda()
                f_label = torch.zeros(bs).cuda()

                # Model forwarding
                r_logit = D(r_imgs.detach())
                f_logit = D(f_imgs.detach())

                # Compute the loss for the discriminator.
                r_loss = criterion(r_logit, r_label)
                f_loss = criterion(f_logit, f_label)
                loss_D = (r_loss + f_loss) / 2
            elif baseline == "Medium":
                # WGAN loss: the critic maximizes E[D(real)] - E[D(fake)],
                # so we minimize the negation. f_imgs is detached so that
                # no gradient flows back into G during the critic update.
                loss_D = -torch.mean(D(r_imgs)) + torch.mean(D(f_imgs.detach()))

            # Model backwarding
            D.zero_grad()
            loss_D.backward()

            # Update the discriminator.
            opt_D.step()

            if baseline == "Medium":
                """ Medium: Clip weights of discriminator. """
                for p in D.parameters():
                    p.data.clamp_(-clip_value, clip_value)
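                # Clipping every critic weight to [-clip_value, clip_value]
                # is the original WGAN recipe for keeping the critic roughly
                # Lipschitz, which the Wasserstein-distance estimate relies
                # on; the gradient penalty of WGAN-GP is the usual refinement.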
            # ============================================
            #  Train G
            # ============================================
            if steps % n_critic == 0:
                # Generate some fake images.
                z = Variable(torch.randn(bs, z_dim)).cuda()
                f_imgs = G(z)

                # Model forwarding
                f_logit = D(f_imgs)

                if baseline == "Simple":
                    # Compute the loss for the generator.
                    loss_G = criterion(f_logit, r_label)
                elif baseline == "Medium":
                    # WGAN loss: the generator maximizes E[D(fake)].
                    loss_G = -torch.mean(f_logit)

                # Model backwarding
                G.zero_grad()
                loss_G.backward()

                # Update the generator.
                opt_G.step()

            steps += 1

            # Set the info of the progress bar. Note that the value of the
            # GAN loss is not directly related to the quality of the
            # generated images.
            progress_bar.set_postfix({
                'Loss_D': round(loss_D.item(), 4),
                'Loss_G': round(loss_G.item(), 4),
                'Epoch': epoch + 1,
                'Step': steps,
            })
        G.eval()
        f_imgs_sample = (G(z_sample).data + 1) / 2.0
        filename = os.path.join(log_dir, f'Epoch_{epoch + 1:03d}.jpg')
        torchvision.utils.save_image(f_imgs_sample, filename, nrow=10)
        print(f' | Save some samples to {filename}.')

        # Show generated images in the jupyter notebook.
        if show_img:
            grid_img = torchvision.utils.make_grid(f_imgs_sample.cpu(), nrow=10)
            plt.figure(figsize=(10, 10))
            plt.imshow(grid_img.permute(1, 2, 0))
            plt.show()
        G.train()
        if (epoch + 1) % 5 == 0 or epoch == 0:
            # Save the checkpoints.
            torch.save(G.state_dict(), os.path.join(ckpt_dir, 'G.pth'))
            torch.save(D.state_dict(), os.path.join(ckpt_dir, 'D.pth'))
def inference():
    G = Generator(z_dim)
    G.load_state_dict(torch.load(os.path.join(ckpt_dir, 'G.pth')))
    G.eval()
    G.cuda()

    # Generate 1000 images and make a grid to save them.
    n_output = 1000
    z_sample = Variable(torch.randn(n_output, z_dim)).cuda()
    imgs_sample = (G(z_sample).data + 1) / 2.0
    log_dir = os.path.join('logs')
    filename = os.path.join(log_dir, 'result.jpg')
    torchvision.utils.save_image(imgs_sample, filename, nrow=10)

    # Show 30 of the images.
    grid_img = torchvision.utils.make_grid(imgs_sample[:30].cpu(), nrow=10)
    plt.figure(figsize=(10, 10))
    plt.imshow(grid_img.permute(1, 2, 0))
    plt.show()
if __name__ == '__main__':
    dataset = get_dataset('faces')

    # Note that the pixel values here are in [-1, 1], so a direct display
    # would look darker.
    # images = [dataset[i] for i in range(16)]
    # grid_img = torchvision.utils.make_grid(images, nrow=4)
    # plt.figure(figsize=(10, 10))
    # plt.imshow(grid_img.permute(1, 2, 0))
    # plt.show()

    # We need to map them back to the valid range [0, 1] to display correctly.
    # images = [(dataset[i] + 1) / 2 for i in range(16)]
    # grid_img = torchvision.utils.make_grid(images, nrow=4)
    # plt.figure(figsize=(10, 10))
    # plt.imshow(grid_img.permute(1, 2, 0))
    # plt.show()

    z_dim = 100
    log_dir = os.path.join('logs')
    ckpt_dir = os.path.join('checkpoints')
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(ckpt_dir, exist_ok=True)

    train(baseline="Medium", show_img=False)
    inference()