Location>code7788 >text

[Hung-yi Lee (Li Hongyi) Machine Learning Notes] Generative Adversarial Network (GAN)

Popularity:250 ℃/2025-04-21 09:34:10
import glob
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F  # noqa: F401  (kept from the original import list)
from torch import optim
from torch.autograd import Variable  # noqa: F401  (kept; no longer needed by this script)
from torch.utils.data import DataLoader, Dataset

# torchvision, matplotlib and tqdm are imported inside the functions that use
# them, so the model definitions can be imported (and unit-tested) on a machine
# that only has torch installed.

# Dimension of the latent noise vector fed to the generator.
z_dim = 100
# Output directories for sample grids and model checkpoints.
log_dir = os.path.join('logs')
ckpt_dir = os.path.join('checkpoints')

# NOTE(review): the checkpoint / result file names are blank in the source
# text this script was recovered from; the names below are assumed -- confirm.
G_CKPT_NAME = 'G.pth'
D_CKPT_NAME = 'D.pth'
RESULT_NAME = 'result.jpg'


def same_seeds(seed):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # Python built-in random module
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Torch
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Disable the cuDNN autotuner and force deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


same_seeds(2021)


def _get_device():
    """Return the CUDA device when available, otherwise the CPU."""
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _sample_noise(n, device):
    """Draw ``n`` latent vectors of size ``z_dim`` and move them to ``device``.

    The noise is drawn from the CPU generator and then moved, so the random
    stream does not depend on which device is used.
    """
    return torch.randn(n, z_dim).to(device)


class CrypkoDataset(Dataset):
    """Image dataset backed by a list of file paths.

    Args:
        fnames: Sequence of image file paths.
        transform: Callable applied to each loaded image tensor.
    """

    def __init__(self, fnames, transform):
        self.transform = transform
        self.fnames = fnames
        self.num_samples = len(self.fnames)

    def __getitem__(self, idx):
        from torchvision.io import read_image

        fname = self.fnames[idx]
        # 1. Load the image
        img = read_image(fname)
        # 2. Resize and normalize the images using torchvision.
        img = self.transform(img)
        return img

    def __len__(self):
        return self.num_samples


def get_dataset(root):
    """Build a :class:`CrypkoDataset` from every file directly under ``root``.

    Each image is resized to 64x64 and normalized with mean 0.5 / std 0.5 per
    channel, which linearly maps [0, 1] to [-1, 1].

    Args:
        root: Directory containing the image files.

    Returns:
        The dataset over all files matched by ``root/*``.
    """
    import torchvision.transforms as transforms

    # Sorted so that the sample order (and hence a seeded run) does not depend
    # on the file-system enumeration order.
    fnames = sorted(glob.glob(os.path.join(root, '*')))
    # 1. Resize the image to (64, 64)
    # 2. Linearly map [0, 1] to [-1, 1]
    compose = [
        transforms.ToPILImage(),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
    transform = transforms.Compose(compose)
    return CrypkoDataset(fnames, transform)


def weights_init(m):
    """Initialize a module in place; intended for ``nn.Module.apply``.

    Modules whose class name contains ``Conv`` get weights drawn from
    N(0, 0.02); those containing ``BatchNorm`` get weights from N(1, 0.02)
    and zero bias. Other modules are left untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


class Generator(nn.Module):
    """
    Input shape: (N, in_dim)
    Output shape: (N, 3, 64, 64)
    """

    def __init__(self, in_dim, dim=64):
        super(Generator, self).__init__()

        def dconv_bn_relu(in_dim, out_dim):
            # Transposed conv that doubles the spatial size, then BN + ReLU.
            return nn.Sequential(
                nn.ConvTranspose2d(in_dim, out_dim, 5, 2,
                                   padding=2, output_padding=1, bias=False),
                nn.BatchNorm2d(out_dim),
                nn.ReLU(),
            )

        # Project the noise vector to a (dim * 8, 4, 4) feature map.
        self.l1 = nn.Sequential(
            nn.Linear(in_dim, dim * 8 * 4 * 4, bias=False),
            nn.BatchNorm1d(dim * 8 * 4 * 4),
            nn.ReLU(),
        )
        # Four upsampling stages: 4 -> 8 -> 16 -> 32 -> 64; Tanh keeps the
        # output in [-1, 1], matching the dataset normalization.
        self.l2_5 = nn.Sequential(
            dconv_bn_relu(dim * 8, dim * 4),
            dconv_bn_relu(dim * 4, dim * 2),
            dconv_bn_relu(dim * 2, dim),
            nn.ConvTranspose2d(dim, 3, 5, 2, padding=2, output_padding=1),
            nn.Tanh(),
        )
        self.apply(weights_init)

    def forward(self, x):
        y = self.l1(x)
        y = y.view(y.size(0), -1, 4, 4)
        y = self.l2_5(y)
        return y


class Discriminator(nn.Module):
    """
    Input shape: (N, 3, 64, 64)
    Output shape: (N, )

    Args:
        in_dim: Number of input image channels.
        dim: Base channel width.
        use_sigmoid: Append a final Sigmoid (GAN with BCE loss). Pass
            ``False`` for WGAN, whose critic outputs an unbounded score.
    """

    def __init__(self, in_dim, dim=64, use_sigmoid=True):
        super(Discriminator, self).__init__()

        def conv_bn_lrelu(in_dim, out_dim):
            # Strided conv that halves the spatial size, then BN + LeakyReLU.
            return nn.Sequential(
                nn.Conv2d(in_dim, out_dim, 5, 2, 2),
                nn.BatchNorm2d(out_dim),
                nn.LeakyReLU(0.2),
            )

        # Medium: Remove the last sigmoid layer for WGAN.
        layers = [
            nn.Conv2d(in_dim, dim, 5, 2, 2),
            nn.LeakyReLU(0.2),
            conv_bn_lrelu(dim, dim * 2),
            conv_bn_lrelu(dim * 2, dim * 4),
            conv_bn_lrelu(dim * 4, dim * 8),
            nn.Conv2d(dim * 8, 1, 4),
        ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())
        # NOTE(review): the attribute name is blank in the source text; `ls`
        # is assumed. It determines the state_dict keys -- confirm before
        # loading checkpoints produced by another copy of this script.
        self.ls = nn.Sequential(*layers)
        self.apply(weights_init)

    def forward(self, x):
        y = self.ls(x)
        y = y.view(-1)
        return y


def train(baseline="Simple", show_img=True):
    """Train the generator/discriminator pair and write samples + checkpoints.

    Reads the module-level ``dataset``, ``z_dim``, ``log_dir`` and
    ``ckpt_dir``. After every epoch a 10x10 sample grid is saved to
    ``log_dir``; checkpoints are saved to ``ckpt_dir`` after the first epoch
    and every fifth epoch.

    Args:
        baseline: ``"Simple"`` for a GAN trained with BCE loss and Adam, or
            ``"Medium"`` for WGAN (RMSprop, weight clipping, 5 critic steps
            per generator step).
        show_img: Also display each epoch's sample grid with matplotlib.

    Raises:
        ValueError: If ``baseline`` is not one of the supported names.
    """
    # Training hyperparameters
    if baseline == "Simple":
        n_epoch = 50
        n_critic = 1  # one discriminator update per generator update
        clip_value = None
    elif baseline == "Medium":
        # Medium: WGAN, 50 epoch, n_critic=5, clip_value=0.01
        n_epoch = 50
        n_critic = 5  # five discriminator updates per generator update
        clip_value = 0.01
    else:
        raise ValueError(
            f"Unknown baseline {baseline!r}; expected 'Simple' or 'Medium'.")
    is_wgan = baseline == "Medium"

    import torchvision
    from tqdm import tqdm

    device = _get_device()
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(ckpt_dir, exist_ok=True)

    batch_size = 64
    # Fixed noise, reused every epoch so the sample grids are comparable.
    z_sample = _sample_noise(100, device)
    lr = 1e-4

    # Model
    G = Generator(in_dim=z_dim).to(device)
    D = Discriminator(3, use_sigmoid=not is_wgan).to(device)
    G.train()
    D.train()

    # Loss
    criterion = nn.BCELoss()

    # Optimizer
    if is_wgan:
        # Medium: Use RMSprop for WGAN.
        opt_D = optim.RMSprop(D.parameters(), lr=lr)
        opt_G = optim.RMSprop(G.parameters(), lr=lr)
    else:
        opt_D = optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.999))
        opt_G = optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))

    # DataLoader
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                            num_workers=2)

    steps = 0
    for epoch in range(n_epoch):
        progress_bar = tqdm(dataloader)
        for imgs in progress_bar:
            r_imgs = imgs.to(device)
            bs = r_imgs.size(0)

            # ============================================
            #  Train D
            # ============================================
            z = _sample_noise(bs, device)
            # The discriminator step must not backpropagate into G, so the
            # fake batch is produced without building an autograd graph.
            with torch.no_grad():
                f_imgs = G(z)

            if is_wgan:
                # WGAN Loss
                loss_D = -torch.mean(D(r_imgs)) + torch.mean(D(f_imgs))
            else:
                # Label
                r_label = torch.ones(bs, device=device)
                f_label = torch.zeros(bs, device=device)

                # Model forwarding
                r_logit = D(r_imgs)
                f_logit = D(f_imgs)

                # Compute the loss for the discriminator.
                r_loss = criterion(r_logit, r_label)
                f_loss = criterion(f_logit, f_label)
                loss_D = (r_loss + f_loss) / 2

            # Model backwarding
            D.zero_grad()
            loss_D.backward()

            # Update the discriminator.
            opt_D.step()

            if clip_value is not None:
                # Medium: Clip weights of discriminator.
                for p in D.parameters():
                    p.data.clamp_(-clip_value, clip_value)

            # ============================================
            #  Train G
            # ============================================
            # steps starts at 0, so the generator is updated on the very
            # first iteration and loss_G is always defined below.
            if steps % n_critic == 0:
                # Generate some fake images.
                z = _sample_noise(bs, device)
                f_imgs = G(z)

                # Model forwarding
                f_logit = D(f_imgs)

                if is_wgan:
                    # WGAN Loss
                    loss_G = -torch.mean(f_logit)
                else:
                    # Compute the loss for the generator.
                    loss_G = criterion(f_logit, r_label)

                # Model backwarding
                G.zero_grad()
                loss_G.backward()

                # Update the generator.
                opt_G.step()

            steps += 1

            # Set the info of the progress bar
            #   Note that the value of the GAN loss is not directly related to
            #   the quality of the generated images.
            progress_bar.set_postfix({
                'Loss_D': round(loss_D.item(), 4),
                'Loss_G': round(loss_G.item(), 4),
                'Epoch': epoch + 1,
                'Step': steps,
            })

        G.eval()
        with torch.no_grad():
            # Map the Tanh output from [-1, 1] back to [0, 1] for saving.
            f_imgs_sample = (G(z_sample) + 1) / 2.0
        filename = os.path.join(log_dir, f'Epoch_{epoch + 1:03d}.jpg')
        torchvision.utils.save_image(f_imgs_sample, filename, nrow=10)
        print(f' | Save some samples to {filename}.')

        # Show generated images in the jupyter notebook.
        if show_img:
            import matplotlib.pyplot as plt

            grid_img = torchvision.utils.make_grid(f_imgs_sample.cpu(), nrow=10)
            plt.figure(figsize=(10, 10))
            plt.imshow(grid_img.permute(1, 2, 0))
            plt.show()
        G.train()

        if (epoch + 1) % 5 == 0 or epoch == 0:
            # Save the checkpoints.
            torch.save(G.state_dict(), os.path.join(ckpt_dir, G_CKPT_NAME))
            torch.save(D.state_dict(), os.path.join(ckpt_dir, D_CKPT_NAME))


def inference():
    """Load the saved generator, write a grid of 1000 samples and show 30.

    Reads the generator checkpoint from ``ckpt_dir`` and writes the image
    grid to ``log_dir``.
    """
    import matplotlib.pyplot as plt
    import torchvision

    device = _get_device()
    os.makedirs(log_dir, exist_ok=True)

    G = Generator(z_dim)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    G.load_state_dict(torch.load(os.path.join(ckpt_dir, G_CKPT_NAME),
                                 map_location=device))
    G.eval()
    G.to(device)

    # Generate 1000 images and make a grid to save them.
    n_output = 1000
    z_sample = _sample_noise(n_output, device)
    with torch.no_grad():
        imgs_sample = (G(z_sample) + 1) / 2.0
    filename = os.path.join(log_dir, RESULT_NAME)
    torchvision.utils.save_image(imgs_sample, filename, nrow=10)

    # Show 30 of the images.
    grid_img = torchvision.utils.make_grid(imgs_sample[:30].cpu(), nrow=10)
    plt.figure(figsize=(10, 10))
    plt.imshow(grid_img.permute(1, 2, 0))
    plt.show()


if __name__ == '__main__':
    dataset = get_dataset('faces')

    # Note that the range of these values is [-1, 1], so the display is darker
    # images = [dataset[i] for i in range(16)]
    # grid_img = torchvision.utils.make_grid(images, nrow=4)
    # plt.figure(figsize=(10, 10))
    # plt.imshow(grid_img.permute(1, 2, 0))
    # plt.show()

    # We need to convert them to the valid range [0, 1] to display correctly.
    # images = [(dataset[i] + 1) / 2 for i in range(16)]
    # grid_img = torchvision.utils.make_grid(images, nrow=4)
    # plt.figure(figsize=(10, 10))
    # plt.imshow(grid_img.permute(1, 2, 0))
    # plt.show()

    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(ckpt_dir, exist_ok=True)

    train(baseline="Medium", show_img=False)
    inference()