cDCGAN生成指定手写数字--tensorflow2.0

接上一篇DCGAN手写数字生成,虽然它能够生成效果不错的手写数字图片,但它有一个缺点就是不能生成指定数值的数字,好在有一种 GAN 模型叫 cGAN,即 Conditional Generative Adversarial Nets, 出自 此篇论文,它能够生成指定数值的数字图片。

模型结构如下图:

其损失函数定义:

GAN 所能接受的条件 y 种类很多,如 label 等等。接下来,我们就使用 MNIST 数据集加入 label 条件,实现一个可以生成指定数值的数字图片模型。代码实现和上一篇几乎一样,只不过加入了 label。废话少说,show me your code

1、定义生成器

和上一篇中的生成器定义一样,只不过这里的输入维度变成了 110:随机噪声的 100 维 + one-hot 编码的 10 维 label,将两者级联起来送入生成器,生成一个 28*28*1 的黑白数字图像。

级联:tf.concat((step_noise, labels), axis=1)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def make_generator_model():
    """Build the cDCGAN generator.

    Input: a 110-dim vector (100-dim noise concatenated with a 10-dim
    one-hot label). Output: a 28x28x1 image with pixels in [-1, 1].
    """
    model = tf.keras.Sequential()
    # Input dimension is 100 (noise) + 10 (one-hot label) = 110.
    model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(110,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())
    model.add(layers.Reshape((7, 7, 256)))
    assert model.output_shape == (None, 7, 7, 256)

    # Transposed convolutions upsample 7x7 -> 7x7 -> 14x14 -> 28x28.
    model.add(layers.Conv2DTranspose(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 7, 7, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(filters=64, kernel_size=(5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 14, 14, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # tanh maps pixels to [-1, 1], matching the normalized training images.
    model.add(layers.Conv2DTranspose(filters=1, kernel_size=(5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 28, 28, 1)
    return model
2、定义判别器

判别器的结构也和上一篇的一模一样,不过这里的输入维度由 28*28*1 变成了 28*28*11,将 one-hot 编码的label 条件添加到图片中。具体级联操作:

labels = tf.reshape(labels, (256,1,1,10)) * tf.ones((256,28,28,10)) #[batch, 28, 28, 10]

tf.concat((images, labels), axis=3) #[batch, 28, 28, 11]

1
2
3
4
5
6
7
8
9
10
11
12
13
def make_discriminator_model():
    """Build the cDCGAN discriminator.

    Input: a 28x28x11 tensor (the 1-channel image stacked with 10
    label-condition channels). Output: a single logit — real images
    should score high, generated images low.
    """
    model = tf.keras.Sequential()
    # Channel dimension is 1 (image) + 10 (broadcast one-hot label) = 11.
    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), strides=(2, 2), padding='same', input_shape=[28, 28, 11]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(rate=0.3))

    model.add(layers.Conv2D(filters=128, kernel_size=(5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(rate=0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1))  # single logit: real -> 1, fake -> 0
    return model
3、定义生成器和判别器的损失函数

损失函数和上一篇的一模一样,不用改,具体如下:

1
2
3
4
5
6
7
8
9
10
# from_logits=True because the discriminator outputs raw logits, not probabilities.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def discriminator_loss(real_output, fake_output):
    """Discriminator loss: push real logits toward 1 and fake logits toward 0."""
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    """Generator loss: reward fakes that the discriminator scores as real (1)."""
    return cross_entropy(tf.ones_like(fake_output), fake_output)
4、定义优化器及训练过程

和上一篇 DCGAN 的一模一样,在训练过程中,每次生成一个维度为 [256, 100] 的随机噪声矩阵,级联 labels 条件后喂入 generator 生成 generated_image

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Instantiate the two networks.
generator = make_generator_model()
discriminator = make_discriminator_model()
# One Adam optimizer per network, as in the standard DCGAN setup.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(images, labels):
    """Run one adversarial update on a batch.

    images: [256, 28, 28, 1] real images in [-1, 1].
    labels: [256, 10] one-hot digit labels used as the GAN condition.
    """
    step_noise = tf.random.normal([256, 100])  # 256 is the batch size
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # Generator input: noise concatenated with the one-hot label -> [256, 110].
        generated_images = generator(tf.concat((step_noise, labels), axis=1), training=True)  # [256, 28, 28, 1]

        # Broadcast each one-hot label to a 28x28 spatial map so it can be
        # stacked onto the image channels for the discriminator.
        labels = tf.reshape(labels, (256, 1, 1, 10)) * tf.ones((256, 28, 28, 10))  # [256, 28, 28, 10]

        real_output = discriminator(tf.concat((images, labels), axis=3), training=True)  # input [256, 28, 28, 11]
        fake_output = discriminator(tf.concat((generated_images, labels), axis=3), training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)
    # Gradients are taken after the tape context closes, then applied per network.
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
5、开始训练

此部分还是和上一篇的一模一样,checkpoint 定义要保存的模型对象,这里保存了生成器,判别器及两者各自的优化器,每 20 轮保存一次。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Checkpoint object: saves both networks and their optimizers.
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer, discriminator_optimizer=discriminator_optimizer, generator=generator, discriminator=discriminator)

def train(train_images, train_labels, epochs):
    """Train for `epochs` epochs over [70000, 28, 28, 1] images / [70000, 10] labels.

    Saves a preview grid every epoch and a checkpoint every 20 epochs.
    """
    for epoch in range(epochs):
        start = time.time()
        # Reshuffle the full dataset each epoch.
        index = random.sample(range(0, train_images.shape[0]), train_images.shape[0])
        for step in range(train_images.shape[0] // 256):
            train_step(train_images[index[step*256:(step+1)*256]], train_labels[index[step*256:(step+1)*256]])
        # Render the fixed noise/label grid to track generator progress.
        generate_and_save_images(generator, epoch+1, tf.concat((noise_image, noise_label), axis=1))
        if (epoch+1) % 20 == 0:
            checkpoint.save(file_prefix="training_checkpoints/cDCGAN")
        print("Time for ", epoch, " epoch is: ", time.time() - start)

其中,generate_and_save_images 函数和此前的还是一样,只不过这里喂入的数据,是固定的 noise_image 与 noise_label 级联后维度为 110 的输入,用来查看生成器生成图片的效果。0–9 每个 label 各 10 个样本,一共 100 个样例。具体实现如下:

1
2
3
4
5
6
7
8
# Fixed evaluation inputs: 10 noise vectors per digit 0-9 (100 samples total),
# kept constant so per-epoch previews are directly comparable.
noise_image = tf.random.normal([10, 100])
noise_label = tf.zeros([10])
for num in range(1, 10):
    tmp_noise_image = tf.random.normal([10, 100])
    noise_image = tf.concat((noise_image, tmp_noise_image), axis=0)
    tmp_noise_label = tf.zeros([10]) + num
    noise_label = tf.concat((noise_label, tmp_noise_label), axis=0)
# Convert integer labels to one-hot rows via identity-matrix indexing: [100, 10].
noise_label = np.eye(10)[tf.cast(noise_label, tf.int32)].astype("float32")
1
2
3
4
5
6
7
8
9
def generate_and_save_images(model, epoch, inputs):
    """Generate a 10x10 grid of digits from `inputs` ([100, 110]) and save it as a PNG.

    `inputs` was renamed from `input` to avoid shadowing the builtin;
    all call sites pass it positionally.
    """
    predictions = model(inputs, training=False)
    fig = plt.figure(figsize=(10, 10))
    for i in range(predictions.shape[0]):
        plt.subplot(10, 10, i+1)
        # Map tanh output [-1, 1] back to pixel range [0, 255].
        plt.imshow(predictions[i, :, :, 0]*127.5+127.5, cmap='gray')
        plt.axis('off')
    plt.savefig("epoch_image/image_at_epoch_" + str(epoch) + ".png")
    plt.show()
6、效果展示

每次训练生成的数字效果图如下:

可见,该模型成功生成了label(y) 所指定数值的数字手写图片。为了对生成过程有个动态的直观感受,我们使用如下函数,将每轮训练由固定的 noise 生成的效果图做成一个 gif 图片。

1
2
3
4
5
6
7
8
9
10
def gif_animation_generate():
    """Stitch the 80 per-epoch preview PNGs into one animated GIF (0.1 s/frame)."""
    gif_name = "cdcgan_gif.gif"
    filenames = ["epoch_image/image_at_epoch_" + str(i) + ".png" for i in range(1, 81)]
    frames = [imageio.imread(filename) for filename in filenames]
    imageio.mimsave(gif_name, frames, "GIF", duration=0.1)

最后的效果图:

全部代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# _*_ coding: utf-8 _*_
"""
@author: Jibao Wang
@time: 2019/12/27 10:29
"""
import tensorflow as tf
import glob, imageio, os, time, PIL, random
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import numpy as np

# Fixed preview inputs: 10 noise vectors per digit class 0-9 (100 samples),
# kept constant across epochs so generator progress is directly comparable.
noise_image = tf.random.normal([10, 100])
noise_label = tf.zeros([10])
for num in range(1, 10):
    tmp_noise_image = tf.random.normal([10, 100])
    noise_image = tf.concat((noise_image, tmp_noise_image), axis=0)
    tmp_noise_label = tf.zeros([10]) + num
    noise_label = tf.concat((noise_label, tmp_noise_label), axis=0)
# One-hot encode via identity-matrix indexing.
noise_label = np.eye(10)[tf.cast(noise_label, tf.int32)].astype("float32")
# noise_image -> tensor [100, 100], noise_label -> array [100, 10]

# 创建模型
# Generator definition
def make_generator_model():
    """Build the cDCGAN generator.

    Input: a 110-dim vector (100-dim noise + 10-dim one-hot label).
    Output: a 28x28x1 image with pixels in [-1, 1].
    """
    model = tf.keras.Sequential()
    # Input dimension is 100 (noise) + 10 (one-hot label) = 110.
    model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(110,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())
    model.add(layers.Reshape((7, 7, 256)))
    assert model.output_shape == (None, 7, 7, 256)

    # Transposed convolutions upsample 7x7 -> 7x7 -> 14x14 -> 28x28.
    model.add(layers.Conv2DTranspose(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 7, 7, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(filters=64, kernel_size=(5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 14, 14, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # tanh maps pixels to [-1, 1], matching the normalized training images.
    model.add(layers.Conv2DTranspose(filters=1, kernel_size=(5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 28, 28, 1)
    return model

# Discriminator definition
def make_discriminator_model():
    """Build the cDCGAN discriminator.

    Input: a 28x28x11 tensor (1 image channel + 10 label-condition channels).
    Output: a single logit — real images score high, fakes score low.
    """
    model = tf.keras.Sequential()
    # Channel dimension is 1 (image) + 10 (broadcast one-hot label) = 11.
    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), strides=(2, 2), padding='same', input_shape=[28, 28, 11]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(rate=0.3))

    model.add(layers.Conv2D(filters=128, kernel_size=(5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(rate=0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1))  # single logit: real -> 1, fake -> 0
    return model

# from_logits=True because the discriminator outputs raw logits, not probabilities.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

# Discriminator loss
def discriminator_loss(real_output, fake_output):
    """Push real logits toward 1 and fake logits toward 0."""
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

# Generator loss
def generator_loss(fake_output):
    """Reward fakes that the discriminator scores as real (1)."""
    return cross_entropy(tf.ones_like(fake_output), fake_output)

generator = make_generator_model()
discriminator = make_discriminator_model()
# One Adam optimizer per network, as in the standard DCGAN setup.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(images, labels):
    """Run one adversarial update on a batch.

    images: [256, 28, 28, 1] real images in [-1, 1].
    labels: [256, 10] one-hot digit labels used as the GAN condition.
    """
    step_noise = tf.random.normal([256, 100])  # 256 is the batch size
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # Generator input: noise concatenated with the one-hot label -> [256, 110].
        generated_images = generator(tf.concat((step_noise, labels), axis=1), training=True)  # [256, 28, 28, 1]

        # Broadcast each one-hot label to a 28x28 spatial map so it can be
        # stacked onto the image channels for the discriminator.
        labels = tf.reshape(labels, (256, 1, 1, 10)) * tf.ones((256, 28, 28, 10))  # [256, 28, 28, 10]

        real_output = discriminator(tf.concat((images, labels), axis=3), training=True)  # input [256, 28, 28, 11]
        fake_output = discriminator(tf.concat((generated_images, labels), axis=3), training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)
    # Gradients are taken after the tape context closes, then applied per network.
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))


# Checkpoint object: saves both networks and their optimizers.
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer, discriminator_optimizer=discriminator_optimizer,
                                 generator=generator, discriminator=discriminator)

def generate_and_save_images(model, epoch, inputs):
    """Generate a 10x10 grid of digits from `inputs` ([100, 110]) and save it as a PNG.

    `inputs` was renamed from `input` to avoid shadowing the builtin;
    all call sites pass it positionally.
    """
    predictions = model(inputs, training=False)
    fig = plt.figure(figsize=(10, 10))
    for i in range(predictions.shape[0]):
        plt.subplot(10, 10, i+1)
        # Map tanh output [-1, 1] back to pixel range [0, 255].
        plt.imshow(predictions[i, :, :, 0]*127.5+127.5, cmap='gray')
        plt.axis('off')
    plt.savefig("epoch_image/image_at_epoch_" + str(epoch) + ".png")
    plt.show()

def train(train_images, train_labels, epochs):
    """Train for `epochs` epochs over [70000, 28, 28, 1] images / [70000, 10] labels.

    Saves a preview grid every epoch and a checkpoint every 20 epochs.
    """
    for epoch in range(epochs):
        start = time.time()
        # Reshuffle the full dataset each epoch.
        index = random.sample(range(0, train_images.shape[0]), train_images.shape[0])
        for step in range(train_images.shape[0] // 256):
            train_step(train_images[index[step*256:(step+1)*256]], train_labels[index[step*256:(step+1)*256]])
        # Render the fixed noise/label grid to track generator progress.
        generate_and_save_images(generator, epoch+1, tf.concat((noise_image, noise_label), axis=1))
        if (epoch+1) % 20 == 0:
            checkpoint.save(file_prefix="training_checkpoints/cDCGAN")
        print("Time for ", epoch, " epoch is: ", time.time() - start)

# Build a training-progress animation with imageio.
def gif_animation_generate():
    """Stitch the 80 per-epoch preview PNGs into one animated GIF (0.1 s/frame)."""
    gif_name = "cdcgan_gif.gif"
    filenames = ["epoch_image/image_at_epoch_" + str(i) + ".png" for i in range(1, 81)]
    frames = [imageio.imread(filename) for filename in filenames]
    imageio.mimsave(gif_name, frames, "GIF", duration=0.1)


if __name__ == "__main__":
    # Load MNIST and merge train/test into one 70000-sample training set.
    data = np.load("dataset/mnist.npz")
    train_images = np.concatenate((data["x_train"], data["x_test"]), axis=0)  # [70000, 28, 28]
    train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype("float32")  # add channel dim -> [70000, 28, 28, 1]
    train_images = (train_images - 127.5) / 127.5  # normalize pixels to [-1, 1]
    train_labels = np.concatenate((data["y_train"], data["y_test"]), axis=0)  # [70000, ]
    # One-hot encode with pure numpy (no need to round-trip through tf.cast
    # just to index a numpy identity matrix).
    train_labels = np.eye(10)[train_labels.astype(np.int32)].astype("float32")  # [70000, 10]
    train(train_images, train_labels, epochs=80)

    gif_animation_generate()
-------------本文结束感谢您的阅读-------------
您的鼓励就是我创作的动力,求打赏买面包~~
0%