Question: How can I get a latent that was used to generate an image during the projection process of StyleGAN2?
Hello! I am playing around with this StyleGAN2 Colab notebook. It can generate 1024x1024 high-resolution face images and more. I tried to find the generatable face that most closely resembles Cristiano Ronaldo. I ran their code and it worked fine:
Then I changed the method that projects Ronaldo so that it returns the Projector object, ran it again, and saved the object in a variable.
Projector class:
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from training import misc
class Projector:
    """Fit a StyleGAN2 dlatent and the noise inputs so the generated image matches a target.

    Typical use, as visible in this class: call ``set_network(Gs)`` once to
    build the TensorFlow graph, then either ``run(target_images)`` or
    ``start(target_images)`` followed by repeated ``step()`` calls. Results
    are read back with ``get_dlatents()``, ``get_noises()`` and
    ``get_images()``.

    The value returned by ``get_dlatents()`` is the optimised variable tiled
    along axis 1 to ``Gs.components.synthesis.input_shape[1]`` rows; it is
    what this class feeds to the *synthesis* network to produce
    ``get_images()``.

    NOTE(review): the pasted copy of this class had lost its indentation and
    several statements were truncated or missing. Lines marked ``restored``
    were reconstructed so the class parses and runs; confirm them against
    the upstream file.
    """

    def __init__(self):
        # Tunable hyperparameters.
        self.num_steps = 1000
        self.dlatent_avg_samples = 10000
        self.initial_learning_rate = 0.1
        self.initial_noise_factor = 0.05
        self.lr_rampdown_length = 0.25
        self.lr_rampup_length = 0.05
        self.noise_ramp_length = 0.75
        self.regularize_noise_weight = 1e5
        self.verbose = False
        self.clone_net = True

        # Internal state, populated by set_network() and start().
        self._Gs = None
        self._minibatch_size = None
        self._dlatent_avg = None
        self._dlatent_std = None
        self._noise_vars = None
        self._noise_init_op = None
        self._noise_normalize_op = None
        self._dlatents_var = None
        self._noise_in = None
        self._dlatents_expr = None
        self._images_expr = None
        self._target_images_var = None
        self._lpips = None
        self._dist = None
        self._loss = None
        self._reg_sizes = None
        self._lrate_in = None
        self._opt = None
        self._opt_step = None
        self._cur_step = None

    def _info(self, *args):
        """Print a progress message prefixed with 'Projector:' when ``verbose`` is set."""
        if self.verbose:
            print('Projector:', *args)

    def set_network(self, Gs, minibatch_size=1):
        """Attach generator ``Gs`` and build the image, loss and optimizer graphs.

        Only ``minibatch_size == 1`` is accepted (asserted). Passing
        ``Gs=None`` just clears the stored network and returns.
        """
        assert minibatch_size == 1
        self._Gs = Gs
        self._minibatch_size = minibatch_size
        if self._Gs is None:
            return  # restored: nothing to build without a network
        if self.clone_net:
            self._Gs = self._Gs.clone()

        # Find dlatent stats.
        self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
        latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
        # restored: the call producing dlatent_samples was truncated to ", None)[:, :1, :]".
        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :]  # [N, 1, 512]
        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True)  # [1, 1, 512]
        self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
        self._info('std = %g' % self._dlatent_std)

        # Find noise inputs: collect G_synthesis/noise0, noise1, ... until a name is missing.
        self._info('Setting up noise inputs...')
        self._noise_vars = []
        noise_init_ops = []
        noise_normalize_ops = []
        while True:
            n = 'G_synthesis/noise%d' % len(self._noise_vars)
            if n not in self._Gs.vars:
                break  # restored: without this the loop never terminates
            v = self._Gs.vars[n]
            self._noise_vars.append(v)  # restored: the name index above depends on this list growing
            noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32)))
            noise_mean = tf.reduce_mean(v)
            noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5
            noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std))
            self._info(n, v)
        # restored: both right-hand sides were truncated to "*noise_..._ops)".
        self._noise_init_op = tf.group(*noise_init_ops)
        self._noise_normalize_op = tf.group(*noise_normalize_ops)

        # Image output graph.
        self._info('Building image output graph...')
        self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
        self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
        dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
        # The single optimised row is repeated for every synthesis-network input row.
        self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
        self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)

        # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
        proc_images_expr = (self._images_expr + 1) * (255 / 2)
        sh = proc_images_expr.shape.as_list()
        if sh[2] > 256:
            factor = sh[2] // 256
            proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[2] // factor, factor]), axis=[3,5])

        # Loss graph.
        self._info('Building loss graph...')
        self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var')
        if self._lpips is None:
            # NOTE(review): the pickle location is an empty string in this paste
            # (presumably stripped); supply the perceptual-distance network's
            # path/URL, or assign self._lpips before calling set_network().
            self._lpips = misc.load_pkl('')
        self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var)
        self._loss = tf.reduce_sum(self._dist)

        # Noise regularization graph: penalise correlation between each noise map
        # and itself shifted by one pixel, at successively halved resolutions.
        self._info('Building noise regularization graph...')
        reg_loss = 0.0
        for v in self._noise_vars:
            sz = v.shape[2]
            while True:
                reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2
                if sz <= 8:
                    break  # Small enough already
                v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2])  # Downscale
                v = tf.reduce_mean(v, axis=[3, 5])
                sz = sz // 2
        self._loss += reg_loss * self.regularize_noise_weight

        # Optimizer.
        self._info('Setting up optimizer...')
        self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in')
        self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in)
        self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars)
        self._opt_step = self._opt.apply_updates()

    def run(self, target_images):
        """Project ``target_images`` for ``num_steps`` steps and return the results.

        Returns a ``dnnlib.EasyDict`` with ``dlatents``, ``noises`` and ``images``.
        """
        # Run to completion.
        self.start(target_images)  # restored: _cur_step is None until start() runs
        while self._cur_step < self.num_steps:
            self.step()  # restored: the loop body was missing

        # Collect results.
        pres = dnnlib.EasyDict()
        pres.dlatents = self.get_dlatents()
        pres.noises = self.get_noises()
        pres.images = self.get_images()
        return pres

    def start(self, target_images):
        """Load the target images and reset the optimisation to step 0.

        ``target_images`` is converted to float32, mapped with
        ``(x + 1) * (255 / 2)`` and box-downsampled when wider than the
        target variable. Requires a prior ``set_network()`` call (asserted).
        """
        assert self._Gs is not None

        # Prepare target images.
        self._info('Preparing target images...')
        target_images = np.asarray(target_images, dtype='float32')
        target_images = (target_images + 1) * (255 / 2)
        sh = target_images.shape
        assert sh[0] == self._minibatch_size
        if sh[2] > self._target_images_var.shape[2]:
            factor = sh[2] // self._target_images_var.shape[2]
            target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5))

        # Initialize optimization state: dlatent starts at the sampled average.
        self._info('Initializing optimization state...')
        tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])})
        # restored: re-randomise the noise maps and clear optimizer state between
        # projections — NOTE(review): confirm against the upstream file.
        tflib.run(self._noise_init_op)
        self._opt.reset_optimizer_state()
        self._cur_step = 0

    def step(self):
        """Run one optimisation step; a no-op once ``num_steps`` has been reached."""
        assert self._cur_step is not None
        if self._cur_step >= self.num_steps:
            return  # restored
        if self._cur_step == 0:
            self._info('Running...')  # restored

        # Hyperparameters: noise decays quadratically to zero over noise_ramp_length;
        # the learning rate ramps up linearly and ramps down along a cosine.
        t = self._cur_step / self.num_steps
        noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2
        lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length)
        learning_rate = self.initial_learning_rate * lr_ramp

        # Train.
        feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate}
        # restored: the call was truncated to "[...], feed_dict)".
        _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict)
        # restored: renormalise the noise maps to zero mean / unit std after the update.
        tflib.run(self._noise_normalize_op)

        # Print status.
        self._cur_step += 1
        if self._cur_step == self.num_steps or self._cur_step % 10 == 0:
            self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value))
        if self._cur_step == self.num_steps:
            self._info('Done.')  # restored

    def get_cur_step(self):
        """Return the current step index, or ``None`` before ``start()``."""
        return self._cur_step

    def get_dlatents(self):
        """Evaluate the tiled dlatent expression with the dlatent noise fed as 0."""
        # restored: the call was truncated to ", {self._noise_in: 0})".
        return tflib.run(self._dlatents_expr, {self._noise_in: 0})

    def get_noises(self):
        """Return the current values of the noise variables found by ``set_network()``."""
        # restored: the body was missing entirely — NOTE(review): confirm.
        return tflib.run(self._noise_vars)

    def get_images(self):
        """Evaluate the synthesised image expression with the dlatent noise fed as 0."""
        # restored: the call was truncated to ", {self._noise_in: 0})".
        return tflib.run(self._images_expr, {self._noise_in: 0})
I took that object and called its get_dlatents method, thinking it would return the input latent that produced Cristiano.
Generating an image with that latent gives a result that is clearly nowhere near Ronaldo ("proji" is the Projector object):
# Evaluate the projector's dlatent expression (its noise input is fed as 0).
latents = proji.get_dlatents()
# Keep only row 17 of the first batch entry and give it a leading axis of 1.
latent = np.reshape(latents[0][17], (1, 512))
# NOTE(review): generate_images comes from the notebook and is not shown here;
# confirm which kind of latent it expects.
img = generate_images([latent], 1.0)[0]
Result: this was supposed to be Ronaldo
I do not know whether I made a conceptual or a coding mistake. All I want to know is: how can I get the latent that was used to generate an image during the projection process? To understand the problem you will probably need to look at the Colab notebook yourself; I didn't want to paste everything here.
Thanks for taking your time to look at this.
I don't know if your question is still relevant, but what you are looking for is projecting an image into the latent space. This GitHub page is clean and precise. After setting up the environment, you can get your latents in two steps.
To extract and align faces from images: python align_images.py raw_images/ aligned_images/
To find the latent representation of the aligned images: python encode_images.py aligned_images/ generated_images/ latent_representations/
Your latents will be in the latent_representations folder. Now you can use these latents to generate your desired faces. Good luck.