20 KiB
This Place Does Exist - Utilities for StyleGAN3¶
This notebook contains utility functions for working with the models created by StyleGAN3.
Usage¶
Include it in any notebook using `%run ThisPlaceDoesExist.ipynb`, after which everything from this notebook becomes available, including a `runs` variable, which is a list containing all the `Run` objects.
import io
import os

import cv2
import numpy as np
from PIL import Image
from scipy.ndimage.filters import uniform_filter1d

from runs import Run, Snapshot, get_projections_in_dir, get_runs_in_dir
def is_main():
    """Report whether ``__file__`` is defined for this notebook.

    Works around a Jupyter bug
    (https://github.com/ipython/ipython/issues/10967): ``__file__`` is only
    defined when this notebook is executed from a parent notebook via ``%run``.

    Returns:
        True when ``__file__`` is defined (the notebook is being run from a
        parent notebook), False otherwise.
    """
    # NOTE(review): despite the name, True means "being %run by a parent
    # notebook" -- confirm that callers expect this polarity.
    try:
        __file__
    except NameError:
        # Not defined: this notebook is not being run from a parent notebook.
        return False
    else:
        # Defined: this notebook is being run from a parent notebook.
        return True
# Notebook-wide settings; 'runs_dir' is the directory handed to get_runs_in_dir.
args = dict(runs_dir='training-runs')

# All runs found for that directory, available to notebooks that %run this one.
runs = get_runs_in_dir(args['runs_dir'])
See Snapshot_images.ipynb for examples of each run/snapshot.
Plot run metrics¶
We can plot the progress of the metrics (FID) for each run. Sommige runs zijn een vervolg op een eerdere run. Dit zou kunnen zijn om het netwerk een voorgetraind startpunt te geven, maar in dit geval was het vooral omdat de training zo nu en dan was gestopt en weer herstart (wat een 'nieuwe' run geeft).
# importing package
import matplotlib.pyplot as plt
def plot_runs(runs, dpi=300, palette=None):
    """Plot the FID of every snapshot of each run on a log-scaled y axis.

    Args:
        runs: Iterable of run objects exposing ``id`` and ``snapshots``; each
            snapshot provides ``cumulative_iteration`` (x) and ``fid`` (y).
        dpi: Resolution of the newly created figure.
        palette: Optional sequence of colours, cycled through by run index.
            When falsy, the colour is left to matplotlib.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain
        ``.legend()`` / ``.show()`` on the result.
    """
    plt.figure(dpi=dpi)
    plt.yscale('log')

    for index, run in enumerate(runs):
        iterations = [snap.cumulative_iteration for snap in run.snapshots]
        fid_scores = [snap.fid for snap in run.snapshots]

        # Cycle through the palette when one is supplied.
        colour = None
        if palette:
            colour = palette[index % len(palette)]

        plt.plot(iterations, fid_scores, label=f"{index} {run.id}", c=colour)

    return plt
def is_main():
    """Plot the FID curves of all runs and show them with a legend."""
    # NOTE(review): this re-binds the name ``is_main`` and thereby replaces the
    # helper of the same name defined earlier in this notebook; the body only
    # runs when the function is called. Presumably ``if is_main():`` was
    # intended -- confirm before changing.
    figure = plot_runs(runs)
    figure.legend(bbox_to_anchor=(1, 0), loc="lower left")
    figure.show()
def plot_stats(stat_ids, runs, dpi=300, palette=None):
    """Plot smoothed training statistics of each run, one subplot per statistic.

    Args:
        stat_ids: Keys of the statistics to plot (for example
            ``'Loss/D/loss'``). Each key gets its own subplot.
        runs: Iterable of run objects exposing ``id``, ``kimg_offset`` and
            ``get_stats()``. Every stats entry is read as
            ``entry[key]['mean']``; ``'Progress/kimg'`` (plus the run's
            ``kimg_offset``) provides the x value.
        dpi: Resolution of the created figure.
        palette: Optional sequence of colours, cycled through by run index.
            When falsy, the colour is left to matplotlib.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain
        ``.legend()`` / ``.show()`` on the result.
    """
    # squeeze=False always yields a 2-D array of axes, so a single stat_id
    # works too (plt.subplots would otherwise return one bare Axes object,
    # which cannot be iterated or indexed).
    _, axes_grid = plt.subplots(
        nrows=1,
        ncols=len(stat_ids),
        figsize=(10 * len(stat_ids), 9),
        dpi=dpi,
        squeeze=False,
    )
    axes = axes_grid[0]

    for ax, stat_id in zip(axes, stat_ids):
        ax.set_xlabel('kimg')
        ax.set_ylabel(stat_id)
        ax.set_yscale('symlog', linthresh=1)  # 0-1: linear, >1: log scale

    for run_idx, run in enumerate(runs):
        entries = list(run.get_stats())
        if not entries:
            # Nothing recorded for this run; there is nothing to smooth or plot.
            continue

        x = [entry['Progress/kimg']['mean'] + run.kimg_offset for entry in entries]
        colour = palette[run_idx % len(palette)] if palette else None

        # The subplot loop uses its own names so the run index is not
        # shadowed: the legend label must identify the run, not the statistic.
        for ax, stat_id in zip(axes, stat_ids):
            means = [entry[stat_id]['mean'] for entry in entries]
            # smooth slightly for better readability
            y = uniform_filter1d(means, size=20)
            ax.plot(x, y, label=f"{run_idx} {run.id}", c=colour)

    return plt
def is_main():
    """Plot the 'Loss/D/loss' and 'Loss/G/loss' statistics of all runs."""
    # NOTE(review): this re-binds the name ``is_main`` and thereby replaces the
    # helper of the same name defined earlier in this notebook; the body only
    # runs when the function is called. Presumably ``if is_main():`` was
    # intended -- confirm before changing.
    loss_ids = [
        'Loss/D/loss',
        'Loss/G/loss',
    ]
    figure = plot_stats(loss_ids, runs)
    figure.legend()
    figure.show()
excerpts¶
Als we de runs met de laagste FID-scores bekijken, krijgen we een beeld van de kwaliteit van de netwerken.
Wat vooral opvalt is dat het netwerk met gecropte beelden (00014+16) de meest kleurrijke beelden geeft en dus niet convergeert naar een beige-grijs zoals veel van de andere netwerken.
def is_main():
    """Display iteration and preview image for three snapshots of the last run."""
    # NOTE(review): this re-binds the name ``is_main`` and thereby replaces the
    # helper of the same name defined earlier in this notebook; the body only
    # runs when the function is called. Presumably ``if is_main():`` was
    # intended -- confirm before changing.
    shown = []
    # Snapshots -10, -2 and -1 of the last run, in that order.
    for offset in (-10, -2, -1):
        shown.append(runs[-1].snapshots[offset].iteration)
        shown.append(runs[-1].snapshots[offset].get_preview_img(8, 1))
    display(*shown)
def is_main():
    """Display the preview of the last snapshot of runs 3, 2 and 5."""
    # NOTE(review): this re-binds the name ``is_main`` and thereby replaces the
    # helper of the same name defined earlier in this notebook; the body only
    # runs when the function is called. Presumably ``if is_main():`` was
    # intended -- confirm before changing.
    previews = [
        runs[run_idx].snapshots[-1].get_preview_img(4, 1)
        for run_idx in (3, 2, 5)
    ]
    display(*previews)
Stylegan 3 functions¶
Helper functions for Stylegan 3 operations
import torch
Convert seeds to z-space, z-space to w-space, use w-space to generate images, and turn generated images into Jupyter widgets for visualisation.
# adapted from https://github.com/dvschultz/stylegan2-ada-pytorch/blob/9b6750b96dc9841816e8ac57b05f395d0f23c30d/generate.py
def seeds_to_zs(G, seeds):
    """Turn random seeds into latent ``z`` vectors.

    Args:
        G: Generator object; only ``G.z_dim`` is read.
        seeds: Iterable of seeds accepted by ``np.random.RandomState``.

    Returns:
        A list with one ``(1, G.z_dim)`` numpy array per seed, each drawn with
        ``np.random.RandomState(seed).randn`` so a seed always maps to the
        same vector.
    """
    return [np.random.RandomState(seed).randn(1, G.z_dim) for seed in seeds]
def zs_to_ws(G, device, label, truncation_psi, zs):
    """Map latent ``z`` vectors to ``w`` vectors using ``G.mapping``.

    Args:
        G: Generator object providing ``mapping``.
        device: Torch device the ``z`` tensors are moved to before mapping.
        label: Label argument forwarded unchanged to ``G.mapping``.
        truncation_psi: Truncation strength forwarded to ``G.mapping``.
        zs: Iterable of numpy arrays (as produced by ``seeds_to_zs``).

    Returns:
        A list with the ``G.mapping`` result for every entry of ``zs``, in
        order. ``truncation_cutoff`` is fixed at 8.
    """
    def to_w(z):
        z_tensor = torch.from_numpy(z).to(device)
        return G.mapping(
            z_tensor,
            label,
            truncation_psi=truncation_psi,
            truncation_cutoff=8,
        )

    return [to_w(z) for z in zs]
def images(G, device, inputs, space, truncation_psi, label, noise_mode, start=None, stop=None):
    """Generate one image per latent point in z or w space (deprecated).

    Args:
        G: Generator. Called directly for ``space == 'z'``; otherwise
            ``G.synthesis`` is used.
        device: Torch device the latent points are moved to.
        inputs: Sized sequence of latent points (numpy arrays or tensors).
        space: ``'z'`` to treat the points as z vectors; any other value
            treats them as w vectors.
        truncation_psi: Truncation strength used in z space when no
            ``start``/``stop`` ramp is given.
        label: Label argument forwarded to ``G`` in z space.
        noise_mode: Noise mode forwarded to the generator.
        start: Optional truncation psi for the first frame (z space only).
        stop: Optional end of the truncation ramp. When both ``start`` and
            ``stop`` are given, psi grows by ``(stop - start) / len(inputs)``
            per frame, so the last frame is one step short of ``stop``.

    Yields:
        ``(name, image)`` tuples, where ``name`` is the zero-padded frame
        index and ``image`` is an RGB PIL image.
    """
    ramp_psi = start is not None and stop is not None
    # Only set up the ramp when there are frames: this avoids a division by
    # zero for empty input, in which case nothing is yielded anyway.
    if ramp_psi and len(inputs) > 0:
        psi = start
        psi_step = (stop - start) / len(inputs)

    for idx, point in enumerate(inputs):
        if space == 'z':
            # as_tensor accepts numpy arrays as well as tensors (slerp output).
            z = torch.as_tensor(point).to(device)
            if ramp_psi:
                img = G(z, label, truncation_psi=psi, noise_mode=noise_mode)
                psi = psi + psi_step
            else:
                img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
        else:
            w = point if torch.is_tensor(point) else torch.from_numpy(point)
            if w.dim() == 2:
                # 2-D points get a leading axis added before synthesis.
                w = w.unsqueeze(0)
            img = G.synthesis(w.to(device), noise_mode=noise_mode, force_fp32=True)

        # Rescale the generator output to uint8 pixel values in 0..255.
        img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
        yield f"{idx:04d}", Image.fromarray(img[0].cpu().numpy(), 'RGB')
def w_to_img(G, device, noise_mode, w):
    """Synthesise a single RGB PIL image from a ``w`` tensor.

    Args:
        G: Generator providing ``synthesis``.
        device: Not used by this function.
        noise_mode: Noise mode forwarded to ``G.synthesis``.
        w: Latent tensor passed unchanged to ``G.synthesis``.

    Returns:
        A PIL image built from the first element of the synthesised batch.
    """
    synthesized = G.synthesis(w, noise_mode=noise_mode, force_fp32=True)
    # Reorder axes (presumably NCHW -> NHWC) and rescale to uint8 0..255.
    permuted = synthesized.permute(0, 2, 3, 1)
    pixels = (permuted * 127.5 + 128).clamp(0, 255).to(torch.uint8)
    first_image = pixels[0].cpu().numpy()
    return Image.fromarray(first_image, 'RGB')
Interpolation of vectors; browsing latent space.¶
def interpolate(G, device, projected_w, seeds, random_seed, space, truncation_psi, label, frames, noise_mode, outdir, interpolation,
                easing, diameter, start=None, stop=None):
    """Generate the frames of an interpolation through latent space.

    Args:
        G: Generator.
        device: Torch device used for mapping and synthesis.
        projected_w: Optional path to an ``.npz`` file whose ``'w'`` entry
            provides the key points; takes precedence over ``seeds``.
        seeds: Seeds for the key points when ``projected_w`` is None.
        random_seed: Seed forwarded to the ``noiseloop`` / ``circularloop``
            helpers.
        space: ``'z'`` or ``'w'``; seed-based key points are mapped to w
            space when this is ``'w'``.
        truncation_psi: Truncation strength.
        label: Label argument forwarded to the generator.
        frames: Number of frames, forwarded to the interpolation helper.
        noise_mode: Noise mode forwarded to the generator.
        outdir: Unused; kept so existing callers keep working.
        interpolation: ``'noiseloop'``, ``'circularloop'``, ``'linear'`` or
            ``'slerp'``. Any other value leaves the key points as they are.
        easing: Easing argument forwarded to ``line_interpolate``.
        diameter: Diameter forwarded to the loop helpers.
        start: Optional start of a truncation psi ramp (see ``images``).
        stop: Optional end of that ramp.

    Yields:
        ``(name, image)`` tuples exactly as produced by ``images``.
    """
    if interpolation in ('noiseloop', 'circularloop'):
        if seeds is not None:
            print(f'Warning: interpolation type: "{interpolation}" doesn’t support set seeds.')

        if interpolation == 'noiseloop':
            points = noiseloop(frames, diameter, random_seed)
        else:
            points = circularloop(frames, diameter, random_seed, seeds)
    else:
        if projected_w is not None:
            points = np.load(projected_w)['w']
        else:
            # get zs from seeds
            points = seeds_to_zs(G, seeds)
            # convert to ws
            if space == 'w':
                points = zs_to_ws(G, device, label, truncation_psi, points)

        # get interpolation points
        if interpolation == 'linear':
            points = line_interpolate(points, frames, easing)
        elif interpolation == 'slerp':
            points = slerp_interpolate(points, frames)

    # generate frames
    # `images` takes no `outdir`; passing it positionally shifted `start` and
    # `stop` and supplied one argument too many, so it is no longer forwarded.
    yield from images(G, device, points, space, truncation_psi, label, noise_mode, start, stop)
# slightly modified version of
# https://github.com/PDillis/stylegan2-fun/blob/master/run_generator.py#L399
def slerp(t, v0, v1, DOT_THRESHOLD=0.9995):
    """Spherical linear interpolation between two vectors.

    Args:
        t (float/np.ndarray): Float value between 0.0 and 1.0.
        v0 (np.ndarray or torch.Tensor): Starting vector.
        v1 (np.ndarray or torch.Tensor): Final vector.
        DOT_THRESHOLD (float): Threshold for considering the two vectors as
            collinear. Not recommended to alter this.

    Returns:
        torch.Tensor: Interpolation vector between v0 and v1. It is placed on
        the device of the first tensor input; for pure numpy input it is
        placed on "cuda" when available and on "cpu" otherwise.
    """
    # Decide where the result lives before the inputs are converted to numpy.
    # The device used to be hard-coded to "cuda", which fails on CPU-only
    # machines and ignores the device the caller is working on.
    if torch.is_tensor(v0):
        device = v0.device
    elif torch.is_tensor(v1):
        device = v1.device
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    v0 = v0.cpu().detach().numpy() if hasattr(v0, 'cpu') else v0
    v1 = v1.cpu().detach().numpy() if hasattr(v1, 'cpu') else v1

    # Copy the vectors to reuse them later
    v0_copy = np.copy(v0)
    v1_copy = np.copy(v1)
    # Normalize the vectors to get the directions and angles
    v0 = v0 / np.linalg.norm(v0)
    v1 = v1 / np.linalg.norm(v1)
    # Dot product with the normalized vectors (can't use np.dot in W)
    dot = np.sum(v0 * v1)

    # If absolute value of dot product is almost 1, vectors are ~collinear, so
    # use lerp. The result is converted as well, so both branches return the
    # same type.
    if np.abs(dot) > DOT_THRESHOLD:
        return torch.as_tensor(lerp(t, v0_copy, v1_copy)).to(device)

    # Calculate initial angle between v0 and v1
    theta_0 = np.arccos(dot)
    sin_theta_0 = np.sin(theta_0)
    # Angle at timestep t
    theta_t = theta_0 * t
    sin_theta_t = np.sin(theta_t)
    # Finish the slerp algorithm
    s0 = np.sin(theta_0 - theta_t) / sin_theta_0
    s1 = sin_theta_t / sin_theta_0
    v2 = s0 * v0_copy + s1 * v1_copy
    return torch.from_numpy(np.asarray(v2)).to(device)
def slerp_interpolate(zs, steps):
    """Interpolate spherically between each pair of consecutive vectors.

    Args:
        zs: Indexable sequence of key vectors.
        steps: Number of frames generated per consecutive pair.

    Returns:
        A flat list of ``slerp`` results. For every pair the fractions run
        over ``0/steps .. (steps - 1)/steps``, so each segment starts at its
        first vector and stops one step short of the second.
    """
    return [
        slerp(step / float(steps), zs[pair], zs[pair + 1])
        for pair in range(len(zs) - 1)
        for step in range(steps)
    ]
Project an image to the latent space¶
import subprocess, operator
def project_img_to_z(snapshot: Snapshot, image_filename: str, steps=1000, replace_if_exists=False) -> dict:
    """Project an image into the latent space of a snapshot.

    Runs ``pbaylies_projector.py`` as a subprocess unless a projection for
    this snapshot/image pair already exists on disk.

    Args:
        snapshot: Snapshot whose ``pkl_path`` is the network to project into;
            its ``id`` is part of the output directory.
        image_filename: Path of the target image. The path without its
            extension is used as the output sub-directory name.
        steps: Number of projection steps passed as ``--num-steps``.
        replace_if_exists: Re-run the projection even if ``proj.png`` exists.

    Returns:
        A dict with the paths of the projected image (``img``), the target
        copy (``src_img``), the original file (``src``) and the projected w
        (``npz``), plus ``loss`` and ``dist`` as the strings printed by the
        projector for its last step (both None when an existing projection
        is reused).

    Raises:
        RuntimeError: If the projector did not report a single step, which
            means that the projection did not run.
    """
    # splitext handles extensions of any length (e.g. ".jpeg"); slicing off
    # the last four characters only worked for three-letter extensions.
    image_name = os.path.splitext(image_filename)[0]
    out_dir = f"out/projections/{snapshot.id}/{image_name}"

    # TODO: get loss and dist from somewhere when the projection is reused?
    # (currently not using it much)
    loss, dist = (None, None)

    if replace_if_exists or not os.path.exists(f"{out_dir}/proj.png"):
        result = subprocess.run([
            "python", "pbaylies_projector.py",
            "--network", snapshot.pkl_path,
            "--outdir", out_dir,
            "--target-image", image_filename,
            "--use-clip", "False",
            "--num-steps", str(steps),
            "--save-video", "False"
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        print(result.stderr)

        step_lines = [line for line in result.stdout.split("\n") if line.startswith("step")]
        if not step_lines:
            # Previously this surfaced as an opaque IndexError.
            raise RuntimeError(
                f"pbaylies_projector.py reported no steps for {image_filename!r} "
                f"(exit code {result.returncode})"
            )
        # Per step line: loss is the last token, dist the third from last.
        loss, dist = operator.itemgetter(-1, -3)(step_lines[-1].split(" "))

    return {
        "img": f"{out_dir}/proj.png",
        "src_img": f"{out_dir}/target.png",
        "src": image_filename,
        "npz": f"{out_dir}/projected_w.npz",
        "loss": loss,
        "dist": dist
    }
Displaying Videos and Images¶
def img_to_widget(img):
    """Encode an image as PNG and wrap it in a ``widgets.Image``.

    Args:
        img: Image object providing ``save(buffer, format=...)``.

    Returns:
        A ``widgets.Image`` holding the PNG bytes, with width and height set
        to ``run_data['resolution']``.
    """
    # NOTE(review): `run_data` is not defined in the visible part of this
    # notebook; presumably the including notebook provides it -- confirm.
    png_buffer = io.BytesIO()
    img.save(png_buffer, format='png')
    png_bytes = png_buffer.getvalue()
    resolution = run_data['resolution']
    return widgets.Image(value=png_bytes, format='png', width=resolution, height=resolution)
def video_to_widget(filename):
    """Read a video file into memory and wrap it in a ``widgets.Video``.

    Args:
        filename: Path of the video file; it is read completely as bytes.

    Returns:
        A ``widgets.Video`` whose value is the file content.
    """
    with open(filename, 'rb') as video_file:
        payload = video_file.read()
    return widgets.Video(value=payload)
def image_grid(imgs, cols=None, rows=None, margin = 10):
    """Paste images onto one RGB canvas, laid out row by row.

    Args:
        imgs: Non-empty sequence of PIL images; the size of the first image
            determines the cell size.
        cols: Number of columns.
        rows: Number of rows.
        margin: Gap in pixels between neighbouring cells.

    Returns:
        A new RGB image of ``cols`` x ``rows`` cells.
    """
    # Without a complete size, put all images on the horizontal axis.
    # NOTE(review): a lone `cols` or `rows` is ignored here as well --
    # confirm whether that is intended.
    if cols is None or rows is None:
        cols, rows = len(imgs), 1

    img_w, img_h = imgs[0].size
    cell_w = img_w + margin
    cell_h = img_h + margin

    # No trailing margin after the last column and the last row.
    canvas = Image.new('RGB', size=(cols * cell_w - margin, rows * cell_h - margin))
    for index, img in enumerate(imgs):
        row, col = divmod(index, cols)
        canvas.paste(img, box=(col * cell_w, row * cell_h))
    return canvas
Postprocessing¶
Sometimes we like to generate videos from a series of ws
from imageio_ffmpeg import write_frames
def generator_to_video(generator, out_filename, fps, frame_size, quality):
    """Write the frames produced by a generator to a video file.

    Args:
        generator: Iterable of frames; PIL images are converted to numpy
            arrays, anything else is sent to the writer unchanged.
        out_filename: Path of the video file to write.
        fps: Frame rate of the resulting video.
        frame_size: Frame size as ``(width, height)``.
        quality: Quality setting forwarded to ``write_frames``.
    """
    # `fps` used to be accepted but never forwarded, so the writer's default
    # frame rate was used regardless of the argument.
    writer = write_frames(out_filename, frame_size, fps=fps, quality=quality)  # size is (width, height)
    writer.send(None)  # seed the generator
    try:
        for frame in generator:
            # isinstance also covers subclasses of Image.Image.
            if isinstance(frame, Image.Image):
                frame = np.array(frame)
            writer.send(frame)
    finally:
        # Always close the writer, also when a frame fails to generate.
        writer.close()