stylegan3/ThisPlaceDoesExist.ipynb

20 KiB
Raw Permalink Blame History

This Place Does Exist - Utilities for StyleGAN3

This notebook contains utility functions for working with the models created by StyleGAN3.

Usage

Include it in any notebook using %run ThisPlaceDoesExist.ipynb, after which everything from this notebook becomes available, including a runs variable, which is a list containing all the Run objects.

In [1]:
import io
import os

import cv2
import numpy as np
from PIL import Image
from scipy.ndimage.filters import uniform_filter1d

from runs import Run, Snapshot, get_projections_in_dir, get_runs_in_dir
In [2]:
def is_main():
    """Tell whether ``__file__`` is defined for this notebook.

    Returns True when ``__file__`` resolves, which (per the original notes)
    means the notebook is being executed via ``%run`` from a parent notebook,
    and False when it does not.
    Works around jupyter bug: https://github.com/ipython/ipython/issues/10967
    """
    # A bare `__file__` lookup only succeeds when the name exists in the
    # module globals, so a membership test is equivalent to the
    # try/except NameError probe.
    return "__file__" in globals()
In [3]:
# Notebook-wide settings; 'runs_dir' is the directory handed to get_runs_in_dir.
args = dict(runs_dir='training-runs')
In [4]:
# The Run objects found under args['runs_dir']; exposed to notebooks that %run this one.
runs = get_runs_in_dir(args['runs_dir'])

See Snapshot_images.ipynb for examples of each run/snapshot.

Plot run metrics

We can plot the progress of the metrics (FID) for each run. Sommige runs zijn een vervolg op een eerdere run. Dit zou kunnen zijn om het netwerk een voorgetraind startpunt te geven, maar in dit geval was het vooral omdat de training zo nu en dan was gestopt en weer herstart (wat een 'nieuwe' run geeft).

In [5]:
# importing package
import matplotlib.pyplot as plt

def plot_runs(runs, dpi=300, palette=None):
    """Draw one FID-versus-iteration line per run on a log-scaled y axis.

    Args:
        runs: iterable of run objects; each must expose ``snapshots`` (whose
            items have ``cumulative_iteration`` and ``fid``) and ``id``.
        dpi: resolution of the figure that is created.
        palette: optional sequence of colours, cycled over the runs. When it
            is falsy, matplotlib chooses the colours.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can add a legend and
        show the figure.
    """
    plt.figure(dpi=dpi)
    plt.yscale('log')

    for run_idx, run in enumerate(runs):
        iterations = [snap.cumulative_iteration for snap in run.snapshots]
        fids = [snap.fid for snap in run.snapshots]

        # cycle through the palette when there are more runs than colours
        colour = None
        if palette:
            colour = palette[run_idx % len(palette)]

        plt.plot(iterations, fids, label=f"{run_idx} {run.id}", c=colour)

    return plt
In [6]:
def is_main():
    """Plot the FID curves of all runs with the legend placed right of the axes."""
    # NOTE(review): this `def` rebinds the name `is_main` declared earlier in
    # the notebook, and its body only runs when the function is called --
    # confirm whether `if is_main():` was intended.
    figure_api = plot_runs(runs)
    figure_api.legend(loc="lower left", bbox_to_anchor=(1, 0))
    figure_api.show()
In [11]:
def plot_stats(stat_ids, runs, dpi=300, palette=None):
    """Plot smoothed training statistics for every run, one subplot per statistic.

    Args:
        stat_ids: keys of the statistics to plot (e.g. ``'Loss/G/loss'``); each
            entry of ``run.get_stats()`` must hold a ``'mean'`` under that key.
        runs: iterable of run objects exposing ``get_stats()``, ``kimg_offset``
            and ``id``.
        dpi: resolution of the figure that is created.
        palette: optional sequence of colours, cycled over the runs. When it
            is falsy, matplotlib chooses the colours.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can add a legend and
        show the figure.
    """
    n_stats = len(stat_ids)
    # squeeze=False keeps `axes` an array even when a single statistic is
    # requested; otherwise plt.subplots returns a bare Axes that cannot be
    # iterated or indexed.
    _, axes_grid = plt.subplots(
        nrows=1, ncols=n_stats, figsize=(10 * n_stats, 9), dpi=dpi, squeeze=False
    )
    axes = axes_grid[0]

    for ax, stat_id in zip(axes, stat_ids):
        ax.set_xlabel('kimg')
        ax.set_ylabel(stat_id)
        ax.set_yscale('symlog', linthresh=1)  # 0-1: linear, >1: log scale

    for run_idx, run in enumerate(runs):
        run_stats = list(run.get_stats())
        # x axis: progress in kimg, shifted by the run's own offset
        x = [s['Progress/kimg']['mean'] + run.kimg_offset for s in run_stats]
        c = palette[run_idx % len(palette)] if palette else None

        # The inner loop uses its own names so the run index is not
        # overwritten: the legend label must identify the run, not the
        # statistic's column.
        for ax, stat_id in zip(axes, stat_ids):
            means = [s[stat_id]['mean'] for s in run_stats]
            # smooth slightly for better readability
            y = uniform_filter1d(means, size=20)
            ax.plot(x, y, label=f"{run_idx} {run.id}", c=c)

    return plt
In [ ]:
def is_main():
    """Plot the 'Loss/D/loss' and 'Loss/G/loss' statistics of all runs."""
    # NOTE(review): this `def` rebinds the name `is_main` declared earlier in
    # the notebook -- confirm whether `if is_main():` was intended.
    loss_stat_ids = ['Loss/D/loss', 'Loss/G/loss']
    figure_api = plot_stats(loss_stat_ids, runs)
    figure_api.legend()
    figure_api.show()

excerpts

Als we de runs met de laagste FID-scores bekijken, krijgen we een beeld van de kwaliteit van de netwerken.

Wat vooral opvalt is dat het netwerk met gecropte beelden (00014+16) de meest kleurrijke beelden geeft en dus niet convergeert naar een beige-grijs zoals veel van de andere netwerken.

In [ ]:
def is_main():
    """Display iteration number and an 8x1 preview for three snapshots of the last run."""
    # NOTE(review): this `def` rebinds the name `is_main` declared earlier in
    # the notebook -- confirm whether `if is_main():` was intended.
    shown = []
    # snapshots at positions -10, -2 and -1 of the last run, in that order
    for offset in (-10, -2, -1):
        shown.append(runs[-1].snapshots[offset].iteration)
        shown.append(runs[-1].snapshots[offset].get_preview_img(8, 1))
    display(*shown)
In [ ]:
def is_main():
    """Display a 4x1 preview of the final snapshot of runs 3, 2 and 5 (in that order)."""
    # NOTE(review): this `def` rebinds the name `is_main` declared earlier in
    # the notebook -- confirm whether `if is_main():` was intended.
    previews = [
        runs[run_idx].snapshots[-1].get_preview_img(4, 1)
        for run_idx in (3, 2, 5)
    ]
    display(*previews)

Stylegan 3 functions

Helper functions for Stylegan 3 operations

In [20]:
import torch

These helpers convert seeds to z-space vectors, map z-space to w-space, generate images from w-space vectors, and turn generated images into Jupyter widgets for visualisation.

In [21]:
# adapted from https://github.com/dvschultz/stylegan2-ada-pytorch/blob/9b6750b96dc9841816e8ac57b05f395d0f23c30d/generate.py

def seeds_to_zs(G,seeds):
    """Turn each seed into a reproducible z vector.

    Args:
        G: generator object; only its ``z_dim`` attribute is read.
        seeds: iterable of integer seeds.

    Returns:
        A list with one NumPy array of shape ``(1, G.z_dim)`` per seed, drawn
        from ``np.random.RandomState(seed).randn``.
    """
    return [np.random.RandomState(seed).randn(1, G.z_dim) for seed in seeds]

def zs_to_ws(G,device,label,truncation_psi,zs):
    """Map z vectors to w vectors through the generator's mapping network.

    Args:
        G: generator object exposing ``mapping(z, label, truncation_psi=...,
            truncation_cutoff=...)``.
        device: torch device the z tensors are moved to before mapping.
        label: label argument forwarded unchanged to ``G.mapping``.
        truncation_psi: truncation strength forwarded to ``G.mapping``.
        zs: iterable of NumPy arrays.

    Returns:
        A list with the result of ``G.mapping`` for each z, always called with
        ``truncation_cutoff=8``.
    """
    return [
        G.mapping(
            torch.from_numpy(z).to(device),
            label,
            truncation_psi=truncation_psi,
            truncation_cutoff=8,
        )
        for z in zs
    ]

def images(G,device,inputs,space,truncation_psi,label,noise_mode,start=None,stop=None):
    """Generate one image per z- or w-space input (deprecated).

    Args:
        G: generator; called directly for z inputs, ``G.synthesis`` is used
            for everything else.
        device: torch device the inputs are moved to.
        inputs: sized sequence of latent vectors.
        space: ``'z'`` to treat inputs as z vectors; any other value takes the
            synthesis path.
        truncation_psi: truncation used for z inputs when no ramp is requested.
        label: label argument forwarded to ``G`` for z inputs.
        noise_mode: forwarded to the generator.
        start, stop: when both are given, z inputs use a truncation value that
            begins at ``start`` and grows by ``(stop - start) / len(inputs)``
            after every frame, instead of ``truncation_psi``.

    Yields:
        ``(name, image)`` pairs, where ``name`` is the zero-padded four-digit
        frame index and ``image`` is an RGB PIL image.
    """
    ramp_psi = start is not None and stop is not None
    if ramp_psi:
        current_psi = start
        psi_step = (stop - start) / len(inputs)

    for frame_no, latent in enumerate(inputs):
        if space == 'z':
            z = torch.from_numpy(latent).to(device)
            if ramp_psi:
                psi = current_psi
                current_psi = current_psi + psi_step
            else:
                psi = truncation_psi
            img = G(z, label, truncation_psi=psi, noise_mode=noise_mode)
        else:
            # 2-D inputs are NumPy arrays lacking the leading batch axis;
            # anything else is handed to the synthesis network untouched
            if len(latent.shape) == 2:
                latent = torch.from_numpy(latent).unsqueeze(0).to(device)
            img = G.synthesis(latent, noise_mode=noise_mode, force_fp32=True)

        # move axis 1 to the end, then rescale and clamp into the uint8 range
        pixels = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
        yield f"{frame_no:04d}", Image.fromarray(pixels[0].cpu().numpy(), 'RGB')

def w_to_img(G, device, noise_mode, w):
    """Synthesise a single RGB PIL image from a w vector.

    Args:
        G: generator exposing ``synthesis``.
        device: not used by this function.
        noise_mode: forwarded to ``G.synthesis``.
        w: latent passed straight to ``G.synthesis``.

    Returns:
        The first image of the synthesised batch as an RGB ``PIL.Image``.
    """
    synthesized = G.synthesis(w, noise_mode=noise_mode, force_fp32=True)
    # move axis 1 to the end, then rescale and clamp into the uint8 range
    channels_last = synthesized.permute(0, 2, 3, 1)
    as_uint8 = (channels_last * 127.5 + 128).clamp(0, 255).to(torch.uint8)
    first = as_uint8[0].cpu().numpy()
    return Image.fromarray(first, 'RGB')

Interpolation of vectors; browsing latent space.

In [ ]:
def interpolate(G,device,projected_w,seeds,random_seed,space,truncation_psi,label,frames,noise_mode,outdir,interpolation,
                easing, diameter, start=None,stop=None):
    """Generate the frames of an interpolation through latent space.

    Args:
        G: generator used to synthesise the frames.
        device: torch device used for mapping and synthesis.
        projected_w: optional path to an ``.npz`` file whose ``'w'`` entry
            holds the key points; takes precedence over ``seeds``.
        seeds: seeds for the key points (ignored, with a warning, by the
            ``'noiseloop'`` and ``'circularloop'`` interpolations).
        random_seed: seed forwarded to the loop interpolations.
        space: ``'z'`` or ``'w'``; with ``'w'`` the seed-derived z vectors are
            mapped to w before interpolating.
        truncation_psi: truncation strength.
        label: label forwarded to the generator.
        frames: number of frames per segment (or per loop).
        noise_mode: forwarded to the generator.
        outdir: accepted for interface compatibility; not used here.
        interpolation: ``'linear'``, ``'slerp'``, ``'noiseloop'`` or
            ``'circularloop'``. Any other value leaves the key points as they
            are.
        easing: easing forwarded to the linear interpolation.
        diameter: diameter forwarded to the loop interpolations.
        start, stop: optional truncation ramp forwarded to ``images``.

    Yields:
        The ``(name, image)`` pairs produced by ``images``.
    """
    if interpolation in ('noiseloop', 'circularloop'):
        if seeds is not None:
            print(f'Warning: interpolation type: "{interpolation}" doesnt support set seeds.')

        if interpolation == 'noiseloop':
            points = noiseloop(frames, diameter, random_seed)
        else:
            points = circularloop(frames, diameter, random_seed, seeds)

    else:
        if projected_w is not None:
            points = np.load(projected_w)['w']
        else:
            # get zs from seeds
            points = seeds_to_zs(G, seeds)
            # convert to ws
            if space == 'w':
                points = zs_to_ws(G, device, label, truncation_psi, points)

        # get interpolation points
        if interpolation == 'linear':
            points = line_interpolate(points, frames, easing)
        elif interpolation == 'slerp':
            points = slerp_interpolate(points, frames)

    # generate frames; `images` has no outdir parameter, so passing it
    # positionally (as before) shifted start/stop and raised a TypeError
    yield from images(G, device, points, space, truncation_psi, label, noise_mode,
                      start=start, stop=stop)

# slightly modified version of
# https://github.com/PDillis/stylegan2-fun/blob/master/run_generator.py#L399
def slerp(t, v0, v1, DOT_THRESHOLD=0.9995, device="cuda"):
    '''
    Spherical linear interpolation
    Args:
        t (float/np.ndarray): Float value between 0.0 and 1.0
        v0 (np.ndarray or torch.Tensor): Starting vector
        v1 (np.ndarray or torch.Tensor): Final vector
        DOT_THRESHOLD (float): Threshold for considering the two vectors as
                               colineal. Not recommended to alter this.
        device: torch device the result is moved to. Defaults to "cuda",
                which is what this function always used before.
    Returns:
        v2 (torch.Tensor): Interpolation vector between v0 and v1, on `device`.
                           Nearly colineal inputs are interpolated linearly
                           and returned as a tensor as well, so both paths
                           now produce the same type.
    '''
    v0 = v0.cpu().detach().numpy() if hasattr(v0, 'cpu') else v0
    v1 = v1.cpu().detach().numpy() if hasattr(v1, 'cpu') else v1
    # Keep the unnormalised vectors: the final blend is done on them
    v0_orig = np.copy(v0)
    v1_orig = np.copy(v1)
    # Normalize the vectors to get the directions and angles
    v0 = v0 / np.linalg.norm(v0)
    v1 = v1 / np.linalg.norm(v1)
    # Dot product with the normalized vectors (can't use np.dot in W)
    dot = np.sum(v0 * v1)
    if np.abs(dot) > DOT_THRESHOLD:
        # Vectors are ~colineal: the slerp weights would divide by a
        # near-zero sine, so fall back to plain linear interpolation
        v2 = (1.0 - t) * v0_orig + t * v1_orig
    else:
        # Calculate initial angle between v0 and v1
        theta_0 = np.arccos(dot)
        sin_theta_0 = np.sin(theta_0)
        # Angle at timestep t
        theta_t = theta_0 * t
        # Finish the slerp algorithm
        s0 = np.sin(theta_0 - theta_t) / sin_theta_0
        s1 = np.sin(theta_t) / sin_theta_0
        v2 = s0 * v0_orig + s1 * v1_orig
    return torch.from_numpy(np.asarray(v2)).to(device)

def slerp_interpolate(zs, steps):
    """Spherically interpolate between each pair of consecutive vectors.

    Args:
        zs: sequence of key vectors.
        steps: number of interpolated points per consecutive pair.

    Returns:
        A flat list of ``slerp`` results. Each pair contributes the fractions
        ``0/steps .. (steps-1)/steps``, so the very last key vector itself is
        not part of the output.
    """
    interpolated = []
    for segment_start, segment_end in zip(zs, zs[1:]):
        interpolated.extend(
            slerp(step / float(steps), segment_start, segment_end)
            for step in range(steps)
        )
    return interpolated

Project an image to the latent space

In [ ]:
import subprocess, operator

def project_img_to_z(snapshot: Snapshot, image_filename: str, steps=1000, replace_if_exists=False) -> dict:
    """Project an image into the latent space of ``snapshot``.

    Runs ``pbaylies_projector.py`` in a subprocess unless a projection for
    this snapshot/image pair already exists on disk.

    Args:
        snapshot: snapshot whose ``pkl_path`` is used as the network and whose
            ``id`` names the output directory.
        image_filename: path of the target image.
        steps: number of optimisation steps passed to the projector.
        replace_if_exists: rerun the projector even when ``proj.png`` is
            already present.

    Returns:
        A dict with the paths of the projected image (``img``), the target
        copy (``src_img``), the original file (``src``) and the projected w
        (``npz``), plus ``loss`` and ``dist`` as the strings printed on the
        projector's last "step" line (both ``None`` when the cached result was
        reused).

    Raises:
        RuntimeError: when the projector printed no "step" line, which means
            it failed before optimising. Its stderr is printed beforehand.
    """
    # splitext handles extensions of any length; the former [:-4] slice only
    # worked for three-letter ones such as ".png"
    image_name = os.path.splitext(image_filename)[0]
    out_dir = f"out/projections/{snapshot.id}/{image_name}"
    projected_img = f"{out_dir}/proj.png"

    if replace_if_exists or not os.path.exists(projected_img):
        completed = subprocess.run([
            "python", "pbaylies_projector.py",
            "--network", snapshot.pkl_path,
            "--outdir", out_dir,
            "--target-image", image_filename,
            "--use-clip", "False",
            "--num-steps", str(steps),
            "--save-video", "False",
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        print(completed.stderr)

        step_lines = [line for line in completed.stdout.split("\n") if line.startswith("step")]
        if not step_lines:
            raise RuntimeError(
                f"pbaylies_projector.py reported no optimisation steps "
                f"(exit code {completed.returncode}) for {image_filename}"
            )
        # the last and third-from-last tokens of the final step line
        loss, dist = operator.itemgetter(-1, -3)(step_lines[-1].split(" "))
    else:
        # TODO: get loss and dist from somewhere? (currently not using it much)
        loss, dist = (None, None)

    return {
        "img": projected_img,
        "src_img": f"{out_dir}/target.png",
        "src": image_filename,
        "npz": f"{out_dir}/projected_w.npz",
        "loss": loss,
        "dist": dist
    }

Displaying Videos and Images

In [47]:
def img_to_widget(img):
    """Wrap an image in an ``widgets.Image`` by encoding it as PNG in memory.

    Args:
        img: object with a PIL-style ``save(buffer, format=...)`` method.

    Returns:
        A ``widgets.Image`` holding the PNG bytes, with width and height both
        set to ``run_data['resolution']``.
    """
    # NOTE(review): `run_data` is not defined in this notebook; it has to be
    # provided by the surrounding namespace -- confirm against the caller.
    png_buffer = io.BytesIO()
    img.save(png_buffer, format='png')
    png_bytes = png_buffer.getvalue()

    return widgets.Image(
        value=png_bytes,
        format='png',
        width=run_data['resolution'],
        height=run_data['resolution'],
    )

def video_to_widget(filename):
    """Read a video file fully into memory and wrap it in a ``widgets.Video``.

    Args:
        filename: path of the video file to read.

    Returns:
        A ``widgets.Video`` whose value is the raw bytes of the file.
    """
    with open(filename, 'rb') as video_file:
        raw_bytes = video_file.read()
    return widgets.Video(value=raw_bytes)

def image_grid(imgs, cols=None, rows=None, margin = 10):
    """Paste images into a single grid image, filled row by row.

    Args:
        imgs: non-empty sequence of PIL images; the cell size is taken from
            the first one.
        cols: number of columns. When only ``rows`` is given it is derived
            from the number of images.
        rows: number of rows. When only ``cols`` is given it is derived from
            the number of images.
        margin: gap in pixels between neighbouring cells.

    Returns:
        A new RGB image containing all images. When neither ``cols`` nor
        ``rows`` is given, everything is placed on one horizontal row.
    """
    count = len(imgs)
    if cols is None and rows is None:
        cols, rows = count, 1
    elif cols is None:
        # previously a lone `rows` was silently discarded; derive the
        # missing dimension instead (ceiling division)
        cols = -(-count // rows)
    elif rows is None:
        rows = -(-count // cols)

    cell_w, cell_h = imgs[0].size
    cell_w, cell_h = cell_w + margin, cell_h + margin
    # no trailing margin after the last column/row
    grid = Image.new('RGB', size=(cols * cell_w - margin, rows * cell_h - margin))

    for position, img in enumerate(imgs):
        grid.paste(img, box=(position % cols * cell_w, position // cols * cell_h))
    return grid

Postprocessing

Sometimes we like to generate videos from a series of ws

In [40]:
from imageio_ffmpeg import write_frames

def generator_to_video(generator, out_filename, fps, frame_size, quality):
    """Encode the frames produced by ``generator`` into a video file.

    Args:
        generator: iterable of frames; PIL images are converted to NumPy
            arrays, anything else is handed to the writer unchanged.
        out_filename: path of the video file to write.
        fps: frame rate of the resulting video.
        frame_size: ``(width, height)`` of the frames.
        quality: quality setting forwarded to ``write_frames``.
    """
    # fps used to be accepted but never forwarded, so every video was written
    # at the writer's default frame rate
    writer = write_frames(out_filename, frame_size, fps=fps, quality=quality)  # size is (width, height)
    writer.send(None)  # seed the generator
    try:
        for frame in generator:
            # isinstance also matches PIL subclasses (e.g. images loaded from
            # a file), which an exact type comparison would let through
            # unconverted
            if isinstance(frame, Image.Image):
                frame = np.array(frame)
            writer.send(frame)
    finally:
        # always close the writer so the output is finalised even if a frame fails
        writer.close()