# modules.py

# Adapted from code of DIF-Net and SIREN

import math
from collections import OrderedDict

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torchmeta.modules import MetaModule, MetaSequential
from torchmeta.modules.utils import get_subdict


class BatchLinear(nn.Linear, MetaModule):
    '''A linear meta-layer that can deal with batched weight matrices and biases, as for instance output by a
    hypernetwork.
    '''
    # NOTE: this assignment rebinds the class docstring above, so at runtime
    # ``BatchLinear.__doc__`` is the ``nn.Linear`` documentation.
    __doc__ = nn.Linear.__doc__

    def forward(self, input, params=None):
        '''Apply the linear map with the layer's own or externally supplied parameters.

        Args:
            input: Tensor that is matrix-multiplied with the transposed weight.
            params: Optional mapping with a ``'weight'`` entry and an optional
                ``'bias'`` entry. When ``None``, the layer's own parameters are
                used via ``nn.Linear.forward``.

        Returns:
            ``input`` multiplied by the weight with its last two axes swapped,
            plus the bias when one is supplied.

        Raises:
            KeyError: If ``params`` is given but has no ``'weight'`` entry.
        '''
        if params is None:
            return nn.Linear.forward(self, input)

        weight = params['weight']
        bias = params.get('bias', None)

        # Swap only the last two axes so any leading axes of the weight are
        # left in place for matmul broadcasting.
        # Presumably weight is (..., out_features, in_features) -- confirm
        # against the hypernetwork that produces it.
        output = input.matmul(weight.transpose(-1, -2))

        # A parameter dict without a bias entry means "no bias"; previously
        # this path failed with AttributeError on ``None.unsqueeze``.
        if bias is not None:
            output += bias.unsqueeze(-2)
        return output


class Sine(nn.Module):
    '''Sine activation: returns ``sin(30 * input)`` element-wise.'''

    def __init__(self):
        # The original spelled this ``__init`` (name-mangled, never invoked);
        # it is now the real constructor.
        super().__init__()

    def forward(self, input):
        '''Scale ``input`` by 30 and apply ``torch.sin``.'''
        return torch.sin(30 * input)


class FCBlock(MetaModule):
    '''Fully connected network built from BatchLinear layers.

    The weights can be swapped out at call time (e.g. by a hypernetwork) by
    passing ``params`` to ``forward``; without ``params`` it behaves like an
    ordinary network using its own parameters.
    '''

    def __init__(self, in_features, out_features, num_hidden_layers,
                 hidden_features, outermost_linear=False, nonlinearity='relu',
                 weight_init=None, use_dropout=True):
        '''Assemble the layer stack and initialize its weights.

        Args:
            in_features: Input width of the first linear layer.
            out_features: Output width of the last linear layer.
            num_hidden_layers: Number of hidden-to-hidden layers placed
                between the first and the last layer.
            hidden_features: Width of every hidden layer.
            outermost_linear: If true, the last layer has no activation.
            nonlinearity: Key selecting the activation and its initializers;
                one of 'sine', 'relu', 'sigmoid', 'tanh', 'selu', 'softplus',
                'elu'. Any other key raises ``KeyError``.
            weight_init: Optional initializer that replaces the default one
                tied to the chosen nonlinearity.
            use_dropout: If true, ``nn.Dropout(0.2)`` follows each
                hidden-to-hidden layer.
        '''
        super().__init__()

        self.first_layer_init = None

        # name -> (activation module, default init, first-layer init, last-layer init)
        activation_table = {
            'sine': (Sine(), sine_init, first_layer_sine_init, last_layer_sine_init),
            'relu': (nn.ReLU(inplace=True), init_weights_normal, None, None),
            'sigmoid': (nn.Sigmoid(), init_weights_xavier, None, None),
            'tanh': (nn.Tanh(), init_weights_xavier, None, None),
            'selu': (nn.SELU(inplace=True), init_weights_selu, None, None),
            'softplus': (nn.Softplus(), init_weights_normal, None, None),
            'elu': (nn.ELU(inplace=True), init_weights_elu, None, None),
        }
        activation, default_init, first_init, last_init = activation_table[nonlinearity]

        # An explicitly passed initializer takes precedence over the default.
        self.weight_init = default_init if weight_init is None else weight_init

        # The same activation instance is reused by every stage.
        stages = [MetaSequential(
            BatchLinear(in_features, hidden_features), activation)]

        for _ in range(num_hidden_layers):
            stages.append(MetaSequential(
                BatchLinear(hidden_features, hidden_features), activation))
            if use_dropout:
                stages.append(nn.Dropout(0.2))

        output_linear = BatchLinear(hidden_features, out_features)
        if outermost_linear:
            stages.append(MetaSequential(output_linear))
        else:
            stages.append(MetaSequential(output_linear, activation))

        self.net = MetaSequential(*stages)

        if self.weight_init is not None:
            self.net.apply(self.weight_init)

        # Special first/last stage schemes run after the general init and
        # therefore override it on those stages.
        if first_init is not None:
            self.net[0].apply(first_init)

        if last_init is not None:
            self.net[-1].apply(last_init)

    def forward(self, coords, params=None, **kwargs):
        '''Run ``coords`` through the network.

        When ``params`` is given, only its 'net' sub-dictionary is forwarded
        to the layer stack; extra keyword arguments are ignored.
        '''
        net_params = None if params is None else get_subdict(params, 'net')
        return self.net(coords, params=net_params)


class SingleBVPNet(MetaModule):
    '''A canonical representation network for a BVP.

    Wraps an FCBlock whose last layer is linear and feeds it the coordinates
    found under ``coord_type`` in the input dictionary.
    '''

    def __init__(self, coord_type, out_features=1, act_type='sine',
                 in_features=2, mode='mlp', hidden_features=256,
                 num_hidden_layers=3, use_dropout=True, **kwargs):
        '''Build the underlying FCBlock.

        Args:
            coord_type: Key used to look up the coordinates in ``model_input``.
            out_features: Output width of the network.
            act_type: Nonlinearity name passed to FCBlock.
            in_features: Input width of the network.
            mode: Stored on the instance; not otherwise used in this class.
            hidden_features: Width of the hidden layers.
            num_hidden_layers: Number of hidden layers passed to FCBlock.
            use_dropout: Passed to FCBlock.
            **kwargs: Accepted and ignored.
        '''
        super().__init__()
        self.mode = mode
        self.net = FCBlock(
            in_features=in_features, out_features=out_features,
            num_hidden_layers=num_hidden_layers,
            hidden_features=hidden_features, outermost_linear=True,
            nonlinearity=act_type, use_dropout=use_dropout)
        self.coord_type = coord_type

    def forward(self, model_input, params=None):
        '''Evaluate the network on ``model_input[self.coord_type]``.

        Args:
            model_input: Mapping that contains the coordinate tensor under
                ``self.coord_type``.
            params: Optional parameter mapping; its 'net' sub-dictionary is
                handed to the FCBlock. ``None`` uses the module's own weights.

        Returns:
            Dict with 'model_in' (the coordinate tensor, now requiring grad)
            and 'model_out' (the network output).
        '''
        # Enables us to compute gradients w.r.t. coordinates. Note that
        # requires_grad_ flips the flag on the caller's tensor in place.
        coords_org = model_input[self.coord_type].requires_grad_(True)

        # Only take the sub-dictionary when parameters were actually supplied,
        # mirroring the None guard in FCBlock.forward.
        net_params = get_subdict(params, 'net') if params is not None else None

        output = self.net(coords_org, net_params)
        return {'model_in': coords_org, 'model_out': output}


def init_weights_normal(m):
    '''Kaiming-normal (fan-in, ReLU gain) initialization for linear layers.

    Applies to ``nn.Linear`` and any subclass of it (BatchLinear included);
    every other module is left untouched. The weight is modified in place.
    '''
    # isinstance instead of an exact type comparison, so subclasses of
    # nn.Linear are no longer silently skipped.
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(
            m.weight, a=0.0, nonlinearity='relu', mode='fan_in')


def init_weights_selu(m):
    '''Normal initialization with ``std = 1 / sqrt(fan_in)`` for linear layers.

    ``fan_in`` is the size of the weight's last dimension. Applies to
    ``nn.Linear`` and any subclass of it (BatchLinear included); every other
    module is left untouched. The weight is modified in place.
    '''
    # isinstance instead of an exact type comparison, so subclasses of
    # nn.Linear are no longer silently skipped.
    if isinstance(m, nn.Linear):
        num_input = m.weight.size(-1)
        nn.init.normal_(m.weight, std=1 / math.sqrt(num_input))


def init_weights_elu(m):
    '''Normal initialization with ``std = sqrt(1.5505188080679277) / sqrt(fan_in)``.

    ``fan_in`` is the size of the weight's last dimension. Applies to
    ``nn.Linear`` and any subclass of it (BatchLinear included); every other
    module is left untouched. The weight is modified in place.
    '''
    # isinstance instead of an exact type comparison, so subclasses of
    # nn.Linear are no longer silently skipped.
    if isinstance(m, nn.Linear):
        num_input = m.weight.size(-1)
        nn.init.normal_(m.weight, std=math.sqrt(
            1.5505188080679277) / math.sqrt(num_input))


def init_weights_xavier(m):
    '''Xavier-normal initialization for linear layers.

    Applies to ``nn.Linear`` and any subclass of it (BatchLinear included);
    every other module is left untouched. The weight is modified in place.
    '''
    # isinstance instead of an exact type comparison, so subclasses of
    # nn.Linear are no longer silently skipped.
    if isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)


def sine_init(m):
    '''Uniformly initialize ``m.weight`` in ``[-b, b]`` with ``b = sqrt(6 / fan_in) / 30``.

    ``fan_in`` is the size of the weight's last dimension. Modules without a
    ``weight`` attribute are ignored. The weight is modified in place with
    gradient tracking disabled.
    '''
    if not hasattr(m, 'weight'):
        return

    fan_in = m.weight.size(-1)
    bound = np.sqrt(6 / fan_in) / 30
    with torch.no_grad():
        m.weight.uniform_(-bound, bound)


def first_layer_sine_init(m):
    '''Uniformly initialize ``m.weight`` in ``[-1 / fan_in, 1 / fan_in]``.

    ``fan_in`` is the size of the weight's last dimension. Modules without a
    ``weight`` attribute are ignored. The weight is modified in place with
    gradient tracking disabled.
    '''
    if not hasattr(m, 'weight'):
        return

    bound = 1 / m.weight.size(-1)
    with torch.no_grad():
        m.weight.uniform_(-bound, bound)


def last_layer_sine_init(m):
    '''Zero the weight, and the bias when present, of ``m`` in place.

    Modules without a ``weight`` attribute are ignored. A module whose bias
    is missing or ``None`` (e.g. ``nn.Linear(..., bias=False)``) only has its
    weight zeroed instead of raising.
    '''
    if not hasattr(m, 'weight'):
        return

    with torch.no_grad():
        nn.init.zeros_(m.weight)
        # The bias is optional on many layers; skip it rather than crash.
        bias = getattr(m, 'bias', None)
        if bias is not None:
            nn.init.zeros_(bias)