feature(whl): add single loss landscape visualizer #37

Merged · 12 commits · Jun 14, 2024
1 change: 1 addition & 0 deletions README.md
@@ -69,6 +69,7 @@ For attacking methods, please refer to our examples in: [demo for attack](https:
- Support for a variety of algorithms and datasets.
- Support multiprocessing training on each client for better efficiency.
- Using single GPU to simulate Federated Learning process (multi-GPU version will be released soon).
- Strong visualization utilities. See [demo](https://github.com/kxzxvbk/Fling/blob/main/fling/utils/visualize_utils/demo) for detailed information.

## Supported Algorithms

1 change: 1 addition & 0 deletions fling/utils/__init__.py
@@ -4,3 +4,4 @@
from .utils import Logger, client_sampling, VariableMonitor
from .data_utils import get_data_transform
from .launcher_utils import get_launcher
from .visualize_utils import plot_2d_loss_landscape
3 changes: 3 additions & 0 deletions fling/utils/visualize_utils/__init__.py
@@ -0,0 +1,3 @@
from .loss_landscape import plot_2d_loss_landscape
from .conv_kernel_visualizer import plot_conv_kernels
from .hessian_eigen_value import calculate_hessian_dominant_eigen_values
21 changes: 21 additions & 0 deletions fling/utils/visualize_utils/conv_kernel_visualizer.py
@@ -0,0 +1,21 @@
import torchvision
from torch import nn

from fling.utils import Logger


def plot_conv_kernels(logger: Logger, layer: nn.Conv2d, name: str) -> None:
"""
Overview:
Plot the kernels in a certain convolution layer for better visualization.
Arguments:
logger: The logger to write result image.
layer: The convolution layer to visualize.
name: The name of the plotted figure.
"""
param = layer.weight
in_channels = param.shape[1]
k_w, k_h = param.size()[3], param.size()[2]
kernel_all = param.view(-1, 1, k_w, k_h)
kernel_grid = torchvision.utils.make_grid(kernel_all, normalize=True, scale_each=True, nrow=in_channels)
logger.add_image(f'{name}', kernel_grid, global_step=0)
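For quick inspection outside of a training run, the same grid can also be written straight to disk. A minimal standalone sketch; the layer and output path below are illustrative, not part of the Fling API:

```python
import torchvision
from torch import nn

# Illustrative layer; any nn.Conv2d works. Detach so the image utilities
# do not operate on a grad-tracking tensor.
layer = nn.Conv2d(3, 16, kernel_size=5)
param = layer.weight.detach()                      # (out_channels, in_channels, kH, kW)
kernels = param.view(-1, 1, *param.shape[2:])      # one grayscale image per kernel
grid = torchvision.utils.make_grid(kernels, normalize=True, scale_each=True, nrow=param.shape[1])
torchvision.utils.save_image(grid, 'kernels.png')  # hypothetical output path
```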
14 changes: 14 additions & 0 deletions fling/utils/visualize_utils/demo/demo_conv_kernel_visualize.py
@@ -0,0 +1,14 @@
from torchvision.models import resnet18

from fling.utils import Logger
from fling.utils.visualize_utils import plot_conv_kernels

if __name__ == '__main__':
    # Step 1: prepare the model.
    # Note: recent torchvision versions prefer ``weights=...`` over ``pretrained=True``.
    model = resnet18(pretrained=True)

    # Step 2: prepare the logger.
    logger = Logger('resnet18_conv_kernels')

    # Step 3: save the kernels.
    plot_conv_kernels(logger, model.conv1, name='pre-conv')
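Assuming ``Logger`` wraps a TensorBoard ``SummaryWriter`` (it exposes ``add_image``), the saved grid can then be inspected with ``tensorboard --logdir resnet18_conv_kernels``.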
75 changes: 75 additions & 0 deletions fling/utils/visualize_utils/demo/demo_hessian_eigen_value.py
@@ -0,0 +1,75 @@
from easydict import EasyDict

import torch
from torch import nn
from torch.utils.data import DataLoader

from fling import dataset
from fling.utils.visualize_utils import calculate_hessian_dominant_eigen_values
from fling.utils.registry_utils import DATASET_REGISTRY


class ToyModel(nn.Module):
"""
Overview:
A toy model for demonstrating attacking results.
"""

def __init__(self):
super(ToyModel, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
self.relu1 = nn.ReLU()

self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.relu2 = nn.ReLU()

self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.relu3 = nn.ReLU()

self.pool = nn.AdaptiveAvgPool2d((1, 1))
self.flat = nn.Flatten()

self.fc = nn.Linear(128, 10)

def forward(self, x):
x = self.relu1(self.conv1(x))
x = self.relu2(self.conv2(x))
x = self.relu3(self.conv3(x))
x = self.flat(self.pool(x))
return self.fc(x)


if __name__ == '__main__':
    # Step 1: prepare the dataset.
    dataset_config = EasyDict(dict(data=dict(data_path='./data/cifar10', transforms=dict())))
    dataset = DATASET_REGISTRY.build('cifar10', dataset_config, train=False)

    # A small test subset is used for calculating the Hessian eigenvalues.
    test_dataset = [dataset[i] for i in range(100)]
    test_dataloader = DataLoader(test_dataset, batch_size=100)

    # Step 2: prepare the model.
    model = ToyModel()

    # Step 3: optionally train the randomly initialized model.
    # The epoch count is 0 here, so the model stays at its random initialization;
    # increase it to measure the eigenvalues of a trained model instead.
    dataloader = DataLoader(dataset, batch_size=100)
    device = 'cuda'
    model = model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    for _ in range(0):
        for data in dataloader:
            data_x, data_y = data['input'].to(device), data['class_id'].to(device)
            pred_y = model(data_x)
            loss = criterion(pred_y, data_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    model.to('cpu')

    # Step 4: calculate the dominant Hessian eigenvalues.
    # Only one line of code!
    res = calculate_hessian_dominant_eigen_values(model, iter_num=20, dataloader=test_dataloader, device='cuda')
    print(res)
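The returned dictionary maps each parameter name to the dominant eigenvalue of its layer-wise Hessian; in the HAWQ setting, larger values indicate layers with sharper curvature. A hypothetical bit of post-processing to rank the layers:

```python
# Rank layers by curvature, sharpest first (``res`` comes from the demo above).
for name, eig in sorted(res.items(), key=lambda kv: -kv[1]):
    print(f'{name}: {eig:.4f}')
```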
84 changes: 84 additions & 0 deletions fling/utils/visualize_utils/demo/demo_single_loss_landscape.py
@@ -0,0 +1,84 @@
from easydict import EasyDict

import torch
from torch import nn
from torch.utils.data import DataLoader

from fling import dataset
from fling.utils.visualize_utils import plot_2d_loss_landscape
from fling.utils.registry_utils import DATASET_REGISTRY


class ToyModel(nn.Module):
"""
Overview:
A toy model for demonstrating attacking results.
"""

def __init__(self):
super(ToyModel, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
self.relu1 = nn.ReLU()

self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.relu2 = nn.ReLU()

self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.relu3 = nn.ReLU()

self.pool = nn.AdaptiveAvgPool2d((1, 1))
self.flat = nn.Flatten()

self.fc = nn.Linear(128, 10)

def forward(self, x):
x = self.relu1(self.conv1(x))
x = self.relu2(self.conv2(x))
x = self.relu3(self.conv3(x))
x = self.flat(self.pool(x))
return self.fc(x)


if __name__ == '__main__':
    # Step 1: prepare the dataset.
    dataset_config = EasyDict(dict(data=dict(data_path='./data/cifar10', transforms=dict())))
    dataset = DATASET_REGISTRY.build('cifar10', dataset_config, train=False)

    # A small test subset is used for generating the loss landscape.
    test_dataset = [dataset[i] for i in range(100)]
    test_dataloader = DataLoader(test_dataset, batch_size=100)

    # Step 2: prepare the model.
    model = ToyModel()

    # Step 3: train the randomly initialized model.
    dataloader = DataLoader(dataset, batch_size=100)
    device = 'cuda'
    model = model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    criterion = torch.nn.CrossEntropyLoss()
    for _ in range(10):
        for data in dataloader:
            data_x, data_y = data['input'].to(device), data['class_id'].to(device)
            pred_y = model(data_x)
            loss = criterion(pred_y, data_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    model.to('cpu')

    # Step 4: plot the loss landscape of the trained model.
    # Only one line of code for visualization!
    plot_2d_loss_landscape(
        model=model,
        dataloader=test_dataloader,
        device='cuda',
        caption='Loss Landscape Trained',
        save_path='./landscape.pdf',
        noise_range=(-1, 1),
        resolution=30,
        log_scale=True,
        max_val=20,
    )
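The internals of ``plot_2d_loss_landscape`` are not part of this diff, but the usual recipe (Li et al., *Visualizing the Loss Landscape of Neural Nets*) perturbs the trained weights along two random directions and evaluates the loss on a grid. A minimal self-contained sketch of that recipe, assuming the same ``dict``-style batches as above; ``loss_landscape_2d`` and its signature are illustrative, not the Fling API, and the directions here are rescaled per-tensor rather than with the finer per-filter normalization of Li et al.:

```python
import copy

import torch
from torch import nn
from torch.utils.data import DataLoader


@torch.no_grad()
def loss_landscape_2d(model: nn.Module, dataloader: DataLoader,
                      noise_range=(-1.0, 1.0), resolution: int = 10) -> torch.Tensor:
    # Work on a copy so that the caller's weights stay untouched.
    net = copy.deepcopy(model).eval()
    criterion = nn.CrossEntropyLoss()
    orig = [p.detach().clone() for p in net.parameters()]

    # Two random directions, rescaled to match the weight norms.
    def direction():
        d = [torch.randn_like(p) for p in orig]
        return [di * (p.norm() / (di.norm() + 1e-10)) for di, p in zip(d, orig)]

    d0, d1 = direction(), direction()
    alphas = torch.linspace(noise_range[0], noise_range[1], resolution)
    grid = torch.zeros(resolution, resolution)
    for i, a in enumerate(alphas):
        for j, b in enumerate(alphas):
            # Move to w0 + a * d0 + b * d1 and evaluate the mean loss there.
            for p, p0, u, v in zip(net.parameters(), orig, d0, d1):
                p.copy_(p0 + a * u + b * v)
            losses = [criterion(net(batch['input']), batch['class_id']) for batch in dataloader]
            grid[i, j] = torch.stack(losses).mean()
    return grid


# Usage with the demo objects above, e.g.:
# landscape = loss_landscape_2d(model, test_dataloader, noise_range=(-1, 1), resolution=30)
```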
106 changes: 106 additions & 0 deletions fling/utils/visualize_utils/hessian_eigen_value.py
@@ -0,0 +1,106 @@
from typing import Sequence, List, Dict
import copy

import torch
from torch import nn
from torch.autograd import grad
from torch.utils.data import DataLoader


def _get_first_grad(loss: torch.Tensor, w: List) -> Sequence:
"""
Calculate: g_i = \\frac{dL}{dW_i}
"""
return grad(loss, w, retain_graph=True, create_graph=True)


def _get_hv(g: Sequence, w: Sequence, v: Sequence) -> Sequence:
"""
Calculate: Hv = \\frac{d(gv)}{dW_i}
"""
assert len(w) == len(v)
return grad(g, w, grad_outputs=v, retain_graph=True)


def _normalize(vs: Sequence) -> None:
"""
Normalize vectors in ``vs``.
"""
for i in range(len(vs)):
vs[i] = vs[i] / torch.norm(vs[i])


def _calc_loss_value(
        model: nn.Module, data_loader: DataLoader, device: str, criterion: nn.Module = nn.CrossEntropyLoss()
) -> torch.Tensor:
    # Given a model and the corresponding dataset, calculate the mean loss value.
    model.eval()
    tot_loss = []
    for data in data_loader:
        data_x, data_y = data['input'].to(device), data['class_id'].to(device)
        pred_y = model(data_x)
        loss = criterion(pred_y, data_y)
        tot_loss.append(loss)
    tot_loss = torch.stack(tot_loss, dim=0)
    return torch.mean(tot_loss)


def _rayleigh_quotient(hv: Sequence, v: Sequence) -> List:
    """
    Calculate: \\lambda = \\frac{v^THv}{v^Tv}
    """
    # Treat each parameter tensor as a flattened vector; transposing before
    # flattening would pair mismatched elements in the dot products.
    return [((torch.flatten(v[i]) @ torch.flatten(hv[i])) /
             (torch.flatten(v[i]) @ torch.flatten(v[i]))).item() for i in range(len(hv))]


def calculate_hessian_dominant_eigen_values(
        model: nn.Module,
        iter_num: int,
        dataloader: DataLoader,
        device: str
) -> Dict:
    """
    Overview:
        Use power iteration to calculate the dominant eigenvalue of each layer's Hessian.
        Reference paper: HAWQ: Hessian AWare Quantization of Neural Networks with Mixed-Precision
        <link https://arxiv.org/pdf/1905.03696.pdf link>
    Arguments:
        model: The neural network that calculates ``loss``.
        iter_num: The number of power-iteration steps.
        dataloader: The dataloader used to calculate the Hessian eigenvalues.
        device: The device to run on, such as ``"cuda"`` or ``"cpu"``.
    Returns:
        A dictionary mapping each parameter name to the dominant eigenvalue of its layer.
    """
    # Work on a deep copy so that the caller's model is left untouched.
    model = copy.deepcopy(model).to(device)

    # Calculate the loss value using the given data.
    loss = _calc_loss_value(model, data_loader=dataloader, device=device)

    # Collect the parameter names and tensors.
    ws = dict(model.named_parameters())
    keys = list(ws.keys())
    ws = list(ws.values())

    # Calculate the first-order gradients with a graph, so that Hessian-vector
    # products can be taken afterwards.
    g = _get_first_grad(loss, ws)

    # Initialize the vectors ``vs`` randomly and normalize them.
    vs = [torch.randn_like(g[i]) for i in range(len(g))]
    _normalize(vs)

    # Power iteration: repeatedly apply the Hessian and renormalize, so that each
    # vector converges to the dominant eigenvector of its layer's Hessian.
    for _ in range(iter_num):
        hv = _get_hv(g, ws, vs)
        vs = [hv[i].detach() for i in range(len(hv))]
        _normalize(vs)

    # Calculate the eigenvalues via the Rayleigh quotient.
    hv = _get_hv(g, ws, vs)
    lambdas = _rayleigh_quotient(hv, vs)

    return {keys[i]: lambdas[i] for i in range(len(lambdas))}
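The power-iteration core can be sanity-checked on a function whose Hessian is known in closed form. A minimal sketch, independent of the helpers in this file: for the quadratic loss L(w) = ½·wᵀAw the Hessian is exactly A, so the recovered value should match A's largest eigenvalue:

```python
import torch
from torch.autograd import grad

# Quadratic loss L(w) = 0.5 * w^T A w has Hessian exactly A,
# so power iteration should recover A's largest eigenvalue.
torch.manual_seed(0)
n = 5
A = torch.randn(n, n)
A = A @ A.T  # symmetric positive semi-definite

w = torch.randn(n, requires_grad=True)
loss = 0.5 * w @ A @ w
g = grad(loss, w, create_graph=True)[0]

v = torch.randn(n)
v = v / v.norm()
for _ in range(50):
    hv = grad(g, w, grad_outputs=v, retain_graph=True)[0]
    v = hv.detach() / hv.norm()

hv = grad(g, w, grad_outputs=v, retain_graph=True)[0]
lam = (v @ hv) / (v @ v)
print(lam.item(), torch.linalg.eigvalsh(A).max().item())  # should match closely
```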