一些在使用pytorch时可能会用到的比较有用的脚本
归一化
def normalize_channels(img):  # NOTE: only the signature survived extraction — the body of this listing was lost
绘制第一个filter
def plot_first_kernels(weight):
    """Display every output filter of a conv weight tensor in an image grid.

    Parameters
    ----------
    weight : torch.Tensor
        Convolution weight of shape (out_channels, in_channels, kH, kW).
        Assumes in_channels is 1 or 3 so each filter is displayable
        by ``imshow`` — TODO confirm against callers.
    """
    with torch.no_grad():
        # (N, C, H, W) -> (N, H, W, C): imshow expects channels last.
        filters = weight.detach().cpu().float().numpy().transpose([0, 2, 3, 1])
        filters = normalize_channels(filters)
        peak = filters.max()
        if peak > 0:  # guard against division by zero for an all-zero weight
            filters = filters / peak
        n = filters.shape[0]
        cols = math.ceil(math.sqrt(n))
        rows = math.ceil(n / cols)
        # BUG FIX: squeeze=False keeps axs 2-D even when rows == 1 or
        # cols == 1 (e.g. n <= 2); the original's axs[r, c] indexing
        # raised there because subplots() squeezed the array.
        fig, axs = plt.subplots(rows, cols, squeeze=False)
        for c in range(cols):
            for r in range(rows):
                idx = r + c * rows  # column-major placement in the grid
                if idx < n:
                    axs[r, c].imshow(filters[idx])
                # Hide ticks/frames on every cell, including the empty
                # trailing cells of a non-square grid.
                axs[r, c].set_axis_off()
绘制给定参数各层的l2范数分布
def plot_norms(named_parameters, figsize=None):
    """Bar-plot the 0/25/50/75/100th percentiles of per-row L2 norms for
    every named parameter, one bar group per layer, on a log scale.

    Parameters
    ----------
    named_parameters : iterable of (str, torch.Tensor)
        Typically ``model.named_parameters()``.
    figsize : tuple, optional
        Figure size; defaults to ``(min(16, n_layers), 6)``.
    """
    from matplotlib import cm
    percentiles = [0, 25, 50, 75, 100]
    names = []
    stats = []
    with torch.no_grad():
        for name, param in named_parameters:
            # Flatten each output row, then take the L2 norm per row.
            flat = param.view(param.shape[0], -1)
            row_norms = torch.norm(flat, p=2, dim=1).cpu().numpy()
            stats.append(np.percentile(row_norms, percentiles))
            names.append(name)
    stats = np.array(stats)
    count = len(stats)
    if figsize is None:
        figsize = (np.min([16, count]), 6)
    plt.figure(figsize=figsize)
    plt.yscale('log')
    # Draw from the 100th percentile down so the smaller bars remain
    # visible, stacked in front of the larger ones.
    cmap = cm.get_cmap('inferno')
    order = np.arange(len(percentiles) - 1, -1, -1)
    for i, color in zip(order, cmap(0.1 + order / len(percentiles))):
        plt.bar(np.arange(count), stats[:, i], lw=1, color=color)
    plt.xticks(range(count), names, rotation="vertical")
    plt.xlabel("layers")
    plt.ylabel("norm distribution")
    plt.title("Kernel L2 Norms")
    plt.grid(True)
    plt.legend(labels=[f'{i}%' for i in percentiles[::-1]])
绘制不同层的梯度
def plot_grad_flow(named_parameters, figsize=None):
    """Plot per-layer mean and max absolute gradient magnitudes.

    Call after ``loss.backward()`` to visually spot vanishing or
    exploding gradients across the network.

    Parameters
    ----------
    named_parameters : iterable of (str, torch.Tensor)
        Typically ``model.named_parameters()``.
    figsize : tuple, optional
        Figure size; defaults to ``(min(16, n_layers), 6)``.
    """
    from matplotlib.lines import Line2D
    avg_grads = []
    max_grads = []
    layers = []
    for n, p in named_parameters:
        # Skip parameters without gradients (frozen/unused) and biases.
        if (p.grad is not None) and ("bias" not in n):
            layers.append(n)
            # BUG FIX: .item() converts the 0-dim tensor to a Python float;
            # the original appended tensors, which matplotlib cannot plot
            # when the gradient lives on the GPU.
            avg_grads.append(p.grad.abs().mean().item())
            max_grads.append(p.grad.abs().max().item())
    if figsize is None:
        figsize = (np.min([16, len(avg_grads)]), 6)
    plt.figure(figsize=figsize)
    # Translucent overlapping bars: cyan = max, blue = mean.
    plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(np.arange(len(max_grads)), avg_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(layers) + 1, lw=2, color="k")
    plt.xticks(range(0, len(layers), 1), layers, rotation="vertical")
    plt.xlim(left=-1, right=len(layers))
    plt.xlabel("Layers")
    plt.ylabel("Gradient Magnitude")
    plt.yscale('log')
    plt.title("Gradient flow")
    plt.grid(True)
    plt.legend([Line2D([0], [0], color="c", lw=4),
                Line2D([0], [0], color="b", lw=4),
                Line2D([0], [0], color="k", lw=4)],
               ['max-gradient', 'mean-gradient', 'zero-gradient'])
Metrics
class Metrics(RunningAverage):
    """Ignite helper: tracks a running training loss and, at the end of
    every epoch, runs an evaluator and records its metrics.

    NOTE(review): relies on ``RunningAverage`` and ``Events`` (pytorch-ignite)
    being imported elsewhere in this file — confirm before moving this class.
    """

    def __init__(self, evaluator, eval_loader, output_transform, interactive=False):
        super().__init__(alpha=0.9, output_transform=output_transform)
        self.evaluator = evaluator
        self.eval_loader = eval_loader
        self.interactive = interactive
        # metric name -> list of per-epoch values from the evaluator
        self.validation_history = {}
        # running training loss, one entry per compute() call
        self.loss_history = []

    def attach(self, engine, name):
        """Attach the running average to *engine* and schedule evaluation
        after every epoch."""
        super().attach(engine, name)
        engine.add_event_handler(Events.EPOCH_COMPLETED, self.run_evaluation)

    def compute(self):
        """Return the running loss, also appending it to ``loss_history``."""
        loss = super().compute()
        self.loss_history.append(loss)
        return loss

    def run_evaluation(self, engine=None):
        """Run the evaluator over ``eval_loader`` and record its metrics."""
        self.evaluator.run(self.eval_loader)
        if self.interactive:
            print(self.evaluator.state.metrics)
        for k, v in self.evaluator.state.metrics.items():
            # setdefault replaces the original `k not in dict.keys()`
            # membership test + two-branch append (same behavior, idiomatic).
            self.validation_history.setdefault(k, []).append(v)

    def plot(self, epoch_size=None, figsize=(16, 4)):
        """Plot training loss against validation metrics on twin y-axes.

        Parameters
        ----------
        epoch_size : int, optional
            Batches per epoch; inferred from history lengths if omitted.
        figsize : tuple
            Matplotlib figure size.
        """
        plt.figure(figsize=figsize)
        ax = plt.subplot()
        ax.set_yscale('log')
        ax.set_xlabel('Batches processed')
        ax.set_ylabel("loss")
        ax.plot(self.loss_history, label='Train Loss')
        # Twin axis so scores share the x-axis with the (log-scale) loss.
        ax2 = ax.twinx()
        ax2.set_ylabel("score")
        for k, v in self.validation_history.items():
            if epoch_size is None:
                epoch_size = len(self.loss_history) // len(v)
            iters = np.arange(1, len(v) + 1) * epoch_size
            if k == 'Loss':
                ax.plot(iters, v, label='Valid Loss')
            else:
                ax2.plot(iters, v, label=k, ls='--')
        ax.legend(frameon=False, loc='upper left')
        ax2.legend(frameon=False, loc='lower left')