%matplotlib inline
%reload_ext autoreload
%autoreload 2
from pytorch_inferno.model_wrapper import ModelWrapper
from pytorch_inferno.callback import *
from pytorch_inferno.data import get_paper_data
from pytorch_inferno.plotting import *
from pytorch_inferno.inference import *
from pytorch_inferno.utils import *

from fastcore.all import partialler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from typing import *
from collections import OrderedDict

import numpy as np
import torch
import torch.nn.functional as F
from torch import optim, autograd, nn, Tensor  # nn & Tensor are used explicitly below
from torch.distributions import Normal

Network

bs = 2000
data, test = get_paper_data(200000, bs=bs, n_test=1000000)

class VariableSoftmax[source]

VariableSoftmax(temp:float=1, dim:int=-1) :: Softmax

Softmax with temperature
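The [source] link has the canonical definition; matching the signature above, the idea is simply to divide the logits by the temperature before normalising, so low temperatures push the output towards a hard one-hot assignment, as the demo below shows. A minimal sketch (my paraphrase, not necessarily the exact source):

from torch import nn

class VariableSoftmax(nn.Softmax):
    "Softmax with temperature: logits are divided by temp before normalising"
    def __init__(self, temp:float=1, dim:int=-1):
        super().__init__(dim=dim)
        self.temp = temp

    def forward(self, x): return super().forward(x/self.temp)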

x = torch.randn((1,10))
VariableSoftmax(0.1)(x), VariableSoftmax(0.5)(x), VariableSoftmax(1)(x)
(tensor([[2.5189e-04, 5.1106e-04, 8.3655e-04, 9.1589e-08, 9.9670e-01, 1.0867e-05,
          1.4469e-04, 3.3155e-11, 1.1292e-06, 1.5487e-03]]),
 tensor([[0.0825, 0.0951, 0.1049, 0.0169, 0.4326, 0.0440, 0.0739, 0.0035, 0.0280,
          0.1187]]),
 tensor([[0.1041, 0.1117, 0.1174, 0.0472, 0.2383, 0.0760, 0.0985, 0.0213, 0.0606,
          0.1248]]))
net = nn.Sequential(nn.Linear(3,100),  nn.ReLU(),
                    nn.Linear(100,100),nn.ReLU(),
                    nn.Linear(100,10), VariableSoftmax(0.1))
init_net(net)

Loss

x,y,w = next(iter(data.trn_dl))
preds = net(x)
assert preds.shape == (bs,10)
def to_shape(p:Tensor) -> Tensor:
    "Sum per-bin predictions over the batch and normalise to a unit-area shape"
    f = p.sum(0)
    f = f/f.sum()
    return f
m = y.squeeze()==0         # background mask (gen_target == 0)
f_s = to_shape(preds[~m])  # signal shape
f_b = to_shape(preds[m])   # background shape
plt.plot(to_np(f_s))
plt.plot(to_np(f_b))
[Plot: signal (f_s) and background (f_b) fractional shapes across the 10 output bins]

The class is initialised with the true number of signal events $\mu$ and the true total number of events $n$; the true number of background events is then computed as $n-\mu$. The number of nuisance parameters must also be passed.

This approach includes the analytical effect of the nuisances, and their interdependence, since the augmented batch is passed through the model. It does, however, require that the effect of each nuisance on the input features is accurately parameterised; a rough sketch of such an augmentation is shown below.
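As an illustration of such a parameterisation, consider the paper's synthetic background, where r shifts the first feature and l rescales the exponentially distributed third feature (nominal mean 3, matching the r and l values scanned later on this page). A hedged sketch of what an on_batch_begin augmentation could do (augment_bkg is my own hypothetical helper, not part of the package):

import torch

def augment_bkg(x:torch.Tensor, m_bkg:torch.Tensor, r:torch.Tensor,
                l:torch.Tensor, l_nom:float=3.) -> torch.Tensor:
    # shift the first background feature by r and rescale the exponential
    # third feature from its nominal mean l_nom to mean l
    x = x.clone()
    x[m_bkg, 0] = x[m_bkg, 0] + r
    x[m_bkg, 2] = x[m_bkg, 2]*(l/l_nom)
    return x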

The running cycle is:

  1. on_train_begin: set alpha as a tensor with gradient and n_alpha+1 elements, with element zero set to $\mu$.
  2. for each epoch:
    1. for each batch:
      1. on_batch_begin: add the relevant nuisances to the incoming batch of inputs (as implemented in the inheriting class)
      2. on_forwards_end:
        1. compute the full Hessian of the negative log-likelihood w.r.t. alpha (shape systematics are included, since they are added to the input data and passed through the model)
        2. invert the Hessian and take the diagonal element corresponding to $\mu$ as the loss value (see the sketch after this list)
      3. on_batch_end: zero the gradient of alpha
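As a rough sketch of step 2.2 (my illustration with $\mu$ as the only parameter; the actual loss also handles the nuisance blocks and any auxiliary constraints), the diagonal-of-inverse-Hessian loss on an Asimov batch could look like:

import torch
from torch import autograd

def inferno_width(alpha:torch.Tensor, f_s:torch.Tensor, f_b:torch.Tensor,
                  b_true:float=1000., mu_idx:int=0) -> torch.Tensor:
    exp = alpha[mu_idx]*f_s + b_true*f_b  # expected counts per bin, differentiable in alpha
    obs = exp.detach()                    # Asimov data: observed counts equal the expectation
    nll = -torch.distributions.Poisson(exp, validate_args=False).log_prob(obs).sum()
    # full Hessian of the NLL w.r.t. alpha via double backward
    grad = autograd.grad(nll, alpha, create_graph=True)[0]
    hess = torch.stack([autograd.grad(g, alpha, create_graph=True)[0] for g in grad])
    # invert and return the variance estimate for mu as the loss
    return torch.inverse(hess)[mu_idx, mu_idx]

alpha = torch.tensor([50.], requires_grad=True)
f_s, f_b = torch.softmax(torch.randn(10), -1), torch.softmax(torch.randn(10), -1)
inferno_width(alpha, f_s, f_b)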

class AbsInferno[source]

AbsInferno(b_true:float, mu_true:float, n_shape_alphas:int=0, s_shape_alpha:bool=False, b_shape_alpha:bool=False, nonaux_b_norm:bool=False, shape_aux:Optional[List[Distribution]]=None, b_norm_aux:Optional[List[Distribution]]=None, s_norm_aux:Optional[List[Distribution]]=None) :: AbsCallback

Attempted reproduction of the TF1 & TF2 INFERNO implementations, with the exact effect of the nuisances propagated through the model

class PaperInferno[source]

PaperInferno(float_r:bool, float_l:bool, l_init:float=3, b_true:float=1000, mu_true:float=50, n_shape_alphas:int=0, nonaux_b_norm:bool=False, shape_aux:Optional[List[Distribution]]=None, b_norm_aux:Optional[List[Distribution]]=None, s_norm_aux:Optional[List[Distribution]]=None) :: AbsInferno

Inheriting class for the INFERNO paper's synthetic benchmark problem

net = nn.Sequential(nn.Linear(3,100),  nn.ReLU(),
                    nn.Linear(100,100),nn.ReLU(),
                    nn.Linear(100,10), VariableSoftmax(0.1))
init_net(net)
model = ModelWrapper(net)
%%time
model.fit(1, data=data, opt=partialler(optim.Adam, lr=1e-3), loss=None,
          cbs=[PaperInferno(float_r=True, float_l=True, shape_aux=[Normal(0,2), Normal(0,2)],
                            nonaux_b_norm=False, b_norm_aux=[Normal(0,100)]),
               LossTracker(), SaveBest('weights/best_ie.h5'), EarlyStopping(2)])
1: Train=955.807817993164 Valid=750.0136706542969
Loading best model with loss 750.0136706542969
CPU times: user 17.2 s, sys: 4.83 s, total: 22 s
Wall time: 17.9 s
model.save('weights/Inferno_Test_exact_bm2.h5')
model.load('weights/Inferno_Test_exact_bm2.h5')

Results

class InfernoPred[source]

InfernoPred() :: PredHandler

Prediction handler for hard assignments: each event is assigned the index of its highest-probability bin
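As a rough illustration (not the library's code), hard assignment amounts to an argmax over the ten softmax outputs:

import torch

def hard_assign(preds:torch.Tensor) -> torch.Tensor:
    # index of the highest-probability bin for each event
    return preds.argmax(dim=-1)

hard_assign(torch.tensor([[0.1, 0.7, 0.2]]))  # tensor([1])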

BM 0

preds = model._predict_dl(test, pred_cb=InfernoPred())
100.00% [250/250 00:11<00:00]
df = pd.DataFrame({'pred':preds})
df['gen_target'] = test.dataset.y
df.head()
   pred  gen_target
0     1         1.0
1     5         1.0
2     7         1.0
3     1         1.0
4     8         1.0
plot_preds(df, bin_edges=np.linspace(0,10,11))
bin_preds(df)
df.head()
   pred  gen_target  pred_bin
0     1         1.0         1
1     5         1.0         5
2     7         1.0         7
3     1         1.0         1
4     8         1.0         8
f_s,f_b = get_shape(df,1),get_shape(df,0)
assert is_close(f_s.sum().data.item(),f_b.sum().data.item()) and is_close(f_b.sum().data.item(), 1.0)
asimov = (50*f_s)+(1000*f_b)
asimov, asimov.sum()
(tensor([1.2372e+02, 3.7440e+00, 1.1214e+02, 1.0705e+00, 4.6940e+02, 2.1579e+01,
         3.2000e-03, 3.1280e+01, 2.6241e+02, 2.4659e+01]), tensor(1050.))
n = 1050
mu_scan = torch.linspace(20,80,61)
y = torch.zeros_like(mu_scan)
for i,m in enumerate(mu_scan):
    # Poisson likelihood per bin; validate_args=False permits non-integer Asimov counts
    pois = torch.distributions.Poisson((m*f_s)+(1000*f_b), validate_args=False)
    y[i] = -pois.log_prob(asimov).sum()
y
tensor([25.9082, 25.8155, 25.7267, 25.6417, 25.5602, 25.4821, 25.4081, 25.3374,
        25.2703, 25.2058, 25.1458, 25.0889, 25.0351, 24.9845, 24.9369, 24.8933,
        24.8521, 24.8141, 24.7793, 24.7475, 24.7185, 24.6927, 24.6693, 24.6488,
        24.6315, 24.6167, 24.6047, 24.5957, 24.5888, 24.5850, 24.5838, 24.5852,
        24.5887, 24.5954, 24.6042, 24.6155, 24.6293, 24.6455, 24.6641, 24.6852,
        24.7086, 24.7343, 24.7620, 24.7923, 24.8249, 24.8596, 24.8965, 24.9356,
        24.9767, 25.0203, 25.0658, 25.1135, 25.1633, 25.2149, 25.2689, 25.3247,
        25.3828, 25.4424, 25.5040, 25.5680, 25.6338])
w1 = plot_likelihood(y, mu_scan=mu_scan)
profiler = partialler(calc_profile, f_s_nom=f_s, f_b_nom=f_b, n_obs=1050, mu_scan=mu_scan, mu_true=50, n_steps=100)
nll = profiler()
w2 = plot_likelihood(nll, mu_scan=mu_scan)
assert w1 == w2
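The width reported by plot_likelihood appears to correspond to the range of $\mu$ over which the NLL stays within 1/2 of its minimum (the usual 68% interval for an approximately parabolic likelihood). A standalone sketch of that extraction (my approximation; the library may interpolate between scan points):

import numpy as np

def interval_width(nll:np.ndarray, mu_scan:np.ndarray) -> float:
    # range of mu with NLL within 0.5 of the minimum (~68% CL for a parabola)
    dnll = nll - nll.min()
    inside = mu_scan[dnll <= 0.5]
    return float(inside.max() - inside.min())

# e.g. interval_width(to_np(y), np.linspace(20, 80, 61))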

Nuisances

bkg = test.dataset.x[test.dataset.y.squeeze() == 0]
assert len(bkg) == 500000
b_shapes = get_paper_syst_shapes(bkg, df, model=model, pred_cb=InfernoPred())
Running: r=-0.2
100.00% [1/1 00:04<00:00]
Running: r=0
100.00% [1/1 00:04<00:00]
Running: r=0.2
100.00% [1/1 00:04<00:00]
Running: l=2.5
100.00% [1/1 00:04<00:00]
Running: l=3
100.00% [1/1 00:04<00:00]
Running: l=3.5
100.00% [1/1 00:04<00:00]
df
        pred  gen_target  pred_bin  pred_-0.2_3  pred_-0.2_3_bin  pred_0_3  pred_0_3_bin  pred_0.2_3  pred_0.2_3_bin  pred_0_2.5  pred_0_2.5_bin  pred_0_3.5  pred_0_3.5_bin
     0     1         1.0         1            1                1         1             1           1               1           1               1           1               1
     1     5         1.0         5            5                5         5             5           5               5           5               5           5               5
     2     7         1.0         7            7                7         7             7           7               7           7               7           7               7
     3     1         1.0         1            1                1         1             1           1               1           1               1           1               1
     4     8         1.0         8            8                8         8             8           8               8           8               8           8               8
   ...   ...         ...       ...          ...              ...       ...           ...         ...             ...         ...             ...         ...             ...
999995     4         0.0         4            2                2         4             4           4               4           4               4           4               4
999996     2         0.0         2            2                2         2             2           2               2           2               2           2               2
999997     2         0.0         2            2                2         2             2           2               2           2               2           2               2
999998     4         0.0         4            4                4         4             4           4               4           4               4           4               4
999999     0         0.0         0            0                0         0             0           0               0           0               0           0               0

1000000 rows × 13 columns

plot_preds(df, pred_names=['pred', 'pred_-0.2_3', 'pred_0.2_3', 'pred_0_2.5', 'pred_0_3.5'], bin_edges=np.linspace(0,10,11))
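The up/down shapes computed above let the profiler vary the background shape continuously with each nuisance. The exact interpolation lives in calc_profile; a common convention, and only a hedged guess at what is done here, is piecewise-linear vertical morphing:

import torch

def morph(f_nom:torch.Tensor, f_up:torch.Tensor, f_dw:torch.Tensor,
          alpha:torch.Tensor) -> torch.Tensor:
    # alpha = 0 -> nominal, +1 -> up-shifted, -1 -> down-shifted shape,
    # linear in between and beyond
    return torch.where(alpha >= 0,
                       f_nom + alpha*(f_up - f_nom),
                       f_nom - alpha*(f_dw - f_nom))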

Newton

BM 1

r free, l fixed

nll = profiler(f_b_up=b_shapes['f_b_up'][0], f_b_dw=b_shapes['f_b_dw'][0])
100.00% [61/61 00:08<00:00]
plot_likelihood(nll, mu_scan)
[21.87416913228614]

BM 1l

r fixed, l free

nll = profiler(f_b_up= b_shapes['f_b_up'][1], f_b_dw=b_shapes['f_b_dw'][1])
100.00% [61/61 00:08<00:00]
plot_likelihood(nll, mu_scan)
[20.922407221765884]

BM 2

nll = profiler(**b_shapes)
100.00% [61/61 00:10<00:00]
plot_likelihood(nll, mu_scan)
[22.08785692789433]
b_shapes
OrderedDict([('f_b_nom',
              tensor([[1.2195e-01, 2.1920e-03, 1.0751e-01, 7.4200e-04, 4.5548e-01, 1.3110e-02,
                       2.0000e-06, 2.6924e-02, 2.5166e-01, 2.0424e-02]])),
             ('f_b_up',
              tensor([[1.3063e-01, 2.0640e-03, 1.0356e-01, 6.8200e-04, 4.7004e-01, 1.1618e-02,
                       4.0000e-06, 2.2894e-02, 2.4112e-01, 1.7386e-02],
                      [1.1268e-01, 2.9940e-03, 1.1833e-01, 8.0400e-04, 4.5017e-01, 1.5380e-02,
                       4.0000e-06, 2.6188e-02, 2.5278e-01, 2.0674e-02]])),
             ('f_b_dw',
              tensor([[1.1363e-01, 2.3120e-03, 1.1106e-01, 7.9600e-04, 4.4045e-01, 1.4674e-02,
                       8.0000e-06, 3.1706e-02, 2.6152e-01, 2.3840e-02],
                      [1.3301e-01, 1.4440e-03, 9.4986e-02, 7.8800e-04, 4.6113e-01, 1.0944e-02,
                       2.0000e-06, 2.7590e-02, 2.5009e-01, 2.0012e-02]]))])
f_s
tensor([[3.5316e-02, 3.1040e-02, 9.2450e-02, 6.5700e-03, 2.7827e-01, 1.6937e-01,
         2.4000e-05, 8.7126e-02, 2.1514e-01, 8.4690e-02]])

BM 3

shape_aux = [Normal(0,2), Normal(0,2)]
nll = profiler(shape_aux=shape_aux, **b_shapes)
100.00% [61/61 00:13<00:00]
plot_likelihood(nll, mu_scan)
[21.908752077113412]
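Hedged sketch of how the auxiliary measurements enter: each prior contributes a $-\log p(\alpha)$ term to the profiled NLL (the exact handling inside calc_profile may differ; add_aux_terms is my own hypothetical helper):

import torch
from torch.distributions import Normal

def add_aux_terms(nll:torch.Tensor, alphas:torch.Tensor, auxes:list) -> torch.Tensor:
    # each auxiliary measurement constrains its nuisance via a -log prior term
    for a, aux in zip(alphas, auxes):
        nll = nll - aux.log_prob(a)
    return nll

# e.g. two shape nuisances at their nominal values under width-2 Gaussian priors
add_aux_terms(torch.tensor(24.6), torch.zeros(2), [Normal(0,2), Normal(0,2)])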

BM 4

nll = profiler(shape_aux=shape_aux, b_norm_aux=[Normal(0,100)], **b_shapes)
100.00% [61/61 00:18<00:00]
plot_likelihood(nll, mu_scan)
[27.516062797258904]