%matplotlib inline
%reload_ext autoreload
%autoreload 2
from pytorch_inferno.model_wrapper import ModelWrapper
from pytorch_inferno.callback import *
from pytorch_inferno.data import get_paper_data
from pytorch_inferno.plotting import *
from pytorch_inferno.inference import *
from pytorch_inferno.utils import *

from fastcore.all import partialler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from typing import *
from collections import OrderedDict

import numpy as np
import torch
import torch.nn.functional as F
from torch import optim, autograd, nn, Tensor  # nn & Tensor are used explicitly below
from torch.distributions import Normal

Network

bs = 2000
data, test = get_paper_data(200000, bs=bs, n_test=1000000)

class VariableSoftmax[source]

VariableSoftmax(temp:float=1, dim:int=-1) :: Softmax

Softmax with temperature
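The [source] link has the canonical definition; matching the signature above, the idea is simply to divide the logits by the temperature before normalising, so low temperatures push the output towards a hard one-hot assignment, as the demo below shows. A minimal sketch (my paraphrase, not necessarily the exact source):

from torch import nn

class VariableSoftmax(nn.Softmax):
    "Softmax with temperature: logits are divided by temp before normalising"
    def __init__(self, temp:float=1, dim:int=-1):
        super().__init__(dim=dim)
        self.temp = temp

    def forward(self, x): return super().forward(x/self.temp)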

x = torch.randn((1,10))
VariableSoftmax(0.1)(x), VariableSoftmax(0.5)(x), VariableSoftmax(1)(x)
(tensor([[2.5189e-04, 5.1106e-04, 8.3655e-04, 9.1589e-08, 9.9670e-01, 1.0867e-05,
          1.4469e-04, 3.3155e-11, 1.1292e-06, 1.5487e-03]]),
 tensor([[0.0825, 0.0951, 0.1049, 0.0169, 0.4326, 0.0440, 0.0739, 0.0035, 0.0280,
          0.1187]]),
 tensor([[0.1041, 0.1117, 0.1174, 0.0472, 0.2383, 0.0760, 0.0985, 0.0213, 0.0606,
          0.1248]]))
net = nn.Sequential(nn.Linear(3,100),  nn.ReLU(),
                    nn.Linear(100,100),nn.ReLU(),
                    nn.Linear(100,10), VariableSoftmax(0.1))
init_net(net)

Loss

x,y,w = next(iter(data.trn_dl))
preds = net(x)
assert preds.shape == (bs,10)
def to_shape(p:Tensor) -> Tensor:
    "Sum per-bin predictions over the batch and normalise to a unit-area shape"
    f = p.sum(0)
    f = f/f.sum()
    return f
m = y.squeeze()==0         # background mask (gen_target == 0)
f_s = to_shape(preds[~m])  # signal shape
f_b = to_shape(preds[m])   # background shape
plt.plot(to_np(f_s))
plt.plot(to_np(f_b))
[Plot: signal (f_s) and background (f_b) fractional shapes across the 10 output bins]

The class is initialised with the true number of signal events $\mu$ and the true total number of events $n$; the true number of background events is then computed as $n-\mu$. The number of nuisance parameters must also be passed.

This approach includes the analytical effect of the nuisances, and their interdependence, since the augmented batch is passed through the model. It does, however, require that the effect of each nuisance on the input features is accurately parameterised; a rough sketch of such an augmentation is shown below.
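As an illustration of such a parameterisation, consider the paper's synthetic background, where r shifts the first feature and l rescales the exponentially distributed third feature (nominal mean 3, matching the r and l values scanned later on this page). A hedged sketch of what an on_batch_begin augmentation could do (augment_bkg is my own hypothetical helper, not part of the package):

import torch

def augment_bkg(x:torch.Tensor, m_bkg:torch.Tensor, r:torch.Tensor,
                l:torch.Tensor, l_nom:float=3.) -> torch.Tensor:
    # shift the first background feature by r and rescale the exponential
    # third feature from its nominal mean l_nom to mean l
    x = x.clone()
    x[m_bkg, 0] = x[m_bkg, 0] + r
    x[m_bkg, 2] = x[m_bkg, 2]*(l/l_nom)
    return x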

The running cycle is:

  1. on_train_begin: set alpha as a tensor with gradient and n_alpha+1 elements, with element zero set to $\mu$.
  2. for each epoch:
    1. for each batch:
      1. on_batch_begin: add the relevant nuisances to the incoming batch of inputs (as implemented in the inheriting class)
      2. on_forwards_end:
        1. compute the full Hessian of the negative log-likelihood w.r.t. alpha (shape systematics are included, since they are added to the input data and passed through the model)
        2. invert the Hessian and take the diagonal element corresponding to $\mu$ as the loss value (see the sketch after this list)
      3. on_batch_end: zero the gradient of alpha
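As a rough sketch of step 2.2 (my illustration with $\mu$ as the only parameter; the actual loss also handles the nuisance blocks and any auxiliary constraints), the diagonal-of-inverse-Hessian loss on an Asimov batch could look like:

import torch
from torch import autograd

def inferno_width(alpha:torch.Tensor, f_s:torch.Tensor, f_b:torch.Tensor,
                  b_true:float=1000., mu_idx:int=0) -> torch.Tensor:
    exp = alpha[mu_idx]*f_s + b_true*f_b  # expected counts per bin, differentiable in alpha
    obs = exp.detach()                    # Asimov data: observed counts equal the expectation
    nll = -torch.distributions.Poisson(exp, validate_args=False).log_prob(obs).sum()
    # full Hessian of the NLL w.r.t. alpha via double backward
    grad = autograd.grad(nll, alpha, create_graph=True)[0]
    hess = torch.stack([autograd.grad(g, alpha, create_graph=True)[0] for g in grad])
    # invert and return the variance estimate for mu as the loss
    return torch.inverse(hess)[mu_idx, mu_idx]

alpha = torch.tensor([50.], requires_grad=True)
f_s, f_b = torch.softmax(torch.randn(10), -1), torch.softmax(torch.randn(10), -1)
inferno_width(alpha, f_s, f_b)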

class AbsInferno[source]

AbsInferno(b_true:float, mu_true:float, n_shape_alphas:int=0, s_shape_alpha:bool=False, b_shape_alpha:bool=False, nonaux_b_norm:bool=False, shape_aux:Optional[List[Distribution]]=None, b_norm_aux:Optional[List[Distribution]]=None, s_norm_aux:Optional[List[Distribution]]=None) :: AbsCallback

Attempted reproduction of the TF1 & TF2 INFERNO implementations, with the exact effect of the nuisances propagated through the model

class PaperInferno[source]

PaperInferno(float_r:bool, float_l:bool, l_init:float=3, b_true:float=1000, mu_true:float=50, n_shape_alphas:int=0, nonaux_b_norm:bool=False, shape_aux:Optional[List[Distribution]]=None, b_norm_aux:Optional[List[Distribution]]=None, s_norm_aux:Optional[List[Distribution]]=None) :: AbsInferno

Inheriting class for the INFERNO paper's synthetic benchmark problem

net = nn.Sequential(nn.Linear(3,100),  nn.ReLU(),
                    nn.Linear(100,100),nn.ReLU(),
                    nn.Linear(100,10), VariableSoftmax(0.1))
init_net(net)
model = ModelWrapper(net)
%%time
model.fit(1, data=data, opt=partialler(optim.Adam, lr=1e-3), loss=None,
          cbs=[PaperInferno(float_r=True, float_l=True, shape_aux=[Normal(0,2), Normal(0,2)],
                            nonaux_b_norm=False, b_norm_aux=[Normal(0,100)]),
               LossTracker(), SaveBest('weights/best_ie.h5'), EarlyStopping(2)])
1: Train=955.807817993164 Valid=750.0136706542969
Loading best model with loss 750.0136706542969
CPU times: user 17.2 s, sys: 4.83 s, total: 22 s
Wall time: 17.9 s
model.save('weights/Inferno_Test_exact_bm2.h5')
model.load('weights/Inferno_Test_exact_bm2.h5')

Results

class InfernoPred[source]

InfernoPred() :: PredHandler

Prediction handler for hard assignments: each event is assigned the index of its highest-probability bin
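As a rough illustration (not the library's code), hard assignment amounts to an argmax over the ten softmax outputs:

import torch

def hard_assign(preds:torch.Tensor) -> torch.Tensor:
    # index of the highest-probability bin for each event
    return preds.argmax(dim=-1)

hard_assign(torch.tensor([[0.1, 0.7, 0.2]]))  # tensor([1])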

BM 0

preds = model._predict_dl(test, pred_cb=InfernoPred())
100.00% [250/250 00:11<00:00]
df = pd.DataFrame({'pred':preds})
df['gen_target'] = test.dataset.y
df.head()
   pred  gen_target
0     1         1.0
1     5         1.0
2     7         1.0
3     1         1.0
4     8         1.0
plot_preds(df, bin_edges=np.linspace(0,10,11))
bin_preds(df)
df.head()
   pred  gen_target  pred_bin
0     1         1.0         1
1     5         1.0         5
2     7         1.0         7
3     1         1.0         1
4     8         1.0         8
f_s,f_b = get_shape(df,1),get_shape(df,0)
assert is_close(f_s.sum().data.item(),f_b.sum().data.item()) and is_close(f_b.sum().data.item(), 1.0)
asimov = (50*f_s)+(1000*f_b)
asimov, asimov.sum()
(tensor([1.2372e+02, 3.7440e+00, 1.1214e+02, 1.0705e+00, 4.6940e+02, 2.1579e+01,
         3.2000e-03, 3.1280e+01, 2.6241e+02, 2.4659e+01]), tensor(1050.))
n = 1050
mu_scan = torch.linspace(20,80,61)
y = torch.zeros_like(mu_scan)
for i,m in enumerate(mu_scan):
    # Poisson likelihood per bin; validate_args=False permits non-integer Asimov counts
    pois = torch.distributions.Poisson((m*f_s)+(1000*f_b), validate_args=False)
    y[i] = -pois.log_prob(asimov).sum()
y
tensor([25.9082, 25.8155, 25.7267, 25.6417, 25.5602, 25.4821, 25.4081, 25.3374,
        25.2703, 25.2058, 25.1458, 25.0889, 25.0351, 24.9845, 24.9369, 24.8933,
        24.8521, 24.8141, 24.7793, 24.7475, 24.7185, 24.6927, 24.6693, 24.6488,
        24.6315, 24.6167, 24.6047, 24.5957, 24.5888, 24.5850, 24.5838, 24.5852,
        24.5887, 24.5954, 24.6042, 24.6155, 24.6293, 24.6455, 24.6641, 24.6852,
        24.7086, 24.7343, 24.7620, 24.7923, 24.8249, 24.8596, 24.8965, 24.9356,
        24.9767, 25.0203, 25.0658, 25.1135, 25.1633, 25.2149, 25.2689, 25.3247,
        25.3828, 25.4424, 25.5040, 25.5680, 25.6338])
w1 = plot_likelihood(y, mu_scan=mu_scan)
profiler = partialler(calc_profile, f_s_nom=f_s, f_b_nom=f_b, n_obs=1050, mu_scan=mu_scan, mu_true=50, n_steps=100)
nll = profiler()
w2 = plot_likelihood(nll, mu_scan=mu_scan)
assert w1 == w2
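The width reported by plot_likelihood appears to correspond to the range of $\mu$ over which the NLL stays within 1/2 of its minimum (the usual 68% interval for an approximately parabolic likelihood). A standalone sketch of that extraction (my approximation; the library may interpolate between scan points):

import numpy as np

def interval_width(nll:np.ndarray, mu_scan:np.ndarray) -> float:
    # range of mu with NLL within 0.5 of the minimum (~68% CL for a parabola)
    dnll = nll - nll.min()
    inside = mu_scan[dnll <= 0.5]
    return float(inside.max() - inside.min())

# e.g. interval_width(to_np(y), np.linspace(20, 80, 61))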

Nuisances

bkg = test.dataset.x[test.dataset.y.squeeze() == 0]
assert len(bkg) == 500000
b_shapes = get_paper_syst_shapes(bkg, df, model=model, pred_cb=InfernoPred())
Running: r=-0.2
100.00% [1/1 00:04<00:00]
Running: r=0
100.00% [1/1 00:04<00:00]
Running: r=0.2
100.00% [1/1 00:04<00:00]
Running: l=2.5
100.00% [1/1 00:04<00:00]
Running: l=3
100.00% [1/1 00:04<00:00]
Running: l=3.5
100.00% [1/1 00:04<00:00]
df
        pred  gen_target  pred_bin  pred_-0.2_3  pred_-0.2_3_bin  pred_0_3  pred_0_3_bin  pred_0.2_3  pred_0.2_3_bin  pred_0_2.5  pred_0_2.5_bin  pred_0_3.5  pred_0_3.5_bin
     0     1         1.0         1            1                1         1             1           1               1           1               1           1               1
     1     5         1.0         5            5                5         5             5           5               5           5               5           5               5
     2     7         1.0         7            7                7         7             7           7               7           7               7           7               7
     3     1         1.0         1            1                1         1             1           1               1           1               1           1               1
     4     8         1.0         8            8                8         8             8           8               8           8               8           8               8
   ...   ...         ...       ...          ...              ...       ...           ...         ...             ...         ...             ...         ...             ...
999995     4         0.0         4            2                2         4             4           4               4           4               4           4               4
999996     2         0.0         2            2                2         2             2           2               2           2               2           2               2
999997     2         0.0         2            2                2         2             2           2               2           2               2           2               2
999998     4         0.0         4            4                4         4             4           4               4           4               4           4               4
999999     0         0.0         0            0                0         0             0           0               0           0               0           0               0

1000000 rows × 13 columns

plot_preds(df, pred_names=['pred', 'pred_-0.2_3', 'pred_0.2_3', 'pred_0_2.5', 'pred_0_3.5'], bin_edges=np.linspace(0,10,11))
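The up/down shapes computed above let the profiler vary the background shape continuously with each nuisance. The exact interpolation lives in calc_profile; a common convention, and only a hedged guess at what is done here, is piecewise-linear vertical morphing:

import torch

def morph(f_nom:torch.Tensor, f_up:torch.Tensor, f_dw:torch.Tensor,
          alpha:torch.Tensor) -> torch.Tensor:
    # alpha = 0 -> nominal, +1 -> up-shifted, -1 -> down-shifted shape,
    # linear in between and beyond
    return torch.where(alpha >= 0,
                       f_nom + alpha*(f_up - f_nom),
                       f_nom - alpha*(f_dw - f_nom))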

Newton

BM 1

r free, l fixed

nll = profiler(f_b_up=b_shapes['f_b_up'][0], f_b_dw=b_shapes['f_b_dw'][0])
100.00% [61/61 00:08<00:00]
plot_likelihood(nll, mu_scan)
[21.87416913228614]

BM 1l

r fixed, l free

nll = profiler(f_b_up= b_shapes['f_b_up'][1], f_b_dw=b_shapes['f_b_dw'][1])
100.00% [61/61 00:08<00:00]
plot_likelihood(nll, mu_scan)
[20.922407221765884]

BM 2

nll = profiler(**b_shapes)
100.00% [61/61 00:10<00:00]
plot_likelihood(nll, mu_scan)
[22.08785692789433]
b_shapes
OrderedDict([('f_b_nom',
              tensor([[1.2195e-01, 2.1920e-03, 1.0751e-01, 7.4200e-04, 4.5548e-01, 1.3110e-02,
                       2.0000e-06, 2.6924e-02, 2.5166e-01, 2.0424e-02]])),
             ('f_b_up',
              tensor([[1.3063e-01, 2.0640e-03, 1.0356e-01, 6.8200e-04, 4.7004e-01, 1.1618e-02,
                       4.0000e-06, 2.2894e-02, 2.4112e-01, 1.7386e-02],
                      [1.1268e-01, 2.9940e-03, 1.1833e-01, 8.0400e-04, 4.5017e-01, 1.5380e-02,
                       4.0000e-06, 2.6188e-02, 2.5278e-01, 2.0674e-02]])),
             ('f_b_dw',
              tensor([[1.1363e-01, 2.3120e-03, 1.1106e-01, 7.9600e-04, 4.4045e-01, 1.4674e-02,
                       8.0000e-06, 3.1706e-02, 2.6152e-01, 2.3840e-02],
                      [1.3301e-01, 1.4440e-03, 9.4986e-02, 7.8800e-04, 4.6113e-01, 1.0944e-02,
                       2.0000e-06, 2.7590e-02, 2.5009e-01, 2.0012e-02]]))])
f_s
tensor([[3.5316e-02, 3.1040e-02, 9.2450e-02, 6.5700e-03, 2.7827e-01, 1.6937e-01,
         2.4000e-05, 8.7126e-02, 2.1514e-01, 8.4690e-02]])

BM 3

shape_aux = [Normal(0,2), Normal(0,2)]
nll = profiler(shape_aux=shape_aux, **b_shapes)
100.00% [61/61 00:13<00:00]
plot_likelihood(nll, mu_scan)
[21.908752077113412]
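Hedged sketch of how the auxiliary measurements enter: each prior contributes a $-\log p(\alpha)$ term to the profiled NLL (the exact handling inside calc_profile may differ; add_aux_terms is my own hypothetical helper):

import torch
from torch.distributions import Normal

def add_aux_terms(nll:torch.Tensor, alphas:torch.Tensor, auxes:list) -> torch.Tensor:
    # each auxiliary measurement constrains its nuisance via a -log prior term
    for a, aux in zip(alphas, auxes):
        nll = nll - aux.log_prob(a)
    return nll

# e.g. two shape nuisances at their nominal values under width-2 Gaussian priors
add_aux_terms(torch.tensor(24.6), torch.zeros(2), [Normal(0,2), Normal(0,2)])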

BM 4

nll = profiler(shape_aux=shape_aux, b_norm_aux=[Normal(0,100)], **b_shapes)
100.00% [61/61 00:18<00:00]
plot_likelihood(nll, mu_scan)
[27.516062797258904]