%matplotlib inline
%reload_ext autoreload
%autoreload 2
from fastcore.all import partialler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from typing import *
from collections import OrderedDict
import numpy as np
from abc import abstractmethod, ABCMeta
from fastcore.all import store_attr, delegates

import torch.nn.functional as F
from torch import optim, autograd
from torch.distributions import Normal
from torch import Tensor
import torch
import torch.nn as nn
from torch.distributions import Distribution

from pytorch_inferno.model_wrapper import ModelWrapper
from pytorch_inferno.callback import *
from pytorch_inferno.data import get_paper_data
from pytorch_inferno.plotting import *
from pytorch_inferno.inference import *
from pytorch_inferno.utils import *
from pytorch_inferno.inferno import *

Loss

The class is initialised with a true number of signal events $\mu$ and a true total number of events $n$. The true number of background events is then computed as $n-\mu$. The number of nuisance parameters must also be passed.

This process only approximates the effect of shape systematics, since they are evaluated as up/down shape variations and their inter-dependence is not accounted for. It is, however, perhaps more realistic/practical, since the exact effect of the nuisances on the input features does not have to be analytically derivable; instead one can use up/down Monte Carlo samples. For this synthetic example, the analytical effects are used.
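
Concretely, the interpolation can be sketched as a piecewise-linear morphing between the nominal and up/down shapes, with the nuisance theta expressed in units of its ±1 sigma variation. A minimal sketch with hypothetical names, not the library implementation:

import torch
from torch import Tensor

def interp_shape(theta:Tensor, f_nom:Tensor, f_up:Tensor, f_dw:Tensor) -> Tensor:
    r'''Piecewise-linear morphing: theta=+1 recovers f_up, theta=-1 recovers f_dw.'''
    diff = torch.where(theta >= 0, f_up-f_nom, f_nom-f_dw)  # pick the relevant variation
    return f_nom + theta*diff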

Running cycle is:

  1. for e in #epochs:
    1. for b in #batches:
      1. on_forwards_end:
        1. compute up/down shape variations by augmenting the data and passing it through the model
        2. set alpha as a tensor with gradient and n_alpha+1 elements; set element zero to $\mu$
        3. compute the full Hessian of the negative log-likelihood w.r.t. alpha (shape systematics are included by running the model on augmented data to get the up/down shapes)
        4. invert the Hessian and take the diagonal element corresponding to $\mu$ as the loss value (a minimal sketch of steps 2-4 follows this list)
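
A minimal sketch of steps 2-4 for the simplest case of a single parameter of interest and no shape systematics (hypothetical helper; the callback implements the full version with interpolated shapes and nuisances appended to alpha):

import torch
from torch import Tensor

def inferno_loss(f_s:Tensor, f_b:Tensor, mu:float, n:float) -> Tensor:
    r'''Variance on mu from the inverse Hessian of an Asimov Poisson NLL.'''
    b = n-mu                                            # true background count
    n_obs = (mu*f_s + b*f_b).detach().squeeze()         # Asimov (nominal) data
    alpha = torch.tensor([mu])                          # element zero is mu
    def nll(a:Tensor) -> Tensor:
        t_exp = (a[0]*f_s + b*f_b).squeeze()            # expected counts per bin
        return -(n_obs*torch.log(t_exp) - t_exp).sum()  # Poisson NLL (constants dropped)
    hess = torch.autograd.functional.hessian(nll, alpha, create_graph=True)
    return torch.inverse(hess)[0,0]                     # diagonal element for mu

Since f_s and f_b are built from the model outputs, create_graph=True lets the loss backpropagate through the Hessian to the network weights.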

class AbsApproxInferno[source]

AbsApproxInferno(aug_alpha:bool=False, n_steps:int=100, lr:float=0.1, n_shape_alphas:int=0, s_shape_alpha:bool=False, b_shape_alpha:bool=False, nonaux_b_norm:bool=False, shape_aux:Optional[List[Distribution]]=None, b_norm_aux:Optional[List[Distribution]]=None, s_norm_aux:Optional[List[Distribution]]=None) :: AbsInferno

Attempted reproduction of INFERNO following the paper's description, with nuisances approximated by creating up/down shapes and interpolating between them. Includes an option to randomise the parameters per batch and converge to better values, which results in slightly better performance.

class ApproxPaperInferno[source]

ApproxPaperInferno(r_mods:Optional[Tuple[float, float]]=(-0.2, 0.2), l_mods:Optional[Tuple[float, float]]=(2.5, 3.5), l_init:float=3, b_true:float=1000, mu_true:float=50, aug_alpha:bool=False, n_steps:int=100, lr:float=0.1, n_shape_alphas:int=0, nonaux_b_norm:bool=False, shape_aux:Optional[List[Distribution]]=None, b_norm_aux:Optional[List[Distribution]]=None, s_norm_aux:Optional[List[Distribution]]=None) :: AbsApproxInferno

Inheriting class for dealing with the INFERNO paper's synthetic problem

bs = 2000
data, test = get_paper_data(200000, bs=bs, n_test=1000000)  # paper's synthetic dataset: 3 features, binary target
net = nn.Sequential(nn.Linear(3,100),   nn.ReLU(),
                    nn.Linear(100,100), nn.ReLU(),
                    nn.Linear(100,10),  VariableSoftmax(0.1))  # 10 soft "bins" via low-temperature softmax
init_net(net)
model = ModelWrapper(net)
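
VariableSoftmax is a softmax with a temperature; the low value used here (0.1) sharpens the outputs towards hard bin assignments while staying differentiable. A minimal sketch of the idea under a hypothetical name (the library provides its own implementation):

import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

class TempSoftmax(nn.Module):
    r'''Hypothetical stand-in for VariableSoftmax: softmax with temperature.'''
    def __init__(self, temp:float=1.):
        super().__init__()
        self.temp = temp

    def forward(self, x:Tensor) -> Tensor:
        return F.softmax(x/self.temp, dim=-1)  # lower temp -> sharper bin assignment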
%%time
model.fit(1, data=data, opt=partialler(optim.Adam,lr=1e-3), loss=None,  # the INFERNO callback supplies the loss
          cbs=[ApproxPaperInferno(r_mods=(-0.2,0.2), l_mods=(2.5,3.5), shape_aux=[Normal(0,2), Normal(0,2)],
                                  nonaux_b_norm=False, b_norm_aux=[Normal(0,100)], aug_alpha=False, n_steps=10),
               LossTracker(), SaveBest('weights/best_ii.h5'), EarlyStopping(2)])
1: Train=2040.1375436401368 Valid=690.299853515625
Loading best model with loss 690.299853515625
CPU times: user 11.5 s, sys: 1.65 s, total: 13.2 s
Wall time: 12 s
model.save('weights/Inferno_Test_interp_bm2.h5')
model.load('weights/Inferno_Test_interp_bm2.h5')

Results

BM 0

preds = model._predict_dl(test, pred_cb=InfernoPred())
100.00% [250/250 00:15<00:00]
df = pd.DataFrame({'pred':preds})
df['gen_target'] = test.dataset.y
df.head()
   pred  gen_target
0     5         1.0
1     7         1.0
2     5         1.0
3     5         1.0
4     5         1.0
plot_preds(df, bin_edges=np.linspace(0,10,11))
bin_preds(df)
df.head()
   pred  gen_target  pred_bin
0     5         1.0         5
1     7         1.0         7
2     5         1.0         5
3     5         1.0         5
4     5         1.0         5
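
get_shape below builds the per-class shape templates; conceptually this is the normalised histogram of predicted bins for each class. A minimal sketch under assumed conventions (hypothetical helper, 10 bins, templates of shape (1, n_bins)):

import numpy as np
import pandas as pd
import torch

def shape_from_df(df:pd.DataFrame, targ:float, n_bins:int=10) -> torch.Tensor:
    r'''Normalised histogram of predicted bins for one class.'''
    counts = np.bincount(df.loc[df.gen_target == targ, 'pred_bin'], minlength=n_bins)
    return torch.tensor(counts/counts.sum(), dtype=torch.float32)[None]  # (1, n_bins)
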
f_s,f_b = get_shape(df,1),get_shape(df,0)
n = 1050
mu_scan = torch.linspace(20,80,61)
profiler = partialler(calc_profile, f_s_nom=f_s, f_b_nom=f_b, n_obs=n, mu_scan=mu_scan, mu_true=50, n_steps=100)
nll = profiler()
plot_likelihood(nll, mu_scan=mu_scan)
[19.098483749233175]
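
The bracketed number printed by plot_likelihood appears to be the interval width on $\mu$. As a rough illustration (not the library implementation), such a width can be read off the profiled NLL curve from the points where the NLL rises 0.5 above its minimum:

import numpy as np

def approx_width(nll:np.ndarray, mu_scan:np.ndarray) -> float:
    r'''Distance between the scan points where the profiled delta-NLL crosses 0.5.'''
    dnll = nll - nll.min()
    inside = mu_scan[dnll <= 0.5]  # scan points within the 1-sigma region
    return float(inside.max() - inside.min())

# e.g. approx_width(nll.detach().numpy().squeeze(), mu_scan.numpy())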

Nuisances

bkg = test.dataset.x[test.dataset.y.squeeze() == 0]
assert len(bkg) == 500000
b_shapes = get_paper_syst_shapes(bkg, df, model=model, pred_cb=InfernoPred())
Running: r=-0.2
100.00% [1/1 00:05<00:00]
Running: r=0
100.00% [1/1 00:05<00:00]
Running: r=0.2
100.00% [1/1 00:04<00:00]
Running: l=2.5
100.00% [1/1 00:04<00:00]
Running: l=3
100.00% [1/1 00:04<00:00]
Running: l=3.5
100.00% [1/1 00:04<00:00]
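
get_paper_syst_shapes recomputes the background template under the shifted values of r and l listed above. For this synthetic problem the augmentation can be sketched as shifting the first background feature by r and rescaling the exponential feature from its nominal l=3; this is an assumed form with a hypothetical helper name:

import torch
from torch import Tensor

def augment_bkg(x:Tensor, r:float=0., l:float=3.) -> Tensor:
    r'''Assumed background systematics for the paper's synthetic data.'''
    x = x.clone()
    x[:,0] = x[:,0]+r     # shift the mean of the first feature by r
    x[:,2] = x[:,2]*l/3.  # rescale the exponential feature (nominal l=3)
    return x
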
df
        pred  gen_target  pred_bin  pred_-0.2_3  pred_-0.2_3_bin  pred_0_3  pred_0_3_bin  pred_0.2_3  pred_0.2_3_bin  pred_0_2.5  pred_0_2.5_bin  pred_0_3.5  pred_0_3.5_bin
0          5         1.0         5            5                5         5             5           5               5           5               5           5               5
1          7         1.0         7            7                7         7             7           7               7           7               7           7               7
2          5         1.0         5            5                5         5             5           5               5           5               5           5               5
3          5         1.0         5            5                5         5             5           5               5           5               5           5               5
4          5         1.0         5            5                5         5             5           5               5           5               5           5               5
...      ...         ...       ...          ...              ...       ...           ...         ...             ...         ...             ...         ...             ...
999995     7         0.0         7            7                7         7             7           7               7           7               7           7               7
999996     4         0.0         4            4                4         4             4           4               4           4               4           4               4
999997     4         0.0         4            4                4         4             4           4               4           4               4           4               4
999998     2         0.0         2            2                2         2             2           2               2           2               2           2               2
999999     7         0.0         7            7                7         7             7           7               7           7               7           7               7

1000000 rows × 13 columns

plot_preds(df, pred_names=['pred', 'pred_-0.2_3', 'pred_0.2_3', 'pred_0_2.5', 'pred_0_3.5'], bin_edges=np.linspace(0,10,11))

Newton
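
The benchmarks below profile the nuisances at each value of $\mu$ by Newton minimisation of the NLL (controlled by the n_steps and lr arguments of calc_profile). A minimal sketch of such an update for a generic differentiable nll:

import torch
from typing import Callable

def profile_newton(nll:Callable[[torch.Tensor],torch.Tensor], theta:torch.Tensor,
                   n_steps:int=100, lr:float=0.1) -> torch.Tensor:
    r'''Minimise nll over the nuisances theta with damped Newton steps.'''
    theta = theta.clone().requires_grad_(True)
    for _ in range(n_steps):
        grad = torch.autograd.grad(nll(theta), theta)[0]
        hess = torch.autograd.functional.hessian(nll, theta)
        with torch.no_grad(): theta -= lr*torch.linalg.solve(hess, grad)  # step = H^-1 g
    return theta.detach()

# e.g. profile_newton(lambda t: ((t-1)**2).sum(), torch.zeros(2))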

BM 1

r free, l fixed

nll = profiler(f_b_up=b_shapes['f_b_up'][0], f_b_dw=b_shapes['f_b_dw'][0])
100.00% [61/61 00:10<00:00]
plot_likelihood(nll, mu_scan)
[20.755227139465823]

BM 1l

r fixed, l free

nll = profiler(f_b_up=b_shapes['f_b_up'][1], f_b_dw=b_shapes['f_b_dw'][1])
100.00% [61/61 00:10<00:00]
plot_likelihood(nll, mu_scan)
[20.773284157170224]

BM 2

nll = profiler(**b_shapes)
100.00% [61/61 00:13<00:00]
plot_likelihood(nll, mu_scan)
[21.274396215186457]
b_shapes
OrderedDict([('f_b_nom',
              tensor([[2.0900e-03, 6.3720e-03, 2.8979e-01, 2.0000e-13, 1.2954e-01, 3.0890e-02,
                       1.6091e-01, 2.4900e-01, 1.2940e-01, 2.0060e-03]])),
             ('f_b_up',
              tensor([[1.7560e-03, 5.8640e-03, 2.8042e-01, 2.0000e-13, 1.3159e-01, 2.7676e-02,
                       1.7119e-01, 2.5996e-01, 1.1977e-01, 1.7760e-03],
                      [1.8500e-03, 5.5920e-03, 3.0597e-01, 2.0000e-13, 1.2943e-01, 3.6628e-02,
                       1.6034e-01, 2.2894e-01, 1.2928e-01, 1.9580e-03]])),
             ('f_b_dw',
              tensor([[2.3580e-03, 6.5600e-03, 2.9837e-01, 2.0000e-13, 1.2661e-01, 3.4386e-02,
                       1.5052e-01, 2.3961e-01, 1.3933e-01, 2.2480e-03],
                      [2.4460e-03, 7.3080e-03, 2.7112e-01, 2.0000e-13, 1.2962e-01, 2.5366e-02,
                       1.6082e-01, 2.7221e-01, 1.2908e-01, 2.0360e-03]]))])
f_s
tensor([[7.4520e-03, 2.1856e-02, 2.7604e-01, 2.0000e-13, 6.2560e-02, 3.1289e-01,
         6.5100e-02, 1.2767e-01, 1.1074e-01, 1.5700e-02]])

BM 3

shape_aux = [Normal(0,2), Normal(0,2)]
nll = profiler(shape_aux=shape_aux, **b_shapes)
100.00% [61/61 00:15<00:00]
plot_likelihood(nll, mu_scan)
[21.146971175475567]
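
BM 3 constrains the two shape nuisances with auxiliary measurements, passed as Normal(0,2) distributions. Schematically, each auxiliary measurement contributes an additive penalty to the NLL evaluated at the current nuisance values; a minimal sketch, assuming nuisances in units of their ±1 sigma variations:

import torch
from torch.distributions import Distribution
from typing import List

def add_aux_constraints(nll:torch.Tensor, thetas:torch.Tensor,
                        auxs:List[Distribution]) -> torch.Tensor:
    r'''Subtract each auxiliary log-probability from the NLL, one per nuisance.'''
    for theta, aux in zip(thetas, auxs):
        nll = nll - aux.log_prob(theta)
    return nll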

BM 4

nll = profiler(shape_aux=shape_aux, b_norm_aux=[Normal(0,100)], **b_shapes)
100.00% [61/61 00:22<00:00]
plot_likelihood(nll, mu_scan)
[25.930942296161362]