%matplotlib inline
%reload_ext autoreload
%autoreload 2
from fastcore.all import partialler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from typing import *
from collections import OrderedDict
import numpy as np
from abc import abstractmethod, ABCMeta
from typing import Optional, List
from fastcore.all import store_attr, delegates
import torch.nn.functional as F
from torch import optim, autograd
from torch.distributions import Normal
from torch import Tensor
import torch
import torch.nn as nn
from torch.distributions import Distribution
from pytorch_inferno.model_wrapper import ModelWrapper
from pytorch_inferno.callback import *
from pytorch_inferno.data import get_paper_data
from pytorch_inferno.plotting import *
from pytorch_inferno.inference import *
from pytorch_inferno.utils import *
from pytorch_inferno.inferno import *
The class is initialised with the true number of signal events $\mu$ and the true total number of events $n$; the true number of background events is then computed as $n-\mu$. The number of nuisance parameters must also be passed.
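For concreteness, a minimal sketch of such a constructor is shown below. The class and argument names are illustrative assumptions, not the exact pytorch_inferno signature.

```python
# Illustrative sketch only: names and signature are assumptions, not the library's API.
class ApproxInfernoSketch:
    def __init__(self, mu_true: float, n_true: int, n_shape_alphas: int):
        self.mu_true = mu_true                # true number of signal events
        self.n_true = n_true                  # true total number of events
        self.b_true = n_true - mu_true        # implied number of background events, n - mu
        self.n_shape_alphas = n_shape_alphas  # number of shape nuisance parameters
```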
This process only approximates the effect of shape systematics, since they are evaluated as up/down shape variations and their inter-dependence is not accounted for. It is, however, perhaps more realistic and practical, since the exact effect of the nuisances on the input features does not have to be analytically derivable; instead one can use up/down Monte Carlo samples. For this synthetic example, the analytical effects are used.
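As a rough illustration of how up/down templates can stand in for an analytic dependence, a simple linear interpolation between the nominal and varied shapes might look like the sketch below. The function name and the piecewise-linear scheme are assumptions; the library's actual interpolation may differ.

```python
import torch

# Hedged sketch: shift a binned shape between its nominal template and the
# +1/-1 sigma Monte Carlo variations of a single nuisance parameter alpha.
def interp_shape_sketch(alpha: torch.Tensor, f_nom: torch.Tensor,
                        f_up: torch.Tensor, f_dw: torch.Tensor) -> torch.Tensor:
    shift = torch.where(alpha >= 0, f_up - f_nom, f_nom - f_dw)  # pick the relevant side
    return f_nom + alpha * shift                                 # alpha in units of sigma
```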
Running cycle is (a schematic sketch follows the list):
- for e in #epochs:
    - for b in #batches:
        - on_forwards_end:
            - compute up/down shape variations by augmenting the data and passing it through the model
            - set alpha as a tensor with gradient and n_alpha+1 elements; set element zero to $\mu$
            - compute the full Hessian of the negative log-likelihood w.r.t. alpha (shape systematics included by running the model on augmented data to get the up/down shapes)
            - invert the Hessian and set the diagonal element corresponding to $\mu$ as the loss value
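The loss computation inside `on_forwards_end` can be sketched roughly as follows. The helper names are hypothetical; in the real callback the negative log-likelihood is built from the binned network outputs and the interpolated shapes, so `nll_fn` here is assumed to return that (differentiable) NLL as a function of alpha.

```python
import torch
from torch import autograd

# Schematic version of the loss described above (assumed names, not the
# library's implementation). nll_fn(alpha) -> scalar negative log-likelihood,
# with alpha[0] the signal strength mu and alpha[1:] the nuisance parameters.
def inferno_loss_sketch(nll_fn, mu_true: float, n_alpha: int) -> torch.Tensor:
    alpha = torch.zeros(n_alpha + 1)
    alpha[0] = mu_true                  # element zero holds mu
    alpha.requires_grad_()
    hess = autograd.functional.hessian(nll_fn, alpha, create_graph=True)  # full Hessian of the NLL
    cov = torch.inverse(hess)           # approximate covariance of (mu, nuisances)
    return cov[0, 0]                    # variance on mu, used as the loss
```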
bs = 2000
data, test = get_paper_data(200000, bs=bs, n_test=1000000)
net = nn.Sequential(nn.Linear(3,100),   nn.ReLU(),
                    nn.Linear(100,100), nn.ReLU(),
                    nn.Linear(100,10),  VariableSoftmax(0.1))  # 10-bin softmax output with temperature 0.1
init_net(net)
model = ModelWrapper(net)
%%time
model.fit(1, data=data, opt=partialler(optim.Adam, lr=1e-3), loss=None,
          cbs=[ApproxPaperInferno(r_mods=(-0.2,0.2), l_mods=(2.5,3.5),
                                  shape_aux=[Normal(0,2), Normal(0,2)], nonaux_b_norm=False,
                                  b_norm_aux=[Normal(0,100)], aug_alpha=False, n_steps=10),
               LossTracker(), SaveBest('weights/best_ii.h5'), EarlyStopping(2)])
model.save('weights/Inferno_Test_interp_bm2.h5')
model.load('weights/Inferno_Test_interp_bm2.h5')
preds = model._predict_dl(test, pred_cb=InfernoPred())
df = pd.DataFrame({'pred':preds})
df['gen_target'] = test.dataset.y
df.head()
plot_preds(df, bin_edges=np.linspace(0,10,11))
bin_preds(df)
df.head()
f_s, f_b = get_shape(df,1), get_shape(df,0)  # nominal signal (class 1) and background (class 0) shapes
n = 1050
mu_scan = torch.linspace(20,80,61)
profiler = partialler(calc_profile, f_s_nom=f_s, f_b_nom=f_b, n_obs=n, mu_scan=mu_scan, mu_true=50, n_steps=100)
nll = profiler()
plot_likelihood(nll, mu_scan=mu_scan)
bkg = test.dataset.x[test.dataset.y.squeeze() == 0]
assert len(bkg) == 500000
b_shapes = get_paper_syst_shapes(bkg, df, model=model, pred_cb=InfernoPred())
df
plot_preds(df, pred_names=['pred', 'pred_-0.2_3', 'pred_0.2_3', 'pred_0_2.5', 'pred_0_3.5'], bin_edges=np.linspace(0,10,11))
nll = profiler(f_b_up=b_shapes['f_b_up'][0], f_b_dw=b_shapes['f_b_dw'][0])
plot_likelihood(nll, mu_scan)
nll = profiler(f_b_up=b_shapes['f_b_up'][1], f_b_dw=b_shapes['f_b_dw'][1])
plot_likelihood(nll, mu_scan)
nll = profiler(**b_shapes)
plot_likelihood(nll, mu_scan)
b_shapes
f_s
shape_aux = [Normal(0,2), Normal(0,2)]
nll = profiler(shape_aux=shape_aux, **b_shapes)
plot_likelihood(nll, mu_scan)
nll = profiler(shape_aux=shape_aux, b_norm_aux=[Normal(0,100)], **b_shapes)
plot_likelihood(nll, mu_scan)