Weighting Simulations

refrences:

https://wiki.icecube.wisc.edu/index.php/Weights_in_CORSIKA_Simulation_Data

http://icecube.wisc.edu/~jvansanten/docs/misc/index.html

http://software.icecube.wisc.edu/simulation_trunk/projects/weighting/index.html

Headers

load headers

In [1]:
from I3Tray import *
import icecube
from icecube import icetray, dataio, dataclasses, weighting
import numpy as np
import matplotlib as mpl
import pylab
import matplotlib.pyplot as plt
import sys, tables, glob, os, copy
from icecube import NewNuFlux
from icecube.icetray import I3Units
import matplotlib.gridspec as gridspec

from icecube.weighting.fluxes import Hoerandel5,GaisserH3a
from icecube.weighting import weighting

%matplotlib inline

weighting nugen:

Load data:

In [2]:
inFile_nugen = dataio.I3File('/home/icecube/i3/data/IC86-2011_nugen_numu/Level2_nugen_numu_IC86.2011.011184.000000.i3.bz2')

to weight nugen simulations, type, primary energy, zenith angle and OneWeight variables of the particle are needed.

they can be read from .i3 file(s).

In [3]:
MCtype_nugen=np.array([])
MCenergy_nugen=np.array([])
MCzenith_nugen=np.array([])
OneWeight_nugen=np.array([])
#need this for plots
MCmuonEnergy_nugen=np.array([])

#go to the begining of the file
inFile_nugen.rewind()

while(inFile_nugen.more()):
    frame=inFile_nugen.pop_physics()
    if(dataclasses.get_most_energetic_muon(frame['I3MCTree'])!=None):
        if(frame['FilterMask']['MuonFilter_11'].condition_passed):
            MCmuonEnergy_nugen=np.append(MCmuonEnergy_nugen,dataclasses.get_most_energetic_muon(frame['I3MCTree']).energy)
            #if there are mutiple primary particles in the frame (e.g. background) next command selects the weighted particle
            icecube.weighting.get_weighted_primary(frame,MCPrimary='MCweightedPrimary')
            MCtype_nugen=np.append(MCtype_nugen,frame['MCweightedPrimary'].type)
            MCenergy_nugen=np.append(MCenergy_nugen,frame['MCweightedPrimary'].energy)
            MCzenith_nugen=np.append(MCzenith_nugen,frame['MCweightedPrimary'].dir.zenith)
            OneWeight_nugen=np.append(OneWeight_nugen,frame['I3MCWeightDict']['OneWeight'])

you can read off the "NEvents" (number of generated particle in a single file) from "I3MCWeightDict"

In [4]:
nevts=frame['I3MCWeightDict']['NEvents']
print(nevts)
200000.0

load the flux model

In [5]:
conventional = NewNuFlux.makeFlux("honda2006")
conventional.knee_reweighting_model = "gaisserH3a_elbert"

calculate the weights

In [6]:
nfiles = 1 #number of files (in this case only 1)
weights_nugen = np.divide( conventional.getFlux( np.int32(MCtype_nugen) , MCenergy_nugen,np.cos(MCzenith_nugen) ),
                          (0.5*nevts*nfiles) )
weights_nugen = np.multiply(weights_nugen,OneWeight_nugen)

plot the true muon energy histogram

In [7]:
fig= plt.figure(figsize=(12,6))

bins=np.logspace(1,6,25)

null=plt.hist(MCmuonEnergy_nugen,bins=bins,histtype='step',weights=weights_nugen, label='nugen')

plt.loglog()
plt.legend()
plt.xlabel("True Muon Energy")
plt.ylabel("events")
plt.ylim(1e-6,1e-2)
Out[7]:
(1e-06, 0.01)

weighting CORSIKA

load CORSIKA files

In [8]:
inFile_lowEn = dataio.I3File('/home/icecube/i3/data/IC86-2011_CORSIKA/Level2_IC86.2011_corsika.009622.000000.i3.bz2')

read the variables from .i3 file(s)

In [9]:
MCtype_le=np.array([])
MCenergy_le=np.array([])
#need this for plots
MCmuonEnergy_le=np.array([])

#go to the begining of the file
inFile_lowEn.rewind()

while(inFile_lowEn.more()):
    frame=inFile_lowEn.pop_physics()
    if(frame['FilterMask']['MuonFilter_11'].condition_passed):
        #if there are mutiple primary particles in the frame (e.g. background) next command selects the weighted particle
        icecube.weighting.get_weighted_primary(frame,MCPrimary='MCweightedPrimary')
        MCtype_le=np.append(MCtype_le,frame['MCweightedPrimary'].type)
        MCenergy_le=np.append(MCenergy_le,frame['MCweightedPrimary'].energy)
        MCmuonEnergy_le=np.append(MCmuonEnergy_le,dataclasses.get_most_energetic_muon(frame['I3MCTree']).energy)

get the simulation specs from simprod

In [10]:
generator = weighting.from_simprod(9622) # 9622 is the dataset that we are using
#nfiles, generator = weighting.from_simprod(9622)
nfiles = 1 # nfiles by default is the total number of files exist (incl. possible corrupt ones), we only used 1 file here
generator *= nfiles
flux = GaisserH3a()

calculate the weights

In [11]:
weights_le=flux(MCenergy_le, MCtype_le)/generator(MCenergy_le, MCtype_le)

combining multiple CORSIKA datasets

load the file(s), in this case the high energy events

In [12]:
inFile_highEn = dataio.I3File('/home/icecube/i3/data/IC86-2011_CORSIKA/Level2_IC86.2011_corsika.010309.000000.i3.bz2')

read necessary variables

In [13]:
MCtype_he=np.array([])
MCenergy_he=np.array([])
#need this for plots
MCmuonEnergy_he=np.array([])

#go to the begining of the file
inFile_lowEn.rewind()

while(inFile_highEn.more()):
    frame=inFile_highEn.pop_physics()
    if(frame['FilterMask']['MuonFilter_11'].condition_passed):
        #if there are mutiple primary particles in the frame (e.g. background) next command selects the weighted particle
        icecube.weighting.get_weighted_primary(frame,MCPrimary='MCweightedPrimary')
        MCtype_he=np.append(MCtype_he,frame['MCweightedPrimary'].type)
        MCenergy_he=np.append(MCenergy_he,frame['MCweightedPrimary'].energy)
        MCmuonEnergy_he=np.append(MCmuonEnergy_he,dataclasses.get_most_energetic_muon(frame['I3MCTree']).energy)

you can do the same to get weights for only one dataset

In [14]:
generator = weighting.from_simprod(10309) # 10309 is the dataset that we are using
#nfiles, generator = weighting.from_simprod(10309)
nfiles = 1 # nfiles by default is the total number of files exist (incl. possible corrupt ones), we only used 1 file here
generator *= nfiles
flux = GaisserH3a()

weights_he=flux(MCenergy_he, MCtype_he)/generator(MCenergy_he, MCtype_he)

to combine multiple datasets, a new generator is need to be built

In [15]:
generator_le = weighting.from_simprod(9622)
generator_he = weighting.from_simprod(10309)
#nfiles_le, generator_le = weighting.from_simprod(9622)
#nfiles_he, generator_he = weighting.from_simprod(10309)

# set new nfiles
nfiles_le = 1
nfiles_he = 1

# create the new generator for as many dataset as combined
generator = nfiles_le*generator_le + nfiles_he*generator_he

combine variables of all datasets

In [16]:
MCtype = np.append(MCtype_le, MCtype_he)
MCenergy = np.append(MCenergy_le, MCenergy_he)
MCmuonEnergy = np.append(MCmuonEnergy_le, MCmuonEnergy_he)

calculate the weights like before but with the new generator

In [17]:
weights=flux(MCenergy, MCtype)/generator(MCenergy, MCtype)

plot the energy histogram

In [18]:
fig= plt.figure(figsize=(12,6))

bins=np.logspace(2,6,25)

null=plt.hist(MCmuonEnergy_le,bins=bins,histtype='step',weights=weights_le, label='low energy set')
null=plt.hist(MCmuonEnergy_he,bins=bins,histtype='step',weights=weights_he, label='high energy set')
null=plt.hist(MCmuonEnergy,bins=bins,histtype='step',weights=weights, label='combined sets')

plt.loglog()
plt.legend()
plt.xlabel("True Muon Energy")
plt.ylabel("events")
plt.ylim(1e-2,1e3)
Out[18]:
(0.01, 1000.0)

data

experimental data does NOT need to be weighted

In [19]:
inFile_exp = dataio.I3File('/home/icecube/i3/data/IC86-2011_data/Level2_IC86.2011_data_Run00118670_Part00000000.i3.bz2')
In [20]:
MPEFitMuEX_exp=np.array([])
inFile_exp.rewind()

while(inFile_exp.more()):
    frame=inFile_exp.pop_physics()
    if(frame['FilterMask']['MuonFilter_11'].condition_passed):
        if('MPEFitMuEX' in frame.keys()):
            MPEFitMuEX_exp=np.append(MPEFitMuEX_exp,frame['MPEFitMuEX'].energy)
In [21]:
MPEFitMuEX_he=np.array([])
inFile_highEn.rewind()

while(inFile_highEn.more()):
    frame=inFile_highEn.pop_physics()
    if(frame['FilterMask']['MuonFilter_11'].condition_passed):
        if('MPEFitMuEX' in frame.keys()):
            MPEFitMuEX_he=np.append(MPEFitMuEX_he,frame['MPEFitMuEX'].energy)
        else:
            MPEFitMuEX_he=np.append(MPEFitMuEX_he,0)
In [22]:
MPEFitMuEX_le=np.array([])
inFile_lowEn.rewind()

while(inFile_lowEn.more()):
    frame=inFile_lowEn.pop_physics()
    if(frame['FilterMask']['MuonFilter_11'].condition_passed):
        if('MPEFitMuEX' in frame.keys()):
            MPEFitMuEX_le=np.append(MPEFitMuEX_le,frame['MPEFitMuEX'].energy)
        else:
            MPEFitMuEX_le=np.append(MPEFitMuEX_le,0)
In [23]:
MPEFitMuEX_nugen=np.array([])
inFile_nugen.rewind()

while(inFile_nugen.more()):
    frame=inFile_nugen.pop_physics()
    if(dataclasses.get_most_energetic_muon(frame['I3MCTree'])!=None):
        if(frame['FilterMask']['MuonFilter_11'].condition_passed):
            if('MPEFitMuEX' in frame.keys()):
                MPEFitMuEX_nugen=np.append(MPEFitMuEX_nugen,frame['MPEFitMuEX'].energy)
            else:
                MPEFitMuEX_nugen=np.append(MPEFitMuEX_nugen,0)

for convenience combine both low and high energy CORSIKA datasets

In [24]:
MPEFitMuEX_sum=np.append(np.append(MPEFitMuEX_he,MPEFitMuEX_le),MPEFitMuEX_nugen)
weights_sum=np.append(np.append(weights_he,weights_le),weights_nugen)
In [25]:
fig= plt.figure(figsize=(12,6))

bins=np.logspace(2,6,25)
livetime=204.158061802
null=plt.hist(MPEFitMuEX_he,bins=bins,histtype='step',weights=np.multiply(weights_he,livetime), label='high energy set',color='b')
null=plt.hist(MPEFitMuEX_le,bins=bins,histtype='step',weights=np.multiply(weights_le,livetime), label='low energy set',color='g')
null=plt.hist(MPEFitMuEX_nugen,bins=bins,histtype='step',weights=np.multiply(weights_nugen,livetime), label='nugen',color='c')

null=plt.hist(MPEFitMuEX_sum,bins=bins,histtype='step',weights=np.multiply(weights_sum,livetime), label='nugen', linewidth=2,color='r')


y,binEdges=np.histogram(MPEFitMuEX_exp,bins=bins)
bincenters = 0.5*(binEdges[1:]+ binEdges[:-1])
menStd = np.sqrt(y)
null=plt.errorbar(bincenters,y, label='nugen',yerr=menStd,ecolor='k',fmt='ok')

plt.loglog()
plt.legend()
plt.xlabel("True Muon Energy")
plt.ylabel("events")
plt.ylim(1e-2,1e3)
Out[25]:
(0.01, 1000.0)

make weighted erorrbars for simulation data

standard error of a Poisson distribution can be calculated like: \(\sigma = \sqrt{N}\)

In [26]:
def histogram_dot(ax,x,bins=10,weights=None,color='k',minVal=1e-10):
    """
    plotting a log histogram with errorbars, type=dot
    """
    y,binEdges=np.histogram(x,bins=bins)
    bincenters = 0.5*(binEdges[1:]+ binEdges[:-1])
    menStd_u = np.sqrt(y)
    menStd_l = copy.copy(menStd_u)
    menStd_l[ (y-menStd_l) <= 0] = y[ (y-menStd_l) <= 0]-minVal #very small number
    p=ax.errorbar(bincenters,y,yerr=(menStd_l,menStd_u),ecolor=color,fmt='o'+color)
    return p

weighted standard error of a Poisson distribution is calculated like: \(\sigma = \sqrt{\sum{w^2}}\)

In [27]:
def histogram_fillBetween(ax,x,bins=10,weights=None,color='b',alpha=0.5,minVal=1e-10):
    """
    Plotting a log histogram with errorbars, type=fill_between
    """
    y0, binEdges=np.histogram(x,bins=bins,weights=np.power(weights,2))
    y, binEdges=np.histogram(x,bins=bins,weights=weights)
    bincenters = 0.5*(binEdges[1:]+ binEdges[:-1])
    menStd = np.sqrt(y0)
    yerr2=copy.copy(menStd)
    yerr1=copy.copy(menStd)
    for i in range(len(yerr1)):
        if(yerr1[i]>=y[i]):
            yerr1[i]=y[i]-minVal   #a low number to avoid 0     
    xf=np.array([])
    y1f=np.array([])
    y2f=np.array([])
    ymf=np.array([])
    y2tmp=np.add(y,yerr2)
    y1tmp=np.subtract(y,yerr1)
    for i in range(len(binEdges)-1):
        xf=np.append(xf,binEdges[i])
        xf=np.append(xf,binEdges[i+1])
        y1f=np.append(y1f,y1tmp[i])
        y1f=np.append(y1f,y1tmp[i])
        y2f=np.append(y2f,y2tmp[i])
        y2f=np.append(y2f,y2tmp[i])
        ymf=np.append(ymf,y[i])
        ymf=np.append(ymf,y[i])
    ax.fill_between(xf,y1f,y2f,alpha=alpha,where=y2f>0,color=color)
    return plt.Rectangle((0, 0), 0, 0, fc=color, alpha=alpha)

plot the histograms

In [28]:
fig= plt.figure(figsize=(12,6))
ax = fig.add_subplot(1,1,1)

bins=np.logspace(2,6,25)
livetime=204.158061802
w=np.multiply(weights_sum,livetime)

p1 = histogram_fillBetween(ax, MPEFitMuEX_sum, bins, w, color='b')
p2 = histogram_dot(ax, MPEFitMuEX_exp,bins, w, color='k')

plt.legend( [p1,p2], ['Simulation',"Experimental data"], fontsize=14)


plt.loglog()
plt.xlabel("True Muon Energy",fontsize=14)
plt.ylabel("events",fontsize=14)
plt.ylim(1e-2,1e4)
Out[28]:
(0.01, 10000.0)