From f18364ec6522466afbdd64bdf563a54c9e6e4db9 Mon Sep 17 00:00:00 2001 From: Shivesh Mandalia Date: Tue, 3 Mar 2020 02:36:22 +0000 Subject: documentation --- README.md | 30 ++- docs/source/_static/cascade.jpg | Bin 0 -> 67923 bytes docs/source/_static/fr.png | Bin 0 -> 126787 bytes docs/source/_static/icecube.png | Bin 0 -> 215856 bytes docs/source/_static/jeffreys.png | Bin 0 -> 62949 bytes docs/source/_static/mh.png | Bin 0 -> 32955 bytes docs/source/_static/mixing.png | Bin 0 -> 17166 bytes docs/source/_static/nu_messengers.png | Bin 0 -> 131814 bytes docs/source/_static/osc_params.png | Bin 0 -> 39083 bytes docs/source/_static/track.jpg | Bin 0 -> 66642 bytes docs/source/_templates/footer.html | 53 +++++ docs/source/api.rst | 46 +++- docs/source/examples.rst | 9 + docs/source/index.rst | 2 +- docs/source/installation.rst | 48 ++++ docs/source/notebooks/inference.ipynb | 1 + docs/source/notebooks/tutorial.ipynb | 1 + docs/source/overview.rst | 34 +++ docs/source/physics.rst | 437 ++++++++++++++++++++++++++++++++++ docs/source/statistics.rst | 423 ++++++++++++++++++++++++++++++++ golemflavor/plot.py | 17 +- 21 files changed, 1073 insertions(+), 28 deletions(-) create mode 100644 docs/source/_static/cascade.jpg create mode 100644 docs/source/_static/fr.png create mode 100644 docs/source/_static/icecube.png create mode 100644 docs/source/_static/jeffreys.png create mode 100644 docs/source/_static/mh.png create mode 100644 docs/source/_static/mixing.png create mode 100644 docs/source/_static/nu_messengers.png create mode 100644 docs/source/_static/osc_params.png create mode 100644 docs/source/_static/track.jpg create mode 100644 docs/source/_templates/footer.html create mode 120000 docs/source/notebooks/inference.ipynb create mode 120000 docs/source/notebooks/tutorial.ipynb diff --git a/README.md b/README.md index 8ee8325..438c19d 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,14 @@ pipeline using Astrophysical Flavor data taken at ## Overview ### What is 
Astrophysical Flavor data? -This is data of the *flavor* of a -[neutrino](https://icecube.wisc.edu/outreach/neutrinos) taken at the [IceCube -neutrino observatory](https://icecube.wisc.edu/), which is a cubic kilometer -array of optical sensors embedded in the glacial ice at the South Pole. In -particular, *astrophysical* neutrinos are ones that are very-high-energy and -come from astrophysical origins such as [active galactic -nuclei](https://doi.org/10.1126/science.aat2890). - +This is data of the *flavor* of a neutrino taken at the [IceCube neutrino +observatory](https://icecube.wisc.edu/), which is a cubic kilometer array of +optical sensors embedded in the glacial ice at the South Pole. In particular, +*astrophysical* neutrinos are ones that are very-high-energy and come from +astrophysical origins such as [active galactic +nuclei](https://doi.org/10.1126/science.aat2890). For more on the physics +behind neutrinos see +[here](https://golemflavor.readthedocs.io/en/latest/physics.html). ### What does the GolemFlavor package do? This package provides utilities for astrophysical neutrino propagation and @@ -29,8 +29,9 @@ Bayesian statistical modeling focused on advanced Markov Chain Monte Carlo (MCMC) algorithms. It has been used to make constraints on New Physics models in the Astrophysical Flavor, as motivated by the paper [*New Physics in Astrophysical Neutrino -Flavor*](https://doi.org/10.1103/PhysRevLett.115.161303). - +Flavor*](https://doi.org/10.1103/PhysRevLett.115.161303). For more information +on the statistical modeling see +[here](https://golemflavor.readthedocs.io/en/latest/statistics.html). ## Features * **Portable Flavor Functions**: A set of useful functions for calculating @@ -54,13 +55,10 @@ Flavor*](https://doi.org/10.1103/PhysRevLett.115.161303). ## Examples You can find examples of how to use GolemFlavor in the [`GolemFlavor/examples`](examples) directory. 
- - - +The documentation for GolemFlavor can be found at +[https://golemflavor.readthedocs.io/](https://golemflavor.readthedocs.io/). ## Installation GolemFlavor can be installed using `pip` @@ -104,4 +102,4 @@ Additional dependencies: [MIT License](LICENSE) -Copyright (c) 2020 Shivesh Mandalia +Copyright (c) 2020 Shivesh Mandalia [https://shivesh.org](https://shivesh.org) diff --git a/docs/source/_static/cascade.jpg b/docs/source/_static/cascade.jpg new file mode 100644 index 0000000..e33b574 Binary files /dev/null and b/docs/source/_static/cascade.jpg differ diff --git a/docs/source/_static/fr.png b/docs/source/_static/fr.png new file mode 100644 index 0000000..dada2d9 Binary files /dev/null and b/docs/source/_static/fr.png differ diff --git a/docs/source/_static/icecube.png b/docs/source/_static/icecube.png new file mode 100644 index 0000000..07bf278 Binary files /dev/null and b/docs/source/_static/icecube.png differ diff --git a/docs/source/_static/jeffreys.png b/docs/source/_static/jeffreys.png new file mode 100644 index 0000000..a0cea17 Binary files /dev/null and b/docs/source/_static/jeffreys.png differ diff --git a/docs/source/_static/mh.png b/docs/source/_static/mh.png new file mode 100644 index 0000000..e715f47 Binary files /dev/null and b/docs/source/_static/mh.png differ diff --git a/docs/source/_static/mixing.png b/docs/source/_static/mixing.png new file mode 100644 index 0000000..cc9a9a4 Binary files /dev/null and b/docs/source/_static/mixing.png differ diff --git a/docs/source/_static/nu_messengers.png b/docs/source/_static/nu_messengers.png new file mode 100644 index 0000000..c32cfa1 Binary files /dev/null and b/docs/source/_static/nu_messengers.png differ diff --git a/docs/source/_static/osc_params.png b/docs/source/_static/osc_params.png new file mode 100644 index 0000000..94b35f1 Binary files /dev/null and b/docs/source/_static/osc_params.png differ diff --git a/docs/source/_static/track.jpg b/docs/source/_static/track.jpg new file mode 100644 
index 0000000..6ac5f74 Binary files /dev/null and b/docs/source/_static/track.jpg differ diff --git a/docs/source/_templates/footer.html b/docs/source/_templates/footer.html new file mode 100644 index 0000000..554e08e --- /dev/null +++ b/docs/source/_templates/footer.html @@ -0,0 +1,53 @@ + diff --git a/docs/source/api.rst b/docs/source/api.rst index c9a5f05..94a6b9e 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -2,6 +2,46 @@ :github_url: https://github.com/ShiveshM/GolemFlavor -*** -API -*** +*************** +GolemFlavor API +*************** + +Emcee hooks +----------- +.. automodule:: golemflavor.mcmc + :members: + +Enumerations +------------ +.. automodule:: golemflavor.enums + :members: + +Flavor Functions +---------------- +.. automodule:: golemflavor.fr + :members: + +GolemFit Hooks +-------------- +.. automodule:: golemflavor.gf + :members: + +Likelihood +---------- +.. automodule:: golemflavor.llh + :members: + +Miscellaneous +------------- +.. automodule:: golemflavor.misc + :members: + +Parameter class +--------------- +.. automodule:: golemflavor.param + :members: + +Visualization +------------- +.. automodule:: golemflavor.plot + :members: diff --git a/docs/source/examples.rst b/docs/source/examples.rst index df8023f..3ae9cef 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -5,3 +5,12 @@ ******** Examples ******** + +.. toctree:: + :maxdepth: 1 + + Basic Tutorial + Bayesian Inference + + +The above links lead to example notebooks. diff --git a/docs/source/index.rst b/docs/source/index.rst index 84454ff..39a478d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,7 +10,7 @@ GolemFlavor GolemFlavor is a Python package for running a Bayesian inference analysis pipeline using Astrophysical Flavor data taken at the `IceCube Neutrino -Obervatory `_ +Obervatory `_. .. 
toctree:: :maxdepth: 1 diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 51a9e01..99841fa 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -5,3 +5,51 @@ ************ Installation ************ + +--- +Pip +--- + +GolemFlavor can be installed using ``pip``: + +.. code-block:: bash + + pip install git+https://github.com/ShiveshM/GolemFlavor.git + +This installs GolemFlavor, along with all the necessary dependencies such as +NumPy and SciPy. + +GolemFlavor uses the IceCube software `GolemFit: The HESE +fitter `_ to fit with IceCube +Astrophysical Flavor data. This software is proprietary and so access is +currently limited to IceCube collaborators. A simple Gaussian likelihood can be +used as a substitute for test purposes if this requirement is not found. + +------------ +Dependencies +------------ + +GolemFlavor has the following dependencies: + +- `Python `_ >= 2.7 or >= 3.4 +- `NumPy `_ +- `SciPy `_ +- `Six `_ +- `mpmath `_ +- `emcee `_ +- `PyMultiNest `_ +- `tqdm `_ +- `Shapely `_ +- `Matplotlib `_ +- `python-ternary `_ +- `GetDist `_ + +You can use ``pip`` to install the above automatically. Note that ``PyMultiNest`` +requires the ``MultiNest`` Bayesian inference library, see the `PyMultiNest +documentation +`_ +for install instructions. 
+ +Additional dependencies: + +- `GolemFit `_ diff --git a/docs/source/notebooks/inference.ipynb b/docs/source/notebooks/inference.ipynb new file mode 120000 index 0000000..3caf6c6 --- /dev/null +++ b/docs/source/notebooks/inference.ipynb @@ -0,0 +1 @@ +../../../examples/inference.ipynb \ No newline at end of file diff --git a/docs/source/notebooks/tutorial.ipynb b/docs/source/notebooks/tutorial.ipynb new file mode 120000 index 0000000..1a9beb5 --- /dev/null +++ b/docs/source/notebooks/tutorial.ipynb @@ -0,0 +1 @@ +../../../examples/tutorial.ipynb \ No newline at end of file diff --git a/docs/source/overview.rst b/docs/source/overview.rst index e09628e..65e1871 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -5,3 +5,37 @@ ******** Overview ******** + +---------------------------------- +What is Astrophysical Flavor data? +---------------------------------- + +This is data of the *flavor* of a neutrino taken at the `IceCube neutrino +observatory `_, which is a cubic kilometer array of +optical sensors embedded in the glacial ice at the South Pole. In particular, +*astrophysical* neutrinos are ones that are very-high-energy and come from +astrophysical origins such as `active galactic nuclei +`_. For more on the physics behind +neutrinos see the :doc:`physics` section. + +------------------------------------- +What does the GolemFlavor package do? +------------------------------------- + +This package provides utilities for astrophysical neutrino propagation and +Bayesian statistical modeling focused on advanced Markov Chain Monte Carlo +(MCMC) algorithms. It has been used to make constraints on New Physics models +in the Astrophysical Flavor, as motivated by the paper `*New Physics in +Astrophysical Neutrino Flavor* +`_. For more information on +the statistical modeling see the :doc:`statistics` section. 
+ +-------- +Features +-------- + +- **Portable Flavor Functions**: A set of useful functions for calculating measured flavor compositions given a source composition and a mixing matrix. +- **MCMC Algorithms**: Affine invariant and nested sampling algorithms provided by `emcee `_ and `MultiNest `_. +- **Anarchic Sampling**: Sampling of the neutrino mixing matrix is done under the `*neutrino mixing anarchy* `_ hypothesis to ensure an unbiased prior. +- **Distributed and parallel computing**: Scripts included to manage the workload across a CPU cluster using `HTCondor `_. +- **Visualization**: Produce ternary plots of the flavour composition using the `python-ternary `_ package and joint posterior plots for analyzing MCMC chains using the `getdist `_ package. diff --git a/docs/source/physics.rst b/docs/source/physics.rst index 528215d..ed603f2 100644 --- a/docs/source/physics.rst +++ b/docs/source/physics.rst @@ -5,3 +5,440 @@ ******* Physics ******* + +Neutrinos +========= + +Introduction +------------ +The name *neutrino* was coined to describe a then hypothetical particle +suggested by Wolfgang Pauli in the 1930's [1]_. Pauli's idea was that of a +neutral, weakly interacting particle having a very small mass. He proposed this +particle could be used to explain the spectrum of electrons emitted by +:math:`\beta`-decays of atomic nuclei, which was of considerable controversy at +the time. The observed spectrum of these electrons is continuous [2]_, which is +incompatible with the two-body decay description successfully used to describe +discrete spectral lines in :math:`\alpha`- and :math:`\gamma`- decay of atomic +nuclei. As Pauli observed, by describing :math:`\beta`-decay as a three-body +decay instead, releasing both an electron and his proposed particle, one can +explain the continuous :math:`\beta`-decay spectrum. 
Soon after the discovery of +the neutron [3]_, Enrico Fermi used Pauli's light neutral particle as an +essential ingredient in his successful theory of :math:`\beta`-decay, giving +the neutrino its name as a play on words of *little neutron* in Italian [4]_. + +It was not until some 20 years later that the discovery of the neutrino was +realised. It was eventually understood that neutrinos came in three distinct +*flavours* :math:`\left (\nu_e,\nu_\mu,\nu_\tau\right )` along with their +associated antiparticles :math:`\left +(\bar{\nu}_e,\bar{\nu}_\mu,\bar{\nu}_\tau\right)`. + +Neutrino Mixing +--------------- +For the three massive neutrinos, the flavour eigenstates of the neutrino +:math:`\mid{\nu_\alpha}>`, :math:`\alpha\in\{e,\mu,\tau\}`, are related to the +mass eigenstates :math:`\mid{\nu_i}>`, :math:`i\in\{1,2,3\}` via a unitary +mixing matrix :math:`U_{\alpha i}` known as the PMNS matrix [5]_, [6]_: + +.. math:: + + \mid{\nu_\alpha}>=\sum^3_{i=1}U^*_{\alpha i}\mid{\nu_i}> + +This relationship can be seen better in this image: + +.. figure:: _static/mixing.png + :width: 500px + :align: center + + Graphical representation of the relationship between the neutrino flavour and + mass eigenstates. The three mass eigenstates are depicted as three boxes, + coloured such that the relative area gives the probability of finding the + corresponding flavour neutrino in that given mass state. + +The time evolution of the flavour eigenstate as the neutrino propagates is +given by: + +.. math:: + + \mid{\nu_\alpha\left(t\right)}>= + \sum^3_{i=1}U^*_{\alpha i}\mid{\nu_i\left(t\right)}> + +The oscillation probability gives the probability that a neutrino produced in a +flavour state :math:`\alpha` is then detected in a flavour state :math:`\beta` +after a propagation distance :math:`L`: + +.. 
math:: + + \begin{align} + P_{\nu_\alpha\rightarrow\nu_\beta}\left(L\right) &= + \mid{<{\nu_\beta\left(L\right)}|{\nu_\alpha\left(0\right)}>}\mid^2\\ + &=\mid{\sum_{i=1}^3\sum_{j=1}^3<{\nu_j\left(L\right)}| + U_{\beta j}U_{\alpha i}^*|{\nu_i\left(0\right)}>}\mid^2\\ + &=\mid{\sum_{i=1}^3\sum_{j=1}^3U_{\beta j}U_{\alpha i}^* + <{\nu_j\left(0\right)}|e^{-i\frac{m_j^2L}{2E}} + e^{-i\frac{m_i^20}{2E}}|{\nu_i\left(0\right)}>}\mid^2\\ + &=\mid{\sum_{i=1}^3U_{\beta i}U_{\alpha i}^* + e^{-i\frac{m_i^2L}{2E}}}\mid^2 + \end{align} + +where the relation :math:`<{\nu_i}|{\nu_j}>=\delta_{ij}` was used. Expanding +this expression gives [7]_: + +.. math:: + + \begin{align} + P_{\nu_\alpha\rightarrow\nu_\beta}\left(L, E\right) = + \delta_{\alpha\beta}&-4\sum_{i>j}\text{Re}\left(U_{\alpha i}^*U_{\beta i} + U_{\alpha j}U_{\beta j}^*\right)\sin^2 + \left(\frac{\Delta m^2_{ij}L}{4E}\right)\\ + &+2\sum_{i>j}\text{Im}\left(U_{\alpha i}^*U_{\beta i}U_{\alpha j} + U_{\beta j}^*\right)\sin\left(\frac{\Delta m^2_{ij}L}{2E}\right) + \end{align} + +where :math:`\Delta m_{ij}^2=m_i^2-m_j^2`. Note that for neutrino oscillations +to occur, there must be at least one non-zero :math:`\Delta m_{ij}^2` and +therefore there must exist at least one non-zero neutrino mass state. + +The mixing matrix can be parameterised using the standard factorisation [8]_: + +.. math:: + + \begin{align} + U= + \begin{pmatrix} + 1 & 0 & 0 \\ + 0 & c_{23} & s_{23} \\ + 0 & -s_{23} & c_{23} \\ + \end{pmatrix} + \begin{pmatrix} + c_{13} & 0 & s_{13}e^{-i\delta} \\ + 0 & 1 & 0 \\ + -s_{13}e^{i\delta} & 0 &c_{13} \\ + \end{pmatrix} + \begin{pmatrix} + c_{12} & s_{12} & 0 \\ + -s_{12} & c_{12} & 0 \\ + 0 & 0 & 1 \\ + \end{pmatrix} + \end{align} + +where :math:`s_{ij}\equiv\sin\theta_{ij}`, :math:`c_{ij}\equiv\cos\theta_{ij}`, +:math:`\theta_{ij}` are the three mixing angles and :math:`\delta` is the CP +violating phase. 
Overall phases in the mixing matrix do not affect neutrino +oscillations, which only depend on quartic products, and so they have been +omitted. Therefore, this gives a total of six independent free parameters +describing neutrino oscillations for three neutrino flavours in a vacuum. This +table outlines the current knowledge of these parameters determined by a fit to +global data [9]_: + +.. figure:: _static/osc_params.png + :width: 500px + :align: center + + Three neutrino flavour oscillation parameters from a fit to global data + [9]_. + +This table shows two columns of values, *normal ordering* and *inverted +ordering* corresponding to the case where the mass of :math:`\nu_3` is greater +than the mass of :math:`\nu_1` or the mass of :math:`\nu_1` is greater than the +mass of :math:`\nu_3`, respectively. The experimental determination of this +mass ordering is ongoing. + +Astrophysical Neutrinos +----------------------- +The origin and acceleration mechanism of ultra-high-energy cosmic rays is still +unknown. The difficulty comes from the fact that the cosmic rays are bent by +interstellar magnetic fields, and so their arrival direction on Earth does not +point back to their sources. The observation of these ultra-high-energy cosmic +rays supports the existence of neutrino production at the sources of a similar +energy range - an astrophysical neutrino flux. Neutrinos are electrically +neutral, so are not perturbed by interstellar magnetic fields, and they also +have a small enough interaction cross-section to escape from dense regions. +This makes them ideal messengers to help identify the sources of cosmic rays: + +.. figure:: _static/nu_messengers.png + :align: center + + Neutrinos as messengers of astrophysical objects. Exotic astrophysical + objects produce high-energy cosmic rays, photons and neutrinos, which can be + detected on Earth. Credit: IceCube, NSF. 
+ +Ultra-high-energy cosmic rays detected on Earth manifestly succeed in +escaping their sources, therefore these sources must be optically thin +compared to the Earth's atmosphere. Thus, the following interactions of the +accelerated protons are expected to be more important than lengthy shower +processes. High-energy protons can interact with photons as such: + +.. math:: + + p+\gamma\rightarrow\Delta^+\rightarrow + \begin{cases} + p+\pi^0\\ + n+\pi^+ + \end{cases} + + +They can also interact with other hadrons: + +.. math:: + + p+p\rightarrow + \begin{cases} + p+p+\pi^0\\ + p+n+\pi^+ + \end{cases}\\ + p+n\rightarrow + \begin{cases} + p+n+\pi^0\\ + p+p+\pi^- + \end{cases} + +Importantly, final states here tend to produce pions which decay into either +photons if neutral, :math:`\pi^0\rightarrow\gamma\gamma`, or if they are charged +they decay into charged leptons and neutrinos. The neutral and charged pions are +produced in similar amounts, meaning that the neutrino and photon fluxes are +related. Indeed, the diffuse astrophysical neutrino flux can be estimated +through :math:`\gamma`-ray astronomy [10]_. + +Point source searches of neutrinos are also being pursued. In 2017, a +multi-messenger approach which searched for :math:`\gamma`-ray observations in +coincidence with neutrinos coming from a particular source has successfully +been able to identify for the very first time, a source of high-energy +astrophysical neutrinos [11]_, [12]_. + +Of particular interest is the composition of flavours produced at the source. +In the simple pion decay model described above, the *neutrino flavour +composition* (sometimes referred to as the *neutrino flavour ratio*) +produced at the source is: + +.. 
math:: + + \pi\text{ decay}\rightarrow + \left(f_e:f_\mu:f_\tau\right)_\text{S}=\left(1:2:0\right)_\text{S} + +For all discussions on the astrophysical neutrino flavour composition, the +neutrino and antineutrino fluxes will be summed over as it is not yet +experimentally possible to distinguish between the two. In the case that the +muon interacts in the source before it has a chance to decay, e.g. losing +energy rapidly in strong magnetic fields or being absorbed in matter, only the +:math:`\nu_\mu` from the initial pion decay escapes and so the source flavour +composition is simply: + +.. math:: + \mu\text{ suppressed }\rightarrow + \left(f_e:f_\mu:f_\tau\right)_\text{S}=\left(0:1:0\right)_\text{S} + +Another popular model is one in which the produced flux is dominated by neutron +decay, :math:`n\rightarrow p+e^-+\bar{\nu}_e`, which gives rise to a purely +:math:`\nu_e` component: + +.. math:: + + n\text{ decay}\rightarrow + \left(f_e:f_\mu:f_\tau\right)_\text{S}=\left(1:0:0\right)_\text{S} + +Production of :math:`\nu_\tau` at the source is not expected in standard +astrophysics models. However, even in the standard construction, the +composition could vary between any of the three idealised models above, which +can be represented as a source flavour composition of :math:`(x:1-x:0)`, where +:math:`x` is the fraction of :math:`\nu_e` and can vary between +:math:`0\rightarrow1`. + +Once the neutrinos escape the source, they are free to propagate in the vacuum. +As discussed above, neutrinos can transform from one flavour to another. +Astrophysical neutrinos have :math:`\mathcal{O}(\text{Mpc})` or higher +baselines, large enough that the mass eigenstates completely decouple. The +astrophysical neutrinos detected on Earth are decoherent and are propagating in +pure mass eigenstates. 
Taking this assumption greatly simplifies the transition +probability as all the interference terms between the three mass eigenstates +can be dropped, and all that is left is to convert from the propagating mass +state to the flavour states: + +.. math:: + + \phi_{i,\oplus}&=\sum_\alpha\phi_{\alpha,\text{S}}\mid{U_{\alpha i}}\mid^2\\ + \phi_{\alpha,\oplus}&=\sum_{i,\beta} + \mid{U_{\alpha i}}\mid^2\mid{U_{\beta i}}\mid^2\phi_{\beta,\text{S}} + +where :math:`\phi_\alpha` is the flux for a neutrino flavour :math:`\nu_\alpha` +and :math:`\phi_i` is the flux for a neutrino mass state :math:`\nu_i`. The +subscript :math:`\text{S}` denotes the source and :math:`\oplus` denotes as +measured on Earth. The same result can be obtained in the plane wave picture of +the neutrino mixing equations above and taking the limit +:math:`L\rightarrow\infty`, thus this type of decoherent mixing is also known +as oscillation-averaged neutrino mixing. From this, the flavour composition on +Earth is defined as +:math:`f_{\alpha,\oplus}=\phi_{\alpha,\oplus}/\sum_\alpha\phi_{\alpha,\oplus}` +and this can be calculated using the mixing matrix parameters in the table above. +For the three source models discussed above: + +.. math:: + + \begin{align} + \left(1:2:0\right)_\text{S}&\rightarrow\left(0.31:0.35:0.34\right)_\oplus\\ + \left(0:1:0\right)_\text{S}&\rightarrow\left(0.18:0.44:0.38\right)_\oplus\\ + \left(1:0:0\right)_\text{S}&\rightarrow\left(0.55:0.18:0.27\right)_\oplus + \end{align} + +This can be visualised in a ternary plot, which you can make yourself by +checking out the :doc:`examples` section! The axes here are the fraction of +each neutrino flavour as shown below. The coloured circle, square and triangle +show the source flavour compositions. The arrows show the effect of neutrino +mixing on the flavour composition. The unfilled circle, square and triangle +show the corresponding measured flavour composition. 
Neutrino mixing during +propagation has the effect of averaging out the flavour contributions, which is +why the arrows point towards the centre of the triangle. This effect is more +pronounced for :math:`\nu_\mu\leftrightarrow\nu_\tau` due to their larger +mixings. Also shown on this figure, in the hatched *Standard Model* area, is the +region of measured flavour compositions containing all source models of +:math:`\left(x:1-x:0\right)`, using Gaussian priors on the standard mixing +angles. Therefore, this hatched area is the region in which all standard +astrophysical models live. + +.. figure:: _static/fr.png + :width: 700px + :align: center + + Astrophysical neutrino flavour composition ternary plot. Axes show the + fraction of each neutrino flavour. Coloured shapes show 3 models for the + source flavour composition. The arrows indicate the effect of neutrino mixing + during propagation and the unfilled shapes show the corresponding measured + flavour compositions. The hatched area shows the region in measured flavour + space in which all standard astrophysical models live. + +IceCube +======= + +Introduction +---------------- +The `IceCube Neutrino Observatory `_ is a cubic +kilometre photomultiplier array embedded in the extremely thick and clear +glacial ice located near the geographic South Pole in Antarctica. The IceCube +array is made up of 5160 purpose built *Digital Optical Modules* (DOMs) which +are deployed on 86 cables between 1450 and 2450 m below the ground. The +interaction of a neutrino releases a burst of light in the detector, which is +detected by this array of DOMs. The timing and intensity of these photons form +the raw data set at IceCube. This data is analysed so that we can learn more +about the properties of neutrinos. You can check out some cool animations of what +an event looks like in IceCube on `this website +`_. + +A schematic layout of IceCube is shown below. 
The IceCube *In-Ice* array is +made up of 5160 purpose built *Digital Optical Modules* (DOMs) which are +deployed on 86 *strings* (or cables) between 1450 and 2450 m below the ground. +The inner string separation is 125 m with a vertical DOM separation of 17 m. +Eight of the centrally located strings make up the subarray *DeepCore* which +are sensitive to lower energy neutrinos. It achieves this through denser +instrumentation, having an inner string separation of 60 m and a vertical DOM +separation of 7 m. A surface air shower array, IceTop, is instrumented on the +surface and consists of a set of frozen water tanks which act as a veto against +the background cosmic rays. + +.. figure:: _static/icecube.png + :width: 600px + :align: center + + The IceCube neutrino observatory with the In-Ice array, its subarray DeepCore + and the cosmic ray shower array IceTop. + +Event Signatures +---------------- +Cherenkov telescope arrays such as IceCube are able to classify the properties +of a neutrino event by looking at the morphology of photon hits across its PMT +array. There are two main types of neutrino event signatures at IceCube - +*tracks* and *cascades*. + +Tracks are predominantly made by muons which are directly produced by +neutrinos in the *charged current* :math:`\nu_\mu` interaction channel. Muons +have a long lifetime, :math:`\sim` 2 :math:`\mu` s at rest, and in ice they +have relatively low energy losses. Therefore, as shown in the figure below, +a high-energy muon travelling through the IceCube array will leave a long trail +of hits. These features, along with the timing information of hits across the +DOMs, help in determining the directionality of the muon, giving an angular +resolution typically around :math:`0.5-1^\circ` [13]_. At energies of concern +here, there is little deviation between the direction of the neutrino and the +induced muon as they are both heavily boosted. 
Therefore, this pointing ability +of tracks makes them the most attractive events to use for point source +searches. Energy reconstruction is more complicated, however. At the lower +energies (:math:`\lesssim100` GeV), the muon's range is short enough that it is +able to deposit all its energy inside the detector. This is the ideal situation +for a good energy reconstruction as the IceCube array acts as a calorimeter, so +the total deposited charge is proportional to the energy of the muon. At higher +energies, the range of the muon is typically greater than the length of the +detector. Therefore the energy of the muon must be extrapolated from the portion of +energy deposited inside the detector. This is particularly challenging for +muons which are not produced inside the detector, for which only a lower bound +can be made. The typical approach taken to reconstruct the muon is to segment +the reconstruction along the track. In this way, biases from stochastic +variations of the energy loss can be minimised by applying some averaging over +each segment. In each segment, the mean :math:`\text{d} E/\text{d} x` is +determined, which is then roughly proportional to the muon momentum. The energy +resolution improves with the muon energy up to an uncertainty of a factor of 2 +[14]_. For more details on this see the IceCube energy reconstruction methods +publication [15]_. 
However, this does not mean that neutrino events produce +exclusively one type of signature, in fact all high-energy neutrino-nucleon +events produce at least a hadronic cascade at the interaction vertex. +Characteristic of a cascade is the isotropic deposition of energy in a +localised region near the neutrino vertex. Contrary to tracks, cascade events +have much shorter typical lengths and so the entire energy deposition is easily +contained within the detector array. This is ideal for energy reconstruction +giving a deposited energy resolution of :math:`\sim` 15% at neutrino energies +above 10 TeV [15]_. Inferring the true neutrino energy is more difficult, +however, as IceCube is not capable of resolving the difference between EM +showers and hadronic showers, which potentially have a large amount of missing +energy, leading to :math:`\sim` 15% lower light yield compared to an equivalent +EM shower. Deposited energy is reconstructed using an EM shower hypothesis, and +therefore this quantity gives the lower limit of the neutrino energy. +Directional reconstruction is more challenging than for tracks and is done by +looking for timing/light intensity anisotropies around the interaction vertex. +The deviations are small, but it is expected that the light deposition in the +forward direction is greater. Typical angular resolutions are +:math:`10-15^\circ` [16]_. + +.. figure:: _static/cascade.jpg + :width: 700px + :align: center + + A cascade event initiated by a neutrino interaction in the detector. The + cascade deposits 1070 TeV in the detector. + +Not mentioned so far are the charged current :math:`\nu_\tau` interactions, +which for energies :math:`\gtrsim` 1 PeV, can produce :math:`\tau` which +travels a detectable distance before decaying. This provides a unique signature +for such events. 
The initial :math:`\nu_\tau` interaction produces a hadronic +cascade, followed by a track by the :math:`\tau` itself, in turn followed by +either a track from the :math:`\tau`'s muonic decay +(:math:`\tau^-\rightarrow\mu^-\bar{\nu}_\mu\nu_\tau` with branching ratio +:math:`\sim` 17%), or a cascade from its other decays. Because of their +distinctive signatures, such events are called *double bangs* or *double +cascades*. See [17]_, [18]_ for more details. + +.. [1] Pauli, W. *Letter to Tübingen conference participants* Web document. 1930. +.. [2] Chadwick, J. Intensitätsverteilung im magnetischen Spectrum der :math:`\beta`-Strahlen von radium B + C. Verhandl. Dtsc. Phys. Ges. 16, 383 (1914). +.. [3] Chadwick, J. Possible Existence of a Neutron. Nature 129, 312 (1932). +.. [4] Fermi, E. An attempt of a theory of beta radiation. 1. Z. Phys. 88, 161–177 (1934). +.. [5] Pontecorvo, B. Neutrino Experiments and the Problem of Conservation of Leptonic Charge. Sov. Phys. JETP 26. [Zh. Eksp. Teor. Fiz.53,1717(1967)], 984–988 (1968). +.. [6] Maki, Z., Nakagawa, M. & Sakata, S. Remarks on the unified model of elementary particles. Prog. Theor. Phys. 28. [,34(1962)], 870–880 (1962). +.. [7] Giunti, C. & Kim, C. W. Fundamentals of Neutrino Physics and Astrophysics isbn: 9780198508717 (2007). +.. [8] Beringer, J. et al. Review of Particle Physics (RPP). Phys. Rev. D86, 010001 (2012). +.. [9] Esteban, I., Gonzalez-Garcia, M. C., Maltoni, M., Martinez-Soler, I. & Schwetz, T. Updated fit to three neutrino mixing: exploring the accelerator-reactor complemen- tarity. JHEP 01, 087 (2017). +.. [10] Kappes, A., Hinton, J., Stegmann, C. & Aharonian, F. A. Potential Neutrino Signals from Galactic Gamma-Ray Sources. Astrophys. J. 656. [Erratum: Astrophys. J.661,1348(2007)], 870–896 (2007). +.. [11] Aartsen, M. G. et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018). +.. [12] Aartsen, M. G. et al. 
Neutrino emission from the direction of the blazar TXS 0506+056 prior to the IceCube-170922A alert. Science 361, 147–151 (2018). +.. [13] Aartsen, M. G. et al. All-sky Search for Time-integrated Neutrino Emission from Astrophysical Sources with 7 yr of IceCube Data. Astrophys. J. 835, 151 (2017). +.. [14] Weaver, C. Evidence for Astrophysical Muon Neutrinos from the Northern Sky PhD thesis (Wisconsin U., Madison, 2015). https://docushare.icecube.wisc.edu/dsweb/Get/Document-73829/. +.. [15] Aartsen, M. G. et al. Energy Reconstruction Methods in the IceCube Neutrino Telescope. JINST 9, P03009 (2014). +.. [16] Aartsen, M. G. et al. Evidence for High-Energy Extraterrestrial Neutrinos at the IceCube Detector. Science 342, 1242856 (2013). +.. [17] Hallen, P. On the Measurement of High-Energy Tau Neutrinos with IceCube PhD thesis (RWTH Aachen University, 2013). https://www.institut3b.physik.rwth-aachen.de/global/show_document.asp?id=aaaaaaaaaapwhzq. +.. [18] Xu, D. L. Search for astrophysical tau neutrinos in three years of IceCube data PhD thesis (The University of Alabama, 2015). http://acumen.lib.ua.edu/content/u0015/0000001/0001906/u0015_0000001_0001906.pdf diff --git a/docs/source/statistics.rst b/docs/source/statistics.rst index 4b45795..e2dd85f 100644 --- a/docs/source/statistics.rst +++ b/docs/source/statistics.rst @@ -5,3 +5,426 @@ ********** Statistics ********** +Data is stochastic in nature, so an experimenter uses statistical methods on a +data sample :math:`\textbf{x}` to make inferences about unknown parameters +:math:`\mathbf{\theta}` of a probabilistic model of the data +:math:`f\left(\textbf{x}\middle|\mathbf{\theta}\right)`. The *likelihood +principle* describes a function of the parameters :math:`\mathbf{\theta}`, +determined by the observed sample, that contains all the information about +:math:`\mathbf{\theta}` that is available from the sample. 
Given +:math:`\textbf{x}` is observed and is distributed according to a joint +*probability distribution function* (PDF), +:math:`f\left(\textbf{x}\middle|\mathbf{\theta}\right)`, the function of +:math:`\mathbf{\theta}` which is defined by + +.. math:: + + L\left(\mathbf{\theta}\middle|\textbf{x}\right)\equiv + f\left(\textbf{x}\middle|\mathbf{\theta}\right) + +is called the *likelihood function* [1]_. If the data :math:`\textbf{x}` consists +of independent and identically distributed values, then: + +.. math:: + + L\left(\mathbf{\theta}\middle|\textbf{x}\right)= + L\left(\theta_1,\dots,\theta_k\middle|x_1,\dots,x_n\right)= + \prod_{i=1}^{n}f\left(x_i\middle|\theta_1,\dots,\theta_k\right) + +Classical Statistics +-------------------- +First we will take a look at the classical approach to statistics, so that we +can then compare against the Bayesian approach. The *maximum likelihood +estimators* (MLE) of the parameters :math:`\mathbf{\theta}`, denoted by +:math:`\mathbf{\hat\theta}(\textbf{x})`, are attained by maximising +:math:`L\left(\mathbf{\theta}\middle|\textbf{x}\right)`. + +Often, not all the model parameters are of direct inferential interest however +they are included as they may reduce the effect of systematic bias. These are +called the nuisance parameters, :math:`\mathbf{\theta}_\nu`. To reduce the +impact of the nuisance parameters, prior knowledge based on past measurements +:math:`\textbf{y}` may be included in the likelihood: + +.. math:: + + L\left(\mathbf{\theta}_I,\mathbf{\theta}_\nu\middle|\textbf{x},\textbf{y}\right)= + \prod_{i=1}^Nf_I\left(x_i\middle|\mathbf{\theta}_I\right)\cdot + f_\nu\left(\textbf{y}\middle| \mathbf{\theta}_\nu\right) + +By finding the MLE for the nuisance parameters, +:math:`\mathbf{\hat\theta}_{\nu}(\textbf{y})`, the *profile likelihood* can be written +in terms independent of :math:`\mathbf{\theta}_\nu`: + +.. 
math:: + + L_P\left(\mathbf{\theta}_I\middle|\textbf{x},\textbf{y}\right)= + L\left(\mathbf{\theta}_I,\mathbf{\hat\theta}_{\nu}\left(\textbf{y}\right)\middle|\textbf{x},\textbf{y}\right) + +The key method of inference in physics is *hypothesis testing*. The two +complementary hypotheses in a hypothesis testing problem are called the *null +hypothesis* and the *alternative hypothesis*. The goal of a hypothesis test is +to decide, based on a sample of data, which of the two complementary hypotheses +is preferred. The general format of the null and alternative hypothesis is +:math:`H_0:\mathbf{\theta}\in\Theta_0` and +:math:`H_1:\mathbf{\theta}\in\Theta_0^c`, where :math:`\Theta_0` is some subset +of the parameter space and :math:`\Theta_0^c` is its complement. Typically, a +hypothesis test is specified in terms of a *test statistic* (TS) which is used +to define the *rejection region*, which is the region in which the null +hypothesis can be rejected. The Neyman-Pearson lemma [2]_ then states that the +*likelihood ratio test* (LRT) statistic is the most powerful test statistic: + +.. math:: + + \lambda\left(\textbf{x}\right)=\frac{L\left(\mathbf{\hat\theta}_{0}\middle| + \textbf{x}\right)}{L\left(\mathbf{\hat\theta}\middle|\textbf{x}\right)} + +where :math:`\mathbf{\hat\theta}` is a MLE of :math:`\mathbf{\theta}` and +:math:`\mathbf{\hat\theta}_{0}` is a MLE of :math:`\theta` obtained by doing a +restricted maximisation, assuming :math:`\Theta_0` is the parameter space. The +effect of nuisance parameters can be included by substituting the likelihood +with the profile likelihood :math:`L\rightarrow L_P`. 
The rejection region then +has the form +:math:`R\left(\mathbf{\theta}_0\right)=\{\textbf{x}:\lambda\left(\textbf{x}\right) +\leq{c}\left(\alpha\right)\}`, such that +:math:`P_{\mathbf{\theta}_0}\left(\textbf{x}\in +R\left(\mathbf{\theta}_0\right)\right)\leq\alpha`, where :math:`\alpha` +satisfies :math:`0\leq{\alpha}\leq 1` and is called the *size* or +*significance level* of the test and is specified prior to the +experiment. Typically in high-energy physics the level of significance where an +effect is said to qualify as a discovery is at the :math:`5\sigma` level, +corresponding to an :math:`\alpha` of :math:`2.87\times10^{-7}`. In a binned +analysis, the distribution of :math:`\lambda\left(\textbf{n}\right)` can be +approximated by generating mock data or *realisations* of the null +hypothesis. However, this can be computationally difficult to perform. +Instead, according to Wilks' theorem [3]_, for sufficiently large +:math:`\textbf{x}` and provided certain regularity conditions are met (MLE +exists and is unique), :math:`-2\ln\lambda\left(\textbf{x}\right)` can be +approximated to follow a :math:`\chi^2` distribution. The :math:`\chi^2` +distribution is parameterised by :math:`k`, the *number of degrees of +freedom*, which is defined as the number of independent normally distributed +variables that were summed together. When the profile likelihood is used to +account for :math:`n` nuisance parameters, the effective number of degrees of +freedom will be reduced to :math:`k-n`, due to additional constraints placed +via profiling. From this, :math:`c\left(\alpha\right)` can be easily obtained +from :math:`\chi^2` *cumulative distribution function* (CDF) lookup +tables. + +As shown so far, the LRT informs on the best fit point of parameters +:math:`\mathbf{\theta}`. In addition to this point estimator, some sort of +*interval* estimator is also useful to report to reflect its statistical +precision. 
Interval estimators together with a measure of confidence are also +known as *confidence intervals* which has the important property of *coverage*. +The purpose of using an interval estimator is to have some guarantee of +capturing the parameter of interest, quantified by the coverage coefficient, +:math:`1-\alpha`. In this classical approach, one needs to keep in mind that +the interval is the random quantity, not the parameter - which is fixed. Since +we do not know the value of :math:`\theta`, we can only guarantee a coverage +probability equal to the *confidence coefficient*. There is a strong +correspondence between hypothesis testing and interval estimation. The +hypothesis test fixes the parameter and asks what sample values (the acceptance +region) are consistent with that fixed value. The interval estimation fixes +the sample value and asks what parameter values (the confidence interval) make +this sample value most plausible. This can be written in terms of the LRT +statistic: + +.. math:: + + \lambda\left(\mathbf{\theta}\right)=\frac{L\left(\mathbf{\theta}\middle|\textbf{x} + \right)}{L\left(\mathbf{\hat\theta}\middle|\textbf{x}\right)} + + +The confidence interval is then formed as +:math:`C\left(\textbf{x}\right)=\{\mathbf{ +\theta}:\lambda\left(\mathbf{\theta}\right)\geq c\left(1-\alpha\right)\}`, such +that :math:`P_{\mathbf{\theta}}\left(\mathbf{\theta}\in +C\left(\textbf{x}\right)\right)\geq1-\alpha`. Assuming Wilks' theorem holds, +this can be written more specifically as +:math:`C\left(\textbf{x}\right)=\{\mathbf{\theta}:-2\ln\lambda\left( +\mathbf{\theta}\right) \leq\text{CDF}_{\chi^2}^{-1}\left(k,1-\alpha\right)\}`, +where :math:`\text{CDF}_{\chi^2}^{-1}` is the inverse CDF for a :math:`\chi^2` +distribution. One again, the effect of nuisance parameters can be accounted +for by using the profile likelihood analogously to hypothesis testing. 
+ +Bayesian Statistics +------------------- +The Bayesian approach to statistics is fundamentally different to the +classical (*frequentist*) approach that has been taken so far. In the +classical approach the parameter :math:`\theta` is thought to be an unknown, but +fixed quantity. In a Bayesian approach, :math:`\theta` is considered to be a +quantity whose variation can be described by a probability distribution +(called the *prior distribution*), which is subjective based on the +experimenter's belief. The Bayesian approach is also different in terms of +computation, whereby the classical approach consists of optimisation problems +(finding maxima), the Bayesian approach often results in integration +problems. + +In this approach, when an experiment is performed it updates the prior +distribution with new information. The updated prior is called the +*posterior distribution* and is computed through the use of Bayes' +Theorem [4]_: + +.. math:: + + \pi\left(\mathbf{\theta}\middle|\textbf{x}\right)= + \frac{f\left(\textbf{x}\middle|\mathbf{\theta}\right) + \pi\left(\mathbf{\theta}\right)}{m\left(\textbf{x}\right)} + +where :math:`\pi\left(\mathbf{\theta}\middle|\textbf{x}\right)` is the +posterior distribution, +:math:`f\left(\textbf{x}\middle|\mathbf{\theta}\right)\equiv +L\left(\mathbf{\theta}\middle|\textbf{x}\right)` is the likelihood, +:math:`\pi\left(\mathbf{\theta}\right)` is the prior distribution and +:math:`m\left(\textbf{x}\right)` is the *marginal distribution*, + +.. math:: + + m\left(\textbf{x}\right)=\int f\left(\textbf{x}\middle|\mathbf{\theta}\right)\pi\left( + \mathbf{\theta}\right)\text{d}\mathbf{\theta} + +The *maximum a posteriori probability* (MAP) estimate of the parameters +:math:`\mathbf{\theta}`, denoted by +:math:`\mathbf{\hat\theta}_\text{MAP}\left(\textbf{x} \right)` are attained by +maximising :math:`\pi\left(\mathbf{\theta}\middle|\textbf{x} \right)`. 
This +estimator can be interpreted as an analogue of the MLE for Bayesian estimation, +where the distribution has become the posterior. An important distinction to +make from the classical approach is in the treatment of the nuisance +parameters. The priors on :math:`\theta_\nu` are naturally included as part of +the prior distribution :math:`\pi\left(\mathbf{\theta}_\nu\right)= +\pi\left(\mathbf{\theta}_\nu\middle|\textbf{y}\right)` based on a past +measurement :math:`\textbf{y}`. Then the posterior distribution can be obtained +for :math:`\theta_I` alone by *marginalising* over the nuisance parameters: + +.. math:: + \pi\left(\mathbf{\theta}_I\middle|\textbf{x}\right)=\int \pi\left(\mathbf{\theta}_I,\mathbf{\theta}_\nu\middle|\textbf{x}\right)\text{d}\mathbf{\theta}_\nu + +In contrast to the classical approach, where an interval is said to have +coverage of the parameter :math:`\theta`, the Bayesian approach allows one to +say that :math:`\theta` is inside the interval with a probability +:math:`1-\beta`. This distinction is emphasised by referring to Bayesian +intervals as *credible intervals* and :math:`\beta` is referred to here as the +*credible coefficient*. Therefore, both the interpretation and construction is +more straightforward than for the classical approach. The credible interval is +formed as: + +.. math:: + \pi\left(\mathbf{\theta}\in A\middle|\textbf{x}\right)=\int_A\pi\left( + \mathbf{\theta}\middle|\textbf{x}\right)\text{d}\mathbf{\theta}\geq1-\beta + +This does not uniquely specify the credible interval however. The most useful +convention when working in high dimensions is the *Highest Posterior Density* +(HPD) credible interval. Here the interval is constructed such that the +posterior meets a minimum threshold :math:`A\left(\textbf{x}\right)=\{\mathbf{ +\theta}:\pi\left(\mathbf{\theta}\middle|\textbf{x}\right)\geq +t\left(1-\beta\right)\}`. 
This can be seen as an interval starting at the MAP, +growing to include an area whose integrated probability is equal to +:math:`\beta` and where all points inside the interval have a higher posterior +value than all points outside the interval. + +In the Bayesian approach, hypothesis testing can be generalised further to +*model selection* in which possible models (or hypotheses) +:math:`\mathcal{M}_0,\mathcal{M}_1,\dots,\mathcal{M}_k` of the data +:math:`\textbf{x}` can be compared. This is again done through Bayes theorem +where the posterior probability that :math:`\textbf{x}` originates from a model +:math:`\mathcal{M}_j` is + +.. math:: + + \pi\left(\mathcal{M}_j\middle|\textbf{x}\right)=\frac{ + f\left(\textbf{x}\middle|\mathcal{M}_j\right)\pi\left(\mathcal{M}_j\right)}{ + M\left(\textbf{x}\right)}\quad\text{where}\quad + M\left(\textbf{x}\right)=\sum_{i=0}^kf\left(\textbf{x}\middle|\mathcal{M}_i\right) + \pi\left(\mathcal{M}_i\right) + + +the likelihood of the model +:math:`f\left(\textbf{x}\middle|\mathcal{M}_j\right)` can then be written as +the marginal distribution over model parameters :math:`\mathbf{\theta}_j`, also +referred to as the *evidence* of a particular model: + +.. math:: + + \mathcal{Z}_j\left(\textbf{x}\right)=f\left(\textbf{x}\middle|\mathcal{M}_j\right)= + \int f_j\left(\textbf{x}\middle|\mathbf{\theta}_j\right) + \pi_j\left(\mathbf{\theta}_j\right)\text{d}\mathbf{\theta}_j + + +This was seen before as just a normalisation constant above; however, this +quantity is central in Bayesian model selection, which for two models +:math:`\mathcal{M}_0` and :math:`\mathcal{M}_1` is realised through the ratio +of the posteriors: + +.. 
math:: + + \frac{\pi\left(\mathcal{M}_0\middle|\textbf{x}\right)} + {\pi\left(\mathcal{M}_1\middle|\textbf{x}\right)}=B_{\,0/1}\left(\textbf{x}\right) + \frac{\pi\left(\mathcal{M}_0\right)}{\pi\left(\mathcal{M}_1\right)}\quad + \text{where}\quad B_{\,0/1}\left(\textbf{x}\right)= + \frac{\mathcal{Z}_0\left(\textbf{x}\right)}{\mathcal{Z}_1\left(\textbf{x}\right)} + +here we denote the quantity :math:`B_{\,0/1}\left(\textbf{x}\right)` as the +*Bayes factor* which measures the relative evidence of each model and +can be reported independent of the prior on the model. It can be interpreted +as the *strength of evidence* for a particular model and a convention +formulated by Jeffreys [5]_ is one way to interpret this +inference, as shown here: + +.. figure:: _static/jeffreys.png + :width: 500px + :align: center + + List of Bayes factors and their inference convention according to Jeffreys' + scale [5]_. + + +Markov Chain Monte Carlo +------------------------ +The goal of a Bayesian inference is to maintain the full posterior probability +distribution. The models in question may contain a large number of parameters +and so such a large dimensionality makes analytical evaluations and +integrations of the posterior distribution unfeasible. Instead the posterior +distribution can be approximated using *Markov Chain Monte Carlo* (MCMC) +sampling algorithms. As the name suggests these are based on *Markov chains* +which describe a sequence of random variables :math:`\Theta_0,\Theta_1,\dots` +that can be thought of as evolving over time, with probability of transition +depending on the immediate past variable, :math:`P\left(\Theta_{k+1}\in +A\middle|\theta_0,\theta_1,\dots,\theta_k\right)= P\left(\Theta_{k+1}\in +A\middle|\theta_k\right)` [6]_. 
In an MCMC algorithm, Markov chains are +generated by a simulation of *walkers* which randomly walk the parameter space +according to an algorithm such that the distribution of the chains +asymptotically reaches a *target density* :math:`f\left(\theta\right)` that is +unique and stationary (i.e. no longer evolving). This technique is +particularly useful as the chains generated for the posterior distribution can +automatically provide the chains of any marginalised distribution, e.g. a +marginalisation over any or all of the nuisance parameters. + +The *Metropolis-Hastings* (MH) algorithm [7]_ is the most well known MCMC +algorithm and is shown in this algorithm: + +.. figure:: _static/mh.png + :width: 700px + :align: center + + Metropolis-Hastings Algorithm [6]_, [7]_ + +An initialisation state (seed) is first chosen, :math:`\theta^{(t)}`. The +distribution :math:`q\left(\theta'\middle|\theta\right)`, called the *proposal +distribution*, is drawn from in order to propose a candidate state +:math:`\theta'_t` to transition to. Commonly a Gaussian distribution is used +here. Then the MH *acceptance probability*, +:math:`\rho\left(\theta,\theta'\right)` defines the probability of either +accepting the candidate state :math:`\theta'_t`, or repeating the state +:math:`\theta^{(t)}` for the next step in the Markov chain. The acceptance +probability always accepts the state :math:`\theta'_t` such that the ratio +:math:`f\left(\theta'_t\right)/\,q\left(\theta'_t\middle| \theta^{(t)}\right)` +has increased compared with the previous state +:math:`f\left(\theta^{(t)}\right)/\,q\left(\theta^{(t)}\middle|\theta'_t\right)`, +where :math:`f` is the *target density*. 
Importantly, in the case that the +target density is the posterior distribution, +:math:`f\left(\theta\right)=\pi\left(\theta\middle|\textbf{x}\right)`, the +acceptance probability only depends on the ratio of posteriors +:math:`\pi\left(\theta'\middle|\textbf{x}\right)/\pi\left(\theta\middle|\textbf{x}\right)`, +crucially cancelling out the difficult to calculate marginal distribution +:math:`m\left(\textbf{x}\right)`. As the chain evolves, the target density +relaxes to a stationary state of samples which, in our case, can be used to map +out the posterior distribution. + +To reduce the impact of any biases arising from the choice of the +initialisation state, typically the first few chains after a *burn-in* period +are discarded, after which the chains should approximate better the target +density :math:`f`. While the MH algorithm forms the foundation of all MCMC +algorithms, its direct application has two disadvantages. First the proposal +distribution :math:`q\left(\theta'\middle|\theta\right)` must be chosen +carefully and secondly the chains can get caught in local modes of the target +density. More bespoke and sophisticated implementations of the MH algorithm +exist, such as *affine invariant* algorithms [8]_ which use already-drawn +samples to define the proposal distribution or *nested sampling* algorithms +[9]_, designed to compute the evidence :math:`\mathcal{Z}`. Both these types +will be used in the Bayesian approach of this analysis. A more in depth look +into the topic of MCMC algorithms is discussed in detail in Robert and Casella +[6]_ or Collin [10]_. + +Anarchic Sampling +----------------- +As with any Bayesian based inference, the prior distribution used for a given +parameter needs to be chosen carefully. In this analysis, a further technology +needs to be introduced in order to ensure the prior distribution is not biasing +any drawn inferences. More specifically, the priors on the standard model +mixing parameters are of concern. 
These parameters are defined in the +:doc:`physics` section as a representation of the :math:`3\times3` unitary +mixing matrix :math:`U`, in such a way that any valid combination of the mixing +angles can be mapped into a unitary matrix. The ideal and most ignorant choice +of prior here is one in which there is no distinction among the three neutrino +flavours, compatible with the hypothesis of *neutrino mixing anarchy*, which is +the hypothesis that :math:`U` can be described as a result of random draws from +an unbiased distribution of unitary :math:`3\times3` matrices [11]_, [12]_, +[13]_, [14]_. Simply using a flat prior on the mixing angles however, does +not mean that the prior on the elements of :math:`U` is also flat. Indeed doing +this would introduce a significant bias for the elements of :math:`U`. +Statistical techniques used in the study of neutrino mixing anarchy will be +borrowed in order to ensure that :math:`U` is sampled in an unbiased way. Here, +the anarchy hypothesis requires the probability measure of the neutrino mixing +matrix to be invariant under changes of basis for the three generations. This +is the central assumption of *basis independence* and from this, distributions +over the mixing angles are determined by the integration invariant *Haar +measure* [13]_. For the group :math:`U(3)` the Haar measure is given by the +volume element :math:`\text{d} U`, which can be written using the mixing angles +parameterisation: + +.. math:: + + \text{d} U=\text{d}\left(\sin^2\theta_{12}\right)\wedge\, + \text{d}\left(\cos^4\theta_{13}\right)\wedge\, + \text{d}\left(\sin^2\theta_{23}\right)\wedge\,\text{d}\delta + + +which says that the Haar measure for the group :math:`U(3)` is flat in +:math:`\sin^2\theta_{12}`, :math:`\cos^4\theta_{13}`, :math:`\sin^2\theta_{23}` +and :math:`\delta`. 
Therefore, in order to ensure the distribution over the +mixing matrix :math:`U` is unbiased, the prior on the mixing angles must be +chosen according to this Haar measure, i.e. in :math:`\sin^2\theta_{12}`, +:math:`\cos^4\theta_{13}`, :math:`\sin^2\theta_{23}` and :math:`\delta`. You +can see an example on this in action in the :doc:`examples` notebooks. + +This also needs to be considered in the case of a flavour composition +measurement using sampling techniques in a Bayesian approach. In this case, the +posterior of the measured composition :math:`f_{\alpha,\oplus}` is sampled over +as the parameters of interest. Here, the effective number of parameters can be +reduced from three to two due to the requirement :math:`\sum_\alpha +f_{\alpha,\oplus}=1`. Therefore, the prior on these two parameters must be +determined by Haar measure of the flavour composition volume element, +:math:`\text{d} f_{e,\oplus}\wedge\text{d} f_{\mu,\oplus}\wedge\text{d} +f_{\tau,\oplus}`. The following *flavour angles* parameterisation is found to +be sufficient: + +.. math:: + + \begin{align} + f_{\alpha,\oplus}= + \begin{pmatrix} + f_{e,\oplus} \\ f_{\mu,\oplus} \\ f_{\tau,\oplus} + \end{pmatrix}= + \begin{pmatrix} + \sin^2\phi_\oplus\,\cos^2\psi_\oplus \\ + \sin^2\phi_\oplus\,\sin^2\psi_\oplus \\ + \cos^2\phi_\oplus + \end{pmatrix} \\ + \text{d} f_{e,\oplus}\wedge\text{d} f_{\mu,\oplus}\wedge\text{d} f_{\tau,\oplus}= + \text{d}\left(\sin^4\phi_\oplus\right)\wedge + \text{d}\left(\cos\left(2\psi_\oplus\right)\right) + \end{align} + + +.. [1] Casella, G. & Berger, R. Statistical Inference isbn: 9780534243128. https://books.google.co.uk/books?id=0x\_vAAAAMAAJ (Thomson Learning, 2002). +.. [2] Neyman, J. & Pearson, E. S. On the Problem of the Most Efficient Tests of Statistical Hypotheses. Phil. Trans. Roy. Soc. Lond. A231, 289–337 (1933). +.. [3] Wilks, S. S. The Large-Sample Distribution of the Likelihood Ratio for Testing Composite Hypotheses. Annals Math. Statist. 9, 60–62 (1938). +.. 
[4] Bayes, R. An essay toward solving a problem in the doctrine of chances. Phil. Trans. Roy. Soc. Lond. 53, 370–418 (1764). +.. [5] Jeffreys, H. The Theory of Probability isbn: 9780198503682, 9780198531937, 0198531931. https://global.oup.com/academic/product/the-theory-of-probability-9780198503682?cc=au&lang=en&#(1939). +.. [6] Robert, C. & Casella, G. Monte Carlo Statistical Methods isbn: 9781475730715. https://books.google.co.uk/books?id=lrvfBwAAQBAJ (Springer New York, 2013) +.. [7] Hastings, W. K. Monte Carlo sampling methods using Markov chains and their applications. Biometrika 57, 97–109. issn: 0006-3444 (Apr. 1970). +.. [8] Goodman, J. & Weare, J. Ensemble samplers with affine invariance. *Communications in Applied Mathematics and Computational Science*, Vol. 5, No. 1, p. 65-80, 2010 5, 65–80 (2010). +.. [9] Skilling, J. Nested Sampling. AIP Conference Proceedings 735, 395–405 (2004) +.. [10] Collin, G. L. *Neutrinos, neurons and neutron stars : applications of new statistical and analysis techniques to particle and astrophysics* PhD thesis (Massachusetts Institute of Technology, 2018). http://hdl.handle.net/1721.1/118817. +.. [11] De Gouvea, A. & Murayama, H. Neutrino Mixing Anarchy: Alive and Kicking. Phys. Lett. B747, 479–483 (2015). +.. [12] Hall, L. J., Murayama, H. & Weiner, N. Neutrino mass anarchy. Phys. Rev. Lett. 84, 2572–2575 (2000). +.. [13] Haba, N. & Murayama, H. Anarchy and hierarchy. Phys. Rev. D63, 053010 (2001). +.. [14] De Gouvea, A. & Murayama, H. Statistical test of anarchy. Phys. Lett. B573, 94–100 (2003). diff --git a/golemflavor/plot.py b/golemflavor/plot.py index acf0209..02aa232 100644 --- a/golemflavor/plot.py +++ b/golemflavor/plot.py @@ -298,14 +298,15 @@ def tax_fill(ax, points, nbins, **kwargs): def alpha_shape(points, alpha): - """ - Compute the alpha shape (concave hull) of a set - of points. - @param points: Iterable container of points. - @param alpha: alpha value to influence the - gooeyness of the border. 
Smaller numbers - don't fall inward as much as larger numbers. - Too large, and you lose everything! + """Compute the alpha shape (concave hull) of a set of points. + + Parameters + ---------- + points: Iterable container of points. + alpha: alpha value to influence the gooeyness of the border. Smaller + numbers don't fall inward as much as larger numbers. Too large, and you + lose everything! + """ if len(points) < 4: # When you have a triangle, there is no sense -- cgit v1.2.3