Explore ctapipe DL2 data¶

In [1]:

Copied!

import ctapipe
print(f"ctapipe version {ctapipe.__version__}")
import ctapipe
print(f"ctapipe version {ctapipe.__version__}")

ctapipe version 0.17.0

In [2]:

Copied!





import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from ctapipe.instrument import SubarrayDescription
from ctapipe.io import TableLoader
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from ctapipe.instrument import SubarrayDescription
from ctapipe.io import TableLoader

/home/runner/micromamba/envs/ctaodl2/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

In [3]:

Copied!

from tqdm.auto import tqdm
from tqdm.auto import tqdm

In [4]:

Copied!





def read_table(filename, tel_ids=None, stop=None):
    """
    Read data from a file and return it as a pandas DataFrame.

    Parameters
    ----------
    filename : str
        The path to the input file.
    tel_ids : list or None, optional
        A list of telescope IDs to include in the data. If None, all telescopes are included. Default is None.
    stop : int or None, optional
        The number of events to read from the file. If None, all events are read. Default is None.
    Returns
    -------
    pandas.DataFrame
        The data read from the file as a pandas DataFrame.
    """
    loader = TableLoader(
        input_url=filename,
        load_dl1_parameters=True,
        load_dl2=True,
        load_instrument=True,
        load_simulated=True,
        load_true_parameters=True,
    )

    data = loader.read_telescope_events(telescopes=tel_ids, stop=stop)
    data.remove_columns([col for col in data.colnames if len(data[col].shape) > 1])
    return data.to_pandas()
def read_table(filename, tel_ids=None, stop=None):
    """
    Read data from a file and return it as a pandas DataFrame.

    Parameters
    ----------
    filename : str
        The path to the input file.
    tel_ids : list or None, optional
        A list of telescope IDs to include in the data. If None, all telescopes are included. Default is None.
    stop : int or None, optional
        The number of events to read from the file. If None, all events are read. Default is None.
    Returns
    -------
    pandas.DataFrame
        The data read from the file as a pandas DataFrame.
    """
    loader = TableLoader(
        input_url=filename,
        load_dl1_parameters=True,
        load_dl2=True,
        load_instrument=True,
        load_simulated=True,
        load_true_parameters=True,
    )

    data = loader.read_telescope_events(telescopes=tel_ids, stop=stop)
    data.remove_columns([col for col in data.colnames if len(data[col].shape) > 1])
    return data.to_pandas()

In [5]:

Copied!

dl2_proton_filename = "../data/proton.dl2.h5"
dl2_gamma_filename = "../data/gamma-diffuse.dl2.h5"
dl2_proton_filename = "../data/proton.dl2.h5"
dl2_gamma_filename = "../data/gamma-diffuse.dl2.h5"

In [6]:

Copied!

subarray = SubarrayDescription.from_hdf(dl2_proton_filename)
subarray = SubarrayDescription.from_hdf(dl2_proton_filename)

In [7]:

Copied!

# limited due to github action limited memory. Change to None to get full statistics
n_events = 100000

protons = read_table(dl2_proton_filename, stop=n_events)
gammas = read_table(dl2_gamma_filename, stop=n_events)
# limited due to github action limited memory. Change to None to get full statistics
n_events = 100000

protons = read_table(dl2_proton_filename, stop=n_events)
gammas = read_table(dl2_gamma_filename, stop=n_events)

In [8]:

Copied!

protons.describe()
protons.describe()

Out[8]:

	obs_id	event_id	tel_id	hillas_intensity	hillas_skewness	hillas_kurtosis	hillas_fov_lon	hillas_fov_lat	hillas_r	hillas_phi	...	HillasReconstructor_core_uncert_x	HillasReconstructor_core_uncert_y	HillasReconstructor_core_tilted_x	HillasReconstructor_core_tilted_y	HillasReconstructor_core_tilted_uncert_x	HillasReconstructor_core_tilted_uncert_y	HillasReconstructor_h_max	HillasReconstructor_h_max_uncert	HillasReconstructor_average_intensity	HillasReconstructor_goodness_of_fit
count	243434.000000	2.434340e+05	243434.000000	1.847670e+05	184767.000000	184767.000000	184767.000000	184767.000000	184767.000000	184767.000000	...	0.0	0.0	143687.000000	143687.000000	0.0	0.0	143687.000000	0.0	143687.000000	0.0
mean	12278.376574	2.486991e+06	9.872676	1.489208e+03	0.002598	2.328642	0.002845	-0.005889	2.367234	0.165919	...	NaN	NaN	-20.773572	-48.146966	NaN	NaN	8316.845538	NaN	2242.854539	NaN
min	10000.000000	1.010000e+02	1.000000	5.000001e+01	-5.193683	1.000137	-3.736820	-4.054100	0.012106	-179.997231	...	NaN	NaN	-52113.435619	-433154.245119	NaN	NaN	27.450918	NaN	51.679496	NaN
25%	10840.000000	1.216719e+06	4.000000	9.348740e+01	-0.285687	1.872580	-1.356408	-1.380716	1.615606	-89.865696	...	NaN	NaN	-126.372388	-142.112669	NaN	NaN	6009.533139	NaN	192.893026	NaN
50%	11933.000000	2.481509e+06	8.000000	1.843154e+02	0.001152	2.182177	-0.003268	0.007653	2.349409	0.496306	...	NaN	NaN	-29.610555	-22.564810	NaN	NaN	7887.871830	NaN	395.098508	NaN
75%	13786.000000	3.748700e+06	11.000000	5.215911e+02	0.290451	2.582801	1.362969	1.365592	3.258999	90.378599	...	NaN	NaN	57.316993	76.837541	NaN	NaN	9716.422276	NaN	1075.090369	NaN
max	15438.000000	5.000003e+06	35.000000	1.351788e+06	5.969430	45.300332	3.742301	4.060685	4.084685	179.993740	...	NaN	NaN	209323.393149	343728.681274	NaN	NaN	997012.478075	NaN	473773.681108	NaN
std	1627.939306	1.450195e+06	8.907840	1.316058e+04	0.509023	0.892877	1.814565	1.806119	0.975156	104.024104	...	NaN	NaN	1900.207892	3241.114749	NaN	NaN	7375.972406	NaN	11337.581138	NaN

8 rows × 111 columns

In [9]:

Copied!

gammas.describe()
gammas.describe()

Out[9]:

	obs_id	event_id	tel_id	hillas_intensity	hillas_skewness	hillas_kurtosis	hillas_fov_lon	hillas_fov_lat	hillas_r	hillas_phi	...	HillasReconstructor_core_uncert_x	HillasReconstructor_core_uncert_y	HillasReconstructor_core_tilted_x	HillasReconstructor_core_tilted_y	HillasReconstructor_core_tilted_uncert_x	HillasReconstructor_core_tilted_uncert_y	HillasReconstructor_h_max	HillasReconstructor_h_max_uncert	HillasReconstructor_average_intensity	HillasReconstructor_goodness_of_fit
count	273624.000000	2.736240e+05	273624.000000	205891.000000	205891.000000	205891.000000	205891.000000	205891.000000	205891.000000	205891.000000	...	0.0	0.0	176562.000000	176562.000000	0.0	0.0	176562.000000	0.0	176562.000000	0.0
mean	5117.391022	2.496656e+06	9.628907	1217.948032	-0.000048	2.349440	-0.002180	0.012361	2.342098	1.076141	...	NaN	NaN	-46.661051	-39.613491	NaN	NaN	9313.011778	NaN	1592.500842	NaN
min	5000.000000	1.010000e+02	1.000000	50.002919	-4.072445	1.006937	-3.740056	-4.050354	0.003672	-179.998125	...	NaN	NaN	-562891.280995	-429672.684865	NaN	NaN	72.175967	NaN	50.940603	NaN
25%	5059.000000	1.240512e+06	4.000000	96.518319	-0.260956	1.944135	-1.340862	-1.336462	1.580655	-88.648830	...	NaN	NaN	-152.736155	-179.779751	NaN	NaN	7797.330686	NaN	170.377078	NaN
50%	5110.000000	2.496609e+06	7.000000	194.748107	-0.000188	2.202669	0.002231	0.021422	2.275186	1.491453	...	NaN	NaN	-31.302657	-27.147178	NaN	NaN	9130.217230	NaN	335.991344	NaN
75%	5174.000000	3.751409e+06	10.000000	513.354879	0.260221	2.553191	1.336309	1.360847	3.248243	91.091363	...	NaN	NaN	84.922760	114.373096	NaN	NaN	10642.159723	NaN	814.186966	NaN
max	5271.000000	5.000015e+06	35.000000	910988.937369	5.396525	40.447260	3.744971	4.058099	4.080309	179.999212	...	NaN	NaN	242640.421375	193267.563456	NaN	NaN	375505.215232	NaN	306094.062427	NaN
std	73.921067	1.444107e+06	8.805074	9374.779680	0.455707	0.777383	1.801779	1.791422	0.985043	103.901273	...	NaN	NaN	2986.469463	2232.601377	NaN	NaN	3218.053462	NaN	7927.164195	NaN

8 rows × 111 columns

In [10]:

Copied!

columns = protons.columns
columns
columns = protons.columns
columns

Out[10]:

Index(['obs_id', 'event_id', 'tel_id', 'hillas_intensity', 'hillas_skewness',
       'hillas_kurtosis', 'hillas_fov_lon', 'hillas_fov_lat', 'hillas_r',
       'hillas_phi',
       ...
       'HillasReconstructor_core_uncert_y',
       'HillasReconstructor_core_tilted_x',
       'HillasReconstructor_core_tilted_y',
       'HillasReconstructor_core_tilted_uncert_x',
       'HillasReconstructor_core_tilted_uncert_y', 'HillasReconstructor_h_max',
       'HillasReconstructor_h_max_uncert', 'HillasReconstructor_is_valid',
       'HillasReconstructor_average_intensity',
       'HillasReconstructor_goodness_of_fit'],
      dtype='object', length=120)

In [11]:

Copied!





numerical_columns = [col for col in columns if hasattr(protons[col], 'dtype') and np.issubdtype(protons[col].dtype, np.number)]

ncol = 2
nrow = len(numerical_columns) // ncol + 1
fig, axes = plt.subplots(ncols=ncol, nrows=nrow, figsize=(20, 5*nrow))

nbins = 100
opt = dict(bins=nbins, histtype='step', density=True, lw=2)
columns_in_logscale = [col for col in numerical_columns if 'intensity' in col]
print(columns_in_logscale)


for i, col in tqdm(enumerate(numerical_columns), total=len(numerical_columns)):
    ax = axes[i//ncol, i%ncol]
    mask = np.isfinite(protons[col])
    # if col in columns_in_logscale and protons[mask][col].min() > 0 and gammas[col].min() > 0:
    #     opt['bins'] = np.logspace(np.log10(protons[mask][col].min()), np.log10(protons[mask][col].max()), nbins)
    ax.hist(protons[mask][col], label='protons', **opt)

    mask = np.isfinite(gammas[col])
    ax.hist(gammas[mask][col], label='gammas', **opt)
    
    opt['bins'] = nbins
    ax.set_title(col)
    ax.legend()
numerical_columns = [col for col in columns if hasattr(protons[col], 'dtype') and np.issubdtype(protons[col].dtype, np.number)]

ncol = 2
nrow = len(numerical_columns) // ncol + 1
fig, axes = plt.subplots(ncols=ncol, nrows=nrow, figsize=(20, 5*nrow))

nbins = 100
opt = dict(bins=nbins, histtype='step', density=True, lw=2)
columns_in_logscale = [col for col in numerical_columns if 'intensity' in col]
print(columns_in_logscale)


for i, col in tqdm(enumerate(numerical_columns), total=len(numerical_columns)):
    ax = axes[i//ncol, i%ncol]
    mask = np.isfinite(protons[col])
    # if col in columns_in_logscale and protons[mask][col].min() > 0 and gammas[col].min() > 0:
    #     opt['bins'] = np.logspace(np.log10(protons[mask][col].min()), np.log10(protons[mask][col].max()), nbins)
    ax.hist(protons[mask][col], label='protons', **opt)

    mask = np.isfinite(gammas[col])
    ax.hist(gammas[mask][col], label='gammas', **opt)
    
    opt['bins'] = nbins
    ax.set_title(col)
    ax.legend()

['hillas_intensity', 'leakage_intensity_width_1', 'leakage_intensity_width_2', 'intensity_max', 'intensity_min', 'intensity_mean', 'intensity_std', 'intensity_skewness', 'intensity_kurtosis', 'true_hillas_intensity', 'true_leakage_intensity_width_1', 'true_leakage_intensity_width_2', 'true_intensity_max', 'true_intensity_min', 'true_intensity_mean', 'true_intensity_std', 'true_intensity_skewness', 'true_intensity_kurtosis', 'HillasReconstructor_average_intensity']

  0%|          | 0/110 [00:00<?, ?it/s]

  2%|▏         | 2/110 [00:00<00:08, 13.01it/s]

  4%|▎         | 4/110 [00:00<00:08, 12.04it/s]

  5%|▌         | 6/110 [00:00<00:09, 11.33it/s]

  7%|▋         | 8/110 [00:00<00:09, 11.00it/s]

  9%|▉         | 10/110 [00:00<00:09, 10.84it/s]

 11%|█         | 12/110 [00:01<00:09, 10.74it/s]

 13%|█▎        | 14/110 [00:01<00:08, 10.68it/s]

 15%|█▍        | 16/110 [00:01<00:08, 10.50it/s]

 16%|█▋        | 18/110 [00:01<00:08, 10.54it/s]

 18%|█▊        | 20/110 [00:01<00:08, 10.51it/s]

 20%|██        | 22/110 [00:02<00:08, 10.50it/s]

 22%|██▏       | 24/110 [00:02<00:08, 10.52it/s]

 24%|██▎       | 26/110 [00:02<00:07, 10.77it/s]

 25%|██▌       | 28/110 [00:02<00:07, 11.28it/s]

 27%|██▋       | 30/110 [00:02<00:06, 11.77it/s]

 29%|██▉       | 32/110 [00:02<00:06, 11.37it/s]

 31%|███       | 34/110 [00:03<00:06, 11.16it/s]

 33%|███▎      | 36/110 [00:03<00:06, 10.99it/s]

 35%|███▍      | 38/110 [00:03<00:06, 10.69it/s]

 36%|███▋      | 40/110 [00:03<00:06, 10.66it/s]

 38%|███▊      | 42/110 [00:03<00:06, 10.66it/s]

 40%|████      | 44/110 [00:04<00:06, 10.90it/s]

/home/runner/micromamba/envs/ctaodl2/lib/python3.9/site-packages/numpy/lib/histograms.py:885: RuntimeWarning: invalid value encountered in divide
  return n/db/n.sum(), bin_edges

 42%|████▏     | 46/110 [00:04<00:05, 12.27it/s]

 44%|████▎     | 48/110 [00:04<00:05, 11.16it/s]

 45%|████▌     | 50/110 [00:04<00:05, 10.50it/s]

 47%|████▋     | 52/110 [00:04<00:05, 10.09it/s]

 49%|████▉     | 54/110 [00:05<00:05,  9.82it/s]

 51%|█████     | 56/110 [00:05<00:05,  9.63it/s]

 52%|█████▏    | 57/110 [00:05<00:05,  9.57it/s]

 53%|█████▎    | 58/110 [00:05<00:05,  9.48it/s]

 54%|█████▎    | 59/110 [00:05<00:05,  9.43it/s]

 55%|█████▍    | 60/110 [00:05<00:05,  9.35it/s]

 55%|█████▌    | 61/110 [00:05<00:05,  9.32it/s]

 56%|█████▋    | 62/110 [00:05<00:05,  9.28it/s]

 57%|█████▋    | 63/110 [00:05<00:05,  9.23it/s]

 58%|█████▊    | 64/110 [00:06<00:04,  9.21it/s]

 60%|██████    | 66/110 [00:06<00:04, 10.65it/s]

 62%|██████▏   | 68/110 [00:06<00:03, 11.45it/s]

 64%|██████▎   | 70/110 [00:06<00:03, 11.80it/s]

 65%|██████▌   | 72/110 [00:06<00:03, 11.44it/s]

 67%|██████▋   | 74/110 [00:06<00:03, 10.61it/s]

 69%|██████▉   | 76/110 [00:07<00:03, 10.63it/s]

 71%|███████   | 78/110 [00:07<00:02, 11.27it/s]

 73%|███████▎  | 80/110 [00:07<00:02, 11.73it/s]

 75%|███████▍  | 82/110 [00:07<00:02, 11.90it/s]

 76%|███████▋  | 84/110 [00:07<00:02, 12.23it/s]

 78%|███████▊  | 86/110 [00:07<00:01, 12.53it/s]

 80%|████████  | 88/110 [00:08<00:01, 12.76it/s]

 82%|████████▏ | 90/110 [00:08<00:01, 12.87it/s]

 84%|████████▎ | 92/110 [00:08<00:01, 12.91it/s]

 85%|████████▌ | 94/110 [00:08<00:01, 12.93it/s]

 87%|████████▋ | 96/110 [00:08<00:01, 12.25it/s]

 89%|████████▉ | 98/110 [00:08<00:01, 11.76it/s]

 91%|█████████ | 100/110 [00:09<00:00, 11.48it/s]

 95%|█████████▍| 104/110 [00:09<00:00, 14.45it/s]

 97%|█████████▋| 107/110 [00:09<00:00, 17.44it/s]

100%|██████████| 110/110 [00:09<00:00, 20.09it/s]

100%|██████████| 110/110 [00:09<00:00, 11.59it/s]

No description has been provided for this image

In [12]:

Copied!





opt_intensity = dict(bins=np.logspace(1.65, 4, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['hillas_intensity'], **opt_intensity, label='gammas');
plt.hist(protons['hillas_intensity'], **opt_intensity, label='protons');
plt.hist(gammas['HillasReconstructor_average_intensity'], **opt_intensity, label='gammas (average)')
plt.hist(protons['HillasReconstructor_average_intensity'], **opt_intensity, label='protons (average)')

plt.yscale('log')
plt.xscale('log')
xticks = [50, 100, 200, 500, 1000, 10000]
plt.gca().set_xticks(xticks)
plt.gca().set_xticklabels([str(x) for x in xticks])
plt.xlabel('Intensity')
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()
opt_intensity = dict(bins=np.logspace(1.65, 4, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['hillas_intensity'], **opt_intensity, label='gammas');
plt.hist(protons['hillas_intensity'], **opt_intensity, label='protons');
plt.hist(gammas['HillasReconstructor_average_intensity'], **opt_intensity, label='gammas (average)')
plt.hist(protons['HillasReconstructor_average_intensity'], **opt_intensity, label='protons (average)')

plt.yscale('log')
plt.xscale('log')
xticks = [50, 100, 200, 500, 1000, 10000]
plt.gca().set_xticks(xticks)
plt.gca().set_xticklabels([str(x) for x in xticks])
plt.xlabel('Intensity')
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()

Out[12]:

<matplotlib.legend.Legend at 0x7f968e95d430>

In [13]:

Copied!





opt_energy = dict(bins=np.logspace(-3, 3, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['true_energy'], **opt_energy, label='gammas');
plt.hist(protons['true_energy'], **opt_energy, label='protons');


plt.yscale('log')
plt.xscale('log')
plt.xlabel(f"True Energy [TeV]")
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()
opt_energy = dict(bins=np.logspace(-3, 3, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['true_energy'], **opt_energy, label='gammas');
plt.hist(protons['true_energy'], **opt_energy, label='protons');


plt.yscale('log')
plt.xscale('log')
plt.xlabel(f"True Energy [TeV]")
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()

Out[13]:

<matplotlib.legend.Legend at 0x7f968ea98550>

In [ ]: