Explore ctapipe DL2 data¶
In [1]:
Copied!
import ctapipe
print(f"ctapipe version {ctapipe.__version__}")
import ctapipe
print(f"ctapipe version {ctapipe.__version__}")
ctapipe version 0.17.0
In [2]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from ctapipe.instrument import SubarrayDescription
from ctapipe.io import TableLoader
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from ctapipe.instrument import SubarrayDescription
from ctapipe.io import TableLoader
/home/runner/micromamba/envs/ctaodl2/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
In [3]:
Copied!
from tqdm.auto import tqdm
from tqdm.auto import tqdm
In [4]:
Copied!
def read_table(filename, tel_ids=None, stop=None):
    """
    Read data from a file and return it as a pandas DataFrame.
    Parameters
    ----------
    filename : str
        The path to the input file.
    tel_ids : list or None, optional
        A list of telescope IDs to include in the data. If None, all telescopes are included. Default is None.
    stop : int or None, optional
        The number of events to read from the file. If None, all events are read. Default is None.
    Returns
    -------
    pandas.DataFrame
        The data read from the file as a pandas DataFrame.
    """
    loader = TableLoader(
        input_url=filename,
        load_dl1_parameters=True,
        load_dl2=True,
        load_instrument=True,
        load_simulated=True,
        load_true_parameters=True,
    )
    data = loader.read_telescope_events(telescopes=tel_ids, stop=stop)
    data.remove_columns([col for col in data.colnames if len(data[col].shape) > 1])
    return data.to_pandas()
def read_table(filename, tel_ids=None, stop=None):
    """
    Read data from a file and return it as a pandas DataFrame.
    Parameters
    ----------
    filename : str
        The path to the input file.
    tel_ids : list or None, optional
        A list of telescope IDs to include in the data. If None, all telescopes are included. Default is None.
    stop : int or None, optional
        The number of events to read from the file. If None, all events are read. Default is None.
    Returns
    -------
    pandas.DataFrame
        The data read from the file as a pandas DataFrame.
    """
    loader = TableLoader(
        input_url=filename,
        load_dl1_parameters=True,
        load_dl2=True,
        load_instrument=True,
        load_simulated=True,
        load_true_parameters=True,
    )
    data = loader.read_telescope_events(telescopes=tel_ids, stop=stop)
    data.remove_columns([col for col in data.colnames if len(data[col].shape) > 1])
    return data.to_pandas()
In [5]:
Copied!
dl2_proton_filename = "../data/proton.dl2.h5"
dl2_gamma_filename = "../data/gamma-diffuse.dl2.h5"
dl2_proton_filename = "../data/proton.dl2.h5"
dl2_gamma_filename = "../data/gamma-diffuse.dl2.h5"
In [6]:
Copied!
subarray = SubarrayDescription.from_hdf(dl2_proton_filename)
subarray = SubarrayDescription.from_hdf(dl2_proton_filename)
In [7]:
Copied!
# limited due to github action limited memory. Change to None to get full statistics
n_events = 100000
protons = read_table(dl2_proton_filename, stop=n_events)
gammas = read_table(dl2_gamma_filename, stop=n_events)
# limited due to github action limited memory. Change to None to get full statistics
n_events = 100000
protons = read_table(dl2_proton_filename, stop=n_events)
gammas = read_table(dl2_gamma_filename, stop=n_events)
In [8]:
Copied!
protons.describe()
protons.describe()
Out[8]:
| obs_id | event_id | tel_id | hillas_intensity | hillas_skewness | hillas_kurtosis | hillas_fov_lon | hillas_fov_lat | hillas_r | hillas_phi | ... | HillasReconstructor_core_uncert_x | HillasReconstructor_core_uncert_y | HillasReconstructor_core_tilted_x | HillasReconstructor_core_tilted_y | HillasReconstructor_core_tilted_uncert_x | HillasReconstructor_core_tilted_uncert_y | HillasReconstructor_h_max | HillasReconstructor_h_max_uncert | HillasReconstructor_average_intensity | HillasReconstructor_goodness_of_fit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 243434.000000 | 2.434340e+05 | 243434.000000 | 1.847670e+05 | 184767.000000 | 184767.000000 | 184767.000000 | 184767.000000 | 184767.000000 | 184767.000000 | ... | 0.0 | 0.0 | 143687.000000 | 143687.000000 | 0.0 | 0.0 | 143687.000000 | 0.0 | 143687.000000 | 0.0 | 
| mean | 12278.376574 | 2.486991e+06 | 9.872676 | 1.489208e+03 | 0.002598 | 2.328642 | 0.002845 | -0.005889 | 2.367234 | 0.165919 | ... | NaN | NaN | -20.773572 | -48.146966 | NaN | NaN | 8316.845538 | NaN | 2242.854539 | NaN | 
| min | 10000.000000 | 1.010000e+02 | 1.000000 | 5.000001e+01 | -5.193683 | 1.000137 | -3.736820 | -4.054100 | 0.012106 | -179.997231 | ... | NaN | NaN | -52113.435619 | -433154.245119 | NaN | NaN | 27.450918 | NaN | 51.679496 | NaN | 
| 25% | 10840.000000 | 1.216719e+06 | 4.000000 | 9.348740e+01 | -0.285687 | 1.872580 | -1.356408 | -1.380716 | 1.615606 | -89.865696 | ... | NaN | NaN | -126.372388 | -142.112669 | NaN | NaN | 6009.533139 | NaN | 192.893026 | NaN | 
| 50% | 11933.000000 | 2.481509e+06 | 8.000000 | 1.843154e+02 | 0.001152 | 2.182177 | -0.003268 | 0.007653 | 2.349409 | 0.496306 | ... | NaN | NaN | -29.610555 | -22.564810 | NaN | NaN | 7887.871830 | NaN | 395.098508 | NaN | 
| 75% | 13786.000000 | 3.748700e+06 | 11.000000 | 5.215911e+02 | 0.290451 | 2.582801 | 1.362969 | 1.365592 | 3.258999 | 90.378599 | ... | NaN | NaN | 57.316993 | 76.837541 | NaN | NaN | 9716.422276 | NaN | 1075.090369 | NaN | 
| max | 15438.000000 | 5.000003e+06 | 35.000000 | 1.351788e+06 | 5.969430 | 45.300332 | 3.742301 | 4.060685 | 4.084685 | 179.993740 | ... | NaN | NaN | 209323.393149 | 343728.681274 | NaN | NaN | 997012.478075 | NaN | 473773.681108 | NaN | 
| std | 1627.939306 | 1.450195e+06 | 8.907840 | 1.316058e+04 | 0.509023 | 0.892877 | 1.814565 | 1.806119 | 0.975156 | 104.024104 | ... | NaN | NaN | 1900.207892 | 3241.114749 | NaN | NaN | 7375.972406 | NaN | 11337.581138 | NaN | 
8 rows × 111 columns
In [9]:
Copied!
gammas.describe()
gammas.describe()
Out[9]:
| obs_id | event_id | tel_id | hillas_intensity | hillas_skewness | hillas_kurtosis | hillas_fov_lon | hillas_fov_lat | hillas_r | hillas_phi | ... | HillasReconstructor_core_uncert_x | HillasReconstructor_core_uncert_y | HillasReconstructor_core_tilted_x | HillasReconstructor_core_tilted_y | HillasReconstructor_core_tilted_uncert_x | HillasReconstructor_core_tilted_uncert_y | HillasReconstructor_h_max | HillasReconstructor_h_max_uncert | HillasReconstructor_average_intensity | HillasReconstructor_goodness_of_fit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 273624.000000 | 2.736240e+05 | 273624.000000 | 205891.000000 | 205891.000000 | 205891.000000 | 205891.000000 | 205891.000000 | 205891.000000 | 205891.000000 | ... | 0.0 | 0.0 | 176562.000000 | 176562.000000 | 0.0 | 0.0 | 176562.000000 | 0.0 | 176562.000000 | 0.0 | 
| mean | 5117.391022 | 2.496656e+06 | 9.628907 | 1217.948032 | -0.000048 | 2.349440 | -0.002180 | 0.012361 | 2.342098 | 1.076141 | ... | NaN | NaN | -46.661051 | -39.613491 | NaN | NaN | 9313.011778 | NaN | 1592.500842 | NaN | 
| min | 5000.000000 | 1.010000e+02 | 1.000000 | 50.002919 | -4.072445 | 1.006937 | -3.740056 | -4.050354 | 0.003672 | -179.998125 | ... | NaN | NaN | -562891.280995 | -429672.684865 | NaN | NaN | 72.175967 | NaN | 50.940603 | NaN | 
| 25% | 5059.000000 | 1.240512e+06 | 4.000000 | 96.518319 | -0.260956 | 1.944135 | -1.340862 | -1.336462 | 1.580655 | -88.648830 | ... | NaN | NaN | -152.736155 | -179.779751 | NaN | NaN | 7797.330686 | NaN | 170.377078 | NaN | 
| 50% | 5110.000000 | 2.496609e+06 | 7.000000 | 194.748107 | -0.000188 | 2.202669 | 0.002231 | 0.021422 | 2.275186 | 1.491453 | ... | NaN | NaN | -31.302657 | -27.147178 | NaN | NaN | 9130.217230 | NaN | 335.991344 | NaN | 
| 75% | 5174.000000 | 3.751409e+06 | 10.000000 | 513.354879 | 0.260221 | 2.553191 | 1.336309 | 1.360847 | 3.248243 | 91.091363 | ... | NaN | NaN | 84.922760 | 114.373096 | NaN | NaN | 10642.159723 | NaN | 814.186966 | NaN | 
| max | 5271.000000 | 5.000015e+06 | 35.000000 | 910988.937369 | 5.396525 | 40.447260 | 3.744971 | 4.058099 | 4.080309 | 179.999212 | ... | NaN | NaN | 242640.421375 | 193267.563456 | NaN | NaN | 375505.215232 | NaN | 306094.062427 | NaN | 
| std | 73.921067 | 1.444107e+06 | 8.805074 | 9374.779680 | 0.455707 | 0.777383 | 1.801779 | 1.791422 | 0.985043 | 103.901273 | ... | NaN | NaN | 2986.469463 | 2232.601377 | NaN | NaN | 3218.053462 | NaN | 7927.164195 | NaN | 
8 rows × 111 columns
In [10]:
Copied!
columns = protons.columns
columns
columns = protons.columns
columns
Out[10]:
Index(['obs_id', 'event_id', 'tel_id', 'hillas_intensity', 'hillas_skewness',
       'hillas_kurtosis', 'hillas_fov_lon', 'hillas_fov_lat', 'hillas_r',
       'hillas_phi',
       ...
       'HillasReconstructor_core_uncert_y',
       'HillasReconstructor_core_tilted_x',
       'HillasReconstructor_core_tilted_y',
       'HillasReconstructor_core_tilted_uncert_x',
       'HillasReconstructor_core_tilted_uncert_y', 'HillasReconstructor_h_max',
       'HillasReconstructor_h_max_uncert', 'HillasReconstructor_is_valid',
       'HillasReconstructor_average_intensity',
       'HillasReconstructor_goodness_of_fit'],
      dtype='object', length=120)
In [11]:
Copied!
numerical_columns = [col for col in columns if hasattr(protons[col], 'dtype') and np.issubdtype(protons[col].dtype, np.number)]
ncol = 2
nrow = len(numerical_columns) // ncol + 1
fig, axes = plt.subplots(ncols=ncol, nrows=nrow, figsize=(20, 5*nrow))
nbins = 100
opt = dict(bins=nbins, histtype='step', density=True, lw=2)
columns_in_logscale = [col for col in numerical_columns if 'intensity' in col]
print(columns_in_logscale)
for i, col in tqdm(enumerate(numerical_columns), total=len(numerical_columns)):
    ax = axes[i//ncol, i%ncol]
    mask = np.isfinite(protons[col])
    # if col in columns_in_logscale and protons[mask][col].min() > 0 and gammas[col].min() > 0:
    #     opt['bins'] = np.logspace(np.log10(protons[mask][col].min()), np.log10(protons[mask][col].max()), nbins)
    ax.hist(protons[mask][col], label='protons', **opt)
    mask = np.isfinite(gammas[col])
    ax.hist(gammas[mask][col], label='gammas', **opt)
    
    opt['bins'] = nbins
    ax.set_title(col)
    ax.legend()
numerical_columns = [col for col in columns if hasattr(protons[col], 'dtype') and np.issubdtype(protons[col].dtype, np.number)]
ncol = 2
nrow = len(numerical_columns) // ncol + 1
fig, axes = plt.subplots(ncols=ncol, nrows=nrow, figsize=(20, 5*nrow))
nbins = 100
opt = dict(bins=nbins, histtype='step', density=True, lw=2)
columns_in_logscale = [col for col in numerical_columns if 'intensity' in col]
print(columns_in_logscale)
for i, col in tqdm(enumerate(numerical_columns), total=len(numerical_columns)):
    ax = axes[i//ncol, i%ncol]
    mask = np.isfinite(protons[col])
    # if col in columns_in_logscale and protons[mask][col].min() > 0 and gammas[col].min() > 0:
    #     opt['bins'] = np.logspace(np.log10(protons[mask][col].min()), np.log10(protons[mask][col].max()), nbins)
    ax.hist(protons[mask][col], label='protons', **opt)
    mask = np.isfinite(gammas[col])
    ax.hist(gammas[mask][col], label='gammas', **opt)
    
    opt['bins'] = nbins
    ax.set_title(col)
    ax.legend()
['hillas_intensity', 'leakage_intensity_width_1', 'leakage_intensity_width_2', 'intensity_max', 'intensity_min', 'intensity_mean', 'intensity_std', 'intensity_skewness', 'intensity_kurtosis', 'true_hillas_intensity', 'true_leakage_intensity_width_1', 'true_leakage_intensity_width_2', 'true_intensity_max', 'true_intensity_min', 'true_intensity_mean', 'true_intensity_std', 'true_intensity_skewness', 'true_intensity_kurtosis', 'HillasReconstructor_average_intensity']
0%| | 0/110 [00:00<?, ?it/s]
2%|▏ | 2/110 [00:00<00:08, 13.01it/s]
4%|▎ | 4/110 [00:00<00:08, 12.04it/s]
5%|▌ | 6/110 [00:00<00:09, 11.33it/s]
7%|▋ | 8/110 [00:00<00:09, 11.00it/s]
9%|▉ | 10/110 [00:00<00:09, 10.84it/s]
11%|█ | 12/110 [00:01<00:09, 10.74it/s]
13%|█▎ | 14/110 [00:01<00:08, 10.68it/s]
15%|█▍ | 16/110 [00:01<00:08, 10.50it/s]
16%|█▋ | 18/110 [00:01<00:08, 10.54it/s]
18%|█▊ | 20/110 [00:01<00:08, 10.51it/s]
20%|██ | 22/110 [00:02<00:08, 10.50it/s]
22%|██▏ | 24/110 [00:02<00:08, 10.52it/s]
24%|██▎ | 26/110 [00:02<00:07, 10.77it/s]
25%|██▌ | 28/110 [00:02<00:07, 11.28it/s]
27%|██▋ | 30/110 [00:02<00:06, 11.77it/s]
29%|██▉ | 32/110 [00:02<00:06, 11.37it/s]
31%|███ | 34/110 [00:03<00:06, 11.16it/s]
33%|███▎ | 36/110 [00:03<00:06, 10.99it/s]
35%|███▍ | 38/110 [00:03<00:06, 10.69it/s]
36%|███▋ | 40/110 [00:03<00:06, 10.66it/s]
38%|███▊ | 42/110 [00:03<00:06, 10.66it/s]
40%|████ | 44/110 [00:04<00:06, 10.90it/s]
/home/runner/micromamba/envs/ctaodl2/lib/python3.9/site-packages/numpy/lib/histograms.py:885: RuntimeWarning: invalid value encountered in divide return n/db/n.sum(), bin_edges
42%|████▏ | 46/110 [00:04<00:05, 12.27it/s]
44%|████▎ | 48/110 [00:04<00:05, 11.16it/s]
45%|████▌ | 50/110 [00:04<00:05, 10.50it/s]
47%|████▋ | 52/110 [00:04<00:05, 10.09it/s]
49%|████▉ | 54/110 [00:05<00:05, 9.82it/s]
51%|█████ | 56/110 [00:05<00:05, 9.63it/s]
52%|█████▏ | 57/110 [00:05<00:05, 9.57it/s]
53%|█████▎ | 58/110 [00:05<00:05, 9.48it/s]
54%|█████▎ | 59/110 [00:05<00:05, 9.43it/s]
55%|█████▍ | 60/110 [00:05<00:05, 9.35it/s]
55%|█████▌ | 61/110 [00:05<00:05, 9.32it/s]
56%|█████▋ | 62/110 [00:05<00:05, 9.28it/s]
57%|█████▋ | 63/110 [00:05<00:05, 9.23it/s]
58%|█████▊ | 64/110 [00:06<00:04, 9.21it/s]
60%|██████ | 66/110 [00:06<00:04, 10.65it/s]
62%|██████▏ | 68/110 [00:06<00:03, 11.45it/s]
64%|██████▎ | 70/110 [00:06<00:03, 11.80it/s]
65%|██████▌ | 72/110 [00:06<00:03, 11.44it/s]
67%|██████▋ | 74/110 [00:06<00:03, 10.61it/s]
69%|██████▉ | 76/110 [00:07<00:03, 10.63it/s]
71%|███████ | 78/110 [00:07<00:02, 11.27it/s]
73%|███████▎ | 80/110 [00:07<00:02, 11.73it/s]
75%|███████▍ | 82/110 [00:07<00:02, 11.90it/s]
76%|███████▋ | 84/110 [00:07<00:02, 12.23it/s]
78%|███████▊ | 86/110 [00:07<00:01, 12.53it/s]
80%|████████ | 88/110 [00:08<00:01, 12.76it/s]
82%|████████▏ | 90/110 [00:08<00:01, 12.87it/s]
84%|████████▎ | 92/110 [00:08<00:01, 12.91it/s]
85%|████████▌ | 94/110 [00:08<00:01, 12.93it/s]
87%|████████▋ | 96/110 [00:08<00:01, 12.25it/s]
89%|████████▉ | 98/110 [00:08<00:01, 11.76it/s]
91%|█████████ | 100/110 [00:09<00:00, 11.48it/s]
95%|█████████▍| 104/110 [00:09<00:00, 14.45it/s]
97%|█████████▋| 107/110 [00:09<00:00, 17.44it/s]
100%|██████████| 110/110 [00:09<00:00, 20.09it/s]
100%|██████████| 110/110 [00:09<00:00, 11.59it/s]
In [12]:
Copied!
opt_intensity = dict(bins=np.logspace(1.65, 4, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['hillas_intensity'], **opt_intensity, label='gammas');
plt.hist(protons['hillas_intensity'], **opt_intensity, label='protons');
plt.hist(gammas['HillasReconstructor_average_intensity'], **opt_intensity, label='gammas (average)')
plt.hist(protons['HillasReconstructor_average_intensity'], **opt_intensity, label='protons (average)')
plt.yscale('log')
plt.xscale('log')
xticks = [50, 100, 200, 500, 1000, 10000]
plt.gca().set_xticks(xticks)
plt.gca().set_xticklabels([str(x) for x in xticks])
plt.xlabel('Intensity')
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()
opt_intensity = dict(bins=np.logspace(1.65, 4, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['hillas_intensity'], **opt_intensity, label='gammas');
plt.hist(protons['hillas_intensity'], **opt_intensity, label='protons');
plt.hist(gammas['HillasReconstructor_average_intensity'], **opt_intensity, label='gammas (average)')
plt.hist(protons['HillasReconstructor_average_intensity'], **opt_intensity, label='protons (average)')
plt.yscale('log')
plt.xscale('log')
xticks = [50, 100, 200, 500, 1000, 10000]
plt.gca().set_xticks(xticks)
plt.gca().set_xticklabels([str(x) for x in xticks])
plt.xlabel('Intensity')
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()
Out[12]:
<matplotlib.legend.Legend at 0x7f968e95d430>
In [13]:
Copied!
opt_energy = dict(bins=np.logspace(-3, 3, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['true_energy'], **opt_energy, label='gammas');
plt.hist(protons['true_energy'], **opt_energy, label='protons');
plt.yscale('log')
plt.xscale('log')
plt.xlabel(f"True Energy [TeV]")
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()
opt_energy = dict(bins=np.logspace(-3, 3, 50), histtype='step', density=False, lw=2)
plt.figure(figsize=(10, 5))
plt.hist(gammas['true_energy'], **opt_energy, label='gammas');
plt.hist(protons['true_energy'], **opt_energy, label='protons');
plt.yscale('log')
plt.xscale('log')
plt.xlabel(f"True Energy [TeV]")
plt.ylabel('Number of events')
plt.grid(which='both')
plt.legend()
Out[13]:
<matplotlib.legend.Legend at 0x7f968ea98550>
In [ ]:
Copied!