black galsec

This commit is contained in:
Noe Brucy
2024-11-28 11:51:25 +01:00
parent b76d4fc688
commit ffdc0db867

105
galsec.py
View File

@@ -298,7 +298,6 @@ def regroup(
class GalsecAnalysis(ABC): class GalsecAnalysis(ABC):
def __init__(self): def __init__(self):
raise NotImplementedError("This method should be implemented in a subclass") raise NotImplementedError("This method should be implemented in a subclass")
@@ -411,7 +410,8 @@ class GalsecAnalysis(ABC):
"delta_y": np.array([0.01, 0.1, 1, 10, 20]) * u.kpc, "delta_y": np.array([0.01, 0.1, 1, 10, 20]) * u.kpc,
"delta_z": np.array([0.01, 0.1, 1, 10, 20]) * u.kpc, "delta_z": np.array([0.01, 0.1, 1, 10, 20]) * u.kpc,
}, },
**kwargs): **kwargs,
):
nb_scales = len(list(scales.values())[0]) nb_scales = len(list(scales.values())[0])
@@ -432,9 +432,13 @@ class GalsecAnalysis(ABC):
if i == 0: if i == 0:
all_grids = grid all_grids = grid
else: else:
all_grids = {fluid: vstack([all_grids[fluid], grid[fluid]]) for fluid in self.fluids} all_grids = {
fluid: vstack([all_grids[fluid], grid[fluid]])
for fluid in self.fluids
}
return all_grids return all_grids
class Galsec(GalsecAnalysis): class Galsec(GalsecAnalysis):
""" """
Galactic sector extractor Galactic sector extractor
@@ -579,7 +583,9 @@ class Galsec(GalsecAnalysis):
elif name == "phi": elif name == "phi":
delta_phi = (delta / data["r_bin"]) * u.rad delta_phi = (delta / data["r_bin"]) * u.rad
phi_bin = (np.trunc(data["phi"] / delta_phi) + 0.5) * delta_phi phi_bin = (np.trunc(data["phi"] / delta_phi) + 0.5) * delta_phi
phi_bin[phi_bin >= 2 * np.pi * u.rad] = 0.5 * delta_phi[phi_bin >= 2 * np.pi * u.rad] phi_bin[phi_bin >= 2 * np.pi * u.rad] = (
0.5 * delta_phi[phi_bin >= 2 * np.pi * u.rad]
)
data["phi_bin"] = phi_bin data["phi_bin"] = phi_bin
else: else:
print(f"Unsupported binning variable {name}") print(f"Unsupported binning variable {name}")
@@ -592,7 +598,6 @@ class Galsec(GalsecAnalysis):
bin = np.digitize(data[name], bin_array, right=False) bin = np.digitize(data[name], bin_array, right=False)
bin_array = np.append(bin_array, np.max(data[name])) bin_array = np.append(bin_array, np.max(data[name]))
# Store the middle value # Store the middle value
data[f"{name}_bin"] = (bin_array[bin - 1] + bin_array[bin]) / 2 data[f"{name}_bin"] = (bin_array[bin - 1] + bin_array[bin]) / 2
@@ -600,7 +605,6 @@ class Galsec(GalsecAnalysis):
keys_binning.append(name) keys_binning.append(name)
return keys_binning return keys_binning
def densify(self, processed_data: dict, bin_deltas: dict, filter_bounds: dict): def densify(self, processed_data: dict, bin_deltas: dict, filter_bounds: dict):
""" """
Densify the data by adding void bins where no data is present Densify the data by adding void bins where no data is present
@@ -612,25 +616,49 @@ class Galsec(GalsecAnalysis):
if "r" in filter_bounds: if "r" in filter_bounds:
rmax = filter_bounds["r"][1].value rmax = filter_bounds["r"][1].value
else: else:
rmax = np.max([np.max(processed_data[fluid]["r"].value) for fluid in processed_data]) rmax = np.max(
[
np.max(processed_data[fluid]["r"].value)
for fluid in processed_data
]
)
delta_phi = bin_deltas["phi"].value / rmax delta_phi = bin_deltas["phi"].value / rmax
bins_array[bin_name] = np.arange(0, 2 * np.pi, delta_phi * 0.9) bins_array[bin_name] = np.arange(0, 2 * np.pi, delta_phi * 0.9)
else: else:
if bin_name in filter_bounds: if bin_name in filter_bounds:
bin_min = filter_bounds[bin_name][0].value + bin_deltas[bin_name].value / 2 bin_min = (
filter_bounds[bin_name][0].value
+ bin_deltas[bin_name].value / 2
)
bin_max = filter_bounds[bin_name][1].value bin_max = filter_bounds[bin_name][1].value
else: else:
bin_min = np.min([np.min(processed_data[fluid][bin_name].value) for fluid in processed_data]) bin_min = np.min(
bin_max = np.max([np.max(processed_data[fluid][bin_name].value) for fluid in processed_data]) [
bins_array[bin_name] = np.arange(bin_min, bin_max, bin_deltas[bin_name].value*0.9) np.min(processed_data[fluid][bin_name].value)
for fluid in processed_data
]
)
bin_max = np.max(
[
np.max(processed_data[fluid][bin_name].value)
for fluid in processed_data
]
)
bins_array[bin_name] = np.arange(
bin_min, bin_max, bin_deltas[bin_name].value * 0.9
)
grids = np.meshgrid(*list(bins_array.values())) grids = np.meshgrid(*list(bins_array.values()))
bins_array = {key: grid.flatten() for key, grid in zip(bins_array.keys(), grids)} bins_array = {
key: grid.flatten() for key, grid in zip(bins_array.keys(), grids)
}
for fluid in processed_data: for fluid in processed_data:
keys = processed_data[fluid].keys() keys = processed_data[fluid].keys()
units = {key: processed_data[fluid][key].unit for key in keys} units = {key: processed_data[fluid][key].unit for key in keys}
void_array = {key: np.zeros_like(list(bins_array.values())[0]) for key in keys} void_array = {
key: np.zeros_like(list(bins_array.values())[0]) for key in keys
}
for bin_name in bin_deltas: for bin_name in bin_deltas:
void_array[bin_name] = bins_array[bin_name] void_array[bin_name] = bins_array[bin_name]
@@ -639,18 +667,23 @@ class Galsec(GalsecAnalysis):
dense_array = vstack([processed_data[fluid], void_array]) dense_array = vstack([processed_data[fluid], void_array])
processed_data[fluid] = dense_array processed_data[fluid] = dense_array
self.binning(dataset=processed_data, bins=bin_deltas, filter_bounds=filter_bounds) self.binning(
dataset=processed_data, bins=bin_deltas, filter_bounds=filter_bounds
)
for fluid in processed_data: for fluid in processed_data:
bin_names_with_bin = [bin_name + "_bin" for bin_name in bin_deltas] bin_names_with_bin = [bin_name + "_bin" for bin_name in bin_deltas]
processed_data[fluid] = processed_data[fluid].group_by(bin_names_with_bin).groups.aggregate(np.add) processed_data[fluid] = (
processed_data[fluid]
.group_by(bin_names_with_bin)
.groups.aggregate(np.add)
)
for bin_name in bin_deltas: for bin_name in bin_deltas:
processed_data[fluid].remove_column(bin_name) processed_data[fluid].remove_column(bin_name)
processed_data[fluid].rename_column(bin_name + "_bin", bin_name) processed_data[fluid].rename_column(bin_name + "_bin", bin_name)
return processed_data return processed_data
def analysis(self, bins: dict, filter_bounds: dict, binning_mode="delta"): def analysis(self, bins: dict, filter_bounds: dict, binning_mode="delta"):
result = {} result = {}
@@ -692,38 +725,48 @@ class Galsec(GalsecAnalysis):
return result return result
class GalsecTimeSeries(GalsecAnalysis): class GalsecTimeSeries(GalsecAnalysis):
def __init__(self, galsecs: list, loader=None): def __init__(self, galsecs: list, loader=None):
self.galsecs = galsecs self.galsecs = galsecs
self.loader = loader self.loader = loader
self.loaded = loader is None self.loaded = loader is None
def _analysis_single(self, galsec, bins, filter_bounds, binning_mode="delta"): def _analysis_single(self, galsec, bins, filter_bounds, binning_mode="delta"):
if not self.loaded: if not self.loaded:
galsec = self.loader(galsec) galsec = self.loader(galsec)
analysis_result = galsec.analysis(bins, filter_bounds, binning_mode=binning_mode) analysis_result = galsec.analysis(
bins, filter_bounds, binning_mode=binning_mode
)
for fluid in analysis_result: for fluid in analysis_result:
analysis_result[fluid]["time"] = galsec.time analysis_result[fluid]["time"] = galsec.time
return analysis_result return analysis_result
def analysis(
def analysis(self, bins: dict, filter_bounds: dict, self,
bins: dict,
filter_bounds: dict,
binning_mode="delta", binning_mode="delta",
aggregate=True, std=True, percentiles=[], aggregate=True,
num_processes=1): std=True,
percentiles=[],
num_processes=1,
):
timed_data = [] timed_data = []
if num_processes == 1: if num_processes == 1:
for galsec in self.galsecs: for galsec in self.galsecs:
timed_data.append(self._analysis_single(galsec, bins, filter_bounds, binning_mode)) timed_data.append(
self._analysis_single(galsec, bins, filter_bounds, binning_mode)
)
else: else:
from multiprocessing import Pool from multiprocessing import Pool
with Pool(num_processes) as p: with Pool(num_processes) as p:
timed_data = p.starmap( timed_data = p.starmap(
self._analysis_single, self._analysis_single,
[(galsec, bins, filter_bounds, binning_mode) for galsec in self.galsecs], [
(galsec, bins, filter_bounds, binning_mode)
for galsec in self.galsecs
],
) )
averaged_data = {} averaged_data = {}
@@ -732,11 +775,13 @@ class GalsecTimeSeries(GalsecAnalysis):
if aggregate: if aggregate:
grouped_data = averaged_data[fluid].group_by(list(bins.keys())).groups grouped_data = averaged_data[fluid].group_by(list(bins.keys())).groups
if std: if std:
averaged_data[fluid + "_std"] = grouped_data.aggregate(lambda x: np.std(x.value)) averaged_data[fluid + "_std"] = grouped_data.aggregate(
lambda x: np.std(x.value)
)
for q in percentiles: for q in percentiles:
averaged_data[fluid + f"_q{q}"] = grouped_data.aggregate(lambda x: np.percentile(x.value, q)) averaged_data[fluid + f"_q{q}"] = grouped_data.aggregate(
lambda x: np.percentile(x.value, q)
)
averaged_data[fluid] = grouped_data.aggregate(np.mean) averaged_data[fluid] = grouped_data.aggregate(np.mean)
return averaged_data return averaged_data