From ffdc0db867ad780136108c201ea593be0dcf7277 Mon Sep 17 00:00:00 2001 From: Noe Brucy Date: Thu, 28 Nov 2024 11:51:25 +0100 Subject: [PATCH] black galsec --- galsec.py | 195 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 120 insertions(+), 75 deletions(-) diff --git a/galsec.py b/galsec.py index 49acc71..bdb897a 100644 --- a/galsec.py +++ b/galsec.py @@ -298,13 +298,12 @@ def regroup( class GalsecAnalysis(ABC): - def __init__(self): raise NotImplementedError("This method should be implemented in a subclass") def analysis(self, bin_deltas: dict, filter_bounds: dict): raise NotImplementedError("This method should be implemented in a subclass") - + def sector_analysis( self, delta_r: Quantity[u.kpc] = u.kpc, @@ -358,7 +357,7 @@ class GalsecAnalysis(ABC): """ bin_deltas = {"x": delta_x, "y": delta_y, "z": delta_z} - filter_bounds = {"x": [xmin, xmax], "y": [ymin, ymax], "z": [zmin, zmax]} + filter_bounds = {"x": [xmin, xmax], "y": [ymin, ymax], "z": [zmin, zmax]} self.grid = self.analysis(bin_deltas, filter_bounds) return self.grid @@ -402,7 +401,7 @@ class GalsecAnalysis(ABC): bin_deltas = {"r": delta_r, "z": delta_z} filter_bounds = {"r": [rmin, rmax], "z": [zmin, zmax]} return self.analysis(bin_deltas, filter_bounds, **kwargs) - + def scale_analysis( self, sub_analysis_method: str = "cartesian_analysis", @@ -411,30 +410,35 @@ class GalsecAnalysis(ABC): "delta_y": np.array([0.01, 0.1, 1, 10, 20]) * u.kpc, "delta_z": np.array([0.01, 0.1, 1, 10, 20]) * u.kpc, }, - **kwargs): - + **kwargs, + ): + nb_scales = len(list(scales.values())[0]) - + methods = { - "cartesian_analysis" : self.cartesian_analysis, - "sector_analysis" : self.sector_analysis, - "ring_analysis" : self.ring_analysis, - "vertical_ring_analysis" : self.vertical_ring_analysis, + "cartesian_analysis": self.cartesian_analysis, + "sector_analysis": self.sector_analysis, + "ring_analysis": self.ring_analysis, + "vertical_ring_analysis": self.vertical_ring_analysis, } - + for i in range(nb_scales): - + scale_i = {key: value[i] for key, value in scales.items()} - + print(f"Doing {sub_analysis_method} for scale {i}/{nb_scales}: {scale_i}") grid = methods[sub_analysis_method](**scale_i, **kwargs) - - if i==0: + + if i == 0: all_grids = grid else: - all_grids = {fluid: vstack([all_grids[fluid], grid[fluid]]) for fluid in self.fluids} + all_grids = { + fluid: vstack([all_grids[fluid], grid[fluid]]) + for fluid in self.fluids + } return all_grids - + + class Galsec(GalsecAnalysis): """ Galactic sector extractor @@ -445,7 +449,7 @@ class Galsec(GalsecAnalysis): lambda: ["mass", "number"], gas=["mass", "volume", "number"], ) - + weighted_fields = defaultdict(lambda: ["velr", "velphi", "velx", "vely", "velz"]) units = { @@ -511,7 +515,7 @@ class Galsec(GalsecAnalysis): self.abundance_He = data["header"]["ABHE"] self.compute_derived_values() - + self.dense_output = dense_output def compute_derived_values(self) -> None: @@ -533,7 +537,7 @@ class Galsec(GalsecAnalysis): dset["position"], dset["velocity"] ) # azimuthal velocity dset["velr"] = vect_r(dset["position"], dset["velocity"]) # radial velocity - + # Number of cells / resolution element per bin (1 when not binned!) dset["number"] = np.ones(len(dset["r"])) @@ -545,7 +549,7 @@ class Galsec(GalsecAnalysis): dset["vely"] = dset["velocity"][:, 1] dset["velz"] = dset["velocity"][:, 2] - def binning(self, bins, filter_bounds, binning_mode = "delta", dataset=None): + def binning(self, bins, filter_bounds, binning_mode="delta", dataset=None): """ Bin the data """ @@ -556,13 +560,13 @@ class Galsec(GalsecAnalysis): for fluid in self.fluids: data = dataset[fluid] # no copy for name in bins: - + delta = None if binning_mode == "delta": delta = bins[name] elif binning_mode == "number": - bin_min = filter_bounds[name][0].value - bin_max = filter_bounds[name][1].value + bin_min = filter_bounds[name][0].value + bin_max = filter_bounds[name][1].value delta = (bin_max - bin_min) / bins[name] if delta is not None: @@ -579,7 +583,9 @@ class Galsec(GalsecAnalysis): elif name == "phi": delta_phi = (delta / data["r_bin"]) * u.rad phi_bin = (np.trunc(data["phi"] / delta_phi) + 0.5) * delta_phi - phi_bin[phi_bin >= 2 * np.pi * u.rad] = 0.5 * delta_phi[phi_bin >= 2 * np.pi * u.rad] + phi_bin[phi_bin >= 2 * np.pi * u.rad] = ( + 0.5 * delta_phi[phi_bin >= 2 * np.pi * u.rad] + ) data["phi_bin"] = phi_bin else: print(f"Unsupported binning variable {name}") @@ -592,64 +598,91 @@ class Galsec(GalsecAnalysis): bin = np.digitize(data[name], bin_array, right=False) bin_array = np.append(bin_array, np.max(data[name])) - # Store the middle value data[f"{name}_bin"] = (bin_array[bin - 1] + bin_array[bin]) / 2 if name not in keys_binning: keys_binning.append(name) return keys_binning - - def densify(self, processed_data:dict, bin_deltas:dict, filter_bounds:dict): + def densify(self, processed_data: dict, bin_deltas: dict, filter_bounds: dict): """ Densify the data by adding void bins where no data is present """ - + bins_array = {} for bin_name in bin_deltas: if bin_name == "phi": if "r" in filter_bounds: rmax = filter_bounds["r"][1].value else: - rmax = np.max([np.max(processed_data[fluid]["r"].value) for fluid in processed_data]) + rmax = np.max( + [ + np.max(processed_data[fluid]["r"].value) + for fluid in processed_data + ] + ) delta_phi = bin_deltas["phi"].value / rmax - bins_array[bin_name] = np.arange(0, 2 * np.pi, delta_phi*0.9) + bins_array[bin_name] = np.arange(0, 2 * np.pi, delta_phi * 0.9) else: if bin_name in filter_bounds: - bin_min = filter_bounds[bin_name][0].value + bin_deltas[bin_name].value / 2 - bin_max = filter_bounds[bin_name][1].value - else: - bin_min = np.min([np.min(processed_data[fluid][bin_name].value) for fluid in processed_data]) - bin_max = np.max([np.max(processed_data[fluid][bin_name].value) for fluid in processed_data]) - bins_array[bin_name] = np.arange(bin_min, bin_max, bin_deltas[bin_name].value*0.9) - + bin_min = ( + filter_bounds[bin_name][0].value + + bin_deltas[bin_name].value / 2 + ) + bin_max = filter_bounds[bin_name][1].value + else: + bin_min = np.min( + [ + np.min(processed_data[fluid][bin_name].value) + for fluid in processed_data + ] + ) + bin_max = np.max( + [ + np.max(processed_data[fluid][bin_name].value) + for fluid in processed_data + ] + ) + bins_array[bin_name] = np.arange( + bin_min, bin_max, bin_deltas[bin_name].value * 0.9 + ) + grids = np.meshgrid(*list(bins_array.values())) - bins_array = {key: grid.flatten() for key, grid in zip(bins_array.keys(), grids)} + bins_array = { + key: grid.flatten() for key, grid in zip(bins_array.keys(), grids) + } for fluid in processed_data: keys = processed_data[fluid].keys() units = {key: processed_data[fluid][key].unit for key in keys} - void_array = {key: np.zeros_like(list(bins_array.values())[0]) for key in keys} + void_array = { + key: np.zeros_like(list(bins_array.values())[0]) for key in keys + } - for bin_name in bin_deltas: + for bin_name in bin_deltas: void_array[bin_name] = bins_array[bin_name] void_array = QTable(void_array, names=keys, units=units) dense_array = vstack([processed_data[fluid], void_array]) processed_data[fluid] = dense_array - - self.binning(dataset=processed_data, bins=bin_deltas, filter_bounds=filter_bounds) - + + self.binning( + dataset=processed_data, bins=bin_deltas, filter_bounds=filter_bounds + ) + for fluid in processed_data: bin_names_with_bin = [bin_name + "_bin" for bin_name in bin_deltas] - processed_data[fluid] = processed_data[fluid].group_by(bin_names_with_bin).groups.aggregate(np.add) + processed_data[fluid] = ( + processed_data[fluid] + .group_by(bin_names_with_bin) + .groups.aggregate(np.add) + ) for bin_name in bin_deltas: - processed_data[fluid].remove_column(bin_name) - processed_data[fluid].rename_column(bin_name + "_bin", bin_name) - + processed_data[fluid].remove_column(bin_name) + processed_data[fluid].rename_column(bin_name + "_bin", bin_name) + return processed_data - def analysis(self, bins: dict, filter_bounds: dict, binning_mode="delta"): @@ -685,58 +718,70 @@ class Galsec(GalsecAnalysis): self.time.value, ) result["stars"]["sfr"] = sfr * u.Msun / u.year - + if self.dense_output and (binning_mode == "delta" or binning_mode == "number"): result = self.densify(result, bins, filter_bounds) return result - class GalsecTimeSeries(GalsecAnalysis): - def __init__(self, galsecs: list, loader=None): - self.galsecs = galsecs + self.galsecs = galsecs self.loader = loader self.loaded = loader is None - def _analysis_single(self, galsec, bins, filter_bounds, binning_mode="delta"): if not self.loaded: galsec = self.loader(galsec) - analysis_result = galsec.analysis(bins, filter_bounds, binning_mode=binning_mode) + analysis_result = galsec.analysis( + bins, filter_bounds, binning_mode=binning_mode + ) for fluid in analysis_result: analysis_result[fluid]["time"] = galsec.time return analysis_result - - def analysis(self, bins: dict, filter_bounds: dict, - binning_mode="delta", - aggregate=True, std=True, percentiles=[], - num_processes=1): + def analysis( + self, + bins: dict, + filter_bounds: dict, + binning_mode="delta", + aggregate=True, + std=True, + percentiles=[], + num_processes=1, + ): timed_data = [] if num_processes == 1: for galsec in self.galsecs: - timed_data.append(self._analysis_single(galsec, bins, filter_bounds, binning_mode)) + timed_data.append( + self._analysis_single(galsec, bins, filter_bounds, binning_mode) + ) else: from multiprocessing import Pool + with Pool(num_processes) as p: timed_data = p.starmap( self._analysis_single, - [(galsec, bins, filter_bounds, binning_mode) for galsec in self.galsecs], + [ + (galsec, bins, filter_bounds, binning_mode) + for galsec in self.galsecs + ], ) - + averaged_data = {} for fluid in timed_data[0]: - averaged_data[fluid] = vstack([result[fluid] for result in timed_data]) - if aggregate: - grouped_data = averaged_data[fluid].group_by(list(bins.keys())).groups - if std: - averaged_data[fluid + "_std"] = grouped_data.aggregate(lambda x: np.std(x.value)) - for q in percentiles: - averaged_data[fluid + f"_q{q}"] = grouped_data.aggregate(lambda x: np.percentile(x.value, q)) - averaged_data[fluid] = grouped_data.aggregate(np.mean) - - + averaged_data[fluid] = vstack([result[fluid] for result in timed_data]) + if aggregate: + grouped_data = averaged_data[fluid].group_by(list(bins.keys())).groups + if std: + averaged_data[fluid + "_std"] = grouped_data.aggregate( + lambda x: np.std(x.value) + ) + for q in percentiles: + averaged_data[fluid + f"_q{q}"] = grouped_data.aggregate( + lambda x: np.percentile(x.value, q) + ) + averaged_data[fluid] = grouped_data.aggregate(np.mean) + return averaged_data -