Improve loading from ramses + hdf5 consersion

This commit is contained in:
Noe Brucy
2023-03-01 15:34:17 +01:00
parent 9bda60702a
commit ae8a8a605d
3 changed files with 213 additions and 50 deletions
+200 -49
View File
@@ -61,7 +61,7 @@ from utils.runselector import RunSelector
def mass_func(dset):
dx = dset["dx"]
return dset["rho"] * dx ** 3 # Mass function
return dset["rho"] * dx**3 # Mass function
def vol_func(dset):
@@ -213,7 +213,7 @@ def pspec2d(map2D):
k = np.where(k_alias >= n // 2, k_alias - n, k_alias)
kx, ky = np.meshgrid(k, k, indexing="ij")
# Compute map of k
kmap = np.sqrt(kx ** 2 + ky ** 2)
kmap = np.sqrt(kx**2 + ky**2)
# Compute fft
fmap = fft.fft2(map2D)
# Compute the power map from the fft
@@ -285,6 +285,10 @@ class SnapshotProcessor(HDF5Container):
"P": "unit_pressure",
"g": {"unit_gravpot": 1, "unit_length": -1},
"phi": "unit_gravpot",
"mass": "unit_mass",
"epoch": "unit_time",
"id": U.none,
"level": U.none,
}
G = 1.0 # Gravitational constant
@@ -334,8 +338,15 @@ class SnapshotProcessor(HDF5Container):
subfolder = ""
self.filename = f"{self.path_out}{subfolder}/postproc_{tag_name}{num:05}.h5"
self.cells_filename = f"{self.path_out}{subfolder}/cells_{tag_name}{num:05}.h5"
self.parts_filename = f"{self.path_out}{subfolder}/parts_{tag_name}{num:05}.h5"
self.cells_filename = (
f"{self.path_out}{subfolder}/cells_{tag_name}{num:05}.h5" # legacy only
)
self.parts_filename = (
f"{self.path_out}{subfolder}/parts_{tag_name}{num:05}.h5" # legacy only
)
self.snap_filename = (
f"{self.path_out}{subfolder}/snap_{num:05}.h5" # new hdf5 snap format
)
self.pspec_filename = f"{self.path_out}{subfolder}/pspec_{tag_name}{num:05}.h5"
self.filaments_filename = (
f"{self.path_out}/{subfolder}filaments_{tag_name}{num:05}.h5"
@@ -424,6 +435,71 @@ class SnapshotProcessor(HDF5Container):
verbose=self.params.pymses.verbose,
check_endianness=False,
)
# Check if variables are in output
name_conv = {
"rho": "density",
"vel": "velocity",
"P": "pressure",
"Br": "B",
"Bl": "B",
"mass": "mass",
"id": "identity",
"level": "levelp",
"epoch": "epoch",
}
hydro_file = open(f"{path}/output_{self.num:05}/hydro_file_descriptor.txt")
part_file = open(f"{path}/output_{self.num:05}/part_file_descriptor.txt")
has_grav = os.path.exists(
f"{path}/output_{self.num:05}/grav_{self.num:05}.out00000"
)
# ugly parsing
hlines = hydro_file.readlines()
plines = part_file.readlines()
hydro_var = np.unique(
list(map(lambda s: s.split(",")[1][1:].split("_")[0], hlines[2:]))
)
part_var = np.unique(
list(map(lambda s: s.split(",")[1][1:].split("_")[0], plines[2:]))
)
def is_available(available_vars, pymsesrc, var):
if var in ["g", "phi"]:
if not has_grav:
self.logger.warning(f"Variable {var} not in output")
if var not in pymsesrc:
self.logger.warning(f"Variable {var} not in pymsesrc")
return has_grav and var in pymsesrc
else:
if var in name_conv:
if name_conv[var] not in available_vars:
self.logger.warning(f"Variable {var} not in output")
if var not in pymsesrc:
self.logger.warning(f"Variable {var} not in pymsesrc")
return name_conv[var] in available_vars and var in pymsesrc
else:
self.logger.warning(f"Variable {var} is unknown")
return False
self.params.pymses.variables = list(
filter(
partial(
is_available,
hydro_var,
pymses.rcConfig.Ramses.amr_fields.field_name_list,
),
self.params.pymses.variables,
)
)
self.params.pymses.part_variables = list(
filter(
partial(
is_available, part_var, ["vel", "mass", "id", "level", "epoch"]
),
self.params.pymses.part_variables,
)
)
self._amr = self._ro.amr_source(self.params.pymses.variables)
self._part = self._ro.particle_source(self.params.pymses.part_variables)
@@ -510,23 +586,43 @@ class SnapshotProcessor(HDF5Container):
finally:
self.close()
def load_data(self, points_src, filename, save, keys=None):
def load_data(self, points_src, filename, save, keys=None, group="data"):
"""
Load data from the source file in the memory.
(Long and memory heavy)
"""
loaded = False
if os.path.exists(filename):
self.logger.debug(f"Found hdf5, loading {filename}.")
hdf5 = tables.open_file(filename, mode="r")
try:
if group in hdf5.root:
node = hdf5.get_node(f"/{group}")
loaded = True
elif "data" in hdf5.root:
self.logger.warning(
f"{filename} has no {group} group, but I found the group data."
)
node = hdf5.get_node("/data")
loaded = True
else:
self.logger.warning(f"{filename} has no {group} group")
if loaded:
data = {}
if keys is None:
keys = node._v_children
for key in keys:
data[key] = hdf5.get_node("/data/" + key).read()
finally:
hdf5.close()
else:
if key in node._v_children:
data[key] = node[key].read()
else:
self.logger.warning(
f"Key {key} is missing, I will try a full reload"
)
loaded = False
break
hdf5.close()
if not loaded:
self.logger.debug("No hdf5, loading from ramses data.")
data_pymses = points_src.flatten()
data = {}
for key in data_pymses.fields:
@@ -537,27 +633,56 @@ class SnapshotProcessor(HDF5Container):
pass
data["pos"] = data_pymses.points
self.logger.info("Ramses data loaded.")
if save:
hdf5 = tables.open_file(filename, mode="w")
try:
for key in data:
if len(data[key] > 0):
hdf5.create_array(
"/data", key, data[key], "", createparents=True
)
finally:
hdf5.close()
self.save_data(data, filename, group)
return data
def load_parts(self, keys=None):
def save_data(self, data, filename, group, overwrite=False):
self.logger.debug(f"Writing {filename}")
hdf5 = tables.open_file(filename, mode="a")
try:
nb_written = 0
for key in data:
if len(data[key] > 0):
if f"/{group}/{key}" in hdf5 and overwrite:
hdf5.remove_node(f"/{group}/{key}")
hdf5.create_array(
f"/{group}", key, data[key], "", createparents=True
)
unit = self._get_units(self.unit_key[key])
hdf5.get_node("/{group}/{key}").unit = unit
nb_written += 1
else:
self.logger.warning("Empty key")
if "namelist" not in hdf5.root._v_attrs:
hdf5.root._v_attrs.namelist = self.namelist.data.todict()
if "info " not in hdf5.root._v_attrs:
hdf5.root._v_attrs.info = self.info
finally:
hdf5.close()
self.logger.info(
f"{filename} successfully written with {nb_written}/{len(data.keys())} updated fields"
)
def load_parts(self, keys=None, force_new_filename=False, save=True):
if not self.parts_loaded:
self.logger.debug("Loading particles")
if os.path.exists(self.parts_filename) and not force_new_filename:
filename = self.parts_filename
else:
filename = self.snap_filename
self.parts = self.load_data(
self._part,
self.parts_filename,
self.params.process.save_parts,
filename,
self.params.process.save_parts and save,
keys=keys,
group="parts",
)
self.parts_loaded = True
self.logger.info("Particles loaded")
def unload_parts(self):
"""
@@ -567,31 +692,57 @@ class SnapshotProcessor(HDF5Container):
if self.parts_loaded:
del self.parts
self.parts_loaded = False
self.logger.info("Particles unloaded")
def load_cells(self, keys=None):
def load_cells(self, keys=None, force_new_filename=False, save=True):
"""
Load all cells from the source file in the memory.
Cells will be accessible trough self.cells
(Long and memory heavy)
"""
if not self.cells_loaded:
self.logger.debug("Loading cells")
if os.path.exists(self.cells_filename) and not force_new_filename:
filename = self.cells_filename
else:
filename = self.snap_filename
cells_src = CellsToPoints(self._amr)
self.cells = self.load_data(
cells_src,
self.cells_filename,
self.params.process.save_cells,
filename,
self.params.process.save_cells and save,
keys=keys,
group="cells",
)
self.cells_loaded = True
self.logger.info("Cells loaded")
def unload_cells(self):
"""
Free space in the memory by telling the garbage collectors that
Free space in the memory by telling the garbage collector that
self.cells is not needed
"""
if self.cells_loaded:
del self.cells
self.cells_loaded = False
self.logger.info("Cells unloaded")
def load_destructured(self, save=True):
self.load_cells(save=save)
self.load_parts(save=save)
def unload_destructured(self):
self.unload_cells()
self.unload_parts()
def convert_hdf5(self, filename=None):
self.load_destructured(save=False)
if filename is None:
filename = self.snap_filename
self.save_data(self.cells, filename, "cells")
self.save_data(self.parts, filename, "parts")
self.unload_destructured()
def get_nml(self, nml_key):
@@ -615,14 +766,14 @@ class SnapshotProcessor(HDF5Container):
def getter_vect_r(self, dset, name_vect):
"""Radial component of a vector"""
r = self.getter_pos_disk(dset)[:, :2]
ur = np.transpose((np.transpose(r) / np.sqrt(np.sum(r ** 2, axis=1))))
ur = np.transpose((np.transpose(r) / np.sqrt(np.sum(r**2, axis=1))))
return np.einsum("ij, ij -> i", dset[name_vect][:, :2], ur)
def getter_vect_phi(self, dset, name_vect):
"""Azimuthal component of a vector"""
r = self.getter_pos_disk(dset)[:, :2]
r_norm = np.sqrt(np.sum(r ** 2, axis=1))
r_norm = np.sqrt(np.sum(r**2, axis=1))
rot = np.array([[0, -1], [1, 0]])
uphi = np.transpose(np.einsum("ij, kj -> ik", rot, r) / r_norm)
vect = dset[name_vect][:, :2]
@@ -641,7 +792,7 @@ class SnapshotProcessor(HDF5Container):
"""Radial component of a vector"""
r = self.oct_getter_pos_disk(dset)[:, :, :2]
ur = np.transpose(
(np.transpose(r, (2, 0, 1)) / np.sqrt(np.sum(r ** 2, axis=2))), (1, 2, 0)
(np.transpose(r, (2, 0, 1)) / np.sqrt(np.sum(r**2, axis=2))), (1, 2, 0)
)
return np.einsum("ikj, ikj -> ik", dset[name_vect][:, :, :2], ur)
@@ -649,7 +800,7 @@ class SnapshotProcessor(HDF5Container):
"""Azimuthal component of a vector"""
r = self.oct_getter_pos_disk(dset)[:, :, :2]
r_norm = np.sqrt(np.sum(r ** 2, axis=2))
r_norm = np.sqrt(np.sum(r**2, axis=2))
rot = np.array([[0, -1], [1, 0]])
uphi = np.transpose(np.einsum("ij, klj -> ikl", rot, r) / r_norm, (1, 2, 0))
vect = dset[name_vect][:, :, :2]
@@ -694,10 +845,10 @@ class SnapshotProcessor(HDF5Container):
distance=size / 2.0,
far_cut_depth=size / 2.0,
up_vector="y",
map_max_size=2 ** level,
map_max_size=2**level,
)
cube = extractor.process(cam, cube_size=1.0, resolution=2 ** level).data
cube = extractor.process(cam, cube_size=1.0, resolution=2**level).data
return cube
def slice(self, getter, ax_los="z", z=0.0, unit=U.none):
@@ -919,7 +1070,7 @@ class SnapshotProcessor(HDF5Container):
rho = getter_rho(self.cells)
rho_kg_m3 = rho * self.info["unit_density"].express(U.kg_m3)
eb = 0.5 * (B_norm) ** 2 / (4 * np.pi * 10 ** (-7)) # mettre le bon mu
ek = 0.5 * v_norm ** 2 * rho_kg_m3
ek = 0.5 * v_norm**2 * rho_kg_m3
rapport = ek / eb
if logbins:
@@ -958,7 +1109,7 @@ class SnapshotProcessor(HDF5Container):
def getter_cos_vfluct_B(dset):
vel_fluct = dset["vel"] - mean_speed
B_norm = np.sqrt(np.sum(dset["Br"] ** 2, axis=1))
v_norm = np.sqrt(np.sum(vel_fluct ** 2, axis=1))
v_norm = np.sqrt(np.sum(vel_fluct**2, axis=1))
# Compute the dot product in each cell
dot_prod = np.einsum("ij,ij->i", vel_fluct, dset["Br"])
return np.abs(dot_prod) / (v_norm * B_norm)
@@ -1035,7 +1186,7 @@ class SnapshotProcessor(HDF5Container):
pos = self.oct_getter_pos_disk(dset)
xx = pos[:, :, 0] * lbox
yy = pos[:, :, 1] * lbox
rc2 = xx ** 2 + yy ** 2 # square of cylindrical radius
rc2 = xx**2 + yy**2 # square of cylindrical radius
vx = dset["vel"][:, :, 0]
vy = dset["vel"][:, :, 1]
omega_rho = dset["rho"]
@@ -1079,10 +1230,10 @@ class SnapshotProcessor(HDF5Container):
omega = self.get_value("/maps/omega_mwavg_z")
space_coeff = self.lbox
else:
G = U.G.express(U.kg ** -1 * U.m ** 3 * U.s ** -2)
cs2 = self.get_value("/maps/T_mwavg_z", unit=U.m ** 2 * U.s ** -2)
coldens = self.get_value("/maps/coldens_z", unit=U.kg * U.m ** -2)
omega = self.get_value("/maps/omega_mwavg_z", unit=U.s ** -1)
G = U.G.express(U.kg**-1 * U.m**3 * U.s**-2)
cs2 = self.get_value("/maps/T_mwavg_z", unit=U.m**2 * U.s**-2)
coldens = self.get_value("/maps/coldens_z", unit=U.kg * U.m**-2)
omega = self.get_value("/maps/omega_mwavg_z", unit=U.s**-1)
space_coeff = self.info["unit_length"].express(U.m)
map_size = self.params.pymses.map_size
@@ -1112,10 +1263,10 @@ class SnapshotProcessor(HDF5Container):
y = np.linspace(im_extent[2], im_extent[3], map_size) + 0.5 * dy - center[1]
xx, yy = np.meshgrid(x, y)
rr = np.sqrt(xx ** 2 + yy ** 2)
rr = np.sqrt(xx**2 + yy**2)
# Compute kappa**2 = (2omega/R)*d(R**2*omega)/dR
Gy, Gx = np.gradient(rr ** 2 * omega, dy, dx, edge_order=2)
Gy, Gx = np.gradient(rr**2 * omega, dy, dx, edge_order=2)
Gr = (xx * Gx + yy * Gy) / rr
kappa_square = (2 * omega / rr) * Gr
kappa = np.sqrt(kappa_square)
@@ -1490,7 +1641,7 @@ class SnapshotProcessor(HDF5Container):
x = np.arange(shape[0]) - shape[0] / 2
y = np.arange(shape[1]) - shape[1] / 2
xx, yy = np.meshgrid(x, y)
rr = np.sqrt(xx ** 2 + yy ** 2)
rr = np.sqrt(xx**2 + yy**2)
rmin = int(rmin_frac * shape[0])
rmax = int(rmax_frac * shape[0])
mask = (rr >= rmin) & (rr <= rmax)
@@ -1502,9 +1653,9 @@ class SnapshotProcessor(HDF5Container):
verbose=verbose,
smooth_size=1 * u.pix,
adapt_thresh=4 * u.pix,
size_thresh=size_thresh * u.pix ** 2,
size_thresh=size_thresh * u.pix**2,
glob_thresh=glob_thresh,
fill_hole_size=0.1 * u.pix ** 2,
fill_hole_size=0.1 * u.pix**2,
)
self.fil.medskel(verbose=verbose)
self.fil.analyze_skeletons(
@@ -1565,10 +1716,10 @@ class SnapshotProcessor(HDF5Container):
y = np.linspace(im_extent[2], im_extent[3], map_size) + 0.5 * dy - center[1]
xx, yy = np.meshgrid(x, y)
rr = np.sqrt(xx ** 2 + yy ** 2)
rr = np.sqrt(xx**2 + yy**2)
# Rotational support
R = vphi ** 2 / rr
R = vphi**2 / rr
# Equilibrium
Gvry, Gvrx = np.gradient(vr, dy, dx, edge_order=2)
@@ -1582,7 +1733,7 @@ class SnapshotProcessor(HDF5Container):
# Gravitational field
e2 = (1.0 / 512) ** 2
gstar = -GM / (rr ** 2 + e2)
gstar = -GM / (rr**2 + e2)
# Substract gravitational field from the star
Rdisk = R + gstar
@@ -1622,10 +1773,10 @@ class SnapshotProcessor(HDF5Container):
position = self.cells["pos"][mask] * self.info["unit_length"].express(U.pc)
cells_group[:, 1:4] = position # position
density = self.cells["rho"][mask] * self.info["unit_density"].express(
U.Msun / U.pc ** 3
U.Msun / U.pc**3
)
size = self.cells["dx"][mask] * self.info["unit_length"].express(U.pc)
cells_group[:, 4] = density * size ** 3 # mass
cells_group[:, 4] = density * size**3 # mass
# save file.txt
head = str(ncells)
+8
View File
@@ -0,0 +1,8 @@
# coding: utf-8
from snapshotprocessor import SnapshotProcessor
def convert_to_hdf5(path, snap_num, path_out=".", filename=None, **kwargs):
snap = SnapshotProcessor(path=path, num=snap_num, path_out=path_out, **kwargs)
snap.convert_hdf5(filename)
+5 -1
View File
@@ -100,7 +100,11 @@ class RunSelector:
self.allow_nodata = allow_nodata
self.namelist = {}
do_tests = (not self.fallback_nml) or (len(filter_nml) > 0)
do_tests = (
(not self.fallback_nml)
or (len(filter_nml) > 0)
or (sort_run_by is not None)
)
self.runs = self.get_runs(
in_runs, filter_name, filter_nml, sort_run_by, do_tests=do_tests
)