Improve loading from ramses + hdf5 consersion

This commit is contained in:
Noe Brucy
2023-03-01 15:34:17 +01:00
parent 9bda60702a
commit ae8a8a605d
3 changed files with 213 additions and 50 deletions
+200 -49
View File
@@ -61,7 +61,7 @@ from utils.runselector import RunSelector
def mass_func(dset): def mass_func(dset):
dx = dset["dx"] dx = dset["dx"]
return dset["rho"] * dx ** 3 # Mass function return dset["rho"] * dx**3 # Mass function
def vol_func(dset): def vol_func(dset):
@@ -213,7 +213,7 @@ def pspec2d(map2D):
k = np.where(k_alias >= n // 2, k_alias - n, k_alias) k = np.where(k_alias >= n // 2, k_alias - n, k_alias)
kx, ky = np.meshgrid(k, k, indexing="ij") kx, ky = np.meshgrid(k, k, indexing="ij")
# Compute map of k # Compute map of k
kmap = np.sqrt(kx ** 2 + ky ** 2) kmap = np.sqrt(kx**2 + ky**2)
# Compute fft # Compute fft
fmap = fft.fft2(map2D) fmap = fft.fft2(map2D)
# Compute the power map from the fft # Compute the power map from the fft
@@ -285,6 +285,10 @@ class SnapshotProcessor(HDF5Container):
"P": "unit_pressure", "P": "unit_pressure",
"g": {"unit_gravpot": 1, "unit_length": -1}, "g": {"unit_gravpot": 1, "unit_length": -1},
"phi": "unit_gravpot", "phi": "unit_gravpot",
"mass": "unit_mass",
"epoch": "unit_time",
"id": U.none,
"level": U.none,
} }
G = 1.0 # Gravitational constant G = 1.0 # Gravitational constant
@@ -334,8 +338,15 @@ class SnapshotProcessor(HDF5Container):
subfolder = "" subfolder = ""
self.filename = f"{self.path_out}{subfolder}/postproc_{tag_name}{num:05}.h5" self.filename = f"{self.path_out}{subfolder}/postproc_{tag_name}{num:05}.h5"
self.cells_filename = f"{self.path_out}{subfolder}/cells_{tag_name}{num:05}.h5" self.cells_filename = (
self.parts_filename = f"{self.path_out}{subfolder}/parts_{tag_name}{num:05}.h5" f"{self.path_out}{subfolder}/cells_{tag_name}{num:05}.h5" # legacy only
)
self.parts_filename = (
f"{self.path_out}{subfolder}/parts_{tag_name}{num:05}.h5" # legacy only
)
self.snap_filename = (
f"{self.path_out}{subfolder}/snap_{num:05}.h5" # new hdf5 snap format
)
self.pspec_filename = f"{self.path_out}{subfolder}/pspec_{tag_name}{num:05}.h5" self.pspec_filename = f"{self.path_out}{subfolder}/pspec_{tag_name}{num:05}.h5"
self.filaments_filename = ( self.filaments_filename = (
f"{self.path_out}/{subfolder}filaments_{tag_name}{num:05}.h5" f"{self.path_out}/{subfolder}filaments_{tag_name}{num:05}.h5"
@@ -424,6 +435,71 @@ class SnapshotProcessor(HDF5Container):
verbose=self.params.pymses.verbose, verbose=self.params.pymses.verbose,
check_endianness=False, check_endianness=False,
) )
# Check if variables are in output
name_conv = {
"rho": "density",
"vel": "velocity",
"P": "pressure",
"Br": "B",
"Bl": "B",
"mass": "mass",
"id": "identity",
"level": "levelp",
"epoch": "epoch",
}
hydro_file = open(f"{path}/output_{self.num:05}/hydro_file_descriptor.txt")
part_file = open(f"{path}/output_{self.num:05}/part_file_descriptor.txt")
has_grav = os.path.exists(
f"{path}/output_{self.num:05}/grav_{self.num:05}.out00000"
)
# ugly parsing
hlines = hydro_file.readlines()
plines = part_file.readlines()
hydro_var = np.unique(
list(map(lambda s: s.split(",")[1][1:].split("_")[0], hlines[2:]))
)
part_var = np.unique(
list(map(lambda s: s.split(",")[1][1:].split("_")[0], plines[2:]))
)
def is_available(available_vars, pymsesrc, var):
if var in ["g", "phi"]:
if not has_grav:
self.logger.warning(f"Variable {var} not in output")
if var not in pymsesrc:
self.logger.warning(f"Variable {var} not in pymsesrc")
return has_grav and var in pymsesrc
else:
if var in name_conv:
if name_conv[var] not in available_vars:
self.logger.warning(f"Variable {var} not in output")
if var not in pymsesrc:
self.logger.warning(f"Variable {var} not in pymsesrc")
return name_conv[var] in available_vars and var in pymsesrc
else:
self.logger.warning(f"Variable {var} is unknown")
return False
self.params.pymses.variables = list(
filter(
partial(
is_available,
hydro_var,
pymses.rcConfig.Ramses.amr_fields.field_name_list,
),
self.params.pymses.variables,
)
)
self.params.pymses.part_variables = list(
filter(
partial(
is_available, part_var, ["vel", "mass", "id", "level", "epoch"]
),
self.params.pymses.part_variables,
)
)
self._amr = self._ro.amr_source(self.params.pymses.variables) self._amr = self._ro.amr_source(self.params.pymses.variables)
self._part = self._ro.particle_source(self.params.pymses.part_variables) self._part = self._ro.particle_source(self.params.pymses.part_variables)
@@ -510,23 +586,43 @@ class SnapshotProcessor(HDF5Container):
finally: finally:
self.close() self.close()
def load_data(self, points_src, filename, save, keys=None): def load_data(self, points_src, filename, save, keys=None, group="data"):
""" """
Load data from the source file in the memory. Load data from the source file in the memory.
(Long and memory heavy) (Long and memory heavy)
""" """
loaded = False
if os.path.exists(filename): if os.path.exists(filename):
self.logger.debug(f"Found hdf5, loading {filename}.")
hdf5 = tables.open_file(filename, mode="r") hdf5 = tables.open_file(filename, mode="r")
try: if group in hdf5.root:
node = hdf5.get_node(f"/{group}")
loaded = True
elif "data" in hdf5.root:
self.logger.warning(
f"{filename} has no {group} group, but I found the group data."
)
node = hdf5.get_node("/data") node = hdf5.get_node("/data")
loaded = True
else:
self.logger.warning(f"{filename} has no {group} group")
if loaded:
data = {} data = {}
if keys is None: if keys is None:
keys = node._v_children keys = node._v_children
for key in keys: for key in keys:
data[key] = hdf5.get_node("/data/" + key).read() if key in node._v_children:
finally: data[key] = node[key].read()
hdf5.close() else:
else: self.logger.warning(
f"Key {key} is missing, I will try a full reload"
)
loaded = False
break
hdf5.close()
if not loaded:
self.logger.debug("No hdf5, loading from ramses data.")
data_pymses = points_src.flatten() data_pymses = points_src.flatten()
data = {} data = {}
for key in data_pymses.fields: for key in data_pymses.fields:
@@ -537,27 +633,56 @@ class SnapshotProcessor(HDF5Container):
pass pass
data["pos"] = data_pymses.points data["pos"] = data_pymses.points
self.logger.info("Ramses data loaded.")
if save: if save:
hdf5 = tables.open_file(filename, mode="w") self.save_data(data, filename, group)
try:
for key in data:
if len(data[key] > 0):
hdf5.create_array(
"/data", key, data[key], "", createparents=True
)
finally:
hdf5.close()
return data return data
def load_parts(self, keys=None): def save_data(self, data, filename, group, overwrite=False):
self.logger.debug(f"Writing {filename}")
hdf5 = tables.open_file(filename, mode="a")
try:
nb_written = 0
for key in data:
if len(data[key] > 0):
if f"/{group}/{key}" in hdf5 and overwrite:
hdf5.remove_node(f"/{group}/{key}")
hdf5.create_array(
f"/{group}", key, data[key], "", createparents=True
)
unit = self._get_units(self.unit_key[key])
hdf5.get_node("/{group}/{key}").unit = unit
nb_written += 1
else:
self.logger.warning("Empty key")
if "namelist" not in hdf5.root._v_attrs:
hdf5.root._v_attrs.namelist = self.namelist.data.todict()
if "info " not in hdf5.root._v_attrs:
hdf5.root._v_attrs.info = self.info
finally:
hdf5.close()
self.logger.info(
f"{filename} successfully written with {nb_written}/{len(data.keys())} updated fields"
)
def load_parts(self, keys=None, force_new_filename=False, save=True):
if not self.parts_loaded: if not self.parts_loaded:
self.logger.debug("Loading particles")
if os.path.exists(self.parts_filename) and not force_new_filename:
filename = self.parts_filename
else:
filename = self.snap_filename
self.parts = self.load_data( self.parts = self.load_data(
self._part, self._part,
self.parts_filename, filename,
self.params.process.save_parts, self.params.process.save_parts and save,
keys=keys, keys=keys,
group="parts",
) )
self.parts_loaded = True self.parts_loaded = True
self.logger.info("Particles loaded")
def unload_parts(self): def unload_parts(self):
""" """
@@ -567,31 +692,57 @@ class SnapshotProcessor(HDF5Container):
if self.parts_loaded: if self.parts_loaded:
del self.parts del self.parts
self.parts_loaded = False self.parts_loaded = False
self.logger.info("Particles unloaded")
def load_cells(self, keys=None): def load_cells(self, keys=None, force_new_filename=False, save=True):
""" """
Load all cells from the source file in the memory. Load all cells from the source file in the memory.
Cells will be accessible trough self.cells Cells will be accessible trough self.cells
(Long and memory heavy) (Long and memory heavy)
""" """
if not self.cells_loaded: if not self.cells_loaded:
self.logger.debug("Loading cells")
if os.path.exists(self.cells_filename) and not force_new_filename:
filename = self.cells_filename
else:
filename = self.snap_filename
cells_src = CellsToPoints(self._amr) cells_src = CellsToPoints(self._amr)
self.cells = self.load_data( self.cells = self.load_data(
cells_src, cells_src,
self.cells_filename, filename,
self.params.process.save_cells, self.params.process.save_cells and save,
keys=keys, keys=keys,
group="cells",
) )
self.cells_loaded = True self.cells_loaded = True
self.logger.info("Cells loaded")
def unload_cells(self): def unload_cells(self):
""" """
Free space in the memory by telling the garbage collectors that Free space in the memory by telling the garbage collector that
self.cells is not needed self.cells is not needed
""" """
if self.cells_loaded: if self.cells_loaded:
del self.cells del self.cells
self.cells_loaded = False self.cells_loaded = False
self.logger.info("Cells unloaded")
def load_destructured(self, save=True):
self.load_cells(save=save)
self.load_parts(save=save)
def unload_destructured(self):
self.unload_cells()
self.unload_parts()
def convert_hdf5(self, filename=None):
self.load_destructured(save=False)
if filename is None:
filename = self.snap_filename
self.save_data(self.cells, filename, "cells")
self.save_data(self.parts, filename, "parts")
self.unload_destructured()
def get_nml(self, nml_key): def get_nml(self, nml_key):
@@ -615,14 +766,14 @@ class SnapshotProcessor(HDF5Container):
def getter_vect_r(self, dset, name_vect): def getter_vect_r(self, dset, name_vect):
"""Radial component of a vector""" """Radial component of a vector"""
r = self.getter_pos_disk(dset)[:, :2] r = self.getter_pos_disk(dset)[:, :2]
ur = np.transpose((np.transpose(r) / np.sqrt(np.sum(r ** 2, axis=1)))) ur = np.transpose((np.transpose(r) / np.sqrt(np.sum(r**2, axis=1))))
return np.einsum("ij, ij -> i", dset[name_vect][:, :2], ur) return np.einsum("ij, ij -> i", dset[name_vect][:, :2], ur)
def getter_vect_phi(self, dset, name_vect): def getter_vect_phi(self, dset, name_vect):
"""Azimuthal component of a vector""" """Azimuthal component of a vector"""
r = self.getter_pos_disk(dset)[:, :2] r = self.getter_pos_disk(dset)[:, :2]
r_norm = np.sqrt(np.sum(r ** 2, axis=1)) r_norm = np.sqrt(np.sum(r**2, axis=1))
rot = np.array([[0, -1], [1, 0]]) rot = np.array([[0, -1], [1, 0]])
uphi = np.transpose(np.einsum("ij, kj -> ik", rot, r) / r_norm) uphi = np.transpose(np.einsum("ij, kj -> ik", rot, r) / r_norm)
vect = dset[name_vect][:, :2] vect = dset[name_vect][:, :2]
@@ -641,7 +792,7 @@ class SnapshotProcessor(HDF5Container):
"""Radial component of a vector""" """Radial component of a vector"""
r = self.oct_getter_pos_disk(dset)[:, :, :2] r = self.oct_getter_pos_disk(dset)[:, :, :2]
ur = np.transpose( ur = np.transpose(
(np.transpose(r, (2, 0, 1)) / np.sqrt(np.sum(r ** 2, axis=2))), (1, 2, 0) (np.transpose(r, (2, 0, 1)) / np.sqrt(np.sum(r**2, axis=2))), (1, 2, 0)
) )
return np.einsum("ikj, ikj -> ik", dset[name_vect][:, :, :2], ur) return np.einsum("ikj, ikj -> ik", dset[name_vect][:, :, :2], ur)
@@ -649,7 +800,7 @@ class SnapshotProcessor(HDF5Container):
"""Azimuthal component of a vector""" """Azimuthal component of a vector"""
r = self.oct_getter_pos_disk(dset)[:, :, :2] r = self.oct_getter_pos_disk(dset)[:, :, :2]
r_norm = np.sqrt(np.sum(r ** 2, axis=2)) r_norm = np.sqrt(np.sum(r**2, axis=2))
rot = np.array([[0, -1], [1, 0]]) rot = np.array([[0, -1], [1, 0]])
uphi = np.transpose(np.einsum("ij, klj -> ikl", rot, r) / r_norm, (1, 2, 0)) uphi = np.transpose(np.einsum("ij, klj -> ikl", rot, r) / r_norm, (1, 2, 0))
vect = dset[name_vect][:, :, :2] vect = dset[name_vect][:, :, :2]
@@ -694,10 +845,10 @@ class SnapshotProcessor(HDF5Container):
distance=size / 2.0, distance=size / 2.0,
far_cut_depth=size / 2.0, far_cut_depth=size / 2.0,
up_vector="y", up_vector="y",
map_max_size=2 ** level, map_max_size=2**level,
) )
cube = extractor.process(cam, cube_size=1.0, resolution=2 ** level).data cube = extractor.process(cam, cube_size=1.0, resolution=2**level).data
return cube return cube
def slice(self, getter, ax_los="z", z=0.0, unit=U.none): def slice(self, getter, ax_los="z", z=0.0, unit=U.none):
@@ -919,7 +1070,7 @@ class SnapshotProcessor(HDF5Container):
rho = getter_rho(self.cells) rho = getter_rho(self.cells)
rho_kg_m3 = rho * self.info["unit_density"].express(U.kg_m3) rho_kg_m3 = rho * self.info["unit_density"].express(U.kg_m3)
eb = 0.5 * (B_norm) ** 2 / (4 * np.pi * 10 ** (-7)) # mettre le bon mu eb = 0.5 * (B_norm) ** 2 / (4 * np.pi * 10 ** (-7)) # mettre le bon mu
ek = 0.5 * v_norm ** 2 * rho_kg_m3 ek = 0.5 * v_norm**2 * rho_kg_m3
rapport = ek / eb rapport = ek / eb
if logbins: if logbins:
@@ -958,7 +1109,7 @@ class SnapshotProcessor(HDF5Container):
def getter_cos_vfluct_B(dset): def getter_cos_vfluct_B(dset):
vel_fluct = dset["vel"] - mean_speed vel_fluct = dset["vel"] - mean_speed
B_norm = np.sqrt(np.sum(dset["Br"] ** 2, axis=1)) B_norm = np.sqrt(np.sum(dset["Br"] ** 2, axis=1))
v_norm = np.sqrt(np.sum(vel_fluct ** 2, axis=1)) v_norm = np.sqrt(np.sum(vel_fluct**2, axis=1))
# Compute the dot product in each cell # Compute the dot product in each cell
dot_prod = np.einsum("ij,ij->i", vel_fluct, dset["Br"]) dot_prod = np.einsum("ij,ij->i", vel_fluct, dset["Br"])
return np.abs(dot_prod) / (v_norm * B_norm) return np.abs(dot_prod) / (v_norm * B_norm)
@@ -1035,7 +1186,7 @@ class SnapshotProcessor(HDF5Container):
pos = self.oct_getter_pos_disk(dset) pos = self.oct_getter_pos_disk(dset)
xx = pos[:, :, 0] * lbox xx = pos[:, :, 0] * lbox
yy = pos[:, :, 1] * lbox yy = pos[:, :, 1] * lbox
rc2 = xx ** 2 + yy ** 2 # square of cylindrical radius rc2 = xx**2 + yy**2 # square of cylindrical radius
vx = dset["vel"][:, :, 0] vx = dset["vel"][:, :, 0]
vy = dset["vel"][:, :, 1] vy = dset["vel"][:, :, 1]
omega_rho = dset["rho"] omega_rho = dset["rho"]
@@ -1079,10 +1230,10 @@ class SnapshotProcessor(HDF5Container):
omega = self.get_value("/maps/omega_mwavg_z") omega = self.get_value("/maps/omega_mwavg_z")
space_coeff = self.lbox space_coeff = self.lbox
else: else:
G = U.G.express(U.kg ** -1 * U.m ** 3 * U.s ** -2) G = U.G.express(U.kg**-1 * U.m**3 * U.s**-2)
cs2 = self.get_value("/maps/T_mwavg_z", unit=U.m ** 2 * U.s ** -2) cs2 = self.get_value("/maps/T_mwavg_z", unit=U.m**2 * U.s**-2)
coldens = self.get_value("/maps/coldens_z", unit=U.kg * U.m ** -2) coldens = self.get_value("/maps/coldens_z", unit=U.kg * U.m**-2)
omega = self.get_value("/maps/omega_mwavg_z", unit=U.s ** -1) omega = self.get_value("/maps/omega_mwavg_z", unit=U.s**-1)
space_coeff = self.info["unit_length"].express(U.m) space_coeff = self.info["unit_length"].express(U.m)
map_size = self.params.pymses.map_size map_size = self.params.pymses.map_size
@@ -1112,10 +1263,10 @@ class SnapshotProcessor(HDF5Container):
y = np.linspace(im_extent[2], im_extent[3], map_size) + 0.5 * dy - center[1] y = np.linspace(im_extent[2], im_extent[3], map_size) + 0.5 * dy - center[1]
xx, yy = np.meshgrid(x, y) xx, yy = np.meshgrid(x, y)
rr = np.sqrt(xx ** 2 + yy ** 2) rr = np.sqrt(xx**2 + yy**2)
# Compute kappa**2 = (2omega/R)*d(R**2*omega)/dR # Compute kappa**2 = (2omega/R)*d(R**2*omega)/dR
Gy, Gx = np.gradient(rr ** 2 * omega, dy, dx, edge_order=2) Gy, Gx = np.gradient(rr**2 * omega, dy, dx, edge_order=2)
Gr = (xx * Gx + yy * Gy) / rr Gr = (xx * Gx + yy * Gy) / rr
kappa_square = (2 * omega / rr) * Gr kappa_square = (2 * omega / rr) * Gr
kappa = np.sqrt(kappa_square) kappa = np.sqrt(kappa_square)
@@ -1490,7 +1641,7 @@ class SnapshotProcessor(HDF5Container):
x = np.arange(shape[0]) - shape[0] / 2 x = np.arange(shape[0]) - shape[0] / 2
y = np.arange(shape[1]) - shape[1] / 2 y = np.arange(shape[1]) - shape[1] / 2
xx, yy = np.meshgrid(x, y) xx, yy = np.meshgrid(x, y)
rr = np.sqrt(xx ** 2 + yy ** 2) rr = np.sqrt(xx**2 + yy**2)
rmin = int(rmin_frac * shape[0]) rmin = int(rmin_frac * shape[0])
rmax = int(rmax_frac * shape[0]) rmax = int(rmax_frac * shape[0])
mask = (rr >= rmin) & (rr <= rmax) mask = (rr >= rmin) & (rr <= rmax)
@@ -1502,9 +1653,9 @@ class SnapshotProcessor(HDF5Container):
verbose=verbose, verbose=verbose,
smooth_size=1 * u.pix, smooth_size=1 * u.pix,
adapt_thresh=4 * u.pix, adapt_thresh=4 * u.pix,
size_thresh=size_thresh * u.pix ** 2, size_thresh=size_thresh * u.pix**2,
glob_thresh=glob_thresh, glob_thresh=glob_thresh,
fill_hole_size=0.1 * u.pix ** 2, fill_hole_size=0.1 * u.pix**2,
) )
self.fil.medskel(verbose=verbose) self.fil.medskel(verbose=verbose)
self.fil.analyze_skeletons( self.fil.analyze_skeletons(
@@ -1565,10 +1716,10 @@ class SnapshotProcessor(HDF5Container):
y = np.linspace(im_extent[2], im_extent[3], map_size) + 0.5 * dy - center[1] y = np.linspace(im_extent[2], im_extent[3], map_size) + 0.5 * dy - center[1]
xx, yy = np.meshgrid(x, y) xx, yy = np.meshgrid(x, y)
rr = np.sqrt(xx ** 2 + yy ** 2) rr = np.sqrt(xx**2 + yy**2)
# Rotational support # Rotational support
R = vphi ** 2 / rr R = vphi**2 / rr
# Equilibrium # Equilibrium
Gvry, Gvrx = np.gradient(vr, dy, dx, edge_order=2) Gvry, Gvrx = np.gradient(vr, dy, dx, edge_order=2)
@@ -1582,7 +1733,7 @@ class SnapshotProcessor(HDF5Container):
# Gravitational field # Gravitational field
e2 = (1.0 / 512) ** 2 e2 = (1.0 / 512) ** 2
gstar = -GM / (rr ** 2 + e2) gstar = -GM / (rr**2 + e2)
# Substract gravitational field from the star # Substract gravitational field from the star
Rdisk = R + gstar Rdisk = R + gstar
@@ -1622,10 +1773,10 @@ class SnapshotProcessor(HDF5Container):
position = self.cells["pos"][mask] * self.info["unit_length"].express(U.pc) position = self.cells["pos"][mask] * self.info["unit_length"].express(U.pc)
cells_group[:, 1:4] = position # position cells_group[:, 1:4] = position # position
density = self.cells["rho"][mask] * self.info["unit_density"].express( density = self.cells["rho"][mask] * self.info["unit_density"].express(
U.Msun / U.pc ** 3 U.Msun / U.pc**3
) )
size = self.cells["dx"][mask] * self.info["unit_length"].express(U.pc) size = self.cells["dx"][mask] * self.info["unit_length"].express(U.pc)
cells_group[:, 4] = density * size ** 3 # mass cells_group[:, 4] = density * size**3 # mass
# save file.txt # save file.txt
head = str(ncells) head = str(ncells)
+8
View File
@@ -0,0 +1,8 @@
# coding: utf-8
from snapshotprocessor import SnapshotProcessor
def convert_to_hdf5(path, snap_num, path_out=".", filename=None, **kwargs):
snap = SnapshotProcessor(path=path, num=snap_num, path_out=path_out, **kwargs)
snap.convert_hdf5(filename)
+5 -1
View File
@@ -100,7 +100,11 @@ class RunSelector:
self.allow_nodata = allow_nodata self.allow_nodata = allow_nodata
self.namelist = {} self.namelist = {}
do_tests = (not self.fallback_nml) or (len(filter_nml) > 0) do_tests = (
(not self.fallback_nml)
or (len(filter_nml) > 0)
or (sort_run_by is not None)
)
self.runs = self.get_runs( self.runs = self.get_runs(
in_runs, filter_name, filter_nml, sort_run_by, do_tests=do_tests in_runs, filter_name, filter_nml, sort_run_by, do_tests=do_tests
) )