Improve loading from ramses + hdf5 conversion

This commit is contained in:
Noe Brucy
2023-03-01 15:34:17 +01:00
parent 9bda60702a
commit ae8a8a605d
3 changed files with 213 additions and 50 deletions
+174 -23
View File
@@ -285,6 +285,10 @@ class SnapshotProcessor(HDF5Container):
"P": "unit_pressure",
"g": {"unit_gravpot": 1, "unit_length": -1},
"phi": "unit_gravpot",
"mass": "unit_mass",
"epoch": "unit_time",
"id": U.none,
"level": U.none,
}
G = 1.0 # Gravitational constant
@@ -334,8 +338,15 @@ class SnapshotProcessor(HDF5Container):
subfolder = ""
self.filename = f"{self.path_out}{subfolder}/postproc_{tag_name}{num:05}.h5"
self.cells_filename = f"{self.path_out}{subfolder}/cells_{tag_name}{num:05}.h5"
self.parts_filename = f"{self.path_out}{subfolder}/parts_{tag_name}{num:05}.h5"
self.cells_filename = (
f"{self.path_out}{subfolder}/cells_{tag_name}{num:05}.h5" # legacy only
)
self.parts_filename = (
f"{self.path_out}{subfolder}/parts_{tag_name}{num:05}.h5" # legacy only
)
self.snap_filename = (
f"{self.path_out}{subfolder}/snap_{num:05}.h5" # new hdf5 snap format
)
self.pspec_filename = f"{self.path_out}{subfolder}/pspec_{tag_name}{num:05}.h5"
self.filaments_filename = (
f"{self.path_out}/{subfolder}filaments_{tag_name}{num:05}.h5"
@@ -424,6 +435,71 @@ class SnapshotProcessor(HDF5Container):
verbose=self.params.pymses.verbose,
check_endianness=False,
)
# Check if variables are in output
name_conv = {
"rho": "density",
"vel": "velocity",
"P": "pressure",
"Br": "B",
"Bl": "B",
"mass": "mass",
"id": "identity",
"level": "levelp",
"epoch": "epoch",
}
# Read the descriptor files of the output to know which variables it holds.
# The files are opened in `with` blocks so that they are closed as soon as
# their lines have been read (they were previously left open).
output_dir = f"{path}/output_{self.num:05}"
with open(f"{output_dir}/hydro_file_descriptor.txt") as hydro_file:
    hlines = hydro_file.readlines()
with open(f"{output_dir}/part_file_descriptor.txt") as part_file:
    plines = part_file.readlines()
# Gravity variables are considered present if the first gravity file exists
has_grav = os.path.exists(f"{output_dir}/grav_{self.num:05}.out00000")
# ugly parsing: skip the first two lines, take the second comma-separated
# column without its first character, and keep what precedes the first "_"
# (presumably "velocity_x" -> "velocity"; TODO confirm against the
# descriptor file format)
hydro_var = np.unique(
    [s.split(",")[1][1:].split("_")[0] for s in hlines[2:]]
)
part_var = np.unique(
    [s.split(",")[1][1:].split("_")[0] for s in plines[2:]]
)
def is_available(available_vars, pymsesrc, var):
    """
    Tell whether `var` can be loaded, logging a warning for each reason
    why it cannot.

    available_vars: names of the variables found in the output
    pymsesrc: names of the variables known to the pymses configuration
    var: name of the requested variable

    Returns True only if the variable is both in the output and in
    `pymsesrc`. Unknown variables are never available.
    """
    if var in ["g", "phi"]:
        # Gravity variables are not looked up in `available_vars`,
        # they only depend on the presence of the gravity files
        in_output = has_grav
    elif var in name_conv:
        in_output = name_conv[var] in available_vars
    else:
        self.logger.warning(f"Variable {var} is unknown")
        return False

    in_rc = var in pymsesrc
    if not in_output:
        self.logger.warning(f"Variable {var} not in output")
    if not in_rc:
        self.logger.warning(f"Variable {var} not in pymsesrc")
    return in_output and in_rc
self.params.pymses.variables = list(
filter(
partial(
is_available,
hydro_var,
pymses.rcConfig.Ramses.amr_fields.field_name_list,
),
self.params.pymses.variables,
)
)
self.params.pymses.part_variables = list(
filter(
partial(
is_available, part_var, ["vel", "mass", "id", "level", "epoch"]
),
self.params.pymses.part_variables,
)
)
self._amr = self._ro.amr_source(self.params.pymses.variables)
self._part = self._ro.particle_source(self.params.pymses.part_variables)
@@ -510,23 +586,43 @@ class SnapshotProcessor(HDF5Container):
finally:
self.close()
def load_data(self, points_src, filename, save, keys=None):
def load_data(self, points_src, filename, save, keys=None, group="data"):
"""
Load data from the source file in the memory.
(Long and memory heavy)
"""
loaded = False
if os.path.exists(filename):
self.logger.debug(f"Found hdf5, loading {filename}.")
hdf5 = tables.open_file(filename, mode="r")
try:
if group in hdf5.root:
node = hdf5.get_node(f"/{group}")
loaded = True
elif "data" in hdf5.root:
self.logger.warning(
f"{filename} has no {group} group, but I found the group data."
)
node = hdf5.get_node("/data")
loaded = True
else:
self.logger.warning(f"{filename} has no {group} group")
if loaded:
data = {}
if keys is None:
keys = node._v_children
for key in keys:
data[key] = hdf5.get_node("/data/" + key).read()
finally:
hdf5.close()
if key in node._v_children:
data[key] = node[key].read()
else:
self.logger.warning(
f"Key {key} is missing, I will try a full reload"
)
loaded = False
break
hdf5.close()
if not loaded:
self.logger.debug("No hdf5, loading from ramses data.")
data_pymses = points_src.flatten()
data = {}
for key in data_pymses.fields:
@@ -537,27 +633,56 @@ class SnapshotProcessor(HDF5Container):
pass
data["pos"] = data_pymses.points
self.logger.info("Ramses data loaded.")
if save:
hdf5 = tables.open_file(filename, mode="w")
try:
for key in data:
if len(data[key] > 0):
hdf5.create_array(
"/data", key, data[key], "", createparents=True
)
finally:
hdf5.close()
self.save_data(data, filename, group)
return data
def load_parts(self, keys=None):
def save_data(self, data, filename, group, overwrite=False):
    """
    Write the arrays of `data` in the group `group` of an hdf5 file.

    The file is opened in append mode, so what it already contains is kept.
    Each non-empty entry of `data` is written as the array node
    `/{group}/{key}`. The namelist and the info of the snapshot are stored
    as attributes of the root group when they are not there yet.

    data: dictionary mapping the name of a field to its values
    filename: path of the hdf5 file
    group: name of the group (under the root) receiving the arrays
    overwrite: if True, nodes already in the file are replaced,
        if False (default) they are left untouched and skipped

    Every written key must be in self.unit_key, the unit lookup fails
    otherwise.
    """
    self.logger.debug(f"Writing {filename}")
    nb_written = 0
    with tables.open_file(filename, mode="a") as hdf5:
        for key in data:
            if len(data[key]) == 0:
                self.logger.warning("Empty key")
                continue

            node_path = f"/{group}/{key}"
            if node_path in hdf5:
                if not overwrite:
                    # Creating a node twice is an error: keep the stored one
                    self.logger.debug(
                        f"{node_path} is already in {filename}, skipped"
                    )
                    continue
                hdf5.remove_node(node_path)

            # Look the unit up first so that a failure does not leave
            # a freshly created node behind
            unit = self._get_units(self.unit_key[key])
            node = hdf5.create_array(
                f"/{group}", key, data[key], "", createparents=True
            )
            # NOTE(review): this looks like it sets a plain Python attribute
            # on the node object rather than an hdf5 attribute (that would be
            # `node.attrs.unit`) -- confirm whether the unit must be persisted
            # and whether it can be serialised.
            node.unit = unit
            nb_written += 1

        if "namelist" not in hdf5.root._v_attrs:
            hdf5.root._v_attrs.namelist = self.namelist.data.todict()
        if "info" not in hdf5.root._v_attrs:
            hdf5.root._v_attrs.info = self.info

    self.logger.info(
        f"{filename} successfully written with {nb_written}/{len(data)} updated fields"
    )
def load_parts(self, keys=None, force_new_filename=False, save=True):
    """
    Load the particles in the memory.
    Particles will be accessible through self.parts
    Nothing is done if the particles are already loaded.

    keys: names of the fields to load, passed on to load_data
    force_new_filename: if True, use self.snap_filename even if the
        legacy file self.parts_filename exists
    save: the data may be written to the hdf5 file only if both this flag
        and self.params.process.save_parts are true
    """
    if not self.parts_loaded:
        self.logger.debug("Loading particles")

        # The legacy file is preferred when it exists
        if os.path.exists(self.parts_filename) and not force_new_filename:
            filename = self.parts_filename
        else:
            filename = self.snap_filename

        self.parts = self.load_data(
            self._part,
            filename,
            self.params.process.save_parts and save,
            keys=keys,
            group="parts",
        )
        self.parts_loaded = True
        self.logger.info("Particles loaded")
def unload_parts(self):
"""
@@ -567,31 +692,57 @@ class SnapshotProcessor(HDF5Container):
if self.parts_loaded:
del self.parts
self.parts_loaded = False
self.logger.info("Particles unloaded")
def load_cells(self, keys=None):
def load_cells(self, keys=None, force_new_filename=False, save=True):
    """
    Load all cells from the source file in the memory.
    Cells will be accessible through self.cells
    (Long and memory heavy)
    Nothing is done if the cells are already loaded.

    keys: names of the fields to load, passed on to load_data
    force_new_filename: if True, use self.snap_filename even if the
        legacy file self.cells_filename exists
    save: the data may be written to the hdf5 file only if both this flag
        and self.params.process.save_cells are true
    """
    if not self.cells_loaded:
        self.logger.debug("Loading cells")

        # The legacy file is preferred when it exists
        if os.path.exists(self.cells_filename) and not force_new_filename:
            filename = self.cells_filename
        else:
            filename = self.snap_filename

        cells_src = CellsToPoints(self._amr)
        self.cells = self.load_data(
            cells_src,
            filename,
            self.params.process.save_cells and save,
            keys=keys,
            group="cells",
        )
        self.cells_loaded = True
        self.logger.info("Cells loaded")
def unload_cells(self):
    """
    Drop the reference to self.cells so that the garbage collector
    can free the memory. Does nothing if no cells are loaded.
    """
    if not self.cells_loaded:
        return
    del self.cells
    self.cells_loaded = False
    self.logger.info("Cells unloaded")
def load_destructured(self, save=True):
    """
    Load the cells, then the particles.
    `save` is forwarded to both loaders.
    """
    for loader in (self.load_cells, self.load_parts):
        loader(save=save)
def unload_destructured(self):
    """
    Unload the cells, then the particles.
    """
    for unloader in (self.unload_cells, self.unload_parts):
        unloader()
def convert_hdf5(self, filename=None):
    """
    Write the cells and the particles of the snapshot in one hdf5 file.

    The cells go in the group "cells" and the particles in the group
    "parts". Both are unloaded from the memory afterwards, even if the
    loading or the writing failed.

    filename: path of the hdf5 file, self.snap_filename if None
    """
    if filename is None:
        filename = self.snap_filename
    try:
        # save=False: the writing is done explicitly below, in `filename`
        self.load_destructured(save=False)
        self.save_data(self.cells, filename, "cells")
        self.save_data(self.parts, filename, "parts")
    finally:
        # Do not keep the (memory heavy) data around after a failure
        self.unload_destructured()
def get_nml(self, nml_key):
+8
View File
@@ -0,0 +1,8 @@
# coding: utf-8
from snapshotprocessor import SnapshotProcessor
def convert_to_hdf5(path, snap_num, path_out=".", filename=None, **kwargs):
    """
    Build a SnapshotProcessor for the snapshot `snap_num` of `path` and
    make it write its hdf5 conversion.

    path_out and the extra keyword arguments are passed on to
    SnapshotProcessor, filename is passed on to its convert_hdf5 method.
    """
    SnapshotProcessor(
        path=path, num=snap_num, path_out=path_out, **kwargs
    ).convert_hdf5(filename)
+5 -1
View File
@@ -100,7 +100,11 @@ class RunSelector:
self.allow_nodata = allow_nodata
self.namelist = {}
do_tests = (not self.fallback_nml) or (len(filter_nml) > 0)
do_tests = (
(not self.fallback_nml)
or (len(filter_nml) > 0)
or (sort_run_by is not None)
)
self.runs = self.get_runs(
in_runs, filter_name, filter_nml, sort_run_by, do_tests=do_tests
)