From bd520d7189f965ab84d78756c7ad5b2f58e0460a Mon Sep 17 00:00:00 2001
From: Noe Brucy <noe.brucy@cea.fr>
Date: Wed, 5 Apr 2023 16:50:43 +0200
Subject: [PATCH] Improve I/O checks: optional variable check, HDF5 write fix, tolerant log parsing

---
 galturb/sectors_extraction_ramses.py |  2 +-
 params.yml                           |  3 +-
 snapshotprocessor.py                 | 67 ++++++++++++++----------
 studyprocessor.py                    | 76 ++++++++++++++++------------
 utils/runselector.py                 | 40 +++++++--------
 5 files changed, 105 insertions(+), 83 deletions(-)

diff --git a/galturb/sectors_extraction_ramses.py b/galturb/sectors_extraction_ramses.py
index 1bf1b0b..208c272 100644
--- a/galturb/sectors_extraction_ramses.py
+++ b/galturb/sectors_extraction_ramses.py
@@ -103,6 +103,6 @@ if __name__ == "__main__":
     from snapshotprocessor import SnapshotProcessor
 
     pp = SnapshotProcessor(
-        "/home/nbrucy/simus/F20H_alfven_frig", num=1, params="params_gal.yml"
+        "/home/nbrucy/ecogal/galturb/F20H_7_1pc_frig", num=80, params="params_gal.yml"
     )
     data = load_fields(pp)
diff --git a/params.yml b/params.yml
index 044e5cd..86edb6a 100644
--- a/params.yml
+++ b/params.yml
@@ -47,7 +47,8 @@ pymses: # Parameters for Pymses reader
 
     # Source settings
     variables :  ["rho","vel","Br","Bl","P", "g", "phi"]  # Read these grid variables
-    part_variables :  ["vel","mass","id","level","epoch"]  # Read these particles variables
+    part_variables :  ["vel","mass","id","level","epoch"] # Read these particles variables
+    check_variables : True                                # Check that the requested variables are present in the output before reading
     order     : '<'                                       # Bit order
 
 
diff --git a/snapshotprocessor.py b/snapshotprocessor.py
index f2ec817..35f3d19 100644
--- a/snapshotprocessor.py
+++ b/snapshotprocessor.py
@@ -287,6 +287,9 @@ class SnapshotProcessor(HDF5Container):
         "phi": "unit_gravpot",
         "mass": "unit_mass",
         "epoch": "unit_time",
+        "size": "unit_length",
+        "dx": "unit_length",
+        "pos": "unit_length",
         "id": U.none,
         "level": U.none,
     }
@@ -422,20 +425,7 @@ class SnapshotProcessor(HDF5Container):
 
         self.def_rules()
 
-    def init_pymses(self):
-        # If ratarmount was used
-        if os.path.exists(f"{self.path}/output_{self.num:05}/output_{self.num:05}"):
-            path = f"{self.path}/output_{self.num:05}"
-        else:
-            path = self.path
-        self._ro = pymses.RamsesOutput(
-            path,
-            self.num,
-            order=self.params.pymses.order,
-            verbose=self.params.pymses.verbose,
-            check_endianness=False,
-        )
-
+    def check_variables(self, path):
         # Check if variables are in output
         name_conv = {
             "rho": "density",
@@ -479,16 +469,16 @@ class SnapshotProcessor(HDF5Container):
         def is_available(available_vars, pymsesrc, var):
             if var in ["g", "phi"]:
                 if not has_grav:
-                    self.logger.warning(f"Variable {var} not in output")
+                    self.logger.debug(f"Variable {var} not in output")
                 if var not in pymsesrc:
-                    self.logger.warning(f"Variable {var} not in pymsesrc")
+                    self.logger.debug(f"Variable {var} not in pymsesrc")
                 return has_grav and var in pymsesrc
             else:
                 if var in name_conv:
                     if name_conv[var] not in available_vars:
-                        self.logger.warning(f"Variable {var} not in output")
+                        self.logger.debug(f"Variable {var} not in output")
                     if var not in pymsesrc:
-                        self.logger.warning(f"Variable {var} not in pymsesrc")
+                        self.logger.debug(f"Variable {var} not in pymsesrc")
                     return name_conv[var] in available_vars and var in pymsesrc
                 else:
                     self.logger.warning(f"Variable {var} is unknown")
@@ -528,6 +518,24 @@ class SnapshotProcessor(HDF5Container):
             pymses.rcConfig.Ramses.amr_fields.remove_field("g")
             pymses.rcConfig.Ramses.amr_fields.remove_field("phi")
 
+    def init_pymses(self):
+
+        # If ratarmount was used, the output is nested one directory deeper
+        if os.path.exists(f"{self.path}/output_{self.num:05}/output_{self.num:05}"):
+            path = f"{self.path}/output_{self.num:05}"
+        else:
+            path = self.path
+        self._ro = pymses.RamsesOutput(
+            path,
+            self.num,
+            order=self.params.pymses.order,
+            verbose=self.params.pymses.verbose,
+            check_endianness=False,
+        )
+
+        if self.params.pymses.check_variables:
+            self.check_variables(path)
+
         self._amr = self._ro.amr_source(self.params.pymses.variables)
         self._part = self._ro.particle_source(self.params.pymses.part_variables)
 
@@ -676,12 +684,12 @@ class SnapshotProcessor(HDF5Container):
                 if len(data[key] > 0):
                     if f"/{group}/{key}" in hdf5 and overwrite:
                         hdf5.remove_node(f"/{group}/{key}")
-                        hdf5.create_array(
-                            f"/{group}", key, data[key], "", createparents=True
-                        )
-                        unit = self._get_units(self.unit_key[key])
-                        hdf5.get_node("/{group}/{key}").unit = unit
-                        nb_written += 1
+                    hdf5.create_array(
+                        f"/{group}", key, data[key], "", createparents=True
+                    )
+                    unit = self._get_units(self.unit_key[key])
+                    hdf5.get_node(f"/{group}/{key}").unit = unit
+                    nb_written += 1
                 else:
                     self.logger.warning("Empty key")
             if "namelist" not in hdf5.root._v_attrs:
@@ -739,12 +747,12 @@ class SnapshotProcessor(HDF5Container):
             self.cells = self.load_data(
                 cells_src,
                 filename,
-                self.params.process.save_cells and save,
+                True,
                 keys=keys,
                 group="cells",
             )
             self.cells_loaded = True
-        self.logger.info("Cells loaded")
+            self.logger.info("Cells loaded")
 
     def unload_cells(self):
         """
@@ -754,7 +762,7 @@ class SnapshotProcessor(HDF5Container):
         if self.cells_loaded:
             del self.cells
             self.cells_loaded = False
-        self.logger.info("Cells unloaded")
+            self.logger.info("Cells unloaded")
 
     def load_destructured(self, save=True):
         self.load_cells(save=save)
@@ -1032,6 +1040,7 @@ class SnapshotProcessor(HDF5Container):
         unit=None,
         logbins=False,
         weight_func=vol_func,
+        **kwargs,
     ):
         self.load_cells()
         data = getter(self.cells)
@@ -1043,7 +1052,9 @@ class SnapshotProcessor(HDF5Container):
         if self.params.process.unload_cells:
             self.unload_cells()
 
-        values, edges = np.histogram(data, bins, weights=weights, density=True)
+        values, edges = np.histogram(
+            data, bins, weights=weights, density=True, **kwargs
+        )
         centers = 0.5 * (edges[1:] + edges[:-1])
         return (np.stack([values, centers]), {"logbins": logbins})
 
diff --git a/studyprocessor.py b/studyprocessor.py
index d690ca3..3ba3f3d 100644
--- a/studyprocessor.py
+++ b/studyprocessor.py
@@ -132,7 +132,10 @@ class StudyProcessor(Aggregator, HDF5Container):
 
     def _save_data(self, name_full, data, description, unit):
         super(StudyProcessor, self)._save_data(name_full, data, description, unit)
-        self.save.get_node(name_full)._v_attrs.nums = self.nums
+        if name_full in self.save:
+            self.save.get_node(name_full)._v_attrs.nums = self.nums
+        else:
+            self.logger.warning(f"{name_full} was not written")
 
     def time_series(self, getter, arg=None):
         series = {}
@@ -380,7 +383,7 @@ class StudyProcessor(Aggregator, HDF5Container):
     def _extract_fine_step_from_log(self, series, log_filename, run):
         cmd_grep = "grep 'Fine step' {} ".format(log_filename)
         content = os.popen(cmd_grep).readlines()
-        block_err = []  # Block that will ill parsed
+        block_err = []  # Blocks that could not be parsed
         for i in range(0, len(content)):
             try:
                 data = content[i].replace("=", " ").split()
@@ -412,38 +415,48 @@ class StudyProcessor(Aggregator, HDF5Container):
             log_filename, nlines - 1
         )
         content = os.popen(cmd_grep).readlines()
+        block_err = []  # Indices of the grepped blocks that could not be parsed
         for j in range(0, len(content), 2 * (nlines + 1)):
             i = j + nlines + 1  # Index for the "Main step" grep
 
             if i + nlines - 1 < len(content):
-                series["time"][run].append(
-                    np.float(content[i + nlines - 1].split("=")[2].split()[0])
-                )
-                series["step"][run].append(np.int(content[i].split("=")[1].split()[0]))
-                series["mcons"][run].append(
-                    np.float(content[i].split("=")[2].split()[0])
-                )
-                series["econs"][run].append(
-                    np.float(content[i].split("=")[3].split()[0])
-                )
-                series["epot"][run].append(
-                    np.float(content[i].split("=")[4].split()[0])
-                )
-                series["ekin"][run].append(
-                    np.float(content[i].split("=")[5].split()[0])
-                )
-                if rism:
-                    eint = np.float(content[i].split("=")[6].split()[0])
-                    emag = np.float(content[i + 1].split("=")[1].split()[0])
-                else:
-                    eint = 0.0
-                    emag = 0.0
-                series["eint"][run].append(eint)
-                series["emag"][run].append(emag)
-                series["elapsed"][run].append(
-                    np.float(content[j].split(":")[1].split()[0])
-                )
-                series["memory"][run].append(content[j + 1].split(":")[1])
+                try:
+                    # Parse the whole block before storing anything, so that a
+                    # malformed block cannot leave the series with unequal
+                    # lengths.
+                    # np.float / np.int were plain aliases of the builtins and
+                    # were removed in NumPy 1.24, hence float() / int() here.
+                    main = content[i].split("=")
+                    last = content[i + nlines - 1].split("=")
+                    parsed = {
+                        "time": float(last[2].split()[0]),
+                        "step": int(main[1].split()[0]),
+                        "mcons": float(main[2].split()[0]),
+                        "econs": float(main[3].split()[0]),
+                        "epot": float(main[4].split()[0]),
+                        "ekin": float(main[5].split()[0]),
+                        "eint": 0.0,
+                        "emag": 0.0,
+                        "elapsed": float(content[j].split(":")[1].split()[0]),
+                        "memory": content[j + 1].split(":")[1],
+                    }
+                    if rism:
+                        # Only read when `rism` is set; both values stay at 0.0
+                        # otherwise.
+                        parsed["eint"] = float(main[6].split()[0])
+                        parsed["emag"] = float(content[i + 1].split("=")[1].split()[0])
+                except (ValueError, IndexError):
+                    # Remember the offending block; it is reported after the loop.
+                    block_err.append(i)
+                else:
+                    # Every field of the block was parsed: store them together
+                    # so that all the series keep the same length.
+                    for key, value in parsed.items():
+                        series[key][run].append(value)
+        if len(block_err) > 0:
+            self.logger.warning(
+                f"Error encountered in parsing {log_filename} (grepped blocks {block_err})"
+            )
         return series
 
     def _extract_rms_from_log(self, series, log_filename, run):
@@ -656,10 +669,7 @@ class StudyProcessor(Aggregator, HDF5Container):
         glob_name,
         name=None,
         glob_group="/globals",
-        subarray_name=None,
-        unload_cells=True,
         unit=U.none,
-        description="",
     ):
 
         if name is None:
diff --git a/utils/runselector.py b/utils/runselector.py
index 39655e7..bb0f372 100644
--- a/utils/runselector.py
+++ b/utils/runselector.py
@@ -352,26 +352,6 @@ class RunSelector:
             def try_load_info(num):
                 return True
 
-        # -- Time getter according to unit_time
-        if unit_time is None:
-
-            def get_time(num):
-                return self.info[run][num]["time"]
-
-        elif isinstance(unit_time, str):
-
-            factor = self.get_nml_value(unit_time, run)
-
-            def get_time(num):
-                time_code = self.info[run][num]["time"]
-                return time_code / factor
-
-        else:
-
-            def get_time(num):
-                time_code = self.info[run][num]["time"]
-                return time_code * self.info[run][num]["unit_time"].express(unit_time)
-
         # -- A function to search a given time using dichotomy
         def search(nums, time, position="closest"):
 
@@ -460,6 +440,26 @@ class RunSelector:
             path = f"{self.path_in}/{run}/output_{nums[0]:05}/namelist.txt"
             self.namelist[run] = self.load_namelist(run, path=path)
 
+        # -- Time getter according to unit_time
+        if unit_time is None:
+
+            def get_time(num):
+                return self.info[run][num]["time"]
+
+        elif isinstance(unit_time, str):
+
+            factor = self.get_nml_value(unit_time, run)
+
+            def get_time(num):
+                time_code = self.info[run][num]["time"]
+                return time_code / factor
+
+        else:
+
+            def get_time(num):
+                time_code = self.info[run][num]["time"]
+                return time_code * self.info[run][num]["unit_time"].express(unit_time)
+
         # -- Select according to time --
         if time_min is not None and len(nums) > 0:
             imin = search(nums, time_min, "right")