[run_selector] Select several outputs by time + doc

2020-06-24 14:47:50 +02:00
parent 2a47693f0e
commit cea59f78a3
1 changed files with 76 additions and 20 deletions
@@ -1,5 +1,7 @@
 # -*- mode: python-mode; python-indent-offset: 4 -*-
 # coding: utf-8
 import os
 import glob
 from functools import partial
@@ -39,18 +41,56 @@ class RunSelector:
        in_runs=None,
        in_nums="all",
        pp_params=default_params(),
-        name_run="*",
+        filter_name="*",
-        namelist_cond={},
+        filter_nml={},
        sort_run_by=None,
        time_min=None,
        time_max=None,
        time=None,
    ):
        """
        Select runs and outputs with several filter options.
        By default, all runs and outputs within path_in are considered
        Parameters
        ---------
        1. Define the set of runs and outputs considered
        path_in : str, path to the folder of the runs
        2. Filter runs and outputs
        in_runs : str or list of str. The names of the runs to consider. Default: all.
        in_nums : int or list of int or str.
                  The output numbers to consider.
                  "last" select only the last output.
                  "all" preselect all outputs (default)
        filter_name : str, filter runs by name. Default "*"
        filter_nml  : tuple or list of tuples.
                     Filter runs by namelist.
                     tuples are in the following form:
                     (nml_key, operator, nml_value)
                     with nml_key a key from the namelist (eg. "cloud_params/dens0")
                     operator within ("=", "!=", "<", ">", "in")
                     and nml_value a string, float or int
        time_min    : float, select output where time >= time_min (in code units)
        time_max    : float, select output where time <= time_max (in code units)
        time        : float or list of float. For each value, select the output closest to it.
        3. Sort the runs
        sort_run_by : str, a key from the namelist used to sort the runs (by ascending order)
        """
        self.path_in = path_in
        self.pp_params = pp_params
        self.namelist = {}
-        self.runs = self.get_runs(in_runs, name_run, namelist_cond, sort_run_by)
+        self.runs = self.get_runs(in_runs, filter_name, filter_nml, sort_run_by)
        if len(self.runs) == 0:
            raise ValueError("No runs found")
@@ -66,8 +106,10 @@ class RunSelector:
            for run in self.runs:
                in_nums[run] = nums_temp
-        for i, run in enumerate(self.runs):
+            for i, run in enumerate(self.runs):
-            self.nums[run] = self.get_nums(run, in_nums[run], time_min, time_max, time)
+                self.nums[run] = self.get_nums(
                    run, in_nums[run], time_min, time_max, time
                )
    def load_namelist(self, run):
        path_run = self.path_in + "/" + run
@@ -77,11 +119,11 @@ class RunSelector:
    def get_nml_value(self, nml_key, run):
        return self.namelist[run][nml_key]
-    def nml_select(self, runs, namelist_cond):
+    def nml_select(self, runs, filter_nml):
-        if type(namelist_cond) == tuple:
+        if type(filter_nml) == tuple:
-            namelist_cond = [namelist_cond]
+            filter_nml = [filter_nml]
-        for (nml_key, operator, operand) in namelist_cond:
+        for (nml_key, operator, operand) in filter_nml:
            value = {}
            for run in runs:
                value[run] = self.get_nml_value(nml_key, run)
@@ -97,7 +139,7 @@ class RunSelector:
                runs = list(filter(lambda r: value[r] in operand, runs))
        return runs
-    def get_runs(self, in_runs=None, name_run="*", namelist_cond={}, sort_run_by=None):
+    def get_runs(self, in_runs=None, filter_name="*", filter_nml={}, sort_run_by=None):
        def try_load_nml(run):
            try:
                self.namelist[run] = self.load_namelist(run)
@@ -109,15 +151,17 @@ class RunSelector:
        runs = list(
            map(
                os.path.basename,
-                list(filter(os.path.isdir, glob.glob(self.path_in + "/" + name_run))),
+                list(
                    filter(os.path.isdir, glob.glob(self.path_in + "/" + filter_name))
                ),
            )
        )
-        if not in_runs is None:
+        if in_runs is not None:
            runs = list(filter(lambda n: n in runs, in_runs))
        runs = list(filter(try_load_nml, runs))
        # Select runs that match namelist conditions
-        runs = self.nml_select(runs, namelist_cond)
+        runs = self.nml_select(runs, filter_nml)
        # Sort by the value in the namelist of sort_run_by
        if not sort_run_by is None:
@@ -147,7 +191,7 @@ class RunSelector:
            parsed = yaml.safe_load(line.replace("=", ":"))
            if type(parsed) == dict:
                info.update(parsed)
-        info_file.close()
+            info_file.close()
        return info
    def get_nums(self, run, in_nums=None, time_min=None, time_max=None, time=None):
@@ -190,14 +234,26 @@ class RunSelector:
        else:
            nums = list(filter(try_load_info, nums))
-        if not time_min is None:
+        if time_min is not None:
            nums = list(filter(lambda n: self.info[run][n]["time"] >= time_min, nums))
-        if not time_max is None:
+        if time_max is not None:
            nums = list(filter(lambda n: self.info[run][n]["time"] <= time_max, nums))
-        if not time is None:
+        if time is not None:
-            times = np.asarray([[self.info[run][n]["time"], n] for n in nums])
+            filtered_nums = []
-            idx = (np.abs(times[:, 0] - time)).argmin()
+            if not isinstance(time, list):
-            nums = [int(times[idx, 1])]
+                time = [time]
            # Get time for all already selected nums
            time_all = np.asarray([[self.info[run][n]["time"], n] for n in nums])
            # For each time provided by the user, select the output closest to it
            for t in time:
                # Index of this output in the time_all array
                idx = (np.abs(time_all[:, 0] - t)).argmin()
                num = int(time_all[idx, 1])
                # Only add each selected output once
                if num not in filtered_nums:
                    filtered_nums.append(num)
            nums = filtered_nums
        return nums