From cea59f78a36ff8001a1c4abcf30ab4ab43820b80 Mon Sep 17 00:00:00 2001 From: Noe Brucy Date: Wed, 24 Jun 2020 14:47:50 +0200 Subject: [PATCH] [run_selector] Select several outputs by time + doc --- run_selector.py | 96 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/run_selector.py b/run_selector.py index f403d0c..a415cc9 100644 --- a/run_selector.py +++ b/run_selector.py @@ -1,5 +1,7 @@ +# -*- mode: python-mode; python-indent-offset: 4 -*- # coding: utf-8 + import os import glob from functools import partial @@ -39,18 +41,56 @@ class RunSelector: in_runs=None, in_nums="all", pp_params=default_params(), - name_run="*", - namelist_cond={}, + filter_name="*", + filter_nml={}, sort_run_by=None, time_min=None, time_max=None, time=None, ): + """ + Select runs and outputs with several filter options. + By default, all runs and outputs within path_in are considered + + Parameters + --------- + + 1. Define the set of runs and outputs considered + + path_in : str, path to the folder of the runs + + 2. Filter runs and outputs + + in_runs : str or list of str. The names of the runs to consider. Default: all. + in_nums : int or list of int or str. + The output numbers to consider. + "last" select only the last output. + "all" preselect all outputs (default) + + + filter_name : str, filter runs by name. Default "*" + filter_nml : tuple or list of tuples. + Filter runs by namelist. + tuples are in the following form: + (nml_key, operator, nml_value) + with nml_key a key from the namelist (eg. "cloud_params/dens0") + operator within ("=", "!=", "<", ">", "in") + and nml_value a string, float or int + time_min : float, select output where time >= time_min (in code units) + time_max : float, select output where time <= time_max (in code units) + time : float or list of float. For each value, select the output closest to it. + + 3. 
Sort the runs + + sort_run_by : str, a key from the namelist used to sort the runs (by ascending order) + + """ + self.path_in = path_in self.pp_params = pp_params self.namelist = {} - self.runs = self.get_runs(in_runs, name_run, namelist_cond, sort_run_by) + self.runs = self.get_runs(in_runs, filter_name, filter_nml, sort_run_by) if len(self.runs) == 0: raise ValueError("No runs found") @@ -66,8 +106,10 @@ class RunSelector: for run in self.runs: in_nums[run] = nums_temp - for i, run in enumerate(self.runs): - self.nums[run] = self.get_nums(run, in_nums[run], time_min, time_max, time) + for i, run in enumerate(self.runs): + self.nums[run] = self.get_nums( + run, in_nums[run], time_min, time_max, time + ) def load_namelist(self, run): path_run = self.path_in + "/" + run @@ -77,11 +119,11 @@ class RunSelector: def get_nml_value(self, nml_key, run): return self.namelist[run][nml_key] - def nml_select(self, runs, namelist_cond): - if type(namelist_cond) == tuple: - namelist_cond = [namelist_cond] + def nml_select(self, runs, filter_nml): + if type(filter_nml) == tuple: + filter_nml = [filter_nml] - for (nml_key, operator, operand) in namelist_cond: + for (nml_key, operator, operand) in filter_nml: value = {} for run in runs: value[run] = self.get_nml_value(nml_key, run) @@ -97,7 +139,7 @@ class RunSelector: runs = list(filter(lambda r: value[r] in operand, runs)) return runs - def get_runs(self, in_runs=None, name_run="*", namelist_cond={}, sort_run_by=None): + def get_runs(self, in_runs=None, filter_name="*", filter_nml={}, sort_run_by=None): def try_load_nml(run): try: self.namelist[run] = self.load_namelist(run) @@ -109,15 +151,17 @@ class RunSelector: runs = list( map( os.path.basename, - list(filter(os.path.isdir, glob.glob(self.path_in + "/" + name_run))), + list( + filter(os.path.isdir, glob.glob(self.path_in + "/" + filter_name)) + ), ) ) - if not in_runs is None: + if in_runs is not None: runs = list(filter(lambda n: n in runs, in_runs)) runs = 
list(filter(try_load_nml, runs)) # Select runs that match namelist conditions - runs = self.nml_select(runs, namelist_cond) + runs = self.nml_select(runs, filter_nml) # Sort by the value in the namelist of sort_run_by if not sort_run_by is None: @@ -147,7 +191,7 @@ class RunSelector: parsed = yaml.safe_load(line.replace("=", ":")) if type(parsed) == dict: info.update(parsed) - info_file.close() + info_file.close() return info def get_nums(self, run, in_nums=None, time_min=None, time_max=None, time=None): @@ -190,14 +234,26 @@ class RunSelector: else: nums = list(filter(try_load_info, nums)) - if not time_min is None: + if time_min is not None: nums = list(filter(lambda n: self.info[run][n]["time"] >= time_min, nums)) - if not time_max is None: + if time_max is not None: nums = list(filter(lambda n: self.info[run][n]["time"] <= time_max, nums)) - if not time is None: - times = np.asarray([[self.info[run][n]["time"], n] for n in nums]) - idx = (np.abs(times[:, 0] - time)).argmin() - nums = [int(times[idx, 1])] + if time is not None: + filtered_nums = [] + if not isinstance(time, list): + time = [time] + # Get time for all already selected nums + time_all = np.asarray([[self.info[run][n]["time"], n] for n in nums]) + + # For all times provided by the user, select the output closer to it + for t in time: + # Index of this output in the time_all array + idx = (np.abs(time_all[:, 0] - t)).argmin() + num = int(time_all[idx, 1]) + # Only add each selected output once + if num not in filtered_nums: + filtered_nums.append(num) + nums = filtered_nums return nums