TECA
The Toolkit for Extreme Climate Analysis
teca_multi_cf_reader.h
1 #ifndef teca_multi_cf_reader_h
2 #define teca_multi_cf_reader_h
3 
4 #include "teca_config.h"
5 #include "teca_algorithm.h"
6 #include "teca_metadata.h"
7 #include "teca_shared_object.h"
8 #include "teca_cf_reader.h"
9 
10 #include <set>
11 #include <vector>
12 #include <string>
13 
14 TECA_SHARED_OBJECT_FORWARD_DECL(teca_multi_cf_reader)
15 
16 class teca_multi_cf_reader_internals;
17 using p_teca_multi_cf_reader_internals = std::shared_ptr<teca_multi_cf_reader_internals>;
18 
19 /// A reader for data stored in NetCDF CF format in multiple files.
20 /**
21  * The data read is presented to the down stream as a single dataset
22  *
23  * Use the add_reader method to specify a regular expression and the
24  * corresponding list of variables to read. A reader, not necessarily the same
25  * one, must be selected to provide the time and spatial axes.
26  *
27  * this reader could handle spatio-temporal interpolations as well, however
28  * that is currently not implemented. as a result all data is expected to be
29  * on the same coordinate system.
30  *
31  * A number of algorithm properties modify run time behavior, most of these
32  * are exposed from teca_cf_reader. see the teca_cf_reader for details.
33  *
34  * The reader may be initialized via a configuration file. The configuration
35  * file consists of name = value pairs and flags organized in sections.
36  * Sections are declared using []. There is an optional global section
37  * followed by a number of [cf_reader] sections. Each [cf_reader] section
38  * consists of a name (optional), a regex, a list of variables, a provides_time
39  * flag (optional) and a provides_geometry flag (optional). At least one section
40  * must contain a provides_time and provides_geometry flag. The global section
41  * may contain a data_root. Occurrences of the string %data_root% in the regex
42  * are replaced with the value of data_root.
43  *
44  * The following example configures the reader to read hus, ua and va.
45  *
46  * ```
47  * # TECA multi_cf_reader config
48  *
49  * data_root = /opt/TECA_data/HighResMIP/ECMWF-IFS-HR-SST-present
50  *
51  * [cf_reader]
52  * regex = %data_root%/hus/hus.*\.nc$
53  * variables = hus
54  * provides_time
55  * provides_geometry
56  *
57  * [cf_reader]
58  * regex = %data_root%/va/va.*\.nc$
59  * variables = va
60  *
61  * [cf_reader]
62  * regex = %data_root%/ua/ua.*\.nc$
63  * variables = ua
64  * ```
65  */
67 {
68 public:
69  TECA_ALGORITHM_STATIC_NEW(teca_multi_cf_reader)
70  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_multi_cf_reader)
71  TECA_ALGORITHM_CLASS_NAME(teca_multi_cf_reader)
73 
74  // report/initialize to/from Boost program options
75  // objects.
76  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
77  TECA_SET_ALGORITHM_PROPERTIES()
78 
79  /**
80  * Set the MCF configuration file that describes the dataset to read.
81  * Each section in the MCF file adds an internal reader.
82  */
83  int set_input_file(const std::string &input_file);
84  std::string get_input_file() const { return this->input_file; } // returns the stored input_file value; const like the other getters
85 
86  /**
87  * Adds a reader to the collection and at the same time specifies how it
88  * will be used. This is an alternative way to configure the multi_cf_reader
89  * instead of providing the configuration via an MCF file (see
90  * set_input_file).
91  */
92  int add_reader(const std::string &regex,
93  const std::string &key, int provides_time,
94  int provides_geometry,
95  const std::vector<std::string> &variables);
96 
97  /// sets the reader that provides the time axis
98  int set_time_reader(const std::string &key);
99 
100  /// sets the reader that provides the mesh geometry
101  int set_geometry_reader(const std::string &key);
102 
103  /// adds to the list of variables that a reader will provide
104  int add_variable_reader(const std::string &key,
105  const std::string &variable);
106 
107  /// sets the list of variables that a reader will provide.
108  int set_variable_reader(const std::string &key,
109  const std::vector<std::string> &variable);
110 
111  /// get the list of variables that the reader will serve up
112  void get_variables(std::vector<std::string> &vars);
113 
114  /** @name periodic_in_x
115  * Set to indicate the presence of a periodic boundary in the x direction.
116  * If set this will override the corresponding setting from the MCF file
117  * for all internal readers.
118  */
119  ///@{
120  void set_periodic_in_x(int flag);
121  int get_periodic_in_x() const;
122  ///@}
123 
124  /** @name x_axis_variable
125  * Set the variable to use for the mesh x-axis. If set this will override
126  * the corresponding setting from the MCF file for all internal readers.
127  */
128  ///@{
129  void set_x_axis_variable(const std::string &var);
130  std::string get_x_axis_variable() const;
131  ///@}
132 
133  /** @name y_axis_variable
134  * Set the variable to use for the mesh y-axis. If set this will override
135  * the corresponding setting from the MCF file for all internal readers.
136  */
137  ///@{
138  void set_y_axis_variable(const std::string &var);
139  std::string get_y_axis_variable() const;
140  ///@}
141 
142  /** @name z_axis_variable
143  * Set the variable to use for the mesh z-axis. Leaving the z-axis empty
144  * results in a 2D mesh. You must set this to the correct vertical
145  * coordinate dimension to produce a 3D mesh. If set this will override
146  * the corresponding setting from the MCF file for all internal readers.
147  */
148  ///@{
149  void set_z_axis_variable(const std::string &var);
150  std::string get_z_axis_variable() const;
151  ///@}
152 
153  /** @name t_axis_variable
154  * Set the variable to use for the mesh t-axis. Default "time". Setting
155  * this to an empty string disables the time axis. If set this will
156  * override the corresponding setting from the MCF file for all internal
157  * readers.
158  */
159  ///@{
160  void set_t_axis_variable(const std::string &var);
161  std::string get_t_axis_variable() const;
162  ///@}
163 
164  /** @name calendar
165  * Use this to override the calendar, or set one when specifying t_values
166  * directly. If set this will override the corresponding setting from the
167  * MCF file for all internal readers.
168  */
169  ///@{
170  void set_calendar(const std::string &calendar);
171  std::string get_calendar() const;
172  ///@}
173 
174  /** @name t_units
175  * Use this to set or override the time units. This is necessary when
176  * specifying time values directly. If set this will override the
177  * corresponding setting from the MCF file for all internal readers.
178  */
179  ///@{
180  void set_t_units(const std::string &units);
181  std::string get_t_units() const;
182  ///@}
183 
184  /** @name filename_time_template
185  * a way to infer time from the filename if the time axis is not stored in
186  * the file itself. If set this will override the corresponding setting
187  * from the MCF file for all internal readers.
188  *
189  * strftime format codes are used. For example for the files:
190  * ```
191  * my_file_20170516_00.nc
192  * my_file_20170516_03.nc
193  * ...
194  * ```
195  * the template would be
196  * ```
197  * my_file_%Y%m%d_%H.nc
198  * ```
199  */
200  ///@{
201  void set_filename_time_template(const std::string &templ);
202  std::string get_filename_time_template() const;
203  ///@}
204 
205  /** @name t_values
206  * Set the time values to use instead if a time variable doesn't exist or
207  * you need to override it. If set this will override the corresponding
208  * setting from the MCF file for all internal readers.
209  */
210  ///@{
211  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
212  ///@}
213 
214  /** @name max_metadata_ranks
215  * set/get the number of ranks used to read the time axis. If set this
216  * will override the corresponding setting from the MCF file for all
217  * internal readers.
218  */
219  ///@{
220  TECA_ALGORITHM_PROPERTY(int, max_metadata_ranks)
221  ///@}
222 
223  /** @name periodic_in_x
224  * Set to indicate the presence of a periodic boundary in the x direction.
225  * If set this will override the corresponding setting from the MCF file
226  * for all internal readers.
227  */
228  ///@{
229  ///@}
230 
231  /** @name clamp_dimensions_of_one
232  * If set the requested extent will be clamped in a given direction if the
233  * coordinate axis in that direction has a length of 1 and the requested
234  * extent would be out of bounds. This is a work around to enable loading
235  * 2D data with a vertical dimension of 1, into a 3D mesh and should be
236  * used with caution.
237  */
238  ///@{
239  void set_clamp_dimensions_of_one(int flag);
240  int get_clamp_dimensions_of_one() const;
241  ///@}
242 
243  /** @name target_bounds
244  * If set a teca_cartesian_mesh_coordinate_transform will be added to the
245  * internal pipeline of each managed reader. There must always be 6 values
246  * provided in the form "x0, x1, y0, y1, z0, z1" that define the bounds to
247  * which each axis will be transformed. Use "1, 0" for an axis that should be
248  * passed through without applying the transform.
249  */
250  ///@{
251  void set_target_bounds(const std::vector<double> &bounds);
252  const std::vector<double> &get_target_bounds() const;
253  ///@}
254 
255  /** @name target_x_axis_variable
256  * Set the name of the variable to use for the transformed x-coordinate
257  * axis. If not set the name is passed through.
258  */
259  ///@{
260  void set_target_x_axis_variable(const std::string &flag);
261  std::string get_target_x_axis_variable() const;
262  ///@}
263 
264  /** @name target_y_axis_variable
265  * Set the name of the variable to use for the transformed y-coordinate
266  * axis. If not set the name is passed through.
267  */
268  ///@{
269  void set_target_y_axis_variable(const std::string &flag);
270  std::string get_target_y_axis_variable() const;
271  ///@}
272  /** @name target_z_axis_variable
273  * Set the name of the variable to use for the transformed z-coordinate
274  * axis. If not set the name is passed through.
275  */
276  ///@{
277  void set_target_z_axis_variable(const std::string &flag);
278  std::string get_target_z_axis_variable() const;
279  ///@}
280 
281  /** @name target_x_axis_units
282  * set/get the units for the transformed x-coordinate axis. If not set the
283  * units are passed through.
284  */
285  ///@{
286  void set_target_x_axis_units(const std::string &flag);
287  std::string get_target_x_axis_units() const;
288  ///@}
289 
290  /** @name target_y_axis_units
291  * set/get the units for the transformed y-coordinate axis. If not set the
292  * units are passed through.
293  */
294  ///@{
295  void set_target_y_axis_units(const std::string &flag);
296  std::string get_target_y_axis_units() const;
297  ///@}
298 
299  /** @name target_z_axis_units
300  * set/get the units for the transformed z-coordinate axis. If not set the
301  * units are passed through.
302  */
303  ///@{
304  void set_target_z_axis_units(const std::string &flag);
305  std::string get_target_z_axis_units() const;
306  ///@}
307 
308  /** @name validate_time_axis
309  * If set consistency checks are made to ensure that time axis from managed
310  * readers match each other. Names, calendar, units, and values of each array
311  * are verified.
312  */
313  ///@{
314  TECA_ALGORITHM_PROPERTY(int, validate_time_axis)
315  ///@}
316 
317  /** @name validate_spatial_coordinates
318  * If set consistency checks are made to ensure that spatial axes from managed
319  * readers match each other. Names, units, and values of each array
320  * are verified.
321  */
322  ///@{
323  TECA_ALGORITHM_PROPERTY(int, validate_spatial_coordinates)
324  ///@}
325 
326 protected:
328 
329 private:
330  void clear_cached_metadata();
331 
332  using teca_algorithm::get_output_metadata;
333 
334  teca_metadata get_output_metadata(unsigned int port,
335  const std::vector<teca_metadata> &input_md) override;
336 
337  const_p_teca_dataset execute(unsigned int port,
338  const std::vector<const_p_teca_dataset> &input_data,
339  const teca_metadata &request) override;
340 
341  void set_modified() override;
342 
343 private:
344  std::string input_file;
345  std::string x_axis_variable;
346  std::string y_axis_variable;
347  std::string z_axis_variable;
348  std::string t_axis_variable;
349  std::string calendar;
350  std::string t_units;
351  std::string filename_time_template;
352  std::vector<double> t_values;
353  std::vector<double> target_bounds;
354  std::string target_x_axis_variable;
355  std::string target_y_axis_variable;
356  std::string target_z_axis_variable;
357  std::string target_x_axis_units;
358  std::string target_y_axis_units;
359  std::string target_z_axis_units;
360  int periodic_in_x;
361  int max_metadata_ranks;
363  int validate_time_axis;
364  int validate_spatial_coordinates;
365 
366  p_teca_multi_cf_reader_internals internals;
367 };
368 
369 #endif
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:21
teca_multi_cf_reader
A reader for data stored in NetCDF CF format in multiple files.
Definition: teca_multi_cf_reader.h:66
teca_coordinate_util::clamp_dimensions_of_one
TECA_EXPORT int clamp_dimensions_of_one(unsigned long nx_max, unsigned long ny_max, unsigned long nz_max, unsigned long *extent, bool verbose)
teca_shared_object.h
teca_error::TECA_EXPORT
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
teca_algorithm
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:237