TECA
The Toolkit for Extreme Climate Analysis
teca_cf_reader.h
1 #ifndef teca_cf_reader_h
2 #define teca_cf_reader_h
3 
4 #include "teca_config.h"
5 #include "teca_algorithm.h"
6 #include "teca_metadata.h"
7 #include "teca_shared_object.h"
8 #include "teca_netcdf_util.h"
9 
10 #include <vector>
11 #include <string>
12 
13 TECA_SHARED_OBJECT_FORWARD_DECL(teca_cf_reader)
14 
15 class teca_cf_reader_internals;
16 using p_teca_cf_reader_internals = std::shared_ptr<teca_cf_reader_internals>;
17 
18 /// A reader for Cartesian mesh based data stored in NetCDF CF format.
19 /**
20  * Reads a set of arrays from a single time step into a teca_cartesian_mesh
21  * dataset. The reader responds to requests for specific arrays and the data
22  * may be optionally subset via extent and bounds request keys.
23  *
24  * The time varying dataset to read is identified by a regular expression
25  * identifying a set of files. Note, regular expressions are similar to, and
26  * more powerful than, the more familiar shell glob, but the control characters
27  * have different meanings.
28  *
29  * ### metadata keys:
30  *
31  * | key | description |
32  * | ---- | ----------- |
33  * | variables | a list of all available variables. |
34  * | attributes | a metadata object holding all NetCDF attributes for the variables |
35  * | coordinates | a metadata object holding names and arrays of the coordinate axes |
36  * | files | list of files in this dataset |
37  * | step_count | list of the number of steps in each file |
38  * | index_initializer_key | number_of_time_steps |
39  * | number_of_time_steps | total number of time steps in all files |
40  * | index_request_key | time_step |
41  * | whole_extent | index space extent describing (nodal) dimensions of the mesh |
42  * | bounds | world coordinate space bounding box covered by the mesh |
43  *
44  * ### attribute metadata:
45  *
46  * | key | description |
47  * | ---- | ----------- |
48  * | [variable name] | a metadata object holding all NetCDF attributes, and |
49  * | | TECA specific per-array metadata |
50  *
51  * ### coordinate metadata:
52  *
53  * | key | description |
54  * | ---- | ----------- |
55  * | x_axis_variable | name of x axis variable |
56  * | y_axis_variable | name of y axis variable |
57  * | z_axis_variable | name of z axis variable |
58  * | t_axis_variable | name of t axis variable |
59  * | x | array of x coordinates |
60  * | y | array of y coordinates |
61  * | z | array of z coordinates |
62  * | t | array of t coordinates |
63  *
64  * ### request keys:
65  *
66  * | key | description |
67  * | ---- | ----------- |
68  * | time_step | the time step to read |
69  * | arrays | list of arrays to read |
70  * | extent | index space extents describing the subset of data to read |
71  * | bounds | world space bounds describing the subset of data to read |
72  *
73  * ### output:
74  * The reader generates a 1, 2, or 3D Cartesian mesh for the requested timestep
75  * on the requested extent with the requested point based arrays and value at
76  * this timestep for all time variables.
77  */
79 {
80 public:
81  TECA_ALGORITHM_STATIC_NEW(teca_cf_reader)
82  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_cf_reader)
83  TECA_ALGORITHM_CLASS_NAME(teca_cf_reader)
84  ~teca_cf_reader();
85 
86  // report/initialize to/from Boost program options
87  // objects.
88  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
89  TECA_SET_ALGORITHM_PROPERTIES()
90 
91  /** @name file_name
92  * Set a list of files to open. If this is used then the files_regex is
93  * ignored.
94  */
95  ///@{
96  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, file_name)
97  ///@}
98 
99  /** @name files_regex
100  * Set a regular expression identifying the set of files comprising the
101  * dataset. This should contain the full path to the files and the regular
102  * expression. Only the final component of a path may contain a regex.
103  * Be aware that regular expression control characters do not have the
104  * same meaning as shell glob control characters. When used in a shell
105  * regular expression control characters need to be quoted or escaped to
106  * prevent the shell from interpreting them.
107  */
108  ///@{
109  TECA_ALGORITHM_PROPERTY(std::string, files_regex)
110  ///@}
111 
112  /** @name periodic_in_x
113  * A flag that indicates a periodic bondary in the x direction
114  */
115  ///@{
116  TECA_ALGORITHM_PROPERTY(int, periodic_in_x)
117  ///@}
118 
119  /** @name periodic_in_y
120  * A flag that indicates a periodic bondary in the y direction
121  */
122  ///@{
123  TECA_ALGORITHM_PROPERTY(int, periodic_in_y)
124  ///@}
125 
126  /** @name periodic_in_z
127  * A flag that indicates a periodic bondary in the z direction
128  */
129  ///@{
130  TECA_ALGORITHM_PROPERTY(int, periodic_in_z)
131  ///@}
132 
133  /** @name select_ensemble_member_index
134  * Index of the ensemble member to use (if an ensemble dimension is given)
135  */
136  ///@{
137  TECA_ALGORITHM_PROPERTY(int, select_ensemble_member_index)
138  ///@}
139 
140  /** @name x_axis_variable
141  * Set the name of the variable to use for the x coordinate axis.
142  * An empty string disables this dimension.
143  */
144  ///@{
145  TECA_ALGORITHM_PROPERTY(std::string, x_axis_variable)
146  ///@}
147 
148  /** @name y_axis_variable
149  * Set the name of the variable to use for the y coordinate axis.
150  * An empty string disables this dimension.
151  */
152  ///@{
153  TECA_ALGORITHM_PROPERTY(std::string, y_axis_variable)
154  ///@}
155  /** @name z_axis_variable
156  * Set the name of the variable to use for the z coordinate axis.
157  * An empty string disables this dimension.
158  */
159  ///@{
160  TECA_ALGORITHM_PROPERTY(std::string, z_axis_variable)
161  ///@}
162 
163  /** @name t_axis_variable
164  * Set the name of the variable to use for the t coordinate axis.
165  * An empty string disables this dimension.
166  */
167  ///@{
168  TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
169  ///@}
170 
171  /** @name ensemble_dimension_name
172  * Set the name of the dimension that corresponds to ensemble members.
173  * An empty string disables this dimension.
174  */
175  ///@{
176  TECA_ALGORITHM_PROPERTY(std::string, ensemble_dimension_name)
177  ///@}
178 
179  /** @name calendar
180  * Override the calendar. When specified the values takes precedence over
181  * the values found in the file.
182  */
183  ///@{
184  TECA_ALGORITHM_PROPERTY(std::string, calendar)
185  ///@}
186 
187  /** @name t_units
188  * Override the time units. When specified the value takes precedence over
189  * the values found in the file.
190  */
191  ///@{
192  TECA_ALGORITHM_PROPERTY(std::string, t_units)
193  ///@}
194 
195  /** @name filename_time_template
196  * a way to infer time from the filename if the time axis is not stored in
197  * the file itself. std::get_time format codes are used. If a calendar is
198  * not specified then the standard calendar is used. If time units are not
199  * specified then the time units will be "days since %Y-%m-%d 00:00:00"
200  * where Y,m, and d are computed from the filename of the first file. set
201  * t_axis_variable to an empty string to use.
202  *
203  * For example, for the list of files:
204  *
205  * > my_file_20170516_00.nc
206  * > my_file_20170516_03.nc
207  * > ...
208  *
209  * the template would be
210  *
211  * > my_file_%Y%m%d_%H.nc
212  */
213  ///@{
214  TECA_ALGORITHM_PROPERTY(std::string, filename_time_template)
215  ///@}
216 
217  /** @name t_value
218  * an explicit list of double precision time values to use. set
219  * t_axis_variable to an empty string to use.
220  */
221  ///@{
222  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
223  ///@}
224 
225  /** @name max_metadata_ranks
226  * set/get the number of ranks used to read the time axis. the default
227  * value of 1024 ranks works well on NERSC Cori scratch file system and may
228  * not be optimal on other systems.
229  */
230  ///@{
231  TECA_ALGORITHM_PROPERTY(int, max_metadata_ranks)
232  ///@}
233 
234  /** @name clamp_dimensions_of_one
235  * If set the requested extent will be clamped in a given direction if the
236  * coorinate axis in that dircetion has a length of 1 and the requested extent
237  * would be out of bounds. This exists to deal with non-conformant data and
238  * should be used with caution.
239  */
240  ///@{
241  TECA_ALGORITHM_PROPERTY(int, clamp_dimensions_of_one)
242  ///@}
243 
244  /** @name collective_buffer
245  * Enables MPI I/O colective buffering. Collective buffering is only valid
246  * when the spatial partitioner is enabled and the number of spatial
247  * partitions is equal to the number of MPI ranks, and the code is single
248  * threaded. This is an experimental feature.
249  */
250  ///@{
251  TECA_ALGORITHM_PROPERTY(int, collective_buffer)
252  ///@}
253 
254 protected:
255  teca_cf_reader();
256  void clear_cached_metadata();
257 
258 private:
259  using teca_algorithm::get_output_metadata;
260 
261  void get_variables_in_group(
262  teca_netcdf_util::netcdf_handle &fh,
263  int parent_id,
264  std::string group_name,
265  teca_metadata &atrs,
266  std::vector<std::string> &vars);
267 
268  teca_metadata get_output_metadata(
269  unsigned int port,
270  const std::vector<teca_metadata> &input_md) override;
271 
272  const_p_teca_dataset execute(
273  unsigned int port,
274  const std::vector<const_p_teca_dataset> &input_data,
275  const teca_metadata &request) override;
276 
277  virtual void set_modified() override;
278 
279 private:
280  std::vector<std::string> file_names;
281  std::string files_regex;
282  std::string x_axis_variable;
283  std::string y_axis_variable;
284  std::string z_axis_variable;
285  std::string t_axis_variable;
286  std::string ensemble_dimension_name;
287  std::string calendar;
288  std::string t_units;
289  std::string filename_time_template;
290  std::vector<double> t_values;
291  int periodic_in_x;
292  int periodic_in_y;
293  int periodic_in_z;
294  int select_ensemble_member_index;
295  int max_metadata_ranks;
297  int collective_buffer;
298  p_teca_cf_reader_internals internals;
299 };
300 
301 #endif
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:244
A reader for Cartesian mesh based data stored in NetCDF CF format.
Definition: teca_cf_reader.h:79
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:22
TECA_EXPORT int clamp_dimensions_of_one(unsigned long nx_max, unsigned long ny_max, unsigned long nz_max, unsigned long *extent, bool verbose)
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
Codes dealing with NetCDF I/O calls.
Definition: teca_netcdf_util.h:67