TECA
The Toolkit for Extreme Climate Analysis
teca_cf_reader.h
1 #ifndef teca_cf_reader_h
2 #define teca_cf_reader_h
3 
4 #include "teca_config.h"
5 #include "teca_algorithm.h"
6 #include "teca_metadata.h"
7 #include "teca_shared_object.h"
8 
9 #include <vector>
10 #include <string>
11 
12 TECA_SHARED_OBJECT_FORWARD_DECL(teca_cf_reader)
13 
14 class teca_cf_reader_internals;
15 using p_teca_cf_reader_internals = std::shared_ptr<teca_cf_reader_internals>;
16 
17 /// A reader for Cartesian mesh based data stored in NetCDF CF format.
18 /**
19  * Reads a set of arrays from a single time step into a teca_cartesian_mesh
20  * dataset. The reader responds to requests for specific arrays and the data
21  * may be optionally subset via extent and bounds request keys.
22  *
23  * The time varying dataset to read is identified by a regular expression
24  * identifying a set of files. Note, regular expressions are similar to, and more
25  * powerful than, the more familiar shell glob but the control characters have
26  * different meanings.
27  *
28  * ### metadata keys:
29  *
30  * | key | description |
31  * | ---- | ----------- |
32  * | variables | a list of all available variables. |
33  * | attributes | a metadata object holding all NetCDF attributes for the variables |
34  * | coordinates | a metadata object holding names and arrays of the coordinate axes |
35  * | files | list of files in this dataset |
36  * | step_count | list of the number of steps in each file |
37  * | index_initializer_key | number_of_time_steps |
38  * | number_of_time_steps | total number of time steps in all files |
39  * | index_request_key | time_step |
40  * | whole_extent | index space extent describing (nodal) dimensions of the mesh |
41  * | bounds | world coordinate space bounding box covered by the mesh |
42  *
43  * ### attribute metadata:
44  *
45  * | key | description |
46  * | ---- | ----------- |
47  * | [variable name] | a metadata object holding all NetCDF attributes, and |
48  * | | TECA specific per-array metadata |
49  *
50  * ### coordinate metadata:
51  *
52  * | key | description |
53  * | ---- | ----------- |
54  * | x_axis_variable | name of x axis variable |
55  * | y_axis_variable | name of y axis variable |
56  * | z_axis_variable | name of z axis variable |
57  * | t_axis_variable | name of t axis variable |
58  * | x | array of x coordinates |
59  * | y | array of y coordinates |
60  * | z | array of z coordinates |
61  * | t | array of t coordinates |
62  *
63  * ### request keys:
64  *
65  * | key | description |
66  * | ---- | ----------- |
67  * | time_step | the time step to read |
68  * | arrays | list of arrays to read |
69  * | extent | index space extents describing the subset of data to read |
70  * | bounds | world space bounds describing the subset of data to read |
71  *
72  * ### output:
73  * The reader generates a 1,2 or 3D cartesian mesh for the requested timestep
74  * on the requested extent with the requested point based arrays and value at
75  * this timestep for all time variables.
76  */
78 {
79 public:
80  TECA_ALGORITHM_STATIC_NEW(teca_cf_reader)
81  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_cf_reader)
82  TECA_ALGORITHM_CLASS_NAME(teca_cf_reader)
83  ~teca_cf_reader();
84 
85  // report/initialize to/from Boost program options
86  // objects.
87  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
88  TECA_SET_ALGORITHM_PROPERTIES()
89 
90  /** @name file_name
91  * Set a list of files to open. If this is used then the files_regex is
92  * ignored.
93  */
94  ///@{
95  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, file_name)
96  ///@}
97 
98  /** @name files_regex
99  * Set a regular expression identifying the set of files comprising the
100  * dataset. This should contain the full path to the files and the regular
101  * expression. Only the final component of a path may contain a regex.
102  * Be aware that regular expression control characters do not have the
103  * same meaning as shell glob control characters. When used in a shell
104  * regular expression control characters need to be quoted or escaped to
105  * prevent the shell from interpreting them.
106  */
107  ///@{
108  TECA_ALGORITHM_PROPERTY(std::string, files_regex)
109  ///@}
110 
111  /** @name periodic_in_x
112  * A flag that indicates a periodic boundary in the x direction
113  */
114  ///@{
115  TECA_ALGORITHM_PROPERTY(int, periodic_in_x)
116  ///@}
117 
118  /** @name periodic_in_y
119  * A flag that indicates a periodic boundary in the y direction
120  */
121  ///@{
122  TECA_ALGORITHM_PROPERTY(int, periodic_in_y)
123  ///@}
124 
125  /** @name periodic_in_z
126  * A flag that indicates a periodic boundary in the z direction
127  */
128  ///@{
129  TECA_ALGORITHM_PROPERTY(int, periodic_in_z)
130  ///@}
131 
132  /** @name x_axis_variable
133  * Set the name of the variable to use for the x coordinate axis.
134  * An empty string disables this dimension.
135  */
136  ///@{
137  TECA_ALGORITHM_PROPERTY(std::string, x_axis_variable)
138  ///@}
139 
140  /** @name y_axis_variable
141  * Set the name of the variable to use for the y coordinate axis.
142  * An empty string disables this dimension.
143  */
144  ///@{
145  TECA_ALGORITHM_PROPERTY(std::string, y_axis_variable)
146  ///@}
147  /** @name z_axis_variable
148  * Set the name of the variable to use for the z coordinate axis.
149  * An empty string disables this dimension.
150  */
151  ///@{
152  TECA_ALGORITHM_PROPERTY(std::string, z_axis_variable)
153  ///@}
154 
155  /** @name t_axis_variable
156  * Set the name of the variable to use for the t coordinate axis.
157  * An empty string disables this dimension.
158  */
159  ///@{
160  TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
161  ///@}
162 
163  /** @name calendar
164  * Override the calendar. When specified the value takes precedence over
165  * the values found in the file.
166  */
167  ///@{
168  TECA_ALGORITHM_PROPERTY(std::string, calendar)
169  ///@}
170 
171  /** @name t_units
172  * Override the time units. When specified the value takes precedence over
173  * the values found in the file.
174  */
175  ///@{
176  TECA_ALGORITHM_PROPERTY(std::string, t_units)
177  ///@}
178 
179  /** @name filename_time_template
180  * a way to infer time from the filename if the time axis is not stored in
181  * the file itself. std::get_time format codes are used. If a calendar is
182  * not specified then the standard calendar is used. If time units are not
183  * specified then the time units will be "days since %Y-%m-%d 00:00:00"
184  * where Y,m, and d are computed from the filename of the first file. set
185  * t_axis_variable to an empty string to use.
186  *
187  * For example, for the list of files:
188  *
189  * > my_file_20170516_00.nc
190  * > my_file_20170516_03.nc
191  * > ...
192  *
193  * the template would be
194  *
195  * > my_file_%Y%m%d_%H.nc
196  */
197  ///@{
198  TECA_ALGORITHM_PROPERTY(std::string, filename_time_template)
199  ///@}
200 
201  /** @name t_value
202  * an explicit list of double precision time values to use. set
203  * t_axis_variable to an empty string to use.
204  */
205  ///@{
206  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
207  ///@}
208 
209  /** @name max_metadata_ranks
210  * set/get the number of ranks used to read the time axis. the default
211  * value of 1024 ranks works well on NERSC Cori scratch file system and may
212  * not be optimal on other systems.
213  */
214  ///@{
215  TECA_ALGORITHM_PROPERTY(int, max_metadata_ranks)
216  ///@}
217 
218  /** @name clamp_dimensions_of_one
219  * If set the requested extent will be clamped in a given direction if the
220  * coordinate axis in that direction has a length of 1 and the requested extent
221  * would be out of bounds. This exists to deal with non-conformant data and
222  * should be used with caution.
223  */
224  ///@{
225  TECA_ALGORITHM_PROPERTY(int, clamp_dimensions_of_one)
226  ///@}
227 
228 protected:
229  teca_cf_reader();
230  void clear_cached_metadata();
231 
232 private:
233  using teca_algorithm::get_output_metadata;
234 
235  teca_metadata get_output_metadata(
236  unsigned int port,
237  const std::vector<teca_metadata> &input_md) override;
238 
239  const_p_teca_dataset execute(
240  unsigned int port,
241  const std::vector<const_p_teca_dataset> &input_data,
242  const teca_metadata &request) override;
243 
244  virtual void set_modified() override;
245 
246 private:
247  std::vector<std::string> file_names;
248  std::string files_regex;
249  std::string x_axis_variable;
250  std::string y_axis_variable;
251  std::string z_axis_variable;
252  std::string t_axis_variable;
253  std::string calendar;
254  std::string t_units;
255  std::string filename_time_template;
256  std::vector<double> t_values;
257  int periodic_in_x;
258  int periodic_in_y;
259  int periodic_in_z;
260  int max_metadata_ranks;
262  p_teca_cf_reader_internals internals;
263 };
264 
265 #endif
teca_cf_reader
A reader for Cartesian mesh based data stored in NetCDF CF format.
Definition: teca_cf_reader.h:77
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:21
teca_coordinate_util::clamp_dimensions_of_one
TECA_EXPORT int clamp_dimensions_of_one(unsigned long nx_max, unsigned long ny_max, unsigned long nz_max, unsigned long *extent, bool verbose)
teca_shared_object.h
teca_error::TECA_EXPORT
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
teca_algorithm
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:237