TECA
The Toolkit for Extreme Climate Analysis
teca_cf_reader.h
1 #ifndef teca_cf_reader_h
2 #define teca_cf_reader_h
3 
4 #include "teca_algorithm.h"
5 #include "teca_metadata.h"
6 #include "teca_shared_object.h"
7 
8 #include <vector>
9 #include <string>
10 
11 TECA_SHARED_OBJECT_FORWARD_DECL(teca_cf_reader)
12 
13 class teca_cf_reader_internals;
14 using p_teca_cf_reader_internals = std::shared_ptr<teca_cf_reader_internals>;
15 
16 /// A reader for Cartesian mesh based data stored in NetCDF CF format.
17 /**
18  * Reads a set of arrays from a single time step into a teca_cartesian_mesh
19  * dataset. The reader responds to requests for specific arrays and the data
20  * may be optionally subset via extent and bounds request keys.
21  *
22  * The time varying dataset to read is identified by a regular expression
23  * identifying a set of files. Note, regular expressions are similar to and more
24  * powerful than the more familiar shell glob but the control characters have
25  * different meanings.
26  *
27  * ### metadata keys:
28  *
29  * | key | description |
30  * | ---- | ----------- |
31  * | variables | a list of all available variables. |
32  * | attributes | a metadata object holding all NetCDF attributes for the variables |
33  * | coordinates | a metadata object holding names and arrays of the coordinate axes |
34  * | files | list of files in this dataset |
35  * | step_count | list of the number of steps in each file |
36  * | index_initializer_key | number_of_time_steps |
37  * | number_of_time_steps | total number of time steps in all files |
38  * | index_request_key | time_step |
39  * | whole_extent | index space extent describing (nodal) dimensions of the mesh |
40  * | bounds | world coordinate space bounding box covered by the mesh |
41  *
42  * ### attribute metadata:
43  *
44  * | key | description |
45  * | ---- | ----------- |
46  * | [variable name] | a metadata object holding all NetCDF attributes, and |
47  * | | TECA specific per-array metadata |
48  *
49  * ### coordinate metadata:
50  *
51  * | key | description |
52  * | ---- | ----------- |
53  * | x_axis_variable | name of x axis variable |
54  * | y_axis_variable | name of y axis variable |
55  * | z_axis_variable | name of z axis variable |
56  * | t_axis_variable | name of t axis variable |
57  * | x | array of x coordinates |
58  * | y | array of y coordinates |
59  * | z | array of z coordinates |
60  * | t | array of t coordinates |
61  *
62  * ### request keys:
63  *
64  * | key | description |
65  * | ---- | ----------- |
66  * | time_step | the time step to read |
67  * | arrays | list of arrays to read |
68  * | extent | index space extents describing the subset of data to read |
69  * | bounds | world space bounds describing the subset of data to read |
70  *
71  * ### output:
72  * The reader generates a 1,2 or 3D cartesian mesh for the requested timestep
73  * on the requested extent with the requested point based arrays and value at
74  * this timestep for all time variables.
75  */
77 {
78 public:
79  TECA_ALGORITHM_STATIC_NEW(teca_cf_reader)
80  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_cf_reader)
81  TECA_ALGORITHM_CLASS_NAME(teca_cf_reader)
82  ~teca_cf_reader();
83 
84  // report/initialize to/from Boost program options
85  // objects.
86  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
87  TECA_SET_ALGORITHM_PROPERTIES()
88 
89  /** @name file_name
90  * Set a list of files to open. If this is used then the files_regex is
91  * ignored.
92  */
93  ///@{
94  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, file_name)
95  ///@}
96 
97  /** @name files_regex
98  * Set a regular expression identifying the set of files comprising the
99  * dataset. This should contain the full path to the files and the regular
100  * expression. Only the final component of a path may contain a regex.
101  * Be aware that regular expression control characters do not have the
102  * same meaning as shell glob control characters. When used in a shell
103  * regular expression control characters need to be quoted or escaped to
104  * prevent the shell from interpreting them.
105  */
106  ///@{
107  TECA_ALGORITHM_PROPERTY(std::string, files_regex)
108  ///@}
109 
110  /** @name periodic_in_x
111  * A flag that indicates a periodic boundary in the x direction
112  */
113  ///@{
114  TECA_ALGORITHM_PROPERTY(int, periodic_in_x)
115  ///@}
116 
117  /** @name periodic_in_y
118  * A flag that indicates a periodic boundary in the y direction
119  */
120  ///@{
121  TECA_ALGORITHM_PROPERTY(int, periodic_in_y)
122  ///@}
123 
124  /** @name periodic_in_z
125  * A flag that indicates a periodic boundary in the z direction
126  */
127  ///@{
128  TECA_ALGORITHM_PROPERTY(int, periodic_in_z)
129  ///@}
130 
131  /** @name x_axis_variable
132  * Set the name of the variable to use for the x coordinate axis.
133  * An empty string disables this dimension.
134  */
135  ///@{
136  TECA_ALGORITHM_PROPERTY(std::string, x_axis_variable)
137  ///@}
138 
139  /** @name y_axis_variable
140  * Set the name of the variable to use for the y coordinate axis.
141  * An empty string disables this dimension.
142  */
143  ///@{
144  TECA_ALGORITHM_PROPERTY(std::string, y_axis_variable)
145  ///@}
146  /** @name z_axis_variable
147  * Set the name of the variable to use for the z coordinate axis.
148  * An empty string disables this dimension.
149  */
150  ///@{
151  TECA_ALGORITHM_PROPERTY(std::string, z_axis_variable)
152  ///@}
153 
154  /** @name t_axis_variable
155  * Set the name of the variable to use for the t coordinate axis.
156  * An empty string disables this dimension.
157  */
158  ///@{
159  TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
160  ///@}
161 
162  /** @name calendar
163  * Override the calendar. When specified the value takes precedence over
164  * the values found in the file.
165  */
166  ///@{
167  TECA_ALGORITHM_PROPERTY(std::string, calendar)
168  ///@}
169 
170  /** @name t_units
171  * Override the time units. When specified the value takes precedence over
172  * the values found in the file.
173  */
174  ///@{
175  TECA_ALGORITHM_PROPERTY(std::string, t_units)
176  ///@}
177 
178  /** @name filename_time_template
179  * a way to infer time from the filename if the time axis is not stored in
180  * the file itself. std::get_time format codes are used. If a calendar is
181  * not specified then the standard calendar is used. If time units are not
182  * specified then the time units will be "days since %Y-%m-%d 00:00:00"
183  * where Y,m, and d are computed from the filename of the first file. set
184  * t_axis_variable to an empty string to use.
185  *
186  * For example, for the list of files:
187  *
188  * > my_file_20170516_00.nc
189  * > my_file_20170516_03.nc
190  * > ...
191  *
192  * the template would be
193  *
194  * > my_file_%Y%m%d_%H.nc
195  */
196  ///@{
197  TECA_ALGORITHM_PROPERTY(std::string, filename_time_template)
198  ///@}
199 
200  /** @name t_value
201  * an explicit list of double precision time values to use. set
202  * t_axis_variable to an empty string to use.
203  */
204  ///@{
205  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
206  ///@}
207 
208  /** @name max_metadata_ranks
209  * set/get the number of ranks used to read the time axis. the default
210  * value of 1024 ranks works well on NERSC Cori scratch file system and may
211  * not be optimal on other systems.
212  */
213  ///@{
214  TECA_ALGORITHM_PROPERTY(int, max_metadata_ranks)
215  ///@}
216 
217  /** @name clamp_dimensions_of_one
218  * If set the requested extent will be clamped in a given direction if the
219  * coordinate axis in that direction has a length of 1 and the requested extent
220  * would be out of bounds. This exists to deal with non-conformant data and
221  * should be used with caution.
222  */
223  ///@{
224  TECA_ALGORITHM_PROPERTY(int, clamp_dimensions_of_one)
225  ///@}
226 
227 protected:
228  teca_cf_reader();
229  void clear_cached_metadata();
230 
231 private:
232  teca_metadata get_output_metadata(
233  unsigned int port,
234  const std::vector<teca_metadata> &input_md) override;
235 
236  const_p_teca_dataset execute(
237  unsigned int port,
238  const std::vector<const_p_teca_dataset> &input_data,
239  const teca_metadata &request) override;
240 
241  virtual void set_modified() override;
242 
243 private:
244  std::vector<std::string> file_names;
245  std::string files_regex;
246  std::string x_axis_variable;
247  std::string y_axis_variable;
248  std::string z_axis_variable;
249  std::string t_axis_variable;
250  std::string calendar;
251  std::string t_units;
252  std::string filename_time_template;
253  std::vector<double> t_values;
254  int periodic_in_x;
255  int periodic_in_y;
256  int periodic_in_z;
257  int max_metadata_ranks;
258  int clamp_dimensions_of_one;
259  p_teca_cf_reader_internals internals;
260 };
261 
262 #endif
teca_cf_reader
A reader for Cartesian mesh based data stored in NetCDF CF format.
Definition: teca_cf_reader.h:76
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:18
teca_shared_object.h
teca_algorithm
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:237