TECA
The Toolkit for Extreme Climate Analysis
teca_array_collection_reader.h
1 #ifndef teca_array_collection_reader_h
2 #define teca_array_collection_reader_h
3 
4 #include "teca_config.h"
5 #include "teca_algorithm.h"
6 #include "teca_metadata.h"
7 #include "teca_shared_object.h"
8 #include "teca_array_collection.h"
9 
10 #include <vector>
11 #include <string>
12 #include <mutex>
13 
14 
15 TECA_SHARED_OBJECT_FORWARD_DECL(teca_array_collection_reader)
16 
17 /// A reader for collections of arrays stored in NetCDF format.
18 /**
19  * The reader reads requested arrays into a teca_array_collection.
20  *
21  * The time varying dataset to read is identified by a regular expression
22  * identifying a set of files. Note, regular expressions are similar to, and more
23  * powerful than, the more familiar shell glob, but the control characters have
24  * different meanings.
25  *
26  * ### metadata keys:
27  *
28  * | key | description |
29  * | ---- | ----------- |
30  * | variables | a list of all available variables. |
31  * | attributes | a metadata object holding all NetCDF attributes for the variables |
32  * | coordinates | a metadata object holding names and arrays of the coordinate axes |
33  * | files | the list of files in this dataset |
34  * | step_count | a list containing the number of steps in each file indexed by file |
35  * | index_initializer_key | set to the string "number_of_time_steps" |
36  * | number_of_time_steps | set to the total number of time steps in all files |
37  * | index_request_key | set to the string "temporal_extent" |
38  *
39  * ### attribute metadata:
40  *
41  * | key | description |
42  * | ---- | ----------- |
43  * | [variable name] | a metadata object holding all NetCDF attributes, and |
44  * | | TECA specific per-array metadata |
45  *
46  * ### coordinate metadata:
47  *
48  * | key | description |
49  * | ---- | ----------- |
50  * | x_axis_variable | the name of x axis variable |
51  * | y_axis_variable | the name of y axis variable |
52  * | z_axis_variable | the name of z axis variable |
53  * | t_axis_variable | the name of t axis variable |
54  * | x | the array of x coordinates |
55  * | y | the array of y coordinates |
56  * | z | the array of z coordinates |
57  * | t | the array of t coordinates |
58  *
59  * ### request keys:
60  *
61  * | key | description |
62  * | ---- | ----------- |
63  * | temporal_extent | holds an inclusive range of time step to read [i0, i1] |
64  * | arrays | holds a list of arrays to read |
65  *
66  * ### output:
67  * The reader generates a 1, 2, or 3D Cartesian mesh for the requested timestep
68  * on the requested extent with the requested point based arrays and value at
69  * this timestep for all time variables.
70  */
72 {
73 public:
74  TECA_ALGORITHM_STATIC_NEW(teca_array_collection_reader)
75  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_array_collection_reader)
76  TECA_ALGORITHM_CLASS_NAME(teca_array_collection_reader)
78 
79  // report/initialize to/from Boost program options
80  // objects.
81  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
82  TECA_SET_ALGORITHM_PROPERTIES()
83 
84  /** @name file_name
85  * Set a list of files to open. If this is used then the files_regex is
86  * ignored.
87  */
88  ///@{
89  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, file_name)
90  ///@}
91 
92  /** @name files_regex
93  * Set a regular expression identifying the set of files comprising the
94  * dataset. This should contain the full path to the files and the regular
95  * expression. Only the final component of a path may contain a regex.
96  * Be aware that regular expression control characters do not have the
97  * same meaning as shell glob control characters. When used in a shell,
98  * regular expression control characters need to be quoted or escaped to
99  * prevent the shell from interpreting them.
100  */
101  ///@{
102  TECA_ALGORITHM_PROPERTY(std::string, files_regex)
103  ///@}
104 
105 
106  /** @name t_axis_variable
107  * Set the name of the variable to use for the t coordinate axis.
108  * An empty string disables this dimension.
109  */
110  ///@{
111  TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
112  ///@}
113 
114  /** @name calendar
115  * Override the calendar. When specified the value takes precedence over
116  * the values found in the file.
117  */
118  ///@{
119  TECA_ALGORITHM_PROPERTY(std::string, calendar)
120  ///@}
121 
122  /** @name t_units
123  * Override the time units. When specified the value takes precedence over
124  * the values found in the file.
125  */
126  ///@{
127  TECA_ALGORITHM_PROPERTY(std::string, t_units)
128  ///@}
129 
130  /** @name filename_time_template
131  * a way to infer time from the filename if the time axis is not stored in
132  * the file itself. std::get_time format codes are used. If a calendar is
133  * not specified then the standard calendar is used. If time units are not
134  * specified then the time units will be "days since %Y-%m-%d 00:00:00"
135  * where Y, m, and d are computed from the filename of the first file. Set
136  * t_axis_variable to an empty string to use this feature.
137  *
138  * For example, for the list of files:
139  *
140  * > my_file_20170516_00.nc
141  * > my_file_20170516_03.nc
142  * > ...
143  *
144  * the template would be
145  *
146  * > my_file_%Y%m%d_%H.nc
147  */
148  ///@{
149  TECA_ALGORITHM_PROPERTY(std::string, filename_time_template)
150  ///@}
151 
152  /** @name t_value
153  * an explicit list of double precision time values to use. set
154  * t_axis_variable to an empty string to use.
155  */
156  ///@{
157  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
158  ///@}
159 
160 protected:
162 
163 private:
164  using teca_algorithm::get_output_metadata;
165 
166  teca_metadata get_output_metadata(unsigned int port,
167  const std::vector<teca_metadata> &input_md) override;
168 
169  const_p_teca_dataset execute(unsigned int port,
170  const std::vector<const_p_teca_dataset> &input_data,
171  const teca_metadata &request) override;
172 
173  void set_modified() override;
174  void clear_cached_metadata();
175 
176 private:
177  std::vector<std::string> file_names;
178  std::string files_regex;
179  std::string t_axis_variable;
180  std::string calendar;
181  std::string t_units;
182  std::string filename_time_template;
183  std::vector<double> t_values;
184  int max_metadata_ranks;
185 
186  struct teca_array_collection_reader_internals;
187  teca_array_collection_reader_internals *internals;
188 };
189 
190 #endif
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:244
A reader for collections of arrays stored in NetCDF format.
Definition: teca_array_collection_reader.h:72
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:22
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.