TECA
The Toolkit for Extreme Climate Analysis
teca_array_collection_reader.h
1 #ifndef teca_array_collection_reader_h
2 #define teca_array_collection_reader_h
3 
4 #include "teca_config.h"
5 #include "teca_algorithm.h"
6 #include "teca_metadata.h"
7 #include "teca_shared_object.h"
8 #include "teca_array_collection.h"
9 
10 #include <vector>
11 #include <string>
12 #include <mutex>
13 
14 
15 TECA_SHARED_OBJECT_FORWARD_DECL(teca_array_collection_reader)
16 
17 /// A reader for collections of arrays stored in NetCDF format.
18 /**
19  * The reader reads requested arrays into a teca_array_collection.
20  *
21  * The time varying dataset to read is identified by a regular expression
22  * identifying a set of files. Note, regular expressions are similar to, and
23  * more powerful than, the more familiar shell glob but the control characters
24  * have different meanings.
25  *
26  * ### metadata keys:
27  *
28  * | key | description |
29  * | ---- | ----------- |
30  * | variables | a list of all available variables. |
31  * | attributes | a metadata object holding all NetCDF attributes for the variables |
32  * | coordinates | a metadata object holding names and arrays of the coordinate axes |
33  * | files | list of files in this dataset |
34  * | step_count | list of the number of steps in each file |
35  * | index_initializer_key | number_of_time_steps |
36  * | number_of_time_steps | total number of time steps in all files |
37  * | index_request_key | time_step |
38  *
39  * ### attribute metadata:
40  *
41  * | key | description |
42  * | ---- | ----------- |
43  * | [variable name] | a metadata object holding all NetCDF attributes, and |
44  * | | TECA specific per-array metadata |
45  *
46  * ### coordinate metadata:
47  *
48  * | key | description |
49  * | ---- | ----------- |
50  * | t_axis_variable | name of t axis variable |
51  * | t | array of t coordinates |
52  *
53  * ### request keys:
54  *
55  * | key | description |
56  * | ---- | ----------- |
57  * | time_step | the time step to read |
58  * | arrays | list of arrays to read |
59  *
60  * ### output:
61  * The reader generates a teca_array_collection for the requested timestep
62  * containing the requested arrays and the value at this timestep for all
63  * time variables.
64  */
66 {
67 public:
68  TECA_ALGORITHM_STATIC_NEW(teca_array_collection_reader)
69  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_array_collection_reader)
70  TECA_ALGORITHM_CLASS_NAME(teca_array_collection_reader)
72 
73  // report/initialize to/from Boost program options
74  // objects.
75  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
76  TECA_SET_ALGORITHM_PROPERTIES()
77 
78  /** @name file_name
79  * Set a list of files to open. If this is used then the files_regex is
80  * ignored.
81  */
82  ///@{
83  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, file_name)
84  ///@}
85 
86  /** @name files_regex
87  * Set a regular expression identifying the set of files comprising the
88  * dataset. This should contain the full path to the files and the regular
89  * expression. Only the final component of a path may contain a regex.
90  * Be aware that regular expression control characters do not have the
91  * same meaning as shell glob control characters. When used in a shell,
92  * regular expression control characters need to be quoted or escaped to
93  * prevent the shell from interpreting them.
94  */
95  ///@{
96  TECA_ALGORITHM_PROPERTY(std::string, files_regex)
97  ///@}
98 
99 
100  /** @name t_axis_variable
101  * Set the name of the variable to use for the t coordinate axis.
102  * An empty string disables this dimension.
103  */
104  ///@{
105  TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
106  ///@}
107 
108  /** @name calendar
109  * Override the calendar. When specified the value takes precedence over
110  * the values found in the file.
111  */
112  ///@{
113  TECA_ALGORITHM_PROPERTY(std::string, calendar)
114  ///@}
115 
116  /** @name t_units
117  * Override the time units. When specified the value takes precedence over
118  * the values found in the file.
119  */
120  ///@{
121  TECA_ALGORITHM_PROPERTY(std::string, t_units)
122  ///@}
123 
124  /** @name filename_time_template
125  * a way to infer time from the filename if the time axis is not stored in
126  * the file itself. std::get_time format codes are used. If a calendar is
127  * not specified then the standard calendar is used. If time units are not
128  * specified then the time units will be "days since %Y-%m-%d 00:00:00"
129  * where Y, m, and d are computed from the filename of the first file. Set
130  * t_axis_variable to an empty string to use this feature.
131  *
132  * For example, for the list of files:
133  *
134  * > my_file_20170516_00.nc
135  * > my_file_20170516_03.nc
136  * > ...
137  *
138  * the template would be
139  *
140  * > my_file_%Y%m%d_%H.nc
141  */
142  ///@{
143  TECA_ALGORITHM_PROPERTY(std::string, filename_time_template)
144  ///@}
145 
146  /** @name t_value
147  * An explicit list of double precision time values to use. Set
148  * t_axis_variable to an empty string to use this feature.
149  */
150  ///@{
151  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
152  ///@}
153 
154 protected:
156 
157 private:
158  using teca_algorithm::get_output_metadata;
159 
160  teca_metadata get_output_metadata(unsigned int port,
161  const std::vector<teca_metadata> &input_md) override;
162 
163  const_p_teca_dataset execute(unsigned int port,
164  const std::vector<const_p_teca_dataset> &input_data,
165  const teca_metadata &request) override;
166 
167  void set_modified() override;
168  void clear_cached_metadata();
169 
170 private:
171  std::vector<std::string> file_names;
172  std::string files_regex;
173  std::string t_axis_variable;
174  std::string calendar;
175  std::string t_units;
176  std::string filename_time_template;
177  std::vector<double> t_values;
178  int max_metadata_ranks;
179 
180  struct teca_array_collection_reader_internals;
181  teca_array_collection_reader_internals *internals;
182 };
183 
184 #endif
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:21
teca_array_collection_reader
A reader for collections of arrays stored in NetCDF format.
Definition: teca_array_collection_reader.h:65
teca_shared_object.h
teca_error::TECA_EXPORT
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
teca_algorithm
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:237