TECA
The Toolkit for Extreme Climate Analysis
teca_array_collection_reader.h
1 #ifndef teca_array_collection_reader_h
2 #define teca_array_collection_reader_h
3 
4 #include "teca_algorithm.h"
5 #include "teca_metadata.h"
6 #include "teca_shared_object.h"
7 #include "teca_array_collection.h"
8 
9 #include <vector>
10 #include <string>
11 #include <mutex>
12 
13 
14 TECA_SHARED_OBJECT_FORWARD_DECL(teca_array_collection_reader)
15 
16 /// A reader for collections of arrays stored in NetCDF format.
17 /**
18  * The reader reads requested arrays into a teca_array_collection.
19  *
20  * The time varying dataset to read is identified by a regular expression
21  * matching a set of files. Note, regular expressions are similar to, and more
22  * powerful than, the more familiar shell glob, but the control characters have
23  * different meanings.
24  *
25  * ### metadata keys:
26  *
27  * | key | description |
28  * | ---- | ----------- |
29  * | variables | a list of all available variables. |
30  * | attributes | a metadata object holding all NetCDF attributes for the variables |
31  * | coordinates | a metadata object holding names and arrays of the coordinate axes |
32  * | files | list of files in this dataset |
33  * | step_count | list of the number of steps in each file |
34  * | index_initializer_key | number_of_time_steps |
35  * | number_of_time_steps | total number of time steps in all files |
36  * | index_request_key | time_step |
37  *
38  * ### attribute metadata:
39  *
40  * | key | description |
41  * | ---- | ----------- |
42  * | [variable name] | a metadata object holding all NetCDF attributes, and |
43  * | | TECA specific per-array metadata |
44  *
45  * ### coordinate metadata:
46  *
47  * | key | description |
48  * | ---- | ----------- |
49  * | t_axis_variable | name of t axis variable |
50  * | t | array of t coordinates |
51  *
52  * ### request keys:
53  *
54  * | key | description |
55  * | ---- | ----------- |
56  * | time_step | the time step to read |
57  * | arrays | list of arrays to read |
58  *
59  * ### output:
60  * The reader generates a teca_array_collection containing the requested
61  * arrays for the requested timestep, and the value at this timestep for all
62  * time variables.
63  */
65 {
66 public:
67  TECA_ALGORITHM_STATIC_NEW(teca_array_collection_reader)
68  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_array_collection_reader)
69  TECA_ALGORITHM_CLASS_NAME(teca_array_collection_reader)
71 
72  // report/initialize to/from Boost program options
73  // objects.
74  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
75  TECA_SET_ALGORITHM_PROPERTIES()
76 
77  /** @name file_name
78  * Set a list of files to open. If this is used then the files_regex is
79  * ignored.
80  */
81  ///@{
82  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, file_name)
83  ///@}
84 
85  /** @name files_regex
86  * Set a regular expression identifying the set of files comprising the
87  * dataset. This should contain the full path to the files and the regular
88  * expression. Only the final component of a path may contain a regex.
89  * Be aware that regular expression control characters do not have the
90  * same meaning as shell glob control characters. When used in a shell
91  * regular expression control characters need to be quoted or escaped to
92  * prevent the shell from interpreting them.
93  */
94  ///@{
95  TECA_ALGORITHM_PROPERTY(std::string, files_regex)
96  ///@}
97 
98 
99  /** @name t_axis_variable
100  * Set the name of the variable to use for the t coordinate axis.
101  * An empty string disables this dimension.
102  */
103  ///@{
104  TECA_ALGORITHM_PROPERTY(std::string, t_axis_variable)
105  ///@}
106 
107  /** @name calendar
108  * Override the calendar. When specified the value takes precedence over
109  * the value found in the file.
110  */
111  ///@{
112  TECA_ALGORITHM_PROPERTY(std::string, calendar)
113  ///@}
114 
115  /** @name t_units
116  * Override the time units. When specified the value takes precedence over
117  * the values found in the file.
118  */
119  ///@{
120  TECA_ALGORITHM_PROPERTY(std::string, t_units)
121  ///@}
122 
123  /** @name filename_time_template
124  * A way to infer time from the filename if the time axis is not stored in
125  * the file itself. std::get_time format codes are used. If a calendar is
126  * not specified then the standard calendar is used. If time units are not
127  * specified then the time units will be "days since %Y-%m-%d 00:00:00"
128  * where Y, m, and d are computed from the filename of the first file. Set
129  * t_axis_variable to an empty string to enable this feature.
130  *
131  * For example, for the list of files:
132  *
133  * > my_file_20170516_00.nc
134  * > my_file_20170516_03.nc
135  * > ...
136  *
137  * the template would be
138  *
139  * > my_file_%Y%m%d_%H.nc
140  */
141  ///@{
142  TECA_ALGORITHM_PROPERTY(std::string, filename_time_template)
143  ///@}
144 
145  /** @name t_value
146  * An explicit list of double precision time values to use. Set
147  * t_axis_variable to an empty string to enable this feature.
148  */
149  ///@{
150  TECA_ALGORITHM_VECTOR_PROPERTY(double, t_value)
151  ///@}
152 
153 protected:
155 
156 private:
157  teca_metadata get_output_metadata(unsigned int port,
158  const std::vector<teca_metadata> &input_md) override;
159 
160  const_p_teca_dataset execute(unsigned int port,
161  const std::vector<const_p_teca_dataset> &input_data,
162  const teca_metadata &request) override;
163 
164  void set_modified() override;
165  void clear_cached_metadata();
166 
167 private:
168  std::vector<std::string> file_names;
169  std::string files_regex;
170  std::string t_axis_variable;
171  std::string calendar;
172  std::string t_units;
173  std::string filename_time_template;
174  std::vector<double> t_values;
175  int max_metadata_ranks;
176 
177  struct teca_array_collection_reader_internals;
178  teca_array_collection_reader_internals *internals;
179 };
180 
181 #endif
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:18
teca_array_collection_reader
A reader for collections of arrays stored in NetCDF format.
Definition: teca_array_collection_reader.h:64
teca_shared_object.h
teca_algorithm
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:237