TECA
The Toolkit for Extreme Climate Analysis
teca_cf_writer.h
1 #ifndef teca_cf_writer_h
2 #define teca_cf_writer_h
3 
4 #include "teca_config.h"
5 #include "teca_shared_object.h"
6 #include "teca_threaded_algorithm.h"
7 #include "teca_metadata.h"
8 
9 #include <vector>
10 #include <string>
11 
12 TECA_SHARED_OBJECT_FORWARD_DECL(teca_cf_writer)
13 
14 /// A writer for Cartesian meshes in NetCDF CF2 format.
15 /**
16  * Writes data to NetCDF CF2 format. This algorithm is conceptually an
17  * execution engine capable of driving the above pipeline with or without
18  * threads and streaming results in the order that they are generated, placing them
19  * in the correct location in the output dataset. The output dataset is a
20  * collection of files each with a user specified number of time steps per
21  * file. The output dataset may be arranged using a fixed number of steps per
22  * file or daily, monthly, seasonal, or yearly file layouts. The total number
23  * of time steps in the output dataset is determined by the combination of the
24  * number of time steps in the input dataset and user defined subsetting if
25  * any. The writer uses MPI collective I/O to produce the files. In parallel
26  * time steps are mapped to ranks such that each rank has approximately the
27  * same number of time steps. Incoming steps are mapped to files. A given MPI
28  * rank may be writing to multiple files. The use of MPI collectives implies
29  * care must be taken in its use to avoid deadlocks.
30  *
31  * Due to the use of MPI collective I/O, certain information must be known
32  * during the report phase of pipeline execution, before the execute phase of
33  * pipeline execution begins. The information that is needed is:
34  *
35  * ### number of time steps ###
36  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
37  * specified by the pipeline control index_initializer key found in metadata
38  * produced by the source (e.g CF reader)
39  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
40  *
41  * ### extent ###
42  * ~~~~~~~~~~~~~~
43  * 6, 64 bit integers defining the 3 spatial dimensions of each timestep found
44  * in metadata produced by the source (e.g CF reader)
45  * ~~~~~~~~~~~~~~
46  *
47  * ### point arrays ###
48  * ~~~~~~~~~~~~~~~~~~~~
49  * list of strings naming the point centered arrays that will be written. set
50  * by the user prior to execution by writer properties.
51  * ~~~~~~~~~~~~~~~~~~~~
52  *
53  * ### information arrays ###
54  * ~~~~~~~~~~~~~~~~~~~~~~~~~~
55  * list of strings naming the non-geometric arrays that will be written. set by
56  * the user prior to execution by writer properties. See also size attribute
57  * below.
58  * ~~~~~~~~~~~~~~~~~~~~~~~~~~
59  *
60  * ### type_code ###
61  * ~~~~~~~~~~~~~~~~~
62  * the teca_variant_array_code naming the type of each array. this will be in
63  * the array attributes metadata generated by the producer of the array (e.g
64  * any algorithm that adds an array should provide this metadata).
65  * ~~~~~~~~~~~~~~~~~
66  *
67  * ### size ###
68  * ~~~~~~~~~~~~
69  * a 64 bit integer declaring the size of each information array. this will be
70  * in the array attributes metadata generated by the producer of the array (e.g
71  * any algorithm that adds an array should provide this metadata).
72  * ~~~~~~~~~~~~
73  */
75 {
76 public:
77  TECA_ALGORITHM_STATIC_NEW(teca_cf_writer)
78  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_cf_writer)
79  TECA_ALGORITHM_CLASS_NAME(teca_cf_writer)
80  ~teca_cf_writer();
81 
82  // report/initialize to/from Boost program options
83  // objects.
84  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
85  TECA_SET_ALGORITHM_PROPERTIES()
86 
87  /** @name file_name
88  * Set the output filename. For time series the substring %t% is replaced
89  * with the current time step or date. See comments on date_format below
90  * for info about date formatting.
91  */
92  ///@{
93  TECA_ALGORITHM_PROPERTY(std::string, file_name)
94  ///@}
95 
96 
97  /** @name date_format
98  * set the format for the date to write in the filename. this requires the
99  * input dataset to have unit/calendar information. If none is available,
100  * the time index is used instead. (%F-%HZ)
101  */
102  ///@{
103  TECA_ALGORITHM_PROPERTY(std::string, date_format)
104  ///@}
105 
106  /** @name first_step
107  * Set the first step in the range of time steps to process.
108  */
109  ///@{
110  TECA_ALGORITHM_PROPERTY(long, first_step)
111  ///@}
112 
113  /** @name last_step
114  * Set the last step in the range of time steps to process.
115  */
116  ///@{
117  TECA_ALGORITHM_PROPERTY(long, last_step)
118  ///@}
119 
120  /** @name layout
121  * Set the layout mode to one of : number_of_steps, daily, monthly,
122  * seasonal, or yearly. This controls the size of the files written. In
123  * daily, monthly, seasonal, and yearly modes each file will contain the
124  * steps spanning the given duration. The number_of_steps mode writes a
125  * fixed number of steps per file which can be set using the
126  * steps_per_file property.
127  */
128  ///@{
129  enum {invalid=0, number_of_steps=1, daily=2, monthly=3, seasonal=4, yearly=5};
130  TECA_ALGORITHM_PROPERTY_V(int, layout)
131 
132  void set_layout_to_number_of_steps() { this->set_layout(number_of_steps); } ///< passes number_of_steps to set_layout
133  void set_layout_to_daily() { this->set_layout(daily); } ///< passes daily to set_layout
134  void set_layout_to_monthly() { this->set_layout(monthly); } ///< passes monthly to set_layout
135  void set_layout_to_seasonal() { this->set_layout(seasonal); } ///< passes seasonal to set_layout
136  void set_layout_to_yearly() { this->set_layout(yearly); } ///< passes yearly to set_layout
137 
138  /// set the layout mode from a string.
139  int set_layout(const std::string &layout);
140 
141  /// Checks a layout mode. @returns 0 if the passed value is number_of_steps, daily, monthly, seasonal, or yearly; otherwise reports the value with TECA_ERROR and returns -1
142  int validate_layout(int mode)
143  {
144  if ((mode == number_of_steps) || (mode == daily) ||
145  (mode == monthly) || (mode == seasonal) || (mode == yearly))
146  return 0;
147 
148  TECA_ERROR("Invalid layout mode " << mode) // reached for any other value, including the enum's invalid (0)
149  return -1;
150  }
151  ///@}
152 
153  /** @name steps_per_file
154  * Set how many time steps are written to each file when the layout mode is
155  * set to number_of_steps.
156  */
157  ///@{
158  TECA_ALGORITHM_PROPERTY(unsigned int, steps_per_file)
159  ///@}
160 
161  /** @name mode_flags
162  * sets the flags passed to NetCDF during file creation. (NC_CLOBBER)
163  */
164  ///@{
165  TECA_ALGORITHM_PROPERTY(int, mode_flags)
166  ///@}
167 
168 
169  /** @name use_unlimited_dim
170  * if set the slowest varying dimension is specified to be NC_UNLIMITED.
171  * This has a negative impact on performance when reading the values in a
172  * single pass. However, unlimited dimensions are used ubiquitously thus
173  * by default it is set. For data being consumed by TECA performance will
174  * be better when using fixed dimensions. (1) This feature requires
175  * collective writes and is incompatible with out of order execution,
176  * and hence currently not supported.
177  */
178  ///@{
179  TECA_ALGORITHM_PROPERTY(int, use_unlimited_dim)
180  ///@}
181 
182  /** @name compression_level
183  * sets the compression level used for each variable. Compression is not
184  * used if the value is less than or equal to 0. This feature requires
185  * collective writes and is incompatible with out of order execution,
186  * and hence currently not supported.
187  */
188  ///@{
189  TECA_ALGORITHM_PROPERTY(int, compression_level)
190  ///@}
191 
192  /** @name flush_files
193  * Flush files before closing them, this may be necessary if accessing data
194  * immediately.
195  */
196  ///@{
197  TECA_ALGORITHM_PROPERTY(int, flush_files)
198  ///@}
199 ;
200 
201  /** @name point_array
202  * Specify the arrays to write. A data array is only written to disk if
203  * it is included in this list. It is an error to not specify at least
204  * one point centered array to write
205  */
206  ///@{
207  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, point_array)
208  ///@}
209 
210  /** @name information_array
211  * Set the list of non-geometric arrays to write.
212  */
213  ///@{
214  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, information_array)
215  ///@}
216 
217 
218 protected:
219  teca_cf_writer();
220 
221 private:
222  using teca_algorithm::get_output_metadata;
223  using teca_algorithm::execute;
224 
225  const_p_teca_dataset execute(unsigned int port,
226  const std::vector<const_p_teca_dataset> &input_data,
227  const teca_metadata &request, int streaming) override;
228 
229  teca_metadata get_output_metadata(unsigned int port,
230  const std::vector<teca_metadata> &input_md) override;
231 
232  std::vector<teca_metadata> get_upstream_request(unsigned int port,
233  const std::vector<teca_metadata> &input_md,
234  const teca_metadata &request) override;
235 
236  // flush data to disk. this may be necessary if accessing data
237  // immediately.
238  int flush();
239 
240 private:
241  std::string file_name;
242  std::string date_format;
243  long first_step;
244  long last_step;
245  int layout;
246  unsigned int steps_per_file;
247  int mode_flags;
248  int use_unlimited_dim;
249  int compression_level;
250  int flush_files;
251 
252  std::vector<std::string> point_arrays;
253  std::vector<std::string> information_arrays;
254 
255  class internals_t;
256  internals_t *internals;
257 };
258 
259 #endif
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:21
teca_cf_writer
A writer for Cartesian meshes in NetCDF CF2 format.
Definition: teca_cf_writer.h:74
teca_shared_object.h
teca_cf_writer::validate_layout
int validate_layout(int mode)
Definition: teca_cf_writer.h:142
teca_error::TECA_EXPORT
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
teca_threaded_algorithm
This is the base class defining a threaded algorithm.
Definition: teca_threaded_algorithm.h:60
TECA_ERROR
#define TECA_ERROR(_msg)
Constructs an error message and sends it to the stderr stream.
Definition: teca_common.h:146
teca_algorithm
The interface to TECA pipeline architecture.
Definition: teca_algorithm.h:237