TECA
The Toolkit for Extreme Climate Analysis
teca_cf_writer.h
1 #ifndef teca_cf_writer_h
2 #define teca_cf_writer_h
3 
4 #include "teca_shared_object.h"
5 #include "teca_threaded_algorithm.h"
6 #include "teca_metadata.h"
7 
8 #include <vector>
9 #include <string>
10 
11 TECA_SHARED_OBJECT_FORWARD_DECL(teca_cf_writer)
12 
13 /// A writer for Cartesian meshes in NetCDF CF2 format.
14 /**
15  * Writes data to NetCDF CF2 format. This algorithm is conceptually an
16  * execution engine capable of driving the above pipeline with or without
17  * threads and streaming results in the order that they are generated, placing them
18  * in the correct location in the output dataset. The output dataset is a
19  * collection of files each with a user specified number of time steps per
20  * file. The output dataset may be arranged using a fixed number of steps per
21  * file or daily, monthly, seasonal, or yearly file layouts. The total number
22  * of time steps in the output dataset is determined by the combination of the
23  * number of time steps in the input dataset and user defined subsetting if
24  * any. The writer uses MPI collective I/O to produce the files. In parallel
25  * time steps are mapped to ranks such that each rank has approximately the
26  * same number of time steps. Incoming steps are mapped to files. A given MPI
27  * rank may be writing to multiple files. The use of MPI collectives implies
28  * care must be taken in its use to avoid deadlocks.
29  *
30  * Due to the use of MPI collective I/O, certain information must be known
31  * during the report phase of pipeline execution, before the execute phase of
32  * pipeline execution begins. The information that is needed is:
33  *
34  * ### number of time steps ###
35  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
36  * specified by the pipeline control index_initializer key found in metadata
37  * produced by the source (e.g CF reader)
38  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39  *
40  * ### extent ###
41  * ~~~~~~~~~~~~~~
42  * 6, 64 bit integers defining the 3 spatial dimensions of each timestep found
43  * in metadata produced by the source (e.g CF reader)
44  * ~~~~~~~~~~~~~~
45  *
46  * ### point arrays ###
47  * ~~~~~~~~~~~~~~~~~~~~
48  * list of strings naming the point centered arrays that will be written. set
49  * by the user prior to execution by writer properties.
50  * ~~~~~~~~~~~~~~~~~~~~
51  *
52  * ### information arrays ###
53  * ~~~~~~~~~~~~~~~~~~~~~~~~~~
54  * list of strings naming the non-geometric arrays that will be written. set by
55  * the user prior to execution by writer properties. See also size attribute
56  * below.
57  * ~~~~~~~~~~~~~~~~~~~~~~~~~~
58  *
59  * ### type_code ###
60  * ~~~~~~~~~~~~~~~~~
61  * the teca_variant_array_code naming the type of each array. this will be in
62  * the array attributes metadata generated by the producer of the array (e.g
63  * any algorithm that adds an array should provide this metadata).
64  * ~~~~~~~~~~~~~~~~~
65  *
66  * ### size ###
67  * ~~~~~~~~~~~~
68  * a 64 bit integer declaring the size of each information array. this will be
69  * in the array attributes metadata generated by the producer of the array (e.g
70  * any algorithm that adds an array should provide this metadata).
71  * ~~~~~~~~~~~~
72  */
74 {
75 public:
76  TECA_ALGORITHM_STATIC_NEW(teca_cf_writer)
77  TECA_ALGORITHM_DELETE_COPY_ASSIGN(teca_cf_writer)
78  TECA_ALGORITHM_CLASS_NAME(teca_cf_writer)
79  ~teca_cf_writer();
80 
81  // report/initialize to/from Boost program options
82  // objects.
83  TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION()
84  TECA_SET_ALGORITHM_PROPERTIES()
85 
86  /** @name file_name
87  * Set the output filename. For time series the substring %t% is replaced
88  * with the current time step or date. See comments on date_format below
89  * for info about date formatting.
90  */
91  ///@{
92  TECA_ALGORITHM_PROPERTY(std::string, file_name)
93  ///@}
94 
95 
96  /** @name date_format
97  * set the format for the date to write in the filename. this requires the
98  * input dataset to have unit/calendar information. if none are available,
99  * the time index is used instead. (%F-%HZ)
100  */
101  ///@{
102  TECA_ALGORITHM_PROPERTY(std::string, date_format)
103  ///@}
104 
105  /** @name first_step
106  * Set the first step in the range of time steps to process.
107  */
108  ///@{
109  TECA_ALGORITHM_PROPERTY(long, first_step)
110  ///@}
111 
112  /** @name last_step
113  * Set the last step in the range of time steps to process.
114  */
115  ///@{
116  TECA_ALGORITHM_PROPERTY(long, last_step)
117  ///@}
118 
119  /** @name layout
120  * Set the layout mode to one of : number_of_steps, daily, monthly,
121  * seasonal, or yearly. This controls the size of the files written. In
122  * daily, monthly, seasonal, and yearly modes each file will contain the
123  * steps spanning the given duration. The number_of_steps mode writes a
124  * fixed number of steps per file which can be set using the
125  * steps_per_file property.
126  */
127  ///@{
128  enum {invalid=0, number_of_steps=1, daily=2, monthly=3, seasonal=4, yearly=5};
129  TECA_ALGORITHM_PROPERTY_V(int, layout)
130 
131  void set_layout_to_number_of_steps() { this->set_layout(number_of_steps); } ///< shortcut for set_layout(number_of_steps)
132  void set_layout_to_daily() { this->set_layout(daily); } ///< shortcut for set_layout(daily)
133  void set_layout_to_monthly() { this->set_layout(monthly); } ///< shortcut for set_layout(monthly)
134  void set_layout_to_seasonal() { this->set_layout(seasonal); } ///< shortcut for set_layout(seasonal)
135  void set_layout_to_yearly() { this->set_layout(yearly); } ///< shortcut for set_layout(yearly)
136 
137  /// set the layout mode from a string.
138  int set_layout(const std::string &layout);
139 
140  /// @returns 0 if the passed value is a valid layout mode (number_of_steps, daily, monthly, seasonal, or yearly); otherwise reports the value with TECA_ERROR and returns -1
141  int validate_layout(int mode)
142  {
143  if ((mode == number_of_steps) || (mode == daily) ||
144  (mode == monthly) || (mode == seasonal) || (mode == yearly))
145  return 0;
146 
147  TECA_ERROR("Invalid layout mode " << mode)
148  return -1;
149  }
150  ///@}
151 
152  /** @name steps_per_file
153  * Set how many time steps are written to each file when the layout mode is
154  * set to number_of_steps.
155  */
156  ///@{
157  TECA_ALGORITHM_PROPERTY(unsigned int, steps_per_file)
158  ///@}
159 
160  /** @name mode_flags
161  * sets the flags passed to NetCDF during file creation. (NC_CLOBBER)
162  */
163  ///@{
164  TECA_ALGORITHM_PROPERTY(int, mode_flags)
165  ///@}
166 
167 
168  /** @name use_unlimited_dim
169  * if set the slowest varying dimension is specified to be NC_UNLIMITED.
170  * This has a negative impact on performance when reading the values in a
171  * single pass. However, unlimited dimensions are used ubiquitously thus
172  * by default it is set. For data being consumed by TECA performance will
173  * be better when using fixed dimensions. (1) This feature requires
174  * collective writes and is incompatible with out of order execution,
175  * and hence currently not supported.
176  */
177  ///@{
178  TECA_ALGORITHM_PROPERTY(int, use_unlimited_dim)
179  ///@}
180 
181  /** @name compression_level
182  * sets the compression level used for each variable. compression is not
183  * used if the value is less than or equal to 0. This feature requires
184  * collective writes and is incompatible with out of order execution,
185  * and hence currently not supported.
186  */
187  ///@{
188  TECA_ALGORITHM_PROPERTY(int, compression_level)
189  ///@}
190 
191  /** @name flush_files
192  * Flush files before closing them, this may be necessary if accessing data
193  * immediately.
194  */
195  ///@{
196  TECA_ALGORITHM_PROPERTY(int, flush_files)
197  ///@}
198 ;
199 
200  /** @name point_array
201  * Specify the arrays to write. A data array is only written to disk if
202  * it is included in this list. It is an error to not specify at least
203  * one point centered array to write
204  */
205  ///@{
206  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, point_array)
207  ///@}
208 
209  /** @name information_array
210  * Set the list of non-geometric arrays to write.
211  */
212  ///@{
213  TECA_ALGORITHM_VECTOR_PROPERTY(std::string, information_array)
214  ///@}
215 
216 
217 protected:
218  teca_cf_writer();
219 
220 private:
221  const_p_teca_dataset execute(unsigned int port,
222  const std::vector<const_p_teca_dataset> &input_data,
223  const teca_metadata &request, int streaming) override;
224 
225  teca_metadata get_output_metadata(unsigned int port,
226  const std::vector<teca_metadata> &input_md) override;
227 
228  std::vector<teca_metadata> get_upstream_request(unsigned int port,
229  const std::vector<teca_metadata> &input_md,
230  const teca_metadata &request) override;
231 
232  // flush data to disk. this may be necessary if accessing data
233  // immediately.
234  int flush();
235 
236 private:
237  std::string file_name;
238  std::string date_format;
239  long first_step;
240  long last_step;
241  int layout;
242  unsigned int steps_per_file;
243  int mode_flags;
244  int use_unlimited_dim;
245  int compression_level;
246  int flush_files;
247 
248  std::vector<std::string> point_arrays;
249  std::vector<std::string> information_arrays;
250 
251  class internals_t;
252  internals_t *internals;
253 };
254 
255 #endif
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:18
teca_cf_writer
A writer for Cartesian meshes in NetCDF CF2 format.
Definition: teca_cf_writer.h:73
teca_shared_object.h
teca_cf_writer::validate_layout
int validate_layout(int mode)
Definition: teca_cf_writer.h:141
teca_threaded_algorithm
This is the base class defining a threaded algorithm.
Definition: teca_threaded_algorithm.h:46
TECA_ERROR
#define TECA_ERROR(_msg)
Constructs an error message and sends it to the stderr stream.
Definition: teca_common.h:138