TECA
The Toolkit for Extreme Climate Analysis
teca_netcdf_util.h
Go to the documentation of this file.
1 #ifndef teca_netcdf_util_h
2 #define teca_netcdf_util_h
3 
4 /// @file
5 
6 #include "teca_config.h"
7 #include "teca_mpi.h"
8 #include "teca_metadata.h"
9 #include "teca_thread_pool.h"
10 
11 #include <mutex>
12 #include <string>
13 
14 #include <netcdf.h>
15 #if defined(TECA_HAS_NETCDF_MPI)
16 #include <netcdf_par.h>
17 #endif
18 
19 /// macro to help with netcdf floating point data types: runs code_ with NC_T aliased to float or double per type code tc_, and invokes TECA_ERROR for any other code
20 #define NC_DISPATCH_FP(tc_, code_) \
21  switch (tc_) \
22  { \
23  NC_DISPATCH_CASE(NC_FLOAT, float, code_) \
24  NC_DISPATCH_CASE(NC_DOUBLE, double, code_) \
25  default: \
26  TECA_ERROR("netcdf type code " << tc_ \
27  << " is not a floating point type") \
28  }
29 
30 /// macro to help with netcdf data types: runs code_ with NC_T aliased to the C++ type listed for type code tc_, and invokes TECA_ERROR for any code not listed
31 #define NC_DISPATCH(tc_, code_) \
32  switch (tc_) \
33  { \
34  NC_DISPATCH_CASE(NC_BYTE, char, code_) \
35  NC_DISPATCH_CASE(NC_UBYTE, unsigned char, code_) \
36  NC_DISPATCH_CASE(NC_CHAR, char, code_) \
37  NC_DISPATCH_CASE(NC_SHORT, short int, code_) \
38  NC_DISPATCH_CASE(NC_USHORT, unsigned short int, code_) \
39  NC_DISPATCH_CASE(NC_INT, int, code_) \
40  NC_DISPATCH_CASE(NC_UINT, unsigned int, code_) \
41  NC_DISPATCH_CASE(NC_INT64, long long, code_) \
42  NC_DISPATCH_CASE(NC_UINT64, unsigned long long, code_) \
43  NC_DISPATCH_CASE(NC_FLOAT, float, code_) \
44  NC_DISPATCH_CASE(NC_DOUBLE, double, code_) \
45  default: \
46  TECA_ERROR("netcdf type code " << tc_ \
47  << " is not supported") \
48  }
49 
50 /// macro that executes code_ when the type code cc_ is matched; inside code_, NC_T is an alias for the C++ type tt_.
51 #define NC_DISPATCH_CASE(cc_, tt_, code_) \
52  case cc_: \
53  { \
54  using NC_T = tt_; \
55  code_ \
56  break; \
57  }
58 
59 /// Codes dealing with NetCDF I/O calls
61 {
62 
63 /// A traits class mapping to netcdf from C++. The primary template is empty; DECLARE_NETCDF_TT provides the specializations.
64 template<typename num_t> class netcdf_tt {};
65 
66 /// A traits class mapping to C++ from netcdf. The primary template is empty; DECLARE_CPP_TT provides the specializations.
67 template<int nc_enum> class cpp_tt {};
68 
69 #define DECLARE_NETCDF_TT(cpp_t_, nc_c_) \
70 /** A traits class mapping to NetCDF from C++, specialized for cpp_t_: type_code is nc_c_ and name() returns the spelling of nc_c_ */ \
71 template <> class netcdf_tt<cpp_t_> \
72 { \
73 public: \
74  enum { type_code = nc_c_ }; \
75  static const char *name() { return #nc_c_; } \
76 };
77 DECLARE_NETCDF_TT(char, NC_BYTE)
78 DECLARE_NETCDF_TT(unsigned char, NC_UBYTE)
79 //DECLARE_NETCDF_TT(char, NC_CHAR) -- disabled; netcdf_tt<char> is already specialized above with NC_BYTE
80 DECLARE_NETCDF_TT(short int, NC_SHORT)
81 DECLARE_NETCDF_TT(unsigned short int, NC_USHORT)
82 DECLARE_NETCDF_TT(int, NC_INT)
83 DECLARE_NETCDF_TT(long, NC_LONG) // NOTE(review): netcdf.h documents NC_LONG as a deprecated alias of NC_INT -- confirm this is the intended code for long
84 DECLARE_NETCDF_TT(unsigned long, NC_LONG) // NOTE(review): same code as the signed long mapping above -- confirm this is intended for an unsigned type
85 DECLARE_NETCDF_TT(unsigned int, NC_UINT)
86 DECLARE_NETCDF_TT(long long, NC_INT64)
87 DECLARE_NETCDF_TT(unsigned long long, NC_UINT64)
88 DECLARE_NETCDF_TT(float, NC_FLOAT)
89 DECLARE_NETCDF_TT(double, NC_DOUBLE)
90 
91 #define DECLARE_CPP_TT(cpp_t_, nc_c_) \
92 /** A traits class mapping to C++ from NetCDF, specialized for nc_c_: type is cpp_t_ and name() returns the spelling of cpp_t_ */ \
93 template <> class cpp_tt<nc_c_> \
94 { \
95 public: \
96  using type = cpp_t_; \
97  static const char *name() { return #cpp_t_; } \
98 };
99 DECLARE_CPP_TT(char, NC_BYTE)
100 DECLARE_CPP_TT(unsigned char, NC_UBYTE)
101 //DECLARE_CPP_TT(char, NC_CHAR)
102 DECLARE_CPP_TT(short int, NC_SHORT)
103 DECLARE_CPP_TT(unsigned short int, NC_USHORT)
104 DECLARE_CPP_TT(int, NC_INT)
105 //DECLARE_CPP_TT(long, NC_LONG)
106 //DECLARE_CPP_TT(unsigned long, NC_LONG)
107 DECLARE_CPP_TT(unsigned int, NC_UINT)
108 DECLARE_CPP_TT(long long, NC_INT64)
109 DECLARE_CPP_TT(unsigned long long, NC_UINT64)
110 DECLARE_CPP_TT(float, NC_FLOAT)
111 DECLARE_CPP_TT(double, NC_DOUBLE)
112 
113 /** To deal with Fortran fixed length strings which are not properly null
114  * terminated.
115  */
116 void crtrim(char *s, long n);
117 
118 /** NetCDF 3 is not thread safe. The HDF5 C-API can be compiled to be
119  * thread safe, but it is usually not. NetCDF uses HDF5-HL API to access HDF5,
120  * but HDF5-HL API is not thread safe without the --enable-unsupported flag. For
121  * all those reasons it's best for the time being to protect all NetCDF I/O with the mutex returned here.
122  */
123 std::mutex &get_netcdf_mutex();
124 
125 /// A RAII class for managing NETCDF files. The file is kept open while the object exists.
127 {
128 public:
129  netcdf_handle() : m_handle(0) // a handle of 0 is not valid: operator bool only reports true for values greater than zero
130  {}
131 
132  /** Initialize with a handle returned from nc_open/nc_create etc.; the destructor calls close() on it. */
133  netcdf_handle(int h) : m_handle(h)
134  {}
135 
136  /** Close the file during destruction. */
138  { this->close(); }
139 
140  /**
141  * This is a move-only class, and should
142  * only be initialized with a valid handle.
143  */
144  netcdf_handle(const netcdf_handle &) = delete;
145  void operator=(const netcdf_handle &) = delete;
146 
147  /** Move construction takes ownership from the other object. */
149  {
150  m_handle = other.m_handle;
151  other.m_handle = 0;
152  }
153 
154  /** Move assignment calls close() on this object, then takes ownership from the other object; self-assignment is a no-op. */
155  void operator=(netcdf_handle &&other)
156  {
157  if (this == &other) return; // guard: a self-move would otherwise close the file and zero the handle
158  this->close();
159  m_handle = other.m_handle; other.m_handle = 0;
160  }
161 
162  /**
163  * Open the file. This can be used from MPI parallel runs, but collective
164  * I/O is not possible when a file is opened this way. Returns 0 on
165  * success.
166  */
167  int open(const std::string &file_path, int mode);
168 
169  /**
170  * Open the file. This can be used when collective I/O is desired. The
171  * passed in communicator specifies the subset of ranks that will access
172  * the file. Calling this when linked to a non-MPI enabled NetCDF install,
173  * from a parallel run, will result in an error. Returns 0 on success.
174  */
175  int open(MPI_Comm comm, const std::string &file_path, int mode);
176 
177  /**
178  * Create the file. This can be used from MPI parallel runs, but collective
179  * I/O is not possible when a file is created this way. Returns 0 on
180  * success.
181  */
182  int create(const std::string &file_path, int mode);
183 
184  /**
185  * Create the file. This can be used when collective I/O is desired. The
186  * passed in communicator specifies the subset of ranks that will access
187  * the file. Calling this when linked to a non-MPI enabled NetCDF install,
188  * from a parallel run, will result in an error. Returns 0 on success.
189  */
190  int create(MPI_Comm comm, const std::string &file_path, int mode);
191 
192  /** Close the file. */
193  int close();
194 
195  /** Flush all data to disk. */
196  int flush();
197 
198  /** Returns a non-const reference to the stored handle, so callers can read it or assign to it. */
199  int &get()
200  { return m_handle; }
201 
202  /** Test if the handle is valid, i.e. the stored handle is greater than zero. */
203  operator bool() const
204  { return m_handle > 0; }
205 
206 private:
207  int m_handle;
208 };
209 
210 /**
211  * Read the specified variable attribute by name.
212  * Its value is stored in the metadata object.
213  * Returns non-zero if an error occurred.
214  */
215 int read_attribute(netcdf_handle &fh, int var_id,
216  const std::string &att_name, teca_metadata &atts);
217 
218 /**
219  * Read the specified variable attribute by id.
220  * Its value is stored in the metadata object.
221  * Returns non-zero if an error occurred.
222  */
223 int read_attribute(netcdf_handle &fh, int var_id,
224  int att_id, teca_metadata &atts);
225 
226 /**
227  * Read the specified variable's name, dimensions, and its associated
228  * NetCDF attributes into the metadata object. Additionally the following
229  * key/value pairs are added and are useful for subsequent I/O and processing:
230  *
231  * <H4 ID="cf_atts">CF Attributes</H4>
232  *
233  * | Key | Description |
234  * | ---- | ----------- |
235  * | cf_id | The NetCDF variable id that can be used to read the |
236  * | | variable. |
237  * | cf_dims | A vector of the NetCDF dimension lengths (i.e. the |
238  * | | variable's shape). |
239  * | cf_dim_names | A vector of the names of the NetCDF dimensions. |
240  * | cf_type_code | The NetCDF type code. |
241  * | type_code | The teca_variant_array::code type code. |
242  * | centering | The mesh centering, point_centering or no_centering |
243  * | have_mesh_dim | Flags indicating the presence of the x,y,z, and t |
244  * | | mesh dimensions |
245  * | mesh_dim_active | Flags indicating if the x,y,z, and t dimension is |
246  * | | active. |
247  *
248  * In order for centering and have_mesh_dim flags to be set, the x_variable,
249  * y_variable, z_variable, and t_variable must be specified.
250  *
251  * If a dimension is 1 and clamp_dimensions_of_one is set then the dimension is
252  * marked as inactive.
253  *
254  * Returns non-zero if an error occurred.
255  */
256 int read_variable_attributes(netcdf_handle &fh, int var_id,
257  const std::string &x_variable, const std::string &y_variable,
258  const std::string &z_variable, const std::string &t_variable,
259  int clamp_dimensions_of_one, std::string &name, teca_metadata &atts);
260 
261 /**
262  * Read the specified variable's name, dimensions, and its associated
263  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
264  * for details of attributes returned. Returns non-zero if an error occurred.
265  */
266 int read_variable_attributes(netcdf_handle &fh, int var_id,
267  std::string &name, teca_metadata &atts);
268 
269 /**
270  * Read the specified variable's dimensions, and its associated
271  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
272  * for details of attributes returned. returns non-zero if an error occurred.
273  */
274 int read_variable_attributes(netcdf_handle &fh,
275  const std::string &name,
276  const std::string &x_variable, const std::string &y_variable,
277  const std::string &z_variable, const std::string &t_variable,
279 
280 /**
281  * Read the specified variable's dimensions, and its associated
282  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
283  * for details of attributes returned. Returns non-zero if an error occurred.
284  */
285 int read_variable_attributes(netcdf_handle &fh,
286  const std::string &var_name, teca_metadata &atts);
287 
288 /// Functional that reads and returns a variable from the named file.
289 /**
290  * We're doing this so we can do thread
291  * parallel I/O to hide some of the cost of opening files
292  * on Lustre and to hide the cost of reading the time coordinate
293  * which is typically very expensive as NetCDF stores
294  * unlimited dimensions non-contiguously.
295  *
296  * @note
297  * Thu 09 Apr 2020 05:45:29 AM PDT
298  * Threading these operations worked well in NetCDF 3, however
299  * in NetCDF 4 backed by HDF5 necessary locking eliminates any
300  * speed up.
301  */
303 {
304 public:
305  /** Data and task types. */
306  using data_elem_t = std::pair<p_teca_variant_array, teca_metadata>;
307  using data_t = std::pair<unsigned long, data_elem_t>;
308  using task_t = std::packaged_task<data_t()>;
310  using p_queue_t = std::shared_ptr<queue_t>;
311 
312  read_variable_and_attributes(const std::string &path, const std::string &file,
313  unsigned long id, const std::string &variable) : m_path(path),
314  m_file(file), m_variable(variable), m_id(id)
315  {} // only stores copies of the arguments; the body is empty
316 
317  static
318  data_t package(unsigned long id,
319  p_teca_variant_array var = nullptr,
320  const teca_metadata &md = teca_metadata())
321  { // pairs id with the (array, metadata) element; the defaults give a null array and default-constructed metadata
322  return std::make_pair(id, std::make_pair(var, md));
323  }
324 
325  data_t operator()();
326 
327 private:
328  std::string m_path;
329  std::string m_file;
330  std::string m_variable;
331  unsigned long m_id;
332 };
333 
334 /// Function that reads and returns a variable from the named file.
335 /**
336  * We're doing this so we can do thread
337  * parallel I/O to hide some of the cost of opening files
338  * on Lustre and to hide the cost of reading the time coordinate
339  * which is typically very expensive as NetCDF stores
340  * unlimited dimensions non-contiguously.
341  *
342  * @note
343  * Thu 09 Apr 2020 05:45:29 AM PDT
344  * Threading these operations worked well in NetCDF 3, however
345  * in NetCDF 4 backed by HDF5 necessary locking eliminates any
346  * speed up.
347  */
349 {
350 public:
351  /** Data and task types. */
352  using data_t = std::pair<unsigned long, p_teca_variant_array>;
353  using task_t = std::packaged_task<data_t()>;
355  using p_queue_t = std::shared_ptr<queue_t>;
356 
357 
358  read_variable(const std::string &path, const std::string &file,
359  unsigned long id, const std::string &variable) : m_path(path),
360  m_file(file), m_variable(variable), m_id(id)
361  {} // only stores copies of the arguments; the body is empty
362 
363  static
364  data_t package(unsigned long id,
365  p_teca_variant_array var = nullptr)
366  { // pairs id with the array; the default gives a null array
367  return std::make_pair(id, var);
368  }
369 
370  data_t operator()();
371 
372 private:
373  std::string m_path;
374  std::string m_file;
375  std::string m_variable;
376  unsigned long m_id;
377 };
378 
379 
380 /**
381  * Write the attributes in array_atts to the variable identified by var_id.
382  * Returns zero if successful. NOTE(review): the earlier text said "the name is used in error messages", but this signature has no name parameter -- confirm.
383  */
384 int write_variable_attributes(netcdf_handle &fh, int var_id,
385  teca_metadata &array_atts);
386 
387 }
388 #endif
teca_netcdf_util::get_netcdf_mutex
std::mutex & get_netcdf_mutex()
teca_netcdf_util::netcdf_handle::netcdf_handle
netcdf_handle(netcdf_handle &&other)
Definition: teca_netcdf_util.h:148
teca_netcdf_util::crtrim
void crtrim(char *s, long n)
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:18
teca_netcdf_util::read_attribute
int read_attribute(netcdf_handle &fh, int var_id, const std::string &att_name, teca_metadata &atts)
teca_netcdf_util::netcdf_handle
A RAII class for managing NETCDF files. The file is kept open while the object exists.
Definition: teca_netcdf_util.h:126
teca_thread_pool
A class to manage a fixed size pool of threads that dispatch I/O work.
Definition: teca_thread_pool.h:24
teca_netcdf_util::netcdf_handle::operator=
void operator=(netcdf_handle &&other)
Definition: teca_netcdf_util.h:155
teca_netcdf_util
Codes dealing with NetCDF I/O calls.
Definition: teca_netcdf_util.h:60
teca_netcdf_util::read_variable_and_attributes::data_elem_t
std::pair< p_teca_variant_array, teca_metadata > data_elem_t
Definition: teca_netcdf_util.h:306
teca_file_util::path
std::string path(const std::string &filename)
teca_netcdf_util::cpp_tt
A traits class mapping to C++ from netcdf.
Definition: teca_netcdf_util.h:67
teca_netcdf_util::read_variable_and_attributes
Functional that reads and returns a variable from the named file.
Definition: teca_netcdf_util.h:302
teca_netcdf_util::read_variable::data_t
std::pair< unsigned long, p_teca_variant_array > data_t
Definition: teca_netcdf_util.h:352
teca_netcdf_util::netcdf_handle::get
int & get()
Definition: teca_netcdf_util.h:199
teca_coordinate_util::clamp_dimensions_of_one
int clamp_dimensions_of_one(unsigned long nx_max, unsigned long ny_max, unsigned long nz_max, unsigned long *extent, bool verbose)
teca_netcdf_util::netcdf_handle::~netcdf_handle
~netcdf_handle()
Definition: teca_netcdf_util.h:137
teca_netcdf_util::write_variable_attributes
int write_variable_attributes(netcdf_handle &fh, int var_id, teca_metadata &array_atts)
teca_netcdf_util::read_variable
Function that reads and returns a variable from the named file.
Definition: teca_netcdf_util.h:348
p_teca_variant_array
std::shared_ptr< teca_variant_array > p_teca_variant_array
Definition: teca_variant_array.h:22
teca_netcdf_util::netcdf_handle::netcdf_handle
netcdf_handle(int h)
Definition: teca_netcdf_util.h:133
teca_netcdf_util::netcdf_tt
A traits class mapping to netcdf from C++.
Definition: teca_netcdf_util.h:64
teca_netcdf_util::read_variable_attributes
int read_variable_attributes(netcdf_handle &fh, int var_id, const std::string &x_variable, const std::string &y_variable, const std::string &z_variable, const std::string &t_variable, int clamp_dimensions_of_one, std::string &name, teca_metadata &atts)