TECA
The Toolkit for Extreme Climate Analysis
teca_netcdf_util.h
Go to the documentation of this file.
1 #ifndef teca_netcdf_util_h
2 #define teca_netcdf_util_h
3 
4 /// @file
5 
6 #include "teca_config.h"
7 #include "teca_mpi.h"
8 #include "teca_metadata.h"
9 #include "teca_cpu_thread_pool.h"
10 
11 #include <mutex>
12 #include <string>
13 
14 #include <netcdf.h>
15 #if defined(TECA_HAS_NETCDF_MPI)
16 #include <netcdf_par.h>
17 #endif
18 
19 /// macro to help with netcdf floating point data types
20 #define NC_DISPATCH_FP(tc_, code_) \
21  switch (tc_) \
22  { \
23  NC_DISPATCH_CASE(NC_FLOAT, float, code_) \
24  NC_DISPATCH_CASE(NC_DOUBLE, double, code_) \
25  default: \
26  TECA_ERROR("netcdf type code_ " << tc_ \
27  << " is not a floating point type") \
28  }
29 
30 /// macro to help with netcdf data types
31 #define NC_DISPATCH(tc_, code_) \
32  switch (tc_) \
33  { \
34  NC_DISPATCH_CASE(NC_BYTE, char, code_) \
35  NC_DISPATCH_CASE(NC_UBYTE, unsigned char, code_) \
36  NC_DISPATCH_CASE(NC_CHAR, char, code_) \
37  NC_DISPATCH_CASE(NC_SHORT, short int, code_) \
38  NC_DISPATCH_CASE(NC_USHORT, unsigned short int, code_) \
39  NC_DISPATCH_CASE(NC_INT, int, code_) \
40  NC_DISPATCH_CASE(NC_UINT, unsigned int, code_) \
41  NC_DISPATCH_CASE(NC_INT64, long long, code_) \
42  NC_DISPATCH_CASE(NC_UINT64, unsigned long long, code_) \
43  NC_DISPATCH_CASE(NC_FLOAT, float, code_) \
44  NC_DISPATCH_CASE(NC_DOUBLE, double, code_) \
45  default: \
46  TECA_ERROR("netcdf type code " << tc_ \
47  << " is not supported") \
48  }
49 
50 /// macro that executes code when the type code is matched.
51 #define NC_DISPATCH_CASE(cc_, tt_, code_) \
52  case cc_: \
53  { \
54  using NC_T = tt_; \
55  code_ \
56  break; \
57  }
58 
59 /// Codes dealing with NetCDF I/O calls
61 {
62 
63 /// A traits class mapping to netcdf from C++
64 template<typename num_t> class TECA_EXPORT netcdf_tt {};
65 
66 /// A traits class mapping to C++ from netcdf
67 template<int nc_enum> class TECA_EXPORT cpp_tt {};
68 
69 #define DECLARE_NETCDF_TT(cpp_t_, nc_c_) \
70 /** A traits class mapping to NetCDF from C++, specialized for cpp_t_ */ \
71 template <> class netcdf_tt<cpp_t_> \
72 { \
73 public: \
74  enum { type_code = nc_c_ }; \
75  static const char *name() { return #nc_c_; } \
76 };
77 DECLARE_NETCDF_TT(char, NC_BYTE)
78 DECLARE_NETCDF_TT(unsigned char, NC_UBYTE)
79 //DECLARE_NETCDF_TT(char, NC_CHAR)
80 DECLARE_NETCDF_TT(short int, NC_SHORT)
81 DECLARE_NETCDF_TT(unsigned short int, NC_USHORT)
82 DECLARE_NETCDF_TT(int, NC_INT)
83 DECLARE_NETCDF_TT(long, NC_LONG)
84 DECLARE_NETCDF_TT(unsigned long, NC_LONG)
85 DECLARE_NETCDF_TT(unsigned int, NC_UINT)
86 DECLARE_NETCDF_TT(long long, NC_INT64)
87 DECLARE_NETCDF_TT(unsigned long long, NC_UINT64)
88 DECLARE_NETCDF_TT(float, NC_FLOAT)
89 DECLARE_NETCDF_TT(double, NC_DOUBLE)
90 
91 #define DECLARE_CPP_TT(cpp_t_, nc_c_) \
92 /** A traits class mapping to C++ from NetCDF, specialized for cpp_t_ */ \
93 template <> class cpp_tt<nc_c_> \
94 { \
95 public: \
96  using type = cpp_t_; \
97  static const char *name() { return #cpp_t_; } \
98 };
99 DECLARE_CPP_TT(char, NC_BYTE)
100 DECLARE_CPP_TT(unsigned char, NC_UBYTE)
101 //DECLARE_CPP_TT(char, NC_CHAR)
102 DECLARE_CPP_TT(short int, NC_SHORT)
103 DECLARE_CPP_TT(unsigned short int, NC_USHORT)
104 DECLARE_CPP_TT(int, NC_INT)
105 //DECLARE_CPP_TT(long, NC_LONG)
106 //DECLARE_CPP_TT(unsigned long, NC_LONG)
107 DECLARE_CPP_TT(unsigned int, NC_UINT)
108 DECLARE_CPP_TT(long long, NC_INT64)
109 DECLARE_CPP_TT(unsigned long long, NC_UINT64)
110 DECLARE_CPP_TT(float, NC_FLOAT)
111 DECLARE_CPP_TT(double, NC_DOUBLE)
112 
113 /** To deal with fortran fixed length strings which are not properly null
114  * terminated.
115  */
116 void crtrim(char *s, long n);
117 
118 /** NetCDF 3 is not threadsafe. The HDF5 C-API can be compiled to be
119  * threadsafe, but it is usually not. NetCDF uses HDF5-HL API to access HDF5,
120  * but HDF5-HL API is not threadsafe without the --enable-unsupported flag. For
121  * all those reasons it's best for the time being to protect all NetCDF I/O.
122  */
123 std::mutex &get_netcdf_mutex();
124 
125 /// A RAII class for managing NETCDF files. The file is kept open while the object exists.
127 {
128 public:
129  netcdf_handle() : m_handle(0)
130  {}
131 
132  /** Initialize with a handle returned from nc_open/nc_create etc. */
133  netcdf_handle(int h) : m_handle(h)
134  {}
135 
136  /** Close the file during destruction. */
138  { this->close(); }
139 
140  /**
141  * This is a move only class, and should
142  * only be initialized with a valid handle.
143  */
144  netcdf_handle(const netcdf_handle &) = delete;
145  void operator=(const netcdf_handle &) = delete;
146 
147  /** Move construction takes ownership from the other object. */
149  {
150  m_handle = other.m_handle;
151  other.m_handle = 0;
152  }
153 
154  /** Move assignment takes ownership from the other object. */
155  void operator=(netcdf_handle &&other)
156  {
157  this->close();
158  m_handle = other.m_handle;
159  other.m_handle = 0;
160  }
161 
162  /**
163  * Open the file. this can be used from MPI parallel runs, but collective
164  * I/O is not possible when a file is opened this way. Returns 0 on
165  * success.
166  */
167  int open(const std::string &file_path, int mode);
168 
169  /**
170  * Open the file. this can be used when collective I/O is desired. the
171  * passed in communicator specifies the subset of ranks that will access
172  * the file. Calling this when linked to a non-MPI enabled NetCDF install,
173  * from a parallel run, will result in an error. Returns 0 on success.
174  */
175  int open(MPI_Comm comm, const std::string &file_path, int mode);
176 
177  /**
178  * Create the file. this can be used from MPI parallel runs, but collective
179  * I/O is not possible when a file is created this way. Returns 0 on
180  * success.
181  */
182  int create(const std::string &file_path, int mode);
183 
184  /**
185  * Create the file. this can be used when collective I/O is desired. the
186  * passed in communicator specifies the subset of ranks that will access
187  * the file. Calling this when linked to a non-MPI enabled NetCDF install,
188  * from a parallel run, will result in an error. Returns 0 on success.
189  */
190  int create(MPI_Comm comm, const std::string &file_path, int mode);
191 
192  /** Close the file. */
193  int close();
194 
195  /** Flush all data to disk. */
196  int flush();
197 
198  /** Returns a reference to the handle. */
199  int &get()
200  { return m_handle; }
201 
202  /** Test if the handle is valid. */
203  operator bool() const
204  { return m_handle > 0; }
205 
206 private:
207  int m_handle;
208 };
209 
210 /**
211  * Read the specified variable attribute by name.
212  * Its value is stored in the metadata object
213  * return is non-zero if an error occurred.
214  */
216 int read_attribute(netcdf_handle &fh, int var_id,
217  const std::string &att_name, teca_metadata &atts);
218 
219 /**
220  * Read the specified variable attribute by id.
221  * Its value is stored in the metadata object
222  * return is non-zero if an error occurred.
223  */
225 int read_attribute(netcdf_handle &fh, int var_id,
226  int att_id, teca_metadata &atts);
227 
228 /**
229  * Read the specified variable's name, dimensions, and its associated
230  * NetCDF attributes into the metadata object. Additionally the following
231  * key/value pairs are added and useful for subsequent I/O and processing
232  *
233  * <H4 ID="cf_atts">CF Attributes</H4>
234  *
235  * | Key | Description |
236  * | ---- | ----------- |
237  * | cf_id | The NetCDF variable id that can be used to read the |
238  * | | variable. |
239  * | cf_dims | A vector of the NetCDF dimension lengths (i.e. the |
240  * | | variable's shape). |
241  * | cf_dim_names | A vector of the names of the NetCDF dimensions. |
242  * | cf_type_code | The NetCDF type code. |
243  * | type_code | The teca_variant_array::code type code. |
244  * | centering | The mesh centering, point_centering or no_centering |
245  * | have_mesh_dim | Flags indicating the presence of the x,y,z, and t |
246  * | | mesh dimensions |
247  * | mesh_dim_active | Flags indicating if the x,y,z, and t dimension is |
248  * | | active. |
249  *
250  * In order for centering and have_mesh_dim flags to be set, the x_variable,
251  * y_variable, z_variable, and t_variable must be specified.
252  *
253  * If dimension is 1 and clamp_dimensions_of_one is set then the dimension is
254  * marked as inactive.
255  *
256  * returns non-zero if an error occurred.
257  */
259 int read_variable_attributes(netcdf_handle &fh, int var_id,
260  const std::string &x_variable, const std::string &y_variable,
261  const std::string &z_variable, const std::string &t_variable,
262  int clamp_dimensions_of_one, std::string &name, teca_metadata &atts);
263 
264 /**
265  * Read the specified variable's name, dimensions, and its associated
266  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
267  * for details of attributes returned. returns non-zero if an error occurred.
268  */
270 int read_variable_attributes(netcdf_handle &fh, int var_id,
271  std::string &name, teca_metadata &atts);
272 
273 /**
274  * Read the specified variable's dimensions, and its associated
275  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
276  * for details of attributes returned. returns non-zero if an error occurred.
277  */
279 int read_variable_attributes(netcdf_handle &fh,
280  const std::string &name,
281  const std::string &x_variable, const std::string &y_variable,
282  const std::string &z_variable, const std::string &t_variable,
284 
285 /**
286  * Read the specified variable's dimensions, and its associated
287  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
288  * for details of attributes returned. returns non-zero if an error occurred.
289  */
291 int read_variable_attributes(netcdf_handle &fh,
292  const std::string &var_name, teca_metadata &atts);
293 
294 /// Functional that reads and returns a variable from the named file.
295 /**
296  * We're doing this so we can do thread
297  * parallel I/O to hide some of the cost of opening files
298  * on Lustre and to hide the cost of reading time coordinate
299  * which is typically very expensive as NetCDF stores
300  * unlimited dimensions non-contiguously.
301  *
302  * @note
303  * Thu 09 Apr 2020 05:45:29 AM PDT
304  * Threading these operations worked well in NetCDF 3, however
305  * in NetCDF 4 backed by HDF5 necessary locking eliminates any
306  * speed up.
307  */
309 {
310 public:
311  /** Data and task types. */
312  using data_elem_t = std::pair<p_teca_variant_array, teca_metadata>;
313  using data_t = std::pair<unsigned long, data_elem_t>;
314  using task_t = std::packaged_task<data_t()>;
316  using p_queue_t = std::shared_ptr<queue_t>;
317 
318  read_variable_and_attributes(const std::string &path, const std::string &file,
319  unsigned long id, const std::string &variable) : m_path(path),
320  m_file(file), m_variable(variable), m_id(id)
321  {}
322 
323  static
324  data_t package(unsigned long id,
325  p_teca_variant_array var = nullptr,
326  const teca_metadata &md = teca_metadata())
327  {
328  return std::make_pair(id, std::make_pair(var, md));
329  }
330 
331  data_t operator()(int device_id = -1);
332 
333 private:
334  std::string m_path;
335  std::string m_file;
336  std::string m_variable;
337  unsigned long m_id;
338 };
339 
340 /// Function that reads and returns a variable from the named file.
341 /**
342  * we're doing this so we can do thread
343  * parallel I/O to hide some of the cost of opening files
344  * on Lustre and to hide the cost of reading time coordinate
345  * which is typically very expensive as NetCDF stores
346  * unlimited dimensions non-contiguously
347  *
348  * @note
349  * Thu 09 Apr 2020 05:45:29 AM PDT
350  * Threading these operations worked well in NetCDF 3, however
351  * in NetCDF 4 backed by HDF5 necessary locking eliminates any
352  * speed up.
353  */
355 {
356 public:
357  /** Data and task types. */
358  using data_t = std::pair<unsigned long, p_teca_variant_array>;
359  using task_t = std::packaged_task<data_t(int)>;
361  using p_queue_t = std::shared_ptr<queue_t>;
362 
363 
364  read_variable(const std::string &path, const std::string &file,
365  unsigned long id, const std::string &variable) : m_path(path),
366  m_file(file), m_variable(variable), m_id(id)
367  {}
368 
369  static
370  data_t package(unsigned long id,
371  p_teca_variant_array var = nullptr)
372  {
373  return std::make_pair(id, var);
374  }
375 
376  data_t operator()(int device_id = -1);
377 
378 private:
379  std::string m_path;
380  std::string m_file;
381  std::string m_variable;
382  unsigned long m_id;
383 };
384 
385 
386 /**
387  * Write the attributes in array_atts to the variable identified by var_id. The
388  * name is used in error messages. Returns zero if successful.
389  */
391 int write_variable_attributes(netcdf_handle &fh, int var_id,
392  teca_metadata &array_atts);
393 
394 }
395 #endif
teca_netcdf_util::get_netcdf_mutex
std::mutex & get_netcdf_mutex()
teca_netcdf_util::netcdf_handle::netcdf_handle
netcdf_handle(netcdf_handle &&other)
Definition: teca_netcdf_util.h:148
teca_netcdf_util::crtrim
void crtrim(char *s, long n)
teca_metadata
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:21
teca_netcdf_util::netcdf_handle
A RAII class for managing NETCDF files. The file is kept open while the object exists.
Definition: teca_netcdf_util.h:126
teca_netcdf_util::read_variable_attributes
TECA_EXPORT int read_variable_attributes(netcdf_handle &fh, int var_id, const std::string &x_variable, const std::string &y_variable, const std::string &z_variable, const std::string &t_variable, int clamp_dimensions_of_one, std::string &name, teca_metadata &atts)
teca_netcdf_util::netcdf_handle::operator=
void operator=(netcdf_handle &&other)
Definition: teca_netcdf_util.h:155
teca_netcdf_util
Codes dealing with NetCDF I/O calls.
Definition: teca_netcdf_util.h:60
teca_netcdf_util::read_variable_and_attributes::data_elem_t
std::pair< p_teca_variant_array, teca_metadata > data_elem_t
Definition: teca_netcdf_util.h:312
teca_netcdf_util::write_variable_attributes
TECA_EXPORT int write_variable_attributes(netcdf_handle &fh, int var_id, teca_metadata &array_atts)
teca_netcdf_util::cpp_tt
A traits class mapping to C++ from netcdf.
Definition: teca_netcdf_util.h:67
teca_netcdf_util::read_variable_and_attributes
Functional that reads and returns a variable from the named file.
Definition: teca_netcdf_util.h:308
teca_netcdf_util::read_variable::data_t
std::pair< unsigned long, p_teca_variant_array > data_t
Definition: teca_netcdf_util.h:358
teca_netcdf_util::netcdf_handle::get
int & get()
Definition: teca_netcdf_util.h:199
teca_netcdf_util::read_attribute
TECA_EXPORT int read_attribute(netcdf_handle &fh, int var_id, const std::string &att_name, teca_metadata &atts)
teca_cpu_thread_pool
A class to manage a fixed size pool of threads that dispatch work.
Definition: teca_cpu_thread_pool.h:25
teca_coordinate_util::clamp_dimensions_of_one
TECA_EXPORT int clamp_dimensions_of_one(unsigned long nx_max, unsigned long ny_max, unsigned long nz_max, unsigned long *extent, bool verbose)
teca_netcdf_util::netcdf_handle::~netcdf_handle
~netcdf_handle()
Definition: teca_netcdf_util.h:137
teca_netcdf_util::read_variable
Function that reads and returns a variable from the named file.
Definition: teca_netcdf_util.h:354
p_teca_variant_array
std::shared_ptr< teca_variant_array > p_teca_variant_array
Definition: teca_variant_array.h:27
teca_error::TECA_EXPORT
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
teca_netcdf_util::netcdf_handle::netcdf_handle
netcdf_handle(int h)
Definition: teca_netcdf_util.h:133
teca_netcdf_util::netcdf_tt
A traits class mapping to netcdf from C++.
Definition: teca_netcdf_util.h:64
teca_file_util::path
TECA_EXPORT std::string path(const std::string &filename)