TECA
The Toolkit for Extreme Climate Analysis
teca_netcdf_util.h
Go to the documentation of this file.
1 #ifndef teca_netcdf_util_h
2 #define teca_netcdf_util_h
3 
4 /// @file
5 
6 #include "teca_config.h"
7 #include "teca_mpi.h"
8 #include "teca_metadata.h"
9 #include "teca_cpu_thread_pool.h"
10 
11 #include <mutex>
12 #include <string>
13 
14 #include <netcdf.h>
15 #if defined(TECA_HAS_NETCDF_MPI)
16 #include <netcdf_par.h>
17 #endif
18 
19 /// Macro to help with netcdf floating point data types.
/// Expands to a switch on the NetCDF type code tc_. The code passed in the
/// variadic arguments runs inside the NC_DISPATCH_CASE for NC_FLOAT or
/// NC_DOUBLE. Any other type code is reported through TECA_ERROR.
/// The error message spells out "type code" because macro parameters,
/// including __VA_ARGS__, are not substituted inside string literals.
20 #define NC_DISPATCH_FP(tc_, ...) \
21  switch (tc_) \
22  { \
23  NC_DISPATCH_CASE(NC_FLOAT, float, __VA_ARGS__) \
24  NC_DISPATCH_CASE(NC_DOUBLE, double, __VA_ARGS__) \
25  default: \
26  TECA_ERROR("netcdf type code " << tc_ \
27  << " is not a floating point type") \
28  }
29 
30 /// Macro to help with netcdf data types.
/// Expands to a switch on the NetCDF type code tc_. Each supported code has
/// an NC_DISPATCH_CASE that runs the code passed in the variadic arguments
/// with the C++ type listed next to the code. Note that NC_BYTE and NC_CHAR
/// are both dispatched as char. Type codes without a case are reported
/// through TECA_ERROR.
31 #define NC_DISPATCH(tc_, ...) \
32  switch (tc_) \
33  { \
34  NC_DISPATCH_CASE(NC_BYTE, char, __VA_ARGS__) \
35  NC_DISPATCH_CASE(NC_UBYTE, unsigned char, __VA_ARGS__) \
36  NC_DISPATCH_CASE(NC_CHAR, char, __VA_ARGS__) \
37  NC_DISPATCH_CASE(NC_SHORT, short int, __VA_ARGS__) \
38  NC_DISPATCH_CASE(NC_USHORT, unsigned short int, __VA_ARGS__) \
39  NC_DISPATCH_CASE(NC_INT, int, __VA_ARGS__) \
40  NC_DISPATCH_CASE(NC_UINT, unsigned int, __VA_ARGS__) \
41  NC_DISPATCH_CASE(NC_INT64, long long, __VA_ARGS__) \
42  NC_DISPATCH_CASE(NC_UINT64, unsigned long long, __VA_ARGS__) \
43  NC_DISPATCH_CASE(NC_FLOAT, float, __VA_ARGS__) \
44  NC_DISPATCH_CASE(NC_DOUBLE, double, __VA_ARGS__) \
45  default: \
46  TECA_ERROR("netcdf type code " << tc_ \
47  << " is not supported") \
48  }
49 
50 /// Macro that executes code when the type code is matched.
/// Emits `case cc_:` followed by a braced scope. Inside that scope the
/// following aliases are defined for the C++ type tt_ before the code passed
/// in the variadic arguments runs:
///   NC_NT  - tt_
///   NC_TT  - teca_variant_array_impl<tt_>
///   NC_CTT - const teca_variant_array_impl<tt_>
///   NC_PT  - std::shared_ptr<teca_variant_array_impl<tt_>>
///   NC_CPT - std::shared_ptr<const teca_variant_array_impl<tt_>>
///   NC_SP  - std::shared_ptr<tt_>
///   NC_CSP - std::shared_ptr<const tt_>
/// The case ends with a break placed after the variadic code.
51 #define NC_DISPATCH_CASE(cc_, tt_, ...) \
52  case cc_: \
53  { \
54  using NC_NT = tt_; \
55  using NC_TT = teca_variant_array_impl<tt_>; \
56  using NC_CTT = const teca_variant_array_impl<tt_>; \
57  using NC_PT = std::shared_ptr<teca_variant_array_impl<tt_>>; \
58  using NC_CPT = std::shared_ptr<const teca_variant_array_impl<tt_>>; \
59  using NC_SP = std::shared_ptr<tt_>; \
60  using NC_CSP = std::shared_ptr<const tt_>; \
61  __VA_ARGS__ \
62  break; \
63  }
64 
65 /// Codes dealing with NetCDF I/O calls
67 {
68 
69 /// A traits class mapping to netcdf from C++.
/// The primary template is empty. The members type_code and name() are
/// supplied by the specializations generated with DECLARE_NETCDF_TT.
70 template<typename num_t> class TECA_EXPORT netcdf_tt {};
71 
72 /// A traits class mapping to C++ from netcdf.
/// The primary template is empty. The member alias type and name() are
/// supplied by the specializations generated with DECLARE_CPP_TT.
73 template<int nc_enum> class TECA_EXPORT cpp_tt {};
74 
/// Generates the netcdf_tt specialization for the C++ type cpp_t_. The
/// specialization exposes the NetCDF type code nc_c_ as the enumerator
/// type_code, and name() returns the spelling of nc_c_ as a string.
75 #define DECLARE_NETCDF_TT(cpp_t_, nc_c_) \
76 /** A traits class mapping to NetCDF from C++, specialized for cpp_t_ */ \
77 template <> class netcdf_tt<cpp_t_> \
78 { \
79 public: \
80  enum { type_code = nc_c_ }; \
81  static const char *name() { return #nc_c_; } \
82 };
83 DECLARE_NETCDF_TT(char, NC_BYTE)
84 DECLARE_NETCDF_TT(unsigned char, NC_UBYTE)
// char is already mapped to NC_BYTE above; a second specialization for char
// would be a redefinition, so the NC_CHAR mapping stays disabled.
85 //DECLARE_NETCDF_TT(char, NC_CHAR)
86 DECLARE_NETCDF_TT(short int, NC_SHORT)
87 DECLARE_NETCDF_TT(unsigned short int, NC_USHORT)
88 DECLARE_NETCDF_TT(int, NC_INT)
// NOTE(review): long and unsigned long both map to NC_LONG. The netcdf.h
// documentation describes NC_LONG as a deprecated alias of NC_INT (a signed
// 32 bit type) -- confirm this is the intended on-disk type where long is
// 64 bits and for the unsigned variant.
89 DECLARE_NETCDF_TT(long, NC_LONG)
90 DECLARE_NETCDF_TT(unsigned long, NC_LONG)
91 DECLARE_NETCDF_TT(unsigned int, NC_UINT)
92 DECLARE_NETCDF_TT(long long, NC_INT64)
93 DECLARE_NETCDF_TT(unsigned long long, NC_UINT64)
94 DECLARE_NETCDF_TT(float, NC_FLOAT)
95 DECLARE_NETCDF_TT(double, NC_DOUBLE)
96 
/// Generates the cpp_tt specialization for the NetCDF type code nc_c_. The
/// specialization exposes the C++ type cpp_t_ as the member alias type, and
/// name() returns the spelling of cpp_t_ as a string.
97 #define DECLARE_CPP_TT(cpp_t_, nc_c_) \
98 /** A traits class mapping to C++ from NetCDF, specialized for cpp_t_ */ \
99 template <> class cpp_tt<nc_c_> \
100 { \
101 public: \
102  using type = cpp_t_; \
103  static const char *name() { return #cpp_t_; } \
104 };
105 DECLARE_CPP_TT(char, NC_BYTE)
106 DECLARE_CPP_TT(unsigned char, NC_UBYTE)
// The NC_CHAR mapping is disabled, so cpp_tt<NC_CHAR> has no type member.
107 //DECLARE_CPP_TT(char, NC_CHAR)
108 DECLARE_CPP_TT(short int, NC_SHORT)
109 DECLARE_CPP_TT(unsigned short int, NC_USHORT)
110 DECLARE_CPP_TT(int, NC_INT)
// The NC_LONG mappings are disabled. NOTE(review): presumably because NC_LONG
// has the same value as NC_INT, which would make these redefinitions of
// cpp_tt<NC_INT> -- confirm against netcdf.h.
111 //DECLARE_CPP_TT(long, NC_LONG)
112 //DECLARE_CPP_TT(unsigned long, NC_LONG)
113 DECLARE_CPP_TT(unsigned int, NC_UINT)
114 DECLARE_CPP_TT(long long, NC_INT64)
115 DECLARE_CPP_TT(unsigned long long, NC_UINT64)
116 DECLARE_CPP_TT(float, NC_FLOAT)
117 DECLARE_CPP_TT(double, NC_DOUBLE)
118 
119 /** To deal with Fortran fixed length strings which are not properly null
120  * terminated.
121  */
122 void crtrim(char *s, long n);
123 
124 /** NetCDF 3 is not threadsafe. The HDF5 C-API can be compiled to be
125  * threadsafe, but it is usually not. NetCDF uses HDF5-HL API to access HDF5,
126  * but HDF5-HL API is not threadsafe without the --enable-unsupported flag. For
127  * all those reasons it's best for the time being to protect all NetCDF I/O.
128  */
129 std::mutex &get_netcdf_mutex();
130 
131 /// A RAII class for managing NETCDF files. The file is kept open while the object exists.
133 {
134 public:
135  netcdf_handle() : m_handle(0)
136  {}
137 
138  /** Initialize with a handle returned from nc_open/nc_create etc. */
139  netcdf_handle(int h) : m_handle(h)
140  {}
141 
142  /** Close the file during destruction. */
144  { this->close(); }
145 
146  /**
147  * This is a move only class, and should
148  * only be initialized with a valid handle.
149  */
150  netcdf_handle(const netcdf_handle &) = delete;
151  void operator=(const netcdf_handle &) = delete;
152 
153  /** Move construction takes ownership from the other object. */
155  {
156  m_handle = other.m_handle;
157  other.m_handle = 0;
158  }
159 
160  /**
      * Move assignment takes ownership from the other object. Any file this
      * object currently holds is closed first, and the other object is left
      * without a handle. Assigning an object to itself does nothing.
      */
161  void operator=(netcdf_handle &&other)
162  {
      // guard against self move: close() below would otherwise release the
      // very handle this object is about to take ownership of
      if (this == &other)
      return;
163  this->close();
164  m_handle = other.m_handle;
165  other.m_handle = 0;
166  }
167 
168  /**
169  * Open the file. This can be used from MPI parallel runs, but collective
170  * I/O is not possible when a file is opened this way. Returns 0 on
171  * success.
172  */
173  int open(const std::string &file_path, int mode);
174 
175  /**
176  * Open the file. This can be used when collective I/O is desired. The
177  * passed in communicator specifies the subset of ranks that will access
178  * the file. Calling this from a parallel run when linked to a non-MPI
179  * enabled NetCDF install will result in an error. Returns 0 on success.
180  */
181  int open(MPI_Comm comm, const std::string &file_path, int mode);
182 
183  /**
184  * Create the file. This can be used from MPI parallel runs, but collective
185  * I/O is not possible when a file is created this way. Returns 0 on
186  * success.
187  */
188  int create(const std::string &file_path, int mode);
189 
190  /**
191  * Create the file. This can be used when collective I/O is desired. The
192  * passed in communicator specifies the subset of ranks that will access
193  * the file. Calling this from a parallel run when linked to a non-MPI
194  * enabled NetCDF install will result in an error. Returns 0 on success.
195  */
196  int create(MPI_Comm comm, const std::string &file_path, int mode);
197 
198  /** Close the file. */
199  int close();
200 
201  /** Flush all data to disk. */
202  int flush();
203 
204  /** Returns a reference to the handle. */
205  int &get()
206  { return m_handle; }
207 
208  /** Test if the handle is valid. */
209  operator bool() const
210  { return m_handle > 0; }
211 
212 private:
213  int m_handle;
214  //int m_grp_handle;
215 };
216 
217 /**
218  * Read the specified variable attribute by name.
219  * Its value is stored in the metadata object.
220  * Returns non-zero if an error occurred.
221  */
223 int read_attribute(int parent_id, int var_id,
224  const std::string &att_name, teca_metadata &atts);
225 
226 /**
227  * Read the specified variable attribute by id.
228  * Its value is stored in the metadata object.
229  * Returns non-zero if an error occurred.
230  */
232 int read_attribute(int parent_id, int var_id,
233  int att_id, teca_metadata &atts);
234 
235 /**
236  * Read the specified variable's name, dimensions, and its associated
237  * NetCDF attributes into the metadata object. Additionally the following
238  * key/value pairs are added and useful for subsequent I/O and processing
239  *
240  * <H4 ID="cf_atts">CF Attributes</H4>
241  *
242  * | Key | Description |
243  * | ---- | ----------- |
244  * | cf_parent_group | The name of the group containing the NetCDF variable |
245  * | | id or an empty string if variable is in the root |
246  * | | group (likely the most common case) |
247  * | cf_id | The NetCDF variable id that can be used to read the |
248  * | | variable. |
249  * | cf_dims | A vector of the NetCDF dimension lengths (i.e. the |
250  * | | variable's shape). |
251  * | cf_dim_names | A vector of the names of the NetCDF dimensions. |
252  * | cf_type_code | The NetCDF type code. |
253  * | type_code | The teca_variant_array::code type code. |
254  * | centering | The mesh centering, point_centering or no_centering |
255  * | have_mesh_dim | Flags indicating the presence of the x,y,z, and t |
256  * | | mesh dimensions |
257  * | mesh_dim_active | Flags indicating if the x,y,z, and t dimension is |
258  * | | active. |
259  *
260  * In order for centering and have_mesh_dim flags to be set, the x_variable,
261  * y_variable, z_variable, and t_variable must be specified.
262  *
263  * If dimension is 1 and clamp_dimensions_of_one is set then the dimension is
264  * marked as inactive.
265  *
266  * returns non-zero if an error occurred.
267  */
269 int read_variable_attributes(netcdf_handle &fh, const std::string &parent_group,
270  int var_id, const std::string &x_variable, const std::string &y_variable,
271  const std::string &z_variable, const std::string &t_variable,
272  const std::string &ensemble_dimension_name, int clamp_dimensions_of_one,
273  std::string &name, teca_metadata &atts);
274 
275 /**
276  * Read the specified variable's name, dimensions, and its associated
277  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
278  * for details of attributes returned. returns non-zero if an error occurred.
279  */
281 int read_variable_attributes(netcdf_handle &fh, const std::string &parent_group,
282  int var_id, std::string &name, teca_metadata &atts);
283 
284 /**
285  * Get the variable ID in a NetCDF file where var_name can be a fully qualified
286  * path including group names, such as "global/lat"
287  */
289 int get_varid(netcdf_handle &fh, const std::string &var_name,
290  int *parent_id, int *var_id);
291 
292 /**
293  * Read the specified variable's dimensions, and its associated
294  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
295  * for details of attributes returned. returns non-zero if an error occurred.
296  */
299  const std::string &name,
300  const std::string &x_variable, const std::string &y_variable,
301  const std::string &z_variable, const std::string &t_variable,
302  const std::string &ensemble_dim_name, int clamp_dimensions_of_one,
303  teca_metadata &atts);
304 
305 /**
306  * Read the specified variable's dimensions, and its associated
307  * NetCDF attributes into the metadata object. See <A HREF="#cf_atts">CF Attributes</A>
308  * for details of attributes returned. returns non-zero if an error occurred.
309  */
312  const std::string &var_name, teca_metadata &atts);
313 
314 /// Functional that reads and returns a variable from the named file.
315 /**
316  * We're doing this so we can do thread
317  * parallel I/O to hide some of the cost of opening files
318  * on Lustre and to hide the cost of reading time coordinate
319  * which is typically very expensive as NetCDF stores
320  * unlimited dimensions non-contiguously.
321  *
322  * @note
323  * Thu 09 Apr 2020 05:45:29 AM PDT
324  * Threading these operations worked well in NetCDF 3, however
325  * in NetCDF 4 backed by HDF5 necessary locking eliminates any
326  * speed up.
327  */
329 {
330 public:
331  /** Data and task types. */
332  using data_elem_t = std::pair<p_teca_variant_array, teca_metadata>;
333  using data_t = std::pair<unsigned long, data_elem_t>;
334  using task_t = std::packaged_task<data_t()>;
336  using p_queue_t = std::shared_ptr<queue_t>;
337 
338  read_variable_and_attributes(const std::string &path, const std::string &file,
339  unsigned long id, const std::string &variable) : m_path(path),
340  m_file(file), m_variable(variable), m_id(id)
341  {}
342 
343  static
344  data_t package(unsigned long id,
345  p_teca_variant_array var = nullptr,
346  const teca_metadata &md = teca_metadata())
347  {
348  return std::make_pair(id, std::make_pair(var, md));
349  }
350 
351  data_t operator()(int device_id = -1);
352 
353 private:
354  std::string m_path;
355  std::string m_file;
356  std::string m_variable;
357  unsigned long m_id;
358 };
359 
360 /// Function that reads and returns a variable from the named file.
361 /**
362  * we're doing this so we can do thread
363  * parallel I/O to hide some of the cost of opening files
364  * on Lustre and to hide the cost of reading time coordinate
365  * which is typically very expensive as NetCDF stores
366  * unlimited dimensions non-contiguously
367  *
368  * @note
369  * Thu 09 Apr 2020 05:45:29 AM PDT
370  * Threading these operations worked well in NetCDF 3, however
371  * in NetCDF 4 backed by HDF5 necessary locking eliminates any
372  * speed up.
373  */
375 {
376 public:
377  /** Data and task types. */
378  using data_t = std::pair<unsigned long, p_teca_variant_array>;
379  using task_t = std::packaged_task<data_t(int)>;
381  using p_queue_t = std::shared_ptr<queue_t>;
382 
383 
384  read_variable(const std::string &path, const std::string &file,
385  unsigned long id, const std::string &variable) : m_path(path),
386  m_file(file), m_variable(variable), m_id(id)
387  {}
388 
389  static
390  data_t package(unsigned long id,
391  p_teca_variant_array var = nullptr)
392  {
393  return std::make_pair(id, var);
394  }
395 
396  data_t operator()(int device_id = -1);
397 
398 private:
399  std::string m_path;
400  std::string m_file;
401  std::string m_variable;
402  unsigned long m_id;
403 };
404 
405 
406 /**
407  * Write the attributes in array_atts to the variable identified by var_id. The
408  * name is used in error messages. Returns zero if successful.
409  */
411 int write_variable_attributes(int parent_id, int var_id,
412  teca_metadata &array_atts);
413 
414 }
415 #endif
A class to manage a fixed size pool of threads that dispatch work.
Definition: teca_cpu_thread_pool.h:34
A generic container for meta data in the form of name=value pairs.
Definition: teca_metadata.h:22
A traits class mapping to C++ from netcdf.
Definition: teca_netcdf_util.h:73
A RAII class for managing NETCDF files. The file is kept open while the object exists.
Definition: teca_netcdf_util.h:133
netcdf_handle(const netcdf_handle &)=delete
netcdf_handle(netcdf_handle &&other)
Definition: teca_netcdf_util.h:154
int create(MPI_Comm comm, const std::string &file_path, int mode)
netcdf_handle(int h)
Definition: teca_netcdf_util.h:139
void operator=(netcdf_handle &&other)
Definition: teca_netcdf_util.h:161
int open(MPI_Comm comm, const std::string &file_path, int mode)
int open(const std::string &file_path, int mode)
int create(const std::string &file_path, int mode)
int & get()
Definition: teca_netcdf_util.h:205
~netcdf_handle()
Definition: teca_netcdf_util.h:143
A traits class mapping to netcdf from C++.
Definition: teca_netcdf_util.h:70
Functional that reads and returns a variable from the named file.
Definition: teca_netcdf_util.h:329
std::pair< p_teca_variant_array, teca_metadata > data_elem_t
Definition: teca_netcdf_util.h:332
Function that reads and returns a variable from the named file.
Definition: teca_netcdf_util.h:375
std::pair< unsigned long, p_teca_variant_array > data_t
Definition: teca_netcdf_util.h:378
TECA_EXPORT int clamp_dimensions_of_one(unsigned long nx_max, unsigned long ny_max, unsigned long nz_max, unsigned long *extent, bool verbose)
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
TECA_EXPORT std::string path(const std::string &filename)
Codes dealing with NetCDF I/O calls.
Definition: teca_netcdf_util.h:67
void crtrim(char *s, long n)
TECA_EXPORT int write_variable_attributes(int parent_id, int var_id, teca_metadata &array_atts)
TECA_EXPORT int read_attribute(int parent_id, int var_id, const std::string &att_name, teca_metadata &atts)
std::mutex & get_netcdf_mutex()
TECA_EXPORT int get_varid(netcdf_handle &fh, const std::string &var_name, int *parent_id, int *var_id)
TECA_EXPORT int read_variable_attributes(netcdf_handle &fh, const std::string &parent_group, int var_id, const std::string &x_variable, const std::string &y_variable, const std::string &z_variable, const std::string &t_variable, const std::string &ensemble_dimension_name, int clamp_dimensions_of_one, std::string &name, teca_metadata &atts)
std::shared_ptr< teca_variant_array > p_teca_variant_array
Definition: teca_variant_array.h:27