TECA
The Toolkit for Extreme Climate Analysis
teca_string_util.h
Go to the documentation of this file.
1 #ifndef teca_string_util_h
2 #define teca_string_util_h
3 
4 /// @file
5 
#include "teca_config.h"
#include "teca_common.h"

#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <set>
#include <string>
#include <vector>
15 
16 /// Codes for dealing with string processing
18 {
19 /** Convert the characters between the first and second double
20  * quote to a std::string. Escaped characters are skipped. Return
21  * 0 if successful.
22  */
24 int extract_string(const char *istr, std::string &field);
25 
26 /** Scan the input string (istr) for the given delimiter (delim). push a pointer
27  * to the first non-delimiter character and the first character after each
28  * instance of the delimiter. return zero if successful. when successful there
29  * will be at least one value.
30  */
32 int tokenize(char *istr, char delim, int n_cols, char **ostr);
33 
34 
/** Split the input string (istr) in place on the given delimiter (delim).
 * Delimiters at the start of the string are skipped. A pointer to the first
 * non-delimiter character, and to the first character after each subsequent
 * delimiter, is appended to ostr. Each delimiter that ends a token is
 * overwritten with a terminator, hence the pointers in ostr refer to storage
 * owned by istr. Adjacent delimiters inside the string produce an empty token,
 * while a delimiter at the very end of the string does not.
 *
 * @param[in,out] istr  the null terminated string to split, modified in place
 * @param[in]     delim the character separating the tokens
 * @param[out]    ostr  a container of char* providing push_back
 * @returns zero if successful. when successful there will be at least one
 *          value. non-zero is returned when istr is null, empty, or holds
 *          nothing but delimiters.
 */
template <typename container_t = std::vector<char*>>
int tokenize(char *istr, char delim, container_t &ostr)
{
    // a null pointer holds no tokens
    if (istr == nullptr)
        return -1;

    // skip delim at the beginning
    while ((*istr != '\0') && (*istr == delim))
        ++istr;

    // nothing here
    if (*istr == '\0')
        return -1;

    // save the first
    ostr.push_back(istr);

    while (*istr != '\0')
    {
        // advance to the end of the current token
        while ((*istr != '\0') && (*istr != delim))
            ++istr;

        // the terminator is tested before the delimiter so that a delim of
        // '\0' can never move the scan past the end of the input
        if (*istr == '\0')
            break;

        // terminate the token
        *istr = '\0';
        ++istr;

        // not at the end, start the next token
        if (*istr != '\0')
            ostr.push_back(istr);
    }

    return 0;
}
75 
/** Advance buf past any leading spaces, tabs, carriage returns, and new
 * lines. On return buf refers to the first character that is not one of
 * these, or to the terminator when there is none.
 *
 * @returns -1 if the end of the string is reached before a non-pad character
 *          is encountered, 0 otherwise.
 */
inline
int skip_pad(char *&buf)
{
    for (;; ++buf)
    {
        switch (*buf)
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                // a pad character, move on to the next one
                continue;
        }
        // anything else, including the terminator, ends the scan
        break;
    }
    return (*buf != '\0') ? 0 : -1;
}
87 
88 /// return 0 if the first non-pad character is #
89 inline
90 int is_comment(char *buf)
91 {
92  skip_pad(buf);
93  if (buf[0] == '#')
94  return 1;
95  return 0;
96 }
97 
// NOTE(review): the declaration that follows the template header was missing
// from the reviewed listing. An empty primary template is assumed here, so
// that only the specializations below provide format() - confirm against the
// repository.
/// A traits class for scanf conversion codes.
template <typename num_t>
struct scanf_tt {};

/* Declares the specialization of scanf_tt for the type _CPP_T. The static
 * method format() of the specialization returns the string literal _FMT_STR.
 */
#define DECLARE_SCANF_TT(_CPP_T, _FMT_STR) \
/** A traits class for scanf conversion codes, specialized for _CPP_T */ \
template<> \
struct scanf_tt<_CPP_T> \
{ \
    static \
    const char *format() { return _FMT_STR; } \
};
DECLARE_SCANF_TT(float," %g")
DECLARE_SCANF_TT(double," %lg")
DECLARE_SCANF_TT(char," %hhi")
DECLARE_SCANF_TT(short, " %hi")
DECLARE_SCANF_TT(int, " %i")
DECLARE_SCANF_TT(long, " %li")
DECLARE_SCANF_TT(long long, "%lli")
DECLARE_SCANF_TT(unsigned char," %hhu")
DECLARE_SCANF_TT(unsigned short, " %hu")
DECLARE_SCANF_TT(unsigned int, " %u")
DECLARE_SCANF_TT(unsigned long, " %lu")
DECLARE_SCANF_TT(unsigned long long, "%llu")
DECLARE_SCANF_TT(std::string, " \"%128s")
123 
// NOTE(review): the declaration that follows the template header was missing
// from the reviewed listing. An empty primary template is assumed here, so
// that only the specializations provide type_name() and convert() - confirm
// against the repository.
/// A traits class for conversion from text to numbers
template <typename T>
struct string_tt {};

/* Declares the specialization of string_tt for the integer type _CPP_T.
 * _FUNC names the strtol style function (string, end pointer, base) used to
 * parse the text. A base of 0 is passed to _FUNC.
 *
 * convert returns 0 and stores the parsed value in val when successful. It
 * returns -1, reports the error through TECA_ERROR, and leaves val unmodified
 * when _FUNC sets errno, when no characters could be converted, or when the
 * parsed value does not fit in _CPP_T.
 */
#define DECLARE_STR_CONVERSION_I(_CPP_T, _FUNC) \
/** A traits class for conversion from text to numbers, specialized for _CPP_T */ \
template <> \
struct string_tt<_CPP_T> \
{ \
    static const char *type_name() { return # _CPP_T; } \
    \
    static int convert(const char *str, _CPP_T &val) \
    { \
        errno = 0; \
        char *endp = nullptr; \
        /* keep the result at the width _FUNC returns so it can be range checked */ \
        auto tmp = _FUNC(str, &endp, 0); \
        if (errno != 0) \
        { \
            TECA_ERROR("Failed to convert string \"" \
                << str << "\" to a number. " << strerror(errno)) \
            return -1; \
        } \
        else if (endp == str) \
        { \
            TECA_ERROR("Failed to convert string \"" \
                << str << "\" to a number. Invalid string.") \
            return -1; \
        } \
        /* a value that does not survive the round trip through _CPP_T is */ \
        /* outside the range of _CPP_T */ \
        _CPP_T narrowed = static_cast<_CPP_T>(tmp); \
        if (static_cast<decltype(tmp)>(narrowed) != tmp) \
        { \
            TECA_ERROR("Failed to convert string \"" \
                << str << "\" to a number. The value is out of range for " # _CPP_T) \
            return -1; \
        } \
        val = narrowed; \
        return 0; \
    } \
};
156 
/* Declares the specialization of string_tt for the floating point type
 * _CPP_T. _FUNC names the strtod style function (string, end pointer) used to
 * parse the text.
 *
 * convert returns 0 and stores the parsed value in val when successful. It
 * returns -1, reports the error through TECA_ERROR, and leaves val unmodified
 * when _FUNC sets errno or when no characters could be converted.
 */
#define DECLARE_STR_CONVERSION_F(_CPP_T, _FUNC) \
/** A traits class for conversion from text to numbers, specialized for _CPP_T */ \
template <> \
struct string_tt<_CPP_T> \
{ \
    static const char *type_name() { return # _CPP_T; } \
    \
    static int convert(const char *str, _CPP_T &val) \
    { \
        /* errno is cleared so that a non-zero value afterward is known to */ \
        /* come from _FUNC */ \
        errno = 0; \
        char *endp = nullptr; \
        _CPP_T tmp = _FUNC(str, &endp); \
        if (errno != 0) \
        { \
            TECA_ERROR("Failed to convert string \"" \
                << str << "\" to a number. " << strerror(errno)) \
            return -1; \
        } \
        else if (endp == str) \
        { \
            /* the end pointer did not move, nothing was converted */ \
            TECA_ERROR("Failed to convert string \"" \
                << str << "\" to a number. Invalid string.") \
            return -1; \
        } \
        val = tmp; \
        return 0; \
    } \
};
185 
186 DECLARE_STR_CONVERSION_F(float, strtof)
187 DECLARE_STR_CONVERSION_F(double, strtod)
188 DECLARE_STR_CONVERSION_I(char, strtol)
189 DECLARE_STR_CONVERSION_I(short, strtol)
190 DECLARE_STR_CONVERSION_I(int, strtol)
191 DECLARE_STR_CONVERSION_I(long, strtoll)
192 DECLARE_STR_CONVERSION_I(long long, strtoll)
193 
194 /// A traits class for conversion from text to numbers, specialized for bool
195 template <>
196 struct string_tt<bool>
197 {
198  static const char *type_name() { return "bool"; }
199 
200  static int convert(const char *str, bool &val)
201  {
202  char buf[17];
203  buf[16] = '\0';
204  size_t n = strlen(str);
205  n = n < 17 ? n : 16;
206  for (size_t i = 0; i < n && i < 16; ++i)
207  buf[i] = tolower(str[i]);
208  buf[n] = '\0';
209  if ((strcmp(buf, "0") == 0)
210  || (strcmp(buf, "false") == 0) || (strcmp(buf, "off") == 0))
211  {
212  val = false;
213  return 0;
214  }
215  else if ((strcmp(buf, "1") == 0)
216  || (strcmp(buf, "true") == 0) || (strcmp(buf, "on") == 0))
217  {
218  val = true;
219  return 0;
220  }
221 
222  TECA_ERROR("Failed to convert string \"" << str << "\" to a bool")
223  return -1;
224  }
225 };
226 
227 /// A traits class for conversion from text to numbers, specialized for std::string
228 template <>
229 struct string_tt<std::string>
230 {
231  static const char *type_name() { return "std::string"; }
232 
233  static int convert(const char *str, std::string &val)
234  {
235  val = str;
236  return 0;
237  }
238 };
239 
240 /** A traits class for conversion from text to numbers, specialized for char*
241  * watch out for memory leak, val needs to be free'd
242  */
243 template <>
244 struct string_tt<char*>
245 {
246  static const char *type_name() { return "char*"; }
247 
248  static int convert(const char *str, char *&val)
249  {
250  val = strdup(str);
251  return 0;
252  }
253 };
254 
255 /** Extract the value in a "name = value" pair.
256  * an error occurs if splitting the input on '=' doesn't produce 2 tokens
257  * or if the conversion to val_t fails. returns 0 if successful.
258  */
259 template <typename val_t>
261 int extract_value(char *l, val_t &val)
262 {
263  std::vector<char*> tmp;
264  if (tokenize(l, '=', tmp) || (tmp.size() != 2))
265  {
266  TECA_ERROR("Invalid name specifier in \"" << l << "\"")
267  return -1;
268  }
269 
270  char *r = tmp[1];
271  if (skip_pad(r) || string_tt<val_t>::convert(r, val))
272  {
274  << " value \"" << r << "\" in \"" << l << "\"")
275  return -1;
276  }
277 
278  return 0;
279 }
280 
281 /** Given a collection of strings, where some of the strings end with a common
282  * substring, the post-fix, this function visits each string in the collection
283  * and removes the post-fix from each string that it is found in.
284  */
286 void remove_postfix(std::set<std::string> &names, std::string postfix);
287 
/// Returns an empty string when passed the two character string "" (a pair of
/// double quotes), otherwise returns a copy of the passed string.
inline std::string emptystr(const std::string &in)
{
    if (in == "\"\"")
        return std::string();

    return in;
}
294 
295 }
296 
297 #endif
teca_string_util::scanf_tt
A traits class for scanf conversion codes.
Definition: teca_string_util.h:100
teca_string_util::skip_pad
int skip_pad(char *&buf)
Definition: teca_string_util.h:80
teca_string_util::remove_postfix
TECA_EXPORT void remove_postfix(std::set< std::string > &names, std::string postfix)
teca_string_util::extract_string
TECA_EXPORT int extract_string(const char *istr, std::string &field)
teca_string_util::tokenize
TECA_EXPORT int tokenize(char *istr, char delim, int n_cols, char **ostr)
teca_string_util::string_tt
A traits class for conversion from text to numbers.
Definition: teca_string_util.h:126
teca_string_util::extract_value
TECA_EXPORT int extract_value(char *l, val_t &val)
Definition: teca_string_util.h:261
teca_common.h
teca_string_util::emptystr
TECA_EXPORT std::string emptystr(const std::string &in)
When passed the string "" return empty string otherwise return the passed string.
Definition: teca_string_util.h:290
teca_string_util
Codes for dealing with string processing.
Definition: teca_string_util.h:17
teca_error::TECA_EXPORT
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
teca_string_util::is_comment
int is_comment(char *buf)
return 1 if the first non-pad character is #, 0 otherwise
Definition: teca_string_util.h:90
TECA_ERROR
#define TECA_ERROR(_msg)
Constructs an error message and sends it to the stderr stream.
Definition: teca_common.h:146