TECA
The Toolkit for Extreme Climate Analysis
teca_string_util.h
Go to the documentation of this file.
1 #ifndef teca_string_util_h
2 #define teca_string_util_h
3 
4 /// @file
5 
#include "teca_common.h"

#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <set>
#include <string>
#include <vector>
14 
15 /// Codes for dealing with string processing
16 namespace teca_string_util
17 {
18 /** Convert the characters between the first and second double
19  * quote to a std::string. Escaped characters are skipped. Return
20  * 0 if successful.
21  */
22 int extract_string(const char *istr, std::string &field);
23 
24 /** Scan the input string (istr) for the given delimiter (delim). Push a pointer
25  * to the first non-delimiter character and the first character after each
26  * instance of the delimiter. Return zero if successful. When successful there
27  * will be at least one value.
28  */
29 int tokenize(char *istr, char delim, int n_cols, char **ostr);
30 
31 
/** Split the input string (istr) in place on the given delimiter (delim).
 * Delimiters at the start of the string are skipped. A pointer to the first
 * non-delimiter character is appended to ostr, followed by a pointer to the
 * character after each delimiter that is not the last character of the
 * string. Every delimiter that is visited is overwritten with '\0' so that
 * each pointer refers to a null terminated token. Adjacent delimiters
 * produce empty tokens.
 *
 * @param[in,out] istr  the null terminated string to split. it is modified.
 * @param[in]     delim the character separating the tokens
 * @param[out]    ostr  a container with push_back that receives char* to
 *                      the start of each token
 * @returns zero if successful, in which case ostr gained at least one
 *          value. -1 if istr is empty or holds only delimiters.
 */
template <typename container_t = std::vector<char*>>
int tokenize(char *istr, char delim, container_t &ostr)
{
    char *cur = istr;

    // step over delimiters at the front of the string
    while ((*cur != '\0') && (*cur == delim))
        ++cur;

    // only delimiters, or nothing at all
    if (*cur == '\0')
        return -1;

    // the first token starts here
    ostr.push_back(cur);

    while (*cur != '\0')
    {
        if (*cur != delim)
        {
            // still inside the current token
            ++cur;
            continue;
        }

        // terminate the current token and move past the delimiter
        *cur = '\0';
        ++cur;

        // a delimiter at the very end of the string does not start a token
        if (*cur != '\0')
            ostr.push_back(cur);
    }

    return 0;
}
71 
/** Advance buf past any leading spaces, tabs, carriage returns, and new lines.
 *
 * @param[in,out] buf pointer into a null terminated string. on return it
 *                    points at the first non-pad character, or at the
 *                    terminating '\0' when there is none.
 * @returns 0 if a non-pad character was found, -1 if the end of the string
 *          was reached first.
 */
inline
int skip_pad(char *&buf)
{
    for (;; ++buf)
    {
        switch (*buf)
        {
            case ' ':
            case '\n':
            case '\r':
            case '\t':
                // a pad character, keep going
                continue;
            default:
                break;
        }
        // reached a non-pad character or the terminating '\0'
        break;
    }

    return (*buf != '\0') ? 0 : -1;
}
83 
84 /// return 0 if the first non-pad character is #
85 inline
86 int is_comment(char *buf)
87 {
88  skip_pad(buf);
89  if (buf[0] == '#')
90  return 1;
91  return 0;
92 }
93 
/** A traits class for scanf conversion codes. The primary template is empty,
 * each supported type provides a static format() method through a
 * specialization declared with DECLARE_SCANF_TT.
 */
template <typename num_t>
struct scanf_tt {};

/* Declares the scanf_tt specialization for the C++ type _TYPE whose format()
 * method returns the scanf conversion code _CODE. */
#define DECLARE_SCANF_TT(_TYPE, _CODE)                                      \
/** A traits class for scanf conversion codes, specialized for _TYPE */    \
template<>                                                                  \
struct scanf_tt<_TYPE>                                                      \
{                                                                           \
    static const char *format()                                             \
    {                                                                       \
        return _CODE;                                                       \
    }                                                                       \
};

// floating point
DECLARE_SCANF_TT(float, " %g")
DECLARE_SCANF_TT(double, " %lg")

// signed integers
DECLARE_SCANF_TT(char, " %hhi")
DECLARE_SCANF_TT(short, " %hi")
DECLARE_SCANF_TT(int, " %i")
DECLARE_SCANF_TT(long, " %li")
DECLARE_SCANF_TT(long long, "%lli")

// unsigned integers
DECLARE_SCANF_TT(unsigned char, " %hhu")
DECLARE_SCANF_TT(unsigned short, " %hu")
DECLARE_SCANF_TT(unsigned int, " %u")
DECLARE_SCANF_TT(unsigned long, " %lu")
DECLARE_SCANF_TT(unsigned long long, "%llu")

// double quoted text
DECLARE_SCANF_TT(std::string, " \"%128s")
119 
120 /// A traits class for conversion from text to numbers
121 template <typename T>
122 struct string_tt {};
123 
124 #define DECLARE_STR_CONVERSION_I(_CPP_T, _FUNC) \
125 /** A traits class for conversion from text to numbers, specialized for _CPP_T */ \
126 template <> \
127 struct string_tt<_CPP_T> \
128 { \
129  static const char *type_name() { return # _CPP_T; } \
130  \
131  static int convert(char *str, _CPP_T &val) \
132  { \
133  errno = 0; \
134  char *endp = nullptr; \
135  _CPP_T tmp = _FUNC(str, &endp, 0); \
136  if (errno != 0) \
137  { \
138  TECA_ERROR("Failed to convert string \"" \
139  << str << "\" to a nunber." << strerror(errno)) \
140  return -1; \
141  } \
142  else if (endp == str) \
143  { \
144  TECA_ERROR("Failed to convert string \"" \
145  << str << "\" to a nunber. Invalid string.") \
146  return -1; \
147  } \
148  val = tmp; \
149  return 0; \
150  } \
151 };
152 
153 #define DECLARE_STR_CONVERSION_F(_CPP_T, _FUNC) \
154 /** A traits class for conversion from text to numbers, specialized for _CPP_T */ \
155 template <> \
156 struct string_tt<_CPP_T> \
157 { \
158  static const char *type_name() { return # _CPP_T; } \
159  \
160  static int convert(const char *str, _CPP_T &val) \
161  { \
162  errno = 0; \
163  char *endp = nullptr; \
164  _CPP_T tmp = _FUNC(str, &endp); \
165  if (errno != 0) \
166  { \
167  TECA_ERROR("Failed to convert string \"" \
168  << str << "\" to a nunber." << strerror(errno)) \
169  return -1; \
170  } \
171  else if (endp == str) \
172  { \
173  TECA_ERROR("Failed to convert string \"" \
174  << str << "\" to a nunber. Invalid string.") \
175  return -1; \
176  } \
177  val = tmp; \
178  return 0; \
179  } \
180 };
181 
182 DECLARE_STR_CONVERSION_F(float, strtof)
183 DECLARE_STR_CONVERSION_F(double, strtod)
184 DECLARE_STR_CONVERSION_I(char, strtol)
185 DECLARE_STR_CONVERSION_I(short, strtol)
186 DECLARE_STR_CONVERSION_I(int, strtol)
187 DECLARE_STR_CONVERSION_I(long, strtoll)
188 DECLARE_STR_CONVERSION_I(long long, strtoll)
189 
190 /// A traits class for conversion from text to numbers, specialized for bool
191 template <>
192 struct string_tt<bool>
193 {
194  static const char *type_name() { return "bool"; }
195 
196  static int convert(const char *str, bool &val)
197  {
198  char buf[17];
199  buf[16] = '\0';
200  size_t n = strlen(str);
201  n = n < 17 ? n : 16;
202  for (size_t i = 0; i < n && i < 16; ++i)
203  buf[i] = tolower(str[i]);
204  buf[n] = '\0';
205  if ((strcmp(buf, "0") == 0)
206  || (strcmp(buf, "false") == 0) || (strcmp(buf, "off") == 0))
207  {
208  val = false;
209  return 0;
210  }
211  else if ((strcmp(buf, "1") == 0)
212  || (strcmp(buf, "true") == 0) || (strcmp(buf, "on") == 0))
213  {
214  val = true;
215  return 0;
216  }
217 
218  TECA_ERROR("Failed to convert string \"" << str << "\" to a bool")
219  return -1;
220  }
221 };
222 
223 /// A traits class for conversion from text to numbers, specialized for std::string
224 template <>
225 struct string_tt<std::string>
226 {
227  static const char *type_name() { return "std::string"; }
228 
229  static int convert(const char *str, std::string &val)
230  {
231  val = str;
232  return 0;
233  }
234 };
235 
236 /** A traits class for conversion from text to numbers, specialized for char*
237  * watch out for memory leak, val needs to be free'd
238  */
239 template <>
240 struct string_tt<char*>
241 {
242  static const char *type_name() { return "char*"; }
243 
244  static int convert(const char *str, char *&val)
245  {
246  val = strdup(str);
247  return 0;
248  }
249 };
250 
251 /** Extract the value in a "name = value" pair.
252  * an error occurs if splitting the input on '=' doesn't produce 2 tokens
253  * or if the conversion to val_t fails. returns 0 if successful.
254  */
255 template <typename val_t>
256 int extract_value(char *l, val_t &val)
257 {
258  std::vector<char*> tmp;
259  if (tokenize(l, '=', tmp) || (tmp.size() != 2))
260  {
261  TECA_ERROR("Invalid name specifier in \"" << l << "\"")
262  return -1;
263  }
264 
265  char *r = tmp[1];
266  if (skip_pad(r) || string_tt<val_t>::convert(r, val))
267  {
269  << " value \"" << r << "\" in \"" << l << "\"")
270  return -1;
271  }
272 
273  return 0;
274 }
275 
276 /** Given a collection of strings, where some of the strings end with a common
277  * substring, the post-fix, this function visits each string in the collection
278  * and removes the post-fix from each string that it is found in.
279  */
280 void remove_post_fix(std::set<std::string> &names, std::string post_fix);
281 
/** Map the two character string consisting of a pair of double quotes to
 * the empty string.
 *
 * @param[in] in the string to test
 * @returns an empty string when in is exactly "" (two double quote
 *          characters), otherwise a copy of in.
 */
inline std::string emptystr(const std::string &in)
{
    if (in.compare("\"\"") == 0)
        return std::string();

    return in;
}
287 
288 }
289 
290 #endif
teca_string_util::scanf_tt
A traits class for scanf conversion codes.
Definition: teca_string_util.h:96
teca_string_util::skip_pad
int skip_pad(char *&buf)
Definition: teca_string_util.h:76
teca_string_util::extract_string
int extract_string(const char *istr, std::string &field)
teca_string_util::tokenize
int tokenize(char *istr, char delim, int n_cols, char **ostr)
teca_string_util::string_tt
A traits class for conversion from text to numbers.
Definition: teca_string_util.h:122
teca_common.h
teca_string_util::emptystr
std::string emptystr(const std::string &in)
When passed the string "" return empty string otherwise return the passed string.
Definition: teca_string_util.h:283
teca_string_util
Codes for dealing with string processing.
Definition: teca_string_util.h:16
teca_string_util::extract_value
int extract_value(char *l, val_t &val)
Definition: teca_string_util.h:256
teca_string_util::is_comment
int is_comment(char *buf)
return 1 if the first non-pad character is #, 0 otherwise
Definition: teca_string_util.h:86
TECA_ERROR
#define TECA_ERROR(_msg)
Constructs an error message and sends it to the stderr stream.
Definition: teca_common.h:138
teca_string_util::remove_post_fix
void remove_post_fix(std::set< std::string > &names, std::string post_fix)