TECA
The Toolkit for Extreme Climate Analysis
teca_parser.h
Go to the documentation of this file.
1 #ifndef teca_parser_h
2 #define teca_parser_h
3 
4 /// @file
5 
6 #include "teca_common.h"
7 
8 #include <vector>
9 #include <set>
10 #include <string>
11 #include <iostream>
12 #include <cctype>
13 #include <cstdlib>
14 #include <cstring>
15 #include <sstream>
16 #include <cmath>
17 #include <cstdio>
18 
/** Reports a parsing error through TECA_MESSAGE on std::cerr.
 *
 * The message consists of the description, the position, and the expression
 * itself with the character at the given position highlighted.
 *
 * @param _descr a string literal, optionally followed by further stream
 *               insertions (it is pasted after an empty string literal, so
 *               it must begin with a string literal).
 * @param _expr  pointer to the start of the null terminated expression.
 * @param _pos   offset of the offending character in _expr. It may be equal
 *               to the length of the expression (an error detected at the
 *               end of the input); in that case nothing is highlighted and
 *               nothing past the terminating null is read.
 *
 * _expr and _pos are evaluated more than once, avoid side effects.
 */
#define TECA_PARSER_ERROR(_descr, _expr, _pos) \
    TECA_MESSAGE(std::cerr, "ERROR:", ANSI_RED, \
        << "" _descr \
        << "at position " << (_pos) << " in \"" \
        << std::string((_expr), (_expr)+(_pos)) << END_HL \
        << BEGIN_HL(ANSI_RED) \
        << std::string((_expr)+(_pos), (_expr)[(_pos)] ? 1u : 0u) << END_HL \
        << BEGIN_HL(ANSI_WHITE) \
        << std::string((_expr)[(_pos)] ? (_expr)+(_pos)+1 : "") << "\"")
26 
/// Reports a syntax error at offset _pos of the expression _expr.
#define TECA_SYNTAX_ERROR(_expr, _pos) TECA_PARSER_ERROR("Syntax error. ", _expr, _pos)
29 
/// Reports that the constant or variable _name found at offset _pos of the
/// expression _expr could not be resolved. _name is parenthesized so that any
/// expression can be passed for it.
#define TECA_NAME_RESOLUTION_ERROR(_name, _expr, _pos) \
    TECA_PARSER_ERROR("Name resolution error \"" \
        << (_name) << "\". ", _expr, _pos)
33 
/// Reports that the operation _op found at offset _pos of the expression
/// _expr is not valid. _op is parenthesized so that any expression can be
/// passed for it.
#define TECA_INVALID_OPERATION_ERROR(_op, _expr, _pos) \
    TECA_PARSER_ERROR("Invalid operation \"" \
        << (_op) << "\". ", _expr, _pos)
37 
/// Reports that evaluating the operation _op found at offset _pos of the
/// expression _expr failed. _op is parenthesized so that any expression can
/// be passed for it.
#define TECA_OPERATION_FAILED_ERROR(_op, _expr, _pos) \
    TECA_PARSER_ERROR("Operation \"" << (_op) \
        << "\" failed. " , _expr, _pos)
41 
/// Reports that the operation _op found at offset _pos of the expression
/// _expr requires _nreq operands but only _ngive were available. The
/// streamed arguments are parenthesized so that any expression can be passed
/// for them.
#define TECA_NUM_OPERANDS_ERROR(_op, _nreq, _ngive, _expr, _pos) \
    TECA_PARSER_ERROR("Operation \"" << (_op) \
        << "\" requires " << (_nreq) << " operands, given " \
        << (_ngive) << ". ", _expr, _pos)
46 
47 /// Codes dealing with expression parsing.
48 namespace teca_parser
49 {
50 /** Recognizes and extracts tokens during parsing.
51  * given a pointer (first argument) the methods return the
52  * number of chars in the token, or 0 when the pointer doesn't
53  * point to a valid token, and copies the token into the buffer
54  * (second argument).
55 */
57 {
58 public:
59  static unsigned int get_open_group(const char *s, char *g);
60  static unsigned int get_close_group(const char *s, char *g);
61  static unsigned int get_constant_name(const char *s, char *c);
62  static unsigned int get_variable_name(const char *s, char *v);
63  static unsigned int get_unary_operator_name(const char *expr, char *op_name);
64  static unsigned int get_binary_operator_name(const char *expr, char *op_name);
65  static unsigned int get_ternary_operator_name(const char *expr, char *op_name);
66  static unsigned int get_operator_precedence(char *op);
67 };
68 
69 /** Convert infix expression to postfix. returns the postfix form
70  * of the expression in a string allocated with malloc. caller to
71  * free the string. return nullptr if there is an error.
72  *
73  * template types implement detection of classes of syntactical
74  * tokens. groups, constants, variables, and operators.
75 */
76 template<typename tokenizer_t=teca_parser::tokenizer>
77 char *infix_to_postfix(const char *iexpr, std::set<std::string> *variables)
78 {
79  std::vector<char*> operator_stack;
80  std::vector<unsigned int> group_position; // position of un-matched open group
81 
82  const char *expr = iexpr;
83  unsigned int n = strlen(expr);
84  char *rexpr = static_cast<char*>(malloc(3*n));
85  char *rpnexpr = rexpr;
86 
87  unsigned int token_len;
88  char token[256];
89 
90  while (*expr)
91  {
92  // skip white space
93  if (isspace(*expr))
94  {
95  while(*expr && isspace(*expr)) ++expr;
96  }
97  // recurse into grouped expression
98  else if ((token_len = tokenizer_t::get_open_group(expr, token)))
99  {
100  token[token_len-1] = '\0';
101 
102  char *tmp = infix_to_postfix<tokenizer_t>(token+1, variables);
103  unsigned int tmp_len = strlen(tmp);
104 
105  memcpy(rexpr, tmp, tmp_len);
106  rexpr += tmp_len;
107 
108  expr += token_len;
109 
110  free(tmp);
111  }
112  // pass constants through
113  else if ((token_len = tokenizer_t::get_constant_name(expr, token)))
114  {
115  memcpy(rexpr, token, token_len);
116  rexpr += token_len;
117  *rexpr++ = ' ';
118 
119  expr += token_len;
120  }
121  // pass variable names through, save the variable name
122  else if ((token_len = tokenizer_t::get_variable_name(expr, token)))
123  {
124  if (variables)
125  variables->insert(token);
126 
127  memcpy(rexpr, token, token_len);
128  rexpr += token_len;
129  *rexpr++ = ' ';
130 
131  expr += token_len;
132  }
133  // push operator names onto the stack
134  else if ((token_len = tokenizer_t::get_ternary_operator_name(expr, token))
135  || (token_len = tokenizer_t::get_binary_operator_name(expr, token))
136  || (token_len = tokenizer_t::get_unary_operator_name(expr, token)))
137  {
138  // apply precedence rules. operators of higher precedence
139  // are popped and applied
140  unsigned int p1 = tokenizer_t::get_operator_precedence(token);
141  while (operator_stack.size() &&
142  (tokenizer_t::get_operator_precedence(operator_stack.back()) >= p1))
143  {
144  char *op_name = operator_stack.back();
145  unsigned int op_len = strlen(op_name);
146 
147  operator_stack.pop_back();
148 
149  memcpy(rexpr, op_name, op_len);
150  rexpr += op_len;
151  *rexpr++ = ' ';
152 
153  free(op_name);
154  }
155 
156  // push the new operator
157  operator_stack.push_back(strdup(token));
158  expr += token_len;
159  }
160  // every other input indicates an error
161  else
162  {
163  TECA_SYNTAX_ERROR(iexpr, expr-iexpr)
164  return nullptr;
165  }
166  }
167 
168  // catch unmatched open group
169  if (group_position.size())
170  {
171  TECA_SYNTAX_ERROR(iexpr, group_position.back())
172  return nullptr;
173  }
174 
175  // apply the remaining operators
176  while (operator_stack.size())
177  {
178  char *op_name = operator_stack.back();
179  unsigned int op_len = strlen(op_name);
180 
181  operator_stack.pop_back();
182 
183  memcpy(rexpr, op_name, op_len);
184  rexpr += op_len;
185  *rexpr++ = ' ';
186 
187  free(op_name);
188  }
189 
190  // null terminate the transformed expression
191  *rexpr = '\0';
192  return rpnexpr;
193 }
194 
195 /** evaluate a postfix expression. returns non zero if an error occurred.
196  * the result of the evaluated expression is returned in iexpr_result.
197  *
198  * template types define the intermediate types used in the calculation.
199  * arg_t would likely be the const form of work_t. resolvers for constants,
200  * variables, and operators are passed. The purpose of the resolvers is
201  * to identify token class and implement variable lookup, and operator
202  * evaluation.
203 */
204 template<typename work_t, typename arg_t, typename operand_resolver_t,
205 typename operator_resolver_t, typename tokenizer_t=teca_parser::tokenizer>
206 int eval_postfix(arg_t &iexpr_result,
207  const char *iexpr, operand_resolver_t &operands)
208 {
209  if (!iexpr)
210  return -1;
211 
212  char token[256];
213  unsigned int token_len;
214 
215  std::vector<arg_t> var_stack;
216 
217  const char *expr = iexpr;
218  while (*expr)
219  {
220  // skip white space
221  if (isspace(*expr))
222  {
223  while(*expr && isspace(*expr)) ++expr;
224  }
225  // push constants onto the stack
226  else if ((token_len = tokenizer_t::get_constant_name(expr, token)))
227  {
228  work_t var;
229  if (operands.get_constant(token, var))
230  {
231  TECA_NAME_RESOLUTION_ERROR(token, iexpr, expr-iexpr)
232  return -1;
233  }
234  var_stack.push_back(var);
235  expr += token_len;
236  }
237  // push variables onto the stack
238  else if ((token_len = tokenizer_t::get_variable_name(expr, token)))
239  {
240  arg_t var;
241  if (operands.get_variable(token, var))
242  {
243  TECA_NAME_RESOLUTION_ERROR(token, iexpr, expr-iexpr)
244  return -1;
245  }
246  var_stack.push_back(var);
247  expr += token_len;
248  }
249  // pop 3 operands and apply ternary operators, push the result
250  else if ((token_len = tokenizer_t::get_ternary_operator_name(expr, token)))
251  {
252  // there must be at least 3 operands
253  unsigned int n_operands = var_stack.size();
254  if (n_operands < 3)
255  {
256  TECA_NUM_OPERANDS_ERROR(token, 3, n_operands, iexpr, expr-iexpr)
257  return -1;
258  }
259 
260  // get the operands
261  arg_t arg3 = var_stack.back();
262  var_stack.pop_back();
263 
264  arg_t arg2 = var_stack.back();
265  var_stack.pop_back();
266 
267  arg_t arg1 = var_stack.back();
268  var_stack.pop_back();
269 
270  // invoke ternary operator
271  int err_code;
272  work_t result;
273  if ((err_code = operator_resolver_t::invoke(token, result, arg1, arg2, arg3)))
274  {
275  if (err_code == -1)
276  {
277  TECA_INVALID_OPERATION_ERROR(token, iexpr, expr-iexpr)
278  }
279  else if (err_code == -2)
280  {
281  TECA_OPERATION_FAILED_ERROR(token, iexpr, expr-iexpr)
282  }
283  return -1;
284  }
285 
286  // push result
287  var_stack.push_back(result);
288 
289  expr += token_len;
290  }
291  // pop 2 operands and apply binary operators, push the result
292  else if ((token_len = tokenizer_t::get_binary_operator_name(expr, token)))
293  {
294  // there must be at least 2 operands
295  unsigned int n_operands = var_stack.size();
296  if (n_operands < 2)
297  {
298  TECA_NUM_OPERANDS_ERROR(token, 2, n_operands, iexpr, expr-iexpr)
299  return -1;
300  }
301 
302  // get the operands
303  arg_t right_arg = var_stack.back();
304  var_stack.pop_back();
305 
306  arg_t left_arg = var_stack.back();
307  var_stack.pop_back();
308 
309  // invoke binary operator
310  int err_code;
311  work_t result;
312  if ((err_code = operator_resolver_t::invoke(token, result, left_arg, right_arg)))
313  {
314  if (err_code == -1)
315  {
316  TECA_INVALID_OPERATION_ERROR(token, iexpr, expr-iexpr)
317  }
318  else if (err_code == -2)
319  {
320  TECA_OPERATION_FAILED_ERROR(token, iexpr, expr-iexpr)
321  }
322  return -1;
323  }
324 
325  // push result
326  var_stack.push_back(result);
327 
328  expr += token_len;
329  }
330  // pop one operand, apply unary operator, push the result
331  else if ((token_len = tokenizer_t::get_unary_operator_name(expr, token)))
332  {
333  // there must be at least 1 operands
334  unsigned int n_operands = var_stack.size();
335  if (n_operands < 1)
336  {
337  TECA_NUM_OPERANDS_ERROR(token, 1, n_operands, iexpr, expr-iexpr)
338  return -1;
339  }
340 
341  // get the operands
342  arg_t arg = var_stack.back();
343  var_stack.pop_back();
344 
345  // invoke unary operator
346  int err_code;
347  work_t result;
348  if ((err_code = operator_resolver_t::invoke(token, result, arg)))
349  {
350  if (err_code == -1)
351  {
352  TECA_INVALID_OPERATION_ERROR(token, iexpr, expr-iexpr)
353  }
354  else if (err_code == -2)
355  {
356  TECA_OPERATION_FAILED_ERROR(token, iexpr, expr-iexpr)
357  }
358  return -1;
359  }
360 
361  // store the result
362  var_stack.push_back(result);
363 
364  // move to operator_resolver
365  expr += token_len;
366  }
367  // the expression contains characters that are of an unknown
368  // class, not constant, nor variable, nor operator
369  else
370  {
371  TECA_SYNTAX_ERROR(iexpr, expr-iexpr)
372  return -1;
373  }
374  }
375 
376  // the result should be on the stack, and it should be the only thing
377  // on the stack
378  if (var_stack.size() != 1)
379  {
380  TECA_SYNTAX_ERROR(iexpr, expr-iexpr)
381  return -1;
382  }
383  iexpr_result = var_stack.back();
384 
385  return 0;
386 }
387 };
388 
389 #endif
Definition: teca_parser.h:57
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.
Codes dealing with expression parsing.
Definition: teca_parser.h:49
int eval_postfix(arg_t &iexpr_result, const char *iexpr, operand_resolver_t &operands)
Definition: teca_parser.h:206
char * infix_to_postfix(const char *iexpr, std::set< std::string > *variables)
Definition: teca_parser.h:77