XTL  0.1
eXtended Template Library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
parse.hpp
Go to the documentation of this file.
1 
8 #pragma once
9 
10 #include <xtd/xtd.hpp>
11 
12 #include <memory>
13 #include <regex>
14 
15 #include <xtd/meta.hpp>
16 
17 namespace xtd{
18 
21  namespace parse{
22 
23 
/**
 Declares a string terminal whose matched text is the identifier itself.
 Expands to a character array named after the argument inside a nested
 namespace `_`, followed by an alias of xtd::parse::string bound to that array.
 @param _name identifier used both as the terminal's type name and its text
 */
#define STRING_(_name) \
  namespace _{ char _name[] = #_name; } \
  using _name = xtd::parse::string<decltype(_::_name), _::_name>;
31 
/**
 Declares a string terminal with an explicit text value.
 Expands to a character array initialised with the given literal inside a
 nested namespace `_`, followed by an alias of xtd::parse::string bound to it.
 @param _id identifier of the terminal type being declared
 @param _text string literal the terminal matches
 */
#define STRING(_id, _text) \
  namespace _{ char _id[] = _text; } \
  using _id = xtd::parse::string<decltype(_::_id), _::_id>;
41 
42 
/**
 Declares a single-character terminal.
 Expands to an (empty) nested namespace `_` and an alias of
 xtd::parse::character for the given character value.
 Unlike the other terminal macros the expansion carries no trailing semicolon,
 so the invocation must supply one.
 @param _id identifier of the terminal type being declared
 @param _ch character constant the terminal matches
 */
#define CHARACTER_(_id, _ch) \
  namespace _{} \
  using _id = xtd::parse::character<_ch>
53 
/**
 Declares a regular-expression terminal.
 Expands to a character array holding the pattern inside a nested namespace
 `_`, followed by an alias of xtd::parse::regex bound to that array.
 @param _id identifier of the terminal type being declared
 @param _pattern string literal containing the regular expression
 */
#define REGEX(_id, _pattern) \
  namespace _{ char _id[] = _pattern; } \
  using _id = xtd::parse::regex<decltype(_::_id), _::_id>;
64 
65 
/**
 Abstract base shared by every rule and terminal.
 Holds the list of child elements and exposes run-time type queries that the
 concrete implementations must provide.
 */
class rule_base{
public:
  /// shared ownership handle to any rule or terminal
  using pointer_type = std::shared_ptr<rule_base>;
  /// ordered collection of child elements
  using vector_type = std::vector<pointer_type>;

  /**
   Constructs the element from zero or more children.
   @param oChildRules values used to brace-initialise the child list
   */
  template <typename ... _RuleTs>
  explicit rule_base(_RuleTs&& ... oChildRules)
    : _Items{ std::forward<_RuleTs>(oChildRules)... }
  {}

  virtual ~rule_base() = default;

  /**
   Tests whether the concrete object identifies itself as the given type.
   @param oType type to test against
   */
  virtual bool isa(const std::type_info& oType) const = 0;

  /// @return type information reported by the concrete implementation
  virtual const std::type_info& type() const = 0;

  /// @return read-only view of the child elements
  const vector_type& items() const{
    return _Items;
  }

protected:
  /// child elements
  vector_type _Items;
};
97 
98 
104  template <typename _DeclT, typename _ImplT = _DeclT>
105  class rule : public rule_base{
106  public:
107  using decl_type = _DeclT;
108  using impl_type = _ImplT;
110 
111  template <typename ... _ChildRuleTs>
112  explicit rule(_ChildRuleTs&& ... oChildRules) : rule_base(std::forward<_ChildRuleTs>(oChildRules)...){}
113 
114  virtual ~rule() = default;
115 
116  virtual bool isa(const std::type_info& oType) const override{
117  return (typeid(rule) == oType) || (typeid(decl_type) == oType) || (typeid(impl_type) == oType) || (typeid(rule_base) == oType);
118  }
119 
120  virtual const std::type_info& type() const override{
121  return typeid(_DeclT);
122  }
123 
124  };
125 
126 
131  template <typename ...> class and_;
132 #if (!DOXY_INVOKED)
133  template <> class and_<> : public rule<and_<>>{
134  public:
135  using _super_t = rule<and_<>>;
136  template <typename ... _ChildRuleTs>
137  explicit and_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
138  };
139 
140  template <typename _HeadT, typename ... _TailT> class and_<_HeadT, _TailT...> : public rule<and_<_HeadT, _TailT...>>{
141  public:
142  using _super_t = rule<and_<_HeadT, _TailT...>>;
143  template <typename ... _ChildRuleTs>
144  explicit and_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
145 
146  };
147 #endif
148 
153  template <typename ...> class or_;
154 #if (!DOXY_INVOKED)
155  template <> class or_<> : public rule<or_<>>{
156  public:
157  using _super_t = rule<or_<>>;
158  template <typename ... _ChildRuleTs>
159  explicit or_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
160  };
161 
162  template <typename _HeadT, typename ... _TailT> class or_<_HeadT, _TailT...> : public rule<or_<_HeadT, _TailT...>>{
163  public:
164  using _super_t = rule<or_<_HeadT, _TailT...>>;
165  template <typename ... _ChildRuleTs>
166  explicit or_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
167  };
168 #endif
169 
170 
176  template <typename _Ty> class one_or_more_ : public rule<one_or_more_<_Ty>>{
177  public:
179  template <typename ... _ChildRuleTs>
180  explicit one_or_more_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
181  };
182 
183 
184 
190  template <typename _Ty> class zero_or_more_ : public rule<zero_or_more_<_Ty>>{
191  public:
193  template <typename ... _ChildRuleTs>
194  explicit zero_or_more_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
195  };
196 
197 
202  template <typename _Ty> class zero_or_one_ : public rule<zero_or_one_<_Ty>>{
203  public:
205  template <typename ... _ChildRuleTs>
206  explicit zero_or_one_(_ChildRuleTs&& ... oChildRules) : _super_t(std::forward<_ChildRuleTs>(oChildRules)...){}
207  };
208 
209 
/**
 Compile-time list of characters treated as whitespace.
 @tparam _Ch the whitespace characters
 */
template <char ... _Ch>
class whitespace{
public:
  /// the specialisation itself
  using whitespace_type = whitespace;
};
219 
220 
224  template <typename _Ty, _Ty &> class string;
225  #if (!DOXY_INVOKED)
226  template <size_t _len, char(&_str)[_len]> class string<char[_len], _str> : public rule<string<char[_len], _str>>{
227  public:
228  using _super_t = rule<string<char[_len], _str>>;
229  static constexpr size_t length = _len;
230  string() = default;
231  };
232  #endif
233 
234 
238  template <char _value> class character : public rule<character<_value>>{};
239 
243  template <typename _Ty, _Ty &> class regex;
244  #if (!DOXY_INVOKED)
245  template <size_t _len, char(&_str)[_len]> class regex<char[_len], _str> : public rule<regex<char[_len], _str>>{
246  public:
247  using _super_t = rule<regex<char[_len], _str>>;
248  static constexpr size_t length = _len;
249  explicit regex(const std::string& newval) : _super_t(), _value(newval){}
250  const std::string& value() const{ return _value; }
251  protected:
252  std::string _value;
253  };
254  #endif
255 
256 #if (!DOXY_INVOKED)
257 
261  namespace _{
262 
263  template <typename _DeclT, typename _ImplT, bool _IgnoreCase, typename _WhitespaceT> class parse_helper;
264 
266  template <typename _DeclT, size_t _len, char(&_str)[_len], typename _WhitespaceT>
267  class parse_helper<_DeclT, parse::string<char[_len], _str>, false, _WhitespaceT>{
268  public:
269  template <typename _IteratorT> static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end){
270  _IteratorT oCurr = begin;
271 
272  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
273 
274  rule_base::pointer_type oRet(nullptr);
275  for (size_t i = 0; (i < (_len-1)) && (oCurr < end); ++i, ++oCurr){
276  if (_str[i] != *oCurr){
277  return rule_base::pointer_type(nullptr);
278  }
279  }
280 
281  /*
282  Problem: The string comparison algorithms compare character by character and skip any leading or trailing whitespace between terminals.
283  Rules 'ABC' + 'XYZ' maybe defined expecting the two terminals be separated by whitespace but this algorithm could parse the string 'ABCXYZ' as two separate terminals.
284  There's a number of traditional approaches to solving this such as tokenizing before parsing or creating parse tables.
285  A proper handling would compound the complexity of this library beyond it's intended scope, there are plenty of complex parsers around.
286  Currently, the last character parsed is checked against the next character in the stream to see if they're of the same 'class' and fail if so.
287  This isn't ideal because it maybe perfectly valid in some grammars to expect 'ABCXYZ' to appear in the input stream yet successfully parse into independent terminals.
288  This library assumes contiguous alpha-numeric terminals constitute a single terminal so the input stream of 'ABCXYZ' will fail to parse without grammar definition trickery
289  */
290  if (oCurr < end && isalnum(*oCurr) && isalnum(_str[_len - 2])){
291  return rule_base::pointer_type(nullptr);
292  }
293  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
294 
295  begin = oCurr;
296  return rule_base::pointer_type(new _DeclT);
297  }
298  };
299 
300 
302  template <typename _DeclT, size_t _len, char(&_str)[_len], typename _WhitespaceT>
303  class parse_helper<_DeclT, parse::string<char[_len], _str>, true, _WhitespaceT>{
304  public:
305 
306  template <typename _IteratorT>
307  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end){
308  _IteratorT oCurr = begin;
309 
310  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
311 
312  rule_base::pointer_type oRet(nullptr);
313  for (size_t i = 0; (i < _len-1) && (oCurr < end); ++i, ++oCurr){
314  if (tolower(_str[i]) != tolower(*oCurr)){
315  return rule_base::pointer_type(nullptr);
316  }
317  }
319  if (oCurr < end && isalnum(*oCurr) && isalnum(_str[_len - 2])){
320  return rule_base::pointer_type(nullptr);
321  }
322 
323  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
324 
325  begin = oCurr;
326  return rule_base::pointer_type(new _DeclT);
327  }
328 
329  };
330 
332  template <typename _DeclT, size_t _len, char(&_str)[_len], bool _IgnoreCase, typename _WhitespaceT>
333  class parse_helper<_DeclT, parse::regex<char[_len], _str>, _IgnoreCase, _WhitespaceT>{
334  public:
335 
336  template <typename _IteratorT>
337  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end){
338  _IteratorT oCurr = begin;
339 
340  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
341 
342  static std::regex oRE(_str, std::regex_constants::ECMAScript | std::regex_constants::nosubs | std::regex_constants::optimize | (_IgnoreCase ? std::regex_constants::icase : std::regex_constants::optimize));
343  std::match_results<std::string::iterator> oMatch;
344  if (!std::regex_search(oCurr, end, oMatch, oRE, std::regex_constants::match_continuous | std::regex_constants::format_first_only)){
345  return rule_base::pointer_type(nullptr);
346  }
347  oCurr += oMatch[0].length();
348 
350  if (oCurr < end && isalnum(*oCurr) && isalnum(_str[_len - 1])){
351  return rule_base::pointer_type(nullptr);
352  }
353 
354 
355  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
356 
357  begin = oCurr;
358  return rule_base::pointer_type(new _DeclT(oMatch[0].str()));
359 
360  }
361 
362  };
363 
365  template <bool _IgnoreCase>
366  class parse_helper<whitespace<>, void, _IgnoreCase, void>{
367  public:
368  template <typename _IteratorT>
369  static bool parse(_IteratorT&, _IteratorT&){ return false; }
370  };
371 
372  template <char _HeadCH, char... _TailCH, bool _IgnoreCase>
373  class parse_helper<whitespace<_HeadCH, _TailCH...>, void, _IgnoreCase, void>{
374  public:
375  template <typename _IteratorT>
376  static bool parse(_IteratorT& begin, _IteratorT& end){
377  _IteratorT oCurr = begin;
378 
379  while (oCurr < end){
380  if (*oCurr == _HeadCH){
381  oCurr++;
382  }else if (parse_helper<whitespace<_TailCH...>, void, _IgnoreCase, void>::parse(oCurr, end)){
383  //do nothing
384  }else{
385  break;
386  }
387  }
388  begin = oCurr;
389  return false;
390  }
391  };
392 
393  //character
394  template <typename _DeclT, char _Ch, typename _WhitespaceT>
395  class parse_helper<_DeclT, character<_Ch>, true, _WhitespaceT>{
396  public:
397 
398  template <typename _IteratorT>
399  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end){
400  _IteratorT oCurr = begin;
401  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
402  if (lower_case<char, _Ch>::value != tolower(*oCurr)){
403  return rule_base::pointer_type(nullptr);
404  }
405  ++oCurr;
406  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
407  begin = oCurr;
408  return rule_base::pointer_type(new character<_Ch>);
409  }
410 
411  };
412 
413  template <typename _DeclT, char _Ch, typename _WhitespaceT>
414  class parse_helper<_DeclT, character<_Ch>, false, _WhitespaceT>{
415  public:
416 
417  template <typename _IteratorT>
418  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end){
419  _IteratorT oCurr = begin;
420  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
421  if (_Ch != oCurr[0]){
422  return rule_base::pointer_type(nullptr);
423  }
424  ++oCurr;
425  parse_helper< _WhitespaceT, void, true, void>::parse(oCurr, end);
426  begin = oCurr;
427  return rule_base::pointer_type(new character<_Ch>);
428  }
429 
430  };
431 
432 
433  //and
434  template <typename _DeclT, bool _IgnoreCase, typename _WhitespaceT >
435  class parse_helper < _DeclT, parse::and_<>, _IgnoreCase, _WhitespaceT>{
436  public:
437 
438  template <typename _IteratorT, typename ... _ChildRuleTs>
439  static rule_base::pointer_type parse(_IteratorT& , _IteratorT& , _ChildRuleTs&& ... oChildRules){
440  return rule_base::pointer_type(new _DeclT(std::forward<_ChildRuleTs>(oChildRules)...));
441  }
442 
443  };
444 
445  template <typename _DeclT, typename _HeadT, typename ... _TailT, bool _IgnoreCase, typename _WhitespaceT >
446  class parse_helper < _DeclT, parse::and_<_HeadT, _TailT...>, _IgnoreCase, _WhitespaceT>{
447  public:
448 
449  template <typename _IteratorT, typename ... _ChildRuleTs>
450  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end, _ChildRuleTs&& ... oChildRules){
451  _IteratorT oBegin = begin;
452  auto oItem = parse_helper<_HeadT, typename _HeadT::impl_type, _IgnoreCase, _WhitespaceT>::parse(oBegin, end);
453  if (!oItem){
454  return oItem;
455  }
456  oItem = parse_helper<_DeclT, parse::and_<_TailT...>, _IgnoreCase, _WhitespaceT>::parse(oBegin, end, std::forward<_ChildRuleTs>(oChildRules)..., oItem);
457  if (oItem){
458  begin = oBegin;
459  }
460  return oItem;
461  }
462 
463  };
464 
465 
467  template <typename _DeclT, bool _IgnoreCase, typename _WhitespaceT >
468  class parse_helper < _DeclT, parse::or_<>, _IgnoreCase, _WhitespaceT>{
469  public:
470 
471  template <typename _IteratorT, typename ... _ChildRuleTs>
472  static rule_base::pointer_type parse(_IteratorT& , _IteratorT& , _ChildRuleTs ... ){
473  return rule_base::pointer_type(nullptr);
474  }
475 
476  };
477 
478  template <typename _DeclT, typename _HeadT, typename ... _TailT, bool _IgnoreCase, typename _WhitespaceT >
479  class parse_helper < _DeclT, parse::or_<_HeadT, _TailT...>, _IgnoreCase, _WhitespaceT>{
480  public:
481 
482  template <typename _IteratorT, typename ... _ChildRuleTs>
483  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end, _ChildRuleTs&& ... oChildRules){
484  _IteratorT oBegin = begin;
485  auto oItem = parse_helper<_HeadT, typename _HeadT::impl_type, _IgnoreCase, _WhitespaceT>::parse(oBegin, end);
486  if (oItem){
487  begin = oBegin;
488  return rule_base::pointer_type(new _DeclT(oItem));
489  }
490  oItem = parse_helper<_DeclT, parse::or_<_TailT...>, _IgnoreCase, _WhitespaceT>::parse(oBegin, end, std::forward<_ChildRuleTs>(oChildRules)..., oItem);
491  if (oItem){
492  begin = oBegin;
493  }
494  return oItem;
495  }
496 
497  };
498 
499 
500 
502  template <typename _DeclT, typename _Ty, bool _IgnoreCase, typename _WhitespaceT >
503  class parse_helper < _DeclT, parse::zero_or_more_<_Ty>, _IgnoreCase, _WhitespaceT>{
504  public:
505 
506  template <typename _IteratorT, typename ... _ChildRuleTs>
507  static rule_base::pointer_type parse(_IteratorT& begin, _IteratorT& end, _ChildRuleTs&& ... oChildRules){
508  _IteratorT oBegin = begin;
509  auto oItem = parse_helper<_Ty, typename _Ty::impl_type, _IgnoreCase, _WhitespaceT>::parse(oBegin, end);
510  if (!oItem){
511  return rule_base::pointer_type(new _DeclT(std::forward<_ChildRuleTs>(oChildRules)...));
512  }
513  return parse_helper<_DeclT, parse::zero_or_more_<_Ty>, _IgnoreCase, _WhitespaceT>::parse(oBegin, end, std::forward<_ChildRuleTs>(oChildRules)..., oItem);
514  }
515 
516  };
517  }
518 #endif
519  }
527  template <typename _RuleT, bool _IgnoreCase = false, typename _WhitespaceT = xtd::parse::whitespace<>> class parser {
528  public:
529 
535  template <typename _IteratorT> static parse::rule_base::pointer_type parse(_IteratorT begin, _IteratorT end) {
536  _IteratorT oBegin = begin;
537  _IteratorT oEnd = end;
538  return parse::_::parse_helper<_RuleT, typename _RuleT::impl_type, _IgnoreCase, _WhitespaceT>::parse(oBegin, oEnd);
539  }
540 
541  };
543 
544 }
545 
Represents a parse algorithm where all specified elements are contiguously present in the input...
Definition: parse.hpp:131
Represents a parse algorithm where the terminal or rule is repeated one or more times in the in...
Definition: parse.hpp:176
Main parser class.
Definition: parse.hpp:527
virtual const std::type_info & type() const =0
Gets the type info of the concrete implementation.
template meta-programming utilities
Character terminal parsing algorithm.
Definition: parse.hpp:238
Curiously recurring template pattern to simplify creation of rule_base implementations Rules and term...
Definition: parse.hpp:105
Represents a parse algorithm where one of the listed elements is present in the input stream One of t...
Definition: parse.hpp:153
virtual bool isa(const std::type_info &oType) const override
Determines if the interface is implemented by a concrete type.
Definition: parse.hpp:116
list of whitespace characters to ignore in the input stream.
Definition: parse.hpp:215
Represents a parse algorithm where the terminal or rule repeats exactly zero or one time in the input...
Definition: parse.hpp:202
host, target and build configurations and settings Various components are purpose built for specific ...
virtual const std::type_info & type() const override
Gets the type info of the concrete implementation.
Definition: parse.hpp:120
virtual bool isa(const std::type_info &oType) const =0
Determines if the interface is implemented by a concrete type.
regular expression parsing algorithm.
Definition: parse.hpp:243
rule_base(_ChildRuleTs &&...oChildRules)
Constructor.
Definition: parse.hpp:79
static parse::rule_base::pointer_type parse(_IteratorT begin, _IteratorT end)
Parses text.
Definition: parse.hpp:535
Represents a parse algorithm where the terminal or rule repeats zero or more times in the input strea...
Definition: parse.hpp:190
const vector_type & items() const
Accessor for child parse elements.
Definition: parse.hpp:92
Base class of both rules and terminals Though rules and terminals are technically different they shar...
Definition: parse.hpp:70
String terminal parsing algorithm.
Definition: parse.hpp:224