Awali
Another Weighted Automata library
exp_parser.hh
Go to the documentation of this file.
1 // This file is part of Awali.
2 // Copyright 2016-2022 Sylvain Lombardy, Victor Marsault, Jacques Sakarovitch
3 //
4 // Awali is a free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 #ifndef AWALI_EXP_PARSER_HH
18 #define AWALI_EXP_PARSER_HH
19 namespace awali {
20  namespace sttc {
21  namespace internal {
22 
23  template<typename RatExpSet>
24  struct exp_parser;
25  }
26  }
27 }
32 #include <awali/common/parse_exception.cc>
33 #include<string>
34 #include<list>
35 
36 /*
37 
38  E -> P | E+P
39  P -> S | PS | P.S
40  S -> L | S* | S{exp} | S?
41  L -> R | <weight>R
42  R -> A | A<weight> | S*<weight> | S{exp}<weight> | S?<weight>
43  A -> label | (E) | [labelinterval]
44 
45  This grammar implements the following priorities:
46  - + has the least priority
47  - then product (either . or catenation)
48  - then exponent
49  - then left weight
50  right weight has a priori the highest priority, but loses it if it is applied to an exponent:
51  <x>a*<y> is interpreted as ((<x>a)*)<y>
52 */
53 
54 namespace awali { namespace sttc {
55 
56  namespace internal {
57 
58  template<typename RatExpSet>
59  struct exp_parser {
60  using ratexpset_t = RatExpSet;
63  using ratexp_t = typename ratexpset_t::value_t;
65 
66  exp_parser(const ratexpset_t& rs, const std::string& s, bool strict_alphabet)
67  : p_(s.length()), rs_(rs), s_(s), strict_(strict_alphabet)
68  {}
69 
70  exp_parser(const ratexpset_t& rs, const std::string& s, size_t& p, bool strict_alphabet)
71  : rs_(rs), s_(s), p_(p), strict_(strict_alphabet)
72  {}
73 
75  ignore();
76  ratexp_t e2=parseP();
77  if(p_ == 0)
78  return e2;
79  if(s_[p_-1]=='+') {
80  --p_;
81  ignore();
82  if(p_==0)
83  throw parse_exception("Parsing expression");
84  ratexp_t e1=parseE();
85  return ratexp_t(rs_.add(e1,e2));
86  }
87  return e2;
88  }
89 
91  ratexp_t e2=parseS();
92  if(p_==0)
93  return e2;
94  if(s_[p_-1]=='.') {
95  --p_;
96  ignore();
97  if(p_==0)
98  throw parse_exception("Parsing product");
99  ratexp_t e1=parseP();
100  return ratexp_t(rs_.mul(e1,e2));
101  }
102  if(s_[p_-1]!='(' && s_[p_-1]!='+') {
103  ratexp_t e1=parseP();
104  return ratexp_t(rs_.mul(e1,e2));
105  }
106  return e2;
107  }
108 
110  if(s_[p_-1]=='*'){
111  --p_;
112  ignore();
113  if(p_==0)
114  throw parse_exception("Parsing star");
115  ratexp_t e=parseS();
116  return ratexp_t(rs_.star(e));
117  }
118  if(s_[p_-1]=='?'){
119  --p_;
120  ignore();
121  if(p_==0)
122  throw parse_exception("Parsing ?");
123  ratexp_t e=parseS();
124  return ratexp_t(rs_.add(e,rs_.one()));
125  }
126  if(s_[p_-1]=='}'){
127  --p_;
128  ignore();
129  if(p_==0)
130  throw parse_exception("Parsing exponent");
131  int a,b,h;
132  if(s_[p_-1]==',')
133  b=-1;
134  else {
135  b=0; h=1;
136  while(s_[p_-1]>='0' && s_[p_-1]<='9') {
137  b+=h*(s_[p_-1]-'0'); h*=10;
138  --p_;
139  ignore();
140  if(p_==0)
141  throw parse_exception("Parsing exponent");
142  }
143  }
144  a=b;
145  if(s_[p_-1]!='{') {
146  if(s_[p_-1]!=',' || p_==1)
147  throw parse_exception(p_-1,"Parsing exponent");
148  --p_;
149  ignore();
150  a=0; h=1;
151  while(s_[p_-1]>='0' && s_[p_-1]<='9') {
152  a+=h*(s_[p_-1]-'0'); h*=10;
153  --p_;
154  ignore();
155  if(p_==0)
156  throw parse_exception("Parsing exponent");
157  }
158  if(s_[p_-1]!='{')
159  throw parse_exception(p_-1,"Parsing exponent");
160  }
161  --p_;
162  ignore();
163  if(p_==0)
164  throw parse_exception("Parsing");
165  ratexp_t e=parseS();
166  ratexp_t p(rs_.one());
167  for(int i=0; i<a; ++i)
168  p=ratexp_t(rs_.mul(p,e));
169  if(b==-1)
170  return ratexp_t(rs_.mul(p,rs_.star(e)));
171  ratexp_t pp(rs_.one());
172  ratexp_t qq(rs_.one());
173  for(int i=a; i<b; ++i) {
174  pp=ratexp_t(rs_.mul(pp,e));
175  qq=ratexp_t(rs_.add(qq,pp));
176  }
177  return ratexp_t(rs_.mul(p,qq));
178  }
179  return parseL();
180  }
181 
183  ratexp_t e=parseR();
184  if(p_==0)
185  return e;
186  if(s_[p_-1]=='>') {
187  --p_;
188  ignore();
189  auto w = ws_.parse(s_,p_);
190  ignore();
191  if(p_==0 || s_[p_-1]!='<')
192  throw parse_exception(p_-1,"Parsing weight");
193  --p_;
194  ignore();
195  return ratexp_t(rs_.lmul(w, e));
196  }
197  return e;
198  }
199 
201  if(p_==0)
202  throw parse_exception("Parsing right weight");
203  if(s_[p_-1]=='>') {
204  --p_;
205  ignore();
206  auto w = ws_.parse(s_,p_);
207  ignore();
208  if(p_==0 || s_[p_-1]!='<')
209  throw parse_exception(p_-1,"Parsing right weight");
210  --p_;
211  ignore();
212  if(s_[p_-1]=='*' || s_[p_-1]=='?' || s_[p_-1]=='}') {
213  ratexp_t e=parseS();
214  return ratexp_t(rs_.rmul(e, w));
215  }
216  else {
217  ratexp_t e=parseA();
218  return ratexp_t(rs_.rmul(e, w));
219  }
220  }
221  return parseA();
222  }
223 
225  if(p_>1 && s_[p_-1]=='e' && s_[p_-2]=='\\') {
226  p_-=2;
227  ignore();
228  return ratexp_t(rs_.one());
229  }
230  if(p_>1 && s_[p_-1]=='z' && s_[p_-2]=='\\') {
231  p_-=2;
232  ignore();
233  return ratexp_t(rs_.zero());
234  }
235  if(p_==0)
236  throw parse_exception("Parsing atom");
237  if(s_[p_-1]==')'){
238  --p_;
239  ignore();
240  ratexp_t res= parseE();
241  if(p_==0)
242  throw parse_exception("Parsing atom");
243  if(s_[p_-1]=='(') {
244  --p_;
245  ignore();
246  return res;
247  }
248  }
249  if(s_[p_-1]==']')
250  return parseSqrBrkt(ls_);
251  auto l = ls_.parse(s_,p_, strict_);
252  ignore();
253  return ratexp_t(rs_.atom(l));
254  }
255 
256  template<typename T>
257  ratexp_t parseSqrBrkt(const T&) {
258  --p_;
259  //ignore(); // to capture space at the end of list
260  std::set<typename labelset_t::value_t> letter_list;
261  while (p_>0 && s_[p_-1]!='[') {
262  if(strict_ && p_>1 && s_[p_-1]=='^' && s_[p_-2]=='['){
263  std::set<typename labelset_t::value_t> tmp;
264  auto const& alpha=ls_.genset();
265  auto it=alpha.begin();
266  auto ite=alpha.end();
267  for( auto l : letter_list) {
268  while(!(*it == l)) {
269  tmp.emplace(*it);
270  ++it;
271  }
272  ++it;
273  }
274  for(; it!=ite; ++it)
275  tmp.emplace(*it);
276  letter_list=tmp;
277  p_--;
278  break;
279  }
280  auto l = ls_.parse(s_,p_, strict_);
281  ignore();
282  if(s_[p_-1]=='-') {
283  --p_;
284  ignore();
285  auto k = ls_.parse(s_,p_, strict_);
286  ignore();
287  for(auto i=l; i>=k; --i) {
288  std::ostringstream os;
289  ls_.print(i,os);
290  const std::string& s=os.str();
291  size_t q=s.length();
292  ls_.parse(s,q, strict_);
293  letter_list.emplace(i);
294  }
295  }
296  else
297  letter_list.emplace(l);
298  }
299  if(p_==0)
300  throw parse_exception("Parsing interval");
301  --p_;
302  ignore();
303  bool first=true;
304  ratexp_t res;
305  for(auto l : letter_list) {
306  if(first) {
307  res = ratexp_t(rs_.atom(l));
308  first=false;
309  }
310  else
311  res = rs_.add(res,ratexp_t(rs_.atom(l)));
312  }
313  return res;
314  }
315 
316  template<typename... T>
318  auto l = ls_.parse(s_,p_, strict_);
319  ignore();
320  return ratexp_t(rs_.atom(l));
321  }
322 
323  template<typename F, typename S>
325  auto l = ls_.parse(s_,p_, strict_);
326  ignore();
327  return ratexp_t(rs_.atom(l));
328  }
329 
330  template<typename T>
332  auto l = ls_.parse(s_,p_, strict_);
333  ignore();
334  return ratexp_t(rs_.atom(l));
335  }
336 
337  void ignore() {
338  while(p_>0 && s_[p_-1]==' ')
339  --p_;
340  }
341 
342  size_t getPos() {
343  return p_;
344  }
345 
346  size_t p_;
347  private:
348  const ratexpset_t& rs_;
349  const std::string& s_;
350  const bool strict_;
351  weightset_t ws_ = *rs_.weightset();
352  labelset_t ls_ = *rs_.labelset();
353 
354  };
355  }
356 
435  template <typename RatExpSet>
436  inline
437  typename RatExpSet::ratexp_t
438  parse_exp(const RatExpSet& ratexpset,
439  const std::string& s, bool check_complete=true, bool strict_alphabet=true)
440  {
441  internal::exp_parser<RatExpSet> parser{ratexpset,s,strict_alphabet};
442  auto exp=parser.parseE();
443  if(check_complete && parser.getPos()>0)
444  throw parse_exception(parser.getPos(), "Parsing of a proper suffix");
445  return exp;
446  }
447 
448 }}//end of ns awali::stcs
449 
450 
451 #endif
The Boolean semring.
Definition: b.hh:38
Implementation of labels are letters.
Definition: letterset.hh:43
helper for manipulations of pair letters
Definition: pair.hh:35
The semiring of rational numbers.
Definition: q.hh:42
objets that represent the alphabets of letters as char
Definition: setalpha.hh:44
A ValueSet which is a Cartesian product of ValueSets.
Definition: tupleset.hh:80
Implementation of labels are words.
Definition: wordset.hh:35
variadic_mul_mixin< rat::ratexpset_impl< Context > > ratexpset
Definition: fwd.hh:182
RatExpSet::ratexp_t parse_exp(const RatExpSet &ratexpset, const std::string &s, bool check_complete=true, bool strict_alphabet=true)
parser of rational expressions
Definition: exp_parser.hh:438
typename internal::context_t_of_impl< internal::base_t< ValueSet > >::type context_t_of
Helper to retrieve the type of the context of a value set.
Definition: traits.hh:66
typename internal::labelset_t_of_impl< internal::base_t< ValueSet > >::type labelset_t_of
Helper to retrieve the type of the labelset of a value set.
Definition: traits.hh:76
typename internal::weightset_t_of_impl< internal::base_t< ValueSet > >::type weightset_t_of
Helper to retrieve the type of the weightset of a value set.
Definition: traits.hh:86
Main namespace of Awali.
Definition: ato.hh:22
Exceptions thrown during parsing.
Definition: parse_exception.hh:26
Definition: exp_parser.hh:59
size_t getPos()
Definition: exp_parser.hh:342
ratexp_t parseA()
Definition: exp_parser.hh:224
ratexp_t parseSqrBrkt(const tupleset< T... > &)
Definition: exp_parser.hh:317
context_t_of< ratexpset_t > context_t
Definition: exp_parser.hh:61
ratexp_t parseP()
Definition: exp_parser.hh:90
exp_parser(const ratexpset_t &rs, const std::string &s, size_t &p, bool strict_alphabet)
Definition: exp_parser.hh:70
exp_parser(const ratexpset_t &rs, const std::string &s, bool strict_alphabet)
Definition: exp_parser.hh:66
ratexp_t parseSqrBrkt(const T &)
Definition: exp_parser.hh:257
typename ratexpset_t::value_t ratexp_t
Definition: exp_parser.hh:63
ratexp_t parseE()
Definition: exp_parser.hh:74
RatExpSet ratexpset_t
Definition: exp_parser.hh:60
void ignore()
Definition: exp_parser.hh:337
ratexp_t parseS()
Definition: exp_parser.hh:109
ratexp_t parseL()
Definition: exp_parser.hh:182
size_t p_
Definition: exp_parser.hh:346
ratexp_t parseSqrBrkt(const letterset< set_alphabet< pair_letters< F, S >>> &)
Definition: exp_parser.hh:324
ratexp_t parseSqrBrkt(const wordset< T > &)
Definition: exp_parser.hh:331
weightset_t_of< ratexpset_t > weightset_t
Definition: exp_parser.hh:64
labelset_t_of< context_t > labelset_t
Definition: exp_parser.hh:62
ratexp_t parseR()
Definition: exp_parser.hh:200
Provide a variadic mul on top of a binary mul(), and one().
Definition: weightset.hh:38