Awali
Another Weighted Automata library
exp_parser.hh
Go to the documentation of this file.
1 // This file is part of Awali.
2 // Copyright 2016-2023 Sylvain Lombardy, Victor Marsault, Jacques Sakarovitch
3 //
4 // Awali is a free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 #ifndef AWALI_EXP_PARSER_HH
18 #define AWALI_EXP_PARSER_HH
19 namespace awali {
20  namespace sttc {
21  namespace internal {
22 
23  template<typename RatExpSet>
24  struct exp_parser;
25  }
26  }
27 }
32 #include <awali/common/parse_exception.cc>
33 #include<string>
34 #include<list>
35 
36 /*
37 
38  E -> P | E+P
39  P -> S | PS | P.S
40  S -> L | S* | S{exp} | S?
41  L -> R | <weight>R
42  R -> A | A<weight> | S*<weight> | S{exp}<weight> | S?<weight>
43  A -> label | (E) | [labelinterval]
44 
45  This grammar implements the following priorities:
46  - + has the least priority
47  - then product (either . or catenation)
48  - then exponent
49  - then left weight
50  right weight has a priori the highest priority, but loses it if it is applied to an exponent:
51  <x>a*<y> is interpreted as ((<x>a)*)<y>
52 */
53 
54 namespace awali { namespace sttc {
55 
56  namespace internal {
57 
58  template<typename RatExpSet>
59  struct exp_parser {
60  using ratexpset_t = RatExpSet;
63  using ratexp_t = typename ratexpset_t::value_t;
65 
66  exp_parser(const ratexpset_t& rs, const std::string& s, bool strict_alphabet)
67  : p_(s.length()), rs_(rs), s_(s), strict_(strict_alphabet)
68  {}
69 
70  exp_parser(const ratexpset_t& rs, const std::string& s, size_t& p, bool strict_alphabet)
71  : rs_(rs), s_(s), p_(p), strict_(strict_alphabet)
72  {}
73 
75  ignore();
76  ratexp_t e2=parseP();
77  if(p_ == 0)
78  return e2;
79  if(s_[p_-1]=='+') {
80  --p_;
81  ignore();
82  if(p_==0)
83  throw parse_exception("Parsing expression");
84  ratexp_t e1=parseE();
85  return ratexp_t(rs_.add(e1,e2));
86  }
87  return e2;
88  }
89 
91  ratexp_t e2=parseS();
92  if(p_==0)
93  return e2;
94  if(s_[p_-1]=='.') {
95  --p_;
96  ignore();
97  if(p_==0)
98  throw parse_exception("Parsing product");
99  ratexp_t e1=parseP();
100  return ratexp_t(rs_.mul(e1,e2));
101  }
102  if(s_[p_-1]!='(' && s_[p_-1]!='+') {
103  ratexp_t e1=parseP();
104  return ratexp_t(rs_.mul(e1,e2));
105  }
106  return e2;
107  }
108 
110  if(s_[p_-1]=='*'){
111  --p_;
112  ignore();
113  if(p_==0)
114  throw parse_exception("Parsing star");
115  ratexp_t e=parseS();
116  return ratexp_t(rs_.star(e));
117  }
118  if(s_[p_-1]=='?'){
119  --p_;
120  ignore();
121  if(p_==0)
122  throw parse_exception("Parsing ?");
123  ratexp_t e=parseS();
124  return ratexp_t(rs_.maybe(e));
125  }
126  if(s_[p_-1]=='}'){
127  --p_;
128  ignore();
129  if(p_==0)
130  throw parse_exception("Parsing exponent");
131  if(s_[p_-1]=='+') {
132  --p_;
133  if(p_==0)
134  throw parse_exception("Parsing {+}");
135  if(s_[p_-1]!='{')
136  throw parse_exception("Parsing {+}");
137  --p_;
138  ratexp_t e=parseS();
139  return ratexp_t(rs_.plus(e));
140  }
141  int a,b,h;
142  if(s_[p_-1]==',')
143  b=-1;
144  else {
145  b=0; h=1;
146  while(s_[p_-1]>='0' && s_[p_-1]<='9') {
147  b+=h*(s_[p_-1]-'0'); h*=10;
148  --p_;
149  ignore();
150  if(p_==0)
151  throw parse_exception("Parsing exponent");
152  }
153  }
154  a=b;
155  if(s_[p_-1]!='{') {
156  if(s_[p_-1]!=',' || p_==1)
157  throw parse_exception(p_-1,"Parsing exponent");
158  --p_;
159  ignore();
160  a=0; h=1;
161  while(s_[p_-1]>='0' && s_[p_-1]<='9') {
162  a+=h*(s_[p_-1]-'0'); h*=10;
163  --p_;
164  ignore();
165  if(p_==0)
166  throw parse_exception("Parsing exponent");
167  }
168  if(s_[p_-1]!='{')
169  throw parse_exception(p_-1,"Parsing exponent");
170  }
171  --p_;
172  ignore();
173  if(p_==0)
174  throw parse_exception("Parsing");
175  ratexp_t e=parseS();
176  ratexp_t p(rs_.one());
177  for(int i=0; i<a; ++i)
178  p=ratexp_t(rs_.mul(p,e));
179  if(b==-1)
180  return ratexp_t(rs_.mul(p,rs_.star(e)));
181  ratexp_t pp(rs_.one());
182  ratexp_t qq(rs_.one());
183  for(int i=a; i<b; ++i) {
184  pp=ratexp_t(rs_.mul(pp,e));
185  qq=ratexp_t(rs_.add(qq,pp));
186  }
187  return ratexp_t(rs_.mul(p,qq));
188  }
189  return parseL();
190  }
191 
193  ratexp_t e=parseR();
194  if(p_==0)
195  return e;
196  if(s_[p_-1]=='>') {
197  --p_;
198  ignore();
199  auto w = ws_.parse(s_,p_);
200  ignore();
201  if(p_==0 || s_[p_-1]!='<')
202  throw parse_exception(p_-1,"Parsing weight");
203  --p_;
204  ignore();
205  return ratexp_t(rs_.lmul(w, e));
206  }
207  return e;
208  }
209 
211  if(p_==0)
212  throw parse_exception("Parsing right weight");
213  if(s_[p_-1]=='>') {
214  --p_;
215  ignore();
216  auto w = ws_.parse(s_,p_);
217  ignore();
218  if(p_==0 || s_[p_-1]!='<')
219  throw parse_exception(p_-1,"Parsing right weight");
220  --p_;
221  ignore();
222  if(s_[p_-1]=='*' || s_[p_-1]=='?' || s_[p_-1]=='}') {
223  ratexp_t e=parseS();
224  return ratexp_t(rs_.rmul(e, w));
225  }
226  else {
227  ratexp_t e=parseA();
228  return ratexp_t(rs_.rmul(e, w));
229  }
230  }
231  return parseA();
232  }
233 
235  if(p_>1 && s_[p_-1]=='e' && s_[p_-2]=='\\') {
236  p_-=2;
237  ignore();
238  return ratexp_t(rs_.one());
239  }
240  if(p_>1 && s_[p_-1]=='z' && s_[p_-2]=='\\') {
241  p_-=2;
242  ignore();
243  return ratexp_t(rs_.zero());
244  }
245  if(p_==0)
246  throw parse_exception("Parsing atom");
247  if(s_[p_-1]==')'){
248  --p_;
249  ignore();
250  ratexp_t res= parseE();
251  if(p_==0)
252  throw parse_exception("Parsing atom");
253  if(s_[p_-1]=='(') {
254  --p_;
255  ignore();
256  return res;
257  }
258  }
259  if(s_[p_-1]==']')
260  return parseSqrBrkt(ls_);
261  auto l = ls_.parse(s_,p_, strict_);
262  ignore();
263  return ratexp_t(rs_.atom(l));
264  }
265 
266  template<typename T>
267  ratexp_t parseSqrBrkt(const T&) {
268  --p_;
269  //ignore(); // to capture space at the end of list
270  std::set<typename labelset_t::value_t> letter_list;
271  while (p_>0 && s_[p_-1]!='[') {
272  if(strict_ && p_>1 && s_[p_-1]=='^' && s_[p_-2]=='['){
273  std::set<typename labelset_t::value_t> tmp;
274  auto const& alpha=ls_.genset();
275  auto it=alpha.begin();
276  auto ite=alpha.end();
277  for( auto l : letter_list) {
278  while(!(*it == l)) {
279  tmp.emplace(*it);
280  ++it;
281  }
282  ++it;
283  }
284  for(; it!=ite; ++it)
285  tmp.emplace(*it);
286  letter_list=tmp;
287  p_--;
288  break;
289  }
290  auto l = ls_.parse(s_,p_, strict_);
291  ignore();
292  if(s_[p_-1]=='-') {
293  --p_;
294  ignore();
295  auto k = ls_.parse(s_,p_, strict_);
296  ignore();
297  for(auto i=l; i>=k; --i) {
298  std::ostringstream os;
299  ls_.print(i,os);
300  const std::string& s=os.str();
301  size_t q=s.length();
302  ls_.parse(s,q, strict_);
303  letter_list.emplace(i);
304  }
305  }
306  else
307  letter_list.emplace(l);
308  }
309  if(p_==0)
310  throw parse_exception("Parsing interval");
311  --p_;
312  ignore();
313  bool first=true;
314  ratexp_t res;
315  for(auto l : letter_list) {
316  if(first) {
317  res = ratexp_t(rs_.atom(l));
318  first=false;
319  }
320  else
321  res = rs_.add(res,ratexp_t(rs_.atom(l)));
322  }
323  return res;
324  }
325 
326  template<typename... T>
328  auto l = ls_.parse(s_,p_, strict_);
329  ignore();
330  return ratexp_t(rs_.atom(l));
331  }
332 
333  template<typename F, typename S>
335  auto l = ls_.parse(s_,p_, strict_);
336  ignore();
337  return ratexp_t(rs_.atom(l));
338  }
339 
340  template<typename T>
342  auto l = ls_.parse(s_,p_, strict_);
343  ignore();
344  return ratexp_t(rs_.atom(l));
345  }
346 
347  void ignore() {
348  while(p_>0 && s_[p_-1]==' ')
349  --p_;
350  }
351 
352  size_t getPos() {
353  return p_;
354  }
355 
356  size_t p_;
357  private:
358  const ratexpset_t& rs_;
359  const std::string& s_;
360  const bool strict_;
361  weightset_t ws_ = *rs_.weightset();
362  labelset_t ls_ = *rs_.labelset();
363 
364  };
365  }
366 
445  template <typename RatExpSet>
446  inline
447  typename RatExpSet::ratexp_t
448  parse_exp(const RatExpSet& ratexpset,
449  const std::string& s, bool check_complete=true, bool strict_alphabet=true)
450  {
451  internal::exp_parser<RatExpSet> parser{ratexpset,s,strict_alphabet};
452  auto exp=parser.parseE();
453  if(check_complete && parser.getPos()>0)
454  throw parse_exception(parser.getPos(), "Parsing of a proper suffix");
455  return exp;
456  }
457 
458 }}//end of ns awali::stcs
459 
460 
461 #endif
The Boolean semring.
Definition: b.hh:38
Implementation of labels are letters.
Definition: letterset.hh:43
helper for manipulations of pair letters
Definition: pair.hh:35
The semiring of rational numbers.
Definition: q.hh:42
objets that represent the alphabets of letters as char
Definition: setalpha.hh:44
A ValueSet which is a Cartesian product of ValueSets.
Definition: tupleset.hh:80
Implementation of labels are words.
Definition: wordset.hh:35
variadic_mul_mixin< rat::ratexpset_impl< Context > > ratexpset
Definition: fwd.hh:192
RatExpSet::ratexp_t parse_exp(const RatExpSet &ratexpset, const std::string &s, bool check_complete=true, bool strict_alphabet=true)
parser of rational expressions
Definition: exp_parser.hh:448
typename internal::context_t_of_impl< internal::base_t< ValueSet > >::type context_t_of
Helper to retrieve the type of the context of a value set.
Definition: traits.hh:66
typename internal::labelset_t_of_impl< internal::base_t< ValueSet > >::type labelset_t_of
Helper to retrieve the type of the labelset of a value set.
Definition: traits.hh:76
typename internal::weightset_t_of_impl< internal::base_t< ValueSet > >::type weightset_t_of
Helper to retrieve the type of the weightset of a value set.
Definition: traits.hh:86
Main namespace of Awali.
Definition: ato.hh:22
Exceptions thrown during parsing.
Definition: parse_exception.hh:26
Definition: exp_parser.hh:59
size_t getPos()
Definition: exp_parser.hh:352
ratexp_t parseA()
Definition: exp_parser.hh:234
ratexp_t parseSqrBrkt(const tupleset< T... > &)
Definition: exp_parser.hh:327
context_t_of< ratexpset_t > context_t
Definition: exp_parser.hh:61
ratexp_t parseP()
Definition: exp_parser.hh:90
exp_parser(const ratexpset_t &rs, const std::string &s, size_t &p, bool strict_alphabet)
Definition: exp_parser.hh:70
exp_parser(const ratexpset_t &rs, const std::string &s, bool strict_alphabet)
Definition: exp_parser.hh:66
ratexp_t parseSqrBrkt(const T &)
Definition: exp_parser.hh:267
typename ratexpset_t::value_t ratexp_t
Definition: exp_parser.hh:63
ratexp_t parseE()
Definition: exp_parser.hh:74
RatExpSet ratexpset_t
Definition: exp_parser.hh:60
void ignore()
Definition: exp_parser.hh:347
ratexp_t parseS()
Definition: exp_parser.hh:109
ratexp_t parseL()
Definition: exp_parser.hh:192
size_t p_
Definition: exp_parser.hh:356
ratexp_t parseSqrBrkt(const letterset< set_alphabet< pair_letters< F, S >>> &)
Definition: exp_parser.hh:334
ratexp_t parseSqrBrkt(const wordset< T > &)
Definition: exp_parser.hh:341
weightset_t_of< ratexpset_t > weightset_t
Definition: exp_parser.hh:64
labelset_t_of< context_t > labelset_t
Definition: exp_parser.hh:62
ratexp_t parseR()
Definition: exp_parser.hh:210
Provide a variadic mul on top of a binary mul(), and one().
Definition: weightset.hh:38