Awali
Another Weighted Automata library
congruence_det.hh
Go to the documentation of this file.
1 // This file is part of Awali.
2 // Copyright 2016-2022 Sylvain Lombardy, Victor Marsault, Jacques Sakarovitch
3 //
4 // Awali is a free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 #ifndef AWALI_ALGOS_CONGRUENCE_DET_HH
18 # define AWALI_ALGOS_CONGRUENCE_DET_HH
19 
20 #include <vector>
21 #include <queue>
22 
23 #include <awali/sttc/weightset/fwd.hh> // b
25 
26 /* This files contains two functions which compute the coarsest equivalence on states which is a congruence w.r.t transitions. The first one is an implementation of
27 the Moore algorithm; the second on is an implementation of the Hopcroft algorithm
28 */
29 
30 
31 namespace awali {
32  namespace sttc {
33 
34  namespace internal {
35  template<typename Aut, typename Weightset>
36  struct extended_label {
38  using type = std::pair<label_t,typename Weightset::value_t>;
39 
40  template <typename Tr>
41  static type getLabel(const Aut& aut, Tr tr) {
42  return {aut->label_of(tr),aut->weight_of(tr)};
43  }
44  };
45 
46  template<typename Aut>
47  struct extended_label<Aut, b> {
49  using type = label_t;
50 
51  template <typename Tr>
52  static type getLabel(const Aut& aut, Tr tr) {
53  return aut->label_of(tr);
54  }
55  };
56 
57  template <typename Aut>
58  void initialize_partition(const Aut& aut,std::vector<std::vector<state_t> >& states_in_part) {
59  using automaton_t = Aut;
60  using weightset_t = weightset_t_of<automaton_t>;
61  // using label_t = label_t_of<automaton_t>;
62  using extended_label_t = extended_label<automaton_t,weightset_t>;
63 
64  //The list of successors for each letter
66  for(auto s : aut->states()) {
67  for(auto tr : aut->all_out(s) ) {
68  auto a = extended_label_t::getLabel(aut,tr);
69  meet[a].emplace_back(s);
70  }
71  }
72 
73  states_in_part.resize(2);
74  states_in_part[0].emplace_back(aut->pre());
75  states_in_part[1].emplace_back(aut->post());
76  if(meet.domain().empty())
77  return;
78 
79  //The lists of outgoing labels for each state
81  for(auto a: meet.domain())
82  for(auto p:meet[a]) {
83  labels[p].emplace_back(a);
84  }
85 
86  //We first compute initial parts as states with the same output labels.
87  //Each part (unsigned) is a list of states
88  std::vector<std::vector<state_t> > list(1);
89  for(auto s : aut->states() )
90  list[0].emplace_back(s);
92  for(unsigned k=0;!list.empty();++k) {
93  std::vector<std::vector<state_t> > new_list;
94  for(auto l : list) {
95  tmplist.clear();
96  std::vector<state_t> shorter_sig_list;
97  for(auto r : l)
98  if(labels[r].size()==k)
99  shorter_sig_list.emplace_back(r);
100  else {
101  tmplist[labels[r][k]].emplace_back(r);
102  }
103  if(!shorter_sig_list.empty())
104  states_in_part.push_back(std::move(shorter_sig_list));
105  for(auto a : tmplist.domain())
106  new_list.push_back(std::move(tmplist[a]));
107  }
108  list.swap(new_list);
109  }
110  }
111  }
112 
113  /* This is a partition refining algorithm. At the beginning, there are three parts:
114  - the pre-initial state(0),
115  - the post-final state(1),
116  - the other states(2)
117 
118  At each round, every non singleton part 'i' is checked:
119  for each state 's' of 'i' the 'signature' of 's' is computed;
120  this is the parts that are reached from 's' for every letter
121  the algorithm guarantees (#) that two states are "Moore equivalent" iff they have the same signature.
122  The part 'i' is then split w.r.t. the states that share a signature.
123  If no part have been split during a round, the algorithm ends.
124 
125  The rounds are separated in the queue through a false part '0' (the true part 0 never appear in the queue).
126 
127  (#) The signatures are computed through a "weak sort" implemented with linked hash maps:
128  it is a linear algorithm in amortized time.
129  First, for every state 's' in 'i', for every transition s-a->q,
130  the pair (s,part[q]) is put in the list meet[a],
131  Then, for each letter 'a' in the domain of 'meet',
132  the pair (a,part[q]) is appended to the signature of s
133  This latter loop creates an ordering on letters: in every signature, the ordering over letter is a subsequence of the ordering of the domain of 'meet'
134  .
135  */
142  template <typename Aut>
143  void moore_det(const Aut& aut, std::vector<std::vector<state_t> >& states_in_part) {
144  using automaton_t = Aut;
146 
147  internal::initialize_partition(aut, states_in_part);
148 
151  //The list of successors for each letter
152  for(auto s : aut->states())
153  for(auto tr : aut->all_out(s) )
154  meet[aut->label_of(tr)].emplace_back(std::pair<state_t, state_t>{s,aut->dst_of(tr)});
155 
156  for(auto a: meet.domain())
157  for(auto p:meet[a])
158  succs[p.first].emplace_back(p.second);
159 
160  std::queue<unsigned> queue_part;
161  //And each state belongs to a part
162  std::vector<unsigned> part(aut->max_state()+1);
163  for(unsigned k=0; k< states_in_part.size(); ++k) {
164  for(auto s:states_in_part[k])
165  part[s]=k;
166  if(states_in_part[k].size()>1)
167  queue_part.emplace(k);
168  }
169 
170  //0 is a marker in the queue for the end of a round
171  //if there is no splitting between two occurences of 0
172  //the algorithm ends.
173  queue_part.emplace(0);
174  bool stop=true;
176  while(!queue_part.empty()) {
177  //i is the part which is the current splitter
178  unsigned i= queue_part.front();
179  queue_part.pop();
180  if(i==0) {//end of the round
181  if(stop)
182  return;
183  else {
184  //the parts for the next round are exactly the parts in the queue
185  //we start a new round and put a marker in the queue
186  stop=true;
187  queue_part.emplace(0);
188  continue;
189  }
190  }
191 
192  /* We compute the new parts
193  * At every step k, each vector of list contains states which coincide
194  * on the k first components of their signature;
195  * each vector in new_parts contains contains states with a signature <k
196  */
197 
198  std::vector<std::vector<state_t> > list;
199  std::vector<std::vector<state_t> > new_parts;
200  list.emplace_back(states_in_part[i]);
201 
202  for(unsigned k=0; !list.empty(); ++k) {
203  std::vector<std::vector<state_t> > new_list;
204  for(auto l : list) {
206  std::vector<state_t> shorter_sig_list;
207  for(auto r : l)
208  if(succs[r].size()==k)
209  shorter_sig_list.emplace_back(r);
210  else {
211  tmplist[part[succs[r][k]]].emplace_back(r);
212  }
213  if(!shorter_sig_list.empty())
214  new_parts.push_back(std::move(shorter_sig_list));
215  for(auto a : tmplist.domain())
216  new_list.push_back(std::move(tmplist[a]));
217  }
218  list.swap(new_list);
219  }
220 
221  //Creation of the new parts
222  unsigned lim=new_parts.size();
223  //if lim=1 the part has not been split
224  if(lim>1)
225  stop=false;
226  else {
227  queue_part.emplace(i);
228  continue;
229  }
230 
231  unsigned np=states_in_part.size();
232  for(unsigned k=0; k<lim;++k) {
233  unsigned p;
234  if(k==0) {
235  p=i;
236  states_in_part[i].swap(new_parts[0]);
237  }
238  else {
239  p=np++;
240  for(auto r : new_parts[k])
241  part[r]=p;
242  states_in_part.push_back(std::move(new_parts[k]));
243  }
244  if(states_in_part[p].size()>1)
245  queue_part.emplace(p);
246  }
247  }
248  }
249 
250  /*,-------------------.
251  | |
252  `-------------------'*/
253 
254  namespace internal {
255  template<typename Aut, typename Weightset>
257 
258  static void init(const Aut& aut,std::vector<std::vector<state_t> >& states_in_part, std::vector<unsigned>& part, std::queue<unsigned>& splitters, std::vector<bool>& is_in_queue) {
259  initialize_partition(aut, states_in_part);
260  //The part is initialised as 0 for every state
261  //except for post() which is in 1.
262  part[aut->pre()]=0;
263  part[aut->post()]=1;
264  // A part is a "splitter" if its predecessors can be considered to split the parts
265  is_in_queue.push_back(false); //Part 0 is not in the queue
266  unsigned max=1;
267  for(unsigned k=2; k< states_in_part.size(); ++k) {
268  for(auto s:states_in_part[k])
269  part[s]=k;
270  if(states_in_part[k].size()>states_in_part[max].size())
271  max=k;
272  }
273  for(unsigned k=1; k< states_in_part.size(); ++k) {
274  is_in_queue.push_back(k!=max);
275  if(k!=max)
276  splitters.emplace(k);
277  }
278  }
279  };
280 
281  //In the Boolean case, the initialisation is slightly different
282  template<typename Aut>
283  struct backward_initialisation<Aut, b> {
284  static void init(const Aut& aut,std::vector<std::vector<state_t> >& states_in_part, std::vector<unsigned>& part, std::queue<unsigned>& splitters, std::vector<bool>& is_in_queue) {
285  states_in_part.resize(3);
286  states_in_part[0].emplace_back(aut->pre());
287  states_in_part[1].emplace_back(aut->post());
288  part[aut->pre()]=0;
289  part[aut->post()]=1;
290  for(auto s: aut->states()) {
291  states_in_part[2].emplace_back(s);
292  part[s]=2;
293  }
294  is_in_queue.push_back(false);
295  is_in_queue.push_back(true);
296  is_in_queue.push_back(true);
297  splitters.emplace(1);
298  splitters.emplace(2);
299  }
300  };
301  }
302 
303  /* Description of the algorithm
304 
305  This algorithm compute the coarsest congruence ~ of the set of states Q such that,
306  for each pair of states p and q, if p~q then
307  1. p if final <=> q is final
308  2. for every letter a, the set of succ(p,a) = succ(q,a) mod ~
309 
310  This is a refinement algorithm.
311  There is first a partition between final and non final states.
312  The principle is that every part p is used as a "splitter":
313  for every predecessor r of any s of p,
314  the "signature" of r is computed:
315  that is the list of letters for which the successor of r exists and is in p.
316  Every concerned part is the split w.r.t the signatures of its states.
317 
318  To achieve a O(|A||Q|) complexity in each iteration,
319  weak sort is used.
320  Here, the weak sort is not implemented with sparse lists, but with linked_hash_map;
321  the complexity is therefore in amortized O(A||Q|).
322 
323  1. For every letter a that appear in a transition incoming to p,
324  meet[a] is the list of predecessors of some state of p by a
325  2. For every letter in the domain of meet,
326  for every state r in meet[a]
327  a is appended to the list signature[r]
328  -> letters appear in signatures in the same ordering
329 
330  Then, the list of states which have a signature must be (weakly) sorted in such a way
331  that states with the same signature and that belong to the same part are neighbours
332  1. For each state r in the domain of signature
333  r is appended to tmplist[part[r]]
334  Then, the list of lists (tmplist[p] | p in domain of tmplist) are concatenated.
335  For convenience, this concatenation 'list' is represented as a list of lists.
336  2. For every integer k,
337  r is appended to tmplist[signature[k]]
338  Then, the list of lists (tmplist[a] | a in domain of tmplist) are concatenated.
339  This is done for k up to the longest signature; if some signature are shorter,
340  the corresponding states are set in the "special list" list.
341 
342  Finally all the concerned part are splitted.
343  The smallest subpart inherits the index of the original part, the other receive fresh indices.
344  If there is more than a subpart, every subpart but the largest is pushed into the queue of splitters.
345  This leads to a logarithmic number of iterations
346 
347  Implementation idiosyncrasy :
348  * The pre-initial state and the post-final state are a part by themself.
349  * It is useless to use the pre-initial state as a splitter since it has no predecessor
350  * It it useless to identify final states before the main loop since they are exactly predecessors of the post-final state
351  */
359  template <typename Aut>
360  void hopcroft_det(const Aut& aut, std::vector<std::vector<state_t> >& states_in_part) {
361 
362  using automaton_t = Aut;
363 
365  using weightset_t = weightset_t_of<automaton_t>;
366 
367  //Each part (unsigned) is a list of states
368  std::vector<unsigned> part(aut->max_state()+1);
369  std::queue<unsigned> splitters;
370  std::vector<bool> is_in_queue;
371 
372  internal::backward_initialisation<automaton_t, weightset_t>::init(aut, states_in_part, part, splitters, is_in_queue);
373 
375  while(!splitters.empty()) {
376  //i is the part which is the current splitter
377  unsigned i= splitters.front();
378  splitters.pop();
379  is_in_queue[i]=false;
380 
381  //The list of predecessors for each letter
383  for(auto s : states_in_part[i]) {
384  for(auto tr : aut->all_in(s) ) {
385  state_t r = aut->src_of(tr);
386  label_t a = aut->label_of(tr);
387  meet[a].emplace_back(r);
388  }
389  }
390  if(meet.domain().empty())
391  continue;
392  /* The signature of each met 'r' state is computed
393  * signature[r] is the list of letters 'a' such that succ(r,a) is in part 'i'.
394  * The ordering of letters in signature[r] is compliant with the ordering
395  * of letters in meet.domain().
396  * Hence, if two states have successors in 'i' with the same letters, they have the same
397  * signature.
398  */
400  for(auto a : meet.domain())
401  for(auto r : meet[a])
402  signature[r].emplace_back(a);
403 
404  /* We compute the new parts
405  * At every step k, each vector of list contains states which coincide
406  * on the k first components of their signature;
407  * each vector in new_parts contains contains states qith a signature <k
408  * First, states that belong to different parts are separated.
409  */
411  {
413  for(auto r : signature.domain()) {
414  tmplist[part[r]].emplace_back(r);
416  }
417  for(auto j : tmplist.domain())
418  lists[j].push_back(std::move(tmplist[j]));
419  }
421  std::vector<std::vector<state_t> > new_list;
422  std::vector<state_t> shorter_sig_list;
423  for(unsigned j: lists.domain()) {
424  std::vector<std::vector<state_t>> list{std::move(lists[j])};
425  lists[j].clear();
426  for(unsigned k=0;!list.empty();++k) {
427  new_list.clear();
428  for(auto l : list) {
429  tmplist.clear();
430  shorter_sig_list.clear();
431  for(auto r : l)
432  if(signature[r].size()==k)
433  shorter_sig_list.emplace_back(r);
434  else {
435  tmplist[signature[r][k]].emplace_back(r);
436  }
437  if(!shorter_sig_list.empty())
438  lists[j].push_back(std::move(shorter_sig_list));
439  for(auto a : tmplist.domain())
440  new_list.push_back(std::move(tmplist[a]));
441  }
442  list.swap(new_list);
443  }
444  }
445  for(unsigned p: lists.domain()) {
446  std::vector<std::vector<state_t> >& new_parts=lists[p];;
447  //unsigned tp=new_parts.size();
448  std::vector<state_t> unmet_states;
449  for(auto r : states_in_part[p])
450  if(part[r]==p) //The met states have an undefined part
451  unmet_states.emplace_back(r);
452  if(!unmet_states.empty())
453  new_parts.push_back(std::move(unmet_states));
454  //p is the original part which is being split
455  unsigned lim=new_parts.size();
456  unsigned np=states_in_part.size();
457  if(is_in_queue[p]) {
458  states_in_part[p].swap(new_parts[0]);
459  for(auto r : states_in_part[p])
460  part[r]=p;
461  for(unsigned k=1; k<lim; ++k) {
462  for(auto r : new_parts[k])
463  part[r]=np;
464  states_in_part.push_back(std::move(new_parts[k]));
465  splitters.emplace(np);
466  is_in_queue.push_back(true);
467  ++np;
468  }
469  }
470  else {
471  //Search of the smallest and largest subparts
472  unsigned min_part=0;
473  unsigned max_part=0;
474  for(unsigned k=1; k<lim; ++k) {
475  if(new_parts[k].size() > new_parts[max_part].size())
476  max_part=k;
477  if(new_parts[k].size() <= new_parts[min_part].size())
478  min_part=k;
479  }
480  for(unsigned k=0; k<lim;++k)
481  if(k==min_part) {
482  //The index p is recycled for the smallest subpart
483  for(auto r : new_parts[k])
484  part[r] = p;
485  states_in_part[p].swap(new_parts[k]);
486  if(lim>1) {
487  is_in_queue[p]=true;
488  splitters.emplace(p);
489  }
490  }
491  else {
492  //For the other subparts, fresh parts are created
493  for(auto r : new_parts[k])
494  part[r]=np;
495  states_in_part.push_back(std::move(new_parts[k]));
496  if(k!=max_part) {
497  splitters.emplace(np);
498  is_in_queue.push_back(true);
499  }
500  else
501  is_in_queue.push_back(false);
502  ++np;
503  }
504  }
505  }
506  }
507  }
508  }
509 }//end of ns awali::stc
510 
511 #endif // !AWALI_ALGOS_MOORE16_HH
The Boolean semring.
Definition: b.hh:38
The semiring of floating Numbers.
Definition: r.hh:35
any_t label_t
Type for (transition) labels; it is an alias to any_t since its precise type depends on the weightset...
Definition: typedefs.hh:48
void initialize_partition(const Aut &aut, std::vector< std::vector< state_t > > &states_in_part)
Definition: congruence_det.hh:58
typename internal::label_t_of_impl< internal::base_t< ValueSet > >::type label_t_of
Helper to retrieve the type of the labels of a value set.
Definition: traits.hh:71
auto meet(const ratexpset< Ctx1 > &a, const ratexpset< Ctx2 > &b) -> ratexpset< meet_t< Ctx1, Ctx2 >>
The meet of two ratexpsets.
Definition: ratexpset.hh:434
void moore_det(const Aut &aut, std::vector< std::vector< state_t > > &states_in_part)
Moore algorithm for the minimization of deterministic Boolean automata (DFA), not necessarily complet...
Definition: congruence_det.hh:143
void hopcroft_det(const Aut &aut, std::vector< std::vector< state_t > > &states_in_part)
minimization with Hopcroft for incomplete deterministic automata
Definition: congruence_det.hh:360
typename internal::weightset_t_of_impl< internal::base_t< ValueSet > >::type weightset_t_of
Helper to retrieve the type of the weightset of a value set.
Definition: traits.hh:86
ATTRIBUTE_CONST int max(int a, int b)
Definition: arith.hh:54
Main namespace of Awali.
Definition: ato.hh:22
unsigned state_t
Definition: types.hh:21
static void init(const Aut &aut, std::vector< std::vector< state_t > > &states_in_part, std::vector< unsigned > &part, std::queue< unsigned > &splitters, std::vector< bool > &is_in_queue)
Definition: congruence_det.hh:284
Definition: congruence_det.hh:256
static void init(const Aut &aut, std::vector< std::vector< state_t > > &states_in_part, std::vector< unsigned > &part, std::queue< unsigned > &splitters, std::vector< bool > &is_in_queue)
Definition: congruence_det.hh:258
label_t_of< Aut > label_t
Definition: congruence_det.hh:48
static type getLabel(const Aut &aut, Tr tr)
Definition: congruence_det.hh:52
label_t type
Definition: congruence_det.hh:49
Definition: congruence_det.hh:36
static type getLabel(const Aut &aut, Tr tr)
Definition: congruence_det.hh:41
std::pair< label_t, typename Weightset::value_t > type
Definition: congruence_det.hh:38
label_t_of< Aut > label_t
Definition: congruence_det.hh:37
Definition: linkedhashmap.hh:31
const std::vector< K > & domain() const
Definition: linkedhashmap.hh:36
void clear()
Definition: linkedhashmap.hh:50