1
use std::collections::VecDeque;
2
use std::fmt::Debug;
3
use std::time::Instant;
4

            
5
use ahash::HashMap;
6
use log::debug;
7
use log::info;
8
use log::log_enabled;
9
use log::trace;
10
use log::warn;
11
use mcrl2::aterm::ATermRef;
12
use mcrl2::data::is_data_abstraction;
13
use mcrl2::data::is_data_application;
14
use mcrl2::data::is_data_function_symbol;
15
use mcrl2::data::is_data_machine_number;
16
use mcrl2::data::is_data_untyped_identifier;
17
use mcrl2::data::is_data_variable;
18
use mcrl2::data::is_data_where_clause;
19
use mcrl2::data::DataExpression;
20
use mcrl2::data::DataExpressionRef;
21
use mcrl2::data::DataFunctionSymbol;
22
use smallvec::smallvec;
23
use smallvec::SmallVec;
24

            
25
use crate::rewrite_specification::RewriteSpecification;
26
use crate::rewrite_specification::Rule;
27
use crate::utilities::ExplicitPosition;
28

            
29
use super::DotFormatter;
30
use super::MatchGoal;
31

            
32
// The Set Automaton used to find all matching patterns in a term. Based on the
33
// following article. Implemented by Mark Bouwman, and adapted by Maurice
34
// Laveaux.
35
//
36
// Erkens, R., Groote, J.F. (2021). A Set Automaton to Locate All Pattern
37
// Matches in a Term. In: Cerone, A., Ölveczky, P.C. (eds) Theoretical Aspects
38
// of Computing – ICTAC 2021. ICTAC 2021. Lecture Notes in Computer Science(),
39
// vol 12819. Springer, Cham. https://doi.org/10.1007/978-3-030-85315-0_5
40
pub struct SetAutomaton<T> {
41
    pub(crate) states: Vec<State>,
42
    pub(crate) transitions: HashMap<(usize, usize), Transition<T>>,
43
}
44

            
45
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
46
pub(crate) struct MatchAnnouncement {
47
    pub rule: Rule,
48
    pub position: ExplicitPosition,
49
    pub symbols_seen: usize,
50
}
51

            
52
#[derive(Clone)]
53
pub struct Transition<T> {
54
    pub(crate) symbol: DataFunctionSymbol,
55
    pub(crate) announcements: SmallVec<[(MatchAnnouncement, T); 1]>,
56
    pub(crate) destinations: SmallVec<[(ExplicitPosition, usize); 1]>,
57
}
58

            
59
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
60
pub(crate) struct MatchObligation {
61
    pub pattern: DataExpression,
62
    pub position: ExplicitPosition,
63
}
64

            
65
#[derive(Debug)]
66
enum GoalsOrInitial {
67
    InitialState,
68
    Goals(Vec<MatchGoal>),
69
}
70

            
71
impl<M> SetAutomaton<M> {
72
401
    pub fn new(spec: &RewriteSpecification, annotate: impl Fn(&Rule) -> M, apma: bool) -> SetAutomaton<M> {
73
401
        let start = Instant::now();
74
401

            
75
401
        // States are labelled s0, s1, s2, etcetera. state_counter keeps track of count.
76
401
        let mut state_counter: usize = 1;
77
401

            
78
401
        // Remove rules that we cannot deal with
79
401
        let supported_rules: Vec<Rule> = spec
80
401
            .rewrite_rules
81
401
            .iter()
82
39686
            .filter(|rule| is_supported_rule(rule))
83
401
            .map(Rule::clone)
84
401
            .collect();
85

            
86
        // Find the indices of all the function symbols.
87
401
        let symbols = {
88
401
            let mut symbols = HashMap::default();
89

            
90
37546
            for rule in &supported_rules {
91
37145
                find_symbols(&rule.lhs.copy(), &mut symbols);
92
37145
                find_symbols(&rule.rhs.copy(), &mut symbols);
93

            
94
40755
                for cond in &rule.conditions {
95
3610
                    find_symbols(&cond.lhs.copy(), &mut symbols);
96
3610
                    find_symbols(&cond.rhs.copy(), &mut symbols);
97
3610
                }
98
            }
99

            
100
401
            symbols
101
        };
102

            
103
21621
        for (index, (symbol, arity)) in symbols.iter().enumerate() {
104
21620
            trace!("{}: {} {}", index, symbol, arity);
105
        }
106

            
107
        // The initial state has a match goals for each pattern. For each pattern l there is a match goal
108
        // with one obligation l@ε and announcement l@ε.
109
401
        let mut initial_match_goals = Vec::<MatchGoal>::new();
110
37546
        for rr in &supported_rules {
111
37145
            initial_match_goals.push(MatchGoal {
112
37145
                obligations: vec![MatchObligation {
113
37145
                    pattern: rr.lhs.clone(),
114
37145
                    position: ExplicitPosition::empty_pos(),
115
37145
                }],
116
37145
                announcement: MatchAnnouncement {
117
37145
                    rule: (*rr).clone(),
118
37145
                    position: ExplicitPosition::empty_pos(),
119
37145
                    symbols_seen: 0,
120
37145
                },
121
37145
            });
122
37145
        }
123

            
124
        // Match goals need to be sorted so that we can easily check whether a state with a certain
125
        // set of match goals already exists.
126
401
        initial_match_goals.sort();
127
401

            
128
401
        // Create the initial state
129
401
        let initial_state = State {
130
401
            label: ExplicitPosition::empty_pos(),
131
401
            match_goals: initial_match_goals.clone(),
132
401
        };
133
401

            
134
401
        // HashMap from goals to state number
135
401
        let mut map_goals_state = HashMap::default();
136
401

            
137
401
        // Queue of states that still need to be explored
138
401
        let mut queue = VecDeque::new();
139
401
        queue.push_back(0);
140
401

            
141
401
        map_goals_state.insert(initial_match_goals, 0);
142
401

            
143
401
        let mut states = vec![initial_state];
144
401
        let mut transitions = HashMap::default();
145

            
146
        // Pick a state to explore
147
19897
        while let Some(s_index) = queue.pop_front() {
148
2809826
            for (symbol, arity) in &symbols {
149
2790330
                let (mut announcements, pos_to_goals) =
150
2790330
                    states
151
2790330
                        .get(s_index)
152
2790330
                        .unwrap()
153
2790330
                        .derive_transition(symbol, *arity, &supported_rules, apma);
154
2790330

            
155
2790330
                announcements.sort_by(|ma1, ma2| ma1.position.cmp(&ma2.position));
156

            
157
                // For the destinations we convert the match goal destinations to states
158
2790330
                let mut destinations = smallvec![];
159

            
160
                // Loop over the hypertransitions
161
3368495
                for (pos, goals_or_initial) in pos_to_goals {
162
                    // Match goals need to be sorted so that we can easily check whether a state with a certain
163
                    // set of match goals already exists.
164
578165
                    if let GoalsOrInitial::Goals(goals) = goals_or_initial {
165
508160
                        if map_goals_state.contains_key(&goals) {
166
                            // The destination state already exists
167
489065
                            destinations.push((pos, *map_goals_state.get(&goals).unwrap()))
168
19095
                        } else if !goals.is_empty() {
169
19095
                            // The destination state does not yet exist, create it
170
19095
                            let new_state = State::new(goals.clone());
171
19095
                            states.push(new_state);
172
19095
                            destinations.push((pos, state_counter));
173
19095
                            map_goals_state.insert(goals, state_counter);
174
19095
                            queue.push_back(state_counter);
175
19095
                            state_counter += 1;
176
19095
                        }
177
70005
                    } else {
178
70005
                        // The transition is to the initial state
179
70005
                        destinations.push((pos, 0));
180
70005
                    }
181
                }
182

            
183
                // Add the annotation for every match announcement.
184
2790330
                let announcements = announcements
185
2790330
                    .into_iter()
186
2790330
                    .map(|ma| {
187
107500
                        let annotation = annotate(&ma.rule);
188
107500
                        (ma, annotation)
189
2790330
                    })
190
2790330
                    .collect();
191
2790330

            
192
2790330
                // Add the resulting outgoing transition to the state.
193
2790330
                debug_assert!(
194
2790330
                    !&transitions.contains_key(&(s_index, symbol.operation_id())),
195
                    "Set automaton should not contain duplicated transitions"
196
                );
197
2790330
                transitions.insert(
198
2790330
                    (s_index, symbol.operation_id()),
199
2790330
                    Transition {
200
2790330
                        symbol: symbol.clone(),
201
2790330
                        announcements,
202
2790330
                        destinations,
203
2790330
                    },
204
2790330
                );
205
            }
206

            
207
19496
            debug!(
208
                "Queue size {}, currently {} states and {} transitions",
209
                queue.len(),
210
                states.len(),
211
                transitions.len()
212
            );
213
        }
214

            
215
        // Clear the match goals since they are only for debugging purposes.
216
401
        if !log_enabled!(log::Level::Debug) {
217
19897
            for state in &mut states {
218
19496
                state.match_goals.clear();
219
19496
            }
220
        }
221
401
        info!(
222
1
            "Created set automaton (states: {}, transitions: {}, apma: {}) in {} ms",
223
1
            states.len(),
224
1
            transitions.len(),
225
1
            apma,
226
1
            (Instant::now() - start).as_millis()
227
        );
228

            
229
401
        let result = SetAutomaton { states, transitions };
230
401
        debug!("{}", result);
231

            
232
401
        result
233
401
    }
234

            
235
    /// Returns the number of states
236
    pub fn num_of_states(&self) -> usize {
237
        self.states.len()
238
    }
239

            
240
    /// Returns the number of transitions
241
    pub fn num_of_transitions(&self) -> usize {
242
        self.transitions.len()
243
    }
244

            
245
    /// Provides a formatter for the .dot file format
246
    pub fn to_dot_graph(&self, show_backtransitions: bool, show_final: bool) -> DotFormatter<M> {
247
        DotFormatter {
248
            automaton: self,
249
            show_backtransitions,
250
            show_final,
251
        }
252
    }
253
}
254

            
255
#[derive(Debug)]
256
pub struct Derivative {
257
    pub(crate) completed: Vec<MatchGoal>,
258
    pub(crate) unchanged: Vec<MatchGoal>,
259
    pub(crate) reduced: Vec<MatchGoal>,
260
}
261

            
262
#[derive(Debug)]
263
pub(crate) struct State {
264
    pub(crate) label: ExplicitPosition,
265
    pub(crate) match_goals: Vec<MatchGoal>,
266
}
267

            
268
impl State {
269
    /// Derive transitions from a state given a head symbol. The resulting transition is returned as a tuple
270
    /// The tuple consists of a vector of outputs and a set of destinations (which are sets of match goals).
271
    /// We don't use the struct Transition as it requires that the destination is a full state, with name.
272
    /// Since we don't yet know whether the state already exists we just return a set of match goals as 'state'.
273
    ///
274
    /// Parameter symbol is the symbol for which the transition is computed
275
2790330
    fn derive_transition(
276
2790330
        &self,
277
2790330
        symbol: &DataFunctionSymbol,
278
2790330
        arity: usize,
279
2790330
        rewrite_rules: &Vec<Rule>,
280
2790330
        apma: bool,
281
2790330
    ) -> (Vec<MatchAnnouncement>, Vec<(ExplicitPosition, GoalsOrInitial)>) {
282
2790330
        // Computes the derivative containing the goals that are completed, unchanged and reduced
283
2790330
        let mut derivative = self.compute_derivative(symbol, arity);
284
2790330

            
285
2790330
        // The outputs/matching patterns of the transitions are those who are completed
286
2790330
        let outputs = derivative.completed.into_iter().map(|x| x.announcement).collect();
287
2790330

            
288
2790330
        // The new match goals are the unchanged and reduced match goals.
289
2790330
        let mut new_match_goals = derivative.unchanged;
290
2790330
        new_match_goals.append(&mut derivative.reduced);
291
2790330

            
292
2790330
        let mut destinations = vec![];
293
2790330
        // If we are building an APMA we do not deepen the position or create a hypertransitions
294
2790330
        // with multiple endpoints
295
2790330
        if apma {
296
2616320
            if !new_match_goals.is_empty() {
297
272080
                destinations.push((ExplicitPosition::empty_pos(), GoalsOrInitial::Goals(new_match_goals)));
298
2344240
            }
299
        } else {
300
            // In case we are building a set automaton we partition the match goals
301
174010
            let partitioned = MatchGoal::partition(new_match_goals);
302
174010

            
303
174010
            // Get the greatest common prefix and shorten the positions
304
174010
            let mut positions_per_partition = vec![];
305
174010
            let mut gcp_length_per_partition = vec![];
306
410090
            for (p, pos) in partitioned {
307
236080
                positions_per_partition.push(pos);
308
236080
                let gcp = MatchGoal::greatest_common_prefix(&p);
309
236080
                let gcp_length = gcp.len();
310
236080
                gcp_length_per_partition.push(gcp_length);
311
236080
                let mut goals = MatchGoal::remove_prefix(p, gcp_length);
312
236080
                goals.sort_unstable();
313
236080
                destinations.push((gcp, GoalsOrInitial::Goals(goals)));
314
236080
            }
315

            
316
            // Handle fresh match goals, they are the positions Label(state).i
317
            // where i is between 2 and the arity + 2 of the function symbol of
318
            // the transition. Position 1 is the function symbol.
319
174010
            for i in 2..arity + 2 {
320
160295
                let mut pos = self.label.clone();
321
160295
                pos.indices.push(i);
322
160295

            
323
160295
                // Check if the fresh goals are related to one of the existing partitions
324
160295
                let mut partition_key = None;
325
265630
                'outer: for (i, part_pos) in positions_per_partition.iter().enumerate() {
326
574090
                    for p in part_pos {
327
398750
                        if MatchGoal::pos_comparable(p, &pos) {
328
90290
                            partition_key = Some(i);
329
90290
                            break 'outer;
330
308460
                        }
331
                    }
332
                }
333

            
334
160295
                if let Some(key) = partition_key {
335
                    // If the fresh goals fall in an existing partition
336
90290
                    let gcp_length = gcp_length_per_partition[key];
337
90290
                    let pos = ExplicitPosition {
338
90290
                        indices: SmallVec::from_slice(&pos.indices[gcp_length..]),
339
90290
                    };
340

            
341
                    // Add the fresh goals to the partition
342
5500090
                    for rr in rewrite_rules {
343
5409800
                        if let GoalsOrInitial::Goals(goals) = &mut destinations[key].1 {
344
5409800
                            goals.push(MatchGoal {
345
5409800
                                obligations: vec![MatchObligation {
346
5409800
                                    pattern: rr.lhs.clone(),
347
5409800
                                    position: pos.clone(),
348
5409800
                                }],
349
5409800
                                announcement: MatchAnnouncement {
350
5409800
                                    rule: (*rr).clone(),
351
5409800
                                    position: pos.clone(),
352
5409800
                                    symbols_seen: 0,
353
5409800
                                },
354
5409800
                            });
355
5409800
                        }
356
                    }
357
70005
                } else {
358
70005
                    // The transition is simply to the initial state
359
70005
                    // GoalsOrInitial::InitialState avoids unnecessary work of creating all these fresh goals
360
70005
                    destinations.push((pos, GoalsOrInitial::InitialState));
361
70005
                }
362
            }
363
        }
364

            
365
        // Sort the destination such that transitions which do not deepen the position are listed first
366
2790330
        destinations.sort_unstable_by(|(pos1, _), (pos2, _)| pos1.cmp(pos2));
367
2790330
        (outputs, destinations)
368
2790330
    }
369

            
370
    /// For a transition 'symbol' of state 'self' this function computes which match goals are
371
    /// completed, unchanged and reduced.
372
2790330
    fn compute_derivative(&self, symbol: &DataFunctionSymbol, arity: usize) -> Derivative {
373
2790330
        let mut result = Derivative {
374
2790330
            completed: vec![],
375
2790330
            unchanged: vec![],
376
2790330
            reduced: vec![],
377
2790330
        };
378

            
379
41414895
        for mg in &self.match_goals {
380
38624565
            debug_assert!(
381
38624565
                !mg.obligations.is_empty(),
382
                "The obligations should never be empty, should be completed then"
383
            );
384

            
385
            // Completed match goals
386
38624565
            if mg.obligations.len() == 1
387
37108145
                && mg.obligations.iter().any(|mo| {
388
37108145
                    mo.position == self.label
389
23870555
                        && mo.pattern.data_function_symbol() == symbol.copy()
390
306760
                        && mo.pattern.data_arguments().all(|x| is_data_variable(&x))
391
                    // Again skip the function symbol
392
37108145
                })
393
107500
            {
394
107500
                result.completed.push(mg.clone());
395
38517065
            } else if mg
396
38517065
                .obligations
397
38517065
                .iter()
398
38790710
                .any(|mo| mo.position == self.label && mo.pattern.data_function_symbol() != symbol.copy())
399
24970515
            {
400
24970515
                // Match goal is discarded since head symbol does not match.
401
24970515
            } else if mg.obligations.iter().all(|mo| mo.position != self.label) {
402
                // Unchanged match goals
403
13361465
                let mut mg = mg.clone();
404
13361465
                if mg.announcement.rule.lhs != mg.obligations.first().unwrap().pattern {
405
655145
                    mg.announcement.symbols_seen += 1;
406
12706320
                }
407

            
408
13361465
                result.unchanged.push(mg.clone());
409
            } else {
410
                // Reduce match obligations
411
185085
                let mut mg = mg.clone();
412
185085
                let mut new_obligations = vec![];
413

            
414
379480
                for mo in mg.obligations {
415
194395
                    if mo.pattern.data_function_symbol() == symbol.copy() && mo.position == self.label {
416
                        // Reduced match obligation
417
330155
                        for (index, t) in mo.pattern.data_arguments().enumerate() {
418
330155
                            assert!(
419
330155
                                index < arity,
420
                                "This pattern associates function symbol {:?} with different arities {} and {}",
421
                                symbol,
422
                                index + 1,
423
                                arity
424
                            );
425

            
426
330155
                            if !is_data_variable(&t) {
427
231225
                                let mut new_pos = mo.position.clone();
428
231225
                                new_pos.indices.push(index + 2);
429
231225
                                new_obligations.push(MatchObligation {
430
231225
                                    pattern: t.protect().into(),
431
231225
                                    position: new_pos,
432
231225
                                });
433
231225
                            }
434
                        }
435
9310
                    } else {
436
9310
                        // remains unchanged
437
9310
                        new_obligations.push(mo.clone());
438
9310
                    }
439
                }
440

            
441
185085
                new_obligations.sort_unstable_by(|mo1, mo2| mo1.position.len().cmp(&mo2.position.len()));
442
185085
                mg.obligations = new_obligations;
443
185085
                mg.announcement.symbols_seen += 1;
444
185085

            
445
185085
                result.reduced.push(mg);
446
185085
            }
447
        }
448

            
449
2790330
        trace!(
450
            "=== compute_derivative(symbol = {}, label = {}) ===",
451
            symbol,
452
            self.label
453
        );
454
2790330
        trace!("Match goals: {{");
455
41414895
        for mg in &self.match_goals {
456
38624565
            trace!("\t {}", mg);
457
        }
458

            
459
2790330
        trace!("}}");
460
2790330
        trace!("Completed: {{");
461
2897830
        for mg in &result.completed {
462
107500
            trace!("\t {}", mg);
463
        }
464

            
465
2790330
        trace!("}}");
466
2790330
        trace!("Unchanged: {{");
467
16151795
        for mg in &result.unchanged {
468
13361465
            trace!("\t {}", mg);
469
        }
470

            
471
2790330
        trace!("}}");
472
2790330
        trace!("Reduced: {{");
473
2975415
        for mg in &result.reduced {
474
185085
            trace!("\t {}", mg);
475
        }
476
2790330
        trace!("}}");
477

            
478
2790330
        result
479
2790330
    }
480

            
481
    /// Create a state from a set of match goals
482
19095
    fn new(goals: Vec<MatchGoal>) -> State {
483
19095
        // The label of the state is taken from a match obligation of a root match goal.
484
19095
        let mut label: Option<ExplicitPosition> = None;
485
        // Go through all match goals until a root match goal is found
486
574620
        for goal in &goals {
487
555525
            if goal.announcement.position == ExplicitPosition::empty_pos() {
488
                // Find the shortest match obligation position.
489
                // This design decision was taken as it presumably has two advantages.
490
                // 1. Patterns that overlap will be more quickly distinguished, potentially decreasing
491
                // the size of the automaton.
492
                // 2. The average lookahead may be shorter.
493
49800
                if label.is_none() {
494
19095
                    label = Some(goal.obligations.first().unwrap().position.clone());
495
30705
                }
496

            
497
109285
                for obligation in &goal.obligations {
498
59485
                    if let Some(l) = &label {
499
59485
                        if &obligation.position < l {
500
1075
                            label = Some(obligation.position.clone());
501
58410
                        }
502
                    }
503
                }
504
505725
            }
505
        }
506

            
507
19095
        State {
508
19095
            label: label.unwrap(),
509
19095
            match_goals: goals,
510
19095
        }
511
19095
    }
512
}
513

            
514
/// Adds the given function symbol to the indexed symbols. Errors when a
515
/// function symbol is overloaded with different arities.
516
149655
fn add_symbol(function_symbol: DataFunctionSymbol, arity: usize, symbols: &mut HashMap<DataFunctionSymbol, usize>) {
517
149655
    if let Some(x) = symbols.get(&function_symbol) {
518
128035
        assert_eq!(
519
            *x, arity,
520
            "Function symbol {} occurs with different arities",
521
            function_symbol,
522
        );
523
21620
    } else {
524
21620
        symbols.insert(function_symbol, arity);
525
21620
    }
526
149655
}
527

            
528
/// Returns false iff this is a higher order term, of the shape t(t_0, ..., t_n), or an unknown term.
529
90095
fn is_supported_term(t: &DataExpression) -> bool {
530
2358040
    for subterm in t.iter() {
531
2358040
        if is_data_application(&subterm) && !is_data_function_symbol(&subterm.arg(0)) {
532
2010
            warn!("{} is higher order", &subterm);
533
2010
            return false;
534
2356030
        } else if is_data_abstraction(&subterm)
535
2355190
            || is_data_where_clause(&subterm)
536
2355190
            || is_data_untyped_identifier(&subterm)
537
        {
538
840
            warn!("{} contains unsupported construct", subterm);
539
840
            return false;
540
2355190
        }
541
    }
542

            
543
87245
    true
544
90095
}
545

            
546
/// Checks whether the set automaton can use this rule, no higher order rules or binders.
547
42495
pub fn is_supported_rule(rule: &Rule) -> bool {
548
42495
    // There should be no terms of the shape t(t0,...,t_n)
549
42495
    if !is_supported_term(&rule.rhs) || !is_supported_term(&rule.lhs) {
550
2850
        return false;
551
39645
    }
552

            
553
43455
    for cond in &rule.conditions {
554
3810
        if !is_supported_term(&cond.rhs) || !is_supported_term(&cond.lhs) {
555
            return false;
556
3810
        }
557
    }
558

            
559
39645
    true
560
42495
}
561

            
562
/// Finds all data symbols in the term and adds them to the symbol index.
563
257220
fn find_symbols(t: &DataExpressionRef<'_>, symbols: &mut HashMap<DataFunctionSymbol, usize>) {
564
257220
    if is_data_function_symbol(t) {
565
47625
        let t: &ATermRef<'_> = t;
566
47625
        add_symbol(t.protect().into(), 0, symbols);
567
209595
    } else if is_data_application(t) {
568
        // REC specifications should never contain this so it can be a debug error.
569
102030
        assert!(
570
102030
            is_data_function_symbol(&t.data_function_symbol()),
571
            "Error in term {}, higher order term rewrite systems are not supported",
572
            t
573
        );
574

            
575
102030
        add_symbol(t.data_function_symbol().protect(), t.data_arguments().len(), symbols);
576
175710
        for arg in t.data_arguments() {
577
175710
            find_symbols(&arg.into(), symbols);
578
175710
        }
579
107565
    } else if is_data_machine_number(t) {
580
        // Ignore machine numbers during matching?
581
107565
    } else if !is_data_variable(t) {
582
        panic!("Unexpected term {:?}", t);
583
107565
    }
584
257220
}