MimIR
MimIR is my Intermediate Representation
Loading...
Searching...
No Matches
normalizers.cpp
Go to the documentation of this file.
1#include <algorithm>
2#include <iterator>
3#include <numeric>
4#include <ranges>
5#include <vector>
6
8#include <fe/assert.h>
9
10#include "mim/axm.h"
11#include "mim/def.h"
12#include "mim/tuple.h"
13#include "mim/world.h"
14
15#include "mim/util/dbg.h"
16#include "mim/util/log.h"
17
20
23
24namespace mim::plug::regex {
25
26template<quant id>
27const Def* normalize_quant(const Def* type, const Def* callee, const Def* arg) {
28 auto& world = type->world();
29
30 // quantifiers are idempotent
31 if (Axm::isa(id, arg)) return arg;
32
33 if constexpr (id == quant::plus) {
34 // (\d?)+ and (\d*)+ == \d*
35 if (auto optional_app = Axm::isa(quant::optional, arg))
36 return world.call(quant::star, optional_app->arg());
37 else if (auto star_app = Axm::isa(quant::star, arg))
38 return arg;
39 } else if constexpr (id == quant::star) {
40 // (\d?)* and (\d+)* == \d*
41 if (auto quant_app = Axm::isa<quant>(arg)) return world.app(callee, quant_app->arg());
42 } else if constexpr (id == quant::optional) {
43 // (\d*)? and (\d+)? == \d*
44 if (auto star_app = Axm::isa(quant::star, arg))
45 return arg;
46 else if (auto plus_app = Axm::isa(quant::plus, arg))
47 return world.call(quant::star, plus_app->arg());
48 }
49
50 return {};
51}
52
53template<class ConjOrDisj>
55 assert(!args.empty());
56 auto& world = args.front()->world();
57 return std::accumulate(args.begin() + 1, args.end(), args.front(), [&world](const Def* lhs, const Def* rhs) {
58 return world.call<ConjOrDisj, false>(Defs{lhs, rhs});
59 });
60}
61
62const Def* normalize_conj(const Def* type, const Def* callee, const Def* arg) {
63 auto& world = type->world();
64 world.DLOG("conj {}:{} ({})", type, callee, arg);
65
66 if (auto a = Lit::isa(arg->arity())) {
67 switch (*a) {
68 case 0: return world.lit_tt();
69 case 1: return arg;
70 default:
71 if (auto args = detail::flatten_in_arg<conj>(arg); !args.empty())
72 return make_binary_tree<conj>(args);
73 else
74 return world.annex<empty>();
75 }
76 }
77
78 return {};
79}
80
81bool compare_re(const Def* lhs, const Def* rhs) {
82 auto lhs_range = Axm::isa<range>(lhs);
83 auto rhs_range = Axm::isa<range>(rhs);
84 // sort ranges by increasing lower bound
85 if (lhs_range && rhs_range) return Lit::as(lhs_range->arg()->proj(0)) < Lit::as(rhs_range->arg()->proj(0));
86 // ranges to the end
87 if (lhs_range) return false;
88 if (rhs_range) return true;
89
90 return lhs->gid() < rhs->gid(); // make irreflexive
91}
92
94 std::stable_sort(args.begin(), args.end(), &compare_re);
95 {
96 auto new_end = std::unique(args.begin(), args.end());
97 args.erase(new_end, args.end());
98 }
99}
100
101bool is_in_range(Range range, nat_t needle) { return needle >= range.first && needle <= range.second; }
102
103auto get_range(const Def* rng) -> Range {
104 auto rng_match = Axm::isa<range, false>(rng);
105 return {Lit::as<std::uint8_t>(rng_match->arg(0)), Lit::as<std::uint8_t>(rng_match->arg(1))};
106}
107
108struct app_range {
110 const Def* operator()(Range rng) { return w.call<range>(Defs{w.lit_i8(rng.first), w.lit_i8(rng.second)}); }
111};
112
113void merge_ranges(DefVec& args) {
114 auto ranges_begin = args.begin();
115 while (ranges_begin != args.end() && !Axm::isa<range>(*ranges_begin))
116 ranges_begin++;
117 if (ranges_begin == args.end()) return;
118
119 std::set<const Def*> to_remove;
120 Ranges old_ranges;
121 auto& world = (*ranges_begin)->world();
122
123 std::transform(ranges_begin, args.end(), std::back_inserter(old_ranges), get_range);
124
125 auto new_ranges = automaton::merge_ranges(old_ranges, [&world](std::string_view msg) { world.DLOG("{}", msg); });
126
127 // invalidates ranges_begin
128 args.erase(ranges_begin, args.end());
129 std::transform(new_ranges.begin(), new_ranges.end(), std::back_inserter(args), app_range{world});
130
131 make_vector_unique(args);
132}
133
134template<cls A, cls B>
135bool equals_any(const Def* cls0, const Def* cls1) {
136 return (Axm::isa(A, cls0) && Axm::isa(B, cls1)) || (Axm::isa(A, cls1) && Axm::isa(B, cls0));
137}
138
139bool equals_any(const Def* lhs, const Def* rhs) {
140 auto check_arg_equiv = [](const Def* lhs, const Def* rhs) {
141 if (auto rng_lhs = Axm::isa<range>(lhs))
142 if (auto not_rhs = Axm::isa<not_>(rhs)) {
143 if (auto rng_rhs = Axm::isa<range>(not_rhs->arg())) return rng_lhs == rng_rhs;
144 }
145 return false;
146 };
147
148 return check_arg_equiv(lhs, rhs) || check_arg_equiv(rhs, lhs);
149}
150
151bool equals_any(Defs lhs, Defs negated_rhs) {
152 auto is_range = [](const Def* d) { return Axm::isa<range>(d); };
153 auto to_range = std::views::filter(is_range) | std::views::transform(get_range);
154 auto rhs_view = negated_rhs | to_range;
155
156 if (std::ranges::distance(rhs_view) != std::ranges::distance(negated_rhs)) return false;
157
158 return std::ranges::includes(lhs | to_range, rhs_view);
159}
160
161const Def* normalize_disj(const Def* type, const Def*, const Def* arg) {
162 auto& world = type->world();
163 if (auto a = Lit::isa(arg->arity())) {
164 switch (*a) {
165 case 0: return world.lit_ff();
166 case 1: return arg;
167 default:
168 auto new_args = detail::flatten_in_arg<disj>(arg);
169
170 const bool contains_any
171 = std::ranges::find_if(new_args, [](const Def* ax) -> bool { return Axm::isa<any>(ax); })
172 != new_args.end();
173 const bool contains_empty
174 = std::ranges::find_if(new_args, [](const Def* ax) -> bool { return Axm::isa<empty>(ax); })
175 != new_args.end();
176
177 auto make_any = [&world, contains_empty]() {
178 if (contains_empty)
179 // (any|) matches everything, including empty string
180 // don't normalize again, as we'd just run into this normalizer again..
181 return world.call<disj, false>(Defs{world.annex<any>(), world.annex<empty>()});
182 else
183 return world.annex<any>();
184 };
185
186 if (contains_any) return make_any();
187 make_vector_unique(new_args);
188 merge_ranges(new_args);
189
190 const Def* to_remove = nullptr;
191 for (const auto* cls0 : new_args) {
192 for (const auto* cls1 : new_args)
193 if (equals_any(cls0, cls1)) return make_any();
194
195 if (auto not_rhs = Axm::isa<not_>(cls0)) {
196 if (auto disj_rhs = Axm::isa<disj>(not_rhs->arg())) {
197 auto rngs = detail::flatten_in_arg<disj>(disj_rhs->arg());
198 make_vector_unique(rngs);
199 if (equals_any(new_args, rngs)) return make_any();
200 }
201 }
202 }
203
204 erase(new_args, to_remove);
205 world.DLOG("final ranges {}", fe::Join(new_args));
206
207 if (new_args.size() > 2) return make_binary_tree<disj>(new_args);
208 if (new_args.size() > 1) return world.call<disj, false>(new_args);
209 return new_args.back();
210 }
211 }
212 return {};
213}
214
215const Def* normalize_range(const Def* type, const Def* callee, const Def* arg) {
216 auto& world = type->world();
217 auto [lhs, rhs] = arg->projs<2>();
218
219 if (!lhs->isa<Var>() && !rhs->isa<Var>()) // before first PE.
220 if (lhs->as<Lit>()->get() > rhs->as<Lit>()->get()) return world.raw_app(type, callee, {rhs, lhs});
221
222 return {};
223}
224
225const Def* any_unwanted_for_not(const Def* arg) {
226 if (auto ax = Axm::isa<regex::range>(arg)) return nullptr;
227 if (auto ax = Axm::isa<regex::not_>(arg)) return nullptr;
228 if (auto ax = Axm::isa<regex::any>(arg)) return nullptr;
229 if (auto disj = Axm::isa<regex::disj>(arg)) {
230 for (const auto* disj_arg : disj->args())
231 if (auto ret = any_unwanted_for_not(disj_arg)) return ret;
232 return nullptr;
233 }
234 return arg;
235}
236
237const Def* normalize_not(const Def*, const Def* callee, const Def* arg) {
238 if (auto unwanted = any_unwanted_for_not(arg)) {
239 throw Error()
240 .error(arg->loc(),
241 "regex.not_ must only be used with regex.disj, regex.range, regex.any and regex.not_: {} {}", callee,
242 arg)
243 .error(unwanted->loc(), "found unwanted: {}", unwanted);
244 }
245 return {};
246}
247
249
250} // namespace mim::plug::regex
static auto isa(const Def *def)
Definition axm.h:107
Base class for all Defs.
Definition def.h:246
auto projs(F f) const
Splits this Def via Def::projections into an Array (if A == std::dynamic_extent) or std::array (other...
Definition def.h:386
Loc loc() const
Definition def.h:514
constexpr u32 gid() const noexcept
Global id - unique number for this Def.
Definition def.h:266
virtual const Def * arity() const
Definition def.cpp:558
Error & error(Loc loc, std::format_string< Args... > s, Args &&... args)
Definition dbg.h:69
static std::optional< T > isa(const Def *def)
Definition def.h:838
T get() const
Definition def.h:825
static T as(const Def *def)
Definition def.h:844
A variable introduced by a binder (mutable).
Definition def.h:714
This is a thin wrapper for absl::InlinedVector<T, N, A> which is a drop-in replacement for std::vecto...
Definition vector.h:18
The World represents the whole program and manages creation of MimIR nodes (Defs).
Definition world.h:36
automaton::Range Range
Vector< Range > Ranges
std::pair< std::uint64_t, std::uint64_t > Range
std::optional< Range > merge_ranges(Range a, Range b) noexcept
The regex Plugin
Definition lower_regex.h:5
void merge_ranges(DefVec &args)
void make_vector_unique(DefVec &args)
auto get_range(const Def *rng) -> Range
bool is_in_range(Range range, nat_t needle)
const Def * normalize_conj(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_range(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_disj(const Def *type, const Def *, const Def *arg)
const Def * normalize_quant(const Def *type, const Def *callee, const Def *arg)
const Def * normalize_not(const Def *, const Def *callee, const Def *arg)
bool compare_re(const Def *lhs, const Def *rhs)
const Def * make_binary_tree(Defs args)
const Def * any_unwanted_for_not(const Def *arg)
bool equals_any(const Def *cls0, const Def *cls1)
View< const Def * > Defs
Definition def.h:78
u64 nat_t
Definition types.h:37
Vector< const Def * > DefVec
Definition def.h:79
Vector< T, N, A >::size_type erase(Vector< T, N, A > &c, const U &value) noexcept
Definition vector.h:83
#define MIM_regex_NORMALIZER_IMPL
Definition autogen.h:107
const Def * operator()(Range rng)