1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
//! Some simple node local optimizations, e.g.
//! - some algebraic simplifications, e.g. `a + a` to `a << 1` or `a * 4` to `a << 2`
//! - div of power of two to a sequence of shifts
//! - mod of power of two to a sequence of shifts
//!
//! To verify changes to these optimizations, you can compare the output of our
//! compiler with the output of the java compiler for the whole integer range as
//! follows, e.g. to test the substitution for modulo:
//!
//! ```ignore
//! class ModAllBy8 {
//!     public static void main(String[] args) {
//!         int min = -2147483648;
//!         int max = 2147483647;
//!         int remainder = 8;
//!
//!         /* reject fast by testing boundaries */
//!         System.out.println(min % remainder);
//!         System.out.println(max % remainder);
//!         System.out.println(-100 % remainder);
//!         System.out.println(-1 % remainder);
//!         System.out.println(0 % remainder);
//!         System.out.println(1 % remainder);
//!         System.out.println(100 % remainder);
//!
//!         /* check all possible ints */
//!         int i = min;
//!
//!         while (i <= max) {
//!             System.out.println(i % remainder);
//!             i = i + 1;
//!         }
//!     }
//! }
//! ```
//!
//! Compile both and check the optimization was actually applied:
//!
//! ```ignore
//! javac ModAllBy8.java
//! cargo run -- --compile -o ModAllBy8-comprakt.out --emit-asm ModAllBy8-comprakt.S
//! grep -q div ModAllBy8-comprakt.S; echo $? # this should print a 1!
//! ```
//!
//! Compare both using one of the commands below (don't write to disk! this
//! would take about 50GB!):
//!
//! ```ignore
//! bash -c "diff -q <(./ModAllBy8-comprakt.out) <(java ModAllBy8)"
//! bash -c "comm -3 <(./ModAllBy8-comprakt.out) <(java ModAllBy8)"
//! ```
// TODO: this could be done without assure_outs, we just have to walk the memory
// chain first, collecting all memory edges into a div (out memory of div). Then
// graph chunks representing divisions can be matched by simply matching the
// result projection and walking its preds until the Div Node is found.
use super::Outcome;
use crate::optimization;
use libfirm_rs::{
    nodes::{Add, Div, Mod, Mul, Node, NodeTrait},
    Graph, Mode, Tarval, TarvalKind,
};

/// Node-local optimization pass: bundles the graph that is being rewritten with
/// a flag recording whether any rewrite has been applied so far.
pub struct NodeLocal {
    /// Graph whose nodes are visited and possibly replaced.
    graph: Graph,
    /// Switched to `Outcome::Changed` as soon as one rewrite replaced a node.
    changed: Outcome,
}

impl optimization::Local for NodeLocal {
    fn optimize_function(graph: Graph) -> Outcome {
        Self::new(graph).run()
    }
}

impl NodeLocal {
    fn new(graph: Graph) -> Self {
        Self {
            graph,
            changed: Outcome::Unchanged,
        }
    }

    /// Replaces a self addition `a + a` by the left shift `a << 1`.
    ///
    /// The rewrite is skipped when the two operands are different nodes or when
    /// `mode_to_unsigned` has no unsigned counterpart for the operand mode (that
    /// counterpart is the mode the shift amount constant is created in).
    fn try_optimize_add(&mut self, add: Add) {
        let left = add.left();
        let right = add.right();

        // checking for equality of const nodes is unnecessary since const additions
        // were already folded
        if left != right {
            return;
        }

        // the shift amount constant is created in the unsigned counterpart of the
        // operand mode; without such a mode the addition is left untouched
        let shift_operand_mode = if let Some(mode) = mode_to_unsigned(right.mode()) {
            mode
        } else {
            return;
        };

        // convert (a + a) to (a << 1); only logged once the rewrite is certain to
        // happen, so that the log never reports a replacement that was aborted
        log::debug!(
            "LO: self addition {:?} [left:{:?},right:{:?}] \
             replaced by '{:?} << 1'",
            add,
            left,
            right,
            left
        );

        let tarval_1 = self.graph.new_const(Tarval::val(1, shift_operand_mode));

        let shl = add.block().new_shl(left, tarval_1);

        Graph::exchange(add, shl);
        self.changed = Outcome::Changed;
    }

    /// Replaces a multiplication by a constant power of two (or its negation)
    /// by a left shift, followed by a `Minus` node if the constant is negative.
    ///
    /// Nothing happens if neither operand is a constant, if the constant's
    /// tarval is not a `TarvalKind::Long`, if its absolute value is not a power
    /// of two, or if the constant's mode has no unsigned counterpart.
    fn try_optimize_mul(&mut self, mul: Mul) {
        let left = mul.left();
        let right = mul.right();

        // pick the constant operand; a constant on the left takes precedence
        let (power_node, other, constant) = match (left, right) {
            // power of two * a
            (Node::Const(op1), _) => (left, right, op1),
            // a * power of two
            (_, Node::Const(op2)) => (right, left, op2),
            _ => return,
        };

        let power = match constant.tarval().kind() {
            TarvalKind::Long(val) => val,
            _ => return,
        };

        // this can be an Ls if ConstantFolding is run before this optimization
        // on `new int[4]`. We would need u128 for Ls with INT_MIN, simply ignore this
        // case instead of crashing the compiler.
        let abs_power = match power.checked_abs() {
            Some(abs) => abs as u64,
            None => return,
        };

        let has_minus = power < 0;

        if !abs_power.is_power_of_two() {
            return;
        }

        // exactly one bit is set, so its position equals the number of trailing zeros
        let shift_amount = abs_power.trailing_zeros();

        let shift_operand_mode = match mode_to_unsigned(power_node.mode()) {
            Some(mode) => mode,
            None => return,
        };

        let shift_amount_tarval = Tarval::val(i64::from(shift_amount), shift_operand_mode);
        let shift_amount_node = self.graph.new_const(shift_amount_tarval);

        let shl = mul.block().new_shl(other, shift_amount_node);
        // a negative factor is expressed by negating the shifted value
        let shl_end = if has_minus {
            Node::Minus(mul.block().new_minus(shl))
        } else {
            Node::Shl(shl)
        };

        log::debug!(
            "LO: Mul2Shift: {:?} [left:{:?},right:{:?}] replaced by '{:?} << {}'",
            mul,
            left,
            right,
            shl,
            shift_amount
        );

        Graph::exchange(mul, shl_end);
        self.changed = Outcome::Changed;
    }

    /// Replaces `a % b` by shifts, a mask and a subtraction when `b` is a
    /// constant whose absolute value is a power of two within the i32 range.
    ///
    /// The graph is only modified if the whole expected pattern is present: the
    /// divisor is a `Const` (optionally behind a `Conv`), the dividend is fed
    /// through a `Conv`, the result projection has exactly one user and the
    /// memory projection exists. In every other case the function returns
    /// without creating any node.
    fn try_optimize_mod(&mut self, modulo: Mod) {
        // this substitutes a % b to a - (a/b) * b, which is how modulo is defined in
        // the java standard. With k = log2(|b|), the division (a/b) is replaced by
        // ((a + ((a >> 31) & ((1 << k) - 1))) >> k) and the multiplication by a
        // left shift by k.
        //
        // We do not emit Minus nodes for modulo with negative divisors since
        // a % -b = a - a/(-b) * (-b) = a + a/b * (-b) = a - a/b * b = a % b

        // TODO: build a macro to deduplicate this with Div Node conversion. It's
        // nearly identical, we just don't emit a Minus Node for negative divisors,
        // and the types don't match (but their API does).
        log::debug!(
            "LO: Mod2Shift: {:?}[left:{:?},right:{:?}] ",
            modulo,
            modulo.left(),
            modulo.right(),
        );

        // we expect either
        // - a Conv with a Const Is as operand [which is the output of firm
        //   construction]
        // - or a constant with Const Ls [which is the output of constant folding]
        let divisor = match modulo.right() {
            Node::Conv(conv) => {
                log::debug!("Mod2Shift: with conv {:?}!", conv);
                if let Node::Const(divisor) = conv.op() {
                    divisor
                } else {
                    return;
                }
            }
            Node::Const(divisor) => divisor,
            _ => {
                return;
            }
        };

        log::debug!("Mod2Shift: divisor {:?}!", divisor);

        let divisor_value = if let TarvalKind::Long(value) = divisor.tarval().kind() {
            value
        } else {
            return;
        };

        log::debug!("Mod2Shift: divisor value {:?}!", divisor_value);

        if divisor_value > std::i32::MAX.into() || divisor_value < std::i32::MIN.into() {
            log::debug!("Mod2Shift: aborting since value is not in i32 range!",);
            // this can only happen through constant folding, which might
            // generate a Const Ls out of the i32 range
            return;
        }

        // `abs` cannot overflow since the value was just checked to be in the i32
        // range. Note that |i32::MIN| = 2^31 IS a power of two; it results in a
        // shift amount of 31 below.
        let abs_divisor_value = divisor_value.abs() as u64;

        if !abs_divisor_value.is_power_of_two() {
            return;
        }

        log::debug!("Mod2Shift: is power of two!");

        let shift_amount = 64 - 1 - abs_divisor_value.leading_zeros();

        // All pattern checks are done before the first node is created, so that
        // an aborted rewrite does not leave unused nodes behind.
        let modulo_proj_res = if let Some(res) = modulo.out_proj_res() {
            res
        } else {
            return;
        };

        if modulo_proj_res.out_nodes().len() != 1 {
            return;
        }

        // this is the conv hanging at the end of the modulo
        let modulo_end = modulo_proj_res.out_nodes().nth(0).unwrap();

        // reach through the conv node in front of the modulo; any other kind of
        // node must not be looked through, its operand is not the dividend
        let real_left = if let Node::Conv(conv) = modulo.left() {
            conv.op()
        } else {
            return;
        };

        // The replacement hard-codes 32 bit arithmetic (shift by 31, mj_int mask)
        // and takes the place of `modulo_end`, so both have to be in mode Is.
        if real_left.mode() != Mode::Is() || modulo_end.mode() != Mode::Is() {
            return;
        }

        log::debug!(
            "LO: Mod2Shift: memory edge through modulo {:?} is {:?} -> {:?}",
            modulo,
            modulo.mem(),
            modulo.out_proj_m(),
        );

        let modulo_proj_mem = if let Some(mem) = modulo.out_proj_m() {
            mem
        } else {
            return;
        };

        let shift_amount_node = self
            .graph
            .new_const(Tarval::val(i64::from(shift_amount), Mode::Iu()));

        let const_31 = self.graph.new_const(Tarval::val(31, Mode::Iu()));

        let block = modulo.block();
        let shr_by_31 = block.new_shrs(real_left, const_31);
        // Computed in i64: for a shift amount of 31 the i32 expression
        // `(1i32 << 31) - 1` overflows and panics in builds with overflow checks.
        let mask_const = (1i64 << shift_amount) - 1;
        let mask_const_node = self.graph.new_const(Tarval::mj_int(mask_const));
        let binary_and = block.new_and(shr_by_31, mask_const_node);
        let add_binary_and = block.new_add(real_left, binary_and);
        let shift_to_result = block.new_shrs(add_binary_and, shift_amount_node);

        // (a / b) * b with the sign of b dropped, see the comment at the top
        let mul_result_by_divisor = block.new_shl(shift_to_result, shift_amount_node);

        let modulo_subst_end = block.new_sub(real_left, mul_result_by_divisor);

        // drop the mem edge from the modulo
        Graph::exchange(modulo_proj_mem, modulo.mem());

        log::debug!(
            "LO: Mod2Shift: {:?}[left:{:?},right:{:?}] replaced",
            modulo,
            modulo.left(),
            modulo.right(),
        );

        Graph::exchange(modulo_end, modulo_subst_end);

        self.changed = Outcome::Changed;
    }

    /// Replaces `a / b` by shifts and a mask when `b` is a constant whose
    /// absolute value is a power of two within the i32 range.
    ///
    /// With k = log2(|b|) the quotient is computed as
    /// `(a + ((a >> 31) & ((1 << k) - 1))) >> k` and negated by a `Minus` node
    /// if `b` is negative.
    ///
    /// The graph is only modified if the whole expected pattern is present: the
    /// divisor is a `Const` (optionally behind a `Conv`), the dividend is fed
    /// through a `Conv`, the result projection has exactly one user and the
    /// memory projection exists. In every other case the function returns
    /// without creating any node.
    fn try_optimize_div(&mut self, div: Div) {
        log::debug!(
            "LO: Div2Shift: {:?}[left:{:?},right:{:?}] ",
            div,
            div.left(),
            div.right(),
        );

        // we expect either
        // - a Conv with a Const Is as operand [which is the output of firm
        //   construction]
        // - or a constant with Const Ls [which is the output of constant folding]
        let divisor = match div.right() {
            Node::Conv(conv) => {
                log::debug!("Div2Shift: with conv {:?}!", conv);
                if let Node::Const(divisor) = conv.op() {
                    divisor
                } else {
                    return;
                }
            }
            Node::Const(divisor) => divisor,
            _ => {
                return;
            }
        };

        log::debug!("Div2Shift: divisor {:?}!", divisor);

        let divisor_value = if let TarvalKind::Long(value) = divisor.tarval().kind() {
            value
        } else {
            return;
        };

        log::debug!("Div2Shift: divisor value {:?}!", divisor_value);

        if divisor_value > std::i32::MAX.into() || divisor_value < std::i32::MIN.into() {
            log::debug!("Div2Shift: aborting since value is not in i32 range!",);
            // this can only happen through constant folding, which might
            // generate a Const Ls out of the i32 range
            return;
        }

        // `abs` cannot overflow since the value was just checked to be in the i32
        // range. Note that |i32::MIN| = 2^31 IS a power of two; it results in a
        // shift amount of 31 below.
        let abs_divisor_value = divisor_value.abs() as u64;
        let has_minus = divisor_value < 0;

        if !abs_divisor_value.is_power_of_two() {
            return;
        }

        log::debug!("Div2Shift: is power of two!");

        let shift_amount = 64 - 1 - abs_divisor_value.leading_zeros();

        // All pattern checks are done before the first node is created, so that
        // an aborted rewrite does not leave unused nodes behind.
        let div_proj_res = if let Some(res) = div.out_proj_res() {
            res
        } else {
            return;
        };

        if div_proj_res.out_nodes().len() != 1 {
            return;
        }

        // this is the conv hanging at the end of the div
        let div_end = div_proj_res.out_nodes().nth(0).unwrap();

        // reach through the conv node in front of the div; any other kind of
        // node must not be looked through, its operand is not the dividend
        let real_left = if let Node::Conv(conv) = div.left() {
            conv.op()
        } else {
            return;
        };

        // The replacement hard-codes 32 bit arithmetic (shift by 31, mj_int mask)
        // and takes the place of `div_end`, so both have to be in mode Is.
        if real_left.mode() != Mode::Is() || div_end.mode() != Mode::Is() {
            return;
        }

        log::debug!(
            "LO: Div2Shift: memory edge through div {:?} is {:?} -> {:?}",
            div,
            div.mem(),
            div.out_proj_m(),
        );

        let div_proj_mem = if let Some(mem) = div.out_proj_m() {
            mem
        } else {
            return;
        };

        let shift_amount_node = self
            .graph
            .new_const(Tarval::val(i64::from(shift_amount), Mode::Iu()));

        let const_31 = self.graph.new_const(Tarval::val(31, Mode::Iu()));

        let block = div.block();
        let shr_by_31 = block.new_shrs(real_left, const_31);
        // Computed in i64: for a shift amount of 31 the i32 expression
        // `(1i32 << 31) - 1` overflows and panics in builds with overflow checks.
        let mask_const = (1i64 << shift_amount) - 1;
        let mask_const_node = self.graph.new_const(Tarval::mj_int(mask_const));
        let binary_and = block.new_and(shr_by_31, mask_const_node);
        let add_binary_and = block.new_add(real_left, binary_and);
        let shift_to_result = block.new_shrs(add_binary_and, shift_amount_node);

        // a / (-b) = -(a / b): negate the quotient for a negative divisor
        let shr_end = if has_minus {
            Node::Minus(div.block().new_minus(shift_to_result))
        } else {
            Node::Shrs(shift_to_result)
        };

        // drop the mem edge from the div
        Graph::exchange(div_proj_mem, div.mem());

        log::debug!(
            "LO: Div2Shift: {:?}[left:{:?},right:{:?}] replaced by '>> {}'",
            div,
            div.left(),
            div.right(),
            shift_amount
        );

        Graph::exchange(div_end, shr_end);

        self.changed = Outcome::Changed;
    }

    /// Dispatches `current_node` to the rewrite matching its kind; node kinds
    /// without a rewrite are ignored.
    fn visit_node(&mut self, current_node: Node) {
        // Do not set a breakpoint in here: per the original authors this ends up
        // as a walker inside a walker, which makes libfirm misbehave.
        log::debug!("LO: visiting {:?}", current_node);

        if let Node::Add(add) = current_node {
            self.try_optimize_add(add);
        } else if let Node::Mul(mul) = current_node {
            self.try_optimize_mul(mul);
        } else if let Node::Mod(modulo) = current_node {
            self.try_optimize_mod(modulo);
        } else if let Node::Div(div) = current_node {
            self.try_optimize_div(div);
        }
    }

    /// Walks the graph once, applies the rewrites to every visited node and
    /// returns whether anything was changed. If so, unreachable code and Bad
    /// nodes are removed from the graph before returning.
    fn run(&mut self) -> Outcome {
        self.changed = Outcome::Unchanged;

        // the div/mod rewrites look at the users (out edges) of a node
        self.graph.assure_outs();

        // the callback is run for each node "after all predecessors are visited"
        self.graph
            .walk_topological(|visited| self.visit_node(*visited));

        let outcome = self.changed;
        if outcome == Outcome::Changed {
            self.graph.remove_unreachable_code();
            self.graph.remove_bads();
        }

        outcome
    }
}

/// Maps a signed integer mode to its unsigned counterpart (`Is` to `Iu`, `Ls`
/// to `Lu`). Every other mode yields `None`.
fn mode_to_unsigned(mode: Mode) -> Option<Mode> {
    if mode == Mode::Is() {
        return Some(Mode::Iu());
    }

    if mode == Mode::Ls() {
        return Some(Mode::Lu());
    }

    None
}