diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs
index 6579fd0c6b9885c1053d13e2b84301e8461f2293..51ec29a4a1ef8b2ec773be21554bccdf26cf8713 100644
--- a/hercules_ir/src/dataflow.rs
+++ b/hercules_ir/src/dataflow.rs
@@ -4,9 +4,46 @@ extern crate slotmap;
 use self::slotmap::SecondaryMap;
 
 use self::bitvec::prelude::*;
 use self::bitvec::slice::*;
+use std::ops::{Index, IndexMut};
 
 // TODO: See if we can't optimize a secondary map into a bitvec for slotmaps
-type SecondaryBitVec<A, B> = SecondaryMap<A, B>;
+// Can we have a 'ghost' reference to a Function here, that doesn't compile to anything, but enforces that the function is not changed while these bitvecs are active?
+// At this point, why not just store a map NodeID -> Index as part of the Function, and then use that to index a real bitvec.
+// Oh well, we found this weird
+#[derive(PartialEq, Eq, Clone, Debug)]
+struct SecondaryBitVec<A: slotmap::Key>(SecondaryMap<A, bool>);
+
+impl<A: slotmap::Key> SecondaryBitVec<A> {
+    fn union(a: &Self, b: &Self) -> Self {
+        assert_eq!(a.0.len(), b.0.len(), "union on invalid bitvecs");
+
+        // Start from a clone of a, then OR in b's bits per key.
+        let mut out = a.clone();
+        out.0.iter_mut().for_each(|(k, v)| *v = a.0[k] | b.0[k]);
+        out
+    }
+
+    fn intersect(a: &Self, b: &Self) -> Self {
+        assert_eq!(a.0.len(), b.0.len(), "intersect on invalid bitvecs");
+
+        // Start from a clone of b, then AND in a's bits per key.
+        let mut out = b.clone();
+        out.0.iter_mut().for_each(|(k, v)| *v = a.0[k] & b.0[k]);
+        out
+    }
+}
+
+impl<A: slotmap::Key> Index<A> for SecondaryBitVec<A> {
+    type Output = bool;
+
+    fn index(&self, index: A) -> &Self::Output {
+        &self.0[index]
+    }
+}
+
+impl<A: slotmap::Key> IndexMut<A> for SecondaryBitVec<A> {
+    fn index_mut(&mut self, index: A) -> &mut Self::Output {
+        &mut self.0[index]
+    }
+}
 
 use crate::*;
 
@@ -34,18 +71,20 @@ pub fn forward_dataflow<L, F>(
     function: &Function,
     reverse_postorder: &Vec<NodeID>,
     mut flow_function: F,
-) -> Vec<L>
+) -> SecondaryMap<NodeID, L>
 where
     L: Semilattice,
     F: FnMut(&[&L], NodeID) -> L,
 {
     dataflow_global(function, reverse_postorder, |global_outs, node_id| {
+
         let uses = get_uses(function, node_id);
         let pred_outs: Vec<_> = uses
             .as_ref()
             .iter()
             .map(|id| global_outs[*id])
             .collect();
+
         flow_function(&pred_outs, node_id)
     })
 }
@@ -59,13 +98,14 @@ pub fn backward_dataflow<L, F>(
     def_use: &ImmutableDefUseMap,
     reverse_postorder: &Vec<NodeID>,
     mut flow_function: F,
-) -> Vec<L>
+) -> SecondaryMap<NodeID, L>
 where
     L: Semilattice,
     F: FnMut(&[&L], NodeID) -> L,
 {
     let mut postorder = reverse_postorder.clone();
     postorder.reverse();
+
     dataflow_global(function, &postorder, |global_outs, node_id| {
         let users = def_use.get_users(node_id);
         let succ_outs: Vec<_> = users
@@ -87,16 +127,20 @@ pub fn dataflow_global<L, F>(
     function: &Function,
     order: &Vec<NodeID>,
     mut flow_function: F,
-) -> Vec<L>
+) -> SecondaryMap<NodeID, L>
 where
     L: Semilattice,
-    F: FnMut(&SecondaryMap<NodeID, &L>, NodeID) -> L,
+    F: FnMut(&SecondaryMap<NodeID, L>, NodeID) -> L,
 {
     // Step 1: create initial set of "out" points.
-    let first_ins = vec![L::top(); function.nodes.len()];
-    let mut outs: Vec<L> = (0..function.nodes.len())
-        .map(|id| flow_function(&first_ins, NodeID::new(id)))
-        .collect();
+
+    // What is the default value of these lattices?
+
+    let first_ins: SecondaryMap<NodeID, L> = function.default_nodes_secondary_map(L::top());
+    let mut outs: SecondaryMap<NodeID, L> = first_ins.clone();
+
+    outs.iter_mut()
+        .for_each(|(id, data)| *data = flow_function(&first_ins, id));
 
     // Step 2: peform main dataflow loop.
     loop {
@@ -105,13 +149,13 @@ where
         // Iterate nodes in specified order.
         for node_id in order {
             // Compute new "out" value from previous "out" values.
-            let new_out = flow_function(&outs, *node_id);
-            if outs[node_id.idx()] != new_out {
+            let new_out: L = flow_function(&outs, *node_id);
+            if outs[*node_id] != new_out {
                 change = true;
             }
 
             // Update outs vector.
-            outs[node_id.idx()] = new_out;
+            outs[*node_id] = new_out;
         }
 
         // If no lattice value changed, we've reached the maximum fixed point
@@ -128,15 +172,17 @@ where
 /*
  * Compute reverse post order of nodes in function.
  */
-pub fn reverse_postorder(def_uses: &ImmutableDefUseMap) -> Vec<NodeID> {
+pub fn reverse_postorder(function: &Function, def_uses: &ImmutableDefUseMap) -> Vec<NodeID> {
     // Initialize order vector and bitset for tracking which nodes have been
     // visited.
-    let order = Vec::with_capacity(def_uses.num_nodes());
-    let visited = bitvec![u8, Lsb0; 0; def_uses.num_nodes()];
+    assert_eq!(function.nodes.keys().len(), def_uses.num_nodes());
+    // Post-order is built up by the helper, so start with an empty vector.
+    let order = Vec::with_capacity(function.nodes.len());
+    let visited = SecondaryBitVec(function.default_nodes_secondary_map(false));
 
     // Order and visited are threaded through arguments / return pair of
     // reverse_postorder_helper for ownership reasons.
-    let (mut order, _) = reverse_postorder_helper(NodeID::new(0), def_uses, order, visited);
+    let (mut order, _) = reverse_postorder_helper(function.start_node, def_uses, order, visited);
 
     // Reverse order in-place.
     order.reverse();
@@ -147,14 +193,14 @@ fn reverse_postorder_helper(
     node: NodeID,
     def_uses: &ImmutableDefUseMap,
     mut order: Vec<NodeID>,
-    mut visited: BitVec<u8, Lsb0>,
-) -> (Vec<NodeID>, BitVec<u8, Lsb0>) {
-    if visited[node.idx()] {
+    mut visited: SecondaryBitVec<NodeID>,
+) -> (Vec<NodeID>, SecondaryBitVec<NodeID>) {
+    if visited[node] {
         // If already visited, return early.
         (order, visited)
     } else {
         // Set visited to true.
-        visited.set(node.idx(), true);
+        visited[node] = true;
 
         // Iterate over users.
         for user in def_uses.get_users(node) {
@@ -182,7 +228,7 @@ impl IntersectNodeSet {
     pub fn is_set(&self, id: NodeID) -> bool {
         match self {
             IntersectNodeSet::Empty => false,
-            IntersectNodeSet::Bits(bits) => bits[id.idx()],
+            IntersectNodeSet::Bits(bits) => bits[id],
             IntersectNodeSet::Full => true,
         }
     }
@@ -230,51 +276,24 @@ impl Semilattice for IntersectNodeSet {
  * "unioning" flow functions.
  */
 #[derive(PartialEq, Eq, Clone, Debug)]
-pub enum UnionNodeSet {
-    Empty,
-    Bits(BitVec<u8, Lsb0>),
-    Full,
-}
+pub struct UnionNodeSet(SecondaryBitVec<NodeID>);
 
 impl UnionNodeSet {
-    pub fn is_set(&self, id: NodeID) -> bool {
-        match self {
-            UnionNodeSet::Empty => false,
-            UnionNodeSet::Bits(bits) => bits[id.idx()],
-            UnionNodeSet::Full => true,
-        }
-    }
-
     pub fn nodes(&self, num_nodes: u32) -> NodeSetIterator {
-        match self {
-            UnionNodeSet::Empty => NodeSetIterator::Empty,
-            UnionNodeSet::Bits(bitvec) => {
-                NodeSetIterator::Bits(bitvec.iter_ones().map(NodeID::new))
-            }
-            UnionNodeSet::Full => NodeSetIterator::Full(0, num_nodes),
-        }
+
     }
 }
 
 impl Semilattice for UnionNodeSet {
     fn meet(a: &Self, b: &Self) -> Self {
-        match (a, b) {
-            (UnionNodeSet::Empty, b) => b.clone(),
-            (a, UnionNodeSet::Empty) => a.clone(),
-            (UnionNodeSet::Bits(a), UnionNodeSet::Bits(b)) => {
-                assert!(
-                    a.len() == b.len(),
-                    "UnionNodeSets must have same length to meet."
-                );
-                UnionNodeSet::Bits(a.clone() | b)
-            }
-            _ => UnionNodeSet::Full,
-        }
+        // Meet is union.
+        UnionNodeSet(SecondaryBitVec::union(&a.0, &b.0))
     }
 
     fn bottom() -> Self {
         // For unioning flow functions, the bottom state is full.
-        UnionNodeSet::Full
+        // FIXME: the bitvec representation has no explicit "full" state; this needs the function's node set.
+        todo!()
     }
 
     fn top() -> Self {
@@ -284,29 +303,16 @@
 }
 
 #[derive(Clone, Debug)]
-pub enum NodeSetIterator<'a> {
-    Empty,
-    Bits(std::iter::Map<IterOnes<'a, u8, LocalBits>, fn(usize) -> ir::NodeID>),
-    Full(u32, u32),
-}
+pub struct NodeSetIterator<T: Iterator<Item = NodeID>>(T);
 
-impl<'a> Iterator for NodeSetIterator<'a> {
+// type NodeSetIterator = impl Iterator<Item = NodeID>;
+
+// FIXME: @(xrouth) this is disgusting. Why can't we just newtype NodeSetIterator as an Iterator?
+impl<T: Iterator<Item = NodeID>> Iterator for NodeSetIterator<T> {
     type Item = NodeID;
 
     fn next(&mut self) -> Option<Self::Item> {
-        match self {
-            NodeSetIterator::Empty => None,
-            NodeSetIterator::Bits(iter) => iter.next(),
-            NodeSetIterator::Full(idx, cap) => {
-                if idx < cap {
-                    let id = NodeID::new(*idx as usize);
-                    *idx += 1;
-                    Some(id)
-                } else {
-                    None
-                }
-            }
-        }
+        self.0.next()
     }
 }
 
@@ -333,7 +339,7 @@ pub fn control_output_flow(
     let mut out = inputs
         .into_iter()
         .fold(UnionNodeSet::top(), |a, b| UnionNodeSet::meet(&a, b));
-    let node = &function.nodes[node_id.idx()];
+    let node = &function.nodes[node_id];
 
     // Step 2: clear all bits, if applicable.
     if node.is_strictly_control() || node.is_thread_id() || node.is_reduce() || node.is_phi() {
@@ -342,8 +348,8 @@
 
     // Step 3: set bit for current node, if applicable.
     if node.is_thread_id() || node.is_reduce() || node.is_phi() {
-        let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()];
-        singular.set(node_id.idx(), true);
+        let mut singular = SecondaryBitVec(function.default_nodes_secondary_map(false));
+        singular[node_id] = true;
         out = UnionNodeSet::meet(&out, &UnionNodeSet::Bits(singular));
     }
@@ -383,8 +389,8 @@ pub fn immediate_control_flow(
 
     // Step 2: clear all bits and set bit for current node, if applicable.
     if node.is_control() {
-        let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()];
-        singular.set(node_id, true);
+        let mut singular = SecondaryBitVec(function.default_nodes_secondary_map(false));
+        singular[node_id] = true;
         out = UnionNodeSet::Bits(singular);
     }
 
diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs
index 069e6d75484a96d596737bd3a00706816e1d73a0..9fa1a4bf6d7756f8559da0ae6ba54b4316609de9 100644
--- a/hercules_ir/src/dot.rs
+++ b/hercules_ir/src/dot.rs
@@ -131,10 +131,10 @@ pub fn write_dot<W: Write>(
                 // before the use in the reverse postorder, and treating a
                 // control edge a back edge when the destination appears before
                 // the source in the reverse postorder.
-                let is_back_edge = reverse_postorder_node_numbers[node_id]
+                let is_back_edge = reverse_postorder_node_numbers[*node_id]
                     < reverse_postorder_node_numbers[*u]
                     && (node.is_phi()
-                        || (function.nodes[node_id].is_control()
+                        || (function.nodes[*node_id].is_control()
                             && function.nodes[*u].is_control()));
                 write_edge(
                     *node_id,
diff --git a/hercules_ir/src/gcm.rs b/hercules_ir/src/gcm.rs
index 87f013f4fff8d15285391553082353679a31f2dc..1d00bdd860f506cfc912f2c4912e5b65c85cdf20 100644
--- a/hercules_ir/src/gcm.rs
+++ b/hercules_ir/src/gcm.rs
@@ -30,14 +30,14 @@ pub fn gcm(
     for (read, write) in antideps {
         let meet = UnionNodeSet::meet(
             &immediate_control_uses[read],
-            &immediate_control_uses[write.idx()],
+            &immediate_control_uses[write],
         );
         immediate_control_uses[read.idx()] = meet.clone();
         immediate_control_uses[write.idx()] = meet;
 
         let meet = UnionNodeSet::meet(
-            &immediate_control_users[read.idx()],
-            &immediate_control_users[write.idx()],
+            &immediate_control_users[read],
+            &immediate_control_users[write],
         );
         immediate_control_users[read.idx()] = meet.clone();
         immediate_control_users[write.idx()] = meet;
diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs
index a6a3ea3875d765b45ff834608d83dc00ee35f3b7..c0ef341cfe577a4c5cc4e4093f0efdb1fda4c259 100644
--- a/hercules_ir/src/ir.rs
+++ b/hercules_ir/src/ir.rs
@@ -573,6 +573,18 @@ impl Function {
         let start_node = nodes.insert(start_node);
         Function { name: name.to_string(), param_types, return_type, nodes, start_node, num_dynamic_constants }
     }
+
+    pub fn default_nodes_secondary_map<T: Clone>(&self, default: T) -> SecondaryMap<NodeID, T> {
+        // Build a secondary map with one entry per node, all set to the default value.
+        let mut map = SecondaryMap::with_capacity(self.nodes.len());
+        self.nodes.keys().for_each(|key| { map.insert(key, default.clone()); });
+        map
+    }
+
+    pub fn closure_nodes_secondary_map<T: Clone>(&self, func: fn(NodeID, &Node) -> T) -> SecondaryMap<NodeID, T> {
+        // Build a secondary map with one entry per node, computed from the node itself.
+        let mut map = SecondaryMap::with_capacity(self.nodes.len());
+        self.nodes.iter().for_each(|(id, node)| { map.insert(id, func(id, node)); });
+        map
+    }
 }
 
 impl Type {
diff --git a/hercules_ir/src/loops.rs b/hercules_ir/src/loops.rs
index c657572f6f1cac3027219642903221d244bddd93..3a696b359badbf90bfa9f0d70e2cf73231e60a72 100644
--- a/hercules_ir/src/loops.rs
+++ b/hercules_ir/src/loops.rs
@@ -1,13 +1,20 @@
 extern crate bitvec;
+extern crate slotmap;
 
 use std::collections::hash_map;
 use std::collections::HashMap;
 use std::collections::VecDeque;
 
+use self::slotmap::SecondaryMap;
+
 use self::bitvec::prelude::*;
 
 use crate::*;
 
+
+// TODO: See if we can't optimize a secondary map into a bitvec for slotmaps
+type SecondaryBitVec<A> = SecondaryMap<A, bool>;
+
 /*
  * Custom type for storing a loop tree. Each node corresponds to a single loop
  * or a fork join pair in the IR graph. Each node in the tree corresponds to
@@ -24,7 +31,7 @@
 #[derive(Debug, Clone)]
 pub struct LoopTree {
     root: NodeID,
-    loops: HashMap<NodeID, (BitVec<u8, Lsb0>, NodeID)>,
+    loops: HashMap<NodeID, (SecondaryBitVec<NodeID>, NodeID)>,
 }
 
 impl LoopTree {
@@ -32,7 +39,7 @@ impl LoopTree {
         x == self.root || self.loops.contains_key(&x)
     }
 
-    pub fn loops(&self) -> hash_map::Iter<'_, NodeID, (BitVec<u8, Lsb0>, NodeID)> {
+    pub fn loops(&self) -> hash_map::Iter<'_, NodeID, (SecondaryBitVec<NodeID>, NodeID)> {
         self.loops.iter()
     }
 
    /*
     * Sometimes, we need to iterate the loop tree bottom-up. Just assemble the
     * order upfront.
    */
-    pub fn bottom_up_loops(&self) -> Vec<(NodeID, &BitVec<u8, Lsb0>)> {
+    pub fn bottom_up_loops(&self) -> Vec<(NodeID, &SecondaryBitVec<NodeID>)> {
         let mut bottom_up = vec![];
         let mut children_count: HashMap<NodeID, u32> = self.loops.iter().map(|(k, _)| (*k, 0)).collect();
         children_count.insert(self.root, 0);
@@ -94,16 +101,18 @@ pub fn loops(
     let loop_contents = loop_back_edges.iter().map(|(n, d)| {
         // Compute reachability for each loop back edge.
         let mut loop_contents = loop_reachability(*n, *d, subgraph);
-        loop_contents.set(d.idx(), true);
+        loop_contents.insert(*d, true);
         (d, loop_contents)
     });
 
     // Step 4: merge loops with same header into a single natural loop.
-    let mut loops: HashMap<NodeID, BitVec<u8, Lsb0>> = HashMap::new();
+    let mut loops: HashMap<NodeID, SecondaryBitVec<NodeID>> = HashMap::new();
     for (header, contents) in loop_contents {
         if loops.contains_key(header) {
-            let old_contents = loops.remove(header).unwrap();
-            loops.insert(*header, old_contents | contents);
+            let mut old_contents = loops.remove(header).unwrap();
+            // loops.insert(*header, old_contents | contents);
+            old_contents.extend(contents);
+            loops.insert(*header, old_contents);
         } else {
             loops.insert(*header, contents);
         }
@@ -118,7 +127,7 @@
         while let Some(new_dominator) = dom.imm_dom(dominator) {
             dominator = new_dominator;
             if let Some(outer_contents) = loops.get(&dominator) {
-                if outer_contents[header.idx()] {
+                if outer_contents.get(*header).copied().unwrap_or(false) {
                     return (*header, (contents.clone(), dominator));
                 }
             }
@@ -130,8 +139,13 @@
     LoopTree { root, loops }
 }
 
-fn loop_reachability(n: NodeID, d: NodeID, subgraph: &Subgraph) -> BitVec<u8, Lsb0> {
-    let visited = bitvec![u8, Lsb0; 0; subgraph.original_num_nodes() as usize];
+fn loop_reachability(n: NodeID, d: NodeID, subgraph: &Subgraph) -> SecondaryBitVec<NodeID> {
+    // The map starts out empty: missing keys are treated as "not visited" by
+    // loop_reachability_helper, so no default values need to be inserted here.
+    let visited = SecondaryBitVec::with_capacity(subgraph.original_num_nodes() as usize);
 
     // n is the root of the traversal, finding d is a termination condition.
     let visited = loop_reachability_helper(n, d, subgraph, visited);
@@ -143,14 +157,14 @@ fn loop_reachability_helper(
     n: NodeID,
     d: NodeID,
     subgraph: &Subgraph,
-    mut visited: BitVec<u8, Lsb0>,
-) -> BitVec<u8, Lsb0> {
-    if visited[n.idx()] {
+    mut visited: SecondaryBitVec<NodeID>,
+) -> SecondaryBitVec<NodeID> {
+    if visited.get(n).copied().unwrap_or(false) {
         // If already visited, return early.
         visited
     } else {
         // Set visited to true.
-        visited.set(n.idx(), true);
+        visited.insert(n, true);
 
         // Iterate over predecessors.
         for pred in subgraph.preds(n) {
diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs
index 63107c81d863934f101e859e7985b6585baee1e1..ef59cc6587e8da5d151972b4fc0ae4299f4257a3 100644
--- a/hercules_ir/src/parse.rs
+++ b/hercules_ir/src/parse.rs
@@ -56,7 +56,7 @@ impl<'a> Context<'a> {
         }
     }
 
-    fn get_node_id(&mut self, name: &'a str) -> NodeID {
+    fn get_node_id(&mut self, function_id: FunctionID, name: &'a str) -> NodeID {
         if let Some(id) = self.node_ids.get(name) {
             *id
         } else {