diff --git a/Cargo.lock b/Cargo.lock index 6642aef7745bf91996cb283de2c20c34953b3e90..0835939abb2314765a7ba7ee4ef90eefcd8e3144 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,7 +1366,29 @@ dependencies = [ ] [[package]] -name = "juno_grape_reduction" +name = "juno_grape_fft" +version = "0.1.0" +dependencies = [ + "async-std", + "grape_sim", + "hercules_rt", + "juno_build", + "with_builtin_macros", +] + +[[package]] +name = "juno_grape_reduction_host" +version = "0.1.0" +dependencies = [ + "async-std", + "grape_sim", + "hercules_rt", + "juno_build", + "with_builtin_macros", +] + +[[package]] +name = "juno_grape_reduction_tree" version = "0.1.0" dependencies = [ "async-std", diff --git a/Cargo.toml b/Cargo.toml index fd37ab44b5fe189caa1611962a040e7c836b6bfe..75521f82e1b6bf1f11c4cc6834918a8eea679db2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,9 +36,10 @@ members = [ "juno_samples/schedule_test", "juno_samples/simple3", "juno_samples/grape", - "juno_samples/grape_reduction", + "juno_samples/grape_reduction_tree", "juno_samples/grape_conv", - "juno_samples/grape_reduction", + "juno_samples/grape_fft", + "juno_samples/grape_reduction_host", "juno_scheduler", "juno_utils", ] diff --git a/grape_sim/src/lib.rs b/grape_sim/src/lib.rs index 06541a8a5e70105320517d71e0a35afe5aa590d0..9ca5469ebafd3742659debce7a66e66e9801a338 100644 --- a/grape_sim/src/lib.rs +++ b/grape_sim/src/lib.rs @@ -65,7 +65,8 @@ pub fn compute() -> [i16; 8] { println!("row: {}, col: {}, ins: {:?} op: {:?}", row, col, ins, op); let result = match op { - hercules_cg::FuOp::Add => ins.0 + ins.1, + hercules_cg::FuOp::Add => ins.0.wrapping_add(*ins.1), + hercules_cg::FuOp::Sub => ins.0.wrapping_sub(*ins.1), hercules_cg::FuOp::Mult => ins.0 * ins.1, hercules_cg::FuOp::PassA => *ins.0, hercules_cg::FuOp::Default => { @@ -96,7 +97,7 @@ pub unsafe extern "C" fn await_valid() {} pub unsafe extern "C" fn read_data(outputs: *mut i16) { // This forces the computation. let out = compute(); - print!("out: {:?}", out); + print!("out: {:?}\n", out); for i in 0..7 { *outputs.add(i) = out[i]; } diff --git a/hercules_cg/src/grape.rs b/hercules_cg/src/grape.rs index a7093f777a034be0a89044360fa04c169e14b6ea..085e1f9d53d461bbebde4a5c6f95e6bf31b1d3e7 100644 --- a/hercules_cg/src/grape.rs +++ b/hercules_cg/src/grape.rs @@ -74,6 +74,7 @@ impl FunctionalUnit { let vec = match self.op_type { FuOp::Add => vec![0, 0, 0, 0], + FuOp::Sub => vec![0, 1, 1, 1], FuOp::Mult => vec![0, 0, 1, 0], FuOp::PassA => vec![1, 1, 1, 1], FuOp::Default => vec![1, 1, 1, 1], @@ -133,6 +134,7 @@ where #[derive(Clone, Debug, Copy, PartialEq, Serialize, Deserialize)] pub enum FuOp { Add, + Sub, Mult, PassA, Default, // ... @@ -441,6 +443,8 @@ where // let binary_string: String = "010100101".to_owned(); std::fs::write("debug.txt", debug_str); + // todo!(); + // write!(output, "{}", string); // todo!(); @@ -1199,7 +1203,7 @@ where for (i, item) in input_nodes.iter().enumerate() { param_map.insert(*item, i); } - return self.schedule_row_recursive(0, param_map, input_mapping, config); + return self.schedule_row_recursive(0, param_map, input_mapping, config, HashSet::new()); } fn schedule_row_recursive( @@ -1208,6 +1212,7 @@ where prev_mapping: HashMap<NodeID, usize>, // A node mapping for the previous row mut input_mapping: Vec<NodeID>, mut config: SliceDesc<H, W>, + mut computed_nodes: HashSet<NodeID>, ) -> Result< (SliceDesc<H, W>, HashMap<NodeID, usize>, Vec<NodeID>), (SliceDesc<H, W>, HashMap<NodeID, usize>, Vec<NodeID>), @@ -1326,6 +1331,23 @@ where dedup(&mut choices); // Remove duplicates without changing order. + // Move killed nodes to back. + let mut i = 0; + + println!("computed_nodes: {:?}", computed_nodes); + + choices = choices + .iter() + .filter(|i| { + !self + .def_use_map + .get_users(**i) + .iter() + .all(|u| computed_nodes.contains(u)) + }) + .cloned() + .collect(); + println!("num choices: {:?}", choices.clone().len()); println!("choices: {:?}", choices.clone()); @@ -1333,6 +1355,7 @@ where for combo in choices.iter().combinations(W.min(choices.len())) { println!("combo: {:?}", combo); let mut next_mapping = HashMap::new(); + let mut next_computed_nodes = computed_nodes.clone(); // Heurestic (Correctness): Just assume the first 8 are good choices. for (col, node) in combo.iter().take(W).enumerate() { @@ -1361,6 +1384,7 @@ where } else { // compute it next_mapping.insert(**node, col); + next_computed_nodes.insert(**node); // Collect inputs let inputs: Vec<usize> = get_uses(&self.function.nodes[node.idx()]) @@ -1388,7 +1412,7 @@ where let op_type = match self.function.nodes[node.idx()] { Node::Binary { left, right, op } => match op { BinaryOperator::Add => FuOp::Add, - BinaryOperator::Sub => todo!(), + BinaryOperator::Sub => FuOp::Sub, BinaryOperator::Mul => FuOp::Mult, BinaryOperator::Div => todo!(), BinaryOperator::Rem => todo!(), @@ -1410,8 +1434,13 @@ where } } - let schedule_attempt = - self.schedule_row_recursive(row + 1, next_mapping, input_mapping.clone(), config); + let schedule_attempt = self.schedule_row_recursive( + row + 1, + next_mapping, + input_mapping.clone(), + config, + next_computed_nodes.clone(), + ); if schedule_attempt.is_ok() { return schedule_attempt; diff --git a/hercules_rt/src/grape_header.rs b/hercules_rt/src/grape_header.rs index 63467b92a6e0fa8a53e403b671d8600212d204e5..93a93f2cf29108d923f46fcc2d0cb06145f72041 100644 --- a/hercules_rt/src/grape_header.rs +++ b/hercules_rt/src/grape_header.rs @@ -1,10 +1,9 @@ extern "C" { fn program_bitstream(ptr: *const u8, len_bits: usize); - fn send_data(inputs: *const i16); // len is 16 + fn send_data(inputs: *const i16); // len is 16 fn await_valid(); fn read_data(outputs: *mut i16); // len is 7 - -} \ No newline at end of file +} diff --git a/hercules_rt/src/grape_lib.rs b/hercules_rt/src/grape_lib.rs index b2be97bd4391a15df37fe784f68be30d8f74ef92..5a625ce1083b26eb8f74d9471643069a8b0c43d3 100644 --- a/hercules_rt/src/grape_lib.rs +++ b/hercules_rt/src/grape_lib.rs @@ -1,20 +1,7 @@ // Grape stuff #[no_mangle] -pub unsafe extern "C" fn program_bitstream(ptr: *const u8, len: usize) { - // Example: read the bytes from the pointer - if ptr.is_null() { - panic!("Null pointer passed to program_bitstream"); - } - - let slice = std::slice::from_raw_parts(ptr, len); - - println!("Received bitstream of length {}", slice.len()); - - for (i, byte) in slice.iter().enumerate() { - println!("Byte {}: {:08b}", i, byte); - } -} +pub unsafe extern "C" fn program_bitstream(ptr: *const u8, len: usize) {} #[no_mangle] pub unsafe extern "C" fn send_data(inputs: *const i16) {} // len is 16 diff --git a/juno_samples/grape_conv/src/grape.sch b/juno_samples/grape_conv/src/grape.sch index 3aef5b9af52356456d080dcdf5ce0bca1d2263ee..664c0725c2776bf1d4fcbd88bc929fcd8caee0ee 100644 --- a/juno_samples/grape_conv/src/grape.sch +++ b/juno_samples/grape_conv/src/grape.sch @@ -26,5 +26,5 @@ ccp(*); simplify-cfg(*); dce(*); grape(wrapper); -xdot[true](*); +xdot[true](wrapper); gcm(*); diff --git a/juno_samples/grape_conv/src/main.rs b/juno_samples/grape_conv/src/main.rs index bf509d5d118324ef0982cbd1a5ad6cf847c34c6f..cad2a6ec5e96a682370a3238b22f630dccc79ee8 100644 --- a/juno_samples/grape_conv/src/main.rs +++ b/juno_samples/grape_conv/src/main.rs @@ -1,5 +1,7 @@ #![feature(concat_idents)] +use std::time::Instant; + #[cfg(feature = "cuda")] use hercules_rt::CUDABox; @@ -49,10 +51,14 @@ fn main() { { let a = HerculesCPURef::from_slice(&a); let b = HerculesCPURef::from_slice(&b); + let start = Instant::now(); let mut r = runner!(entry); let c = r.run(a, b).await; print!("{:?}", c); + let duration = start.elapsed(); + println!("Time elapsed in some_function() is: {:?}", duration); + assert!(false); assert_eq!(c, (result[0], result[1], result[2], result[3])); } #[cfg(feature = "cuda")] diff --git a/juno_samples/grape_reduction/Cargo.toml b/juno_samples/grape_fft/Cargo.toml similarity index 88% rename from juno_samples/grape_reduction/Cargo.toml rename to juno_samples/grape_fft/Cargo.toml index 683268b6368948bc6986d344641ae9ba02f79cfe..7b479813af19ffd84a6e6e153ebb750c3a28ae01 100644 --- a/juno_samples/grape_reduction/Cargo.toml +++ b/juno_samples/grape_fft/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "juno_grape_reduction" +name = "juno_grape_fft" version = "0.1.0" authors = ["Xavier Routh <xrouth2@illinois.edu>"] edition = "2021" [[bin]] -name = "juno_grape_reduction" +name = "juno_grape_fft" path = "src/main.rs" [features] diff --git a/juno_samples/grape_fft/build.rs b/juno_samples/grape_fft/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..2897d0e019e4b1983d5b3db4b4210d0f2ce80fe9 --- /dev/null +++ b/juno_samples/grape_fft/build.rs @@ -0,0 +1,35 @@ +use juno_build::JunoCompiler; + +fn main() { + #[cfg(not(feature = "cuda"))] + { + JunoCompiler::new() + .file_in_src("fft.jn") + .unwrap() + .schedule_in_src("cpu.sch") + .unwrap() + .build() + .unwrap(); + } + #[cfg(feature = "grape")] + { + JunoCompiler::new() + .file_in_src("fft.jn") + .unwrap() + .schedule_in_src("grape.sch") + .unwrap() + .build() + .unwrap(); + } + + #[cfg(feature = "cuda")] + { + JunoCompiler::new() + .file_in_src("simple.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + } +} diff --git a/juno_samples/grape_reduction/src/cpu.sch b/juno_samples/grape_fft/src/cpu.sch similarity index 100% rename from juno_samples/grape_reduction/src/cpu.sch rename to juno_samples/grape_fft/src/cpu.sch diff --git a/juno_samples/grape_fft/src/fft.jn b/juno_samples/grape_fft/src/fft.jn new file mode 100644 index 0000000000000000000000000000000000000000..f8d428d43154426df96f23e6fee6052463442d56 --- /dev/null +++ b/juno_samples/grape_fft/src/fft.jn @@ -0,0 +1,46 @@ +fn fft(a : i16[8]) -> i16, i16, i16, i16, i16, i16, i16, i16 { + let out : i16[8]; + let r0 = a[0]; + let r1 = a[1]; + let r2 = a[2]; + let r3 = a[3]; + let i0 = a[4]; + let i1 = a[5]; + let i2 = a[6]; + let i3 = a[7]; + + let r02 = r0 + r2; + let r13 = r1 + r3; + let i02 = i0 + i2; + let i13 = i1 + i3; + let r0m2 = r0 - r2; + let r1m3 = r1 - r3; + let i0m2 = i0 - i2; + let i1m3 = i1 - i3; + + out[0] = r02 + r13; + out[4] = i02 + i13; + out[2] = r02 - r13; + out[6] = i02 - i13; + out[1] = r0m2 + i1m3; + out[5] = i0m2 - r1m3; + out[3] = r0m2 - i1m3; + out[7] = i0m2 + r1m3; + return out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]; +} + +#[entry] +fn entry(a: i16[8]) -> i16[8] { + let out: i16[8]; + let a, b, c, d, e, f, g, h = fft(a); + out[0] = a; + out[1] = b; + out[2] = c; + out[3] = d; + out[4] = e; + out[5] = f; + out[6] = g; + out[7] = h; + return out; +} + diff --git a/juno_samples/grape_fft/src/grape.sch b/juno_samples/grape_fft/src/grape.sch new file mode 100644 index 0000000000000000000000000000000000000000..49486d91178dce73fc41f9a6ef64162d273926df --- /dev/null +++ b/juno_samples/grape_fft/src/grape.sch @@ -0,0 +1,31 @@ +gvn(*); +phi-elim(*); +ccp(*); +simplify-cfg(*); +dce(*); + +delete-uncalled(*); + +fixpoint stop after 10 { + forkify(*); + fork-guard-elim(*); + fork-unroll(*); + predication(*); + gvn(*); + phi-elim(*); + ccp(*); + simplify-cfg(*); + dce(*); + lift-dc-math(*); +} + +a2p(*); +sroa(*); +gvn(*); +phi-elim(*); +ccp(*); +simplify-cfg(*); +dce(*); +grape(fft); +xdot[true](*); +gcm(*); diff --git a/juno_samples/grape_fft/src/main.rs b/juno_samples/grape_fft/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..947be6431397ae1ccbc271abcbe9843ea2e06c9c --- /dev/null +++ b/juno_samples/grape_fft/src/main.rs @@ -0,0 +1,45 @@ +#![feature(concat_idents)] + +use std::time::Instant; + +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; + +use hercules_rt::{runner, HerculesCPURef}; + +juno_build::juno!("fft"); + +#[cfg(feature = "grape")] +use grape_sim::*; +fn main() { + async_std::task::block_on(async { + let a: Box<[i16]> = (1..=8).collect::<Vec<_>>().into_boxed_slice(); + + #[cfg(not(feature = "cuda"))] + { + let a = HerculesCPURef::from_slice(&a); + let start = Instant::now(); + + let mut r = runner!(entry); + let c = r.run(a).await; + let duration = start.elapsed(); + println!("Time elapsed in some_function() is: {:?}", duration); + + assert!(false); + print!("{:?}", c); + } + #[cfg(feature = "cuda")] + { + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); + let mut r = runner!(simple); + let c = r.run(8, a.get_ref(), b.get_ref()).await; + assert_eq!(c, 120); + } + }); +} + +#[test] +fn simple3_test() { + main(); +} diff --git a/juno_samples/grape_reduction_host/Cargo.toml b/juno_samples/grape_reduction_host/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..f38a566c11282b4dc1413f6add670723b4bddf29 --- /dev/null +++ b/juno_samples/grape_reduction_host/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "juno_grape_reduction_host" +version = "0.1.0" +authors = ["Xavier Routh <xrouth2@illinois.edu>"] +edition = "2021" + +[[bin]] +name = "juno_grape_reduction_host" +path = "src/main.rs" + +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] +grape = [] + +[build-dependencies] +juno_build = { path = "../../juno_build" } + +[dependencies] +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } +grape_sim = { path = "../../grape_sim" } +with_builtin_macros = "0.1.0" +async-std = "*" diff --git a/juno_samples/grape_reduction/build.rs b/juno_samples/grape_reduction_host/build.rs similarity index 100% rename from juno_samples/grape_reduction/build.rs rename to juno_samples/grape_reduction_host/build.rs diff --git a/juno_samples/grape_reduction_host/src/cpu.sch b/juno_samples/grape_reduction_host/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..7934b277d2ccc3daa385f95182ad6555dd4040f1 --- /dev/null +++ b/juno_samples/grape_reduction_host/src/cpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + + +ip-sroa(*); +sroa(*); +dce(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + + +gcm(*); +dce(*); +gcm(*); diff --git a/juno_samples/grape_reduction/src/grape.sch b/juno_samples/grape_reduction_host/src/grape.sch similarity index 95% rename from juno_samples/grape_reduction/src/grape.sch rename to juno_samples/grape_reduction_host/src/grape.sch index d098a3327e7790ccbea13a38e3abe7d1f2c77ef0..0265e729f52a2108cc6c07bba7fbe2bb6649f27c 100644 --- a/juno_samples/grape_reduction/src/grape.sch +++ b/juno_samples/grape_reduction_host/src/grape.sch @@ -8,7 +8,7 @@ inline(fake_entry); delete-uncalled(*); forkify(*); -fork-tile[4, 0, false, true](*); +fork-tile[8, 0, false, true](*); let a = fork-split(*); print[a._1_fake_entry.fj1](); let inner = outline(a._1_fake_entry.fj1); @@ -53,7 +53,7 @@ fork-unroll(a._1_fake_entry.fj0); // xdot[true](*); -reassociate(inner); +// reassociate(inner); diff --git a/juno_samples/grape_reduction_host/src/grape3.sch b/juno_samples/grape_reduction_host/src/grape3.sch new file mode 100644 index 0000000000000000000000000000000000000000..248254316f8209ed3a4800c070636892e19ac07a --- /dev/null +++ b/juno_samples/grape_reduction_host/src/grape3.sch @@ -0,0 +1,45 @@ +gvn(*); +phi-elim(*); +ccp(*); +simplify-cfg(*); +dce(*); + +inline(fake_entry); +delete-uncalled(*); + +forkify(*); +fork-guard-elim(*); +dce(*); + + +fixpoint stop after 10 { + forkify(*); + fork-guard-elim(*); + fork-unroll(*); + predication(*); + gvn(*); + phi-elim(*); + ccp(*); + simplify-cfg(*); + dce(*); + lift-dc-math(*); +} +// xdot[true](*); + +a2p(*); +sroa(*); +xdot[true](*); + +// reassociate go brr +reassociate(*); +xdot[true](*); + +gvn(*); +phi-elim(*); +ccp(*); +simplify-cfg(*); +dce(*); +grape(fake_entry); +xdot[true](*); +gcm(*); +xdot[true](*); diff --git a/juno_samples/grape_reduction_host/src/main.rs b/juno_samples/grape_reduction_host/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..81e5ec0cdf12a2ca3ef365d552c4fd0841f12e33 --- /dev/null +++ b/juno_samples/grape_reduction_host/src/main.rs @@ -0,0 +1,50 @@ +#![feature(concat_idents)] + +use std::time::Instant; + +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; + +use hercules_rt::{runner, HerculesCPURef}; + +juno_build::juno!("simple"); + +#[cfg(feature = "grape")] +use grape_sim::*; + +fn main() { + async_std::task::block_on(async { + let size = 64; + let a: Box<[i16]> = (1..=size).collect::<Vec<_>>().into_boxed_slice(); + let sum: i16 = a.iter().sum(); + #[cfg(not(feature = "cuda"))] + { + let a = HerculesCPURef::from_slice(&a); + let start = Instant::now(); + + let mut r = runner!(entry); + let c = r.run(a).await; + let duration = start.elapsed(); + + println!("Time elapsed in some_function() is: {:?}", duration); + + assert!(false); + print!("{:?}", c); + + assert_eq!(c, sum); + } + #[cfg(feature = "cuda")] + { + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); + let mut r = runner!(simple); + let c = r.run(8, a.get_ref(), b.get_ref()).await; + assert_eq!(c, 120); + } + }); +} + +#[test] +fn simple3_test() { + main(); +} diff --git a/juno_samples/grape_reduction_host/src/simple.jn b/juno_samples/grape_reduction_host/src/simple.jn new file mode 100644 index 0000000000000000000000000000000000000000..bd94baf7c0c581038e85a83930e56b83defd91ed --- /dev/null +++ b/juno_samples/grape_reduction_host/src/simple.jn @@ -0,0 +1,27 @@ +fn reduce<n: usize>(a : i16[n]) -> i16 { + let acc: i16 = 0; + + for i = 0 to n { + acc += a[i]; + } + + return acc; +} + +fn fake_entry(a: i16[64]) -> i16 { + @this { + let r = reduce::<64>(a); + + return r; + } +} + +#[entry] +fn entry(a: i16[64]) -> i16 { + return fake_entry(a); +} + + + + + diff --git a/juno_samples/grape_reduction_tree/Cargo.toml b/juno_samples/grape_reduction_tree/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..a20c606ba90d814bbd49d9783613d89d558a97c7 --- /dev/null +++ b/juno_samples/grape_reduction_tree/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "juno_grape_reduction_tree" +version = "0.1.0" +authors = ["Xavier Routh <xrouth2@illinois.edu>"] +edition = "2021" + +[[bin]] +name = "juno_grape_reduction_tree" +path = "src/main.rs" + +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] +grape = [] + +[build-dependencies] +juno_build = { path = "../../juno_build" } + +[dependencies] +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } +grape_sim = { path = "../../grape_sim" } +with_builtin_macros = "0.1.0" +async-std = "*" diff --git a/juno_samples/grape_reduction_tree/build.rs b/juno_samples/grape_reduction_tree/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..5ee28f2ccace52074541d72014082dbe056f1f14 --- /dev/null +++ b/juno_samples/grape_reduction_tree/build.rs @@ -0,0 +1,35 @@ +use juno_build::JunoCompiler; + +fn main() { + #[cfg(not(feature = "cuda"))] + { + JunoCompiler::new() + .file_in_src("simple.jn") + .unwrap() + .schedule_in_src("cpu.sch") + .unwrap() + .build() + .unwrap(); + } + #[cfg(feature = "grape")] + { + JunoCompiler::new() + .file_in_src("simple.jn") + .unwrap() + .schedule_in_src("grape.sch") + .unwrap() + .build() + .unwrap(); + } + + #[cfg(feature = "cuda")] + { + JunoCompiler::new() + .file_in_src("simple.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + } +} diff --git a/juno_samples/grape_reduction_tree/src/cpu.sch b/juno_samples/grape_reduction_tree/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..7934b277d2ccc3daa385f95182ad6555dd4040f1 --- /dev/null +++ b/juno_samples/grape_reduction_tree/src/cpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + + +ip-sroa(*); +sroa(*); +dce(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + + +gcm(*); +dce(*); +gcm(*); diff --git a/juno_samples/grape_reduction_tree/src/grape.sch b/juno_samples/grape_reduction_tree/src/grape.sch new file mode 100644 index 0000000000000000000000000000000000000000..248254316f8209ed3a4800c070636892e19ac07a --- /dev/null +++ b/juno_samples/grape_reduction_tree/src/grape.sch @@ -0,0 +1,45 @@ +gvn(*); +phi-elim(*); +ccp(*); +simplify-cfg(*); +dce(*); + +inline(fake_entry); +delete-uncalled(*); + +forkify(*); +fork-guard-elim(*); +dce(*); + + +fixpoint stop after 10 { + forkify(*); + fork-guard-elim(*); + fork-unroll(*); + predication(*); + gvn(*); + phi-elim(*); + ccp(*); + simplify-cfg(*); + dce(*); + lift-dc-math(*); +} +// xdot[true](*); + +a2p(*); +sroa(*); +xdot[true](*); + +// reassociate go brr +reassociate(*); +xdot[true](*); + +gvn(*); +phi-elim(*); +ccp(*); +simplify-cfg(*); +dce(*); +grape(fake_entry); +xdot[true](*); +gcm(*); +xdot[true](*); diff --git a/juno_samples/grape_reduction/src/main.rs b/juno_samples/grape_reduction_tree/src/main.rs similarity index 82% rename from juno_samples/grape_reduction/src/main.rs rename to juno_samples/grape_reduction_tree/src/main.rs index 1c209ca181ad5e258cfe6ead86aceb19d534c9d9..d113aa0abc20ce07200273a019e0774e535c3faa 100644 --- a/juno_samples/grape_reduction/src/main.rs +++ b/juno_samples/grape_reduction_tree/src/main.rs @@ -1,5 +1,7 @@ #![feature(concat_idents)] +use std::time::Instant; + #[cfg(feature = "cuda")] use hercules_rt::CUDABox; @@ -17,10 +19,15 @@ fn main() { #[cfg(not(feature = "cuda"))] { let a = HerculesCPURef::from_slice(&a); + let start = Instant::now(); + let mut r = runner!(entry); let c = r.run(a).await; print!("{:?}", c); + let duration = start.elapsed(); + println!("Time elapsed in some_function() is: {:?}", duration); + assert!(false); assert_eq!(c, sum); } #[cfg(feature = "cuda")] diff --git a/juno_samples/grape_reduction/src/simple.jn b/juno_samples/grape_reduction_tree/src/simple.jn similarity index 100% rename from juno_samples/grape_reduction/src/simple.jn rename to juno_samples/grape_reduction_tree/src/simple.jn