Skip to content
Snippets Groups Projects
Commit 7e05fb81 authored by Xavier Routh's avatar Xavier Routh
Browse files

reduction on host

parent 42c52fd2
No related branches found
No related tags found
No related merge requests found
Pipeline #202577 failed
...@@ -1354,6 +1354,17 @@ dependencies = [ ...@@ -1354,6 +1354,17 @@ dependencies = [
"with_builtin_macros", "with_builtin_macros",
] ]
[[package]]
name = "juno_grape_conv"
version = "0.1.0"
dependencies = [
"async-std",
"grape_sim",
"hercules_rt",
"juno_build",
"with_builtin_macros",
]
[[package]] [[package]]
name = "juno_grape_reduction" name = "juno_grape_reduction"
version = "0.1.0" version = "0.1.0"
......
[workspace] [workspace]
resolver = "2"
members = [ members = [
"hercules_cg", "hercules_cg",
"hercules_ir", "hercules_ir",
...@@ -38,6 +37,9 @@ members = [ ...@@ -38,6 +37,9 @@ members = [
"juno_samples/simple3", "juno_samples/simple3",
"juno_samples/grape", "juno_samples/grape",
"juno_samples/grape_reduction", "juno_samples/grape_reduction",
"juno_samples/grape_conv",
"juno_samples/grape_reduction",
"juno_scheduler", "juno_scheduler",
"juno_utils", "juno_utils",
] ]
resolver = "2"
This diff is collapsed.
# Manifest for the `juno_grape_conv` sample: a single binary built from src/main.rs.
[package]
name = "juno_grape_conv"
version = "0.1.0"
authors = ["Xavier Routh <xrouth2@illinois.edu>"]
edition = "2021"
[[bin]]
name = "juno_grape_conv"
path = "src/main.rs"
[features]
# `cuda` forwards to the `cuda` features of juno_build and hercules_rt.
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
# `grape` turns on no dependency features; it only acts as a cfg flag for this crate.
grape = []
# juno_build is listed twice on purpose: once for the build script, once for the binary.
[build-dependencies]
juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
grape_sim = { path = "../../grape_sim" }
with_builtin_macros = "0.1.0"
# NOTE(review): the wildcard accepts any async-std release - consider pinning a version.
async-std = "*"
use juno_build::JunoCompiler;
/// Build script: compiles the Juno kernel `src/conv.jn` with exactly one schedule.
///
/// The schedule is chosen from the enabled Cargo features, in priority order:
/// `cuda` -> `gpu.sch`, `grape` -> `grape.sch`, otherwise `cpu.sch`.
///
/// Previously the CPU and `grape` blocks were not mutually exclusive, so enabling
/// `grape` compiled the kernel twice with two different schedules, and the `cuda`
/// block pointed at `simple.jn`, a leftover from another sample.
///
/// # Panics
///
/// Panics if the source or schedule cannot be loaded or if compilation fails;
/// aborting the build is the desired outcome in that case.
fn main() {
    // Select a single schedule so the kernel is compiled exactly once.
    let schedule = if cfg!(feature = "cuda") {
        "gpu.sch"
    } else if cfg!(feature = "grape") {
        "grape.sch"
    } else {
        "cpu.sch"
    };

    JunoCompiler::new()
        .file_in_src("conv.jn")
        .unwrap()
        .schedule_in_src(schedule)
        .unwrap()
        .build()
        .unwrap();
}
// 1-D convolution of `a` (length n) with `kernel` (length k); returns one i16 per
// input element.
//
// For every output index i the window [i - k/2, i + k/2] is clamped to
// [0, n - 1]. The clamped window is multiplied element-wise against `kernel`,
// always starting at kernel[0], and the products are summed into an i16.
//
// NOTE(review): since the kernel index starts at 0 even when the window was
// clipped on the left, edge outputs are not aligned with the kernel centre -
// confirm this is the intended boundary handling.
// NOTE(review): for even k the unclamped window spans k + 1 elements, so
// `kernel[j]` would be read at j == k - confirm only odd k is ever used.
fn conv1d<n: usize, k: usize>(a : i16[n], kernel: i16[k]) -> i16[n] {
let res : i16[n];
for i = 0 to n {
// Window bounds are computed in i64 so the left bound may go negative before clamping.
let window_left = i as i64 - (k as i64) / 2;
let window_right = i as i64 + (k as i64) / 2;
// Clamp the window to the valid index range of `a`.
if window_left < 0 {
window_left = 0;
}
if window_right >= n as i64 {
window_right = n as i64 - 1;
}
let acc: i16 = 0;
// j walks the clamped window; inc_j is the matching index into `a`.
for j = 0 to (window_right - window_left + 1) as u64 {
let inc_j = j + window_left as u64;
acc += a[inc_j] * kernel[j];
}
res[i] = acc;
}
return res;
}
// Scalar-only front end for conv1d::<4, 3>: packs four input scalars and three
// kernel scalars into local arrays, runs the convolution, and returns the four
// outputs as separate i16 values.
fn wrapper(a0, a1, a2, a3, k0, k1, k2 : i16) -> i16, i16, i16, i16 {
let a : i16[4];
let k : i16[3];
let r : i16[4];
// Pack the scalar arguments into the array shapes conv1d expects.
a[0] = a0;
a[1] = a1;
a[2] = a2;
a[3] = a3;
k[0] = k0;
k[1] = k1;
k[2] = k2;
r = conv1d::<4, 3>(a, k);
// Unpack the result array back into scalars.
return r[0], r[1], r[2], r[3];
}
// Entry point marked with #[entry]: takes the input and kernel as arrays,
// splits them into scalars, and forwards them to `wrapper`, returning its four
// outputs unchanged.
#[entry]
fn entry(a: i16[4], k: i16[3]) -> i16, i16, i16, i16 {
let c, d, e, f = wrapper(a[0], a[1], a[2], a[3], k[0], k[1], k[2]);
return c, d, e, f;
}
// Pass schedule for the conv sample. Passes run in the order written.
// NOTE(review): `(*)` presumably selects every function and `(wrapper)` only
// the function named `wrapper` - confirm against the scheduler documentation.
// NOTE(review): pass names look like the usual compiler abbreviations (gvn,
// dce, sroa, ccp, gcm, ...) - confirm exact semantics in the scheduler docs.

// Initial cleanup of all functions.
gvn(*);
phi-elim(*);
dce(*);
// Aggregate splitting (ip-sroa, then sroa), followed by another cleanup round.
ip-sroa(*);
sroa(*);
dce(*);
gvn(*);
phi-elim(*);
dce(*);
infer-schedules(*);
gcm(*);
dce(*);
gcm(*);
gvn(*);
phi-elim(*);
ccp(*);
simplify-cfg(*);
dce(*);
// Inline with respect to `wrapper`, then drop functions that are no longer called.
inline(wrapper);
delete-uncalled(*);
// Repeat the fork/unroll/predication pipeline until nothing changes, giving up
// after at most 10 rounds.
fixpoint stop after 10 {
forkify(*);
fork-guard-elim(*);
fork-unroll(*);
predication(*);
gvn(*);
phi-elim(*);
ccp(*);
simplify-cfg(*);
dce(*);
lift-dc-math(*);
}
// Final lowering and cleanup before handing `wrapper` to the grape pass.
a2p(*);
sroa(*);
gvn(*);
phi-elim(*);
ccp(*);
simplify-cfg(*);
dce(*);
grape(wrapper);
// NOTE(review): xdot looks like a debugging/visualisation step - confirm it is
// meant to stay enabled in the committed schedule.
xdot[true](*);
gcm(*);
#![feature(concat_idents)]
#[cfg(feature = "cuda")]
use hercules_rt::CUDABox;
use hercules_rt::{runner, HerculesCPURef};
juno_build::juno!("conv");
#[cfg(feature = "grape")]
use grape_sim::*;
/// Host-side reference for the 1-D convolution computed by the Juno kernel.
///
/// For each output position the window `[i - K/2, i + K/2]` is clamped to the
/// valid range of `a`; the clamped window is multiplied element-wise against
/// `kernel`, starting at `kernel[0]`, and summed into an `i16`.
///
/// # Panics
///
/// Panics on `i16` overflow in builds with overflow checks, and when the
/// clamped window is longer than `kernel` (possible for even `K`).
fn conv1d<const N: usize, const K: usize>(a: &[i16; N], kernel: &[i16; K]) -> [i16; N] {
    let half_width = (K as i64) / 2;
    let mut out = [0i16; N];
    for (pos, slot) in out.iter_mut().enumerate() {
        let centre = pos as i64;
        // Clamp both ends of the window to the bounds of `a`.
        let first = (centre - half_width).max(0);
        let last = (centre + half_width).min(N as i64 - 1);
        let taps = (last - first + 1) as usize;
        let base = first as usize;

        let mut sum: i16 = 0;
        for tap in 0..taps {
            sum += a[base + tap] * kernel[tap];
        }
        *slot = sum;
    }
    out
}
/// Runs the convolution sample and checks the compiled kernel against the
/// host-side `conv1d` reference.
///
/// The same fixed input (`[1, 2, 3, 4]`) and kernel (`[5, 6, 7]`) are fed to the
/// reference and to the compiled `entry` function; the four returned values must
/// match the reference element for element.
///
/// The `cuda` branch used to run a `simple` entry with an extra leading argument
/// and compare against `120`, all leftovers from another sample; it now runs
/// `entry` and checks the same expected tuple as the CPU branch.
///
/// # Panics
///
/// Panics if the kernel output differs from the reference.
fn main() {
    async_std::task::block_on(async {
        let a: Box<[i16]> = Box::new([1, 2, 3, 4]);
        let b: Box<[i16]> = Box::new([5, 6, 7]);

        // Reference result computed on the host.
        let result: [i16; 4] = conv1d::<4, 3>(
            a.as_ref().try_into().unwrap(),
            b.as_ref().try_into().unwrap(),
        );
        // `entry` returns its four outputs as a tuple, so compare in that shape.
        let expected = (result[0], result[1], result[2], result[3]);

        #[cfg(not(feature = "cuda"))]
        {
            let a = HerculesCPURef::from_slice(&a);
            let b = HerculesCPURef::from_slice(&b);
            let mut r = runner!(entry);
            let c = r.run(a, b).await;
            print!("{:?}", c);
            assert_eq!(c, expected);
        }

        #[cfg(feature = "cuda")]
        {
            // Copy the inputs to device memory before launching the kernel.
            let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a));
            let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b));
            let mut r = runner!(entry);
            let c = r.run(a.get_ref(), b.get_ref()).await;
            assert_eq!(c, expected);
        }
    });
}
// Runs the whole sample under `cargo test`; it passes when `main` returns
// without panicking.
// NOTE(review): the name `simple3_test` looks inherited from another sample -
// consider renaming it to match this crate.
#[test]
fn simple3_test() {
main();
}
...@@ -8,38 +8,66 @@ inline(fake_entry); ...@@ -8,38 +8,66 @@ inline(fake_entry);
delete-uncalled(*); delete-uncalled(*);
forkify(*); forkify(*);
fork-tile[4, 0, false, true](*);
let a = fork-split(*);
print[a._1_fake_entry.fj1]();
let inner = outline(a._1_fake_entry.fj1);
rename["inner"](inner);
fork-guard-elim(*); fork-guard-elim(*);
dce(*); dce(*);
xdot[true](*);
fixpoint stop after 4 {
forkify(a._1_fake_entry.fj1);
fork-guard-elim(a._1_fake_entry.fj1);
fork-unroll(a._1_fake_entry.fj1);
predication(a._1_fake_entry.fj1);
gvn(a._1_fake_entry.fj1);
phi-elim(a._1_fake_entry.fj1);
ccp(a._1_fake_entry.fj1);
simplify-cfg(a._1_fake_entry.fj1);
dce(a._1_fake_entry.fj1);
lift-dc-math(a._1_fake_entry.fj1);
}
fixpoint stop after 10 { fixpoint stop after 4 {
forkify(*); forkify(inner);
fork-guard-elim(*); fork-guard-elim(inner);
fork-unroll(*); fork-unroll(inner);
predication(*); predication(inner);
gvn(*); gvn(inner);
phi-elim(*); phi-elim(inner);
ccp(*); ccp(inner);
simplify-cfg(*); simplify-cfg(inner);
dce(*); dce(inner);
lift-dc-math(*); lift-dc-math(inner);
} }
xdot[true](*);
fork-unroll(a._1_fake_entry.fj0);
// xdot[true](*); // xdot[true](*);
reassociate(inner);
a2p(*); a2p(*);
sroa(*); sroa(*);
xdot[true](*); xdot[true](*);
// reassociate go brr
reassociate(*);
xdot[true](*);
gvn(*); gvn(*);
phi-elim(*); phi-elim(*);
ccp(*); ccp(*);
simplify-cfg(*); simplify-cfg(*);
dce(*); dce(*);
grape(fake_entry); grape(inner);
xdot[true](*); xdot[true](*);
gcm(*); gcm(*);
xdot[true](*); xdot[true](*);
...@@ -8,24 +8,10 @@ fn reduce<n: usize>(a : i16[n]) -> i16 { ...@@ -8,24 +8,10 @@ fn reduce<n: usize>(a : i16[n]) -> i16 {
return acc; return acc;
} }
fn reduce2<n: usize, k: usize>(a : i16[n, k]) -> i16 {
let acc: i16 = 0;
for j = 0 to k {
@inner for i = 0 to n {
acc += a[i, j];
}
}
return acc;
}
fn fake_entry(a: i16[16]) -> i16 { fn fake_entry(a: i16[16]) -> i16 {
@this { @this {
let r = reduce::<16>(a); let r = reduce::<16>(a);
// let r2 = reduce2::<16, 2>(b);
return r; return r;
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment