Skip to content
Snippets Groups Projects
Commit 7ab06905 authored by rarbore2's avatar rarbore2
Browse files

Merge branch 'edge_opt' into 'main'

Optimize edge detection

See merge request !192
parents 6bce8641 aacafeab
No related branches found
No related tags found
1 merge request: !192 Optimize edge detection
Pipeline #201779 passed
...@@ -560,23 +560,33 @@ impl<'a> RTContext<'a> { ...@@ -560,23 +560,33 @@ impl<'a> RTContext<'a> {
// same interface as AsyncRust functions. // same interface as AsyncRust functions.
let block = &mut blocks.get_mut(&bb).unwrap().data; let block = &mut blocks.get_mut(&bb).unwrap().data;
let is_async = func.schedules[id.idx()].contains(&Schedule::AsyncCall); let is_async = func.schedules[id.idx()].contains(&Schedule::AsyncCall);
if is_async {
for arg in args {
if let Some(arc) = self.clone_arc(*arg, false) {
write!(block, "{}", arc)?;
}
}
}
let device = self.devices[callee_id.idx()]; let device = self.devices[callee_id.idx()];
let prefix = match (device, is_async) { let prefix = match (device, is_async) {
(Device::AsyncRust, false) => "", (Device::AsyncRust, false) | (_, false) => {
(_, false) => "", format!("{} = ", self.get_value(id, bb, true))
(Device::AsyncRust, true) => "Some(::async_std::task::spawn(", }
(_, true) => "Some(::async_std::task::spawn(async move {", (_, true) => format!(
"{}::async_std::task::spawn(async move {{ async_call_sender_{}.send(",
self.clone_arc(id, true).unwrap(),
id.idx()
),
}; };
let postfix = match (device, is_async) { let postfix = match (device, is_async) {
(Device::AsyncRust, false) => ".await", (Device::AsyncRust, false) => ".await",
(_, false) => "", (_, false) => "",
(Device::AsyncRust, true) => "))", (Device::AsyncRust, true) => ".await).await})",
(_, true) => "}))", (_, true) => ").await})",
}; };
write!( write!(
block, block,
"{} = {}{}(", "{}{}(",
self.get_value(id, bb, true),
prefix, prefix,
self.module.functions[callee_id.idx()].name self.module.functions[callee_id.idx()].name
)?; )?;
...@@ -1069,11 +1079,15 @@ impl<'a> RTContext<'a> { ...@@ -1069,11 +1079,15 @@ impl<'a> RTContext<'a> {
} }
// If the node is a call with an AsyncCall schedule, it should be // If the node is a call with an AsyncCall schedule, it should be
// spawned as a task and awaited later. // lowered to a channel.
let is_async_call = let is_async_call =
func.nodes[idx].is_call() && func.schedules[idx].contains(&Schedule::AsyncCall); func.nodes[idx].is_call() && func.schedules[idx].contains(&Schedule::AsyncCall);
if is_async_call { if is_async_call {
write!(w, "let mut async_call_{} = None;", idx)?; write!(
w,
"let mut async_call_channel_{} = ::async_std::channel::bounded(1);let async_call_sender_{} = ::std::sync::Arc::new(async_call_channel_{}.0);let async_call_receiver_{} = ::std::sync::Arc::new(async_call_channel_{}.1);",
idx, idx, idx, idx, idx
)?;
} else { } else {
write!( write!(
w, w,
...@@ -1356,16 +1370,30 @@ impl<'a> RTContext<'a> { ...@@ -1356,16 +1370,30 @@ impl<'a> RTContext<'a> {
} else if func.nodes[id.idx()].is_call() } else if func.nodes[id.idx()].is_call()
&& func.schedules[id.idx()].contains(&Schedule::AsyncCall) && func.schedules[id.idx()].contains(&Schedule::AsyncCall)
{ {
format!( assert!(!lhs);
"async_call_{}{}", format!("async_call_receiver_{}.recv().await.unwrap()", id.idx(),)
id.idx(),
if lhs { "" } else { ".unwrap().await" }
)
} else { } else {
format!("node_{}", id.idx()) format!("node_{}", id.idx())
} }
} }
/// Builds the generated-code statement that shadows one endpoint of an
/// async call's channel with a clone of itself.
///
/// * `id`  - node whose channel endpoint should be cloned.
/// * `lhs` - `true` selects the `sender` endpoint, `false` the `receiver`.
///
/// Returns `None` unless `id` is a call node carrying the
/// `Schedule::AsyncCall` schedule; otherwise returns a statement of the form
/// `let async_call_<kind>_<idx> = async_call_<kind>_<idx>.clone();`.
fn clone_arc(&self, id: NodeID, lhs: bool) -> Option<String> {
    let func = self.get_func();
    let is_async_call = func.nodes[id.idx()].is_call()
        && func.schedules[id.idx()].contains(&Schedule::AsyncCall);
    if !is_async_call {
        // Only async calls are lowered to channels, so nothing to clone.
        return None;
    }

    let endpoint = match lhs {
        true => "sender",
        false => "receiver",
    };
    // The same identifier appears on both sides: the new binding shadows
    // the old one with its clone.
    let binding = format!("async_call_{}_{}", endpoint, id.idx());
    Some(format!("let {0} = {0}.clone();", binding))
}
fn get_type(&self, id: TypeID) -> &'static str { fn get_type(&self, id: TypeID) -> &'static str {
convert_type(&self.module.types[id.idx()]) convert_type(&self.module.types[id.idx()])
} }
......
...@@ -127,17 +127,8 @@ simpl!(fuse5); ...@@ -127,17 +127,8 @@ simpl!(fuse5);
delete-uncalled(*); delete-uncalled(*);
simpl!(*); simpl!(*);
fork-split(fuse1, fuse2, fuse3, fuse4, fuse5);
fork-split(fuse1); unforkify(fuse1, fuse2, fuse3, fuse4, fuse5);
unforkify(fuse1);
fork-split(fuse2);
unforkify(fuse2);
fork-split(fuse3);
unforkify(fuse3);
fork-split(fuse4);
unforkify(fuse4);
fork-split(fuse5);
unforkify(fuse5);
simpl!(*); simpl!(*);
......
...@@ -15,7 +15,6 @@ required-features = ["opencv"] ...@@ -15,7 +15,6 @@ required-features = ["opencv"]
[lib] [lib]
path = "src/lib.rs" path = "src/lib.rs"
required-features = ["opencv"]
[build-dependencies] [build-dependencies]
juno_build = { path = "../../juno_build" } juno_build = { path = "../../juno_build" }
......
...@@ -14,6 +14,8 @@ fn main() { ...@@ -14,6 +14,8 @@ fn main() {
JunoCompiler::new() JunoCompiler::new()
.file_in_src("edge_detection.jn") .file_in_src("edge_detection.jn")
.unwrap() .unwrap()
.schedule_in_src("cpu.sch")
.unwrap()
.build() .build()
.unwrap(); .unwrap();
} }
macro simpl!(X) {
ccp(X);
simplify-cfg(X);
lift-dc-math(X);
gvn(X);
phi-elim(X);
dce(X);
infer-schedules(X);
}
simpl!(*);
ip-sroa(*);
sroa(*);
simpl!(*);
no-memset(gaussian_smoothing@res);
fixpoint {
forkify(gaussian_smoothing);
fork-guard-elim(gaussian_smoothing);
fork-coalesce(gaussian_smoothing);
}
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);
fixpoint {
forkify(laplacian_estimate);
fork-guard-elim(laplacian_estimate);
fork-coalesce(laplacian_estimate);
}
simpl!(laplacian_estimate);
no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);
fixpoint {
forkify(zero_crossings);
fork-guard-elim(zero_crossings);
fork-coalesce(zero_crossings);
}
simpl!(zero_crossings);
no-memset(gradient@res);
fixpoint {
forkify(gradient);
fork-guard-elim(gradient);
fork-coalesce(gradient);
}
predication(gradient);
simpl!(gradient);
predication(gradient);
simpl!(gradient);
fixpoint {
forkify(max_gradient);
fork-guard-elim(max_gradient);
fork-coalesce(max_gradient);
}
simpl!(max_gradient);
no-memset(reject_zero_crossings@res);
fixpoint {
forkify(reject_zero_crossings);
fork-guard-elim(reject_zero_crossings);
fork-coalesce(reject_zero_crossings);
}
predication(reject_zero_crossings);
simpl!(reject_zero_crossings);
async-call(edge_detection@le, edge_detection@zc);
fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
simpl!(*);
delete-uncalled(*);
gcm(*);
...@@ -2,7 +2,7 @@ fn gaussian_smoothing<n, m, gs : usize>( ...@@ -2,7 +2,7 @@ fn gaussian_smoothing<n, m, gs : usize>(
input: f32[n, m], input: f32[n, m],
filter: f32[gs, gs], filter: f32[gs, gs],
) -> f32[n, m] { ) -> f32[n, m] {
let result : f32[n, m]; @res let result : f32[n, m];
// Define the gaussian radius as half the gaussian size // Define the gaussian radius as half the gaussian size
const gr = gs / 2; const gr = gs / 2;
...@@ -39,12 +39,12 @@ fn laplacian_estimate<n, m, sz: usize>( ...@@ -39,12 +39,12 @@ fn laplacian_estimate<n, m, sz: usize>(
) -> f32[n, m] { ) -> f32[n, m] {
const r = sz / 2; const r = sz / 2;
let result : f32[n, m]; @res let result : f32[n, m];
for row = 0 to n { for row = 0 to n {
for col = 0 to m { for col = 0 to m {
// Copy data for dilation filter // Copy data for dilation filter
let imageArea : f32[sz, sz]; @shr1 let imageArea : f32[sz, sz];
for i = 0 to sz { for i = 0 to sz {
for j = 0 to sz { for j = 0 to sz {
imageArea[i, j] = if row + i < r then MIN_BR imageArea[i, j] = if row + i < r then MIN_BR
...@@ -64,7 +64,7 @@ fn laplacian_estimate<n, m, sz: usize>( ...@@ -64,7 +64,7 @@ fn laplacian_estimate<n, m, sz: usize>(
} }
// Data copy for erosion filter // Data copy for erosion filter
let imageArea : f32[sz, sz]; @shr2 let imageArea : f32[sz, sz];
for i = 0 to sz { for i = 0 to sz {
for j = 0 to sz { for j = 0 to sz {
imageArea[i, j] = if row + i < r then MAX_BR imageArea[i, j] = if row + i < r then MAX_BR
...@@ -97,12 +97,12 @@ fn zero_crossings<n, m, sz: usize>( ...@@ -97,12 +97,12 @@ fn zero_crossings<n, m, sz: usize>(
) -> f32[n, m] { ) -> f32[n, m] {
const r = sz / 2; const r = sz / 2;
let result : f32[n, m]; @res let result : f32[n, m];
for row = 0 to n { for row = 0 to n {
for col = 0 to m { for col = 0 to m {
// Data copy for dilation filter // Data copy for dilation filter
let imageArea : f32[sz, sz]; @shr1 let imageArea : f32[sz, sz];
for i = 0 to sz { for i = 0 to sz {
for j = 0 to sz { for j = 0 to sz {
imageArea[i, j] = if row + i < r then MIN_BR imageArea[i, j] = if row + i < r then MIN_BR
...@@ -124,7 +124,7 @@ fn zero_crossings<n, m, sz: usize>( ...@@ -124,7 +124,7 @@ fn zero_crossings<n, m, sz: usize>(
} }
// Data copy for erosion filter // Data copy for erosion filter
let imageArea : f32[sz, sz]; @shr2 let imageArea : f32[sz, sz];
for i = 0 to sz { for i = 0 to sz {
for j = 0 to sz { for j = 0 to sz {
imageArea[i, j] = if row + i < r then MAX_BR imageArea[i, j] = if row + i < r then MAX_BR
...@@ -160,7 +160,7 @@ fn gradient<n, m, sb: usize>( ...@@ -160,7 +160,7 @@ fn gradient<n, m, sb: usize>(
) -> f32[n, m] { ) -> f32[n, m] {
const sbr = sb / 2; const sbr = sb / 2;
let result : f32[n, m]; @res let result : f32[n, m];
for row = 0 to n { for row = 0 to n {
for col = 0 to m { for col = 0 to m {
...@@ -206,7 +206,7 @@ fn reject_zero_crossings<n, m: usize>( ...@@ -206,7 +206,7 @@ fn reject_zero_crossings<n, m: usize>(
max_gradient: f32, max_gradient: f32,
theta: f32, theta: f32,
) -> f32[n, m] { ) -> f32[n, m] {
let result : f32[n, m]; @res let result : f32[n, m];
for row = 0 to n { for row = 0 to n {
for col = 0 to m { for col = 0 to m {
...@@ -229,10 +229,10 @@ fn edge_detection<n, m, gs, sz, sb: usize>( ...@@ -229,10 +229,10 @@ fn edge_detection<n, m, gs, sz, sb: usize>(
sy: f32[sb, sb], sy: f32[sb, sb],
theta: f32, theta: f32,
) -> f32[n, m] { ) -> f32[n, m] {
let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter); let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter);
let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure); @le let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure);
let zcs = zero_crossings::<n, m, sz>(laplacian, structure); @zc let zcs = zero_crossings::<n, m, sz>(laplacian, structure);
let gradient = gradient::<n, m, sb>(smoothed, sx, sy); let gradient = gradient::<n, m, sb>(smoothed, sx, sy);
let maxgrad = max_gradient::<n, m>(gradient); let maxgrad = max_gradient::<n, m>(gradient);
return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta); return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta);
} }
#![cfg(feature = "opencv")]
#![feature(concat_idents)] #![feature(concat_idents)]
mod edge_detection_rust; mod edge_detection_rust;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment