diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs index 5b4db044c3f66fc630cc434794e2a6c3de684682..17c503b3c2261f220c39746d1f3154c0c91e1be9 100644 --- a/juno_samples/rodinia/cfd/benches/cfd_bench.rs +++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs @@ -14,7 +14,7 @@ fn cfd_bench(c: &mut Criterion) { let mut euler_bench = |name, data_file, iterations| { let mut r = runner!(euler); - let block_size = 16; + let block_size = 256; let FarFieldConditions { ff_variable, ff_fc_momentum_x, @@ -92,7 +92,7 @@ fn cfd_bench(c: &mut Criterion) { let mut pre_euler_bench = |name, data_file, iterations| { let mut r = runner!(pre_euler); - let block_size = 16; + let block_size = 256; let FarFieldConditions { ff_variable, ff_fc_momentum_x, diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch index d9d3eb8c0ebe133acd9daf3e9505eeb06b1be062..a5c7cbdfcdb8e5f43a53ab39554fd1d094c86a6f 100644 --- a/juno_samples/rodinia/cfd/src/cpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch @@ -25,7 +25,8 @@ fixpoint { fork-guard-elim(*); } simpl!(*); -unforkify(compute_flux@inner_loop); +//unforkify(compute_flux@inner_loop); +fork-unroll(compute_flux); if !feature("seq") { fork-tile[32, 0, false, false](compute_step_factor); @@ -41,6 +42,7 @@ if !feature("seq") { fork-coalesce(compute_flux, compute_flux_body); simpl!(compute_flux, compute_flux_body); compute_flux = compute_flux_body; + fork-tile[8, 0, false, true](compute_flux); fork-tile[32, 0, false, false](time_step); let split = fork-split(time_step); diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch index 3acee55bfee6d413a479edb771bb0c36a7036205..3018ea3ef1a95bbae951d073b46191409e412a4f 100644 --- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch @@ -25,7 +25,8 @@ fixpoint { } simpl!(*); no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res); -unforkify(compute_flux@inner_loop); +//unforkify(compute_flux@inner_loop); +fork-unroll(compute_flux); if !feature("seq") { fork-tile[32, 0, false, false](compute_step_factor); @@ -48,6 +49,7 @@ if !feature("seq") { fork-coalesce(compute_flux, compute_flux_body); simpl!(compute_flux, compute_flux_body); compute_flux = compute_flux_body; + fork-tile[8, 0, false, true](compute_flux); fork-tile[32, 0, false, false](time_step); let split = fork-split(time_step);