diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs
index 5b4db044c3f66fc630cc434794e2a6c3de684682..17c503b3c2261f220c39746d1f3154c0c91e1be9 100644
--- a/juno_samples/rodinia/cfd/benches/cfd_bench.rs
+++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs
@@ -14,7 +14,7 @@ fn cfd_bench(c: &mut Criterion) {
 
     let mut euler_bench = |name, data_file, iterations| {
         let mut r = runner!(euler);
-        let block_size = 16;
+        let block_size = 256;
         let FarFieldConditions {
             ff_variable,
             ff_fc_momentum_x,
@@ -92,7 +92,7 @@ fn cfd_bench(c: &mut Criterion) {
 
     let mut pre_euler_bench = |name, data_file, iterations| {
         let mut r = runner!(pre_euler);
-        let block_size = 16;
+        let block_size = 256;
         let FarFieldConditions {
             ff_variable,
             ff_fc_momentum_x,
diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch
index d9d3eb8c0ebe133acd9daf3e9505eeb06b1be062..a5c7cbdfcdb8e5f43a53ab39554fd1d094c86a6f 100644
--- a/juno_samples/rodinia/cfd/src/cpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch
@@ -25,7 +25,8 @@ fixpoint {
   fork-guard-elim(*);
 }
 simpl!(*);
-unforkify(compute_flux@inner_loop);
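+// NOTE(review): unforkify(compute_flux@inner_loop) is disabled here and fork-unroll(compute_flux) runs instead; confirm and drop this line.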
+fork-unroll(compute_flux);
 
 if !feature("seq") {
   fork-tile[32, 0, false, false](compute_step_factor);
@@ -41,6 +42,7 @@ if !feature("seq") {
   fork-coalesce(compute_flux, compute_flux_body);
   simpl!(compute_flux, compute_flux_body);
   compute_flux = compute_flux_body;
+  fork-tile[8, 0, false, true](compute_flux);
 
   fork-tile[32, 0, false, false](time_step);
   let split = fork-split(time_step);
diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
index 3acee55bfee6d413a479edb771bb0c36a7036205..3018ea3ef1a95bbae951d073b46191409e412a4f 100644
--- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
@@ -25,7 +25,8 @@ fixpoint {
 }
 simpl!(*);
 no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res);
-unforkify(compute_flux@inner_loop);
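+// NOTE(review): unforkify(compute_flux@inner_loop) is disabled here and fork-unroll(compute_flux) runs instead; confirm and drop this line.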
+fork-unroll(compute_flux);
 
 if !feature("seq") {
   fork-tile[32, 0, false, false](compute_step_factor);
@@ -48,6 +49,7 @@ if !feature("seq") {
   fork-coalesce(compute_flux, compute_flux_body);
   simpl!(compute_flux, compute_flux_body);
   compute_flux = compute_flux_body;
+  fork-tile[8, 0, false, true](compute_flux);
 
   fork-tile[32, 0, false, false](time_step);
   let split = fork-split(time_step);