diff --git a/predtuner/approxapp.py b/predtuner/approxapp.py
index 06717bd15011564f33a555931e30e3bcb2b34f7a..db856fb1ca898e9109e323ed73a6f9096f27695f 100644
--- a/predtuner/approxapp.py
+++ b/predtuner/approxapp.py
@@ -1,7 +1,17 @@
 import abc
 import logging
 from pathlib import Path
-from typing import Callable, Dict, Generic, List, Optional, Tuple, Type, TypeVar
+from typing import (
+    Callable,
+    Dict,
+    Generic,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+)
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -17,15 +27,32 @@ TunerConfigT = Dict[int, int]
 
 
 class ApproxKnob:
-    def __init__(self, name: str, **kwargs):
+    def __init__(
+        self, name: str, devices: Optional[List[str]] = None, baseline_priority: Optional[int] = None
+    ):
         self.name = name
-        self.kwargs = kwargs
+        self.devices = None if devices is None else set(devices)
+        self.baseline_priority = baseline_priority
 
-    def coexists_with(self, other: "ApproxKnob") -> bool:
-        return False
+    def exists_on_device(self, device: str) -> bool:
+        if self.devices is None:
+            return True
+        return device in self.devices
 
     def __repr__(self):
-        return f'Knob"{self.name}"({self.kwargs})'
+        device_str = "" if self.devices is None else str(list(self.devices))
+        return f"Knob({self.name}){device_str}"
+
+    @classmethod
+    def unique_baseline(cls, knobs: Iterable["ApproxKnob"]) -> "ApproxKnob":
+        baselines = [k for k in knobs if k.baseline_priority is not None]
+        if baselines:
+            sorted_bases = sorted(
+                baselines, key=lambda b: b.baseline_priority, reverse=True
+            )
+            return sorted_bases[0]
+        else:
+            return cls("__baseline__")
 
 
 class ApproxApp(abc.ABC):
@@ -38,9 +65,13 @@ class ApproxApp(abc.ABC):
         a mapping from each operator (identified by str) to a list of applicable knobs.
     """
 
-    def __init__(self, op_knobs: Dict[str, List[ApproxKnob]]) -> None:
+    def __init__(
+        self, op_knobs: Dict[str, List[ApproxKnob]], tuning_device: Optional[str] = None
+    ) -> None:
         super().__init__()
         self.op_knobs = op_knobs
+        if tuning_device:
+            self.op_knobs = self._filter_knob_by_device(self.op_knobs, tuning_device)
         # Also modifies self.op_knobs in place.
         self.baseline_knob = self._check_get_baseline_knob_(self.op_knobs)
 
@@ -74,24 +105,25 @@ class ApproxApp(abc.ABC):
     @staticmethod
     def _check_get_baseline_knob_(
         op_knobs: Dict[str, List[ApproxKnob]]
-    ) -> "BaselineKnob":
+    ) -> "ApproxKnob":
         # Modifies op_knobs inplace.
         # Find the baseline knob if the user has one, or get a default one
         knob_sets = [set(knobs) for knobs in op_knobs.values()]
         knobs = list(set.union(*knob_sets))
-        baselines = set(k for k in knobs if isinstance(k, BaselineKnob))
-        if len(baselines) > 1:
-            raise ValueError(f"Found multiple baseline knobs in op_knobs: {baselines}")
-        if baselines:
-            (baseline_knob,) = baselines
-        else:
-            baseline_knob = BaselineKnob()
+        baseline_knob = ApproxKnob.unique_baseline(knobs)
         # Start checking if each layer has the baseline knob
         for knobs in op_knobs.values():
             if baseline_knob not in set(knobs):
                 knobs.append(baseline_knob)
         return baseline_knob
 
+    @staticmethod
+    def _filter_knob_by_device(op_knobs: Dict[str, List[ApproxKnob]], device: str):
+        return {
+            op: [knob for knob in knobs if knob.exists_on_device(device)]
+            for op, knobs in op_knobs.items()
+        }
+
     def add_baseline_to_knobs(self, approxes: KnobsT):
         return {
             op_name: approxes.get(op_name, self.baseline_knob.name)
@@ -99,11 +131,6 @@ class ApproxApp(abc.ABC):
         }
 
 
-class BaselineKnob(ApproxKnob):
-    def __init__(self, name: str = "__baseline__"):
-        super().__init__(name)
-
-
 class Config:
     def __init__(
         self, qos: float, cost: float, knobs: KnobsT, test_qos: Optional[float] = None
@@ -161,6 +188,8 @@ class ApproxTuner(Generic[T]):
             n_ops,
             n_knobs,
         )
+        msg_logger.info("Knobs: %s", self.app.knobs)
+        msg_logger.info("Baseline knob: %s", self.app.baseline_knob)
         msg_logger.info("At most %d iterations", max_iter)
         opentuner_args = opentuner_default_args()
         tuner = self._get_tuner_interface(
diff --git a/predtuner/approxes/approxes.py b/predtuner/approxes/approxes.py
index fb3bca7d13d0e0dd93cc39a3410d2ee9c86c7d18..0125de74f71a8a57e0148682e528ac687450da4a 100644
--- a/predtuner/approxes/approxes.py
+++ b/predtuner/approxes/approxes.py
@@ -6,7 +6,7 @@ import torch
 from torch.nn import Conv2d, Linear, Module, Parameter
 
 from .._logging import PathLike
-from ..torchapp import TorchBaselineKnob, TorchApproxKnob
+from ..torchapp import TorchApproxKnob
 from ._copy import module_only_deepcopy
 
 
@@ -69,15 +69,9 @@ class PerforateConv2dStride(TorchApproxKnob):
         offset: int,
         use_fp16: bool,
         exp_speedup: float,
+        **kwargs,
     ):
-        super().__init__(
-            name,
-            direction_is_row=direction_is_row,
-            stride=stride,
-            offset=offset,
-            use_fp16=use_fp16,
-            exp_speedup=exp_speedup,
-        )
+        super().__init__(name, **kwargs)
         assert stride >= 2
         assert 0 <= offset < stride
         self.interp_axis = 2 if direction_is_row else 3
@@ -174,15 +168,9 @@ class Conv2dSampling(TorchApproxKnob):
         interp_rate: float,
         use_fp16: bool,
         exp_speedup: float,
+        **kwargs,
     ):
-        super().__init__(
-            name,
-            skip_every=skip_every,
-            skip_offset=skip_offset,
-            interp_rate=interp_rate,
-            use_fp16=use_fp16,
-            exp_speedup=exp_speedup,
-        )
+        super().__init__(name, **kwargs)
         assert skip_every >= 2 and skip_offset >= 0
         self.skip_every = skip_every
         self.skip_offset = skip_offset
@@ -287,8 +275,8 @@ class PromiseSim(TorchApproxKnob):
 
     scaling_values = [0.75, 0.64, 0.336, 0.21, 0.168, 0.14, 0.11, 0.0784, 0.005]
 
-    def __init__(self, name: str, noise_level: int, exp_speedup: float):
-        super().__init__(name, noise_level=noise_level, exp_speedup=exp_speedup)
+    def __init__(self, name: str, noise_level: int, exp_speedup: float, **kwargs):
+        super().__init__(name, **kwargs)
         self.noise_level = noise_level
         self.exp_speedup = exp_speedup
 
@@ -337,14 +325,15 @@ class PromiseSim(TorchApproxKnob):
 
 
 class FP16Approx(TorchApproxKnob):
-    """Approximates by reducing precision of layer computation to float16."""
+    """
+    Approximates by reducing precision of layer computation to float16.
 
-    def __init__(self, name: str, exp_speedup: float):
-        super().__init__(name, exp_speedup=exp_speedup)
-        self.exp_speedup = exp_speedup
+    This is the baseline knob for GPU device by default.
+    """
 
-    def is_applicable(self, op: Module) -> bool:
-        return isinstance(op, (Conv2d, Linear))
+    def __init__(self, name: str, exp_speedup: float, **kwargs):
+        super().__init__(name, **kwargs)
+        self.exp_speedup = exp_speedup
 
     @property
     def deterministic(self) -> bool:
@@ -355,7 +344,7 @@ class FP16Approx(TorchApproxKnob):
         return self.exp_speedup
 
     def is_applicable(self, op: Module) -> bool:
-        return isinstance(op, (Conv2d, Linear))
+        return True
 
     class FP16ApproxModule(Module):
         def __init__(self, module: Module):
@@ -371,14 +360,30 @@ class FP16Approx(TorchApproxKnob):
         return self.FP16ApproxModule(module)
 
 
+class FP32Baseline(TorchApproxKnob):
+    @property
+    def deterministic(self) -> bool:
+        return True
+
+    @property
+    def expected_speedup(self) -> float:
+        return 1.0
+
+    def is_applicable(self, op: Module) -> bool:
+        return True
+
+    def apply(self, op: Module) -> Module:
+        return op
+
+
 default_name_to_class = {
     k.__name__: k
     for k in [
+        FP32Baseline,
         FP16Approx,
         PromiseSim,
         PerforateConv2dStride,
         Conv2dSampling,
-        TorchBaselineKnob,
     ]
 }
 default_knob_file = Path(__file__).parent / "default_approx_params.json"
@@ -412,5 +417,11 @@ def get_knobs_from_file(
         if class_name not in name_to_class:
             raise KeyError(f"{class_name} not found among knob class names")
         class_ty = name_to_class[class_name]
-        ret.add(class_ty(**knob_dict))
+        try:
+            ret.add(class_ty(**knob_dict))
+        except TypeError as e:
+            raise TypeError(
+                f"Approximation class {class_name} does not accept given arguments {knob_dict}.\n"
+                f"Original exception: {e}"
+            ) from e
     return ret
diff --git a/predtuner/approxes/default_approx_params.json b/predtuner/approxes/default_approx_params.json
index d2cba160983e0f669ae08d2d495cd4ed33492db1..4c04c02564022608d35fb8b7f60a09a7704a7f45 100644
--- a/predtuner/approxes/default_approx_params.json
+++ b/predtuner/approxes/default_approx_params.json
@@ -1,10 +1,14 @@
 [{
-    "class": "TorchBaselineKnob",
-    "name": "11"
+    "class": "FP32Baseline",
+    "name": "11",
+    "devices": ["cpu"],
+    "baseline_priority": 1
 }, {
     "class": "FP16Approx",
     "name": "12",
-    "exp_speedup": 1.5
+    "exp_speedup": 1.5,
+    "baseline_priority": 0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 2,
@@ -12,7 +16,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "151",
-    "exp_speedup": 3.0
+    "exp_speedup": 3.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 2,
@@ -20,7 +25,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "152",
-    "exp_speedup": 3.0
+    "exp_speedup": 3.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 2,
@@ -28,7 +34,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "153",
-    "exp_speedup": 3.0
+    "exp_speedup": 3.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 2,
@@ -36,7 +43,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "154",
-    "exp_speedup": 3.0
+    "exp_speedup": 3.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 3,
@@ -44,7 +52,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "155",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 3,
@@ -52,7 +61,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "156",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 3,
@@ -60,7 +70,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "157",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 3,
@@ -68,7 +79,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "158",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 3,
@@ -76,7 +88,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "159",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 3,
@@ -84,7 +97,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "160",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 4,
@@ -92,7 +106,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "161",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 4,
@@ -100,7 +115,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "162",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 4,
@@ -108,7 +124,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "163",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": false,
     "stride": 4,
@@ -116,7 +133,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "164",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 4,
@@ -124,7 +142,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "165",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 4,
@@ -132,7 +151,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "166",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 4,
@@ -140,7 +160,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "167",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "direction_is_row": true,
     "stride": 4,
@@ -148,7 +169,8 @@
     "use_fp16": true,
     "class": "PerforateConv2dStride",
     "name": "168",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "skip_every": 2,
     "skip_offset": 0,
@@ -156,7 +178,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "261",
-    "exp_speedup": 3.0
+    "exp_speedup": 3.0,
+    "devices": ["gpu"]
 }, {
     "skip_every": 2,
     "skip_offset": 1,
@@ -164,7 +187,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "262",
-    "exp_speedup": 3.0
+    "exp_speedup": 3.0,
+    "devices": ["gpu"]
 }, {
     "skip_every": 3,
     "skip_offset": 0,
@@ -172,7 +196,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "263",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "skip_every": 3,
     "skip_offset": 1,
@@ -180,7 +205,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "264",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "skip_every": 3,
     "skip_offset": 2,
@@ -188,7 +214,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "265",
-    "exp_speedup": 2.25
+    "exp_speedup": 2.25,
+    "devices": ["gpu"]
 }, {
     "skip_every": 4,
     "skip_offset": 0,
@@ -196,7 +223,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "266",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "skip_every": 4,
     "skip_offset": 1,
@@ -204,7 +232,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "267",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "skip_every": 4,
     "skip_offset": 2,
@@ -212,7 +241,8 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "268",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
 }, {
     "skip_every": 4,
     "skip_offset": 3,
@@ -220,5 +250,250 @@
     "use_fp16": true,
     "class": "Conv2dSampling",
     "name": "269",
-    "exp_speedup": 2.0
+    "exp_speedup": 2.0,
+    "devices": ["gpu"]
+},
+{
+    "direction_is_row": false,
+    "stride": 2,
+    "offset": 0,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "121",
+    "exp_speedup": 3.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 2,
+    "offset": 1,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "122",
+    "exp_speedup": 3.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 2,
+    "offset": 0,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "123",
+    "exp_speedup": 3.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 2,
+    "offset": 1,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "124",
+    "exp_speedup": 3.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 3,
+    "offset": 0,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "125",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 3,
+    "offset": 1,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "126",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 3,
+    "offset": 2,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "127",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 3,
+    "offset": 0,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "128",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 3,
+    "offset": 1,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "129",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 3,
+    "offset": 2,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "130",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 4,
+    "offset": 0,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "131",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 4,
+    "offset": 1,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "132",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 4,
+    "offset": 2,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "133",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": false,
+    "stride": 4,
+    "offset": 3,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "134",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 4,
+    "offset": 0,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "135",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 4,
+    "offset": 1,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "136",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 4,
+    "offset": 2,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "137",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "direction_is_row": true,
+    "stride": 4,
+    "offset": 3,
+    "use_fp16": false,
+    "class": "PerforateConv2dStride",
+    "name": "138",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 2,
+    "skip_offset": 0,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "231",
+    "exp_speedup": 3.0,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 2,
+    "skip_offset": 1,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "232",
+    "exp_speedup": 3.0,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 3,
+    "skip_offset": 0,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "233",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 3,
+    "skip_offset": 1,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "234",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 3,
+    "skip_offset": 2,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "235",
+    "exp_speedup": 2.25,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 4,
+    "skip_offset": 0,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "236",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 4,
+    "skip_offset": 1,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "237",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 4,
+    "skip_offset": 2,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "238",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
+}, {
+    "skip_every": 4,
+    "skip_offset": 3,
+    "interp_rate": 1.0,
+    "use_fp16": false,
+    "class": "Conv2dSampling",
+    "name": "239",
+    "exp_speedup": 2.0,
+    "devices": ["cpu"]
 }]
\ No newline at end of file
diff --git a/predtuner/modeledapp.py b/predtuner/modeledapp.py
index a66a1c03619fd3f1b747657aa9126269d2ff218a..fa30d8e7ddd831c3fabc728867f06ca024f03cac 100644
--- a/predtuner/modeledapp.py
+++ b/predtuner/modeledapp.py
@@ -24,8 +24,8 @@ class ModeledApp(ApproxApp, abc.ABC):
     for non-modeling application, inherit from `ApproxApp` instead.
     """
 
-    def __init__(self, op_knobs: Dict[str, List[ApproxKnob]]) -> None:
-        super().__init__(op_knobs)
+    def __init__(self, op_knobs: Dict[str, List[ApproxKnob]], tuning_device: str = None) -> None:
+        super().__init__(op_knobs, tuning_device)
         models = self.get_models()
         self._name_to_model = {m.name: m for m in models}
         if len(self._name_to_model) != len(models):
diff --git a/predtuner/torchapp.py b/predtuner/torchapp.py
index f2bc4ee53ef255bcf45a90a20e6bef3a1fdad023..00a0376da0f7b24db45a615c13f8d0c2c740040c 100644
--- a/predtuner/torchapp.py
+++ b/predtuner/torchapp.py
@@ -8,7 +8,7 @@ from torch.nn import Module
 from torch.utils.data.dataloader import DataLoader
 
 from ._logging import PathLike
-from .approxapp import ApproxKnob, BaselineKnob, KnobsT
+from .approxapp import ApproxKnob, KnobsT
 from .modeledapp import (
     IPerfModel,
     IQoSModel,
@@ -98,22 +98,25 @@ class TorchApp(ModeledApp, abc.ABC):
         knobs: Set[TorchApproxKnob],
         tensor_to_qos: Callable[[torch.Tensor, Any], float],
         combine_qos: Callable[[np.ndarray], float] = np.mean,
-        device: Union[torch.device, str] = _default_device,
+        tuning_device: Optional[str] = None,
+        torch_device: Union[torch.device, str] = _default_device,
         model_storage_folder: Optional[PathLike] = None,
     ) -> None:
         self.app_name = app_name
         self.module = module
         self.tune_loader = tune_dataloader
         self.test_loader = test_dataloader
-        self.name_to_knob = {k.name: k for k in self._check_baseline_knob(knobs)}
+        self.name_to_knob = {
+            k.name: k for k in self._check_and_filter_knob(knobs, tuning_device)
+        }
         self.tensor_to_qos = tensor_to_qos
         self.combine_qos = combine_qos
-        self.device = device
+        self.device = torch_device
         self.model_storage = (
             Path(model_storage_folder) if model_storage_folder else None
         )
 
-        self.module = self.module.to(device)
+        self.module = self.module.to(torch_device)
         self.midx = ModuleIndexer(module)
         self._op_costs = {}
         op_knobs = {}
@@ -129,7 +132,7 @@ class TorchApp(ModeledApp, abc.ABC):
             self._op_costs[op_name] = summary.loc[op_name, "flops"]
 
         # Init parent class last
-        super().__init__(op_knobs)
+        super().__init__(op_knobs, tuning_device)
 
     @property
     def name(self) -> str:
@@ -211,14 +214,15 @@ class TorchApp(ModeledApp, abc.ABC):
         return torch.cat(all_outputs, dim=0)
 
     @staticmethod
-    def _check_baseline_knob(knobs: Set[TorchApproxKnob]) -> Set[TorchApproxKnob]:
-        baselines = set(k for k in knobs if isinstance(k, TorchBaselineKnob))
-        if len(baselines) > 1:
-            raise ValueError(f"Found multiple baseline knobs in op_knobs: {baselines}")
-        if not baselines:
-            print("Adding baseline knob to knob set")
-            knobs.add(TorchBaselineKnob())
-        return knobs
+    def _check_and_filter_knob(
+        knobs: Set[TorchApproxKnob], device: Optional[str]
+    ) -> Set[TorchApproxKnob]:
+        baseline = ApproxKnob.unique_baseline(knobs)
+        if baseline not in knobs:
+            knobs.add(baseline)
+        if not device:
+            return knobs
+        return {knob for knob in knobs if knob.exists_on_device(device)}
 
     def _apply_knobs(self, knobs: KnobsT) -> Module:
         import copy
@@ -232,22 +236,3 @@ class TorchApp(ModeledApp, abc.ABC):
     def _sample_input(self):
         inputs, _ = next(iter(DataLoader(self.tune_loader.dataset, batch_size=1)))
         return inputs.to(self.device)
-
-
-class TorchBaselineKnob(TorchApproxKnob, BaselineKnob):
-    def __init__(self, name: str = "__baseline__"):
-        super().__init__(name)
-
-    @property
-    def deterministic(self) -> bool:
-        return True
-
-    @property
-    def expected_speedup(self) -> float:
-        return 1.0
-
-    def is_applicable(self, op: Module) -> bool:
-        return True
-
-    def apply(self, op: Module) -> Module:
-        return op
diff --git a/test/test_torchapp.py b/test/test_torchapp.py
index 80efcf3cd6b61350c7a2400c23a89e825c2e008f..f43fdb45e5b574c00cdd748fe506ee50e2c1a5b9 100644
--- a/test/test_torchapp.py
+++ b/test/test_torchapp.py
@@ -20,14 +20,15 @@ class TorchAppSetUp(unittest.TestCase):
         cls.dataset = Subset(dataset, range(100))
         cls.module = VGG16Cifar10()
         cls.module.load_state_dict(torch.load("model_params/vgg16_cifar10.pth.tar"))
-        cls.app = TorchApp(
-            "TestTorchApp",
-            cls.module,
-            DataLoader(cls.dataset, batch_size=500),
-            DataLoader(cls.dataset, batch_size=500),
-            get_knobs_from_file(),
-            accuracy,
-        )
+        cls.app_args = {
+            "app_name": "TestTorchApp",
+            "module": cls.module,
+            "tune_dataloader": DataLoader(cls.dataset, batch_size=500),
+            "test_dataloader": DataLoader(cls.dataset, batch_size=500),
+            "knobs": get_knobs_from_file(),
+            "tensor_to_qos": accuracy,
+        }
+        cls.app = TorchApp(**cls.app_args)
 
 
 class TestTorchAppTuning(TorchAppSetUp):
@@ -35,17 +36,26 @@ class TestTorchAppTuning(TorchAppSetUp):
         n_knobs = {op: len(ks) for op, ks in self.app.op_knobs.items()}
         self.assertEqual(len(n_knobs), 34)
         for op_name, op in self.app.midx.name_to_module.items():
-            if isinstance(op, Conv2d):
-                nknob = 29
-            elif isinstance(op, Linear):
-                nknob = 2
-            else:
-                nknob = 1
+            nknob = 56 if isinstance(op, Conv2d) else 2
             self.assertEqual(n_knobs[op_name], nknob)
-
-    def test_baseline_knob(self):
         self.assertEqual(self.app.baseline_knob.name, "11")
 
+    def test_cpu_knobs(self):
+        app = TorchApp(**self.app_args, tuning_device="cpu")
+        n_knobs = {op: len(ks) for op, ks in app.op_knobs.items()}
+        for op_name, op in app.midx.name_to_module.items():
+            nknob = 28 if isinstance(op, Conv2d) else 1
+            self.assertEqual(n_knobs[op_name], nknob)
+        self.assertEqual(app.baseline_knob.name, "11")
+
+    def test_gpu_knobs(self):
+        app = TorchApp(**self.app_args, tuning_device="gpu")
+        n_knobs = {op: len(ks) for op, ks in app.op_knobs.items()}
+        for op_name, op in app.midx.name_to_module.items():
+            nknob = 28 if isinstance(op, Conv2d) else 1
+            self.assertEqual(n_knobs[op_name], nknob)
+        self.assertEqual(app.baseline_knob.name, "12")
+
     def test_baseline_qos(self):
         qos, _ = self.app.measure_qos_cost({}, False)
         self.assertAlmostEqual(qos, 93.0)