diff --git a/predtuner/approxapp.py b/predtuner/approxapp.py index 06717bd15011564f33a555931e30e3bcb2b34f7a..db856fb1ca898e9109e323ed73a6f9096f27695f 100644 --- a/predtuner/approxapp.py +++ b/predtuner/approxapp.py @@ -1,7 +1,17 @@ import abc import logging from pathlib import Path -from typing import Callable, Dict, Generic, List, Optional, Tuple, Type, TypeVar +from typing import ( + Callable, + Dict, + Generic, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, +) import matplotlib.pyplot as plt import numpy as np @@ -17,15 +27,32 @@ TunerConfigT = Dict[int, int] class ApproxKnob: - def __init__(self, name: str, **kwargs): + def __init__( + self, name: str, devices: Optional[List[str]] = None, baseline_priority: Optional[int] = None + ): self.name = name - self.kwargs = kwargs + self.devices = None if devices is None else set(devices) + self.baseline_priority = baseline_priority - def coexists_with(self, other: "ApproxKnob") -> bool: - return False + def exists_on_device(self, device: str) -> bool: + if self.devices is None: + return True + return device in self.devices def __repr__(self): - return f'Knob"{self.name}"({self.kwargs})' + device_str = "" if self.devices is None else str(list(self.devices)) + return f"Knob({self.name}){device_str}" + + @classmethod + def unique_baseline(cls, knobs: Iterable["ApproxKnob"]) -> "ApproxKnob": + baselines = set(k for k in knobs if k.baseline_priority is not None) + if baselines: + sorted_bases = sorted( + baselines, key=lambda b: b.baseline_priority, reverse=True + ) + return sorted_bases[0] + else: + return cls("__baseline__") class ApproxApp(abc.ABC): @@ -38,9 +65,13 @@ class ApproxApp(abc.ABC): a mapping from each operator (identified by str) to a list of applicable knobs. 
""" - def __init__(self, op_knobs: Dict[str, List[ApproxKnob]]) -> None: + def __init__( + self, op_knobs: Dict[str, List[ApproxKnob]], tuning_device: Optional[str] = None + ) -> None: super().__init__() self.op_knobs = op_knobs + if tuning_device: + self.op_knobs = self._filter_knob_by_device(self.op_knobs, tuning_device) # Also modifies self.op_knobs in place. self.baseline_knob = self._check_get_baseline_knob_(self.op_knobs) @@ -74,24 +105,25 @@ class ApproxApp(abc.ABC): @staticmethod def _check_get_baseline_knob_( op_knobs: Dict[str, List[ApproxKnob]] - ) -> "BaselineKnob": + ) -> "ApproxKnob": # Modifies op_knobs inplace. # Find the baseline knob if the user has one, or get a default one knob_sets = [set(knobs) for knobs in op_knobs.values()] knobs = list(set.union(*knob_sets)) - baselines = set(k for k in knobs if isinstance(k, BaselineKnob)) - if len(baselines) > 1: - raise ValueError(f"Found multiple baseline knobs in op_knobs: {baselines}") - if baselines: - (baseline_knob,) = baselines - else: - baseline_knob = BaselineKnob() + baseline_knob = ApproxKnob.unique_baseline(knobs) # Start checking if each layer has the baseline knob for knobs in op_knobs.values(): if baseline_knob not in set(knobs): knobs.append(baseline_knob) return baseline_knob + @staticmethod + def _filter_knob_by_device(op_knobs: Dict[str, List[ApproxKnob]], device: str): + return { + op: [knob for knob in knobs if knob.exists_on_device(device)] + for op, knobs in op_knobs.items() + } + def add_baseline_to_knobs(self, approxes: KnobsT): return { op_name: approxes.get(op_name, self.baseline_knob.name) @@ -99,11 +131,6 @@ class ApproxApp(abc.ABC): } -class BaselineKnob(ApproxKnob): - def __init__(self, name: str = "__baseline__"): - super().__init__(name) - - class Config: def __init__( self, qos: float, cost: float, knobs: KnobsT, test_qos: Optional[float] = None @@ -161,6 +188,8 @@ class ApproxTuner(Generic[T]): n_ops, n_knobs, ) + msg_logger.info("Knobs: %s", self.app.knobs) + 
msg_logger.info("Baseline knob: %s", self.app.baseline_knob) msg_logger.info("At most %d iterations", max_iter) opentuner_args = opentuner_default_args() tuner = self._get_tuner_interface( diff --git a/predtuner/approxes/approxes.py b/predtuner/approxes/approxes.py index fb3bca7d13d0e0dd93cc39a3410d2ee9c86c7d18..0125de74f71a8a57e0148682e528ac687450da4a 100644 --- a/predtuner/approxes/approxes.py +++ b/predtuner/approxes/approxes.py @@ -6,7 +6,7 @@ import torch from torch.nn import Conv2d, Linear, Module, Parameter from .._logging import PathLike -from ..torchapp import TorchBaselineKnob, TorchApproxKnob +from ..torchapp import TorchApproxKnob from ._copy import module_only_deepcopy @@ -69,15 +69,9 @@ class PerforateConv2dStride(TorchApproxKnob): offset: int, use_fp16: bool, exp_speedup: float, + **kwargs, ): - super().__init__( - name, - direction_is_row=direction_is_row, - stride=stride, - offset=offset, - use_fp16=use_fp16, - exp_speedup=exp_speedup, - ) + super().__init__(name, **kwargs) assert stride >= 2 assert 0 <= offset < stride self.interp_axis = 2 if direction_is_row else 3 @@ -174,15 +168,9 @@ class Conv2dSampling(TorchApproxKnob): interp_rate: float, use_fp16: bool, exp_speedup: float, + **kwargs, ): - super().__init__( - name, - skip_every=skip_every, - skip_offset=skip_offset, - interp_rate=interp_rate, - use_fp16=use_fp16, - exp_speedup=exp_speedup, - ) + super().__init__(name, **kwargs) assert skip_every >= 2 and skip_offset >= 0 self.skip_every = skip_every self.skip_offset = skip_offset @@ -287,8 +275,8 @@ class PromiseSim(TorchApproxKnob): scaling_values = [0.75, 0.64, 0.336, 0.21, 0.168, 0.14, 0.11, 0.0784, 0.005] - def __init__(self, name: str, noise_level: int, exp_speedup: float): - super().__init__(name, noise_level=noise_level, exp_speedup=exp_speedup) + def __init__(self, name: str, noise_level: int, exp_speedup: float, **kwargs): + super().__init__(name, **kwargs) self.noise_level = noise_level self.exp_speedup = exp_speedup @@ -337,14 
+325,15 @@ class PromiseSim(TorchApproxKnob): class FP16Approx(TorchApproxKnob): - """Approximates by reducing precision of layer computation to float16.""" + """ + Approximates by reducing precision of layer computation to float16. - def __init__(self, name: str, exp_speedup: float): - super().__init__(name, exp_speedup=exp_speedup) - self.exp_speedup = exp_speedup + This is the baseline knob for GPU device by default. + """ - def is_applicable(self, op: Module) -> bool: - return isinstance(op, (Conv2d, Linear)) + def __init__(self, name: str, exp_speedup: float, **kwargs): + super().__init__(name, **kwargs) + self.exp_speedup = exp_speedup @property def deterministic(self) -> bool: @@ -355,7 +344,7 @@ class FP16Approx(TorchApproxKnob): return self.exp_speedup def is_applicable(self, op: Module) -> bool: - return isinstance(op, (Conv2d, Linear)) + return True class FP16ApproxModule(Module): def __init__(self, module: Module): @@ -371,14 +360,30 @@ class FP16Approx(TorchApproxKnob): return self.FP16ApproxModule(module) +class FP32Baseline(TorchApproxKnob): + @property + def deterministic(self) -> bool: + return True + + @property + def expected_speedup(self) -> float: + return 1.0 + + def is_applicable(self, op: Module) -> bool: + return True + + def apply(self, op: Module) -> Module: + return op + + default_name_to_class = { k.__name__: k for k in [ + FP32Baseline, FP16Approx, PromiseSim, PerforateConv2dStride, Conv2dSampling, - TorchBaselineKnob, ] } default_knob_file = Path(__file__).parent / "default_approx_params.json" @@ -412,5 +417,11 @@ def get_knobs_from_file( if class_name not in name_to_class: raise KeyError(f"{class_name} not found among knob class names") class_ty = name_to_class[class_name] - ret.add(class_ty(**knob_dict)) + try: + ret.add(class_ty(**knob_dict)) + except TypeError as e: + raise TypeError( + f"Approximation class {class_name} does not accept given arguments {knob_dict}.\n" + f"Original exception: {e}" + ) from e return ret diff --git 
a/predtuner/approxes/default_approx_params.json b/predtuner/approxes/default_approx_params.json index d2cba160983e0f669ae08d2d495cd4ed33492db1..4c04c02564022608d35fb8b7f60a09a7704a7f45 100644 --- a/predtuner/approxes/default_approx_params.json +++ b/predtuner/approxes/default_approx_params.json @@ -1,10 +1,14 @@ [{ - "class": "TorchBaselineKnob", - "name": "11" + "class": "FP32Baseline", + "name": "11", + "devices": ["cpu"], + "baseline_priority": 1 }, { "class": "FP16Approx", "name": "12", - "exp_speedup": 1.5 + "exp_speedup": 1.5, + "baseline_priority": 0, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 2, @@ -12,7 +16,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "151", - "exp_speedup": 3.0 + "exp_speedup": 3.0, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 2, @@ -20,7 +25,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "152", - "exp_speedup": 3.0 + "exp_speedup": 3.0, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 2, @@ -28,7 +34,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "153", - "exp_speedup": 3.0 + "exp_speedup": 3.0, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 2, @@ -36,7 +43,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "154", - "exp_speedup": 3.0 + "exp_speedup": 3.0, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 3, @@ -44,7 +52,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "155", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 3, @@ -52,7 +61,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "156", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 3, @@ -60,7 +70,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "157", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 3, @@ 
-68,7 +79,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "158", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 3, @@ -76,7 +88,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "159", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 3, @@ -84,7 +97,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "160", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 4, @@ -92,7 +106,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "161", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 4, @@ -100,7 +115,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "162", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 4, @@ -108,7 +124,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "163", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": false, "stride": 4, @@ -116,7 +133,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "164", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 4, @@ -124,7 +142,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "165", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 4, @@ -132,7 +151,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "166", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 4, @@ -140,7 +160,8 @@ "use_fp16": true, "class": "PerforateConv2dStride", "name": "167", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "direction_is_row": true, "stride": 4, @@ -148,7 +169,8 @@ "use_fp16": true, "class": 
"PerforateConv2dStride", "name": "168", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "skip_every": 2, "skip_offset": 0, @@ -156,7 +178,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "261", - "exp_speedup": 3.0 + "exp_speedup": 3.0, + "devices": ["gpu"] }, { "skip_every": 2, "skip_offset": 1, @@ -164,7 +187,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "262", - "exp_speedup": 3.0 + "exp_speedup": 3.0, + "devices": ["gpu"] }, { "skip_every": 3, "skip_offset": 0, @@ -172,7 +196,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "263", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "skip_every": 3, "skip_offset": 1, @@ -180,7 +205,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "264", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "skip_every": 3, "skip_offset": 2, @@ -188,7 +214,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "265", - "exp_speedup": 2.25 + "exp_speedup": 2.25, + "devices": ["gpu"] }, { "skip_every": 4, "skip_offset": 0, @@ -196,7 +223,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "266", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "skip_every": 4, "skip_offset": 1, @@ -204,7 +232,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "267", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "skip_every": 4, "skip_offset": 2, @@ -212,7 +241,8 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "268", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] }, { "skip_every": 4, "skip_offset": 3, @@ -220,5 +250,250 @@ "use_fp16": true, "class": "Conv2dSampling", "name": "269", - "exp_speedup": 2.0 + "exp_speedup": 2.0, + "devices": ["gpu"] +}, +{ + "direction_is_row": false, + "stride": 2, + "offset": 0, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "121", + "exp_speedup": 3.0, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 
2, + "offset": 1, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "122", + "exp_speedup": 3.0, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 2, + "offset": 0, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "123", + "exp_speedup": 3.0, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 2, + "offset": 1, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "124", + "exp_speedup": 3.0, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 3, + "offset": 0, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "125", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 3, + "offset": 1, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "126", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 3, + "offset": 2, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "127", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 3, + "offset": 0, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "128", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 3, + "offset": 1, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "129", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 3, + "offset": 2, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "130", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 4, + "offset": 0, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "131", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 4, + "offset": 1, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "132", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": false, + 
"stride": 4, + "offset": 2, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "133", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": false, + "stride": 4, + "offset": 3, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "134", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 4, + "offset": 0, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "135", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 4, + "offset": 1, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "136", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 4, + "offset": 2, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "137", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "direction_is_row": true, + "stride": 4, + "offset": 3, + "use_fp16": false, + "class": "PerforateConv2dStride", + "name": "138", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "skip_every": 2, + "skip_offset": 0, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "231", + "exp_speedup": 3.0, + "devices": ["cpu"] +}, { + "skip_every": 2, + "skip_offset": 1, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "232", + "exp_speedup": 3.0, + "devices": ["cpu"] +}, { + "skip_every": 3, + "skip_offset": 0, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "233", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "skip_every": 3, + "skip_offset": 1, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "234", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "skip_every": 3, + "skip_offset": 2, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "235", + "exp_speedup": 2.25, + "devices": ["cpu"] +}, { + "skip_every": 4, + "skip_offset": 0, + 
"interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "236", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "skip_every": 4, + "skip_offset": 1, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "237", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "skip_every": 4, + "skip_offset": 2, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "238", + "exp_speedup": 2.0, + "devices": ["cpu"] +}, { + "skip_every": 4, + "skip_offset": 3, + "interp_rate": 1.0, + "use_fp16": false, + "class": "Conv2dSampling", + "name": "239", + "exp_speedup": 2.0, + "devices": ["cpu"] }] \ No newline at end of file diff --git a/predtuner/modeledapp.py b/predtuner/modeledapp.py index a66a1c03619fd3f1b747657aa9126269d2ff218a..fa30d8e7ddd831c3fabc728867f06ca024f03cac 100644 --- a/predtuner/modeledapp.py +++ b/predtuner/modeledapp.py @@ -24,8 +24,8 @@ class ModeledApp(ApproxApp, abc.ABC): for non-modeling application, inherit from `ApproxApp` instead. 
""" - def __init__(self, op_knobs: Dict[str, List[ApproxKnob]]) -> None: - super().__init__(op_knobs) + def __init__(self, op_knobs: Dict[str, List[ApproxKnob]], tuning_device: str = None) -> None: + super().__init__(op_knobs, tuning_device) models = self.get_models() self._name_to_model = {m.name: m for m in models} if len(self._name_to_model) != len(models): diff --git a/predtuner/torchapp.py b/predtuner/torchapp.py index f2bc4ee53ef255bcf45a90a20e6bef3a1fdad023..00a0376da0f7b24db45a615c13f8d0c2c740040c 100644 --- a/predtuner/torchapp.py +++ b/predtuner/torchapp.py @@ -8,7 +8,7 @@ from torch.nn import Module from torch.utils.data.dataloader import DataLoader from ._logging import PathLike -from .approxapp import ApproxKnob, BaselineKnob, KnobsT +from .approxapp import ApproxKnob, KnobsT from .modeledapp import ( IPerfModel, IQoSModel, @@ -98,22 +98,25 @@ class TorchApp(ModeledApp, abc.ABC): knobs: Set[TorchApproxKnob], tensor_to_qos: Callable[[torch.Tensor, Any], float], combine_qos: Callable[[np.ndarray], float] = np.mean, - device: Union[torch.device, str] = _default_device, + tuning_device: str = None, + torch_device: Union[torch.device, str] = _default_device, model_storage_folder: Optional[PathLike] = None, ) -> None: self.app_name = app_name self.module = module self.tune_loader = tune_dataloader self.test_loader = test_dataloader - self.name_to_knob = {k.name: k for k in self._check_baseline_knob(knobs)} + self.name_to_knob = { + k.name: k for k in self._check_and_filter_knob(knobs, tuning_device) + } self.tensor_to_qos = tensor_to_qos self.combine_qos = combine_qos - self.device = device + self.device = torch_device self.model_storage = ( Path(model_storage_folder) if model_storage_folder else None ) - self.module = self.module.to(device) + self.module = self.module.to(torch_device) self.midx = ModuleIndexer(module) self._op_costs = {} op_knobs = {} @@ -129,7 +132,7 @@ class TorchApp(ModeledApp, abc.ABC): self._op_costs[op_name] = summary.loc[op_name, 
"flops"] # Init parent class last - super().__init__(op_knobs) + super().__init__(op_knobs, tuning_device) @property def name(self) -> str: @@ -211,14 +214,15 @@ class TorchApp(ModeledApp, abc.ABC): return torch.cat(all_outputs, dim=0) @staticmethod - def _check_baseline_knob(knobs: Set[TorchApproxKnob]) -> Set[TorchApproxKnob]: - baselines = set(k for k in knobs if isinstance(k, TorchBaselineKnob)) - if len(baselines) > 1: - raise ValueError(f"Found multiple baseline knobs in op_knobs: {baselines}") - if not baselines: - print("Adding baseline knob to knob set") - knobs.add(TorchBaselineKnob()) - return knobs + def _check_and_filter_knob( + knobs: Set[TorchApproxKnob], device: Optional[str] + ) -> Set[TorchApproxKnob]: + baseline = ApproxKnob.unique_baseline(knobs) + if baseline not in knobs: + knobs.add(baseline) + if not device: + return knobs + return {knob for knob in knobs if knob.exists_on_device(device)} def _apply_knobs(self, knobs: KnobsT) -> Module: import copy @@ -232,22 +236,3 @@ class TorchApp(ModeledApp, abc.ABC): def _sample_input(self): inputs, _ = next(iter(DataLoader(self.tune_loader.dataset, batch_size=1))) return inputs.to(self.device) - - -class TorchBaselineKnob(TorchApproxKnob, BaselineKnob): - def __init__(self, name: str = "__baseline__"): - super().__init__(name) - - @property - def deterministic(self) -> bool: - return True - - @property - def expected_speedup(self) -> float: - return 1.0 - - def is_applicable(self, op: Module) -> bool: - return True - - def apply(self, op: Module) -> Module: - return op diff --git a/test/test_torchapp.py b/test/test_torchapp.py index 80efcf3cd6b61350c7a2400c23a89e825c2e008f..f43fdb45e5b574c00cdd748fe506ee50e2c1a5b9 100644 --- a/test/test_torchapp.py +++ b/test/test_torchapp.py @@ -20,14 +20,15 @@ class TorchAppSetUp(unittest.TestCase): cls.dataset = Subset(dataset, range(100)) cls.module = VGG16Cifar10() cls.module.load_state_dict(torch.load("model_params/vgg16_cifar10.pth.tar")) - cls.app = TorchApp( - 
"TestTorchApp", - cls.module, - DataLoader(cls.dataset, batch_size=500), - DataLoader(cls.dataset, batch_size=500), - get_knobs_from_file(), - accuracy, - ) + cls.app_args = { + "app_name": "TestTorchApp", + "module": cls.module, + "tune_dataloader": DataLoader(cls.dataset, batch_size=500), + "test_dataloader": DataLoader(cls.dataset, batch_size=500), + "knobs": get_knobs_from_file(), + "tensor_to_qos": accuracy, + } + cls.app = TorchApp(**cls.app_args) class TestTorchAppTuning(TorchAppSetUp): @@ -35,17 +36,26 @@ class TestTorchAppTuning(TorchAppSetUp): n_knobs = {op: len(ks) for op, ks in self.app.op_knobs.items()} self.assertEqual(len(n_knobs), 34) for op_name, op in self.app.midx.name_to_module.items(): - if isinstance(op, Conv2d): - nknob = 29 - elif isinstance(op, Linear): - nknob = 2 - else: - nknob = 1 + nknob = 56 if isinstance(op, Conv2d) else 2 self.assertEqual(n_knobs[op_name], nknob) - - def test_baseline_knob(self): self.assertEqual(self.app.baseline_knob.name, "11") + def test_cpu_knobs(self): + app = TorchApp(**self.app_args, tuning_device="cpu") + n_knobs = {op: len(ks) for op, ks in app.op_knobs.items()} + for op_name, op in app.midx.name_to_module.items(): + nknob = 28 if isinstance(op, Conv2d) else 1 + self.assertEqual(n_knobs[op_name], nknob) + self.assertEqual(app.baseline_knob.name, "11") + + def test_gpu_knobs(self): + app = TorchApp(**self.app_args, tuning_device="gpu") + n_knobs = {op: len(ks) for op, ks in app.op_knobs.items()} + for op_name, op in app.midx.name_to_module.items(): + nknob = 28 if isinstance(op, Conv2d) else 1 + self.assertEqual(n_knobs[op_name], nknob) + self.assertEqual(app.baseline_knob.name, "12") + def test_baseline_qos(self): qos, _ = self.app.measure_qos_cost({}, False) self.assertAlmostEqual(qos, 93.0)