
Commit 1e4a294

[boo] layer norm support (#979)
Add support for layer normalization kernels in boo. This patch adds the signature, parser and implementation for forward/backward kernels + autograd support. Smoke tests are provided for correctness and correct caching of kernel IR. Additional testing can be performed via the dedicated numerics script, similar to how convolutions are handled.

Not yet supported:
- Non-contiguous tensors.
- Op replacement in PyTorch graph.

These will be added separately.

---------

Signed-off-by: Alex Zinenko <git@ozinenko.com>
1 parent c5a4044 commit 1e4a294
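The commit message mentions forward/backward layer-norm kernels with autograd support and a dedicated numerics script for further testing. As a point of reference for what such a numerics check would compare against, here is a minimal sketch using stock PyTorch only; it calls no boo API, and the shapes and normalized dimension are illustrative assumptions:

import torch

# Illustrative shapes: a (batch, seq, hidden) activation normalized over the last dim.
x = torch.randn(4, 128, 768, dtype=torch.float32, requires_grad=True)
weight = torch.randn(768, requires_grad=True)
bias = torch.randn(768, requires_grad=True)

# Forward: eager PyTorch layer norm serves as the reference implementation.
y_ref = torch.nn.functional.layer_norm(x, (768,), weight, bias, eps=1e-5)

# Backward: autograd on the reference produces reference gradients for input,
# weight, and bias, which a numerics check would compare against the kernel
# outputs within some tolerance.
y_ref.backward(torch.ones_like(y_ref))
print(x.grad.shape, weight.grad.shape, bias.grad.shape)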

File tree: 14 files changed (+613 −33 lines)


iree/turbine/kernel/boo/conv_exports/conv.py

Lines changed: 5 additions & 17 deletions
@@ -13,12 +13,13 @@
 )

 from enum import IntEnum
+from functools import cached_property
 import math

 import torch

 from .utils import Permutation
-from ..exports.signature import OpSignature
+from ..exports.signature import OpSignature, ModeBase
 from ....ops.conv_fwd import conv_2d_nhwc_fhwc, generic_conv
 from ....ops.insert_slice import insert_slice

@@ -32,7 +33,7 @@
 ]


-class Mode(IntEnum):
+class Mode(ModeBase, IntEnum):
     FORWARD = 0
     INPUT_BACKWARD = 1
     WEIGHT_BACKWARD = 2
@@ -42,20 +43,6 @@ class Mode(IntEnum):
     BWD = INPUT_BACKWARD
     WRW = WEIGHT_BACKWARD

-    @staticmethod
-    def parse(spec: Union[str, None, "Mode"]) -> "Mode":
-        if spec is None:
-            return Mode.FORWARD
-        if isinstance(spec, Mode):
-            return spec
-        spec = spec.upper().replace("-", "_")
-        if spec not in Mode.__members__:
-            raise ValueError(
-                f"For mode= argument, expected one of: "
-                f"{', '.join(Mode.__members__.keys())}"
-            )
-        return Mode[spec]
-
     def __str__(self):
         return self.name

@@ -325,7 +312,8 @@ def get(shape):
             return (get(self.output_shape), get(self.kernel_shape))
         raise ValueError(f"Unknown mode: {self.mode}")

-    def get_func_name(self):
+    @cached_property
+    def func_name(self) -> str:
         name_items = [
             "conv",
             f"{self.num_spatial_dims}d",

iree/turbine/kernel/boo/driver/README.md

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ populator.run()
 sample_signature = populator.signatures[0]

 # One can also check the cache for this signature
-cache_status = populator.get_cache_status(sample_signature.get_func_name())
+cache_status = populator.get_cache_status(sample_signature.func_name)
 print(cache_status)

 # You can also get the list of failed signatures via:

iree/turbine/kernel/boo/driver/driver.py

Lines changed: 20 additions & 8 deletions
@@ -28,9 +28,11 @@ def main():
     parser = argparse.ArgumentParser(
         usage="%(prog)s [-h] [... MIOpenDriver command ...] [--commands-file COMMANDS_FILE]",
         description="""
-Run a convolution with the IREE runtime. Command line arguments mirror the
+Run a kernel with the IREE runtime. Command line arguments mirror the
 arguments to MIOpenDriver.

+Currently supports convolution and layernorm.
+
 If COMMANDS_FILE is specified, driver commands are read from the file. Each
 line is treated as a separate invocation of the driver, and any additional
 command-line arguments are appended to the arguments from the file.
@@ -115,10 +117,16 @@ def run(cli_args: Sequence[str], gpu_id: int):
     from iree.turbine.kernel.boo.exports.parser import OpCLIParser

     def dispatch(cli_args: Sequence[str]) -> type[OpCLIParser]:
-        if any(map(lambda x: "conv" in x, cli_args)):
+        if any("conv" in x for x in cli_args):
             from iree.turbine.kernel.boo.conv_exports.miopen_parser import ConvParser

             return ConvParser
+        if any("layernorm" in x for x in cli_args):
+            from iree.turbine.kernel.boo.layer_norm_exports.miopen_parser import (
+                LayerNormParser,
+            )
+
+            return LayerNormParser
         raise ValueError("unsupported operation kind in " + shlex.join(cli_args))

     from iree.turbine.kernel.boo.driver.launch import get_launchable
@@ -165,24 +173,28 @@ def dispatch(cli_args: Sequence[str]) -> type[OpCLIParser]:
     mem_bytes_threshold = 96 * (10**9)
     iter_thresh = int(mem_bytes_threshold // res_mem_bytes)

-    result = None
+    results: tuple[torch.Tensor, ...] | torch.Tensor | None = None
     for iter in range(iter_per_device + 1):
         for device_idx, launch_args in enumerate(per_device_data):
             if iter == iter_per_device and device_idx >= rem_iter:
                 break
-            result = launchable(*launch_args)
+            results = launchable(*launch_args)
         if (iter + 1) % iter_thresh == 0:
             print(f"Synchronizing all devices on iter {iter} and collecting garbage.")
             for i in range(num_devices):
                 torch.cuda.synchronize(torch.device(f"cuda:{i}"))
             gc.collect()

     torch.cuda.synchronize()
-    print(
-        f">>> result shape: {result.shape}; dtype: {result.dtype}; device type: {result.device.type}"
-    )
+    results = results or ()
+    if isinstance(results, torch.Tensor):
+        results = (results,)
+    for i, result in enumerate(results):
+        print(
+            f">>> result #{i} shape: {result.shape}; dtype: {result.dtype}; device type: {result.device.type}"
+        )

-    return sig.get_func_name()
+    return sig.func_name


 TRACY_PORT = str(random.randint(40_000, 50_000))
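The last hunk generalizes result reporting from a single tensor to any number of outputs, since some kernels produce one tensor (e.g. conv forward) while others may return several, as a layer-norm backward computing input, weight, and bias gradients would. A standalone sketch of that tensor-or-tuple normalization, shown here with made-up tensors rather than actual launchable outputs:

import torch


def report(results: tuple[torch.Tensor, ...] | torch.Tensor | None) -> None:
    # Normalize to a tuple: no result -> (), single tensor -> 1-tuple, tuple unchanged.
    if results is None:
        normalized: tuple[torch.Tensor, ...] = ()
    elif isinstance(results, torch.Tensor):
        normalized = (results,)
    else:
        normalized = results
    for i, result in enumerate(normalized):
        print(
            f">>> result #{i} shape: {result.shape}; dtype: {result.dtype}; "
            f"device type: {result.device.type}"
        )


# Illustrative calls with made-up tensors, not actual kernel outputs:
report(torch.zeros(2, 3))                    # single-output kernel
report((torch.zeros(2, 3), torch.zeros(3)))  # multi-output kernel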

iree/turbine/kernel/boo/driver/launch.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 def get_module_asm(
     signature: OpSignature, func_name: str | None = None, use_custom: bool = True
 ) -> str:
-    func_name = func_name or signature.get_func_name()
+    func_name = func_name or signature.func_name
     module_factory = lambda: signature.get_nn_module(use_custom=use_custom)
     arg_factory = lambda: signature.get_sample_args(splat_value=0)
     return generic_get_module_asm(
@@ -24,7 +24,7 @@ def get_module_asm(
 def get_launchable(
     signature: OpSignature, *, use_custom=True, cache_only=False
 ) -> Launchable:
-    func_name = signature.get_func_name()
+    func_name = signature.func_name
     module_factory = lambda: signature.get_nn_module(use_custom=use_custom)
     arg_factory = lambda: signature.get_sample_args(splat_value=0)
     return generic_get_launchable(
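Both helpers above now read the function name from the func_name property rather than calling get_func_name(). A minimal usage sketch, assuming a signature object (for example one produced by ConvParser or LayerNormParser) is obtained elsewhere; how to construct one is not shown in this diff:

from iree.turbine.kernel.boo.driver.launch import get_launchable
from iree.turbine.kernel.boo.exports.signature import OpSignature


def run_once(signature: OpSignature):
    # Build (or fetch from cache) a launchable for this signature; use_custom=True
    # mirrors the default used by the helpers above.
    launchable = get_launchable(signature, use_custom=True)
    # Sample arguments come from the signature itself, exactly as the arg_factory
    # lambdas above do; splat_value=0 fills the tensors with zeros.
    args = signature.get_sample_args(splat_value=0)
    return launchable(*args)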

iree/turbine/kernel/boo/driver/preload.py

Lines changed: 1 addition & 1 deletion
@@ -180,7 +180,7 @@ def get_failures(self) -> dict[str, str]:


 def mlir_import(sig: OpSignature) -> tuple[str, bool]:
-    func_name = sig.get_func_name()
+    func_name = sig.func_name
     success = False
     try:
         get_module_asm(sig, func_name)

iree/turbine/kernel/boo/exports/signature.py

Lines changed: 3 additions & 2 deletions
@@ -42,9 +42,10 @@ def get_sample_args(
         """Generates sample arguments as PyTorch tensors for the operation."""
         ...

+    @property
     @abstractmethod
-    def get_func_name(self) -> str:
-        """Generates an MLIR function name to use for the operation, unique across operations."""
+    def func_name(self) -> str:
+        """MLIR function name to use for the operation, unique across operations."""
         ...

     @abstractmethod
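OpSignature now declares func_name as an abstract read-only property instead of a get_func_name() method, and the conv.py hunk above satisfies it with a cached_property. A self-contained sketch of that pattern; the class, its fields, and the name format below are illustrative stand-ins, not the actual boo sources:

from abc import ABC, abstractmethod
from functools import cached_property


class OpSignatureSketch(ABC):
    """Illustrative stand-in for the OpSignature base class."""

    @property
    @abstractmethod
    def func_name(self) -> str:
        """MLIR function name to use for the operation, unique across operations."""
        ...


class LayerNormSignatureSketch(OpSignatureSketch):
    """Hypothetical concrete signature; field names are made up for illustration."""

    def __init__(self, shape: tuple[int, ...], dtype: str, mode: str):
        self.shape = shape
        self.dtype = dtype
        self.mode = mode

    # cached_property satisfies the abstract property and computes the name once.
    @cached_property
    def func_name(self) -> str:
        shape_str = "x".join(str(d) for d in self.shape)
        return f"layer_norm_{self.mode}_{self.dtype}_{shape_str}"


sig = LayerNormSignatureSketch((4, 128, 768), "f32", "fwd")
print(sig.func_name)  # layer_norm_fwd_f32_4x128x768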
