@@ -0,0 +1,91 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from aiq.builder.builder import EvalBuilder
from aiq.builder.evaluator import EvaluatorInfo
from aiq.cli.register_workflow import register_evaluator
from aiq.data_models.evaluator import EvaluatorBaseConfig
from aiq.eval.evaluator.base_evaluator import BaseEvaluator
from aiq.eval.evaluator.evaluator_model import EvalInputItem
from aiq.eval.evaluator.evaluator_model import EvalOutputItem

logger = logging.getLogger(__name__)


class ClassificationEvaluatorConfig(EvaluatorBaseConfig, name="classification_accuracy"):
    """Configuration for the custom classification evaluator.

    This evaluator config is used to evaluate the accuracy of classification predictions
    by comparing them against expected labels.
    """
    pass


@register_evaluator(config_type=ClassificationEvaluatorConfig)
async def register_classification_evaluator(config: ClassificationEvaluatorConfig, builder: EvalBuilder):
    """Register a custom classification evaluator.

    Args:
        config: Configuration object for the evaluator
        builder: EvalBuilder instance to access evaluation context

    Yields:
        EvaluatorInfo containing the evaluator configuration and evaluation function
    """
    evaluator = ClassificationEvaluator(builder.get_max_concurrency())

    yield EvaluatorInfo(config=config, evaluate_fn=evaluator.evaluate, description="Classification Accuracy Evaluator")


class ClassificationEvaluator(BaseEvaluator):

    def __init__(
        self,
        max_concurrency: int = 8,
    ):
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating classification accuracy")
        logger.debug("Classification accuracy evaluator initialized.")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
        """Compute the accuracy score for an individual prediction.

        Extracts the predicted category from the generated answer and compares
        it to the expected answer.

        Args:
            item: Single evaluation item containing the prediction and ground truth

        Returns:
            EvalOutputItem containing the accuracy score and reasoning
        """
        label = item.full_dataset_entry['label']
        generated_answer = item.output_obj

        try:
            # Extract the predicted category from the text following 'Root Cause Category'
            prediction = generated_answer.split('Root Cause Category')[-1].strip().split('\n')[0].lower().strip()
            if prediction == label:
                score = 1.0
                reasoning = f"The prediction {prediction} is correct. (label: {label})"
            else:
                score = 0.0
                reasoning = f"The prediction {prediction} is incorrect. (label: {label})"
        except Exception:
            score = 0.0
            reasoning = f"The prediction is not in the expected format: {generated_answer}"

        return EvalOutputItem(id=item.id, score=score, reasoning=reasoning)
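
For a quick sanity check of evaluate_item outside a full evaluation run, a minimal sketch along these lines exercises the scoring logic directly. The SimpleNamespace stub is hypothetical and only carries the three attributes the evaluator reads (id, output_obj, full_dataset_entry); a real EvalInputItem may require additional fields, and the answer text and label below are placeholders.

import asyncio
from types import SimpleNamespace


async def _demo():
    # Assumes ClassificationEvaluator from above is importable in this scope.
    evaluator = ClassificationEvaluator(max_concurrency=1)
    item = SimpleNamespace(
        id="alert-001",  # placeholder item id
        output_obj="... agent answer containing a 'Root Cause Category' line ...",
        full_dataset_entry={"label": "software_failure"},  # placeholder ground-truth label
    )
    result = await evaluator.evaluate_item(item)
    print(result.score, result.reasoning)


if __name__ == "__main__":
    asyncio.run(_demo())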
@@ -127,4 +127,6 @@ eval:
    rag_relevance:
      _type: ragas
      metric: ContextRelevance
      llm_name: nim_rag_eval_llm
    classification_accuracy:
      _type: classification_accuracy
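
The `_type: classification_accuracy` value matches the `name` given to `ClassificationEvaluatorConfig`, so the evaluator is picked up once the module that registers it is imported. If the evaluator later needs options, they can be declared as fields on the config class; a small sketch of how that might look, where `label_key` is a hypothetical example and not part of this change:

# Hypothetical extension of the evaluator config; `label_key` is illustrative only.
class ClassificationEvaluatorConfig(EvaluatorBaseConfig, name="classification_accuracy"):
    """Configuration for the classification accuracy evaluator."""
    # Which key in the dataset entry holds the ground-truth label.
    label_key: str = "label"

Such a field could then be set in the YAML entry above alongside `_type`.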
@@ -47,6 +47,8 @@
from . import telemetry_metrics_host_heartbeat_check_tool
from . import telemetry_metrics_host_performance_check_tool
from . import utils
# Import the custom evaluator so its @register_evaluator registration runs at import time
from .classification_evaluator import register_classification_evaluator
from .prompts import ALERT_TRIAGE_AGENT_PROMPT


@@ -0,0 +1,47 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

general:
  use_uvloop: true

functions:
  calculator_multiply:
    _type: calculator_multiply
  calculator_inequality:
    _type: calculator_inequality
  calculator_divide:
    _type: aiq_simple_calculator/calculator_divide
  current_datetime:
    _type: current_datetime

llms:
  nim_llm:
    _type: nim
    model_name: meta/llama-3.1-70b-instruct
    temperature: 0.0
    max_tokens: 1024
  openai_llm:
    _type: openai
    model_name: gpt-3.5-turbo
    max_tokens: 2000

workflow:
  _type: react_agent
  tool_names:
    - calculator_multiply
    - calculator_inequality
    - current_datetime
    - calculator_divide
  llm_name: nim_llm
@@ -0,0 +1,56 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This config file shows how to use MCP servers to get the current date and time and to
# perform calculator operations. Here the workflow acts as an MCP client: the time server
# is launched locally over stdio, and the math server is reached over SSE at the URL
# specified in the config (here `http://localhost:9901/sse`).

general:
  use_uvloop: true

functions:
  mcp_time:
    _type: mcp_client
    server:
      transport: stdio
      command: "python"
      args: ["-m", "mcp_server_time", "--local-timezone=America/Los_Angeles"]
  mcp_math:
    _type: mcp_client
    server:
      transport: sse
      url: "http://localhost:9901/sse"

llms:
  nim_llm:
    _type: nim
    model_name: meta/llama-3.1-70b-instruct
    temperature: 0.0
    max_tokens: 1024
  openai_llm:
    _type: openai
    model_name: gpt-3.5-turbo
    max_tokens: 2000

workflow:
  _type: react_agent
  tool_names:
    - calculator_multiply
    - calculator_inequality
    - get_current_time
    - calculator_divide
  llm_name: nim_llm
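
The `mcp_math` client above expects an MCP server exposing the calculator tools over SSE at `http://localhost:9901/sse` (the time tool comes from the `mcp_server_time` package launched over stdio). A minimal sketch of such a server using the MCP Python SDK's `FastMCP` helper; the tool implementations and the port are illustrative assumptions, not part of this change:

# Illustrative SSE MCP server the mcp_math client could connect to.
# Assumes the official MCP Python SDK is installed (`pip install mcp`).
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("math", port=9901)  # SSE endpoint will be http://localhost:9901/sse


@mcp.tool()
def calculator_multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    return a * b


@mcp.tool()
def calculator_divide(a: float, b: float) -> float:
    """Divide a by b."""
    return a / b


@mcp.tool()
def calculator_inequality(a: float, b: float) -> str:
    """Compare two numbers and describe the relationship."""
    if a > b:
        return f"{a} is greater than {b}"
    if a < b:
        return f"{a} is less than {b}"
    return f"{a} is equal to {b}"


if __name__ == "__main__":
    mcp.run(transport="sse")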