@@ -0,0 +1,91 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from aiq.builder.builder import EvalBuilder
from aiq.builder.evaluator import EvaluatorInfo
from aiq.cli.register_workflow import register_evaluator
from aiq.data_models.evaluator import EvaluatorBaseConfig
from aiq.eval.evaluator.base_evaluator import BaseEvaluator
from aiq.eval.evaluator.evaluator_model import EvalInputItem
from aiq.eval.evaluator.evaluator_model import EvalOutputItem

logger = logging.getLogger(__name__)


class ClassificationEvaluatorConfig(EvaluatorBaseConfig, name="classification_accuracy"):
    """Configuration for the custom classification evaluator.

    This evaluator config is used to evaluate the accuracy of classification predictions
    by comparing them against expected labels.
    """
    pass


@register_evaluator(config_type=ClassificationEvaluatorConfig)
async def register_classification_evaluator(config: ClassificationEvaluatorConfig, builder: EvalBuilder):
    """Register a custom classification evaluator.

    Args:
        config: Configuration object for the evaluator
        builder: EvalBuilder instance to access evaluation context

    Yields:
        EvaluatorInfo containing the evaluator configuration and evaluation function
    """
    evaluator = ClassificationEvaluator(builder.get_max_concurrency())

    yield EvaluatorInfo(config=config, evaluate_fn=evaluator.evaluate, description="Classification Accuracy Evaluator")


class ClassificationEvaluator(BaseEvaluator):

    def __init__(
        self,
        max_concurrency: int = 8,
    ):
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating classification accuracy")
        logger.debug("Classification accuracy evaluator initialized.")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
        """Compute the accuracy score for an individual prediction.

        Extracts the predicted category from the generated answer and compares
        it to the expected answer.

        Args:
            item: Single evaluation item containing the prediction and ground truth

        Returns:
            EvalOutputItem containing the accuracy score and reasoning
        """
        label = item.full_dataset_entry['label']
        generated_answer = item.output_obj

        try:
            # Extract the predicted category from the text following 'Root Cause Category'
            prediction = generated_answer.split('Root Cause Category')[-1].strip().split('\n')[0].lower().strip()
            if prediction == label:
                score = 1.0
                reasoning = f"The prediction {prediction} is correct. (label: {label})"
            else:
                score = 0.0
                reasoning = f"The prediction {prediction} is incorrect. (label: {label})"
        except Exception:
            score = 0.0
            reasoning = f"The prediction is not in the expected format: {generated_answer}"

        return EvalOutputItem(id=item.id, score=score, reasoning=reasoning)
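
For a quick sanity check of evaluate_item outside a full evaluation run, a minimal sketch along these lines exercises the scoring logic directly. The SimpleNamespace stub is hypothetical and only carries the three attributes the evaluator reads (id, output_obj, full_dataset_entry); a real EvalInputItem may require additional fields, and the answer text and label below are placeholders.

import asyncio
from types import SimpleNamespace


async def _demo():
    # Assumes ClassificationEvaluator from above is importable in this scope.
    evaluator = ClassificationEvaluator(max_concurrency=1)
    item = SimpleNamespace(
        id="alert-001",  # placeholder item id
        output_obj="... agent answer containing a 'Root Cause Category' line ...",
        full_dataset_entry={"label": "software_failure"},  # placeholder ground-truth label
    )
    result = await evaluator.evaluate_item(item)
    print(result.score, result.reasoning)


if __name__ == "__main__":
    asyncio.run(_demo())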
@@ -127,4 +127,6 @@ eval:
    rag_relevance:
      _type: ragas
      metric: ContextRelevance
      llm_name: nim_rag_eval_llm
    classification_accuracy:
      _type: classification_accuracy
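
The `_type: classification_accuracy` value matches the `name` given to `ClassificationEvaluatorConfig`, so the evaluator is picked up once the module that registers it is imported. If the evaluator later needs options, they can be declared as fields on the config class; a small sketch of how that might look, where `label_key` is a hypothetical example and not part of this change:

# Hypothetical extension of the evaluator config; `label_key` is illustrative only.
class ClassificationEvaluatorConfig(EvaluatorBaseConfig, name="classification_accuracy"):
    """Configuration for the classification accuracy evaluator."""
    # Which key in the dataset entry holds the ground-truth label.
    label_key: str = "label"

Such a field could then be set in the YAML entry above alongside `_type`.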
@@ -47,6 +47,8 @@
from . import telemetry_metrics_host_heartbeat_check_tool
from . import telemetry_metrics_host_performance_check_tool
from . import utils
# Import the custom evaluator so its @register_evaluator registration runs at import time
from .classification_evaluator import register_classification_evaluator
from .prompts import ALERT_TRIAGE_AGENT_PROMPT


@@ -0,0 +1,47 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

general:
  use_uvloop: true

functions:
  calculator_multiply:
    _type: calculator_multiply
  calculator_inequality:
    _type: calculator_inequality
  calculator_divide:
    _type: aiq_simple_calculator/calculator_divide
  current_datetime:
    _type: current_datetime

llms:
  nim_llm:
    _type: nim
    model_name: meta/llama-3.1-70b-instruct
    temperature: 0.0
    max_tokens: 1024
  openai_llm:
    _type: openai
    model_name: gpt-3.5-turbo
    max_tokens: 2000

workflow:
  _type: react_agent
  tool_names:
    - calculator_multiply
    - calculator_inequality
    - current_datetime
    - calculator_divide
  llm_name: nim_llm
@@ -0,0 +1,56 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This config file shows how to use MCP servers to get the current date and time and to
# perform calculator operations. Here the workflow acts as an MCP client: the time server
# is launched locally over stdio, and the math server is reached over SSE at the URL
# specified in the config (here `http://localhost:9901/sse`).

general:
  use_uvloop: true

functions:
  mcp_time:
    _type: mcp_client
    server:
      transport: stdio
      command: "python"
      args: ["-m", "mcp_server_time", "--local-timezone=America/Los_Angeles"]
  mcp_math:
    _type: mcp_client
    server:
      transport: sse
      url: "http://localhost:9901/sse"

llms:
  nim_llm:
    _type: nim
    model_name: meta/llama-3.1-70b-instruct
    temperature: 0.0
    max_tokens: 1024
  openai_llm:
    _type: openai
    model_name: gpt-3.5-turbo
    max_tokens: 2000

workflow:
  _type: react_agent
  tool_names:
    - calculator_multiply
    - calculator_inequality
    - get_current_time
    - calculator_divide
  llm_name: nim_llm
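
The `mcp_math` client above expects an MCP server exposing the calculator tools over SSE at `http://localhost:9901/sse` (the time tool comes from the `mcp_server_time` package launched over stdio). A minimal sketch of such a server using the MCP Python SDK's `FastMCP` helper; the tool implementations and the port are illustrative assumptions, not part of this change:

# Illustrative SSE MCP server the mcp_math client could connect to.
# Assumes the official MCP Python SDK is installed (`pip install mcp`).
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("math", port=9901)  # SSE endpoint will be http://localhost:9901/sse


@mcp.tool()
def calculator_multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    return a * b


@mcp.tool()
def calculator_divide(a: float, b: float) -> float:
    """Divide a by b."""
    return a / b


@mcp.tool()
def calculator_inequality(a: float, b: float) -> str:
    """Compare two numbers and describe the relationship."""
    if a > b:
        return f"{a} is greater than {b}"
    if a < b:
        return f"{a} is less than {b}"
    return f"{a} is equal to {b}"


if __name__ == "__main__":
    mcp.run(transport="sse")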