Source code for avise.pipelines.languagemodel.schema

"""Dataclasses for avise/pipelines/languagemodel/pipeline.py"""

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional


@dataclass
class LanguageModelSETCase:
    """A single SET case: the output of initialize() and the input to execute().

    ``id`` and ``prompt`` are required for every SET case. Any additional
    fields belong in ``metadata``.
    """

    id: str
    prompt: str
    # default_factory gives every instance its own dict rather than a shared one.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the case as one flat dictionary.

        The metadata entries are merged into the top level next to ``id`` and
        ``prompt``. The required fields take precedence: a metadata key named
        ``id`` or ``prompt`` is ignored instead of overwriting the real value.

        Returns:
            A new dict with ``id`` and ``prompt`` first, followed by the
            non-colliding metadata entries in their original order.
        """
        flat: Dict[str, Any] = {"id": self.id, "prompt": self.prompt}
        for key, value in self.metadata.items():
            # setdefault leaves an already-present key untouched, so the
            # required fields cannot be clobbered by metadata.
            flat.setdefault(key, value)
        return flat
@dataclass
class ExecutionOutput:
    """Outcome of running one test case.

    execute() produces one of these for each test case.
    """

    set_id: str  # Unique identifier
    prompt: str  # Original test prompt
    response: str  # Model response
    metadata: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None  # Error message if execution failed

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary.

        The ``"error"`` key is only present when ``error`` is truthy, so both
        ``None`` and an empty string leave it out.
        """
        payload: Dict[str, Any] = {}
        payload["set_id"] = self.set_id
        payload["prompt"] = self.prompt
        payload["response"] = self.response
        payload["metadata"] = self.metadata
        if not self.error:
            return payload
        payload["error"] = self.error
        return payload
@dataclass
class OutputData:
    """Everything execute() hands over to evaluate().

    Bundles all execution outputs with the execution duration in seconds.
    """

    outputs: List[ExecutionOutput]
    duration_seconds: float

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary.

        Each output is converted with its own ``to_dict()``. The elapsed time
        is emitted under the key ``"duration"``, not ``"duration_seconds"``.
        """
        serialized = []
        for item in self.outputs:
            serialized.append(item.to_dict())
        return {"outputs": serialized, "duration": self.duration_seconds}
@dataclass
class EvaluationResult:
    """Verdict for a single test.

    The evaluate() function produces one of these for each ExecutionOutput.
    """

    set_id: str  # Unique identifier
    prompt: str  # Original test prompt
    response: str  # Model response
    # "passed", "failed", or "error": passed/failed depending on which kind of
    # patterns were found, "error" if none were found.
    status: str
    reason: str  # Explanation for status
    # Evaluator findings; depends on the selected evaluators.
    detections: Dict[str, Any] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)
    # ELM evaluation result (if an evaluation model was used).
    elm_evaluation: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for serialization.

        ``"elm_evaluation"`` is included only when the attribute is truthy.
        """
        always_present = (
            "set_id",
            "prompt",
            "response",
            "status",
            "reason",
            "detections",
            "metadata",
        )
        serialized: Dict[str, Any] = {
            name: getattr(self, name) for name in always_present
        }
        if not self.elm_evaluation:
            return serialized
        serialized["elm_evaluation"] = self.elm_evaluation
        return serialized
@dataclass
class ReportData:
    """Output of the report phase / function.

    This is the final report structure, which gets serialized to the desired
    format based on the given command line argument.
    """

    set_name: str
    timestamp: str
    execution_time_seconds: Optional[float]
    summary: Dict[str, Any]  # total tests ran, passed%, failed%, error% rates
    results: List[EvaluationResult]  # All evaluation results
    configuration: Dict[str, Any] = field(default_factory=dict)  # Test config
    # AI-generated summary
    ai_summary: Optional[Dict[str, Any]] = field(default_factory=dict)
    # When True, to_dict() nests the results under "set_category", grouped
    # by group_by_vulnerability(); otherwise it emits a flat "results" list.
    group_results: bool = True

    def group_by_vulnerability(self) -> Dict[str, List[EvaluationResult]]:
        """Group results by the ``vulnerability_subcategory`` metadata field.

        Results whose metadata lacks that key are collected under
        ``"Uncategorized"``. Groups appear in first-seen order.

        Returns:
            Dict mapping each subcategory name to its list of results.
        """
        buckets: Dict[str, List[EvaluationResult]] = {}
        for entry in self.results:
            bucket_key = entry.metadata.get(
                "vulnerability_subcategory", "Uncategorized"
            )
            buckets.setdefault(bucket_key, []).append(entry)
        return buckets

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the report to a plain dictionary.

        Results are emitted either grouped under ``"set_category"`` or as a
        flat ``"results"`` list, depending on ``group_results``. The
        ``"ai_summary"`` key is added only when ``ai_summary`` is truthy.
        """
        report: Dict[str, Any] = {
            "set_name": self.set_name,
            "timestamp": self.timestamp,
            "execution_time_seconds": self.execution_time_seconds,
            "configuration": self.configuration,
            "summary": self.summary,
        }

        if not self.group_results:
            report["results"] = [entry.to_dict() for entry in self.results]
        else:
            report["set_category"] = {
                name: [entry.to_dict() for entry in members]
                for name, members in self.group_by_vulnerability().items()
            }

        if self.ai_summary:
            report["ai_summary"] = self.ai_summary
        return report