Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/llm-as-judge-skills/src/agents/evaluator.ts
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
import { config } from '../config/index.js';
import {
  executeDirectScore,
  executePairwiseCompare,
  executeGenerateRubric,
  type DirectScoreInput,
  type PairwiseCompareInput,
  type GenerateRubricInput
} from '../tools/evaluation/index.js';

/**
 * Optional overrides accepted by {@link EvaluatorAgent}.
 */
export interface EvaluatorAgentConfig {
  /** Model identifier passed to `openai(...)`; falls back to `config.openai.model`. */
  model?: string;
  /** Sampling temperature used by `chat`; defaults to 0.3 when omitted. */
  temperature?: number;
  /** NOTE(review): declared but not read anywhere in this class — confirm intended use. */
  maxTokens?: number;
}

/** Temperature applied when the caller does not supply one. */
const DEFAULT_TEMPERATURE = 0.3;

/**
 * Facade over the evaluation tools (direct scoring, pairwise comparison,
 * rubric generation) plus a free-form chat entry point.
 */
export class EvaluatorAgent {
  private readonly model: string;
  private readonly temperature: number;

  /**
   * @param agentConfig Optional model/temperature overrides.
   */
  constructor(agentConfig?: EvaluatorAgentConfig) {
    // `||` is kept for the model so an empty string also falls back to the configured model.
    this.model = agentConfig?.model || config.openai.model;
    // `??` rather than `||`: an explicit temperature of 0 is a legitimate setting and
    // must not be replaced by the default.
    this.temperature = agentConfig?.temperature ?? DEFAULT_TEMPERATURE;
  }

  /**
   * Score a response against defined criteria.
   *
   * Delegates directly to `executeDirectScore`; the instance's model and
   * temperature are not forwarded.
   */
  async score(input: DirectScoreInput) {
    return executeDirectScore(input);
  }

  /**
   * Compare two responses and pick the better one.
   *
   * Delegates directly to `executePairwiseCompare`.
   */
  async compare(input: PairwiseCompareInput) {
    return executePairwiseCompare(input);
  }

  /**
   * Generate a rubric for a criterion.
   *
   * Delegates directly to `executeGenerateRubric`.
   */
  async generateRubric(input: GenerateRubricInput) {
    return executeGenerateRubric(input);
  }

  /**
   * Full evaluation workflow: generate rubrics, then score.
   *
   * @param response The text being evaluated.
   * @param prompt The prompt that produced `response`.
   * @param criteria Criteria to score against; `weight` defaults to 1.
   * @returns Whatever `score` returns for the assembled input.
   */
  async evaluateWithGeneratedRubric(
    response: string,
    prompt: string,
    criteria: Array<{ name: string; description: string; weight?: number }>
  ) {
    // Rubrics are requested for all criteria concurrently on a fixed 1-5 scale.
    const rubrics = await Promise.all(
      criteria.map((criterion) =>
        this.generateRubric({
          criterionName: criterion.name,
          criterionDescription: criterion.description,
          scale: '1-5',
          includeExamples: false,
          strictness: 'balanced'
        })
      )
    );

    // NOTE(review): only the first generated rubric contributes level descriptions;
    // rubrics for the remaining criteria are generated but not used. Confirm whether
    // per-criterion rubrics should be merged here.
    const levelDescriptions: Record<string, string> = {};
    rubrics[0]?.levels?.forEach((level) => {
      levelDescriptions[String(level.score)] = level.description;
    });

    // Score using the generated rubric.
    return this.score({
      response,
      prompt,
      criteria: criteria.map((criterion) => ({
        name: criterion.name,
        description: criterion.description,
        // NOTE(review): `||` also maps an explicit weight of 0 to 1 — left as-is
        // because the accepted weight range is not visible from this file.
        weight: criterion.weight || 1
      })),
      rubric: {
        scale: '1-5',
        levelDescriptions
      }
    });
  }

  /**
   * Chat-based evaluation for custom queries.
   *
   * @param userMessage Free-form request sent as the prompt.
   * @returns The generated text together with the usage reported by `generateText`.
   */
  async chat(userMessage: string) {
    const result = await generateText({
      model: openai(this.model),
      system: `You are an expert evaluator of AI-generated content.
Your role is to assess quality, identify issues, and provide actionable feedback.
Be objective, specific, and constructive in your evaluations.`,
      prompt: userMessage,
      temperature: this.temperature
    });

    return {
      text: result.text,
      usage: result.usage
    };
  }
}

// Default instance
export const evaluatorAgent = new EvaluatorAgent();