Source from repo

Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.

muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link

Files

241

Skill

n/a

Size

2.6 MB

Entrypoint

SKILL.md

Format

git-repo

Open file

examples/llm-as-judge-skills/src/agents/evaluator.ts

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 113 lines · Free

examples/llm-as-judge-skills/src/agents/evaluator.ts

1import { openai } from '@ai-sdk/openai';
2import { generateText } from 'ai';
3import { config } from '../config/index.js';
4import { 
5  executeDirectScore, 
6  executePairwiseCompare, 
7  executeGenerateRubric,
8  type DirectScoreInput,
9  type PairwiseCompareInput,
10  type GenerateRubricInput
11} from '../tools/evaluation/index.js';
12 
/**
 * Optional overrides accepted by the `EvaluatorAgent` constructor.
 * Every field may be omitted.
 */
export interface EvaluatorAgentConfig {
  /** Model identifier; when not provided the agent uses `config.openai.model`. */
  model?: string;

  /** Sampling temperature for chat-based evaluation; when not provided the agent uses 0.3. */
  temperature?: number;

  /**
   * Token limit for model output.
   * NOTE(review): nothing in this file reads this field — confirm whether it
   * is meant to be forwarded to the model call.
   */
  maxTokens?: number;
}
18 
/**
 * Facade over the evaluation tools (direct scoring, pairwise comparison and
 * rubric generation) plus a free-form chat evaluator.
 *
 * The configured model and temperature are only read by `chat`; the other
 * methods pass their input straight to the imported tool functions.
 */
export class EvaluatorAgent {
  private readonly model: string;
  private readonly temperature: number;

  /**
   * @param agentConfig Optional overrides for the model and temperature.
   */
  constructor(agentConfig?: EvaluatorAgentConfig) {
    // `||` is kept here so that an empty model string still falls back to the
    // configured default.
    this.model = agentConfig?.model || config.openai.model;
    // `??` rather than `||`: an explicit temperature of 0 must be honoured
    // instead of being silently replaced by the 0.3 default.
    this.temperature = agentConfig?.temperature ?? 0.3;
    // NOTE(review): `agentConfig.maxTokens` is accepted by the config type but
    // is not consumed. The option name expected by `generateText` differs
    // between AI SDK major versions, so confirm the installed version before
    // forwarding it.
  }

  /**
   * Score a response against defined criteria.
   *
   * @param input Forwarded unchanged to `executeDirectScore`.
   * @returns Whatever `executeDirectScore` resolves to.
   */
  async score(input: DirectScoreInput) {
    return executeDirectScore(input);
  }

  /**
   * Compare two responses and pick the better one.
   *
   * @param input Forwarded unchanged to `executePairwiseCompare`.
   * @returns Whatever `executePairwiseCompare` resolves to.
   */
  async compare(input: PairwiseCompareInput) {
    return executePairwiseCompare(input);
  }

  /**
   * Generate a rubric for a criterion.
   *
   * @param input Forwarded unchanged to `executeGenerateRubric`.
   * @returns Whatever `executeGenerateRubric` resolves to.
   */
  async generateRubric(input: GenerateRubricInput) {
    return executeGenerateRubric(input);
  }

  /**
   * Full evaluation workflow: generate a rubric per criterion, merge them,
   * then score the response with the merged rubric.
   *
   * @param response The text being evaluated.
   * @param prompt The prompt that produced `response`.
   * @param criteria Criteria to score against; a missing `weight` becomes 1,
   *   while an explicit weight (including 0) is passed through unchanged.
   * @returns The result of `score` for the merged rubric.
   */
  async evaluateWithGeneratedRubric(
    response: string,
    prompt: string,
    criteria: Array<{ name: string; description: string; weight?: number }>
  ) {
    // Rubric generation calls are independent, so they are issued together.
    const rubrics = await Promise.all(
      criteria.map(c => this.generateRubric({
        criterionName: c.name,
        criterionDescription: c.description,
        scale: '1-5',
        includeExamples: false,
        strictness: 'balanced'
      }))
    );

    // Merge every generated rubric, not just the first one. With a single
    // criterion the descriptions are used verbatim; with several, each line
    // is labelled with its criterion so the levels stay distinguishable.
    const labelByCriterion = criteria.length > 1;
    const levelDescriptions: Record<string, string> = {};
    rubrics.forEach((rubric, index) => {
      // Promise.all preserves order, so `index` lines up with `criteria`.
      const criterionName = criteria[index]?.name ?? '';
      rubric?.levels?.forEach(level => {
        const key = String(level.score);
        const text = labelByCriterion
          ? `${criterionName}: ${level.description}`
          : level.description;
        const existing = levelDescriptions[key];
        levelDescriptions[key] = existing === undefined ? text : `${existing}\n${text}`;
      });
    });

    // Score using the merged rubric.
    return this.score({
      response,
      prompt,
      criteria: criteria.map((c) => ({
        name: c.name,
        description: c.description,
        // `??` rather than `||`: a caller-supplied weight of 0 is kept.
        weight: c.weight ?? 1
      })),
      rubric: {
        scale: '1-5',
        levelDescriptions
      }
    });
  }

  /**
   * Chat-based evaluation for custom queries.
   *
   * @param userMessage Sent as the prompt alongside a fixed evaluator system prompt.
   * @returns The generated text and the usage object reported by `generateText`.
   */
  async chat(userMessage: string) {
    const result = await generateText({
      model: openai(this.model),
      system: `You are an expert evaluator of AI-generated content.
Your role is to assess quality, identify issues, and provide actionable feedback.
Be objective, specific, and constructive in your evaluations.`,
      prompt: userMessage,
      temperature: this.temperature
    });

    return {
      text: result.text,
      usage: result.usage
    };
  }
}
109 
/**
 * Shared default instance, constructed with no overrides so it uses the
 * constructor's fallback model and temperature. Created when this module is
 * first loaded.
 */
export const evaluatorAgent: EvaluatorAgent = new EvaluatorAgent();
112 
113

Agent Skills for Context Engineering

examples/llm-as-judge-skills/src/agents/evaluator.ts

Preparing the source view

Agent Skills for Context Engineering

examples/llm-as-judge-skills/src/agents/evaluator.ts