Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/llm-as-judge-skills/examples/full-evaluation-workflow.ts
/**
 * Full Evaluation Workflow Example
 *
 * Demonstrates a complete evaluation workflow:
 * 1. Generate rubrics for criteria
 * 2. Score a response using generated rubrics
 * 3. Compare with an alternative response
 *
 * Run: npx tsx examples/full-evaluation-workflow.ts
 */

import 'dotenv/config';
import { EvaluatorAgent } from '../src/agents/evaluator.js';
import { validateConfig } from '../src/config/index.js';

/**
 * Runs the three workflow steps in order and prints progress to stdout:
 * rubric generation (one request per criterion, issued concurrently),
 * scoring of the primary response, and a pairwise comparison against a
 * shorter alternative response. A summary with the elapsed wall-clock time
 * is printed at the end.
 *
 * Results whose `success` flag is false are skipped in the per-step output
 * and shown as `N/A` in the summary.
 *
 * @returns {Promise<void>} Resolves once the summary has been printed;
 *   rejects if `validateConfig()` or any awaited agent call throws.
 */
async function main() {
  // NOTE(review): presumably throws when required configuration is missing;
  // confirm against ../src/config/index.js.
  validateConfig();

  const agent = new EvaluatorAgent();
  const startTime = Date.now();

  console.log('=== Full Evaluation Workflow ===\n');

  const prompt = 'Explain how vaccines work to prevent disease';

  const response = `
Vaccines work by training your immune system to recognize and fight specific pathogens
without causing the disease itself.

Here's the process:

1. **Introduction**: The vaccine introduces a weakened, killed, or partial version of
the pathogen (or instructions to make a piece of it, like mRNA vaccines).

2. **Immune Response**: Your immune system detects these foreign substances (antigens)
and mounts a response. This includes producing antibodies and training T-cells.

3. **Memory Formation**: Some immune cells become "memory cells" that remember
how to fight this specific pathogen.

4. **Future Protection**: If you're exposed to the real pathogen later, your immune
system recognizes it immediately and can fight it off before you get sick.

This is why vaccines are so effective - they give your immune system a "practice run"
without the risks of actual infection.
`;

  // Step 1: Generate rubrics
  console.log('Step 1: Generating rubrics...\n');

  const criteria = [
    { name: 'Scientific Accuracy', description: 'Correctness of biological/medical information' },
    { name: 'Completeness', description: 'Covers the key steps and concepts' },
    { name: 'Accessibility', description: 'Understandable by general audience' }
  ];

  // The requests are independent, so they are started together. Promise.all
  // rejects as soon as any single request rejects.
  const rubrics = await Promise.all(
    criteria.map(c => agent.generateRubric({
      criterionName: c.name,
      criterionDescription: c.description,
      scale: '1-5',
      domain: 'health education',
      includeExamples: false,
      strictness: 'balanced'
    }))
  );

  console.log('Generated rubrics for:');
  for (const r of rubrics) {
    if (r.success) {
      console.log(` - ${r.criterion.name} (${r.levels.length} levels)`);
    }
  }

  // Step 2: Score the response
  console.log('\nStep 2: Scoring the response...\n');

  // Only the first rubric (Scientific Accuracy) supplies the level labels
  // passed to score(); the other generated rubrics are reported but not used.
  const firstRubric = rubrics[0];

  const scoreResult = await agent.score({
    response,
    prompt,
    // With three criteria the weights are 0.4 + 0.3 + 0.3 = 1.0.
    criteria: criteria.map((c, i) => ({
      name: c.name,
      description: c.description,
      weight: i === 0 ? 0.4 : 0.3 // Weight accuracy higher
    })),
    rubric: {
      scale: '1-5',
      levelDescriptions: firstRubric.success
        ? Object.fromEntries(firstRubric.levels.map(l => [String(l.score), l.label]))
        : undefined
    }
  });

  if (scoreResult.success) {
    console.log('Scores:');
    for (const s of scoreResult.scores) {
      console.log(` ${s.criterion}: ${s.score}/${s.maxScore}`);
    }
    console.log(`\nOverall: ${scoreResult.overallScore} | Weighted: ${scoreResult.weightedScore}`);
  }

  // Step 3: Compare with an alternative
  console.log('\nStep 3: Comparing with alternative response...\n');

  const alternativeResponse = `
Vaccines prevent disease by helping your body build immunity. When you get
vaccinated, your body learns to fight the germ. Then if you're exposed to
the real disease, your body already knows how to protect itself.
`;

  const comparisonResult = await agent.compare({
    responseA: response,
    responseB: alternativeResponse,
    prompt,
    criteria: ['accuracy', 'depth', 'clarity'],
    // NOTE(review): presumably evaluates both A/B orderings to reduce
    // position bias; confirm against the EvaluatorAgent implementation.
    swapPositions: true
  });

  if (comparisonResult.success) {
    console.log(`Comparison Result: Response ${comparisonResult.winner} is better`);
    console.log(`Confidence: ${(comparisonResult.confidence * 100).toFixed(0)}%`);
    console.log('\nKey differences:');
    // Show at most the first three differentiators.
    for (const d of comparisonResult.differentiators.slice(0, 3)) {
      console.log(` - ${d}`);
    }
  }

  // Summary
  const totalTime = Date.now() - startTime;
  console.log('\n=== Workflow Complete ===');
  console.log(`Total time: ${totalTime}ms`);
  console.log(`Rubrics generated: ${rubrics.filter(r => r.success).length}`);
  console.log(`Final score: ${scoreResult.success ? scoreResult.overallScore : 'N/A'}`);
  console.log(`Better response: ${comparisonResult.success ? comparisonResult.winner : 'N/A'}`);
}

// Log the failure exactly as before, but also mark the run as failed so that
// shells and CI do not see exit status 0 after an error. Setting exitCode
// (rather than calling process.exit) lets pending output flush first.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});