Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/llm-as-judge-skills/examples/pairwise-comparison.ts
/**
 * Pairwise Comparison Example
 *
 * Demonstrates how to compare two responses and pick the better one.
 *
 * Run: npx tsx examples/pairwise-comparison.ts
 *
 * The process exit code is set to 1 when the comparison reports failure or
 * when an unexpected error is thrown, so callers (shells, CI) can detect it.
 */

import 'dotenv/config';
import { EvaluatorAgent } from '../src/agents/evaluator.js';
import { validateConfig } from '../src/config/index.js';

/**
 * Prints the strengths and weaknesses recorded for one response.
 *
 * @param label - Response label used in the heading ('A' or 'B').
 * @param analysis - Object with `strengths` and `weaknesses` arrays, as read
 *   from `result.analysis.responseA` / `result.analysis.responseB`.
 */
function printAnalysis(label, analysis) {
  console.log(`\nResponse ${label} Analysis:`);
  console.log(' Strengths:', analysis.strengths.join(', '));
  console.log(' Weaknesses:', analysis.weaknesses.join(', '));
}

/**
 * Runs one pairwise comparison between a detailed and a terse answer to the
 * same prompt and prints the evaluator's verdict to the console.
 *
 * @returns A promise that resolves once the report has been printed (or the
 *   failure has been reported and the exit code set).
 */
async function main() {
  // NOTE(review): presumably verifies required configuration/environment and
  // throws when it is missing — confirm against ../src/config/index.js.
  validateConfig();

  const agent = new EvaluatorAgent();

  console.log('=== Pairwise Comparison Example ===\n');

  const prompt = 'Explain the benefits of regular exercise';

  const responseA = `
Regular exercise offers numerous health benefits that affect both body and mind.

Physical benefits include:
- Improved cardiovascular health and reduced heart disease risk
- Stronger muscles and bones
- Better weight management
- Enhanced immune function

Mental benefits include:
- Reduced stress and anxiety
- Improved mood through endorphin release
- Better sleep quality
- Enhanced cognitive function

The CDC recommends at least 150 minutes of moderate aerobic activity per week,
plus muscle-strengthening activities twice weekly.
`;

  const responseB = `
Working out is really good for you. It makes you healthier and feel better.
You should try to exercise regularly if you can. Many people find that
going to the gym or running helps them stay in shape.
`;

  console.log('Prompt:', prompt);
  console.log('\n--- Response A ---');
  console.log(responseA.trim());
  console.log('\n--- Response B ---');
  console.log(responseB.trim());
  console.log('\n--- Comparison Results ---\n');

  const result = await agent.compare({
    responseA,
    responseB,
    prompt,
    criteria: ['accuracy', 'completeness', 'actionability', 'clarity'],
    allowTie: true,
    swapPositions: true, // Mitigate position bias
  });

  // Guard clause: report the failure and signal it through the exit code
  // instead of letting the script finish with status 0.
  if (!result.success) {
    console.error('Comparison failed');
    process.exitCode = 1;
    return;
  }

  console.log(`Winner: Response ${result.winner}`);
  console.log(`Confidence: ${(result.confidence * 100).toFixed(0)}%`);

  // Position-consistency data is only printed when the result carries it.
  if (result.positionConsistency) {
    console.log(`Position Consistency: ${result.positionConsistency.consistent ? 'Yes' : 'No'}`);
  }

  console.log('\nPer-Criterion Results:');
  for (const c of result.comparison) {
    console.log(`\n ${c.criterion}:`);
    console.log(` Winner: ${c.winner}`);
    console.log(` A: ${c.aAssessment}`);
    console.log(` B: ${c.bAssessment}`);
  }

  console.log('\nKey Differentiators:');
  for (const d of result.differentiators) {
    console.log(` - ${d}`);
  }

  printAnalysis('A', result.analysis.responseA);
  printAnalysis('B', result.analysis.responseB);

  console.log(`\nEvaluation Time: ${result.metadata.evaluationTimeMs}ms`);
}

main().catch((error) => {
  // Keep the original logging, but also mark the run as failed.
  console.error(error);
  process.exitCode = 1;
});