Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/llm-as-judge-skills/examples/basic-evaluation.ts
/**
 * Basic Evaluation Example
 *
 * Demonstrates how to use the EvaluatorAgent to score responses.
 *
 * Run: npx tsx examples/basic-evaluation.ts
 */

import 'dotenv/config';
import { EvaluatorAgent } from '../src/agents/evaluator.js';
import { validateConfig } from '../src/config/index.js';

/**
 * Scores one hard-coded sample response with `EvaluatorAgent.score` and
 * prints the outcome.
 *
 * On success it logs each criterion's score, justification and suggested
 * improvement, followed by the overall score, weighted score, summary
 * assessment, strengths, weaknesses and evaluation time. When
 * `result.success` is falsy it logs the assessment to stderr and marks the
 * process as failed via `process.exitCode`.
 *
 * @returns {Promise<void>} Settles once the evaluation has been reported.
 */
async function main() {
  // Validate API key is configured
  validateConfig();

  const agent = new EvaluatorAgent();

  console.log('=== Direct Scoring Example ===\n');

  const response = `
Machine learning is a subset of artificial intelligence that enables systems
to learn and improve from experience without being explicitly programmed.
It focuses on developing algorithms that can access data and use it to learn for themselves.

There are three main types of machine learning:
1. Supervised learning - learns from labeled data
2. Unsupervised learning - finds patterns in unlabeled data
3. Reinforcement learning - learns through trial and error
`;

  const result = await agent.score({
    response,
    prompt: 'Explain what machine learning is to a beginner',
    // The three weights below add up to 1.0.
    criteria: [
      {
        name: 'Accuracy',
        description: 'Factual correctness of the explanation',
        weight: 0.4
      },
      {
        name: 'Clarity',
        description: 'Easy to understand for a beginner',
        weight: 0.3
      },
      {
        name: 'Completeness',
        description: 'Covers the key concepts adequately',
        weight: 0.3
      }
    ],
    rubric: {
      scale: '1-5',
      levelDescriptions: {
        '1': 'Poor - Major issues',
        '2': 'Below Average - Several issues',
        '3': 'Average - Some issues',
        '4': 'Good - Minor issues only',
        '5': 'Excellent - No issues'
      }
    }
  });

  if (!result.success) {
    console.error('Evaluation failed:', result.summary.assessment);
    // Report the failure to the calling shell / CI instead of exiting with 0.
    process.exitCode = 1;
    return;
  }

  console.log('Evaluation Results:');
  console.log('-------------------');

  for (const score of result.scores) {
    console.log(`\n${score.criterion}: ${score.score}/${score.maxScore}`);
    console.log(`Justification: ${score.justification}`);
    console.log(`Improvement: ${score.improvement}`);
  }

  console.log('\n-------------------');
  console.log(`Overall Score: ${result.overallScore}`);
  console.log(`Weighted Score: ${result.weightedScore}`);
  console.log(`\nAssessment: ${result.summary.assessment}`);

  console.log(`\nStrengths:`);
  for (const strength of result.summary.strengths) {
    console.log(` - ${strength}`);
  }

  console.log(`\nWeaknesses:`);
  for (const weakness of result.summary.weaknesses) {
    console.log(` - ${weakness}`);
  }

  console.log(`\nEvaluation Time: ${result.metadata.evaluationTimeMs}ms`);
}

main().catch((error) => {
  // Log the error as before, but also exit non-zero so that a thrown error
  // (e.g. from validateConfig or the scoring call) is visible to callers.
  console.error(error);
  process.exitCode = 1;
});