Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/llm-as-judge-skills/src/tools/evaluation/generate-rubric.ts
import { tool } from 'ai';
import { z } from 'zod';
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
import { config } from '../../config/index.js';

/**
 * Parameters accepted by the rubric generator.
 * `scale`, `includeExamples` and `strictness` fall back to the defaults
 * declared here when omitted; `domain` stays optional.
 */
export const GenerateRubricInputSchema = z.object({
  criterionName: z.string().describe('Name of the criterion'),
  criterionDescription: z.string().describe('What this criterion measures'),
  scale: z.enum(['1-3', '1-5', '1-10']).optional().default('1-5'),
  domain: z.string().optional().describe('Domain context'),
  includeExamples: z.boolean().optional().default(true),
  strictness: z.enum(['lenient', 'balanced', 'strict']).optional().default('balanced')
});

/** Parsed form of {@link GenerateRubricInputSchema}, i.e. with defaults applied. */
export type GenerateRubricInput = z.infer<typeof GenerateRubricInputSchema>;

/**
 * Result envelope produced by {@link executeGenerateRubric}.
 * On failure `success` is `false` and `levels`, `scoringGuidelines` and
 * `edgeCases` are empty arrays.
 */
export const GenerateRubricOutputSchema = z.object({
  success: z.boolean(),
  criterion: z.object({
    name: z.string(),
    description: z.string()
  }),
  scale: z.object({
    min: z.number(),
    max: z.number(),
    type: z.string()
  }),
  levels: z.array(z.object({
    score: z.number(),
    label: z.string(),
    description: z.string(),
    characteristics: z.array(z.string()),
    example: z.string().optional()
  })),
  scoringGuidelines: z.array(z.string()),
  edgeCases: z.array(z.object({
    situation: z.string(),
    guidance: z.string()
  })),
  metadata: z.object({
    domain: z.string().nullable(),
    strictness: z.string(),
    generationTimeMs: z.number()
  })
});

export type GenerateRubricOutput = z.infer<typeof GenerateRubricOutputSchema>;

/** Lowest and highest score of a scale. */
type ScaleBounds = { min: number; max: number };

/** The model-generated portion of the output envelope. */
type RubricBody = Pick<GenerateRubricOutput, 'levels' | 'scoringGuidelines' | 'edgeCases'>;

/**
 * Numeric bounds for every supported scale. Typed against the input enum so
 * that adding a scale without its bounds is a compile error.
 */
const SCALE_BOUNDS: Record<GenerateRubricInput['scale'], ScaleBounds> = {
  '1-3': { min: 1, max: 3 },
  '1-5': { min: 1, max: 5 },
  '1-10': { min: 1, max: 10 }
};

/**
 * Shape the prompt asks the model to return. `example` accepts `null`
 * because the prompt template requests `"example": null` when examples are
 * disabled; nulls are dropped before the result is returned.
 */
const ModelRubricSchema = z.object({
  levels: z.array(z.object({
    score: z.number(),
    label: z.string(),
    description: z.string(),
    characteristics: z.array(z.string()),
    example: z.string().nullish()
  })),
  scoringGuidelines: z.array(z.string()),
  edgeCases: z.array(z.object({
    situation: z.string(),
    guidance: z.string()
  }))
});

/**
 * Returns the JSON text of a model reply: the contents of the first Markdown
 * code fence if the reply has one, otherwise the trimmed reply itself.
 */
function extractJsonPayload(text: string): string {
  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  return (fenced?.[1] ?? text).trim();
}

/** Builds the system prompt, embedding the requested strictness. */
function buildSystemPrompt(strictness: GenerateRubricInput['strictness']): string {
  return `You are an expert in creating evaluation rubrics.
Create clear, actionable rubrics with distinct boundaries between levels.
Strictness: ${strictness}
- lenient: Lower bar for passing scores
- balanced: Fair, typical expectations
- strict: High standards, critical evaluation`;
}

/** Builds the user prompt describing the criterion and the expected JSON reply. */
function buildUserPrompt(input: GenerateRubricInput, bounds: ScaleBounds): string {
  const { min: minScore, max: maxScore } = bounds;

  return `Create a scoring rubric for:

**Criterion**: ${input.criterionName}
**Description**: ${input.criterionDescription}
**Scale**: ${input.scale} (${minScore} = lowest, ${maxScore} = highest)
${input.domain ? `**Domain**: ${input.domain}` : ''}
**Include Examples**: ${input.includeExamples}

Generate a rubric with:
1. Clear descriptions for each score level
2. Specific characteristics that define each level
3. ${input.includeExamples ? 'Brief example text for each level' : 'No examples needed'}
4. General scoring guidelines
5. Edge cases with guidance

Respond with valid JSON:
{
  "levels": [
    {
      "score": ${minScore},
      "label": "Label (e.g., Poor)",
      "description": "Detailed description of this level",
      "characteristics": ["characteristic 1", "characteristic 2"],
      "example": ${input.includeExamples ? '"Brief example text"' : 'null'}
    }
    // ... all levels from ${minScore} to ${maxScore}
  ],
  "scoringGuidelines": [
    "General guideline 1",
    "General guideline 2"
  ],
  "edgeCases": [
    {
      "situation": "Edge case description",
      "guidance": "How to handle it"
    }
  ]
}`;
}

/**
 * Asks the model named by `config.openai.model` to write a scoring rubric for
 * one evaluation criterion.
 *
 * The reply is unwrapped from a Markdown code fence if present, parsed as
 * JSON and validated before it is returned, so a `success: true` result
 * always matches {@link GenerateRubricOutputSchema}.
 *
 * @param input - Criterion, scale and generation options (defaults applied).
 * @returns The rubric envelope. This function does not reject on model,
 *   parsing or validation failures; they are logged and reported as
 *   `success: false` with empty `levels`, `scoringGuidelines` and `edgeCases`.
 */
export async function executeGenerateRubric(input: GenerateRubricInput): Promise<GenerateRubricOutput> {
  const startTime = Date.now();
  const bounds = SCALE_BOUNDS[input.scale];

  // Single place that assembles the envelope so the success and failure
  // paths cannot drift apart.
  const buildResult = (success: boolean, rubric: RubricBody): GenerateRubricOutput => ({
    success,
    criterion: {
      name: input.criterionName,
      description: input.criterionDescription
    },
    scale: {
      min: bounds.min,
      max: bounds.max,
      type: input.scale
    },
    ...rubric,
    metadata: {
      // `||` rather than `??`: an empty domain string maps to null, in line
      // with the truthiness check used when building the prompt.
      domain: input.domain || null,
      strictness: input.strictness,
      generationTimeMs: Date.now() - startTime
    }
  });

  try {
    const result = await generateText({
      model: openai(config.openai.model),
      system: buildSystemPrompt(input.strictness),
      prompt: buildUserPrompt(input, bounds),
      temperature: 0.4
    });

    // Both JSON.parse and schema.parse throw on bad input; either failure is
    // handled by the catch below.
    const raw: unknown = JSON.parse(extractJsonPayload(result.text));
    const rubric = ModelRubricSchema.parse(raw);

    return buildResult(true, {
      // Drop null/absent examples so each level matches the output schema,
      // where `example` is an optional string.
      levels: rubric.levels.map(({ example, ...level }) =>
        example == null ? level : { ...level, example }
      ),
      scoringGuidelines: rubric.scoringGuidelines,
      edgeCases: rubric.edgeCases
    });
  } catch (error: unknown) {
    // Best-effort contract: report failure through the envelope, but leave a
    // trace of the cause instead of discarding it.
    console.error('generate-rubric: rubric generation failed', error);
    return buildResult(false, { levels: [], scoringGuidelines: [], edgeCases: [] });
  }
}

/** Tool wrapper exposing {@link executeGenerateRubric} with the input schema as its parameters. */
export const generateRubricTool = tool({
  description: `Generate a scoring rubric for an evaluation criterion.
Creates detailed descriptions for each score level.
Use to establish consistent evaluation standards.`,
  parameters: GenerateRubricInputSchema,
  execute: executeGenerateRubric
});