Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Deploy, evaluate, and manage AI agents end-to-end on Microsoft Azure AI Foundry
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
finetuning/scripts/validate/validate_dpo.py
#!/usr/bin/env python3
"""Validate DPO (Direct Preference Optimization) JSONL files for Azure AI Foundry.

Adapted from foundry-ft agent with additional checks:
- Identical preferred/non_preferred detection
- DPO overtraining risk (small dataset warning)
"""
import json
import sys

try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except (AttributeError, OSError):
    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine

# Top-level keys every DPO record must carry.
_REQUIRED_FIELDS = ("input", "preferred_output", "non_preferred_output")
# The two completion arrays that are compared against each other.
_OUTPUT_FIELDS = ("preferred_output", "non_preferred_output")
# Caps on how many individual findings the report prints.
_MAX_ERRORS_SHOWN = 20
_MAX_WARNINGS_SHOWN = 10
# Below this many pairs the report prints the n_epochs overtraining tip.
_SMALL_DATASET_THRESHOLD = 500


def _all_objects(messages: list) -> bool:
    """Return True when every entry of *messages* is a JSON object (dict)."""
    return all(isinstance(msg, dict) for msg in messages)


def _has_role(messages: list, role: str) -> bool:
    """Return True when at least one object in *messages* has ``role == role``."""
    return any(isinstance(msg, dict) and msg.get("role") == role for msg in messages)


def _check_input(inp: object, prefix: str, errors: list) -> None:
    """Validate the ``input`` value of one record, appending problems to *errors*."""
    if not isinstance(inp, dict):
        # A string/number/array here used to crash the `in` / `.get` calls below.
        errors.append(f"{prefix} 'input' must be a JSON object")
        return
    if "messages" not in inp:
        errors.append(f"{prefix} 'input' missing 'messages' field")
        return

    msgs = inp["messages"]
    if not isinstance(msgs, list):
        errors.append(f"{prefix} 'input.messages' must be an array")
    elif not _all_objects(msgs):
        errors.append(f"{prefix} 'input.messages' contains a non-object message")
    elif not _has_role(msgs, "user"):
        errors.append(f"{prefix} 'input.messages' has no 'user' message")


def _check_output(out: object, output_field: str, prefix: str, errors: list) -> None:
    """Validate one preferred/non-preferred output array, appending to *errors*."""
    if not isinstance(out, list) or not out:
        errors.append(f"{prefix} '{output_field}' must be a non-empty array")
    elif not _all_objects(out):
        errors.append(f"{prefix} '{output_field}' contains a non-object message")
    elif not _has_role(out, "assistant"):
        errors.append(f"{prefix} '{output_field}' has no 'assistant' message")


def _check_record(record: object, line_num: int, errors: list, warnings: list) -> None:
    """Validate one parsed JSONL record, appending findings to *errors*/*warnings*."""
    prefix = f"Line {line_num}:"

    if not isinstance(record, dict):
        # e.g. a line holding a bare number, string, or array.
        errors.append(f"{prefix} Record must be a JSON object")
        return

    for field in _REQUIRED_FIELDS:
        if field not in record:
            errors.append(f"{prefix} Missing '{field}' field")

    # Without an 'input' the record is not inspected any further.
    if "input" not in record:
        return

    _check_input(record["input"], prefix, errors)

    for output_field in _OUTPUT_FIELDS:
        if output_field in record:
            _check_output(record[output_field], output_field, prefix, errors)

    if all(field in record for field in _OUTPUT_FIELDS):
        # Compare canonical serialisations so key order does not matter.
        pref = json.dumps(record["preferred_output"], sort_keys=True)
        non_pref = json.dumps(record["non_preferred_output"], sort_keys=True)
        if pref == non_pref:
            warnings.append(
                f"{prefix} preferred and non_preferred outputs are identical"
            )


def _print_report(filepath: str, total: int, errors: list, warnings: list) -> None:
    """Print the summary, the DPO tips, and the (capped) error/warning lists."""
    print(f"\n{'='*60}")
    print(f"DPO Validation Report: {filepath}")
    print(f"{'='*60}")
    print(f"Total records: {total}")
    print(f"Errors: {len(errors)}")
    print(f"Warnings: {len(warnings)}")

    # DPO-specific guidance from our experiments
    if 0 < total < _SMALL_DATASET_THRESHOLD:
        print(
            f"\n⚠️ DPO tip: With {total} pairs, use n_epochs=1-2 max "
            "(Azure defaults to 3, which causes overtraining on small datasets)."
        )
    if total > 0:
        print(
            "\n💡 DPO tip: If your base model already scores >9/10 on this task, "
            "DPO may hurt more than help."
        )

    if errors:
        print("\n❌ ERRORS (must fix):")
        for err in errors[:_MAX_ERRORS_SHOWN]:
            print(f" • {err}")
        if len(errors) > _MAX_ERRORS_SHOWN:
            print(f" ... and {len(errors) - _MAX_ERRORS_SHOWN} more errors")

    if warnings:
        print("\n⚠️ WARNINGS:")
        for warning in warnings[:_MAX_WARNINGS_SHOWN]:
            print(f" • {warning}")
        if len(warnings) > _MAX_WARNINGS_SHOWN:
            # Mirror the error list so truncation is never silent.
            print(f" ... and {len(warnings) - _MAX_WARNINGS_SHOWN} more warnings")


def validate_dpo(filepath: str) -> None:
    """Validate a DPO JSONL file and print a report to stdout.

    Each non-blank line must be a JSON object with an ``input`` object whose
    ``messages`` array contains a ``user`` message, plus non-empty
    ``preferred_output`` and ``non_preferred_output`` arrays that each contain
    an ``assistant`` message. Identical preferred/non-preferred outputs are
    reported as warnings. A file with no records is reported as an error.

    Args:
        filepath: Path to the JSONL file to validate.

    Raises:
        SystemExit: With code 1 when at least one error was found.
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    errors = []
    warnings = []
    total = 0

    # "utf-8-sig" reads plain UTF-8 and also skips a leading BOM; with plain
    # "utf-8" the BOM stays in the first line and json.loads rejects it.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for line_num, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                continue
            total += 1

            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                errors.append(f"Line {line_num}: Invalid JSON — {e}")
                continue

            _check_record(record, line_num, errors, warnings)

    if total == 0:
        # An empty dataset must not be reported as valid training data.
        errors.append("File contains no records")

    _print_report(filepath, total, errors, warnings)

    if not errors:
        print("\n✅ Data is valid for DPO fine-tuning!")
    else:
        print(f"\n❌ Fix {len(errors)} error(s) before submitting.")
        sys.exit(1)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python validate_dpo.py <path-to-jsonl>")
        sys.exit(1)
    try:
        validate_dpo(sys.argv[1])
    except (OSError, UnicodeDecodeError) as exc:
        # Missing/unreadable/non-UTF-8 file: short message instead of a traceback.
        print(f"Error: could not read '{sys.argv[1]}': {exc}", file=sys.stderr)
        sys.exit(1)