Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Deploy, evaluate, and manage AI agents end-to-end on Microsoft Azure AI Foundry
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
finetuning/scripts/validate/validate_rft.py
#!/usr/bin/env python3
"""Validate RFT (Reinforcement Fine-Tuning) JSONL files for Azure AI Foundry.

Adapted from foundry-ft agent with critical additions from our platform gotchas:
- Grader escaping warnings for newlines (\\n must be \\\\n in JSON strings)
- Content moderation risk detection ("chain of thought" triggers RAI filter)
- Reference answer diversity check
"""
import argparse
import json
import sys
from collections import Counter
from typing import Optional

try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except (AttributeError, OSError, ValueError):
    # Stream not reconfigurable (older Python, replaced/closed stream, or
    # non-tty); the default encoding is fine.
    pass


# Phrases that produce a content-moderation warning when found in a record.
RISKY_PHRASES = [
    "chain of thought", "step by step reasoning", "let me think",
    "think carefully", "reason through",
]

# How many characters after a field's key are scanned for escape sequences.
_ESCAPE_SCAN_WINDOW = 500
# Caps on how many findings are printed in the report.
_MAX_ERRORS_SHOWN = 20
_MAX_WARNINGS_SHOWN = 10


def _check_messages(record: dict, line_num: int, errors: list) -> None:
    """Append at most one error describing a malformed 'messages' field."""
    if "messages" not in record:
        errors.append(f"Line {line_num}: Missing 'messages' field")
        return

    msgs = record["messages"]
    if not isinstance(msgs, list) or not msgs:
        errors.append(f"Line {line_num}: 'messages' must be a non-empty array")
    elif not all(isinstance(m, dict) for m in msgs):
        # Guard before calling .get() below: a bare string/number entry would
        # otherwise raise AttributeError and abort the whole validation run.
        errors.append(f"Line {line_num}: Every entry in 'messages' must be an object")
    elif not any(m.get("role") == "user" for m in msgs):
        errors.append(f"Line {line_num}: 'messages' has no 'user' message")
    elif msgs[-1].get("role") != "user":
        errors.append(
            f"Line {line_num}: Last message must be 'user' role for RFT "
            f"(found '{msgs[-1].get('role')}') — unlike SFT, the model generates its own response"
        )


def _newline_escape_warning(raw_line: str, record: dict, field: str, line_num: int) -> Optional[str]:
    """Return a warning if *field* holds newlines that look single-escaped.

    Instead of regex-parsing the raw JSON line (which risks catastrophic
    backtracking), the parsed value is compared against the raw line: if the
    parsed value contains real newlines and the raw text following the field's
    key shows ``\\n`` but never ``\\\\n``, the value was single-escaped.
    Only the first ``_ESCAPE_SCAN_WINDOW`` characters after the key are scanned.
    """
    parsed_val = str(record.get(field, ""))
    if "\n" not in parsed_val:
        return None

    field_needle = f'"{field}"'
    field_start = raw_line.find(field_needle)
    if field_start == -1:
        return None

    field_region = raw_line[field_start:field_start + _ESCAPE_SCAN_WINDOW]
    if "\\n" in field_region and "\\\\n" not in field_region:
        return (
            f"Line {line_num}: '{field}' contains \\n sequences — "
            "if this is grader source code embedded in JSON, "
            "ensure newlines are escaped as \\\\n."
        )
    return None


def _schema_consistency_warning(schemas: list) -> Optional[str]:
    """Return a warning if grader-field sets differ between records.

    *schemas* is a list of ``(file_line_number, set_of_extra_fields)`` pairs
    for records that have at least one extra field, in file order.
    """
    if len(schemas) < 2:
        return None

    first_line, first_schema = schemas[0]
    differing = [line_num for line_num, fields in schemas if fields != first_schema]
    if not differing:
        return None

    return (
        f"Inconsistent grader fields across examples — "
        f"line {first_line} has {sorted(first_schema)}, but {len(differing)} "
        f"line(s) differ (e.g. line {differing[0]}). "
        "Ensure your grader handles all field variants."
    )


def _diversity_warnings(grader_values: list) -> list:
    """Return warnings about identical or very long grader field values."""
    found = []
    if not grader_values:
        return found

    # A single value is trivially "all identical"; only warn when there is
    # more than one value to compare.
    if len(grader_values) > 1 and len(set(grader_values)) == 1:
        found.append(
            f"All grader field values are identical ('{grader_values[0][:50]}...') — "
            "grader may not learn effectively"
        )

    avg_len = sum(len(v) for v in grader_values) / len(grader_values)
    if avg_len > 500:
        found.append(
            f"Average grader field value length is {avg_len:.0f} chars — "
            "consider using a model_grader instead of string_check"
        )
    return found


def _print_report(filepath, total: int, errors: list, warnings: list, field_counts: Counter) -> None:
    """Print the human-readable validation report to stdout."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"RFT Validation Report: {filepath}")
    print(rule)
    print(f"Total records: {total}")
    print(f"Errors: {len(errors)}")
    print(f"Warnings: {len(warnings)}")

    if field_counts:
        print("\nGrader fields found:")
        for field, count in field_counts.most_common():
            print(f" • '{field}' — in {count}/{total} records")

    if errors:
        print("\n❌ ERRORS (must fix):")
        for message in errors[:_MAX_ERRORS_SHOWN]:
            print(f" • {message}")
        if len(errors) > _MAX_ERRORS_SHOWN:
            print(f" ... and {len(errors) - _MAX_ERRORS_SHOWN} more errors")

    if warnings:
        print("\n⚠️ WARNINGS:")
        for message in warnings[:_MAX_WARNINGS_SHOWN]:
            print(f" • {message}")
        if len(warnings) > _MAX_WARNINGS_SHOWN:
            print(f" ... and {len(warnings) - _MAX_WARNINGS_SHOWN} more warnings")

    # RFT-specific guidance
    if total > 0:
        print("\n💡 RFT tips:")
        print(" • Ensure your training grader matches your eval grader (alignment gotcha)")
        print(" • Start with reasoning_effort='medium', pass_threshold=0.5")
        print(" • RFT is primarily for o-series models (o4-mini). Check Azure docs for the latest supported model list.")


def validate_rft(filepath, expected_field=None):
    """Validate a JSONL file of RFT training records and print a report.

    Each non-blank line must be a JSON object with a non-empty ``messages``
    array of objects whose last entry has role ``user``, plus at least one
    additional "grader" field (or specifically *expected_field* when given).

    Args:
        filepath: Path to the JSONL file; it is read as UTF-8.
        expected_field: Name of the grader field every record must carry with
            a non-empty value. When ``None``, any field besides ``messages``
            counts as a grader field.

    Returns:
        ``None`` when no errors were found.

    Raises:
        SystemExit: With exit code 1 when at least one error was found
            (including a file that contains no records).
        OSError: If the file cannot be opened.
    """
    errors: list = []
    warnings: list = []
    total = 0
    # (file line number, extra-field set) for every record with grader fields.
    schemas: list = []
    field_counts: Counter = Counter()
    grader_values: list = []

    with open(filepath, "r", encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                continue
            total += 1

            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                errors.append(f"Line {line_num}: Invalid JSON — {e}")
                continue

            # Valid JSON that is not an object (array, number, string, null)
            # cannot be a training record; report it instead of crashing on
            # the dict operations below.
            if not isinstance(record, dict):
                errors.append(
                    f"Line {line_num}: Record must be a JSON object "
                    f"(found {type(record).__name__})"
                )
                continue

            _check_messages(record, line_num, errors)

            # Detect extra fields (grader fields) beyond 'messages'. Sorted so
            # that report order does not depend on set iteration order.
            extra_fields = sorted(set(record) - {"messages"})
            if extra_fields:
                schemas.append((line_num, set(extra_fields)))
            field_counts.update(extra_fields)

            if expected_field:
                if expected_field not in record:
                    errors.append(f"Line {line_num}: Missing expected field '{expected_field}'")
                else:
                    val = str(record[expected_field]).strip()
                    if not val:
                        errors.append(f"Line {line_num}: '{expected_field}' is empty")
                    else:
                        grader_values.append(val)
            elif not extra_fields:
                errors.append(
                    f"Line {line_num}: No grader fields found — RFT requires at least "
                    "one field beyond 'messages' (e.g. 'answer', 'reference_code')"
                )
            else:
                # Collect values from extra fields for the diversity check.
                for field in extra_fields:
                    val = str(record[field]).strip()
                    if val:
                        grader_values.append(val)

            # Check for unescaped newlines in extra fields (CRITICAL platform gotcha).
            for field in extra_fields:
                escape_warning = _newline_escape_warning(raw_line, record, field, line_num)
                if escape_warning:
                    warnings.append(escape_warning)

            # Content moderation risk: at most one warning per record.
            all_text = json.dumps(record).lower()
            for phrase in RISKY_PHRASES:
                if phrase in all_text:
                    warnings.append(
                        f"Line {line_num}: Contains '{phrase}' — may trigger Azure content moderation filter."
                    )
                    break

    if total == 0:
        # An empty training file must not be reported as valid.
        errors.append("File contains no records")

    schema_warning = _schema_consistency_warning(schemas)
    if schema_warning:
        warnings.append(schema_warning)
    warnings.extend(_diversity_warnings(grader_values))

    _print_report(filepath, total, errors, warnings, field_counts)

    if not errors:
        print("\n✅ Data is valid for RFT fine-tuning!")
    else:
        print(f"\n❌ Fix {len(errors)} error(s) before submitting.")
        sys.exit(1)


def main() -> None:
    """Parse command-line arguments and validate the given JSONL file."""
    parser = argparse.ArgumentParser(
        description="Validate RFT (Reinforcement Fine-Tuning) JSONL files for Azure AI Foundry."
    )
    parser.add_argument("filepath", help="Path to the JSONL file to validate")
    parser.add_argument(
        "--expected-field",
        default=None,
        help="Specific grader field name to require (e.g. 'answer'). "
        "If omitted, any extra field beyond 'messages' is accepted.",
    )
    args = parser.parse_args()
    try:
        validate_rft(args.filepath, expected_field=args.expected_field)
    except OSError as exc:
        # Report an unreadable path as a usage error rather than a traceback.
        parser.error(f"cannot read '{args.filepath}': {exc}")


if __name__ == "__main__":
    main()