Source from repo
Microsoft Foundry Skill

Deploy, evaluate, and manage AI agents end-to-end on Microsoft Azure AI Foundry
microsoftGitHub microsoftOfficialSource repo Original GitHub link Publisher page
Files
154
Skill
n/a
Size
976.2 KB
Entrypoint
SKILL.md
Format
git-repo
Open file
finetuning/scripts/validate/validate_rft.py

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code205 linesFree
finetuning/scripts/validate/validate_rft.py
1#!/usr/bin/env python3
2"""Validate RFT (Reinforcement Fine-Tuning) JSONL files for Azure AI Foundry.
3 
4Adapted from foundry-ft agent with critical additions from our platform gotchas:
5- Grader escaping warnings for newlines (\\n must be \\\\n in JSON strings)
6- Content moderation risk detection ("chain of thought" triggers RAI filter)
7- Reference answer diversity check
8"""
9import argparse
10import json
11import sys
12 
# Best-effort switch of both standard streams to UTF-8 (the validation report
# prints non-ASCII symbols). A failure on either stream abandons the attempt
# and leaves the remaining encoding(s) untouched.
try:
    for _stream in (sys.stdout, sys.stderr):
        _stream.reconfigure(encoding="utf-8")
except (AttributeError, OSError):
    # The stream object lacks reconfigure() or rejected the change; carry on
    # with whatever encoding it already has.
    pass
18from collections import Counter
19 
20 
RISKY_PHRASES = [
    "chain of thought", "step by step reasoning", "let me think",
    "think carefully", "reason through",
]

# Number of characters scanned after a field's key when looking for escape
# sequences in the raw JSON text.
_ESCAPE_SCAN_WINDOW = 500


def _as_text(value):
    """Return *value* as stripped text, treating JSON ``null`` as empty."""
    return "" if value is None else str(value).strip()


def _check_messages(record, line_num, errors):
    """Append an error to *errors* if the record's 'messages' array is unusable for RFT."""
    if "messages" not in record:
        errors.append(f"Line {line_num}: Missing 'messages' field")
        return

    msgs = record["messages"]
    if not isinstance(msgs, list) or not msgs:
        errors.append(f"Line {line_num}: 'messages' must be a non-empty array")
    elif not all(isinstance(m, dict) for m in msgs):
        # Without this guard the .get() calls below raise AttributeError on
        # entries such as plain strings and abort the whole validation run.
        errors.append(f"Line {line_num}: every entry in 'messages' must be a JSON object")
    elif not any(m.get("role") == "user" for m in msgs):
        errors.append(f"Line {line_num}: 'messages' has no 'user' message")
    elif msgs[-1].get("role") != "user":
        errors.append(
            f"Line {line_num}: Last message must be 'user' role for RFT "
            f"(found '{msgs[-1].get('role')}') — unlike SFT, the model generates its own response"
        )


def _newline_escape_warnings(record, raw_line, extra_fields, line_num):
    """Return warnings for extra fields whose raw JSON holds single-escaped newlines."""
    found = []
    # Sorted so the order of warnings does not depend on set iteration order.
    for field in sorted(extra_fields):
        # A real newline in the parsed value means the raw JSON used "\n".
        if "\n" not in str(record.get(field, "")):
            continue
        start = raw_line.find(f'"{field}"')
        if start == -1:
            continue
        # Plain substring checks on a bounded window instead of a regex over
        # the whole line, which avoids any backtracking cost.
        region = raw_line[start:start + _ESCAPE_SCAN_WINDOW]
        if "\\n" in region and "\\\\n" not in region:
            found.append(
                f"Line {line_num}: '{field}' contains \\n sequences — "
                "if this is grader source code embedded in JSON, "
                "ensure newlines are escaped as \\\\n."
            )
    return found


def _moderation_warning(record, line_num):
    """Return a warning for the first risky phrase found in the record, else None."""
    all_text = json.dumps(record).lower()
    for phrase in RISKY_PHRASES:
        if phrase in all_text:
            return f"Line {line_num}: Contains '{phrase}' — may trigger Azure content moderation filter."
    return None


def _print_report(filepath, total, errors, warnings, field_counts):
    """Print the human-readable validation report to stdout."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"RFT Validation Report: {filepath}")
    print(rule)
    print(f"Total records: {total}")
    print(f"Errors: {len(errors)}")
    print(f"Warnings: {len(warnings)}")

    if field_counts:
        print("\nGrader fields found:")
        for field, count in field_counts.most_common():
            print(f"  • '{field}' — in {count}/{total} records")

    if errors:
        print("\n❌ ERRORS (must fix):")
        for e in errors[:20]:
            print(f"  • {e}")
        if len(errors) > 20:
            print(f"  ... and {len(errors) - 20} more errors")

    if warnings:
        print("\n⚠️  WARNINGS:")
        for w in warnings[:10]:
            print(f"  • {w}")
        if len(warnings) > 10:
            print(f"  ... and {len(warnings) - 10} more warnings")

    # RFT-specific guidance
    if total > 0:
        print("\n💡 RFT tips:")
        print("  • Ensure your training grader matches your eval grader (alignment gotcha)")
        print("  • Start with reasoning_effort='medium', pass_threshold=0.5")
        print("  • RFT is primarily for o-series models (o4-mini). Check Azure docs for the latest supported model list.")

    if not errors:
        print("\n✅ Data is valid for RFT fine-tuning!")
    else:
        print(f"\n❌ Fix {len(errors)} error(s) before submitting.")


def validate_rft(filepath, expected_field=None):
    """Validate an RFT training JSONL file and print a report.

    Each non-blank line must be a JSON object with a non-empty 'messages'
    array whose last entry has role 'user', plus at least one extra
    (grader) field. Warnings cover single-escaped newlines in grader fields,
    phrases listed in RISKY_PHRASES, inconsistent grader-field sets between
    records, and low-diversity or very long grader values.

    Args:
        filepath: Path of the JSONL file. It is read as UTF-8; a leading
            byte-order mark is accepted and ignored.
        expected_field: Optional name of a grader field that every record
            must carry with a non-empty value. When omitted, any field other
            than 'messages' counts as a grader field.

    Returns:
        None. The report is written to stdout.

    Raises:
        SystemExit: With exit code 1 when at least one error was found.
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    errors = []
    warnings = []
    total = 0
    # (file line number, extra-field set) for every successfully parsed record,
    # so later messages can cite real line numbers.
    schemas = []
    field_counts = Counter()
    grader_values = []

    # "utf-8-sig" strips a leading BOM when present and otherwise behaves like
    # "utf-8"; with plain "utf-8" the BOM makes line 1 fail JSON parsing.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for line_num, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                continue
            total += 1

            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                errors.append(f"Line {line_num}: Invalid JSON — {e}")
                continue

            if not isinstance(record, dict):
                # Valid JSON that is not an object (array, number, ...) has no
                # fields to inspect; report it instead of crashing below.
                errors.append(
                    f"Line {line_num}: Record must be a JSON object "
                    f"(found {type(record).__name__})"
                )
                continue

            _check_messages(record, line_num, errors)

            # Everything besides 'messages' is treated as a grader field.
            extra_fields = set(record) - {"messages"}
            schemas.append((line_num, extra_fields))
            field_counts.update(extra_fields)

            if expected_field:
                if expected_field not in record:
                    errors.append(f"Line {line_num}: Missing expected field '{expected_field}'")
                else:
                    val = _as_text(record[expected_field])
                    if val:
                        grader_values.append(val)
                    else:
                        errors.append(f"Line {line_num}: '{expected_field}' is empty")
            elif not extra_fields:
                errors.append(
                    f"Line {line_num}: No grader fields found — RFT requires at least "
                    "one field beyond 'messages' (e.g. 'answer', 'reference_code')"
                )
            else:
                # Collect values from extra fields for the diversity check.
                for field in sorted(extra_fields):
                    val = _as_text(record[field])
                    if val:
                        grader_values.append(val)

            # Run the escaping check whether or not an expected field was named.
            warnings.extend(
                _newline_escape_warnings(record, raw_line, extra_fields, line_num)
            )

            risk = _moderation_warning(record, line_num)
            if risk:
                warnings.append(risk)

    if total == 0:
        errors.append("No records found — file is empty or contains only blank lines")

    # Compare every record's grader-field set with the first non-empty one.
    with_fields = [(num, fields) for num, fields in schemas if fields]
    if len(with_fields) > 1:
        ref_line, ref_schema = with_fields[0]
        differing = [num for num, fields in with_fields[1:] if fields != ref_schema]
        if differing:
            warnings.append(
                f"Inconsistent grader fields across examples — "
                f"line {ref_line} has {sorted(ref_schema)}, but {len(differing)} "
                f"line(s) differ (e.g. line {differing[0]}). "
                "Ensure your grader handles all field variants."
            )

    # Diversity check
    if grader_values:
        # A single collected value is trivially "identical"; require two or more.
        if len(grader_values) > 1 and len(set(grader_values)) == 1:
            warnings.append(
                f"All grader field values are identical ('{grader_values[0][:50]}...') — "
                "grader may not learn effectively"
            )
        avg_len = sum(len(v) for v in grader_values) / len(grader_values)
        if avg_len > 500:
            warnings.append(
                f"Average grader field value length is {avg_len:.0f} chars — "
                "consider using a model_grader instead of string_check"
            )

    _print_report(filepath, total, errors, warnings, field_counts)

    if errors:
        sys.exit(1)
190 
191 
if __name__ == "__main__":
    # Command-line entry point: one positional file path plus an optional
    # grader-field name, both handed straight to validate_rft().
    cli = argparse.ArgumentParser(
        description="Validate RFT (Reinforcement Fine-Tuning) JSONL files for Azure AI Foundry.",
    )
    cli.add_argument("filepath", help="Path to the JSONL file to validate")
    expected_field_help = (
        "Specific grader field name to require (e.g. 'answer'). "
        "If omitted, any extra field beyond 'messages' is accepted."
    )
    cli.add_argument("--expected-field", default=None, help=expected_field_help)
    options = cli.parse_args()
    validate_rft(options.filepath, expected_field=options.expected_field)
205
Preparing the source view

Microsoft Foundry Skill

finetuning/scripts/validate/validate_rft.py