Source from repo
Microsoft Foundry Skill

Build and deploy AI applications on Azure AI Foundry using Microsoft's model catalog and AI services
microsoftGitHub microsoftOfficialSource repo Original GitHub link Publisher page
Files
152
Skill
n/a
Size
941.0 KB
Entrypoint
SKILL.md
Format
git-repo
Open file
finetuning/scripts/validate/validate_rft.py

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code205 linesFree
finetuning/scripts/validate/validate_rft.py
1#!/usr/bin/env python3
2"""Validate RFT (Reinforcement Fine-Tuning) JSONL files for Azure AI Foundry.
3 
4Adapted from foundry-ft agent with critical additions from our platform gotchas:
5- Grader escaping warnings for newlines (\\n must be \\\\n in JSON strings)
6- Content moderation risk detection ("chain of thought" triggers RAI filter)
7- Reference answer diversity check
8"""
9import argparse
10import json
11import sys
12 
# Force UTF-8 on the standard streams so the non-ASCII characters printed by
# the validation report do not fail on consoles with a legacy default encoding.
# Each stream is handled on its own so that a failure on one (for example a
# replaced stdout object without ``reconfigure``) does not skip the other.
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8")
    except (AttributeError, OSError):
        # Stream not reconfigurable (older Python or non-tty); default encoding is fine
        pass
18from collections import Counter
19 
20 
# Phrases that, when present anywhere in a record, produce a content
# moderation warning. Matching is done on the lower-cased JSON dump.
RISKY_PHRASES = [
    "chain of thought", "step by step reasoning", "let me think",
    "think carefully", "reason through",
]


def _message_errors(record, line_num):
    """Return the error strings for the 'messages' field of one parsed record.

    At most one error is returned; the checks run from most to least basic so
    that later checks can rely on the shape established by earlier ones.
    """
    if "messages" not in record:
        return [f"Line {line_num}: Missing 'messages' field"]

    msgs = record["messages"]
    if not isinstance(msgs, list) or not msgs:
        return [f"Line {line_num}: 'messages' must be a non-empty array"]

    # Guard the .get() calls below: a string or number inside the array would
    # otherwise raise AttributeError and abort the whole validation run.
    if not all(isinstance(m, dict) for m in msgs):
        return [f"Line {line_num}: every entry in 'messages' must be a JSON object"]

    if not any(m.get("role") == "user" for m in msgs):
        return [f"Line {line_num}: 'messages' has no 'user' message"]

    last_role = msgs[-1].get("role")
    if last_role != "user":
        return [
            f"Line {line_num}: Last message must be 'user' role for RFT "
            f"(found '{last_role}') — unlike SFT, the model generates its own response"
        ]
    return []


def _newline_escape_warnings(record, extra_fields, raw_line, line_num):
    """Return warnings for grader fields whose newlines look single-escaped.

    Plain substring checks on the raw line are used instead of a regex over
    the raw JSON, which risks catastrophic backtracking.
    """
    found = []
    # Sorted so the warning order does not depend on set iteration order.
    for field in sorted(extra_fields):
        if "\n" not in str(record.get(field, "")):
            continue
        # The parsed value holds real newlines; inspect the raw text that
        # follows the field name (capped at 500 characters).
        start = raw_line.find(f'"{field}"')
        if start == -1:
            continue
        region = raw_line[start:start + 500]
        # Single-escaped \n present and no double-escaped \\n in the region.
        if "\\n" in region and "\\\\n" not in region:
            found.append(
                f"Line {line_num}: '{field}' contains \\n sequences — "
                "if this is grader source code embedded in JSON, "
                "ensure newlines are escaped as \\\\n."
            )
    return found


def _print_report(filepath, total, errors, warnings, field_counts):
    """Print the human-readable validation report to stdout."""
    banner = "=" * 60
    print("\n" + banner)
    print(f"RFT Validation Report: {filepath}")
    print(banner)
    print(f"Total records: {total}")
    print(f"Errors: {len(errors)}")
    print(f"Warnings: {len(warnings)}")

    if field_counts:
        print("\nGrader fields found:")
        for field, count in field_counts.most_common():
            print(f"  • '{field}' — in {count}/{total} records")

    if errors:
        print("\n❌ ERRORS (must fix):")
        for e in errors[:20]:
            print(f"  • {e}")
        if len(errors) > 20:
            print(f"  ... and {len(errors) - 20} more errors")

    if warnings:
        print("\n⚠️  WARNINGS:")
        for w in warnings[:10]:
            print(f"  • {w}")
        if len(warnings) > 10:
            print(f"  ... and {len(warnings) - 10} more warnings")

    # RFT-specific guidance
    if total > 0:
        print("\n💡 RFT tips:")
        print("  • Ensure your training grader matches your eval grader (alignment gotcha)")
        print("  • Start with reasoning_effort='medium', pass_threshold=0.5")
        print("  • RFT is primarily for o-series models (o4-mini). Check Azure docs for the latest supported model list.")


def validate_rft(filepath, expected_field=None):
    """Validate an RFT JSONL file and print a report.

    Each non-blank line must be a JSON object with a non-empty 'messages'
    array whose last entry has role 'user', plus at least one grader field
    (or the specific ``expected_field`` when given).

    Args:
        filepath: Path to the JSONL file. It is read as UTF-8; a leading
            byte-order mark is accepted and ignored.
        expected_field: Name of a grader field every record must carry with a
            non-empty value. When omitted, any field other than 'messages'
            counts as a grader field.

    Returns:
        None. The report is written to stdout.

    Raises:
        SystemExit: With exit code 1 when at least one error was found.
    """
    errors = []
    warnings = []
    total = 0
    # (file line number, extra field names) for every parsed record, so the
    # schema-consistency warning can point at real file lines.
    schemas = []
    field_counts = Counter()
    grader_values = []

    # 'utf-8-sig' strips a leading BOM if present and otherwise behaves like
    # plain UTF-8, so files saved with or without a BOM both parse.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for line_num, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                continue
            total += 1

            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                errors.append(f"Line {line_num}: Invalid JSON — {e}")
                continue

            # Valid JSON that is not an object (array, string, number, ...)
            # cannot carry 'messages' or grader fields.
            if not isinstance(record, dict):
                errors.append(
                    f"Line {line_num}: Record must be a JSON object "
                    f"(found {type(record).__name__})"
                )
                continue

            errors.extend(_message_errors(record, line_num))

            # Detect extra fields (grader fields) beyond 'messages'
            extra_fields = set(record) - {"messages"}
            schemas.append((line_num, extra_fields))
            field_counts.update(extra_fields)

            if expected_field:
                if expected_field not in record:
                    errors.append(f"Line {line_num}: Missing expected field '{expected_field}'")
                else:
                    val = str(record[expected_field]).strip()
                    if val:
                        grader_values.append(val)
                    else:
                        errors.append(f"Line {line_num}: '{expected_field}' is empty")
            elif not extra_fields:
                errors.append(
                    f"Line {line_num}: No grader fields found — RFT requires at least "
                    "one field beyond 'messages' (e.g. 'answer', 'reference_code')"
                )
            else:
                # Collect values from extra fields for the diversity check
                for field in sorted(extra_fields):
                    val = str(record[field]).strip()
                    if val:
                        grader_values.append(val)
                warnings.extend(
                    _newline_escape_warnings(record, extra_fields, raw_line, line_num)
                )

            # Content moderation risk: report only the first matching phrase
            all_text = json.dumps(record).lower()
            risky = next((p for p in RISKY_PHRASES if p in all_text), None)
            if risky is not None:
                warnings.append(
                    f"Line {line_num}: Contains '{risky}' — may trigger Azure content moderation filter."
                )

    # A file with no records at all is not usable training data.
    if total == 0:
        errors.append("File contains no records — RFT requires at least one training example")

    # Check for inconsistent extra-field schemas across examples. Records
    # without any extra field are ignored here.
    populated = [(num, fields) for num, fields in schemas if fields]
    if len(populated) > 1:
        first_line, first_schema = populated[0]
        differing = [num for num, fields in populated[1:] if fields != first_schema]
        if differing:
            warnings.append(
                "Inconsistent grader fields across examples — "
                f"line {first_line} has {sorted(first_schema)}, but {len(differing)} "
                f"line(s) differ (e.g. line {differing[0]}). "
                "Ensure your grader handles all field variants."
            )

    # Diversity check
    if grader_values:
        # A single value is trivially "identical"; only warn when several
        # values were collected and they are all the same.
        if len(grader_values) > 1 and len(set(grader_values)) == 1:
            warnings.append(
                f"All grader field values are identical ('{grader_values[0][:50]}...') — "
                "grader may not learn effectively"
            )
        avg_len = sum(len(v) for v in grader_values) / len(grader_values)
        if avg_len > 500:
            warnings.append(
                f"Average grader field value length is {avg_len:.0f} chars — "
                "consider using a model_grader instead of string_check"
            )

    _print_report(filepath, total, errors, warnings, field_counts)

    if not errors:
        print("\n✅ Data is valid for RFT fine-tuning!")
    else:
        print(f"\n❌ Fix {len(errors)} error(s) before submitting.")
        sys.exit(1)
190 
191 
if __name__ == "__main__":
    # Command-line entry point: one positional path plus an optional grader
    # field name, both forwarded unchanged to validate_rft().
    cli = argparse.ArgumentParser(
        description="Validate RFT (Reinforcement Fine-Tuning) JSONL files for Azure AI Foundry.",
    )
    cli.add_argument(
        "filepath",
        help="Path to the JSONL file to validate",
    )
    expected_field_help = (
        "Specific grader field name to require (e.g. 'answer'). "
        "If omitted, any extra field beyond 'messages' is accepted."
    )
    cli.add_argument("--expected-field", default=None, help=expected_field_help)
    options = cli.parse_args()
    validate_rft(options.filepath, expected_field=options.expected_field)
205
Preparing the source view

Microsoft Foundry Skill

finetuning/scripts/validate/validate_rft.py