Source from repo

Microsoft Foundry Skill

Deploy, evaluate, and manage AI agents end-to-end on Microsoft Azure AI Foundry

microsoft · GitHub · microsoft · Official · Source repo · Original GitHub link · Publisher page

Files

154

Skill

n/a

Size

976.2 KB

Entrypoint

SKILL.md

Format

git-repo

Open file

finetuning/scripts/generate_distillation_data.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 255 lines · Free

finetuning/scripts/generate_distillation_data.py

1# /// script
2# dependencies = [
3#   "openai>=1.0",
4#   "azure-identity",
5# ]
6# ///
7"""
8generate_distillation_data.py — Generate training data from a teacher model for distillation.
9 
10Creates a synthetic SFT dataset by:
111. Generating diverse prompts from combinatorial axes (topics × formats × contexts)
122. Having the teacher model produce responses
133. Quality-grading each response with an LLM judge
144. Filtering low-quality examples
155. Splitting into train/val/test JSONL files
16 
17Usage:
18  python generate_distillation_data.py \
19      --teacher gpt-4.1-mini \
20      --system-prompt "You are a formal business writer." \
21      --topics "earnings,risk,compliance" \
22      --num-prompts 300 \
23      --min-score 7.0 \
24      --output-dir ./my_dataset
25 
26  # Or with a prompts file (one prompt per line):
27  python generate_distillation_data.py \
28      --teacher gpt-4.1-mini \
29      --prompts-file my_prompts.txt \
30      --output-dir ./my_dataset
31"""
32 
33import json
34import os
35import random
36import re
37import sys
38 
39try:
40    sys.stdout.reconfigure(encoding="utf-8")
41    sys.stderr.reconfigure(encoding="utf-8")
42except (AttributeError, OSError):
43    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine
44import time
45sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
46from common import HelpOnErrorParser, get_clients, _clamp_score
47 
48import openai
49 
50 
def verify_deployment(client, model):
    """Check that a model deployment exists by sending it a one-token probe.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Deployment name to probe.

    Returns:
        ``False`` only when the service answers with ``openai.NotFoundError``;
        ``True`` when the probe succeeds or fails in any other way.
    """
    probe_messages = [{"role": "user", "content": "Hi"}]
    try:
        client.chat.completions.create(
            model=model,
            messages=probe_messages,
            max_completion_tokens=1,
        )
    except openai.NotFoundError:
        return False
    except Exception:
        # Any other failure (rate limit, etc.) is taken to mean the
        # deployment is there, so fall through to the positive result.
        pass
    return True
64 
65 
def generate_combinatorial_prompts(topics, formats, contexts, n):
    """Generate ``n`` prompts by sampling one value from each axis.

    Every prompt independently draws a topic, a format and a context with
    ``random.choice`` (sampling with replacement, so duplicates can occur).

    Args:
        topics: Non-empty sequence of topic strings.
        formats: Non-empty sequence of format phrases (e.g. "a brief summary").
        contexts: Non-empty sequence of context sentences; a blank entry
            means "no context".
        n: Number of prompts to produce.

    Returns:
        A list of ``n`` prompt strings (empty when ``n <= 0``).

    Raises:
        IndexError: If any axis is empty (raised by ``random.choice``).
    """
    prompts = []
    for _ in range(n):
        # Draw order (topic, format, context) is kept stable so a seeded
        # ``random`` yields the same selections as before.
        topic = random.choice(topics)
        fmt = random.choice(formats)
        context = random.choice(contexts)
        request = f"Write {fmt} about: {topic}."
        if context and str(context).strip():
            prompts.append(f"Context: {context}\n\n{request}")
        else:
            # A blank context used to emit a dangling "Context: " header and
            # two blank lines; send only the request instead.
            prompts.append(request)
    return prompts
75 
76 
def teacher_generate(client, model, system_prompt, prompt, retries=3):
    """Ask the teacher model for one completion, retrying on errors.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Teacher deployment name.
        system_prompt: Text sent as the system message.
        prompt: Text sent as the user message.
        retries: Maximum number of attempts.

    Returns:
        The content of the first choice, or ``None`` when every attempt
        raised (or when ``retries`` is not positive).
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    final_attempt = retries - 1
    attempt = 0
    while attempt < retries:
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=conversation,
                temperature=0.7,
                max_completion_tokens=1024,
            )
            first_choice = completion.choices[0]
            return first_choice.message.content
        except Exception as err:
            if attempt >= final_attempt:
                print(f"  Failed after {retries} attempts: {err}")
                return None
            # Linear back-off: 2s, 4s, ... before the next attempt.
            time.sleep(2 * (attempt + 1))
        attempt += 1
    return None
97 
98 
# Judge prompt template. ``{output}`` is replaced with the text to grade; the
# doubled braces are literal braces in the JSON example shown to the judge.
QUALITY_PROMPT = """Rate this AI-generated text on quality dimensions (1-10 each).

## Text to evaluate
{output}

## Dimensions
**Accuracy** (1-10): Is the content factually sound and coherent?
**Quality** (1-10): Is it well-written, clear, and professional?
**Task-fit** (1-10): Does it match the requested format and purpose?

Return ONLY JSON: {{"accuracy": <int>, "quality": <int>, "task_fit": <int>}}"""


def grade_output(client, judge_model, output, retries=3):
    """Ask the judge model to score ``output`` and return the parsed scores.

    The judge is prompted with ``QUALITY_PROMPT``; the first ``{...}`` span
    in its reply is parsed as JSON and each value is passed through
    ``_clamp_score``.

    Args:
        client: Object exposing ``chat.completions.create``.
        judge_model: Judge deployment name.
        output: Text to be graded.
        retries: Maximum number of attempts.

    Returns:
        A dict mapping each key of the judge's JSON object to its clamped
        score, or ``None`` when no attempt produced parseable scores. The
        last failure is printed once so a broken judge is not mistaken for
        a low-quality sample.
    """
    last_failure = None
    for attempt in range(retries):
        try:
            resp = client.chat.completions.create(
                model=judge_model,
                messages=[{"role": "user", "content": QUALITY_PROMPT.format(output=output)}],
                temperature=0.0,
                max_completion_tokens=100,
            )
            text = (resp.choices[0].message.content or "").strip()
            # Flat JSON object only: first "{" up to the next "}".
            match = re.search(r'\{[^}]+\}', text)
            if match:
                scores = json.loads(match.group())
                return {k: _clamp_score(v) for k, v in scores.items()}
            last_failure = "judge reply contained no JSON object"
        except Exception as e:
            last_failure = e
            if attempt < retries - 1:
                time.sleep(2)
    if last_failure is not None:
        print(f"  Grading failed after {retries} attempts: {last_failure}")
    return None
130 
131 
def _split_csv(value):
    """Split a comma-separated CLI value into stripped, non-empty items."""
    return [item.strip() for item in (value or "").split(",") if item.strip()]


def main():
    """Command-line entry point: build a distillation dataset end to end.

    Steps:
        0. Verify the teacher (and, if different, judge) deployment exists.
        1. Load prompts from ``--prompts-file`` or generate them from the
           topic/format/context axes.
        2. Collect one teacher response per prompt.
        3. Grade responses with the judge and keep those whose average
           score is at least ``--min-score`` (unless ``--skip-grading``).
        4. Shuffle, split and write ``train.jsonl``, ``validation.jsonl``
           and ``test.jsonl`` into ``--output-dir``.

    Exits with status 1 when the split fractions are invalid, a deployment
    is not found, there are no prompts, or no example survives filtering.
    """
    default_formats = "a concise response,a brief summary,a detailed explanation"

    parser = HelpOnErrorParser(description="Generate distillation training data from a teacher model")
    parser.add_argument("--base-url", default=os.environ.get("OPENAI_BASE_URL"),
                        help="Project /v1/ URL (preferred)")
    parser.add_argument("--endpoint", default=os.environ.get("AZURE_OPENAI_ENDPOINT"),
                        help="Azure OpenAI endpoint (fallback)")
    parser.add_argument("--project-endpoint", default=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
                        help="Azure AI project endpoint (Foundry SDK)")
    parser.add_argument("--api-key", default=os.environ.get("AZURE_OPENAI_API_KEY"))
    parser.add_argument("--teacher", required=True, help="Teacher model deployment name")
    parser.add_argument("--judge", default=None, help="Judge model (default: same as teacher)")
    parser.add_argument("--system-prompt", default="You are a helpful assistant.", help="System prompt for teacher")

    # Prompt generation (either combinatorial or from file)
    parser.add_argument("--prompts-file", help="File with one prompt per line (skips combinatorial generation)")
    parser.add_argument("--topics", help="Comma-separated topics for combinatorial prompts")
    parser.add_argument("--formats", default=default_formats,
                        help="Comma-separated output formats")
    parser.add_argument("--contexts", default="", help="Comma-separated context sentences")
    parser.add_argument("--num-prompts", type=int, default=300, help="Number of prompts to generate")

    # Quality
    parser.add_argument("--min-score", type=float, default=7.0, help="Minimum average quality score to keep")
    parser.add_argument("--skip-grading", action="store_true", help="Skip quality grading (keep all)")

    # Output
    parser.add_argument("--output-dir", default="./distillation_data", help="Output directory")
    parser.add_argument("--train-split", type=float, default=0.8)
    parser.add_argument("--val-split", type=float, default=0.1)

    args = parser.parse_args()

    # Fail fast on impossible split fractions, before any API call is made.
    # The small tolerance absorbs float rounding (e.g. 0.7 + 0.3).
    if (args.train_split < 0 or args.val_split < 0
            or args.train_split + args.val_split > 1 + 1e-9):
        print("  ERROR: --train-split and --val-split must be non-negative and sum to at most 1.0.")
        sys.exit(1)

    # get_clients returns a pair; only the client is used here.
    client, _method = get_clients(
        base_url=args.base_url, azure_endpoint=args.endpoint,
        project_endpoint=args.project_endpoint, api_key=args.api_key
    )
    judge = args.judge or args.teacher

    # Step 0: Verify deployments exist
    print(f"Verifying deployment '{args.teacher}'...")
    if not verify_deployment(client, args.teacher):
        print(f"  ERROR: Deployment '{args.teacher}' not found. Available deployments can be listed in Azure Portal.")
        sys.exit(1)
    print("  ✅ Teacher deployment verified.")

    if judge != args.teacher:
        print(f"Verifying judge deployment '{judge}'...")
        if not verify_deployment(client, judge):
            print(f"  ERROR: Judge deployment '{judge}' not found.")
            sys.exit(1)
        print("  ✅ Judge deployment verified.")

    # Step 1: Generate or load prompts
    if args.prompts_file:
        with open(args.prompts_file, encoding="utf-8") as pf:
            prompts = [line.strip() for line in pf if line.strip()]
        print(f"Loaded {len(prompts)} prompts from {args.prompts_file}")
    else:
        # Empty items ("a,,b", trailing commas) are dropped on every axis so
        # no prompt is built around a blank topic or format.
        topics = _split_csv(args.topics) or ["general knowledge"]
        formats = _split_csv(args.formats) or _split_csv(default_formats)
        contexts = _split_csv(args.contexts) or [""]
        prompts = generate_combinatorial_prompts(topics, formats, contexts, args.num_prompts)
        print(f"Generated {len(prompts)} prompts ({len(topics)} topics × {len(formats)} formats × {len(contexts)} contexts)")

    if not prompts:
        print("  ERROR: No prompts to process.")
        sys.exit(1)

    # Step 2: Teacher generates responses
    print(f"\nTeacher ({args.teacher}) generating responses...")
    examples = []
    for i, prompt in enumerate(prompts):
        response = teacher_generate(client, args.teacher, args.system_prompt, prompt)
        if response:
            examples.append({"prompt": prompt, "response": response})
        if (i + 1) % 25 == 0:
            print(f"  {i+1}/{len(prompts)} ({len(examples)} successful)")
    print(f"  Teacher produced {len(examples)}/{len(prompts)} responses")

    # Step 3: Quality grade and filter
    if not args.skip_grading:
        print(f"\nGrading with {judge}...")
        ungraded = 0
        for i, ex in enumerate(examples):
            scores = grade_output(client, judge, ex["response"])
            if scores:
                ex["scores"] = scores
                ex["avg_score"] = sum(scores.values()) / len(scores)
            else:
                # No usable judge verdict: score as 0 so the filter decides.
                ex["avg_score"] = 0
                ungraded += 1
            if (i + 1) % 25 == 0:
                print(f"  Graded {i+1}/{len(examples)}")

        filtered = [ex for ex in examples if ex["avg_score"] >= args.min_score]
        avgs = [ex["avg_score"] for ex in examples if ex["avg_score"] > 0]
        print(f"  Passed filter (>= {args.min_score}): {len(filtered)}/{len(examples)}")
        if ungraded:
            print(f"  Could not grade {ungraded} example(s); they were scored as 0")
        if avgs:
            print(f"  Scores: min={min(avgs):.1f}, max={max(avgs):.1f}, mean={sum(avgs)/len(avgs):.1f}")
    else:
        filtered = examples
        print(f"Skipping grading — keeping all {len(filtered)} examples")

    # An empty dataset is a failed run, not a success: do not write empty
    # files and report "Done".
    if not filtered:
        print("  ERROR: No examples left to write (teacher produced none or all were filtered out).")
        sys.exit(1)

    # Step 4: Convert to SFT format and split
    sft_data = [{"messages": [
        {"role": "system", "content": args.system_prompt},
        {"role": "user", "content": ex["prompt"]},
        {"role": "assistant", "content": ex["response"]},
    ]} for ex in filtered]

    random.shuffle(sft_data)
    n = len(sft_data)
    t_end = int(n * args.train_split)
    v_end = int(n * (args.train_split + args.val_split))
    splits = {"train": sft_data[:t_end], "validation": sft_data[t_end:v_end], "test": sft_data[v_end:]}

    os.makedirs(args.output_dir, exist_ok=True)
    for name, data in splits.items():
        path = os.path.join(args.output_dir, f"{name}.jsonl")
        with open(path, "w", encoding="utf-8") as f:
            for ex in data:
                f.write(json.dumps(ex, ensure_ascii=False) + "\n")
        print(f"  {name}: {len(data)} examples → {path}")

    print(f"\n✅ Done! Dataset ready in {args.output_dir}/")


if __name__ == "__main__":
    main()
255

Marketplace

Source from repo

Microsoft Foundry Skill

Deploy, evaluate, and manage AI agents end-to-end on Microsoft Azure AI Foundry

microsoft · GitHub · microsoft · Official · Source repo · Original GitHub link · Publisher page

Files

154

Skill

n/a

Size

976.2 KB

Entrypoint

SKILL.md

Format

git-repo

Open file

finetuning/scripts/generate_distillation_data.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 255 lines · Free

finetuning/scripts/generate_distillation_data.py

1# /// script
2# dependencies = [
3#   "openai>=1.0",
4#   "azure-identity",
5# ]
6# ///
7"""
8generate_distillation_data.py — Generate training data from a teacher model for distillation.
9 
10Creates a synthetic SFT dataset by:
111. Generating diverse prompts from combinatorial axes (topics × formats × contexts)
122. Having the teacher model produce responses
133. Quality-grading each response with an LLM judge
144. Filtering low-quality examples
155. Splitting into train/val/test JSONL files
16 
17Usage:
18  python generate_distillation_data.py \
19      --teacher gpt-4.1-mini \
20      --system-prompt "You are a formal business writer." \
21      --topics "earnings,risk,compliance" \
22      --num-prompts 300 \
23      --min-score 7.0 \
24      --output-dir ./my_dataset
25 
26  # Or with a prompts file (one prompt per line):
27  python generate_distillation_data.py \
28      --teacher gpt-4.1-mini \
29      --prompts-file my_prompts.txt \
30      --output-dir ./my_dataset
31"""
32 
33import json
34import os
35import random
36import re
37import sys
38 
39try:
40    sys.stdout.reconfigure(encoding="utf-8")
41    sys.stderr.reconfigure(encoding="utf-8")
42except (AttributeError, OSError):
43    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine
44import time
45sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
46from common import HelpOnErrorParser, get_clients, _clamp_score
47 
48import openai
49 
50 
def verify_deployment(client, model):
    """Check that a model deployment exists by sending it a one-token probe.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Deployment name to probe.

    Returns:
        ``False`` only when the service answers with ``openai.NotFoundError``;
        ``True`` when the probe succeeds or fails in any other way.
    """
    probe_messages = [{"role": "user", "content": "Hi"}]
    try:
        client.chat.completions.create(
            model=model,
            messages=probe_messages,
            max_completion_tokens=1,
        )
    except openai.NotFoundError:
        return False
    except Exception:
        # Any other failure (rate limit, etc.) is taken to mean the
        # deployment is there, so fall through to the positive result.
        pass
    return True
64 
65 
def generate_combinatorial_prompts(topics, formats, contexts, n):
    """Generate ``n`` prompts by sampling one value from each axis.

    Every prompt independently draws a topic, a format and a context with
    ``random.choice`` (sampling with replacement, so duplicates can occur).

    Args:
        topics: Non-empty sequence of topic strings.
        formats: Non-empty sequence of format phrases (e.g. "a brief summary").
        contexts: Non-empty sequence of context sentences; a blank entry
            means "no context".
        n: Number of prompts to produce.

    Returns:
        A list of ``n`` prompt strings (empty when ``n <= 0``).

    Raises:
        IndexError: If any axis is empty (raised by ``random.choice``).
    """
    prompts = []
    for _ in range(n):
        # Draw order (topic, format, context) is kept stable so a seeded
        # ``random`` yields the same selections as before.
        topic = random.choice(topics)
        fmt = random.choice(formats)
        context = random.choice(contexts)
        request = f"Write {fmt} about: {topic}."
        if context and str(context).strip():
            prompts.append(f"Context: {context}\n\n{request}")
        else:
            # A blank context used to emit a dangling "Context: " header and
            # two blank lines; send only the request instead.
            prompts.append(request)
    return prompts
75 
76 
def teacher_generate(client, model, system_prompt, prompt, retries=3):
    """Ask the teacher model for one completion, retrying on errors.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Teacher deployment name.
        system_prompt: Text sent as the system message.
        prompt: Text sent as the user message.
        retries: Maximum number of attempts.

    Returns:
        The content of the first choice, or ``None`` when every attempt
        raised (or when ``retries`` is not positive).
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    final_attempt = retries - 1
    attempt = 0
    while attempt < retries:
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=conversation,
                temperature=0.7,
                max_completion_tokens=1024,
            )
            first_choice = completion.choices[0]
            return first_choice.message.content
        except Exception as err:
            if attempt >= final_attempt:
                print(f"  Failed after {retries} attempts: {err}")
                return None
            # Linear back-off: 2s, 4s, ... before the next attempt.
            time.sleep(2 * (attempt + 1))
        attempt += 1
    return None
97 
98 
# Judge prompt template. ``{output}`` is replaced with the text to grade; the
# doubled braces are literal braces in the JSON example shown to the judge.
QUALITY_PROMPT = """Rate this AI-generated text on quality dimensions (1-10 each).

## Text to evaluate
{output}

## Dimensions
**Accuracy** (1-10): Is the content factually sound and coherent?
**Quality** (1-10): Is it well-written, clear, and professional?
**Task-fit** (1-10): Does it match the requested format and purpose?

Return ONLY JSON: {{"accuracy": <int>, "quality": <int>, "task_fit": <int>}}"""


def grade_output(client, judge_model, output, retries=3):
    """Ask the judge model to score ``output`` and return the parsed scores.

    The judge is prompted with ``QUALITY_PROMPT``; the first ``{...}`` span
    in its reply is parsed as JSON and each value is passed through
    ``_clamp_score``.

    Args:
        client: Object exposing ``chat.completions.create``.
        judge_model: Judge deployment name.
        output: Text to be graded.
        retries: Maximum number of attempts.

    Returns:
        A dict mapping each key of the judge's JSON object to its clamped
        score, or ``None`` when no attempt produced parseable scores. The
        last failure is printed once so a broken judge is not mistaken for
        a low-quality sample.
    """
    last_failure = None
    for attempt in range(retries):
        try:
            resp = client.chat.completions.create(
                model=judge_model,
                messages=[{"role": "user", "content": QUALITY_PROMPT.format(output=output)}],
                temperature=0.0,
                max_completion_tokens=100,
            )
            text = (resp.choices[0].message.content or "").strip()
            # Flat JSON object only: first "{" up to the next "}".
            match = re.search(r'\{[^}]+\}', text)
            if match:
                scores = json.loads(match.group())
                return {k: _clamp_score(v) for k, v in scores.items()}
            last_failure = "judge reply contained no JSON object"
        except Exception as e:
            last_failure = e
            if attempt < retries - 1:
                time.sleep(2)
    if last_failure is not None:
        print(f"  Grading failed after {retries} attempts: {last_failure}")
    return None
130 
131 
def _split_csv(value):
    """Split a comma-separated CLI value into stripped, non-empty items."""
    return [item.strip() for item in (value or "").split(",") if item.strip()]


def main():
    """Command-line entry point: build a distillation dataset end to end.

    Steps:
        0. Verify the teacher (and, if different, judge) deployment exists.
        1. Load prompts from ``--prompts-file`` or generate them from the
           topic/format/context axes.
        2. Collect one teacher response per prompt.
        3. Grade responses with the judge and keep those whose average
           score is at least ``--min-score`` (unless ``--skip-grading``).
        4. Shuffle, split and write ``train.jsonl``, ``validation.jsonl``
           and ``test.jsonl`` into ``--output-dir``.

    Exits with status 1 when the split fractions are invalid, a deployment
    is not found, there are no prompts, or no example survives filtering.
    """
    default_formats = "a concise response,a brief summary,a detailed explanation"

    parser = HelpOnErrorParser(description="Generate distillation training data from a teacher model")
    parser.add_argument("--base-url", default=os.environ.get("OPENAI_BASE_URL"),
                        help="Project /v1/ URL (preferred)")
    parser.add_argument("--endpoint", default=os.environ.get("AZURE_OPENAI_ENDPOINT"),
                        help="Azure OpenAI endpoint (fallback)")
    parser.add_argument("--project-endpoint", default=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
                        help="Azure AI project endpoint (Foundry SDK)")
    parser.add_argument("--api-key", default=os.environ.get("AZURE_OPENAI_API_KEY"))
    parser.add_argument("--teacher", required=True, help="Teacher model deployment name")
    parser.add_argument("--judge", default=None, help="Judge model (default: same as teacher)")
    parser.add_argument("--system-prompt", default="You are a helpful assistant.", help="System prompt for teacher")

    # Prompt generation (either combinatorial or from file)
    parser.add_argument("--prompts-file", help="File with one prompt per line (skips combinatorial generation)")
    parser.add_argument("--topics", help="Comma-separated topics for combinatorial prompts")
    parser.add_argument("--formats", default=default_formats,
                        help="Comma-separated output formats")
    parser.add_argument("--contexts", default="", help="Comma-separated context sentences")
    parser.add_argument("--num-prompts", type=int, default=300, help="Number of prompts to generate")

    # Quality
    parser.add_argument("--min-score", type=float, default=7.0, help="Minimum average quality score to keep")
    parser.add_argument("--skip-grading", action="store_true", help="Skip quality grading (keep all)")

    # Output
    parser.add_argument("--output-dir", default="./distillation_data", help="Output directory")
    parser.add_argument("--train-split", type=float, default=0.8)
    parser.add_argument("--val-split", type=float, default=0.1)

    args = parser.parse_args()

    # Fail fast on impossible split fractions, before any API call is made.
    # The small tolerance absorbs float rounding (e.g. 0.7 + 0.3).
    if (args.train_split < 0 or args.val_split < 0
            or args.train_split + args.val_split > 1 + 1e-9):
        print("  ERROR: --train-split and --val-split must be non-negative and sum to at most 1.0.")
        sys.exit(1)

    # get_clients returns a pair; only the client is used here.
    client, _method = get_clients(
        base_url=args.base_url, azure_endpoint=args.endpoint,
        project_endpoint=args.project_endpoint, api_key=args.api_key
    )
    judge = args.judge or args.teacher

    # Step 0: Verify deployments exist
    print(f"Verifying deployment '{args.teacher}'...")
    if not verify_deployment(client, args.teacher):
        print(f"  ERROR: Deployment '{args.teacher}' not found. Available deployments can be listed in Azure Portal.")
        sys.exit(1)
    print("  ✅ Teacher deployment verified.")

    if judge != args.teacher:
        print(f"Verifying judge deployment '{judge}'...")
        if not verify_deployment(client, judge):
            print(f"  ERROR: Judge deployment '{judge}' not found.")
            sys.exit(1)
        print("  ✅ Judge deployment verified.")

    # Step 1: Generate or load prompts
    if args.prompts_file:
        with open(args.prompts_file, encoding="utf-8") as pf:
            prompts = [line.strip() for line in pf if line.strip()]
        print(f"Loaded {len(prompts)} prompts from {args.prompts_file}")
    else:
        # Empty items ("a,,b", trailing commas) are dropped on every axis so
        # no prompt is built around a blank topic or format.
        topics = _split_csv(args.topics) or ["general knowledge"]
        formats = _split_csv(args.formats) or _split_csv(default_formats)
        contexts = _split_csv(args.contexts) or [""]
        prompts = generate_combinatorial_prompts(topics, formats, contexts, args.num_prompts)
        print(f"Generated {len(prompts)} prompts ({len(topics)} topics × {len(formats)} formats × {len(contexts)} contexts)")

    if not prompts:
        print("  ERROR: No prompts to process.")
        sys.exit(1)

    # Step 2: Teacher generates responses
    print(f"\nTeacher ({args.teacher}) generating responses...")
    examples = []
    for i, prompt in enumerate(prompts):
        response = teacher_generate(client, args.teacher, args.system_prompt, prompt)
        if response:
            examples.append({"prompt": prompt, "response": response})
        if (i + 1) % 25 == 0:
            print(f"  {i+1}/{len(prompts)} ({len(examples)} successful)")
    print(f"  Teacher produced {len(examples)}/{len(prompts)} responses")

    # Step 3: Quality grade and filter
    if not args.skip_grading:
        print(f"\nGrading with {judge}...")
        ungraded = 0
        for i, ex in enumerate(examples):
            scores = grade_output(client, judge, ex["response"])
            if scores:
                ex["scores"] = scores
                ex["avg_score"] = sum(scores.values()) / len(scores)
            else:
                # No usable judge verdict: score as 0 so the filter decides.
                ex["avg_score"] = 0
                ungraded += 1
            if (i + 1) % 25 == 0:
                print(f"  Graded {i+1}/{len(examples)}")

        filtered = [ex for ex in examples if ex["avg_score"] >= args.min_score]
        avgs = [ex["avg_score"] for ex in examples if ex["avg_score"] > 0]
        print(f"  Passed filter (>= {args.min_score}): {len(filtered)}/{len(examples)}")
        if ungraded:
            print(f"  Could not grade {ungraded} example(s); they were scored as 0")
        if avgs:
            print(f"  Scores: min={min(avgs):.1f}, max={max(avgs):.1f}, mean={sum(avgs)/len(avgs):.1f}")
    else:
        filtered = examples
        print(f"Skipping grading — keeping all {len(filtered)} examples")

    # An empty dataset is a failed run, not a success: do not write empty
    # files and report "Done".
    if not filtered:
        print("  ERROR: No examples left to write (teacher produced none or all were filtered out).")
        sys.exit(1)

    # Step 4: Convert to SFT format and split
    sft_data = [{"messages": [
        {"role": "system", "content": args.system_prompt},
        {"role": "user", "content": ex["prompt"]},
        {"role": "assistant", "content": ex["response"]},
    ]} for ex in filtered]

    random.shuffle(sft_data)
    n = len(sft_data)
    t_end = int(n * args.train_split)
    v_end = int(n * (args.train_split + args.val_split))
    splits = {"train": sft_data[:t_end], "validation": sft_data[t_end:v_end], "test": sft_data[v_end:]}

    os.makedirs(args.output_dir, exist_ok=True)
    for name, data in splits.items():
        path = os.path.join(args.output_dir, f"{name}.jsonl")
        with open(path, "w", encoding="utf-8") as f:
            for ex in data:
                f.write(json.dumps(ex, ensure_ascii=False) + "\n")
        print(f"  {name}: {len(data)} examples → {path}")

    print(f"\n✅ Done! Dataset ready in {args.output_dir}/")


if __name__ == "__main__":
    main()
255

Microsoft Foundry Skill

finetuning/scripts/generate_distillation_data.py

Preparing the source view

Microsoft Foundry Skill

finetuning/scripts/generate_distillation_data.py