Source from repo

Microsoft Foundry Skill

Build and deploy AI applications on Azure AI Foundry using Microsoft's model catalog and AI services

microsoft · GitHub · microsoft · Official · Source repo · Original GitHub link · Publisher page

Files

155

Skill

n/a

Size

976.3 KB

Entrypoint

SKILL.md

Format

git-repo

Open file

finetuning/scripts/generate_distillation_data.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

Code · 255 lines · Free

finetuning/scripts/generate_distillation_data.py

1# /// script
2# dependencies = [
3#   "openai>=1.0",
4#   "azure-identity",
5# ]
6# ///
7"""
8generate_distillation_data.py — Generate training data from a teacher model for distillation.
9 
10Creates a synthetic SFT dataset by:
111. Generating diverse prompts from combinatorial axes (topics × formats × contexts)
122. Having the teacher model produce responses
133. Quality-grading each response with an LLM judge
144. Filtering low-quality examples
155. Splitting into train/val/test JSONL files
16 
17Usage:
18  python generate_distillation_data.py \
19      --teacher gpt-4.1-mini \
20      --system-prompt "You are a formal business writer." \
21      --topics "earnings,risk,compliance" \
22      --num-prompts 300 \
23      --min-score 7.0 \
24      --output-dir ./my_dataset
25 
26  # Or with a prompts file (one prompt per line):
27  python generate_distillation_data.py \
28      --teacher gpt-4.1-mini \
29      --prompts-file my_prompts.txt \
30      --output-dir ./my_dataset
31"""
32 
33import json
34import os
35import random
36import re
37import sys
38 
# Switch stdout/stderr to UTF-8 before anything is printed: the status messages
# emitted by this script contain non-ASCII characters (✅, →, ×).
try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except (AttributeError, OSError):
    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine
44import time
45sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
46from common import HelpOnErrorParser, get_clients, _clamp_score
47 
48import openai
49 
50 
def verify_deployment(client, model):
    """Probe a deployment with a one-token chat request and report whether it exists.

    Returns:
        False only when the request raises ``openai.NotFoundError``. A
        successful call, or any other exception, yields True.
    """
    probe = {
        "model": model,
        "messages": [{"role": "user", "content": "Hi"}],
        "max_completion_tokens": 1,
    }
    try:
        client.chat.completions.create(**probe)
    except openai.NotFoundError:
        return False
    except Exception:
        # other errors (rate limit, etc.) mean the deployment exists
        return True
    else:
        return True
64 
65 
def generate_combinatorial_prompts(topics, formats, contexts, n):
    """Sample ``n`` prompts by drawing one value from each axis at random.

    Every prompt independently picks a topic, a format and a context with
    ``random.choice`` (sampling with replacement, so duplicates are possible).
    A prompt reads ``Write <format> about: <topic>.`` and is preceded by a
    ``Context: <context>`` line plus a blank line when the drawn context is
    non-empty. An empty context produces no header, so a run without any
    contexts does not send a dangling "Context: " line to the teacher.

    Args:
        topics: Non-empty sequence of topic strings.
        formats: Non-empty sequence of output-format phrases.
        contexts: Non-empty sequence of context strings; entries may be "".
        n: Number of prompts to produce.

    Returns:
        A list of ``n`` prompt strings (empty when ``n <= 0``).

    Raises:
        IndexError: If any axis is empty while ``n > 0``.
    """
    prompts = []
    for _ in range(n):
        # Draw order (topic, format, context) is kept stable so a seeded
        # ``random`` yields the same selections as before.
        topic = random.choice(topics)
        fmt = random.choice(formats)
        context = random.choice(contexts)
        request = f"Write {fmt} about: {topic}."
        prompts.append(f"Context: {context}\n\n{request}" if context else request)
    return prompts
75 
76 
def teacher_generate(client, model, system_prompt, prompt, retries=3):
    """Request one teacher completion for ``prompt``, retrying on any exception.

    The request uses temperature 0.7 and a 1024-token completion limit. After
    a failed attempt the function sleeps ``2 * attempt_number`` seconds before
    trying again; once the last attempt fails it prints the error.

    Returns:
        The message content of the first choice, or None when every attempt
        raised (or ``retries`` is 0).
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=conversation,
                temperature=0.7,
                max_completion_tokens=1024,
            )
            return completion.choices[0].message.content
        except Exception as err:
            if attempt < final_attempt:
                # Linear back-off: 2s, 4s, ... between attempts.
                time.sleep(2 * (attempt + 1))
                continue
            print(f"  Failed after {retries} attempts: {err}")
            return None
    return None
97 
98 
# Judge prompt template, rendered with ``str.format(output=...)``.
# ``{output}`` is the only placeholder; the doubled braces on the last line
# render as literal ``{`` / ``}`` so the judge sees a JSON example.
# Only the text being graded is inserted; the prompt that produced it is not.
QUALITY_PROMPT = """Rate this AI-generated text on quality dimensions (1-10 each).

## Text to evaluate
{output}

## Dimensions
**Accuracy** (1-10): Is the content factually sound and coherent?
**Quality** (1-10): Is it well-written, clear, and professional?
**Task-fit** (1-10): Does it match the requested format and purpose?

Return ONLY JSON: {{"accuracy": <int>, "quality": <int>, "task_fit": <int>}}"""
110 
111 
def grade_output(client, judge_model, output, retries=3):
    """Ask the judge model to score ``output`` and return the parsed scores.

    The judge is prompted with ``QUALITY_PROMPT`` (temperature 0.0, 100-token
    limit). The first ``{...}`` span in its reply is parsed as JSON and every
    value is passed through ``_clamp_score``. Up to ``retries`` attempts are
    made: an exception pauses two seconds before the next attempt (no pause
    after the last one), while a reply without a JSON object moves straight
    on to the next attempt.

    Returns:
        A dict mapping each key of the judge's JSON to its ``_clamp_score``
        result, or None when no attempt produced parseable scores.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            judge_request = [{"role": "user", "content": QUALITY_PROMPT.format(output=output)}]
            completion = client.chat.completions.create(
                model=judge_model,
                messages=judge_request,
                temperature=0.0,
                max_completion_tokens=100,
            )
            reply = (completion.choices[0].message.content or "").strip()
            found = re.search(r'\{[^}]+\}', reply)
            if found is None:
                continue
            parsed = json.loads(found.group())
            return {name: _clamp_score(value) for name, value in parsed.items()}
        except Exception:
            if attempt == final_attempt:
                break
            time.sleep(2)
    return None
130 
131 
def main():
    """Command-line entry point: build a distillation SFT dataset from a teacher model.

    Pipeline:
      0. Parse arguments, validate the split fractions, and check that the
         teacher deployment (and the judge, when different) can be reached.
      1. Load prompts from ``--prompts-file`` or sample them from the
         topic/format/context axes.
      2. Ask the teacher for a response to every prompt; failed or empty
         responses are dropped.
      3. Unless ``--skip-grading`` is set, score each response with the judge
         and keep those whose average score is at least ``--min-score``.
      4. Shuffle, split into train/validation/test, and write one
         ``<name>.jsonl`` file per split into ``--output-dir``.

    Exits with status 1 when the split fractions are invalid, a deployment is
    not found, there are no prompts to run, or no example survives to be
    written.
    """
    parser = HelpOnErrorParser(description="Generate distillation training data from a teacher model")
    parser.add_argument("--base-url", default=os.environ.get("OPENAI_BASE_URL"),
                        help="Project /v1/ URL (preferred)")
    parser.add_argument("--endpoint", default=os.environ.get("AZURE_OPENAI_ENDPOINT"),
                        help="Azure OpenAI endpoint (fallback)")
    parser.add_argument("--project-endpoint", default=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
                        help="Azure AI project endpoint (Foundry SDK)")
    parser.add_argument("--api-key", default=os.environ.get("AZURE_OPENAI_API_KEY"))
    parser.add_argument("--teacher", required=True, help="Teacher model deployment name")
    parser.add_argument("--judge", default=None, help="Judge model (default: same as teacher)")
    parser.add_argument("--system-prompt", default="You are a helpful assistant.", help="System prompt for teacher")

    # Prompt generation (either combinatorial or from file)
    parser.add_argument("--prompts-file", help="File with one prompt per line (skips combinatorial generation)")
    parser.add_argument("--topics", help="Comma-separated topics for combinatorial prompts")
    parser.add_argument("--formats", default="a concise response,a brief summary,a detailed explanation",
                        help="Comma-separated output formats")
    parser.add_argument("--contexts", default="", help="Comma-separated context sentences")
    parser.add_argument("--num-prompts", type=int, default=300, help="Number of prompts to generate")

    # Quality
    parser.add_argument("--min-score", type=float, default=7.0, help="Minimum average quality score to keep")
    parser.add_argument("--skip-grading", action="store_true", help="Skip quality grading (keep all)")

    # Output
    parser.add_argument("--output-dir", default="./distillation_data", help="Output directory")
    parser.add_argument("--train-split", type=float, default=0.8)
    parser.add_argument("--val-split", type=float, default=0.1)

    args = parser.parse_args()

    # Reject impossible split fractions before spending any API calls.
    # The small tolerance forgives float error in sums such as 0.9 + 0.1.
    if args.train_split < 0 or args.val_split < 0 or args.train_split + args.val_split > 1 + 1e-9:
        print("  ERROR: --train-split and --val-split must be non-negative and sum to at most 1.0.")
        sys.exit(1)

    # The second value returned by get_clients is not needed here.
    client, _ = get_clients(
        base_url=args.base_url, azure_endpoint=args.endpoint,
        project_endpoint=args.project_endpoint, api_key=args.api_key
    )
    judge = args.judge or args.teacher

    # Step 0: Verify deployments exist
    print(f"Verifying deployment '{args.teacher}'...")
    if not verify_deployment(client, args.teacher):
        print(f"  ERROR: Deployment '{args.teacher}' not found. Available deployments can be listed in Azure Portal.")
        sys.exit(1)
    print("  ✅ Teacher deployment verified.")

    if judge != args.teacher:
        print(f"Verifying judge deployment '{judge}'...")
        if not verify_deployment(client, judge):
            print(f"  ERROR: Judge deployment '{judge}' not found.")
            sys.exit(1)
        print("  ✅ Judge deployment verified.")

    # Step 1: Generate or load prompts
    if args.prompts_file:
        with open(args.prompts_file, encoding="utf-8") as pf:
            prompts = [line.strip() for line in pf if line.strip()]
        print(f"Loaded {len(prompts)} prompts from {args.prompts_file}")
    else:
        # Drop empty entries (e.g. from "a,,b" or a trailing comma) on every
        # axis so no prompt is built around an empty topic or format.
        topics = [t.strip() for t in (args.topics or "").split(",") if t.strip()] or ["general knowledge"]
        formats = [f.strip() for f in args.formats.split(",") if f.strip()]
        if not formats:
            print("  ERROR: --formats must contain at least one non-empty value.")
            sys.exit(1)
        contexts = [c.strip() for c in args.contexts.split(",") if c.strip()] or [""]
        prompts = generate_combinatorial_prompts(topics, formats, contexts, args.num_prompts)
        print(f"Generated {len(prompts)} prompts ({len(topics)} topics × {len(formats)} formats × {len(contexts)} contexts)")

    if not prompts:
        print("  ERROR: No prompts to process.")
        sys.exit(1)

    # Step 2: Teacher generates responses
    print(f"\nTeacher ({args.teacher}) generating responses...")
    examples = []
    for i, prompt in enumerate(prompts):
        response = teacher_generate(client, args.teacher, args.system_prompt, prompt)
        if response:
            examples.append({"prompt": prompt, "response": response})
        if (i + 1) % 25 == 0:
            print(f"  {i+1}/{len(prompts)} ({len(examples)} successful)")
    print(f"  Teacher produced {len(examples)}/{len(prompts)} responses")

    # Step 3: Quality grade and filter
    if not args.skip_grading:
        print(f"\nGrading with {judge}...")
        ungraded = 0
        for i, ex in enumerate(examples):
            scores = grade_output(client, judge, ex["response"])
            if scores:
                ex["scores"] = scores
                ex["avg_score"] = sum(scores.values()) / len(scores)
            else:
                # No usable scores from the judge: record 0 and count it.
                ex["avg_score"] = 0
                ungraded += 1
            if (i + 1) % 25 == 0:
                print(f"  Graded {i+1}/{len(examples)}")

        filtered = [ex for ex in examples if ex["avg_score"] >= args.min_score]
        avgs = [ex["avg_score"] for ex in examples if ex["avg_score"] > 0]
        print(f"  Passed filter (>= {args.min_score}): {len(filtered)}/{len(examples)}")
        if ungraded:
            print(f"  Could not grade {ungraded} example(s); they were scored 0")
        if avgs:
            print(f"  Scores: min={min(avgs):.1f}, max={max(avgs):.1f}, mean={sum(avgs)/len(avgs):.1f}")
    else:
        filtered = examples
        print(f"Skipping grading — keeping all {len(filtered)} examples")

    if not filtered:
        print("  ERROR: No examples left to write (teacher produced nothing or all were filtered out).")
        sys.exit(1)

    # Step 4: Convert to SFT format and split
    sft_data = [{"messages": [
        {"role": "system", "content": args.system_prompt},
        {"role": "user", "content": ex["prompt"]},
        {"role": "assistant", "content": ex["response"]},
    ]} for ex in filtered]

    random.shuffle(sft_data)
    n = len(sft_data)
    # round(..., 9) absorbs binary floating-point error before truncation:
    # 0.7 + 0.1 evaluates to 0.7999999999999999, which would otherwise put the
    # boundary for n=10 at 7 instead of 8 and leave the validation split empty.
    t_end = min(n, int(round(n * args.train_split, 9)))
    v_end = min(n, max(t_end, int(round(n * (args.train_split + args.val_split), 9))))
    splits = {"train": sft_data[:t_end], "validation": sft_data[t_end:v_end], "test": sft_data[v_end:]}

    os.makedirs(args.output_dir, exist_ok=True)
    for name, data in splits.items():
        path = os.path.join(args.output_dir, f"{name}.jsonl")
        with open(path, "w", encoding="utf-8") as f:
            for ex in data:
                f.write(json.dumps(ex, ensure_ascii=False) + "\n")
        print(f"  {name}: {len(data)} examples → {path}")

    print(f"\n✅ Done! Dataset ready in {args.output_dir}/")
251 
252 
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
255

Marketplace

Source from repo

Microsoft Foundry Skill

Build and deploy AI applications on Azure AI Foundry using Microsoft's model catalog and AI services

microsoft · GitHub · microsoft · Official · Source repo · Original GitHub link · Publisher page

Files

155

Skill

n/a

Size

976.3 KB

Entrypoint

SKILL.md

Format

git-repo

Open file

finetuning/scripts/generate_distillation_data.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

Code · 255 lines · Free

finetuning/scripts/generate_distillation_data.py

1# /// script
2# dependencies = [
3#   "openai>=1.0",
4#   "azure-identity",
5# ]
6# ///
7"""
8generate_distillation_data.py — Generate training data from a teacher model for distillation.
9 
10Creates a synthetic SFT dataset by:
111. Generating diverse prompts from combinatorial axes (topics × formats × contexts)
122. Having the teacher model produce responses
133. Quality-grading each response with an LLM judge
144. Filtering low-quality examples
155. Splitting into train/val/test JSONL files
16 
17Usage:
18  python generate_distillation_data.py \
19      --teacher gpt-4.1-mini \
20      --system-prompt "You are a formal business writer." \
21      --topics "earnings,risk,compliance" \
22      --num-prompts 300 \
23      --min-score 7.0 \
24      --output-dir ./my_dataset
25 
26  # Or with a prompts file (one prompt per line):
27  python generate_distillation_data.py \
28      --teacher gpt-4.1-mini \
29      --prompts-file my_prompts.txt \
30      --output-dir ./my_dataset
31"""
32 
33import json
34import os
35import random
36import re
37import sys
38 
# Switch stdout/stderr to UTF-8 before anything is printed: the status messages
# emitted by this script contain non-ASCII characters (✅, →, ×).
try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except (AttributeError, OSError):
    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine
44import time
45sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
46from common import HelpOnErrorParser, get_clients, _clamp_score
47 
48import openai
49 
50 
def verify_deployment(client, model):
    """Probe a deployment with a one-token chat request and report whether it exists.

    Returns:
        False only when the request raises ``openai.NotFoundError``. A
        successful call, or any other exception, yields True.
    """
    probe = {
        "model": model,
        "messages": [{"role": "user", "content": "Hi"}],
        "max_completion_tokens": 1,
    }
    try:
        client.chat.completions.create(**probe)
    except openai.NotFoundError:
        return False
    except Exception:
        # other errors (rate limit, etc.) mean the deployment exists
        return True
    else:
        return True
64 
65 
def generate_combinatorial_prompts(topics, formats, contexts, n):
    """Sample ``n`` prompts by drawing one value from each axis at random.

    Every prompt independently picks a topic, a format and a context with
    ``random.choice`` (sampling with replacement, so duplicates are possible).
    A prompt reads ``Write <format> about: <topic>.`` and is preceded by a
    ``Context: <context>`` line plus a blank line when the drawn context is
    non-empty. An empty context produces no header, so a run without any
    contexts does not send a dangling "Context: " line to the teacher.

    Args:
        topics: Non-empty sequence of topic strings.
        formats: Non-empty sequence of output-format phrases.
        contexts: Non-empty sequence of context strings; entries may be "".
        n: Number of prompts to produce.

    Returns:
        A list of ``n`` prompt strings (empty when ``n <= 0``).

    Raises:
        IndexError: If any axis is empty while ``n > 0``.
    """
    prompts = []
    for _ in range(n):
        # Draw order (topic, format, context) is kept stable so a seeded
        # ``random`` yields the same selections as before.
        topic = random.choice(topics)
        fmt = random.choice(formats)
        context = random.choice(contexts)
        request = f"Write {fmt} about: {topic}."
        prompts.append(f"Context: {context}\n\n{request}" if context else request)
    return prompts
75 
76 
def teacher_generate(client, model, system_prompt, prompt, retries=3):
    """Request one teacher completion for ``prompt``, retrying on any exception.

    The request uses temperature 0.7 and a 1024-token completion limit. After
    a failed attempt the function sleeps ``2 * attempt_number`` seconds before
    trying again; once the last attempt fails it prints the error.

    Returns:
        The message content of the first choice, or None when every attempt
        raised (or ``retries`` is 0).
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=conversation,
                temperature=0.7,
                max_completion_tokens=1024,
            )
            return completion.choices[0].message.content
        except Exception as err:
            if attempt < final_attempt:
                # Linear back-off: 2s, 4s, ... between attempts.
                time.sleep(2 * (attempt + 1))
                continue
            print(f"  Failed after {retries} attempts: {err}")
            return None
    return None
97 
98 
# Judge prompt template, rendered with ``str.format(output=...)``.
# ``{output}`` is the only placeholder; the doubled braces on the last line
# render as literal ``{`` / ``}`` so the judge sees a JSON example.
# Only the text being graded is inserted; the prompt that produced it is not.
QUALITY_PROMPT = """Rate this AI-generated text on quality dimensions (1-10 each).

## Text to evaluate
{output}

## Dimensions
**Accuracy** (1-10): Is the content factually sound and coherent?
**Quality** (1-10): Is it well-written, clear, and professional?
**Task-fit** (1-10): Does it match the requested format and purpose?

Return ONLY JSON: {{"accuracy": <int>, "quality": <int>, "task_fit": <int>}}"""
110 
111 
def grade_output(client, judge_model, output, retries=3):
    """Ask the judge model to score ``output`` and return the parsed scores.

    The judge is prompted with ``QUALITY_PROMPT`` (temperature 0.0, 100-token
    limit). The first ``{...}`` span in its reply is parsed as JSON and every
    value is passed through ``_clamp_score``. Up to ``retries`` attempts are
    made: an exception pauses two seconds before the next attempt (no pause
    after the last one), while a reply without a JSON object moves straight
    on to the next attempt.

    Returns:
        A dict mapping each key of the judge's JSON to its ``_clamp_score``
        result, or None when no attempt produced parseable scores.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            judge_request = [{"role": "user", "content": QUALITY_PROMPT.format(output=output)}]
            completion = client.chat.completions.create(
                model=judge_model,
                messages=judge_request,
                temperature=0.0,
                max_completion_tokens=100,
            )
            reply = (completion.choices[0].message.content or "").strip()
            found = re.search(r'\{[^}]+\}', reply)
            if found is None:
                continue
            parsed = json.loads(found.group())
            return {name: _clamp_score(value) for name, value in parsed.items()}
        except Exception:
            if attempt == final_attempt:
                break
            time.sleep(2)
    return None
130 
131 
def main():
    """Command-line entry point: build a distillation SFT dataset from a teacher model.

    Pipeline:
      0. Parse arguments, validate the split fractions, and check that the
         teacher deployment (and the judge, when different) can be reached.
      1. Load prompts from ``--prompts-file`` or sample them from the
         topic/format/context axes.
      2. Ask the teacher for a response to every prompt; failed or empty
         responses are dropped.
      3. Unless ``--skip-grading`` is set, score each response with the judge
         and keep those whose average score is at least ``--min-score``.
      4. Shuffle, split into train/validation/test, and write one
         ``<name>.jsonl`` file per split into ``--output-dir``.

    Exits with status 1 when the split fractions are invalid, a deployment is
    not found, there are no prompts to run, or no example survives to be
    written.
    """
    parser = HelpOnErrorParser(description="Generate distillation training data from a teacher model")
    parser.add_argument("--base-url", default=os.environ.get("OPENAI_BASE_URL"),
                        help="Project /v1/ URL (preferred)")
    parser.add_argument("--endpoint", default=os.environ.get("AZURE_OPENAI_ENDPOINT"),
                        help="Azure OpenAI endpoint (fallback)")
    parser.add_argument("--project-endpoint", default=os.environ.get("AZURE_AI_PROJECT_ENDPOINT"),
                        help="Azure AI project endpoint (Foundry SDK)")
    parser.add_argument("--api-key", default=os.environ.get("AZURE_OPENAI_API_KEY"))
    parser.add_argument("--teacher", required=True, help="Teacher model deployment name")
    parser.add_argument("--judge", default=None, help="Judge model (default: same as teacher)")
    parser.add_argument("--system-prompt", default="You are a helpful assistant.", help="System prompt for teacher")

    # Prompt generation (either combinatorial or from file)
    parser.add_argument("--prompts-file", help="File with one prompt per line (skips combinatorial generation)")
    parser.add_argument("--topics", help="Comma-separated topics for combinatorial prompts")
    parser.add_argument("--formats", default="a concise response,a brief summary,a detailed explanation",
                        help="Comma-separated output formats")
    parser.add_argument("--contexts", default="", help="Comma-separated context sentences")
    parser.add_argument("--num-prompts", type=int, default=300, help="Number of prompts to generate")

    # Quality
    parser.add_argument("--min-score", type=float, default=7.0, help="Minimum average quality score to keep")
    parser.add_argument("--skip-grading", action="store_true", help="Skip quality grading (keep all)")

    # Output
    parser.add_argument("--output-dir", default="./distillation_data", help="Output directory")
    parser.add_argument("--train-split", type=float, default=0.8)
    parser.add_argument("--val-split", type=float, default=0.1)

    args = parser.parse_args()

    # Reject impossible split fractions before spending any API calls.
    # The small tolerance forgives float error in sums such as 0.9 + 0.1.
    if args.train_split < 0 or args.val_split < 0 or args.train_split + args.val_split > 1 + 1e-9:
        print("  ERROR: --train-split and --val-split must be non-negative and sum to at most 1.0.")
        sys.exit(1)

    # The second value returned by get_clients is not needed here.
    client, _ = get_clients(
        base_url=args.base_url, azure_endpoint=args.endpoint,
        project_endpoint=args.project_endpoint, api_key=args.api_key
    )
    judge = args.judge or args.teacher

    # Step 0: Verify deployments exist
    print(f"Verifying deployment '{args.teacher}'...")
    if not verify_deployment(client, args.teacher):
        print(f"  ERROR: Deployment '{args.teacher}' not found. Available deployments can be listed in Azure Portal.")
        sys.exit(1)
    print("  ✅ Teacher deployment verified.")

    if judge != args.teacher:
        print(f"Verifying judge deployment '{judge}'...")
        if not verify_deployment(client, judge):
            print(f"  ERROR: Judge deployment '{judge}' not found.")
            sys.exit(1)
        print("  ✅ Judge deployment verified.")

    # Step 1: Generate or load prompts
    if args.prompts_file:
        with open(args.prompts_file, encoding="utf-8") as pf:
            prompts = [line.strip() for line in pf if line.strip()]
        print(f"Loaded {len(prompts)} prompts from {args.prompts_file}")
    else:
        # Drop empty entries (e.g. from "a,,b" or a trailing comma) on every
        # axis so no prompt is built around an empty topic or format.
        topics = [t.strip() for t in (args.topics or "").split(",") if t.strip()] or ["general knowledge"]
        formats = [f.strip() for f in args.formats.split(",") if f.strip()]
        if not formats:
            print("  ERROR: --formats must contain at least one non-empty value.")
            sys.exit(1)
        contexts = [c.strip() for c in args.contexts.split(",") if c.strip()] or [""]
        prompts = generate_combinatorial_prompts(topics, formats, contexts, args.num_prompts)
        print(f"Generated {len(prompts)} prompts ({len(topics)} topics × {len(formats)} formats × {len(contexts)} contexts)")

    if not prompts:
        print("  ERROR: No prompts to process.")
        sys.exit(1)

    # Step 2: Teacher generates responses
    print(f"\nTeacher ({args.teacher}) generating responses...")
    examples = []
    for i, prompt in enumerate(prompts):
        response = teacher_generate(client, args.teacher, args.system_prompt, prompt)
        if response:
            examples.append({"prompt": prompt, "response": response})
        if (i + 1) % 25 == 0:
            print(f"  {i+1}/{len(prompts)} ({len(examples)} successful)")
    print(f"  Teacher produced {len(examples)}/{len(prompts)} responses")

    # Step 3: Quality grade and filter
    if not args.skip_grading:
        print(f"\nGrading with {judge}...")
        ungraded = 0
        for i, ex in enumerate(examples):
            scores = grade_output(client, judge, ex["response"])
            if scores:
                ex["scores"] = scores
                ex["avg_score"] = sum(scores.values()) / len(scores)
            else:
                # No usable scores from the judge: record 0 and count it.
                ex["avg_score"] = 0
                ungraded += 1
            if (i + 1) % 25 == 0:
                print(f"  Graded {i+1}/{len(examples)}")

        filtered = [ex for ex in examples if ex["avg_score"] >= args.min_score]
        avgs = [ex["avg_score"] for ex in examples if ex["avg_score"] > 0]
        print(f"  Passed filter (>= {args.min_score}): {len(filtered)}/{len(examples)}")
        if ungraded:
            print(f"  Could not grade {ungraded} example(s); they were scored 0")
        if avgs:
            print(f"  Scores: min={min(avgs):.1f}, max={max(avgs):.1f}, mean={sum(avgs)/len(avgs):.1f}")
    else:
        filtered = examples
        print(f"Skipping grading — keeping all {len(filtered)} examples")

    if not filtered:
        print("  ERROR: No examples left to write (teacher produced nothing or all were filtered out).")
        sys.exit(1)

    # Step 4: Convert to SFT format and split
    sft_data = [{"messages": [
        {"role": "system", "content": args.system_prompt},
        {"role": "user", "content": ex["prompt"]},
        {"role": "assistant", "content": ex["response"]},
    ]} for ex in filtered]

    random.shuffle(sft_data)
    n = len(sft_data)
    # round(..., 9) absorbs binary floating-point error before truncation:
    # 0.7 + 0.1 evaluates to 0.7999999999999999, which would otherwise put the
    # boundary for n=10 at 7 instead of 8 and leave the validation split empty.
    t_end = min(n, int(round(n * args.train_split, 9)))
    v_end = min(n, max(t_end, int(round(n * (args.train_split + args.val_split), 9))))
    splits = {"train": sft_data[:t_end], "validation": sft_data[t_end:v_end], "test": sft_data[v_end:]}

    os.makedirs(args.output_dir, exist_ok=True)
    for name, data in splits.items():
        path = os.path.join(args.output_dir, f"{name}.jsonl")
        with open(path, "w", encoding="utf-8") as f:
            for ex in data:
                f.write(json.dumps(ex, ensure_ascii=False) + "\n")
        print(f"  {name}: {len(data)} examples → {path}")

    print(f"\n✅ Done! Dataset ready in {args.output_dir}/")
251 
252 
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
255

Microsoft Foundry Skill

finetuning/scripts/generate_distillation_data.py

Preparing the source view

Microsoft Foundry Skill

finetuning/scripts/generate_distillation_data.py