Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Deploy, evaluate, and manage AI agents end-to-end on Microsoft Azure AI Foundry
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
finetuning/scripts/deploy_model.py
# /// script
# dependencies = [
#   "openai>=1.0",
#   "requests",
#   "azure-identity",
# ]
# ///
"""
deploy_model.py — Deploy fine-tuned models on Azure AI Foundry via ARM REST API.

Supports all model families with correct format/SKU mapping.

Usage:
    python deploy_model.py --model-id "ft:gpt-4.1-mini-2025-04-14:..." --name "my-ft-eval" --capacity 100
    python deploy_model.py --model-id "ft:gpt-oss-20b:..." --name "oss-eval" --format Microsoft --sku GlobalStandard
    python deploy_model.py --delete --name "my-ft-eval"
    python deploy_model.py --list

The process exits with status 1 when the requested action fails, so the
script can be chained in shell pipelines and CI jobs.
"""

import os
import shutil
import subprocess
import sys
import time
from urllib.parse import quote

try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except (AttributeError, OSError):
    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine

# The sibling module ``common`` lives next to this script; make it importable
# even when the script is launched from another working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from common import HelpOnErrorParser

import requests


ARM_ENDPOINT = "https://management.azure.com"
ARM_API_VERSION = "2024-10-01"


def _safe_error_msg(resp):
    """Extract error message from response, handling non-JSON bodies (HTML 502/503).

    Falls back to the first 200 characters of the raw body (or
    "Unknown error" when the body is empty) whenever the JSON payload is
    missing, is not an object, or does not carry ``error.message``.
    """
    fallback = resp.text[:200] if resp.text else "Unknown error"
    try:
        payload = resp.json()
    except ValueError:
        return fallback

    error = payload.get("error") if isinstance(payload, dict) else None
    if isinstance(error, dict):
        message = error.get("message")
        if message:
            return str(message)
    elif isinstance(error, str) and error:
        # Some gateways return {"error": "text"} rather than an error object.
        return error
    return fallback


# Default Azure resource coordinates — override with env vars or args
DEFAULT_SUB = os.environ.get("AZURE_SUBSCRIPTION_ID", "")
DEFAULT_RG = os.environ.get("AZURE_RESOURCE_GROUP", "")
DEFAULT_ACCOUNT = os.environ.get("AZURE_COGSERVICES_ACCOUNT", "")

# Common Windows install locations, probed when ``az`` is not on PATH.
_WINDOWS_AZ_CANDIDATES = (
    r"C:\Program Files (x86)\Microsoft SDKs\Azure\CLI2\wbin\az.cmd",
    r"C:\Program Files\Microsoft SDKs\Azure\CLI2\wbin\az.cmd",
)


def _find_az_cli():
    """Resolve the Azure CLI executable: AZ_CLI_PATH, then PATH, then known Windows paths."""
    configured = os.environ.get("AZ_CLI_PATH")
    if configured:
        return configured
    on_path = shutil.which("az")
    if on_path:
        return on_path
    for candidate in _WINDOWS_AZ_CANDIDATES:
        if os.path.exists(candidate):
            return candidate
    return "az"  # last resort, hope it's on PATH


AZ_CLI = _find_az_cli()


def _mentions(*needles):
    """Build a predicate that is true when a model ID contains any needle (case-insensitive)."""
    def check(model_id):
        lowered = model_id.lower()
        return any(needle in lowered for needle in needles)
    return check


# Model format auto-detection rules: (predicate, model format, SKU).
# Rules are evaluated in order; the first match wins.
FORMAT_RULES = [
    (_mentions("oss-20b", "oss20b"), "Microsoft", "GlobalStandard"),
    (_mentions("ministral", "mistral"), "Mistral AI", "GlobalStandard"),
    (_mentions("llama", "meta"), "Meta", "GlobalStandard"),
    (_mentions("qwen", "alibaba"), "Alibaba", "GlobalStandard"),
    (lambda m: True, "OpenAI", "Standard"),  # Default fallback
]


def get_arm_token():
    """Get a fresh ARM token from Azure CLI.

    Returns:
        The access token string printed by ``az account get-access-token``.

    Raises:
        RuntimeError: if the Azure CLI cannot be launched or prints no token.
    """
    try:
        result = subprocess.run(
            [AZ_CLI, "account", "get-access-token", "--query", "accessToken", "-o", "tsv"],
            capture_output=True, text=True,
        )
    except OSError as exc:
        # Missing or non-executable CLI: report it the same way as any other
        # token failure instead of leaking a raw FileNotFoundError.
        raise RuntimeError(
            f"Failed to get ARM token: could not run Azure CLI '{AZ_CLI}': {exc}"
        ) from exc
    token = result.stdout.strip()
    if not token:
        raise RuntimeError(f"Failed to get ARM token: {result.stderr}")
    return token


def arm_url(sub, rg, account, deploy_name=None):
    """Build the ARM REST API URL.

    Args:
        sub: Azure subscription ID.
        rg: Resource group name.
        account: Cognitive Services account name.
        deploy_name: Optional deployment name; when omitted the URL addresses
            the deployments collection.

    Returns:
        The full URL including the ``api-version`` query parameter. Each path
        segment is percent-encoded so characters such as ``/`` or ``?`` in an
        argument cannot alter the request path.
    """
    base = (f"{ARM_ENDPOINT}/subscriptions/{quote(str(sub), safe='')}"
            f"/resourceGroups/{quote(str(rg), safe='')}"
            f"/providers/Microsoft.CognitiveServices/accounts/{quote(str(account), safe='')}"
            f"/deployments")
    if deploy_name:
        base += f"/{quote(str(deploy_name), safe='')}"
    return base + f"?api-version={ARM_API_VERSION}"


def detect_format(model_id):
    """Auto-detect model format and SKU from model ID.

    Returns:
        A ``(format, sku)`` tuple taken from the first matching entry in
        ``FORMAT_RULES``.
    """
    for check, fmt, sku in FORMAT_RULES:
        if check(model_id):
            return fmt, sku
    # Only reachable if FORMAT_RULES is edited to drop its catch-all entry.
    return "OpenAI", "Standard"


def create_deployment(sub, rg, account, name, model_id, model_format, sku, capacity):
    """Create a deployment via ARM REST API.

    Returns:
        True when ARM answers 200/201, False on any other status or on a
        network error (a message is printed in both failure cases).

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    token = get_arm_token()
    url = arm_url(sub, rg, account, name)

    body = {
        "sku": {"name": sku, "capacity": capacity},
        "properties": {
            "model": {
                "format": model_format,
                "name": model_id,
                "version": "1",
            }
        },
    }

    try:
        resp = requests.put(url, headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }, json=body, timeout=(10, 120))
    except requests.exceptions.RequestException as e:
        print(f"❌ Deployment failed: {e}")
        return False

    if resp.status_code in (200, 201):
        print(f"✅ Deployment '{name}' created (format={model_format}, sku={sku}, capacity={capacity})")
        return True
    print(f"❌ Deployment failed ({resp.status_code}): {_safe_error_msg(resp)}")
    return False


def _provisioning_state(resp):
    """Read properties.provisioningState from a deployment response, or "Unknown"."""
    try:
        payload = resp.json()
        return payload.get("properties", {}).get("provisioningState", "Unknown")
    except (ValueError, KeyError, AttributeError):
        return "Unknown"


def wait_for_deployment(sub, rg, account, name, timeout=600, poll_interval=15):
    """Wait for deployment to reach 'Succeeded' state.

    Args:
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        True once the state is "Succeeded"; False when the state becomes
        "Failed"/"Canceled" or the timeout elapses.

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    url = arm_url(sub, rg, account, name)
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        token = get_arm_token()  # re-fetched each poll so a long wait never uses an expired token
        try:
            resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=(10, 60))
        except requests.exceptions.RequestException as e:
            print(f" ⚠️ Polling error: {e} — retrying in {poll_interval}s")
            time.sleep(poll_interval)
            continue
        if resp.status_code == 200:
            state = _provisioning_state(resp)
            print(f" Status: {state}")
            if state == "Succeeded":
                return True
            if state in ("Failed", "Canceled"):
                print(f" Deployment {state}.")
                return False
        else:
            # Surface unexpected statuses instead of polling silently until timeout.
            print(f" ⚠️ Unexpected response ({resp.status_code}): {_safe_error_msg(resp)}"
                  f" — retrying in {poll_interval}s")
        time.sleep(poll_interval)

    print(f" Timed out after {timeout}s")
    return False


def delete_deployment(sub, rg, account, name):
    """Delete a deployment.

    Returns:
        True when ARM answers 200/202/204, False otherwise (a message is
        printed in both cases).

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    token = get_arm_token()
    url = arm_url(sub, rg, account, name)
    try:
        resp = requests.delete(url, headers={"Authorization": f"Bearer {token}"}, timeout=(10, 60))
    except requests.exceptions.RequestException as e:
        print(f"❌ Delete failed: {e}")
        return False
    if resp.status_code in (200, 202, 204):
        print(f"✅ Deployment '{name}' deleted.")
        return True
    print(f"❌ Delete failed ({resp.status_code}): {_safe_error_msg(resp)}")
    return False


def _cell(value):
    """Render a table cell; None becomes "?" so width format specs never fail."""
    return "?" if value is None else str(value)


def list_deployments(sub, rg, account):
    """List all deployments.

    Follows ``nextLink`` in the response so every page of results is
    collected before the table is printed.

    Returns:
        True when the listing was retrieved (even if empty), False on an
        HTTP, network, or parse failure.

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    token = get_arm_token()
    headers = {"Authorization": f"Bearer {token}"}
    url = arm_url(sub, rg, account)
    deployments = []

    while url:
        try:
            resp = requests.get(url, headers=headers, timeout=(10, 60))
        except requests.exceptions.RequestException as e:
            print(f"❌ Failed to list deployments: {e}")
            return False
        if resp.status_code != 200:
            print(f"❌ Failed to list deployments ({resp.status_code}): {_safe_error_msg(resp)}")
            return False

        try:
            page = resp.json()
            deployments.extend(page.get("value") or [])
            next_link = page.get("nextLink")
        except (ValueError, KeyError, AttributeError, TypeError):
            print(f"❌ Failed to parse deployment list: {resp.text[:200]}")
            return False
        # Only follow continuation links that stay on the ARM endpoint, so the
        # bearer token is never sent to another host.
        if isinstance(next_link, str) and next_link.startswith(ARM_ENDPOINT + "/"):
            url = next_link
        else:
            url = None

    if not deployments:
        print("No deployments found.")
        return True

    print(f"{'Name':<40} {'Model':<40} {'SKU':<15} {'State':<15}")
    print("─" * 110)
    for d in deployments:
        props = d.get("properties") or {}
        name = _cell(d.get("name", "?"))
        model = _cell((props.get("model") or {}).get("name", "?"))
        sku = _cell((d.get("sku") or {}).get("name", "?"))
        state = _cell(props.get("provisioningState", "?"))
        print(f"{name:<40} {model:<40} {sku:<15} {state:<15}")
    return True


def _run_action(args):
    """Execute the action selected on the command line; return True on success."""
    if args.list:
        return list_deployments(args.sub, args.rg, args.account)

    if not args.name:
        print("Error: --name required for create/delete/wait")
        return False

    if args.delete:
        return delete_deployment(args.sub, args.rg, args.account, args.name)

    if args.wait and not args.model_id:
        # Wait-only mode: poll an existing deployment
        return wait_for_deployment(args.sub, args.rg, args.account, args.name)

    if not args.model_id:
        print("Error: --model-id required for create")
        return False

    # Auto-detect format/SKU if not specified
    model_format = args.format
    sku = args.sku
    if not model_format or not sku:
        auto_fmt, auto_sku = detect_format(args.model_id)
        model_format = model_format or auto_fmt
        sku = sku or auto_sku
        print(f"Auto-detected: format={model_format}, sku={sku}")

    created = create_deployment(args.sub, args.rg, args.account, args.name,
                                args.model_id, model_format, sku, args.capacity)
    if not created:
        return False
    if args.wait:
        return wait_for_deployment(args.sub, args.rg, args.account, args.name)
    return True


def main():
    """Parse command-line arguments and run the requested deployment action.

    Returns normally on success and calls ``sys.exit(1)`` when arguments are
    incomplete, a token cannot be obtained, or the action fails.
    """
    parser = HelpOnErrorParser(description="Deploy fine-tuned models on Azure AI Foundry")
    parser.add_argument("--sub", default=DEFAULT_SUB, help="Azure subscription ID")
    parser.add_argument("--rg", default=DEFAULT_RG, help="Resource group")
    parser.add_argument("--account", default=DEFAULT_ACCOUNT, help="Cognitive Services account")

    # Actions
    parser.add_argument("--list", action="store_true", help="List all deployments")
    parser.add_argument("--delete", action="store_true", help="Delete a deployment")
    parser.add_argument("--wait", action="store_true", help="Wait for deployment to succeed")

    # Deployment config
    parser.add_argument("--name", help="Deployment name (max 64 chars, alphanumeric + hyphens)")
    parser.add_argument("--model-id", help="Fine-tuned model ID (e.g., ft:gpt-4.1-mini:...)")
    parser.add_argument("--format", help="Model format (auto-detected if not specified)")
    parser.add_argument("--sku", help="SKU name (auto-detected if not specified)")
    parser.add_argument("--capacity", type=int, default=100, help="TPM capacity in thousands")

    args = parser.parse_args()

    if not all([args.sub, args.rg, args.account]):
        print("Error: Set --sub/--rg/--account or AZURE_SUBSCRIPTION_ID/AZURE_RESOURCE_GROUP/AZURE_COGSERVICES_ACCOUNT")
        sys.exit(1)

    try:
        succeeded = _run_action(args)
    except RuntimeError as e:
        # Raised by get_arm_token(); report it without a traceback.
        print(f"❌ {e}")
        sys.exit(1)

    if not succeeded:
        sys.exit(1)


if __name__ == "__main__":
    main()