Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Build and deploy AI applications on Azure AI Foundry using Microsoft's model catalog and AI services
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
finetuning/scripts/deploy_model.py
# /// script
# dependencies = [
#   "openai>=1.0",
#   "requests",
#   "azure-identity",
# ]
# ///
"""
deploy_model.py — Deploy fine-tuned models on Azure AI Foundry via ARM REST API.

Supports all model families with correct format/SKU mapping.

Usage:
    python deploy_model.py --model-id "ft:gpt-4.1-mini-2025-04-14:..." --name "my-ft-eval" --capacity 100
    python deploy_model.py --model-id "ft:gpt-oss-20b:..." --name "oss-eval" --format Microsoft --sku GlobalStandard
    python deploy_model.py --delete --name "my-ft-eval"
    python deploy_model.py --list

The process exits with status 1 when the requested action fails (bad
arguments, rejected request, failed/timed-out deployment, token error) and
with status 0 otherwise, so the script can be used in automation.
"""

import os
import shutil
import subprocess
import sys
import time

try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except (AttributeError, OSError):
    pass  # Stream not reconfigurable (older Python or non-tty); default encoding is fine

# Put this script's own directory first on sys.path so the sibling `common`
# module resolves no matter which working directory the script is run from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from common import HelpOnErrorParser  # noqa: E402

import requests  # noqa: E402

# ARM endpoint and API version used for every request built by arm_url().
ARM_ENDPOINT = "https://management.azure.com"
API_VERSION = "2024-10-01"

# Upper bound (seconds) for one `az account get-access-token` invocation, so a
# hung CLI cannot block the script (or the polling loop) forever.
AZ_CLI_TIMEOUT = 120

# Default Azure resource coordinates — override with env vars or args
DEFAULT_SUB = os.environ.get("AZURE_SUBSCRIPTION_ID", "")
DEFAULT_RG = os.environ.get("AZURE_RESOURCE_GROUP", "")
DEFAULT_ACCOUNT = os.environ.get("AZURE_COGSERVICES_ACCOUNT", "")

# Common Windows install locations, probed only when `az` is not on PATH.
_WINDOWS_AZ_CANDIDATES = (
    r"C:\Program Files (x86)\Microsoft SDKs\Azure\CLI2\wbin\az.cmd",
    r"C:\Program Files\Microsoft SDKs\Azure\CLI2\wbin\az.cmd",
)


def _resolve_az_cli():
    """Locate the Azure CLI executable.

    Resolution order: the AZ_CLI_PATH environment variable, `az` on PATH,
    the known Windows install locations, and finally the bare name "az".
    """
    explicit = os.environ.get("AZ_CLI_PATH")
    if explicit:
        return explicit
    on_path = shutil.which("az")
    if on_path:
        return on_path
    for candidate in _WINDOWS_AZ_CANDIDATES:
        if os.path.exists(candidate):
            return candidate
    return "az"  # last resort, hope it's on PATH


AZ_CLI = _resolve_az_cli()


def _mentions(*needles):
    """Build a predicate: true when a model ID contains any needle (case-insensitive)."""
    def check(model_id):
        lowered = model_id.lower()
        return any(needle in lowered for needle in needles)
    return check


# Model format auto-detection rules: (predicate, model format, SKU name).
# Rules are evaluated in order and the first match wins.
FORMAT_RULES = [
    (_mentions("oss-20b", "oss20b"), "Microsoft", "GlobalStandard"),
    (_mentions("ministral", "mistral"), "Mistral AI", "GlobalStandard"),
    (_mentions("llama", "meta"), "Meta", "GlobalStandard"),
    (_mentions("qwen", "alibaba"), "Alibaba", "GlobalStandard"),
    (lambda m: True, "OpenAI", "Standard"),  # Default fallback
]


def _safe_error_msg(resp):
    """Extract a human-readable error message from an HTTP response.

    Handles non-JSON bodies (e.g. HTML 502/503 pages) as well as JSON bodies
    that are not shaped like ``{"error": {"message": ...}}``. In every
    fallback case the first 200 characters of the body are returned, or
    "Unknown error" when the body is empty.
    """
    fallback = resp.text[:200] if resp.text else "Unknown error"
    try:
        payload = resp.json()
    except ValueError:
        return fallback
    error = payload.get("error") if isinstance(payload, dict) else None
    if isinstance(error, dict):
        return error.get("message") or fallback
    if isinstance(error, str) and error:
        return error
    return fallback


def _dig(obj, *keys):
    """Walk nested dicts and return the value as a string, or "?" if missing/null."""
    for key in keys:
        if not isinstance(obj, dict):
            return "?"
        obj = obj.get(key)
    return "?" if obj is None else str(obj)


def get_arm_token():
    """Get a fresh ARM token from Azure CLI.

    Returns:
        The access token printed by `az account get-access-token`.

    Raises:
        RuntimeError: if the CLI cannot be started, times out, exits with a
            non-zero status, or prints no token.
    """
    try:
        result = subprocess.run(
            [AZ_CLI, "account", "get-access-token", "--query", "accessToken", "-o", "tsv"],
            capture_output=True, text=True, timeout=AZ_CLI_TIMEOUT,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        raise RuntimeError(f"Failed to get ARM token: {exc}") from exc
    token = result.stdout.strip()
    if result.returncode != 0 or not token:
        raise RuntimeError(f"Failed to get ARM token: {result.stderr}")
    return token


def arm_url(sub, rg, account, deploy_name=None):
    """Build the ARM REST API URL.

    Args:
        sub: Azure subscription ID.
        rg: Resource group name.
        account: Cognitive Services account name.
        deploy_name: Optional deployment name; when omitted the URL addresses
            the deployments collection of the account.

    Returns:
        The full URL including the api-version query parameter.
    """
    base = (f"{ARM_ENDPOINT}/subscriptions/{sub}"
            f"/resourceGroups/{rg}"
            f"/providers/Microsoft.CognitiveServices/accounts/{account}"
            f"/deployments")
    if deploy_name:
        base += f"/{deploy_name}"
    return base + f"?api-version={API_VERSION}"


def detect_format(model_id):
    """Auto-detect model format and SKU from model ID.

    Returns:
        A ``(format, sku)`` tuple taken from the first matching FORMAT_RULES entry.
    """
    for check, fmt, sku in FORMAT_RULES:
        if check(model_id):
            return fmt, sku
    # Only reachable if FORMAT_RULES is edited to drop its catch-all entry.
    return "OpenAI", "Standard"


def create_deployment(sub, rg, account, name, model_id, model_format, sku, capacity):
    """Create a deployment via ARM REST API.

    Returns:
        True when ARM accepted the request (HTTP 200/201), False when the
        request was rejected or could not be sent.

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    token = get_arm_token()
    url = arm_url(sub, rg, account, name)

    body = {
        "sku": {"name": sku, "capacity": capacity},
        "properties": {
            "model": {
                "format": model_format,
                "name": model_id,
                "version": "1",
            }
        },
    }

    try:
        resp = requests.put(url, headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }, json=body, timeout=(10, 120))
    except requests.exceptions.RequestException as exc:
        # Report network failures the same way as HTTP failures instead of a traceback.
        print(f"❌ Deployment failed: {exc}")
        return False

    if resp.status_code in (200, 201):
        print(f"✅ Deployment '{name}' created (format={model_format}, sku={sku}, capacity={capacity})")
        return True
    print(f"❌ Deployment failed ({resp.status_code}): {_safe_error_msg(resp)}")
    return False


def wait_for_deployment(sub, rg, account, name, timeout=600, poll_interval=15):
    """Wait for deployment to reach 'Succeeded' state.

    Polls the deployment every `poll_interval` seconds, fetching a fresh token
    for each poll, until it succeeds, fails, is canceled, or `timeout`
    seconds have elapsed. Network errors and unexpected HTTP statuses are
    reported and retried.

    Returns:
        True if the deployment reached 'Succeeded'; False on 'Failed',
        'Canceled' or timeout.

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    url = arm_url(sub, rg, account, name)
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        token = get_arm_token()
        try:
            resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=(10, 60))
        except requests.exceptions.RequestException as e:
            print(f" ⚠️ Polling error: {e} — retrying in {poll_interval}s")
            time.sleep(poll_interval)
            continue

        if resp.status_code == 200:
            try:
                state = resp.json().get("properties", {}).get("provisioningState", "Unknown")
            except (ValueError, AttributeError):
                # Body was not JSON, or "properties" was null / not an object.
                state = "Unknown"
            print(f" Status: {state}")
            if state == "Succeeded":
                return True
            if state in ("Failed", "Canceled"):
                print(f" Deployment {state}.")
                return False
        else:
            # Surface the problem instead of silently polling until the timeout.
            print(f" ⚠️ Unexpected response ({resp.status_code}): {_safe_error_msg(resp)}"
                  f" — retrying in {poll_interval}s")
        time.sleep(poll_interval)

    print(f" Timed out after {timeout}s")
    return False


def delete_deployment(sub, rg, account, name):
    """Delete a deployment.

    Returns:
        True when ARM accepted the deletion (HTTP 200/202/204), False otherwise.

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    token = get_arm_token()
    url = arm_url(sub, rg, account, name)
    try:
        resp = requests.delete(url, headers={"Authorization": f"Bearer {token}"}, timeout=(10, 60))
    except requests.exceptions.RequestException as exc:
        print(f"❌ Delete failed: {exc}")
        return False
    if resp.status_code in (200, 202, 204):
        print(f"✅ Deployment '{name}' deleted.")
        return True
    print(f"❌ Delete failed ({resp.status_code}): {_safe_error_msg(resp)}")
    return False


def list_deployments(sub, rg, account):
    """List all deployments.

    Prints a table of name, model, SKU and provisioning state, following
    `nextLink` pagination when the service returns more than one page.

    Returns:
        True when the listing was retrieved (even if empty), False on failure.

    Raises:
        RuntimeError: if no ARM token can be obtained.
    """
    token = get_arm_token()
    headers = {"Authorization": f"Bearer {token}"}
    url = arm_url(sub, rg, account)
    deployments = []

    while url:
        try:
            resp = requests.get(url, headers=headers, timeout=(10, 60))
        except requests.exceptions.RequestException as exc:
            print(f"❌ Failed to list deployments: {exc}")
            return False
        if resp.status_code != 200:
            print(f"❌ Failed to list deployments ({resp.status_code}): {_safe_error_msg(resp)}")
            return False

        try:
            page = resp.json()
        except ValueError:
            page = None
        if not isinstance(page, dict):
            print(f"❌ Failed to parse deployment list: {resp.text[:200]}")
            return False

        deployments.extend(page.get("value") or [])
        next_link = page.get("nextLink")
        # Only follow continuation links that stay on the ARM endpoint, so the
        # bearer token is never sent to another host.
        if isinstance(next_link, str) and next_link.startswith(ARM_ENDPOINT + "/"):
            url = next_link
        else:
            url = None

    if not deployments:
        print("No deployments found.")
        return True

    print(f"{'Name':<40} {'Model':<40} {'SKU':<15} {'State':<15}")
    print("─" * 110)
    for d in deployments:
        name = _dig(d, "name")
        model = _dig(d, "properties", "model", "name")
        sku = _dig(d, "sku", "name")
        state = _dig(d, "properties", "provisioningState")
        print(f"{name:<40} {model:<40} {sku:<15} {state:<15}")
    return True


def _run(args):
    """Execute the action selected on the command line; return True on success."""
    if args.list:
        return list_deployments(args.sub, args.rg, args.account)

    if not args.name:
        print("Error: --name required for create/delete/wait")
        return False

    if args.delete:
        return delete_deployment(args.sub, args.rg, args.account, args.name)

    if not args.model_id:
        if args.wait:
            # Wait-only mode: poll an existing deployment
            return wait_for_deployment(args.sub, args.rg, args.account, args.name)
        print("Error: --model-id required for create")
        return False

    # Auto-detect format/SKU if not specified
    model_format = args.format
    sku = args.sku
    if not model_format or not sku:
        auto_fmt, auto_sku = detect_format(args.model_id)
        model_format = model_format or auto_fmt
        sku = sku or auto_sku
        print(f"Auto-detected: format={model_format}, sku={sku}")

    if not create_deployment(args.sub, args.rg, args.account, args.name,
                             args.model_id, model_format, sku, args.capacity):
        return False

    if args.wait:
        # The wait result decides the exit status, same as in wait-only mode.
        return wait_for_deployment(args.sub, args.rg, args.account, args.name)
    return True


def main():
    """Parse command-line arguments and run the requested deployment action.

    Returns normally on success; calls ``sys.exit(1)`` when arguments are
    incomplete or the action fails.
    """
    parser = HelpOnErrorParser(description="Deploy fine-tuned models on Azure AI Foundry")
    parser.add_argument("--sub", default=DEFAULT_SUB, help="Azure subscription ID")
    parser.add_argument("--rg", default=DEFAULT_RG, help="Resource group")
    parser.add_argument("--account", default=DEFAULT_ACCOUNT, help="Cognitive Services account")

    # Actions
    parser.add_argument("--list", action="store_true", help="List all deployments")
    parser.add_argument("--delete", action="store_true", help="Delete a deployment")
    parser.add_argument("--wait", action="store_true", help="Wait for deployment to succeed")

    # Deployment config
    parser.add_argument("--name", help="Deployment name (max 64 chars, alphanumeric + hyphens)")
    parser.add_argument("--model-id", help="Fine-tuned model ID (e.g., ft:gpt-4.1-mini:...)")
    parser.add_argument("--format", help="Model format (auto-detected if not specified)")
    parser.add_argument("--sku", help="SKU name (auto-detected if not specified)")
    parser.add_argument("--capacity", type=int, default=100, help="TPM capacity in thousands")

    args = parser.parse_args()

    if not all([args.sub, args.rg, args.account]):
        print("Error: Set --sub/--rg/--account or AZURE_SUBSCRIPTION_ID/AZURE_RESOURCE_GROUP/AZURE_COGSERVICES_ACCOUNT")
        sys.exit(1)

    try:
        ok = _run(args)
    except RuntimeError as exc:
        # Raised by get_arm_token(); show the message rather than a traceback.
        print(f"❌ {exc}")
        sys.exit(1)

    if not ok:
        sys.exit(1)


if __name__ == "__main__":
    main()