Update to 4o-mini and use Azure AI Services (#43)

ckittel · web-flow · commit 9be607651942 · 2025-04-24T08:47:53.000-05:00
* Update to 4o-mini and use Azure AI Services

* Delete temp file

* Fix quota line

* Make sure the DNS name (custom subdomain) is lowercased
diff --git a/README.md b/README.md
@@ -47,7 +47,7 @@ Follow these instructions to deploy this example to your Azure subscription, try
 
   - The subscription selected must have the following quota available in the location you'll select to deploy this implementation.
 
-    - Azure OpenAI: Standard, GPT-35-Turbo, 25K TPM
+    - Azure OpenAI: Standard, GPT-4o-mini, 10K TPM
     - Storage Accounts: 1
     - Total Cluster Dedicated Regional vCPUs: 4
     - Standard DASv4 Family Cluster Dedicated vCPUs: 4
@@ -130,14 +130,14 @@ To test this architecture, you'll be deploying a pre-built Prompt flow. The Prom
 
 1. Connect the `extract_query_from_question` Prompt flow step to your Azure OpenAI model deployment.
 
-      - For **Connection**, select 'aoai' from the dropdown menu. This is your deployed Azure OpenAI instance.
-      - For **deployment_name**, select 'gpt35' from the dropdown menu. This is the model you've deployed in that Azure OpenAI instance.
+      - For **Connection**, select 'azureaiservices_aoai' from the dropdown menu. This is your deployed Azure OpenAI instance.
+      - For **deployment_name**, select 'gpt4o' from the dropdown menu. This is the model you've deployed in that Azure OpenAI instance.
       - For **response_format**, select '{"type":"text"}' from the dropdown menu
 
 1. Connect the `augmented_chat` Prompt flow step to your Azure OpenAI model deployment.
 
-      - For **Connection**, select the same 'aoai' from the dropdown menu.
-      - For **deployment_name**, select the same 'gpt35' from the dropdown menu.
+      - For **Connection**, select the same 'azureaiservices_aoai' from the dropdown menu.
+      - For **deployment_name**, select the same 'gpt4o' from the dropdown menu.
       - For **response_format**, also select '{"type":"text"}' from the dropdown menu.
 
 1. Click **Save** on the flow.
@@ -186,7 +186,7 @@ Here you'll take your tested flow and deploy it to a managed online endpoint.
    There is a notice on the final screen that says:
 
    > Following connection(s) are using Microsoft Entra ID based authentication. You need to manually grant the endpoint identity access to the related resource of these connection(s).
-   > - aoai
+   > - azureaiservices_aoai
 
    This has already been taken care of by your IaC deployment. The managed online endpoint identity already has this permission to Azure OpenAI, so there is no action for you to take.
 
@@ -240,7 +240,7 @@ az group delete -n $RESOURCE_GROUP -y
 
 # Purge the soft delete resources
 az keyvault purge -n kv-${BASE_NAME} -l $LOCATION
-az cognitiveservices account purge -g $RESOURCE_GROUP -l $LOCATION -n oai-${BASE_NAME}
+az cognitiveservices account purge -g $RESOURCE_GROUP -l $LOCATION -n ais-${BASE_NAME}
 ```
 
 ## Contributions
diff --git a/bicep b/bicep
diff --git a/infra-as-code/bicep/machinelearning.bicep b/infra-as-code/bicep/machinelearning.bicep
@@ -14,38 +14,38 @@ param location string = resourceGroup().location
 param applicationInsightsName string
 param containerRegistryName string
 param keyVaultName string
-param aiStudioStorageAccountName string
+param aiFoundryStorageAccountName string
 
 @description('The name of the workload\'s existing Log Analytics workspace.')
 param logWorkspaceName string
 
-param openAiResourceName string
+param azureAiServicesResourceName string
 param yourPrincipalId string
 
 // ---- Existing resources ----
 
-resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = {
+resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2023-09-01' existing = {
   name: logWorkspaceName
 }
 
 resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = {
   name: applicationInsightsName
 }
 
-resource containerRegistry 'Microsoft.ContainerRegistry/registries@2023-08-01-preview' existing = {
+resource containerRegistry 'Microsoft.ContainerRegistry/registries@2024-11-01-preview' existing = {
   name: containerRegistryName
 }
 
-resource keyVault 'Microsoft.KeyVault/vaults@2023-07-01' existing = {
+resource keyVault 'Microsoft.KeyVault/vaults@2024-11-01' existing = {
   name: keyVaultName
 }
 
-resource aiStudioStorageAccount 'Microsoft.Storage/storageAccounts@2023-01-01' existing = {
-  name: aiStudioStorageAccountName
+resource aiStudioStorageAccount 'Microsoft.Storage/storageAccounts@2024-01-01' existing = {
+  name: aiFoundryStorageAccountName
 }
 
-resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-05-01' existing = {
-  name: openAiResourceName
+resource azureAiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' existing = {
+  name: azureAiServicesResourceName
 }
 
 @description('Built-in Role: [Cognitive Services OpenAI User](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#cognitive-services-openai-user)')
@@ -134,8 +134,8 @@ resource blobStorageContributorForUserRoleAssignment 'Microsoft.Authorization/ro
 
 @description('Assign your user the ability to invoke models in Azure OpenAI. This is needed to execute the Prompt flow from within in the Azure AI Foundry portal.')
 resource cognitiveServicesOpenAiUserForUserRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
-  scope: openAiAccount
-  name: guid(openAiAccount.id, yourPrincipalId, cognitiveServicesOpenAiUserRole.id)
+  scope: azureAiServices
+  name: guid(azureAiServices.id, yourPrincipalId, cognitiveServicesOpenAiUserRole.id)
   properties: {
     roleDefinitionId: cognitiveServicesOpenAiUserRole.id
     principalType: 'User'
@@ -144,7 +144,7 @@ resource cognitiveServicesOpenAiUserForUserRoleAssignment 'Microsoft.Authorizati
 }
 
 @description('A hub provides the hosting environment for this AI workload. It provides security, governance controls, and shared configurations.')
-resource aiHub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview' = {
+resource aiHub 'Microsoft.MachineLearningServices/workspaces@2025-01-01-preview' = {
   name: 'aihub-${baseName}'
   location: location
   kind: 'Hub'
@@ -160,8 +160,13 @@ resource aiHub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview'
     description: 'Hub to support the Microsoft Learn Azure OpenAI basic chat implementation. https://learn.microsoft.com/azure/architecture/ai-ml/architecture/basic-openai-e2e-chat'
     publicNetworkAccess: 'Enabled' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
     ipAllowlist: []
+    networkAcls: {
+      defaultAction: 'Allow' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
+      ipRules: []
+    }
     serverlessComputeSettings: null
     enableServiceSideCMKEncryption: false
+    provisionNetworkNow: false
     managedNetwork: {
       isolationMode: 'Disabled' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
     }
@@ -183,20 +188,20 @@ resource aiHub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview'
     imageBuildCompute: null
   }
 
-  resource aoaiConnection 'connections' = {
-    name: 'aoai'
+  resource azureAiServicesConnection 'connections' = {
+    name: 'azureaiservices'
     properties: {
       authType: 'AAD'
-      category: 'AzureOpenAI'
+      category: 'AIServices'
       isSharedToAll: true
       useWorkspaceManagedIdentity: true
-      peRequirement: 'NotRequired'
+      peRequirement: 'NotRequired'  // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
       sharedUserList: []
       metadata: {
         ApiType: 'Azure'
-        ResourceId: openAiAccount.id
+        ResourceId: azureAiServices.id
       }
-      target: openAiAccount.properties.endpoint
+      target: azureAiServices.properties.endpoint
     }
   }
 }
@@ -223,7 +228,7 @@ resource aiHubDiagSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-pre
 // ---- Chat project ----
 
 @description('This is a container for the chat project.')
-resource chatProject 'Microsoft.MachineLearningServices/workspaces@2024-04-01' = {
+resource chatProject 'Microsoft.MachineLearningServices/workspaces@2025-01-01-preview' = {
   name: 'aiproj-chat'
   location: location
   kind: 'Project'
@@ -238,7 +243,12 @@ resource chatProject 'Microsoft.MachineLearningServices/workspaces@2024-04-01' =
     friendlyName: 'Chat with Wikipedia project'
     description: 'Project to contain the "Chat with Wikipedia" example Prompt flow that is used as part of the Microsoft Learn Azure OpenAI basic chat implementation. https://learn.microsoft.com/azure/architecture/ai-ml/architecture/basic-openai-e2e-chat'
     v1LegacyMode: false
-    publicNetworkAccess: 'Enabled'
+    hbiWorkspace: false
+    allowRoleAssignmentOnRG: false // Require role assignments at the resource level.
+    enableDataIsolation: true
+    systemDatastoresAuthMode: 'identity'
+    enableServiceSideCMKEncryption: false
+    publicNetworkAccess: 'Enabled' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
     hubResourceId: aiHub.id
   }
 
@@ -276,8 +286,8 @@ resource projectSecretsReaderForOnlineEndpointRoleAssignment 'Microsoft.Authoriz
 
 @description('Assign the online endpoint the ability to invoke models in Azure OpenAI. This is needed to execute the Prompt flow from the managed endpoint.')
 resource projectOpenAIUserForOnlineEndpointRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
-  scope: openAiAccount
-  name: guid(openAiAccount.id, chatProject::endpoint.id, cognitiveServicesOpenAiUserRole.id)
+  scope: azureAiServices
+  name: guid(azureAiServices.id, chatProject::endpoint.id, cognitiveServicesOpenAiUserRole.id)
   properties: {
     roleDefinitionId: cognitiveServicesOpenAiUserRole.id
     principalType: 'ServicePrincipal'
diff --git a/infra-as-code/bicep/main.bicep b/infra-as-code/bicep/main.bicep
@@ -93,11 +93,11 @@ module aiStudio 'machinelearning.bicep' = {
     baseName: baseName
     applicationInsightsName: appInsightsModule.outputs.applicationInsightsName
     keyVaultName: keyVaultModule.outputs.keyVaultName
-    aiStudioStorageAccountName: storageModule.outputs.aiStudioStorageAccountName
+    aiFoundryStorageAccountName: storageModule.outputs.aiFoundryStorageAccountName
     containerRegistryName: 'cr${baseName}'
     yourPrincipalId: yourPrincipalId
     logWorkspaceName: logWorkspace.name
-    openAiResourceName: openaiModule.outputs.openAiResourceName
+    azureAiServicesResourceName: openaiModule.outputs.azureAiServicesResourceName
   }
 }
 
diff --git a/infra-as-code/bicep/openai.bicep b/infra-as-code/bicep/openai.bicep
@@ -9,19 +9,17 @@ param location string = resourceGroup().location
 @description('The name of the workload\'s existing Log Analytics workspace.')
 param logWorkspaceName string
 
-//variables
-var openaiName = 'oai-${baseName}'
-
 resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = {
   name: logWorkspaceName
 }
 
-resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview' = {
-  name: openaiName
+@description('Use Azure AI Services as a common gateway to other Azure AI services, such as Azure OpenAI.')
+resource azureAiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
+  name: 'ais-${baseName}'
   location: location
-  kind: 'OpenAI'
+  kind: 'AIServices'
   properties: {
-    customSubDomainName: 'oai-${baseName}'
+    customSubDomainName: 'ais-${toLower(baseName)}'
     publicNetworkAccess: 'Enabled' // Production readiness change: This sample uses identity as the perimeter. Production scenarios should layer in network perimeter control as well.
     disableLocalAuth: true
   }
@@ -33,94 +31,93 @@ resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview'
   resource blockingFilter 'raiPolicies' = {
     name: 'blocking-filter'
     properties: {
-      #disable-next-line BCP037
-      type: 'UserManaged'
-      basePolicyName: 'Microsoft.Default'
+      basePolicyName: 'Microsoft.DefaultV2'
       mode: 'Default'
       contentFilters: [
         /* PROMPT FILTERS */
         {
-          #disable-next-line BCP037
-          name: 'hate'
+          name: 'Hate'
+          blocking: true
+          enabled: true
+          severityThreshold: 'Low'
+          source: 'Prompt'
+        }
+        {
+          name: 'Sexual'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
+          severityThreshold: 'Low'
           source: 'Prompt'
         }
         {
-          #disable-next-line BCP037
-          name: 'sexual'
+          name: 'Selfharm'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
+          severityThreshold: 'Low'
           source: 'Prompt'
         }
         {
-          #disable-next-line BCP037
-          name: 'selfharm'
+          name: 'Violence'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
+          severityThreshold: 'Low'
           source: 'Prompt'
         }
         {
-          #disable-next-line BCP037
-          name: 'violence'
+          name: 'Jailbreak'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
           source: 'Prompt'
         }
         {
-          #disable-next-line BCP037
-          name: 'jailbreak'
+          name: 'Indirect Attack'
           blocking: true
           enabled: true
           source: 'Prompt'
         }
         {
-          #disable-next-line BCP037
-          name: 'profanity'
+          name: 'Profanity'
           blocking: true
           enabled: true
           source: 'Prompt'
         }
         /* COMPLETION FILTERS */
         {
-          #disable-next-line BCP037
-          name: 'hate'
+          name: 'Hate'
+          blocking: true
+          enabled: true
+          severityThreshold: 'Low'
+          source: 'Completion'
+        }
+        {
+          name: 'Sexual'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
+          severityThreshold: 'Low'
           source: 'Completion'
         }
         {
-          #disable-next-line BCP037
-          name: 'sexual'
+          name: 'Selfharm'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
+          severityThreshold: 'Low'
           source: 'Completion'
         }
         {
-          #disable-next-line BCP037
-          name: 'selfharm'
+          name: 'Violence'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
+          severityThreshold: 'Low'
           source: 'Completion'
         }
         {
-          #disable-next-line BCP037
-          name: 'violence'
+          name: 'Protected Material Text'
           blocking: true
           enabled: true
-          allowedContentLevel: 'Low'
           source: 'Completion'
         }
         {
-          #disable-next-line BCP037
-          name: 'profanity'
+          name: 'Protected Material Code'
           blocking: true
           enabled: true
           source: 'Completion'
@@ -129,21 +126,21 @@ resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview'
     }
   }
 
-  @description('Add a gpt-3.5 turbo deployment.')
-  resource gpt35 'deployments' = {
-    name: 'gpt35'
+  @description('Add a GPT-4o mini deployment.')
+  resource gpt4o 'deployments' = {
+    name: 'gpt4o'
     sku: {
       name: 'Standard'
-      capacity: 25
+      capacity: 4
     }
     properties: {
       model: {
         format: 'OpenAI'
-        name: 'gpt-35-turbo'
-        version: '0125' // If your selected region doesn't support this version, please change it.
-                        // az cognitiveservices model list -l $LOCATION --query "sort([?model.name == 'gpt-35-turbo' && kind == 'OpenAI'].model.version)" -o tsv
+        name: 'gpt-4o-mini'
+        version: '2024-07-18' // If your selected region doesn't support this version, please change the version to a supported one.
+                              // az cognitiveservices model list -l $LOCATION --query "sort([?model.name == 'gpt-4o-mini' && kind == 'OpenAI'].model.version)" -o tsv
       }
-      raiPolicyName: openAiAccount::blockingFilter.name
+      raiPolicyName: azureAiServices::blockingFilter.name
       versionUpgradeOption: 'OnceNewDefaultVersionAvailable' // Production readiness change: Always be explicit about model versions, use 'NoAutoUpgrade' to prevent version changes.
     }
   }
@@ -152,7 +149,7 @@ resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview'
 //OpenAI diagnostic settings
 resource openAIDiagSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = {
   name: 'default'
-  scope: openAiAccount
+  scope: azureAiServices
   properties: {
     workspaceId: logWorkspace.id
     logs: [
@@ -195,4 +192,4 @@ resource openAIDiagSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-pr
 
 // ---- Outputs ----
 
-output openAiResourceName string = openAiAccount.name
+output azureAiServicesResourceName string = azureAiServices.name
diff --git a/infra-as-code/bicep/storage.bicep b/infra-as-code/bicep/storage.bicep

Original file line number	Diff line number	Diff line change
`@@ -93,11 +93,11 @@ module aiStudio 'machinelearning.bicep' = {`
`93`	`93`	`baseName: baseName`
`94`	`94`	`applicationInsightsName: appInsightsModule.outputs.applicationInsightsName`
`95`	`95`	`keyVaultName: keyVaultModule.outputs.keyVaultName`
`96`		`- aiStudioStorageAccountName: storageModule.outputs.aiStudioStorageAccountName`
	`96`	`+ aiFoundryStorageAccountName: storageModule.outputs.aiFoundryStorageAccountName`
`97`	`97`	`containerRegistryName: 'cr${baseName}'`
`98`	`98`	`yourPrincipalId: yourPrincipalId`
`99`	`99`	`logWorkspaceName: logWorkspace.name`
`100`		`- openAiResourceName: openaiModule.outputs.openAiResourceName`
	`100`	`+ azureAiServicesResourceName: openaiModule.outputs.azureAiServicesResourceName`
`101`	`101`	`}`
`102`	`102`	`}`
`103`	`103`