Skip to content

Commit 9be6076

Browse files
authored
Update to 4o-mini and use Azure AI Services (#43)
* Update to 4o-mini and use Azure AI Services * Delete temp file * Fix quota line * make sure dns is lowered
1 parent f0664d0 commit 9be6076

File tree

6 files changed

+175
-93
lines changed

6 files changed

+175
-93
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Follow these instructions to deploy this example to your Azure subscription, try
4747

4848
- The subscription selected must have the following quota available in the location you'll select to deploy this implementation.
4949

50-
- Azure OpenAI: Standard, GPT-35-Turbo, 25K TPM
50+
- Azure OpenAI: Standard, GPT-4o-mini, 10K TPM
5151
- Storage Accounts: 1
5252
- Total Cluster Dedicated Regional vCPUs: 4
5353
- Standard DASv4 Family Cluster Dedicated vCPUs: 4
@@ -130,14 +130,14 @@ To test this architecture, you'll be deploying a pre-built Prompt flow. The Prom
130130

131131
1. Connect the `extract_query_from_question` Prompt flow step to your Azure OpenAI model deployment.
132132

133-
- For **Connection**, select 'aoai' from the dropdown menu. This is your deployed Azure OpenAI instance.
134-
- For **deployment_name**, select 'gpt35' from the dropdown menu. This is the model you've deployed in that Azure OpenAI instance.
133+
- For **Connection**, select 'azureaiservices_aoai' from the dropdown menu. This is your deployed Azure OpenAI instance.
134+
- For **deployment_name**, select 'gpt4o' from the dropdown menu. This is the GPT-4o mini model deployment you've created in that Azure OpenAI instance.
135135
- For **response_format**, select '{"type":"text"}' from the dropdown menu
136136

137137
1. Connect the `augmented_chat` Prompt flow step to your Azure OpenAI model deployment.
138138

139-
- For **Connection**, select the same 'aoai' from the dropdown menu.
140-
- For **deployment_name**, select the same 'gpt35' from the dropdown menu.
139+
- For **Connection**, select the same 'azureaiservices_aoai' from the dropdown menu.
140+
- For **deployment_name**, select the same 'gpt4o' from the dropdown menu.
141141
- For **response_format**, also select '{"type":"text"}' from the dropdown menu.
142142

143143
1. Click **Save** on the flow.
@@ -186,7 +186,7 @@ Here you'll take your tested flow and deploy it to a managed online endpoint.
186186
There is a notice on the final screen that says:
187187

188188
> Following connection(s) are using Microsoft Entra ID based authentication. You need to manually grant the endpoint identity access to the related resource of these connection(s).
189-
> - aoai
189+
> - azureaiservices_aoai
190190
191191
This has already been taken care of by your IaC deployment. The managed online endpoint identity already has this permission to Azure OpenAI, so there is no action for you to take.
192192

@@ -240,7 +240,7 @@ az group delete -n $RESOURCE_GROUP -y
240240

241241
# Purge the soft-deleted resources
242242
az keyvault purge -n kv-${BASE_NAME} -l $LOCATION
243-
az cognitiveservices account purge -g $RESOURCE_GROUP -l $LOCATION -n oai-${BASE_NAME}
243+
az cognitiveservices account purge -g $RESOURCE_GROUP -l $LOCATION -n ais-${BASE_NAME}
244244
```
245245

246246
## Contributions

bicep

79.8 MB
Binary file not shown.

infra-as-code/bicep/machinelearning.bicep

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,38 +14,38 @@ param location string = resourceGroup().location
1414
param applicationInsightsName string
1515
param containerRegistryName string
1616
param keyVaultName string
17-
param aiStudioStorageAccountName string
17+
param aiFoundryStorageAccountName string
1818

1919
@description('The name of the workload\'s existing Log Analytics workspace.')
2020
param logWorkspaceName string
2121

22-
param openAiResourceName string
22+
param azureAiServicesResourceName string
2323
param yourPrincipalId string
2424

2525
// ---- Existing resources ----
2626

27-
resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = {
27+
resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2023-09-01' existing = {
2828
name: logWorkspaceName
2929
}
3030

3131
resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = {
3232
name: applicationInsightsName
3333
}
3434

35-
resource containerRegistry 'Microsoft.ContainerRegistry/registries@2023-08-01-preview' existing = {
35+
resource containerRegistry 'Microsoft.ContainerRegistry/registries@2024-11-01-preview' existing = {
3636
name: containerRegistryName
3737
}
3838

39-
resource keyVault 'Microsoft.KeyVault/vaults@2023-07-01' existing = {
39+
resource keyVault 'Microsoft.KeyVault/vaults@2024-11-01' existing = {
4040
name: keyVaultName
4141
}
4242

43-
resource aiStudioStorageAccount 'Microsoft.Storage/storageAccounts@2023-01-01' existing = {
44-
name: aiStudioStorageAccountName
43+
resource aiStudioStorageAccount 'Microsoft.Storage/storageAccounts@2024-01-01' existing = {
44+
name: aiFoundryStorageAccountName
4545
}
4646

47-
resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-05-01' existing = {
48-
name: openAiResourceName
47+
resource azureAiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' existing = {
48+
name: azureAiServicesResourceName
4949
}
5050

5151
@description('Built-in Role: [Cognitive Services OpenAI User](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#cognitive-services-openai-user)')
@@ -134,8 +134,8 @@ resource blobStorageContributorForUserRoleAssignment 'Microsoft.Authorization/ro
134134

135135
@description('Assign your user the ability to invoke models in Azure OpenAI. This is needed to execute the Prompt flow from within the Azure AI Foundry portal.')
136136
resource cognitiveServicesOpenAiUserForUserRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
137-
scope: openAiAccount
138-
name: guid(openAiAccount.id, yourPrincipalId, cognitiveServicesOpenAiUserRole.id)
137+
scope: azureAiServices
138+
name: guid(azureAiServices.id, yourPrincipalId, cognitiveServicesOpenAiUserRole.id)
139139
properties: {
140140
roleDefinitionId: cognitiveServicesOpenAiUserRole.id
141141
principalType: 'User'
@@ -144,7 +144,7 @@ resource cognitiveServicesOpenAiUserForUserRoleAssignment 'Microsoft.Authorizati
144144
}
145145

146146
@description('A hub provides the hosting environment for this AI workload. It provides security, governance controls, and shared configurations.')
147-
resource aiHub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview' = {
147+
resource aiHub 'Microsoft.MachineLearningServices/workspaces@2025-01-01-preview' = {
148148
name: 'aihub-${baseName}'
149149
location: location
150150
kind: 'Hub'
@@ -160,8 +160,13 @@ resource aiHub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview'
160160
description: 'Hub to support the Microsoft Learn Azure OpenAI basic chat implementation. https://learn.microsoft.com/azure/architecture/ai-ml/architecture/basic-openai-e2e-chat'
161161
publicNetworkAccess: 'Enabled' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
162162
ipAllowlist: []
163+
networkAcls: {
164+
defaultAction: 'Allow' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
165+
ipRules: []
166+
}
163167
serverlessComputeSettings: null
164168
enableServiceSideCMKEncryption: false
169+
provisionNetworkNow: false
165170
managedNetwork: {
166171
isolationMode: 'Disabled' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
167172
}
@@ -183,20 +188,20 @@ resource aiHub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview'
183188
imageBuildCompute: null
184189
}
185190

186-
resource aoaiConnection 'connections' = {
187-
name: 'aoai'
191+
resource azureAiServicesConnection 'connections' = {
192+
name: 'azureaiservices'
188193
properties: {
189194
authType: 'AAD'
190-
category: 'AzureOpenAI'
195+
category: 'AIServices'
191196
isSharedToAll: true
192197
useWorkspaceManagedIdentity: true
193-
peRequirement: 'NotRequired'
198+
peRequirement: 'NotRequired' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
194199
sharedUserList: []
195200
metadata: {
196201
ApiType: 'Azure'
197-
ResourceId: openAiAccount.id
202+
ResourceId: azureAiServices.id
198203
}
199-
target: openAiAccount.properties.endpoint
204+
target: azureAiServices.properties.endpoint
200205
}
201206
}
202207
}
@@ -223,7 +228,7 @@ resource aiHubDiagSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-pre
223228
// ---- Chat project ----
224229

225230
@description('This is a container for the chat project.')
226-
resource chatProject 'Microsoft.MachineLearningServices/workspaces@2024-04-01' = {
231+
resource chatProject 'Microsoft.MachineLearningServices/workspaces@2025-01-01-preview' = {
227232
name: 'aiproj-chat'
228233
location: location
229234
kind: 'Project'
@@ -238,7 +243,12 @@ resource chatProject 'Microsoft.MachineLearningServices/workspaces@2024-04-01' =
238243
friendlyName: 'Chat with Wikipedia project'
239244
description: 'Project to contain the "Chat with Wikipedia" example Prompt flow that is used as part of the Microsoft Learn Azure OpenAI basic chat implementation. https://learn.microsoft.com/azure/architecture/ai-ml/architecture/basic-openai-e2e-chat'
240245
v1LegacyMode: false
241-
publicNetworkAccess: 'Enabled'
246+
hbiWorkspace: false
247+
allowRoleAssignmentOnRG: false // Require role assignments at the resource level.
248+
enableDataIsolation: true
249+
systemDatastoresAuthMode: 'identity'
250+
enableServiceSideCMKEncryption: false
251+
publicNetworkAccess: 'Enabled' // Production readiness change: The "Baseline" architecture adds ingress and egress network control over this "Basic" implementation.
242252
hubResourceId: aiHub.id
243253
}
244254

@@ -276,8 +286,8 @@ resource projectSecretsReaderForOnlineEndpointRoleAssignment 'Microsoft.Authoriz
276286

277287
@description('Assign the online endpoint the ability to invoke models in Azure OpenAI. This is needed to execute the Prompt flow from the managed endpoint.')
278288
resource projectOpenAIUserForOnlineEndpointRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
279-
scope: openAiAccount
280-
name: guid(openAiAccount.id, chatProject::endpoint.id, cognitiveServicesOpenAiUserRole.id)
289+
scope: azureAiServices
290+
name: guid(azureAiServices.id, chatProject::endpoint.id, cognitiveServicesOpenAiUserRole.id)
281291
properties: {
282292
roleDefinitionId: cognitiveServicesOpenAiUserRole.id
283293
principalType: 'ServicePrincipal'

infra-as-code/bicep/main.bicep

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,11 @@ module aiStudio 'machinelearning.bicep' = {
9393
baseName: baseName
9494
applicationInsightsName: appInsightsModule.outputs.applicationInsightsName
9595
keyVaultName: keyVaultModule.outputs.keyVaultName
96-
aiStudioStorageAccountName: storageModule.outputs.aiStudioStorageAccountName
96+
aiFoundryStorageAccountName: storageModule.outputs.aiFoundryStorageAccountName
9797
containerRegistryName: 'cr${baseName}'
9898
yourPrincipalId: yourPrincipalId
9999
logWorkspaceName: logWorkspace.name
100-
openAiResourceName: openaiModule.outputs.openAiResourceName
100+
azureAiServicesResourceName: openaiModule.outputs.azureAiServicesResourceName
101101
}
102102
}
103103

infra-as-code/bicep/openai.bicep

Lines changed: 47 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,17 @@ param location string = resourceGroup().location
99
@description('The name of the workload\'s existing Log Analytics workspace.')
1010
param logWorkspaceName string
1111

12-
//variables
13-
var openaiName = 'oai-${baseName}'
14-
1512
resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = {
1613
name: logWorkspaceName
1714
}
1815

19-
resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview' = {
20-
name: openaiName
16+
@description('Use Azure AI Services as a common gateway to other Azure AI services, such as Azure OpenAI.')
17+
resource azureAiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' = {
18+
name: 'ais-${baseName}'
2119
location: location
22-
kind: 'OpenAI'
20+
kind: 'AIServices'
2321
properties: {
24-
customSubDomainName: 'oai-${baseName}'
22+
customSubDomainName: 'ais-${toLower(baseName)}'
2523
publicNetworkAccess: 'Enabled' // Production readiness change: This sample uses identity as the perimeter. Production scenarios should layer in network perimeter control as well.
2624
disableLocalAuth: true
2725
}
@@ -33,94 +31,93 @@ resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview'
3331
resource blockingFilter 'raiPolicies' = {
3432
name: 'blocking-filter'
3533
properties: {
36-
#disable-next-line BCP037
37-
type: 'UserManaged'
38-
basePolicyName: 'Microsoft.Default'
34+
basePolicyName: 'Microsoft.DefaultV2'
3935
mode: 'Default'
4036
contentFilters: [
4137
/* PROMPT FILTERS */
4238
{
43-
#disable-next-line BCP037
44-
name: 'hate'
39+
name: 'Hate'
40+
blocking: true
41+
enabled: true
42+
severityThreshold: 'Low'
43+
source: 'Prompt'
44+
}
45+
{
46+
name: 'Sexual'
4547
blocking: true
4648
enabled: true
47-
allowedContentLevel: 'Low'
49+
severityThreshold: 'Low'
4850
source: 'Prompt'
4951
}
5052
{
51-
#disable-next-line BCP037
52-
name: 'sexual'
53+
name: 'Selfharm'
5354
blocking: true
5455
enabled: true
55-
allowedContentLevel: 'Low'
56+
severityThreshold: 'Low'
5657
source: 'Prompt'
5758
}
5859
{
59-
#disable-next-line BCP037
60-
name: 'selfharm'
60+
name: 'Violence'
6161
blocking: true
6262
enabled: true
63-
allowedContentLevel: 'Low'
63+
severityThreshold: 'Low'
6464
source: 'Prompt'
6565
}
6666
{
67-
#disable-next-line BCP037
68-
name: 'violence'
67+
name: 'Jailbreak'
6968
blocking: true
7069
enabled: true
71-
allowedContentLevel: 'Low'
7270
source: 'Prompt'
7371
}
7472
{
75-
#disable-next-line BCP037
76-
name: 'jailbreak'
73+
name: 'Indirect Attack'
7774
blocking: true
7875
enabled: true
7976
source: 'Prompt'
8077
}
8178
{
82-
#disable-next-line BCP037
83-
name: 'profanity'
79+
name: 'Profanity'
8480
blocking: true
8581
enabled: true
8682
source: 'Prompt'
8783
}
8884
/* COMPLETION FILTERS */
8985
{
90-
#disable-next-line BCP037
91-
name: 'hate'
86+
name: 'Hate'
87+
blocking: true
88+
enabled: true
89+
severityThreshold: 'Low'
90+
source: 'Completion'
91+
}
92+
{
93+
name: 'Sexual'
9294
blocking: true
9395
enabled: true
94-
allowedContentLevel: 'Low'
96+
severityThreshold: 'Low'
9597
source: 'Completion'
9698
}
9799
{
98-
#disable-next-line BCP037
99-
name: 'sexual'
100+
name: 'Selfharm'
100101
blocking: true
101102
enabled: true
102-
allowedContentLevel: 'Low'
103+
severityThreshold: 'Low'
103104
source: 'Completion'
104105
}
105106
{
106-
#disable-next-line BCP037
107-
name: 'selfharm'
107+
name: 'Violence'
108108
blocking: true
109109
enabled: true
110-
allowedContentLevel: 'Low'
110+
severityThreshold: 'Low'
111111
source: 'Completion'
112112
}
113113
{
114-
#disable-next-line BCP037
115-
name: 'violence'
114+
name: 'Protected Material Text'
116115
blocking: true
117116
enabled: true
118-
allowedContentLevel: 'Low'
119117
source: 'Completion'
120118
}
121119
{
122-
#disable-next-line BCP037
123-
name: 'profanity'
120+
name: 'Protected Material Code'
124121
blocking: true
125122
enabled: true
126123
source: 'Completion'
@@ -129,21 +126,21 @@ resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview'
129126
}
130127
}
131128

132-
@description('Add a gpt-3.5 turbo deployment.')
133-
resource gpt35 'deployments' = {
134-
name: 'gpt35'
129+
@description('Add a GPT-4o mini deployment.')
130+
resource gpt4o 'deployments' = {
131+
name: 'gpt4o'
135132
sku: {
136133
name: 'Standard'
137-
capacity: 25
134+
capacity: 4
138135
}
139136
properties: {
140137
model: {
141138
format: 'OpenAI'
142-
name: 'gpt-35-turbo'
143-
version: '0125' // If your selected region doesn't support this version, please change it.
144-
// az cognitiveservices model list -l $LOCATION --query "sort([?model.name == 'gpt-35-turbo' && kind == 'OpenAI'].model.version)" -o tsv
139+
name: 'gpt-4o-mini'
140+
version: '2024-07-18' // If your selected region doesn't support this version, please change the version to a supported one.
141+
// az cognitiveservices model list -l $LOCATION --query "sort([?model.name == 'gpt-4o-mini' && kind == 'OpenAI'].model.version)" -o tsv
145142
}
146-
raiPolicyName: openAiAccount::blockingFilter.name
143+
raiPolicyName: azureAiServices::blockingFilter.name
147144
versionUpgradeOption: 'OnceNewDefaultVersionAvailable' // Production readiness change: Always be explicit about model versions, use 'NoAutoUpgrade' to prevent version changes.
148145
}
149146
}
@@ -152,7 +149,7 @@ resource openAiAccount 'Microsoft.CognitiveServices/accounts@2023-10-01-preview'
152149
//OpenAI diagnostic settings
153150
resource openAIDiagSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = {
154151
name: 'default'
155-
scope: openAiAccount
152+
scope: azureAiServices
156153
properties: {
157154
workspaceId: logWorkspace.id
158155
logs: [
@@ -195,4 +192,4 @@ resource openAIDiagSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-pr
195192

196193
// ---- Outputs ----
197194

198-
output openAiResourceName string = openAiAccount.name
195+
output azureAiServicesResourceName string = azureAiServices.name

0 commit comments

Comments
 (0)