diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..da7361f28 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,50 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/python +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:0-3.9", + + // Features to add to the dev container. More info: https://containers.dev/features. + "features": { + "ghcr.io/devcontainers/features/azure-cli:1": { + "version": "latest" + }, + "ghcr.io/rchaganti/vsc-devcontainer-features/azurebicep:1": { + "version": "latest" + } + }, + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": [ + "ms-python.python", + "ms-vscode.azure-account", + "prompt-flow.prompt-flow" + ] + } + }, + + + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [9000], + + // Use 'portsAttributes' to set default properties for specific forwarded ports. + // More info: https://containers.dev/implementors/json_reference/#port-attributes + "portsAttributes": { + "9000": { + "label": "Hello World", + "onAutoForward": "notify" + } + }, + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "pip3 install -r \"./.devcontainer/requirements.txt\" && az extension add --name \"ml\"" + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ // "remoteUser": "root" +} \ No newline at end of file diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt new file mode 100644 index 000000000..bc30dc5a7 --- /dev/null +++ b/.devcontainer/requirements.txt @@ -0,0 +1,7 @@ +promptflow +promptflow-tools +promptflow-sdk[builtins] +jinja2 +promptflow[azure] +openai +python-dotenv \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2c4b65f20..0934a7dd0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +llmops_config.json +ssh/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 20a3311b9..53f36d88b 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ Additionally, there is a llmops_config.json file that refers to important infras # Documentation +- Full documentation on deploying a base architecture with/without network isolation can be found [here](./docs/tutorial/02-Infra%20deployment.md) - Full documentation on using this repo using Azure DevOps can be found [here](./docs/Azure_devops_how_to_setup.md) - Full documentation on using this repo using Github Workflows can be found [here](./docs/github_workflows_how_to_setup.md) - Documentation about adding a new flow is available [here](./docs/how_to_onboard_new_flows.md) @@ -83,9 +84,12 @@ The repo helps in deploying to **Kubernetes, Kubernetes ARC and AzureML Managed ![Deployment](./docs/images/endpoints.png) - ![A/B Deployments](./docs/images/abdeployments.png) +You will also find infrastructure as code to deploy the required Azure resources, with the option to enable network isolation: + +![Architecture](./docs/images/architecture.png) + # Pipeline The pipeline execution consists of multiple stages and jobs in each stage: @@ -110,7 +114,9 @@ To harness the capabilities of the **local execution**, follow these installatio git clone https://github.com/microsoft/llmops-promptflow-template.git ``` -2. 
**setup env file**: create .env file at top folder level and provide information for items mentioned. Add as many connection names as needed. All the flow examples in this repo uses AzureOpenAI connection named `aoai`. Add a line `aoai={"api_key": "","api_base": "","api_type": "azure","api_version": "2023-03-15-preview"}` with updated values for api_key and api_base. If additional connections with different names are used in your flows, they should be added accordingly. Currently, flow with AzureOpenAI as provider as supported. +1. **Optional: Use the dev container:** The code includes a dev container configuration file that can be used to create a development container with all the dependencies installed. This is the recommended way to run the code. If you are using VS Code, you can open the folder in a container by clicking on the "Reopen in Container" button in the bottom right corner of the window. The required packages and PromptFlow VS Code extension will be installed automatically when the container is created. If you are using another IDE, you can use the dev container configuration file to create a [development container](https://code.visualstudio.com/docs/devcontainers/containers). This requires [Docker Desktop](https://www.docker.com/products/docker-desktop/) to be installed on your machine. + +1. **setup env file**: create .env file at top folder level and provide information for items mentioned. Add as many connection names as needed. All the flow examples in this repo use an AzureOpenAI connection named `aoai`. Add a line `aoai={"api_key": "","api_base": "","api_type": "azure","api_version": "2023-03-15-preview"}` with updated values for api_key and api_base. If additional connections with different names are used in your flows, they should be added accordingly. Currently, only flows with AzureOpenAI as the provider are supported. 
```bash @@ -118,17 +124,18 @@ experiment_name= connection_name_1={ "api_key": "","api_base": "","api_type": "azure","api_version": "2023-03-15-preview"} connection_name_2={ "api_key": "","api_base": "","api_type": "azure","api_version": "2023-03-15-preview"} ``` -3. Prepare the local conda or virtual environment to install the dependencies. +1. Prepare the local conda or virtual environment to install the dependencies. +If you decide to not use the dev container, you can create a virtual environment or conda environment and install the dependencies using the following command: ```bash python -m pip install promptflow promptflow-tools promptflow-sdk jinja2 promptflow[azure] openai promptflow-sdk[builtins] python-dotenv ``` -4. Bring or write your flows into the template based on documentation [here](./docs/how_to_onboard_new_flows.md). +1. Bring or write your flows into the template based on documentation [here](./docs/how_to_onboard_new_flows.md). -5. Write python scripts similar to the provided examples in local_execution folder. +1. Write python scripts similar to the provided examples in local_execution folder. 
## Contributing diff --git a/deployment_config.json b/deployment_config.json new file mode 100644 index 000000000..de9439a27 --- /dev/null +++ b/deployment_config.json @@ -0,0 +1,43 @@ +{ + "azure_managed_endpoint": { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "PUBLIC_ACCESS": "true", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An online endpoint serving a flow for [task]", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100", + "DEPLOYMENT_VM_SIZE": "Standard_F4s_v2", + "DEPLOYMENT_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest", + "DEPLOYMENT_CONDA_PATH": "environment/conda.yml", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + }, + "kubernetes_endpoint": { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "PUBLIC_ACCESS": "true", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An kubernetes endpoint serving a flow for [task]", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100, + "COMPUTE_NAME": "", + "DEPLOYMENT_VM_SIZE": "promptinstancetype", + "DEPLOYMENT_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest", + "DEPLOYMENT_CONDA_PATH": "environment/conda.yml", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "CPU_ALLOCATION": "", + "MEMORY_ALLOCATION": "", + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } +} diff --git a/deployment_config.json.sample b/deployment_config.json.sample new file mode 100644 index 000000000..c0a0b2f60 --- /dev/null +++ b/deployment_config.json.sample @@ -0,0 +1,47 @@ +{ + "azure_managed_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + 
"PUBLIC_ACCESS": "true", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An online endpoint serving a flow for [task]", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100", + "DEPLOYMENT_VM_SIZE": "Standard_F4s_v2", + "DEPLOYMENT_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest", + "DEPLOYMENT_CONDA_PATH": "environment/conda.yml", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } + ], + "kubernetes_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "PUBLIC_ACCESS": "true", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An kubernetes endpoint serving a flow for [task]", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100, + "COMPUTE_NAME": "", + "DEPLOYMENT_VM_SIZE": "promptinstancetype", + "DEPLOYMENT_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest", + "DEPLOYMENT_CONDA_PATH": "environment/conda.yml", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "CPU_ALLOCATION": "", + "MEMORY_ALLOCATION": "", + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } + ] +} \ No newline at end of file diff --git a/docs/Azure_devops_how_to_setup.md b/docs/Azure_devops_how_to_setup.md index 1279f6fa8..d410b4999 100644 --- a/docs/Azure_devops_how_to_setup.md +++ b/docs/Azure_devops_how_to_setup.md @@ -197,6 +197,8 @@ curl --request POST \ }" ``` +**Note:** If you have provisioned a managed VNET for your Azure ML workspace, this operation will not work for now. You need to use a serverless runtime for now. + 15. Get runtime creation status using REST API. 
Execute this step multiple times unless either you get output that shows createdOn with a valid date and time value or failure. In case of failure, troubleshoot the issue before moving forward. ```bash @@ -330,6 +332,8 @@ Update configuration so that we can create a pull request for any one of the exa ### Update llmops_config.json +**Note:** If you decide to use [the infrastructure deployed with the deployment script of this code base](../docs/tutorial/02-Infra%20deployment.md), this file is created and populated automatically. + Modify the configuration values in `llmops_config.json` file available for each example based on description. Update the `KEYVAULT_NAME`, `RESOURCE_GROUP_NAME` and Azure Machine Learning `WORKSPACE_NAME`. - `ENV_NAME`: This represents the environment type. (The template supports *pr* and *dev* environments.) @@ -344,6 +348,8 @@ The template uses 'pr' and 'dev' to refer to environment types. The template can ### Update config/deployment_config.json +**Note:** If you decide to use [the infrastructure deployed with the deployment script of this code base](../docs/tutorial/02-Infra%20deployment.md), this file is created and populated automatically. You can modify some of the default values if required. + Modify the configuration values in `deployment_config.json` file for each environment. These are required for deploying Prompt flows in Azure ML. Ensure the values for `ENDPOINT_NAME` and `CURRENT_DEPLOYMENT_NAME` are changed before pushing the changes to remote repository. - `ENV_NAME`: This indicates the environment name, referring to the "development" or "production" or any other environment where the prompt will be deployed and used in real-world scenarios. 
diff --git a/docs/github_workflows_how_to_setup.md b/docs/github_workflows_how_to_setup.md index 97aa7094b..b5cf2d465 100644 --- a/docs/github_workflows_how_to_setup.md +++ b/docs/github_workflows_how_to_setup.md @@ -181,6 +181,8 @@ curl --request POST \ }" ``` +**Note:** If you have provisioned a managed VNET for your Azure ML workspace, this operation will not work for now. You need to use a serverless runtime for now. + 15. Get runtime creation status using REST API. Execute this step multiple times unless either you get output that shows createdOn with a valid date and time value or failure. In case of failure, troubleshoot the issue before moving forward. ```bash @@ -278,6 +280,8 @@ Update code so that we can create a pull request. Update the `llmops_config.json ### Update llmops_config.json +**Note:** If you decide to use [the infrastructure deployed with the deployment script of this code base](../docs/tutorial/02-Infra%20deployment.md), this file is created and populated automatically. + Modify the configuration values in the `llmops_config.json` file available for each example based on description. - `ENV_NAME`: This represents the environment type. (The template supports *pr* and *dev* environments.) @@ -292,6 +296,8 @@ For the optional post production evaluation workflow, the above configuration wi ### Update deployment_config.json in config folder +**Note:** If you decide to use [the infrastructure deployed with the deployment script of this code base](../docs/tutorial/02-Infra%20deployment.md), this file is created and populated automatically. You can modify some of the default values if required. + Modify the configuration values in the `deployment_config.json` file for each environment. These are required for deploying Prompt flows in Azure ML. Ensure the values for `ENDPOINT_NAME` and `CURRENT_DEPLOYMENT_NAME` are changed before pushing the changes to remote repository. 
- `ENV_NAME`: This indicates the environment name, referring to the "development" or "production" or any other environment where the prompt will be deployed and used in real-world scenarios. diff --git a/docs/images/architecture.png b/docs/images/architecture.png new file mode 100644 index 000000000..0a0df2da1 Binary files /dev/null and b/docs/images/architecture.png differ diff --git a/docs/images/firefox-proxy.png b/docs/images/firefox-proxy.png new file mode 100644 index 000000000..d0c61f620 Binary files /dev/null and b/docs/images/firefox-proxy.png differ diff --git a/docs/images/inbound-rule.png b/docs/images/inbound-rule.png new file mode 100644 index 000000000..1352a9080 Binary files /dev/null and b/docs/images/inbound-rule.png differ diff --git a/docs/images/managet-vnet-aml.svg b/docs/images/managet-vnet-aml.svg new file mode 100644 index 000000000..94dbf34ae --- /dev/null +++ b/docs/images/managet-vnet-aml.svg @@ -0,0 +1,1409 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + On + - + premises network + + + + + + + + + + + + + Your Azure VNet + + Express Route + VPN connection + Bastion with jump box VM + + + + + + + + + + + + + + + + + + + Machine + Learning + Workspace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Workspace default resources + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Compute + instance + + + + + + Compute + cluster + + + + + + Serverless + + + + + + Serverless + S + park + + + + + + Managed + online + endpoint + + + + + + + + + + + + Azure Machine Learning + managed VNet + + + + + + + + + + + + Your + business + storage + + + + + + + + + + + + + Your + Azure + OpenAI + + You can configure private + endpoint outbound rules to + your private resources. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Free access to machine + learning artifacts on the + Internet. + + + + + + + + + + + + + + + + + + (*) Private endpoints are provisionedif the public network access flag ofthe destination resource is disabled. + (*) + (*) + (*) + (*) + + + + diff --git a/docs/ssh_tunelling_access_workspace_vnet.md b/docs/ssh_tunelling_access_workspace_vnet.md new file mode 100644 index 000000000..05e52d8bd --- /dev/null +++ b/docs/ssh_tunelling_access_workspace_vnet.md @@ -0,0 +1,51 @@ +# Access the LLMOps environment behind a VNET using SSH tunneling + +If you deploy an environment with ```enableNetworkIsolation=true```, you cannot directly access the resources behind the VNET. + +Hence, we use SSH tunneling. +You have access to a virtual machine that will serve as a jumpbox. Here are the steps to follow: + +1. Get the private key to access the jumpbox via ssh generated during deployment from the Keyvault + +1. Navigate to the `~/.ssh` directory, and paste your clipboard into a new file: + +```bash +/> vi jumpbox +# Paste the content, save and close +/> chmod 600 jumpbox # to have proper permissions on the file +``` + +1. Open SSH port for your IP onto the jumpbox. + 1. [Find what your IP is](https://www.whatsmyip.org/), + 1. Navigate to the jumpbox in the portal, + 1. Click on `Networking` on the left, + 1. Add an `inbound port rule` + + ![Add inbound rule](./images/inbound-rule.png) + + Replace `source ip` with your IP and set a unique priority (lower than 65000) + +1. Get the public IP of the jumpbox (from the Azure Portal). + +1. Create an SSH tunnel from your machine by running the command: + +```bash +/> ssh -D 127.0.0.1:8090 -i ~/.ssh/jumpbox azureuser@<jumpbox-public-ip> +# use -i to use the key copied previously +# azureuser is the default user name of the VM; replace <jumpbox-public-ip> with the public IP obtained in the previous step +``` + +The command above opens an SSH connection to the jumpbox, exposing a local endpoint on port 8090. 
+ +You can navigate to the Azure resources by following the procedure hereafter: + +1. Use Firefox to proxy HTTP requests through the SSH tunnel. In order not to change all the settings of your host network, the easiest way is to use [Firefox browser](https://www.mozilla.org/en-US/). It allows proxying directly, without impacting the whole machine. + + 1. Install & start Firefox + 1. Open settings + 1. Scroll to the bottom, and open the Network settings + 1. Select manual proxy configuration and set the SOCKS host to `127.0.0.1` and the port to `8090`, the local endpoint opened by the SSH tunnel (see screenshot below) + + ![Firefox proxy settings](./images/firefox-proxy.png) + + 1. You can now access resources on the portal from Firefox diff --git a/docs/tutorial/02-Infra deployment.md b/docs/tutorial/02-Infra deployment.md new file mode 100644 index 000000000..391699b87 --- /dev/null +++ b/docs/tutorial/02-Infra deployment.md @@ -0,0 +1,82 @@ +# Tutorial 02: Infra deployment with Bicep + +## Introduction + +This tutorial will help you create the following baseline infrastructure for the application using Bicep. You can choose whether to enable network isolation. The network isolation is done through a custom VNET and a [managed VNET for the Azure Machine Learning Workspace](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-managed-network?view=azureml-api-2&tabs=azure-cli). + +## Architecture + +### Base components + +The following components are always deployed: + +- Azure Machine Learning Workspace +- Azure OpenAI service +- Azure Application Insights +- Azure Key Vault +- Azure Storage Account +- Azure Container Registry + +When network isolation is enabled, the following components are also deployed: + +- Azure Virtual Network and required subnets +- Linux Virtual Machine (jumpbox) used for SSH tunnelling to access the Azure Machine Learning Workspace +- Azure Machine Learning managed VNET + +![Architecture diagram of the components deployed with network isolation enabled. All resources are in the LLMOps subnet of the custom virtual network. 
A managed virtual network is provisioned for the Azure Machine Learning workspace](../images/architecture.png) + +### Network isolation + +When you deploy the architecture with the bicep code with `enableNetworkIsolation=true`, the resources are deployed in a custom VNET and the Azure ML workspace uses a [managed VNET](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-managed-network?view=azureml-api-2&tabs=azure-cli): + +![Architecture](../images/managet-vnet-aml.svg) + + +## Infra deployment script + +### Deployment process + +First make sure to create a resource group in which you'll deploy the infrastructure. The deployment is done through the [deploy-infra.sh script](../../infra/bicep/scripts/deploy-infra.sh). The script takes the following parameters: + +- **ENV**: the environment where the resources are deployed (DEV, QA, PREPROD, PROD). ***Required*** +- **RESOURCE_GROUP**: the resource group where the resources are deployed. ***Required*** +- **NETWORK_ISOLATION**: toggles network isolation on/off for the AML workspace and its dependent resources. ***Set to false by default*** + +If you want to further specify TENANT_ID and SUBSCRIPTION_ID to login to Azure or use a service principal, you can set the following parameters: + +- **APP_ID**: the APP_ID when you want to use a service principal to login to Azure +- **PASSWORD**: the PASSWORD when you want to use a service principal to login to Azure +- **TENANT_ID**: the TENANT_ID +- **SUBSCRIPTION_ID**: the SUBSCRIPTION_ID + +The script does the following: +1. Logs in to Azure. +1. Deploys the infrastructure using the [main.bicep file](../../infra/bicep/main.bicep) via the Azure CLI. +1. Exports the required outputs of the deployment (using the [export-deployment-variables.sh script](../../)) to the deployment_config.json and llmops_config.json configuration files that you can use when deploying your flows. +1. 
Note that as opposed to custom VNET deployment, the bicep code **configures** the managed VNET for the Azure ML workspace, but **does not provision** it. Hence the script triggers the provisioning operation using the `az ml workspace provision-network` command. This operation can take up to 40 minutes. + +Once the script finishes, you can use the deployment_config.json and llmops_config.json configuration files to deploy your flows, following the dedicated tutorials. + +### Examples + +**Example 1: Deploy the infrastructure in a resource group named `DevResourceGroup` for a `DEV` environment without network isolation. Azure Login relies on the identity of the person launching the script.** + +```bash +bash ./deploy-infra.sh -e DEV -r DevResourceGroup +``` + +**Example 2: Deploy the infrastructure in a resource group named `ProdResourceGroup` for a `PROD` environment with network isolation enabled. Azure Login relies on the identity of the person launching the script.** + +```bash +bash ./deploy-infra.sh -e PROD -r ProdResourceGroup -i true +``` + +**Example 3: Deploy the infrastructure in a resource group named `ProdResourceGroup` for a `PROD` environment with network isolation enabled. Azure login relies on a service principal.** + +```bash +bash ./deploy-infra.sh -e PROD -r ProdResourceGroup -i true -a 1234abcd-123a-1234-abcd-123456abcdef -p password -t 1234abcd-123a-1234-abcd-123456abcdef -s 1234abcd-123a-1234-abcd-123456abcdef +``` + +### Connect to network-isolated AML workspace + +In the above process, an SSH key-pair was generated in the ssh folder. You can use the private key to connect to the Linux VM and access the Azure Machine Learning workspace. The steps are detailed in the [Access the LLMOps environment behind a VNET using SSH tunneling](../ssh_tunelling_access_workspace_vnet.md) tutorial. 
diff --git a/docs/tutorial/02-Development.md b/docs/tutorial/03-Development.md similarity index 100% rename from docs/tutorial/02-Development.md rename to docs/tutorial/03-Development.md diff --git a/docs/tutorial/03-Operationalization.md b/docs/tutorial/04-Operationalization.md similarity index 100% rename from docs/tutorial/03-Operationalization.md rename to docs/tutorial/04-Operationalization.md diff --git a/docs/tutorial/04-Patterns.md b/docs/tutorial/05-Patterns.md similarity index 100% rename from docs/tutorial/04-Patterns.md rename to docs/tutorial/05-Patterns.md diff --git a/infra/bicep/main.bicep b/infra/bicep/main.bicep new file mode 100644 index 000000000..2fa42f444 --- /dev/null +++ b/infra/bicep/main.bicep @@ -0,0 +1,191 @@ +@description('Location for all resources.') +param location string = resourceGroup().location + +@description('Type of environment (dev, qa, prod, ...).') +param environmentType string + +@description('The SKU for Key Vault.') +param keyVaultSku string = 'premium' + +@description('Public SSH key to connect to the Linux jumpbox') +param jumpboxSshKey string + +@description('Associated private SSH key to connect to the Linux jumpbox') +@secure() +param jumpboxSshPrivateKey string + +@description('Enable public access to ease dev tests?') +param enableNetworkIsolation bool = false + +@description('Set of tags to apply to all resources.') +param tags object = { + environmentType: environmentType +} +// Parameters for the storage account +param storageSku string ='Standard_LRS' + +module nsg 'modules/nsg.bicep' = { + name: 'nsg-${uniqueString(resourceGroup().id)}' + params: { + location: location + tags: tags + nameNsg: 'nsg-${uniqueString(resourceGroup().id)}' + } +} + +module network 'modules/network.bicep' = { + name: 'vnet' + params: { + location: location + environmentType: environmentType + idNetworkSecurityGroup: nsg.outputs.networkSecurityGroup + vnetName: 'vnet-${uniqueString(resourceGroup().id)}' + vnetAddressSpace: 
'10.1.0.0/16' + llmopsSubnet: '10.1.4.0/24' + jumpboxSubnet: '10.1.6.0/24' + enableNetworkIsolation: enableNetworkIsolation + } +} + +module appInsights 'modules/app-insights.bicep' = { + name: 'appInsights' + params: { + location: location + environmentType: environmentType + enableNetworkIsolation: enableNetworkIsolation + } +} + + +module keyVault 'modules/key-vault.bicep' = { + name: 'keyVault' + params: { + location: location + environmentType: environmentType + skuName: keyVaultSku + enableNetworkIsolation: enableNetworkIsolation + sshPrivateKey: jumpboxSshPrivateKey + virtualNetworkId: network.outputs.vnetId + subnetId: network.outputs.llmopsSubnetId + } +} + + + +// Creating two storage accounts: +// - one for the Azure Machine Learning workspace +module storage 'modules/storage.bicep' = { + name: 'storagellmops' + params: { + location: location + nameStorage: 'stllmops${uniqueString(resourceGroup().id)}' + nameStoragePleBlob: 'pep-blob-stllmops${uniqueString(resourceGroup().id)}' + nameStoragePleFile: 'pep-file-stllmops${uniqueString(resourceGroup().id)}' + nameStorageSku: storageSku + subnetId: network.outputs.llmopsSubnetId + virtualNetworkId: network.outputs.vnetId + enableNetworkIsolation: enableNetworkIsolation + tags: tags + } +} + + +// Creating the Azure Container registry required by +// Azure machine Learning to serve as a model registry +module containerRegistry 'modules/container-registry.bicep' = { + name: 'llmopsContainerRegistry' + params: { + location: location + nameContainerRegistry: 'acr${uniqueString(resourceGroup().id)}' + nameContainerRegistryPep: 'pep-acr-${uniqueString(resourceGroup().id)}' + subnetId: network.outputs.llmopsSubnetId + virtualNetworkId: network.outputs.vnetId + enableNetworkIsolation: enableNetworkIsolation + tags: tags + } +} + + +// Creating the Azure Machine Learning workspace, compute and networking resources +module azuremlWorkspace 'modules/machine-learning-workspace.bicep' = { + name: 'azuremlWorkspace' + 
params: { + // workspace organization + nameMachineLearning: 'amlws-${environmentType}-${uniqueString(resourceGroup().id)}' + nameMachineLearningFriendly: 'Azure ML ${environmentType} workspace' + descriptionMachineLearning: 'This is an AML workspace for ${environmentType} environment' + location: location + tags: tags + + // dependant resources + applicationInsightsId: appInsights.outputs.id + containerRegistryId: containerRegistry.outputs.containerRegistryId + keyVaultId: keyVault.outputs.keyVaultId + storageAccountId: storage.outputs.storageId + azureOpenAIId: azureOpenAI.outputs.azureOpenAIId + + // networking + subnetId: network.outputs.llmopsSubnetId + virtualNetworkId: network.outputs.vnetId + machineLearningPepName: 'pep-amlws-${uniqueString(resourceGroup().id)}' + + enableNetworkIsolation: enableNetworkIsolation + } + dependsOn: [ + keyVault + containerRegistry + appInsights + storage + ] +} + +// Creating all the role assignments required for the end-to-end flow to work +module rolesAssignments 'modules/rolesAssignments.bicep' = { + name: 'rolesAssignments-${uniqueString(resourceGroup().id)}' + params: { + nameStorage: storage.outputs.nameStorage + azuremlWorkspacePrincipalId: azuremlWorkspace.outputs.machineLearningPrincipalId + } +} + +// Creating the Azure OpenAI resource +module azureOpenAI 'modules/azure-openai.bicep' = { + name: 'azureOpenAI' + params: { + //Azure OpenAI resource + nameAOAI: 'aoai-eastus-${environmentType}-${uniqueString(resourceGroup().id)}' + location: location + nameDeploymentAOAI: 'gpt-35-turbo' + nameDeployedModel: 'gpt-35-turbo' + versionDeployedModel: '0301' + skuAOAI: { + name: 'S0' + } + environmentType: environmentType + enableNetworkIsolation: enableNetworkIsolation + + // Networking + subnetId: network.outputs.llmopsSubnetId + virtualNetworkId: network.outputs.vnetId + namePepAOAI: 'pep-aoai-eastus-${uniqueString(resourceGroup().id)}' + privateDnsZoneName: 'privatelink.openai.azure.com' + + } +} + + +// Creating the SSH 
linux VM +module linuxmachine 'modules/linux-machine.bicep' = if (enableNetworkIsolation) { + name: 'sshmachine' + params: { + location: location + nameVm: 'VMsshLinux' + subnetId: network.outputs.jumpboxSubnetId + adminUsername: 'azureuser' + sshKey: jumpboxSshKey + networkSecurityGroupId: nsg.outputs.networkSecurityGroup + } +} + +output keyVaultName string = keyVault.outputs.keyVaultName +output amlWorkspaceName string = azuremlWorkspace.outputs.nameMachineLearning diff --git a/infra/bicep/modules/app-insights.bicep b/infra/bicep/modules/app-insights.bicep new file mode 100644 index 000000000..b8182f441 --- /dev/null +++ b/infra/bicep/modules/app-insights.bicep @@ -0,0 +1,38 @@ +@description('The type of the environment') +param environmentType string +@description('Application Insights name') +param appInsightsName string = 'appinsights-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('Log Analytics workspace name') +param logAnalyticsWorkspaceName string = 'loganalyticsw-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('Location for all resources.') +param location string = resourceGroup().location +@description('Enable network isolation? When true, public network access for Log Analytics queries is disabled.') +param enableNetworkIsolation bool + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' = { + name: logAnalyticsWorkspaceName + location: location + properties: { + sku: { + name: 'PerGB2018' + } + retentionInDays: 30 + publicNetworkAccessForIngestion: 'Enabled' + publicNetworkAccessForQuery: ( !enableNetworkIsolation ? 
'Enabled' : 'Disabled' ) + } +} + +resource appInsights 'Microsoft.Insights/components@2020-02-02' = { + name: appInsightsName + location: location + kind: 'web' + properties: { + Application_Type: 'web' + Request_Source: 'rest' + WorkspaceResourceId: logAnalyticsWorkspace.id + Flow_Type: 'Bluefield' + } +} + +output connectionString string = appInsights.properties.ConnectionString +output id string = appInsights.id diff --git a/infra/bicep/modules/azure-openai.bicep b/infra/bicep/modules/azure-openai.bicep new file mode 100644 index 000000000..c153066ee --- /dev/null +++ b/infra/bicep/modules/azure-openai.bicep @@ -0,0 +1,149 @@ +@description('Name of the Azure OpenAI resource') +param nameAOAI string + +@description('The Azure Region to deploy the resources into') +param location string + +@description('Name of the deployment that appears in the studio') +param nameDeploymentAOAI string + +@description('Name of the model that will be deployed') +param nameDeployedModel string + +@description('Version of the model that will be deployed') +param versionDeployedModel string + +@description('SKU of the Azure OpenAI resource') +param skuAOAI object = { + name: 'S0' +} + +@description('Scale type of the Azure OpenAI resource') +param scaleTypeAOAI string = 'Standard' + +@description('Environment type') +param environmentType string + +@description('Azure OpenAI private link endpoint name') +param namePepAOAI string + +@description('Resource ID of the subnet') +param subnetId string + +@description('Resource ID of the virtual network') +param virtualNetworkId string + +@description('Enable public access to ease dev tests?') +param enableNetworkIsolation bool + +param privateDnsZoneName string + +// Array containing the models we want to deploy +var deploymentsAOAI = [ + { + name: nameDeploymentAOAI + model: { + format: 'OpenAI' + name: nameDeployedModel + version: versionDeployedModel + } + scaleSettings: { + scaleType: scaleTypeAOAI + } + } +] + +param tags object = { 
+ Creator: 'ServiceAccount' + Service: 'OpenAI' + Environment: environmentType +} + + +// Azure OpenAI service +resource azureOpenAI 'Microsoft.CognitiveServices/accounts@2023-05-01' = { + name: nameAOAI + location: location + kind: 'OpenAI' + properties: { + publicNetworkAccess: (!enableNetworkIsolation ? 'Enabled' : 'Disabled') + customSubDomainName: nameAOAI + networkAcls: { + defaultAction: (!enableNetworkIsolation ? 'Allow' : 'Deny') + } + } + tags: tags + sku: skuAOAI +} + +@batchSize(1) +resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deploymentsAOAI: { + parent: azureOpenAI + sku: { + name: 'Standard' + capacity: 120 + } + name: deployment.name + properties: { + model: deployment.model + } +}] + + + +resource azureOpenAIPrivateEndpoint 'Microsoft.Network/privateEndpoints@2023-02-01' = if (enableNetworkIsolation) { + name: namePepAOAI + location: location + tags: tags + properties: { + privateLinkServiceConnections: [ + { + name: namePepAOAI + properties: { + groupIds: [ + 'account' + ] + privateLinkServiceId: azureOpenAI.id + } + } + ] + subnet: { + id: subnetId + } + } +} + +resource azureOpenAIPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: privateDnsZoneName + location: 'global' +} + +resource privateEndpointDns 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2023-02-01' = if (enableNetworkIsolation) { + name: 'openai-PrivateDnsZoneGroup' + parent: azureOpenAIPrivateEndpoint + properties:{ + privateDnsZoneConfigs: [ + { + name: privateDnsZoneName + properties:{ + privateDnsZoneId: azureOpenAIPrivateDnsZone.id + } + } + ] + } +} + +resource azureAOAIPrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(azureOpenAI.id) + parent: azureOpenAIPrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: 
virtualNetworkId + } + } +} + +output azureOpenAIId string = azureOpenAI.id +output azureOpenAIName string = azureOpenAI.name diff --git a/infra/bicep/modules/container-registry.bicep b/infra/bicep/modules/container-registry.bicep new file mode 100644 index 000000000..6011d21c2 --- /dev/null +++ b/infra/bicep/modules/container-registry.bicep @@ -0,0 +1,116 @@ +// Creates an Azure Container Registry with Azure Private Link endpoint +@description('Azure region of the deployment') +param location string + +@description('Tags to add to the resources') +param tags object + +@description('Container registry name') +param nameContainerRegistry string + +@description('Container registry private link endpoint name') +param nameContainerRegistryPep string + +@description('Resource ID of the subnet') +param subnetId string + +@description('Resource ID of the virtual network') +param virtualNetworkId string + +@description('Enable public access to ease dev tests?') +param enableNetworkIsolation bool + + +var nameContainerRegistryCleaned = replace(nameContainerRegistry, '-', '') +var privateDnsZoneName = 'privatelink${environment().suffixes.acrLoginServer}' +var groupName = 'registry' + + +resource containerRegistry 'Microsoft.ContainerRegistry/registries@2023-07-01' = { + name: nameContainerRegistryCleaned + location: location + tags: tags + sku: { + name: 'Premium' + } + properties: { + adminUserEnabled: true + dataEndpointEnabled: false + networkRuleBypassOptions: 'AzureServices' + networkRuleSet: { + defaultAction: (!enableNetworkIsolation ? 'Allow' : 'Deny') + } + policies: { + quarantinePolicy: { + status: 'disabled' + } + retentionPolicy: { + status: 'enabled' + days: 7 + } + trustPolicy: { + status: 'disabled' + type: 'Notary' + } + } + publicNetworkAccess: (!enableNetworkIsolation ? 
'Enabled' : 'Disabled') + zoneRedundancy: 'Disabled' + } +} + +resource containerRegistryPrivateEndpoint 'Microsoft.Network/privateEndpoints@2022-01-01' = if (enableNetworkIsolation) { + name: nameContainerRegistryPep + location: location + tags: tags + properties: { + privateLinkServiceConnections: [ + { + name: nameContainerRegistryPep + properties: { + groupIds: [ + groupName + ] + privateLinkServiceId: containerRegistry.id + } + } + ] + subnet: { + id: subnetId + } + } +} + +resource acrPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: privateDnsZoneName + location: 'global' +} + +resource privateEndpointDns 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-01-01' = if (enableNetworkIsolation) { + name: '${groupName}-PrivateDnsZoneGroup' + parent: containerRegistryPrivateEndpoint + properties:{ + privateDnsZoneConfigs: [ + { + name: privateDnsZoneName + properties:{ + privateDnsZoneId: acrPrivateDnsZone.id + } + } + ] + } +} + +resource acrPrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(containerRegistry.id) + parent: acrPrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: virtualNetworkId + } + } +} + +output containerRegistryId string = containerRegistry.id +output nameContainerRegistry string = containerRegistry.name diff --git a/infra/bicep/modules/key-vault.bicep b/infra/bicep/modules/key-vault.bicep new file mode 100644 index 000000000..6df059cae --- /dev/null +++ b/infra/bicep/modules/key-vault.bicep @@ -0,0 +1,119 @@ +@description('The type of the environment') +param environmentType string +@description('The name of the key vault to be created.') +param vaultName string = 'kv-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('Location for all resources.') +param location string = resourceGroup().location +@description('The 
SKU of the vault to be created.') +@allowed([ + 'standard' + 'premium' +]) +param skuName string = 'standard' +@description('The name of the key vault pep to be created.') +param vaultPepName string = 'pep-kv-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('The Subnet ID where the Key Vault Private Link is to be created') +param subnetId string +@description('The VNet ID where the Key Vault Private Link is to be created') +param virtualNetworkId string +@description('Enable Network Isolation for the Key Vault') +param enableNetworkIsolation bool + +@description('Associated private SSH key to connect to the Linux VM') +@secure() +param sshPrivateKey string + + +resource keyVault 'Microsoft.KeyVault/vaults@2023-02-01' = { + name: vaultName + location: location + properties: { + accessPolicies: [] + createMode: 'default' + enableRbacAuthorization: true + enableSoftDelete: true + softDeleteRetentionInDays: 90 + enablePurgeProtection: null + enabledForDeployment: true + enabledForDiskEncryption: false + enabledForTemplateDeployment: true + tenantId: subscription().tenantId + publicNetworkAccess: (enableNetworkIsolation ? 'Disabled' : null) + sku: { + name: skuName + family: 'A' + } + networkAcls: { + defaultAction: (!enableNetworkIsolation ? 
'Allow' : 'Deny') + bypass: 'AzureServices' + } + } +} + +resource keyVaultPrivateEndpoint 'Microsoft.Network/privateEndpoints@2022-01-01' = if (enableNetworkIsolation) { + name: vaultPepName + location: location + properties: { + privateLinkServiceConnections: [ + { + name: vaultPepName + properties: { + groupIds: [ + 'vault' + ] + privateLinkServiceId: keyVault.id + } + } + ] + subnet: { + id: subnetId + } + } +} + +resource keyVaultPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: 'privatelink.vaultcore.azure.net' + location: 'global' +} + +resource privateEndpointDns 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-01-01' = if (enableNetworkIsolation) { + name: 'vault-PrivateDnsZoneGroup' + parent: keyVaultPrivateEndpoint + properties:{ + privateDnsZoneConfigs: [ + { + name: keyVaultPrivateDnsZone.name + properties:{ + privateDnsZoneId: keyVaultPrivateDnsZone.id + } + } + ] + } +} + +resource keyVaultPrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(keyVault.id) + parent: keyVaultPrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: virtualNetworkId + } + } +} + + +resource sshLinuxPrivateKey 'Microsoft.KeyVault/vaults/secrets@2023-02-01' = if (enableNetworkIsolation) { + name: 'ssh-private-key-linux' + parent: keyVault + properties: { + value: sshPrivateKey + } +} + + + +output keyVaultDnsZoneName string = keyVaultPrivateDnsZone.name +output keyVaultId string = keyVault.id +output keyVaultName string = keyVault.name diff --git a/infra/bicep/modules/linux-machine.bicep b/infra/bicep/modules/linux-machine.bicep new file mode 100644 index 000000000..074e88342 --- /dev/null +++ b/infra/bicep/modules/linux-machine.bicep @@ -0,0 +1,131 @@ +// Creates a Data Science Virtual Machine jumpbox. 
+@description('Azure region of the deployment')
+param location string = resourceGroup().location
+
+@description('Resource ID of the subnet')
+param subnetId string
+
+@description('Network Security Group Resource ID')
+param networkSecurityGroupId string
+
+@description('Virtual machine admin username')
+param adminUsername string
+
+@description('Public SSH key to connect to the Linux VM')
+param sshKey string
+
+
+@description('Name of the Linux VM')
+param nameVm string='VMsshLinux'
+
+
+@description('Size of the Linux VM')
+param vmSize string='Standard_DS1_v2'
+
+var setSshKey = !empty(sshKey) // True if a non-empty string is provided
+var dnsLabelPrefix=toLower('${nameVm}-${uniqueString(resourceGroup().id)}') // DNS label of the public IP, lower-cased
+var publicIPAddressName = '${nameVm}PublicIP'
+var osDiskType = 'Standard_LRS'
+var nameNic='${nameVm}-nic'
+
+resource networkInterface 'Microsoft.Network/networkInterfaces@2022-07-01' = { // NIC of the VM: placed in subnetId, bound to the public IP and to the supplied NSG
+  name: nameNic
+  location: location
+  properties: {
+    ipConfigurations: [
+      {
+        name: 'ipconfig1'
+        properties: {
+          subnet: {
+            id: subnetId
+          }
+          privateIPAllocationMethod: 'Dynamic'
+          publicIPAddress: {
+            id: publicIPAddress.id
+          }
+        }
+      }
+    ]
+    networkSecurityGroup: {
+      id: networkSecurityGroupId
+    }
+  }
+}
+
+
+resource publicSshKey 'Microsoft.Compute/sshPublicKeys@2022-11-01' = if (!empty(sshKey)) { // only deployed when a public key is supplied
+  name: 'sshkey-linuxmachine'
+  location: location
+  properties: {
+    publicKey: sshKey
+  }
+}
+
+var linuxConfiguration = { // selected by the VM when sshKey is empty
+  disablePasswordAuthentication: true
+}
+var linuxConfigurationWithSSH = { // selected by the VM when sshKey is non-empty (see setSshKey)
+  disablePasswordAuthentication: true
+  ssh: {
+    publicKeys: [
+      {
+        path: '/home/${adminUsername}/.ssh/authorized_keys' // must be the hidden ~/.ssh directory; 'ssh/' is not a location Azure or sshd accepts
+        keyData: publicSshKey.properties.publicKey
+      }
+    ]
+  }
+}
+
+
+resource publicIPAddress 'Microsoft.Network/publicIPAddresses@2023-02-01' = { // public IP referenced by the NIC, with DNS label dnsLabelPrefix
+  name: publicIPAddressName
+  location: location
+  sku:{
+    name:'Basic'
+  }
+  properties:{
+    publicIPAllocationMethod:'Dynamic'
+    dnsSettings:{
+      domainNameLabel:dnsLabelPrefix
+    }
+    idleTimeoutInMinutes :4
} +} + +resource vm 'Microsoft.Compute/virtualMachines@2023-03-01' = { + name: nameVm + location: location + properties:{ + hardwareProfile:{ + vmSize : vmSize + } + storageProfile:{ + osDisk:{ + createOption:'FromImage' + managedDisk:{ + storageAccountType : osDiskType + } + } + imageReference: { + publisher: 'Canonical' + offer: '0001-com-ubuntu-server-focal' + sku: '20_04-lts-gen2' + version: 'latest' + } + } + networkProfile:{ + networkInterfaces:[ + { + id : networkInterface.id + } + ] + } + osProfile:{ + computerName : nameVm + adminUsername : adminUsername + linuxConfiguration : (setSshKey ? linuxConfigurationWithSSH : linuxConfiguration) + } + } +} + +output vmId string = vm.id diff --git a/infra/bicep/modules/machine-learning-workspace.bicep b/infra/bicep/modules/machine-learning-workspace.bicep new file mode 100644 index 000000000..69090f966 --- /dev/null +++ b/infra/bicep/modules/machine-learning-workspace.bicep @@ -0,0 +1,170 @@ +// Creates a machine learning workspace, private endpoints and DNS zones for the azure machine learning workspace + +@description('Azure region of the deployment') +param location string + +@description('Tags to add to the resources') +param tags object + +@description('Machine learning workspace name') +param nameMachineLearning string + +@description('Machine learning workspace display name') +param nameMachineLearningFriendly string = nameMachineLearning + +@description('Machine learning workspace description') +param descriptionMachineLearning string + +@description('Resource ID of the application insights resource') +param applicationInsightsId string + +@description('Resource ID of the container registry resource') +param containerRegistryId string + +@description('Resource ID of the key vault resource') +param keyVaultId string + +@description('Resource ID of the storage account resource') +param storageAccountId string + +@description('Resource ID of the Azure OpenAI resource') +param azureOpenAIId string + 
+@description('Resource ID of the subnet resource') +param subnetId string + +@description('Resource ID of the virtual network') +param virtualNetworkId string + +@description('Machine learning workspace private link endpoint name') +param machineLearningPepName string + +@description('Enable public access to ease dev tests?') +param enableNetworkIsolation bool + +var privateDnsZoneName = 'privatelink.api.azureml.ms' +var privateAznbDnsZoneName = 'privatelink.notebooks.azure.net' + +resource machineLearning 'Microsoft.MachineLearningServices/workspaces@2023-10-01' = { + name: nameMachineLearning + location: location + tags: tags + identity: { + type: 'SystemAssigned' + } + properties: { + // workspace organization + friendlyName: nameMachineLearningFriendly + description: descriptionMachineLearning + + // dependent resources + applicationInsights: applicationInsightsId + containerRegistry: containerRegistryId + keyVault: keyVaultId + storageAccount: storageAccountId + + // Managed VNET + managedNetwork: (enableNetworkIsolation ? { + isolationMode: 'AllowInternetOutbound' + outboundRules: { + ruleAMLtoAOAI:{ + type:'PrivateEndpoint' + destination: { + serviceResourceId: azureOpenAIId + subresourceTarget: 'account' + sparkEnabled: false + sparkStatus: 'Inactive' + } + } + } + status: { + sparkReady: false + status: 'active' + } + } : null) + publicNetworkAccess: (!enableNetworkIsolation ? 
'Enabled' : 'Disabled') + } +} + +resource machineLearningPrivateEndpoint 'Microsoft.Network/privateEndpoints@2022-01-01' = if (enableNetworkIsolation) { + name: machineLearningPepName + location: location + tags: tags + properties: { + privateLinkServiceConnections: [ + { + name: machineLearningPepName + properties: { + groupIds: [ + 'amlworkspace' + ] + privateLinkServiceId: machineLearning.id + } + } + ] + subnet: { + id: subnetId + } + } +} + +resource amlPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: privateDnsZoneName + location: 'global' +} + +resource amlPrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(machineLearning.id) + parent: amlPrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: virtualNetworkId + } + } +} + +// Notebook +resource notebookPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: privateAznbDnsZoneName + location: 'global' +} + +resource notebookPrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(machineLearning.id) + parent: notebookPrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: virtualNetworkId + } + } +} + +resource privateEndpointDns 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-01-01' = if (enableNetworkIsolation) { + name: 'amlworkspace-PrivateDnsZoneGroup' + parent: machineLearningPrivateEndpoint + properties:{ + privateDnsZoneConfigs: [ + { + name: privateDnsZoneName + properties:{ + privateDnsZoneId: amlPrivateDnsZone.id + } + } + { + name: privateAznbDnsZoneName + properties:{ + privateDnsZoneId: notebookPrivateDnsZone.id + } + } + ] + } +} + +output nameMachineLearning string = machineLearning.name +output 
machineLearningId string = machineLearning.id +output machineLearningPrincipalId string = machineLearning.identity.principalId +output usedSuffix string = uniqueString(resourceGroup().id) diff --git a/infra/bicep/modules/network.bicep b/infra/bicep/modules/network.bicep new file mode 100644 index 000000000..18f646647 --- /dev/null +++ b/infra/bicep/modules/network.bicep @@ -0,0 +1,66 @@ +@description('The type of the environment') +param environmentType string +@description('Location for all resources.') +param location string = resourceGroup().location +@description('Name of the virtual network') +param vnetName string = 'vnet-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('llmops subnet name') +param llmopsSubnetName string = 'snet-llmops-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('Name of the subnet where the Linux VM for SSH tunelling will be deployed') +param jumpboxSubnetName string = 'snet-jumpb-${environmentType}-${uniqueString(resourceGroup().id)}' +@description('Address space for the virtual network') +param vnetAddressSpace string = '10.1.0.0/16' +@description('llmops subnet address prefix') +param llmopsSubnet string = '10.1.4.0/24' +@description('Jumpbox subnet address prefix') +param jumpboxSubnet string = '10.1.6.0/24' +@description('Group ID of the network security group') +param idNetworkSecurityGroup string +@description('Enable network isolation for the virtual network') +param enableNetworkIsolation bool + + +var subnetLlmops = [ + { + name: llmopsSubnetName + properties: { + addressPrefix: llmopsSubnet + privateEndpointNetworkPolicies: 'Disabled' + privateLinkServiceNetworkPolicies: 'Disabled' + networkSecurityGroup: { + id: idNetworkSecurityGroup + } + } + } +] + +var subnetJumpbox = [ + { + name: jumpboxSubnetName + properties: { + addressPrefix: jumpboxSubnet + } + } +] + +var subnets = (enableNetworkIsolation ? 
concat(subnetLlmops, subnetJumpbox) : subnetLlmops) + +resource vnet 'Microsoft.Network/virtualNetworks@2023-04-01' = { + name: vnetName + location: location + properties: { + addressSpace: { + addressPrefixes: [ + vnetAddressSpace + ] + } + subnets: subnets + } +} + + +output vnetId string = vnet.id +output llmopsSubnetId string = '${vnet.id}/subnets/${llmopsSubnetName}' +output jumpboxSubnetId string = '${vnet.id}/subnets/${jumpboxSubnetName}' + + diff --git a/infra/bicep/modules/nsg.bicep b/infra/bicep/modules/nsg.bicep new file mode 100644 index 000000000..68621be25 --- /dev/null +++ b/infra/bicep/modules/nsg.bicep @@ -0,0 +1,139 @@ +// Creates a network security group preconfigured for use with Azure ML +// To learn more, see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-azureml-behind-firewall +@description('Azure region of the deployment') +param location string + +@description('Tags to add to the resources') +param tags object + +@description('Name of the network security group') +param nameNsg string + +resource nsg 'Microsoft.Network/networkSecurityGroups@2022-01-01' = { + name: nameNsg + location: location + tags: tags + properties: { + securityRules: [ + { + name: 'BatchNodeManagement' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '29876-29877' + sourceAddressPrefix: 'BatchNodeManagement' + destinationAddressPrefix: '*' + access: 'Allow' + priority: 120 + direction: 'Inbound' + } + } + { + name: 'AzureMachineLearning' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '44224' + sourceAddressPrefix: 'AzureMachineLearning' + destinationAddressPrefix: '*' + access: 'Allow' + priority: 130 + direction: 'Inbound' + } + } + { + name: 'AzureActiveDirectory' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '*' + sourceAddressPrefix: '*' + destinationAddressPrefix: 'AzureActiveDirectory' + access: 'Allow' + priority: 140 + direction: 
'Outbound' + } + } + { + name: 'AzureMachineLearningOutbound' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '443' + sourceAddressPrefix: '*' + destinationAddressPrefix: 'AzureMachineLearning' + access: 'Allow' + priority: 150 + direction: 'Outbound' + } + } + { + name: 'AzureResourceManager' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '443' + sourceAddressPrefix: '*' + destinationAddressPrefix: 'AzureResourceManager' + access: 'Allow' + priority: 160 + direction: 'Outbound' + } + } + { + name: 'AzureStorageAccount' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '443' + sourceAddressPrefix: '*' + destinationAddressPrefix: 'Storage.${location}' + access: 'Allow' + priority: 170 + direction: 'Outbound' + } + } + { + name: 'AzureFrontDoor' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '443' + sourceAddressPrefix: '*' + destinationAddressPrefix: 'AzureFrontDoor.FrontEnd' + access: 'Allow' + priority: 180 + direction: 'Outbound' + } + } + { + name: 'AzureContainerRegistry' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '443' + sourceAddressPrefix: '*' + destinationAddressPrefix: 'AzureContainerRegistry.${location}' + access: 'Allow' + priority: 190 + direction: 'Outbound' + } + } + { + name: 'MicrosoftContainerRegistry' + properties: { + protocol: 'Tcp' + sourcePortRange: '*' + destinationPortRange: '443' + sourceAddressPrefix: 'VirtualNetwork' + destinationAddressPrefix: 'MicrosoftContainerRegistry' + access: 'Allow' + priority: 200 + direction: 'Outbound' + } + } + ] + } +} + +output networkSecurityGroup string = nsg.id diff --git a/infra/bicep/modules/rolesAssignments.bicep b/infra/bicep/modules/rolesAssignments.bicep new file mode 100644 index 000000000..0d015752c --- /dev/null +++ b/infra/bicep/modules/rolesAssignments.bicep @@ -0,0 +1,37 @@ +@description('Name of the storage account') +param 
nameStorage string + +@description('AML workspace principal id') +param azuremlWorkspacePrincipalId string + +// Identifiers for the required roles +var roleStorageTableDataContributorId = '0a9a7e1f-b9d0-4cc4-a60d-0319b160aaa3' +var roleFileDataPriviligedContributorId = '69566ab7-960f-475b-8e7c-b3118f30c6bd' + + +resource referenceStorage 'Microsoft.Storage/storageAccounts@2022-09-01' existing = { + name: nameStorage +} + + +// Assign "Table data contributor" role to AML Workspace +resource roleAssignmentTableDataContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + scope: referenceStorage + name: guid(nameStorage, roleStorageTableDataContributorId, resourceGroup().id) + properties: { + roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleStorageTableDataContributorId) + principalId: azuremlWorkspacePrincipalId + principalType: 'ServicePrincipal' + } +} + +// Assign "Storage File Data Privileged Contributor" role to AML Workspace +resource roleAssignmentFileDataPriviligedContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + scope: referenceStorage + name: guid(nameStorage, roleFileDataPriviligedContributorId, resourceGroup().id) + properties: { + roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleFileDataPriviligedContributorId) + principalId: azuremlWorkspacePrincipalId + principalType: 'ServicePrincipal' + } +} diff --git a/infra/bicep/modules/storage.bicep b/infra/bicep/modules/storage.bicep new file mode 100644 index 000000000..25cde932e --- /dev/null +++ b/infra/bicep/modules/storage.bicep @@ -0,0 +1,216 @@ +@description('Azure region of the deployment') +param location string + +@description('Tags to add to the resources') +param tags object + +@description('Name of the storage account') +param nameStorage string + +@description('Name of the storage blob private link endpoint') +param nameStoragePleBlob string + +@description('Name of the storage file private link endpoint') 
+param nameStoragePleFile string + +@description('Resource ID of the subnet') +param subnetId string + +@description('Resource ID of the virtual network') +param virtualNetworkId string + +@allowed([ + 'Standard_LRS' + 'Standard_ZRS' + 'Standard_GRS' + 'Standard_GZRS' + 'Standard_RAGRS' + 'Standard_RAGZRS' + 'Premium_LRS' + 'Premium_ZRS' +]) +@description('Storage SKU') +param nameStorageSku string = 'Standard_LRS' + +@description('Enable public access to ease dev tests?') +param enableNetworkIsolation bool + +var nameStorageCleaned = replace(nameStorage, '-', '') +var nameBlobPrivateDnsZone = 'privatelink.blob.${environment().suffixes.storage}' +var nameFilePrivateDnsZone = 'privatelink.file.${environment().suffixes.storage}' + +resource storage 'Microsoft.Storage/storageAccounts@2023-01-01' = { + name: nameStorageCleaned + location: location + tags: tags + sku: { + name: nameStorageSku + } + kind: 'StorageV2' + identity: { + type: 'SystemAssigned' + } + properties: { + accessTier: 'Hot' + allowBlobPublicAccess: (!enableNetworkIsolation ? true : false) + publicNetworkAccess: (!enableNetworkIsolation ? 'Enabled' : 'Disabled') + allowCrossTenantReplication: false + allowSharedKeyAccess: true + encryption: { + keySource: 'Microsoft.Storage' + requireInfrastructureEncryption: false + services: { + blob: { + enabled: true + keyType: 'Account' + } + file: { + enabled: true + keyType: 'Account' + } + queue: { + enabled: true + keyType: 'Service' + } + table: { + enabled: true + keyType: 'Service' + } + } + } + isHnsEnabled: false + isNfsV3Enabled: false + keyPolicy: { + keyExpirationPeriodInDays: 7 + } + largeFileSharesState: 'Disabled' + minimumTlsVersion: 'TLS1_2' + networkAcls: { + bypass: 'AzureServices' + defaultAction: (!enableNetworkIsolation ? 
'Allow' : 'Deny') + } + supportsHttpsTrafficOnly: true + } +} + +resource storagePrivateEndpointBlob 'Microsoft.Network/privateEndpoints@2022-01-01' = if (enableNetworkIsolation) { + name: nameStoragePleBlob + location: location + tags: tags + properties: { + privateLinkServiceConnections: [ + { + name: nameStoragePleBlob + properties: { + groupIds: [ + 'blob' + ] + privateLinkServiceId: storage.id + privateLinkServiceConnectionState: { + status: 'Approved' + description: 'Auto-Approved' + actionsRequired: 'None' + } + } + } + ] + subnet: { + id: subnetId + } + } +} + +resource storagePrivateEndpointFile 'Microsoft.Network/privateEndpoints@2022-01-01' = if (enableNetworkIsolation) { + name: nameStoragePleFile + location: location + tags: tags + properties: { + privateLinkServiceConnections: [ + { + name: nameStoragePleFile + properties: { + groupIds: [ + 'file' + ] + privateLinkServiceId: storage.id + privateLinkServiceConnectionState: { + status: 'Approved' + description: 'Auto-Approved' + actionsRequired: 'None' + } + } + } + ] + subnet: { + id: subnetId + } + } +} + +resource blobPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: nameBlobPrivateDnsZone + location: 'global' +} + +resource privateEndpointDns 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-01-01' = if (enableNetworkIsolation) { + name: 'blob-PrivateDnsZoneGroup' + parent: storagePrivateEndpointBlob + properties:{ + privateDnsZoneConfigs: [ + { + name: nameBlobPrivateDnsZone + properties:{ + privateDnsZoneId: blobPrivateDnsZone.id + } + } + ] + } +} + + +resource blobPrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(storage.id) + parent: blobPrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: virtualNetworkId + } + } +} + +resource filePrivateDnsZone 
'Microsoft.Network/privateDnsZones@2020-06-01' = if (enableNetworkIsolation) { + name: nameFilePrivateDnsZone + location: 'global' +} + +resource filePrivateEndpointDns 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-01-01' = if (enableNetworkIsolation) { + name: 'file-PrivateDnsZoneGroup' + parent: storagePrivateEndpointFile + properties:{ + privateDnsZoneConfigs: [ + { + name: nameFilePrivateDnsZone + properties:{ + privateDnsZoneId: filePrivateDnsZone.id + } + } + ] + } +} + +resource filePrivateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (enableNetworkIsolation) { + name: uniqueString(storage.id) + parent: filePrivateDnsZone + location: 'global' + properties: { + registrationEnabled: false + virtualNetwork: { + id: virtualNetworkId + } + } +} + +output storageId string = storage.id +output nameStorage string = storage.name diff --git a/infra/bicep/scripts/deploy-infra.sh b/infra/bicep/scripts/deploy-infra.sh new file mode 100755 index 000000000..354ff0d01 --- /dev/null +++ b/infra/bicep/scripts/deploy-infra.sh @@ -0,0 +1,184 @@ +#!/bin/bash +repoRoot=$( + cd "$(dirname "${BASH_SOURCE[0]}")/../../../" + pwd -P +) + +############################################################################## +# colors for formatting the output +############################################################################## +# shellcheck disable=SC2034 +{ +YELLOW='\033[1;33m' +GREEN='\033[1;32m' +RED='\033[0;31m' +BLUE='\033[1;34m' +NC='\033[0m' # No Color +} +############################################################################## +#- print functions +############################################################################## +function printMessage(){ + echo -e "${GREEN}$1${NC}" +} +function printWarning(){ + echo -e "${YELLOW}$1${NC}" +} +function printError(){ + echo -e "${RED}$1${NC}" +} +function printProgress(){ + echo -e "${BLUE}$1${NC}" +} 
+##############################################################################
+#- checkLoginAndSubscription: stop the script unless `az account show` succeeds
+##############################################################################
+function checkLoginAndSubscription() {
+    az account show -o none
+    # shellcheck disable=SC2181
+    if [ $? -ne 0 ]; then
+        printError "\nYou seems disconnected from Azure, stopping the script."
+        exit 1
+    fi
+}
+##############################################################################
+#- checkError: exit 1 when the command executed just before the call failed
+##############################################################################
+function checkError() {
+    # shellcheck disable=SC2181
+    if [ $? -ne 0 ]; then
+        echo -e "${RED}\nAn error occurred exiting from the current bash${NC}"
+        exit 1
+    fi
+}
+#######################################################
+#- usage: print the accepted options and some examples
+#######################################################
+function usage() {
+    echo
+    echo "Arguments:"
+    echo -e " -e [ENV] set environment where ENV=DEV, QA, PREPROD or PROD"
+    echo -e " -r [RESOURCE_GROUP] set resource group"
+    echo -e " -i [NETWORK_ISOLATION] trigger on/off network isolation for AML workspace and its dependant resources. Set to false by default"
+    echo -e " -a [APP_ID] set the APP_ID when you want to use a service principal to login to Azure"
+    echo -e " -p [PASSWORD] set the PASSWORD when you want to use a service principal to login to Azure"
+    echo -e " -t [TENANT_ID] set TENANT_ID"
+    echo -e " -s [SUBSCRIPTION_ID] set SUBSCRIPTION_ID"
+
+    echo
+    echo "Example:"
+    echo -e " bash ./deploy-infra.sh -e DEV -r DevResourceGroup"
+    echo -e " bash ./deploy-infra.sh -e QA -r QAResourceGroup -i false"
+    echo -e " bash ./deploy-infra.sh -e PROD -r PRODResourceGroup -i true"
+    echo -e " bash ./deploy-infra.sh -e PROD -r PRODResourceGroup -i true -a 1234abcd-123a-1234-abcd-123456abcdef -p password -t 1234abcd-123a-1234-abcd-123456abcdef -s 1234abcd-123a-1234-abcd-123456abcdef"
+}
+
+
+NETWORK_ISOLATION=false
+# shellcheck disable=SC2034
+while getopts ":e:r:i:a:p:t:s:" opt; do # leading ':' = silent mode, so a missing value reaches the ':)' branch below
+    case $opt in
+    e) TYPE_ENVIRONMENT=$OPTARG ;;
+    r) RESOURCE_GROUP=$OPTARG ;;
+    i) NETWORK_ISOLATION=$OPTARG ;;
+    a) APP_ID=$OPTARG ;;
+    p) PASSWORD=$OPTARG ;;
+    t) TENANT_ID=$OPTARG ;;
+    s) SUBSCRIPTION_ID=$OPTARG ;;
+    :)
+        printError "Error: -${OPTARG} requires a value"
+        exit 1
+        ;;
+    *)
+        usage
+        exit 1
+        ;;
+    esac
+done
+
+# Validation: the environment type and the resource group are mandatory
+if [[ -z "${TYPE_ENVIRONMENT}" || -z "${RESOURCE_GROUP}" ]]; then
+    printError "Required parameters are missing"
+    usage
+    exit 1
+fi
+
+if [[ -z "$APP_ID" || -z "$PASSWORD" || -z "$TENANT_ID" || -z "$SUBSCRIPTION_ID" ]]; then # any credential missing -> interactive login
+    printWarning "Variables \$APP_ID \$PASSWORD \$TENANT_ID \$SUBSCRIPTION_ID not set"
+    printProgress "Interactive Azure login..."
+    if [[ -z "$TENANT_ID" ]]; then
+        az login || exit 1
+    else
+        az login -t "$TENANT_ID" || exit 1
+    fi
+    if [[ -n "$SUBSCRIPTION_ID" ]]; then
+        az account set -s "$SUBSCRIPTION_ID"
+    fi
+else
+    printProgress "Service Principal Azure login..."
+    az login --service-principal -u "$APP_ID" -p "$PASSWORD" -t "$TENANT_ID" || exit 1
+    az account set -s "$SUBSCRIPTION_ID"
+fi
+checkLoginAndSubscription
+
+az account show
+
+printProgress "Getting Resource Group Name..."
+resourceGroupName="${RESOURCE_GROUP}"
+printProgress "Resource Group Name: ${resourceGroupName}"
+
+# Deploy the infrastructure for the environment selected with -e
+
+pathToBicep="${repoRoot}/infra/bicep/main.bicep"
+environmentType="${TYPE_ENVIRONMENT}"
+networkIsolationBool=${NETWORK_ISOLATION}
+
+printProgress "generate keys for the jumpbox..."
+printProgress "Check if ssh keys already created in resource group ${resourceGroupName}..."
+
+sshKeyName=$(az sshkey list -g "$resourceGroupName" --query "[?contains(name, 'sshkey-linuxmachine')].name" -o tsv)
+
+if [ -z "$sshKeyName" ]; then # no 'sshkey-linuxmachine' resource yet: generate a new key pair under <repo>/ssh
+    printProgress "Creating ssh keys in resource group ${resourceGroupName}..."
+
+    folder_ssh="${repoRoot}/ssh"
+    if [ ! -d "${folder_ssh}" ]; then mkdir "${folder_ssh}"; fi
+    yes y | ssh-keygen -t rsa -N "" -f "${folder_ssh}/jumpbox_private_key" # 'yes y' answers the overwrite prompt
+    privateSshKey=$(cat "${folder_ssh}/jumpbox_private_key")
+    publicSshKey=$(cat "${folder_ssh}/jumpbox_private_key.pub")
+    printProgress "publicSshKey=$publicSshKey"
+    printProgress "privateSshKey written to ${folder_ssh}/jumpbox_private_key (content not printed)" # never echo private key material to the console/CI log
+else
+    printProgress "Ssh keys already created in resource group ${resourceGroupName}..."
+    publicSshKey=""
+    privateSshKey=""
+fi
+
+# Deploy infrastructure using main.bicep file; stop immediately if the deployment fails
+printProgress "Deploying resources in resource group ${resourceGroupName}..."
+az deployment group create --mode Incremental --resource-group "$resourceGroupName" --template-file "$pathToBicep" --parameters environmentType="$environmentType" keyVaultSku='standard' jumpboxSshKey="$publicSshKey" jumpboxSshPrivateKey="$privateSshKey" enableNetworkIsolation="$networkIsolationBool" || { printError "Infrastructure deployment failed"; exit 1; }
+
+# Getting Azure Key Vault and Azure ML workspace names from the deployments named "main" and "azuremlWorkspace"
+keyVaultName=$(az deployment group show --resource-group "${resourceGroupName}" --name main --query properties.outputs.keyVaultName.value -o tsv)
+nameAmlWorkspace=$(az deployment group show --resource-group "${resourceGroupName}" --name azuremlWorkspace --query properties.outputs.nameMachineLearning.value -o tsv)
+echo "$nameAmlWorkspace"
+# Both names are required by export-deployment-variables.sh, which is invoked below
+if [ -z "$keyVaultName" ]; then
+    printError "Missing keyVaultName"
+    exit 1
+fi
+if [ -z "$nameAmlWorkspace" ]; then
+    printError "Missing nameAmlWorkspace"
+    exit 1
+fi
+runtimeName="runtime1"
+"${repoRoot}/infra/bicep/scripts/export-deployment-variables.sh" -k "$keyVaultName" -g "$resourceGroupName" -e "$environmentType" -w "$nameAmlWorkspace" -r "$runtimeName" -i "$networkIsolationBool"
+
+if [ "$networkIsolationBool" = true ]; then # only the literal value 'true' triggers managed VNET provisioning
+    printProgress "AML workspace name: ${nameAmlWorkspace}"
+    printProgress "Provisionning AML managed VNET..."
+    az ml workspace provision-network --name "${nameAmlWorkspace}" -g "${resourceGroupName}"
+fi
+
+checkError # $? here is the status of the if-block above, i.e. of provision-network when it ran
+printMessage "Deployment in resource group ${resourceGroupName} successful!"
diff --git a/infra/bicep/scripts/export-deployment-variables.sh b/infra/bicep/scripts/export-deployment-variables.sh
new file mode 100755
index 000000000..1e904aae8
--- /dev/null
+++ b/infra/bicep/scripts/export-deployment-variables.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# Records the PUBLIC_ACCESS setting in deployment_config.json at the repository root,
+# creating that file from a built-in template when it does not exist yet.
+#
+# Options (each takes a value):
+#   -k key vault name, -g resource group, -e environment, -w workspace, -r runtime
+#   -i value written to PUBLIC_ACCESS: true or false (default: true)
+repoRoot=$(
+    cd "$(dirname "${BASH_SOURCE[0]}")/../../../"
+    pwd -P
+)
+
+# Default matches the value this script used before -i was honoured.
+networkIsolationBool=true
+# The leading ':' selects getopts silent mode so a missing value reaches the ':)' branch.
+# shellcheck disable=SC2034
+while getopts ":k:g:e:w:r:i:" opt; do
+    case $opt in
+    k) kvName=$OPTARG ;;
+    g) rgName=$OPTARG ;;
+    e) envName=$OPTARG ;;
+    w) workspaceName=$OPTARG ;;
+    r) runtimeName=$OPTARG ;;
+    i) networkIsolationBool=$OPTARG ;;
+    :)
+        echo "Error: -${OPTARG} requires a value"
+        exit 1
+        ;;
+    *)
+        echo "Error: invalid option -${OPTARG}"
+        exit 1
+        ;;
+    esac
+done
+
+# The value is spliced into JSON text below, so accept only the two expected literals.
+if [[ "$networkIsolationBool" != "true" && "$networkIsolationBool" != "false" ]]; then
+    echo "Error: -i must be true or false (got '$networkIsolationBool')"
+    exit 1
+fi
+
+# NOTE(review): the llmops_config.json export below is disabled (commented out), so the
+# -k/-g/-e/-w/-r values parsed above are currently unused. Confirm whether it should be re-enabled.
+# # The LLMOPS configJSON file
+# FILE_LLMOPS="${repoRoot}/llmops_config.json"
+
+# #If the llmops_config.json file does not exist, create it and add one block containing the environment variables
+# if [[ ! -e $FILE_LLMOPS ]]; then
+#     echo "File $FILE_LLMOPS does not exist, creating it..."
+#     touch $FILE_LLMOPS
+#     blockToAdd="{
+#         \"ENV_NAME\": \"$envName\",
+#         \"RUNTIME_NAME\": \"$runtimeName\",
+#         \"KEYVAULT_NAME\": \"$kvName\",
+#         \"RESOURCE_GROUP_NAME\": \"$rgName\",
+#         \"WORKSPACE_NAME\": \"$workspaceName\",
+#         \"STANDARD_FLOW_PATH\": \"flows/experiment\",
+#         \"EVALUATION_FLOW_PATH\": \"flows/evaluation, flows/evaluation_adv\"
+#     }"
+#     echo "File $FILE_LLMOPS created."
+#     cat << EOF > $FILE_LLMOPS
+# {
+#     "envs":[
+#         $blockToAdd
+#     ]
+# }
+# EOF
+# else
+#     #If the llmops_config.json exists, update it.
+#     #The update is done by adding a new block if the environment does not exist, or by updating the existing block if the environment already exists
+#     echo "File $FILE_LLMOPS already exists. Updating configuration of environment $envName..."
+#     jq --arg envName "$envName" --arg runtimeName "$runtimeName" --arg kvName "$kvName" --arg rgName "$rgName" --arg workspaceName "$workspaceName" '
+#     if any(.envs[]; .ENV_NAME == $envName) then
+#         .envs |= map(
+#             if .ENV_NAME == $envName then
+#                 .ENV_NAME = $envName
+#                 | .RUNTIME_NAME = $runtimeName
+#                 | .KEYVAULT_NAME = $kvName
+#                 | .RESOURCE_GROUP_NAME = $rgName
+#                 | .WORKSPACE_NAME = $workspaceName
+#             else
+#                 .
+#             end
+#         )
+#     else
+#         .envs += [
+#             {
+#                 "ENV_NAME": $envName,
+#                 "RUNTIME_NAME": $runtimeName,
+#                 "KEYVAULT_NAME": $kvName,
+#                 "RESOURCE_GROUP_NAME": $rgName,
+#                 "WORKSPACE_NAME": $workspaceName,
+#                 "STANDARD_FLOW_PATH": "flows/experiment",
+#                 "EVALUATION_FLOW_PATH": "flows/evaluation"
+#             }
+#         ]
+#     end' $FILE_LLMOPS > temp.json && mv temp.json $FILE_LLMOPS
+# fi
+
+# NOTE(review): PUBLIC_ACCESS is set to the network isolation flag as the string "true"/"false".
+# provision_endpoint.py passes it to ManagedOnlineEndpoint(public_network_access=...); confirm the
+# accepted values there, and whether isolation=true should rather mean public access is off.
+FILE_DEPLOYMENT="${repoRoot}/deployment_config.json"
+
+#If the deployment_config.json file does not exist, create it from the template below
+if [[ ! -e "$FILE_DEPLOYMENT" ]]; then
+    echo "File $FILE_DEPLOYMENT does not exist, creating it..."
+    # Unquoted here-document delimiter: $networkIsolationBool is expanded, everything else is literal.
+    cat << EOF > "$FILE_DEPLOYMENT"
+{
+    "azure_managed_endpoint":[
+        {
+            "ENV_NAME": "dev",
+            "TEST_FILE_PATH": "sample-request.json",
+            "PUBLIC_ACCESS": "$networkIsolationBool",
+            "ENDPOINT_NAME": "",
+            "ENDPOINT_DESC": "An online endpoint serving a flow for [task]",
+            "DEPLOYMENT_DESC": "prompt flow deployment",
+            "PRIOR_DEPLOYMENT_NAME": "",
+            "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "",
+            "CURRENT_DEPLOYMENT_NAME": "",
+            "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100",
+            "DEPLOYMENT_VM_SIZE": "Standard_F4s_v2",
+            "DEPLOYMENT_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest",
+            "DEPLOYMENT_CONDA_PATH": "environment/conda.yml",
+            "DEPLOYMENT_INSTANCE_COUNT": 1,
+            "ENVIRONMENT_VARIABLES": {
+                "example-name": "example-value"
+            }
+        }
+    ],
+    "kubernetes_endpoint":[
+        {
+            "ENV_NAME": "dev",
+            "TEST_FILE_PATH": "sample-request.json",
+            "PUBLIC_ACCESS": "$networkIsolationBool",
+            "ENDPOINT_NAME": "",
+            "ENDPOINT_DESC": "An kubernetes endpoint serving a flow for [task]",
+            "DEPLOYMENT_DESC": "prompt flow deployment",
+            "PRIOR_DEPLOYMENT_NAME": "",
+            "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "",
+            "CURRENT_DEPLOYMENT_NAME": "",
+            "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100,
+            "COMPUTE_NAME": "",
+            "DEPLOYMENT_VM_SIZE": "promptinstancetype",
+            "DEPLOYMENT_BASE_IMAGE_NAME": "mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest",
+            "DEPLOYMENT_CONDA_PATH": "environment/conda.yml",
+            "DEPLOYMENT_INSTANCE_COUNT": 1,
+            "CPU_ALLOCATION": "",
+            "MEMORY_ALLOCATION": "",
+            "ENVIRONMENT_VARIABLES": {
+                "example-name": "example-value"
+            }
+        }
+    ]
+}
+EOF
+    echo "File $FILE_DEPLOYMENT created."
+else
+    #If the deployment_config.json exists, update it to set value of "PUBLIC_ACCESS" to true or false
+    echo "File $FILE_DEPLOYMENT already exists. Updating it to set value of PUBLIC_ACCESS"
+    # Top-level values are arrays of endpoint objects, so descend two levels; '.[].PUBLIC_ACCESS' would index an array with a string key and fail.
+    if updatedConfig=$(jq --arg publicAccess "$networkIsolationBool" '(.[] | arrays | .[] | objects | .PUBLIC_ACCESS) = $publicAccess' "$FILE_DEPLOYMENT"); then
+        printf '%s\n' "$updatedConfig" > "$FILE_DEPLOYMENT"
+    else
+        echo "Error: could not update PUBLIC_ACCESS in $FILE_DEPLOYMENT"
+        exit 1
+    fi
+fi
diff --git a/llmops/common/deployment/provision_deployment.py b/llmops/common/deployment/provision_deployment.py index 59ec04b21..aa3eabfb2 100644 --- a/llmops/common/deployment/provision_deployment.py +++ b/llmops/common/deployment/provision_deployment.py @@ -101,6 +101,7 @@ if "ENDPOINT_NAME" in elem and "ENV_NAME" in elem: if stage == elem["ENV_NAME"]: endpoint_name = elem["ENDPOINT_NAME"] + public_access = elem["PUBLIC_ACCESS"] deployment_name = elem["CURRENT_DEPLOYMENT_NAME"] deployment_conda_path = elem["DEPLOYMENT_CONDA_PATH"] deployment_base_image = elem["DEPLOYMENT_BASE_IMAGE_NAME"] diff --git a/llmops/common/deployment/provision_endpoint.py b/llmops/common/deployment/provision_endpoint.py index b2c04930e..24dc4c4d8 100644 --- a/llmops/common/deployment/provision_endpoint.py +++ b/llmops/common/deployment/provision_endpoint.py @@ -86,12 +86,14 @@ for elem in endpoint_config["azure_managed_endpoint"]: if "ENDPOINT_NAME" in elem and "ENV_NAME" in elem: if stage == elem["ENV_NAME"]: + public_access = elem["PUBLIC_ACCESS"] endpoint_name = elem["ENDPOINT_NAME"] endpoint_desc = elem["ENDPOINT_DESC"] endpoint = ManagedOnlineEndpoint( name=endpoint_name, description=endpoint_desc, auth_mode="key", + public_network_access=public_access, tags={"build_id": build_id}, ) diff --git a/llmops_config.json.sample b/llmops_config.json.sample new file mode 100644 index
000000000..5d48976f0 --- /dev/null +++ b/llmops_config.json.sample @@ -0,0 +1,22 @@ +{ + "envs":[ + { + "ENV_NAME": "pr", + "RUNTIME_NAME": "", + "KEYVAULT_NAME": "", + "RESOURCE_GROUP_NAME": "", + "WORKSPACE_NAME": "", + "STANDARD_FLOW_PATH": "flows/experiment", + "EVALUATION_FLOW_PATH": "flows/evaluation" + }, + { + "ENV_NAME": "dev", + "RUNTIME_NAME": "", + "KEYVAULT_NAME": "", + "RESOURCE_GROUP_NAME": "", + "WORKSPACE_NAME": "", + "STANDARD_FLOW_PATH": "flows/experiment", + "EVALUATION_FLOW_PATH": "flows/evaluation, flows/evaluation_adv" + } + ] +} \ No newline at end of file