Skip to content

Commit 0553da3

Browse files
committed
build-an-observability-system-for-ai-applications-at-low-costs
1 parent 3f4b1cd commit 0553da3

File tree

4 files changed

+239
-0
lines changed

4 files changed

+239
-0
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<!-- DOCS_DESCRIPTION_CN -->
2+
本示例用于实现解决方案[使用ARMS监控自建大模型应用实现可观测](https://www.aliyun.com/solution/tech-solution-deploy/2922005), 涉及到专有网络(VPC)、交换机(VSwitch)、云服务器(ECS)、RAM 用户等资源的创建。
3+
<!-- DOCS_DESCRIPTION_CN -->
4+
5+
<!-- DOCS_DESCRIPTION_EN -->
6+
This example is used to implement the solution [build-an-observability-system-for-ai-applications-at-low-costs](https://www.aliyun.com/solution/tech-solution-deploy/2922005). It involves the creation and deployment of resources such as Virtual Private Cloud (VPC), VSwitch, Elastic Compute Service (ECS), and RAM users.
7+
<!-- DOCS_DESCRIPTION_EN -->
8+
9+
<!-- BEGIN_TF_DOCS -->
10+
## Providers
11+
12+
| Name | Version |
13+
|------|---------|
14+
| <a name="provider_alicloud"></a> [alicloud](#provider\_alicloud) | n/a |
15+
| <a name="provider_random"></a> [random](#provider\_random) | n/a |
16+
17+
## Modules
18+
19+
No modules.
20+
21+
## Resources
22+
23+
| Name | Type |
24+
|------|------|
25+
| [alicloud_ecs_command.run_command](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/ecs_command) | resource |
26+
| [alicloud_ecs_invocation.invoke_script](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/ecs_invocation) | resource |
27+
| [alicloud_instance.ecs_instance](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/instance) | resource |
28+
| [alicloud_ram_access_key.ramak](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/ram_access_key) | resource |
29+
| [alicloud_ram_user.ram_user](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/ram_user) | resource |
30+
| [alicloud_ram_user_policy_attachment.attach_policy_to_user](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/ram_user_policy_attachment) | resource |
31+
| [alicloud_security_group.security_group](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/security_group) | resource |
32+
| [alicloud_security_group_rule.allow](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/security_group_rule) | resource |
33+
| [alicloud_vpc.vpc](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/vpc) | resource |
34+
| [alicloud_vswitch.vswitch](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/resources/vswitch) | resource |
35+
| [random_string.suffix](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/string) | resource |
36+
| [alicloud_images.default](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/data-sources/images) | data source |
37+
| [alicloud_regions.current_region_ds](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/data-sources/regions) | data source |
38+
| [alicloud_zones.default](https://registry.terraform.io/providers/hashicorp/alicloud/latest/docs/data-sources/zones) | data source |
39+
40+
## Inputs
41+
42+
| Name | Description | Type | Default | Required |
43+
|------|-------------|------|---------|:--------:|
44+
| <a name="input_arms_license_key"></a> [arms\_license\_key](#input\_arms\_license\_key) | 当前环境 ARMS License Key。可以通过OpenAPI获取,前往<https://api.aliyun.com/api/ARMS/2019-08-08/DescribeTraceLicenseKey>,输入参数中填写RegionId(部署地域),单击发起调用,获取结果中LicenseKey对应的值。 | `string` | n/a | yes |
45+
| <a name="input_bai_lian_api_key"></a> [bai\_lian\_api\_key](#input\_bai\_lian\_api\_key) | 百炼 API-KEY,需开通百炼模型服务再获取 API-KEY,详情请参考:https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key | `string` | n/a | yes |
46+
| <a name="input_ecs_instance_password"></a> [ecs\_instance\_password](#input\_ecs\_instance\_password) | 服务器登录密码,长度8-30,必须包含三项(大写字母、小写字母、数字、 ()`~!@#$%^&*_-+=\|{}[]:;'<>,.?/ 中的特殊符号)` | `string` | n/a | yes |
47+
| <a name="input_ecs_instance_type"></a> [ecs\_instance\_type](#input\_ecs\_instance\_type) | 实例类型 | `string` | `"ecs.t6-c1m2.large"` | no |
48+
<!-- END_TF_DOCS -->
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# ------------------------------------------------------------------------------
2+
# 核心资源定义 (Main Resource Definitions)
3+
#
4+
# 本文件包含了模块的核心基础设施资源。
5+
# 这里的代码负责根据输入变量来创建和配置所有云资源。
6+
# ------------------------------------------------------------------------------
7+
8+
# Alibaba Cloud provider configuration. The deployment region is fixed here.
provider "alicloud" {
  region = "cn-shanghai"
}
12+
13+
# Look up the region this deployment is currently targeting.
data "alicloud_regions" "current_region_ds" {
  current = true
}
17+
18+
# Query the availability zones that support the requested ECS instance type,
# the "cloud_essd" disk category, and VSwitch creation.
data "alicloud_zones" "default" {
  available_instance_type     = var.ecs_instance_type
  available_resource_creation = "VSwitch"
  available_disk_category     = "cloud_essd"
}
24+
25+
# Random 8-character string (lowercase letters only) used as a unique suffix
# for resource names, so repeated deployments do not collide on names.
resource "random_string" "suffix" {
  length  = 8
  special = false
  numeric = false
  upper   = false
  lower   = true
}
33+
34+
# Shared local values:
#   common_name - the random suffix appended to every resource name
#   region      - ID of the first region returned by the current-region lookup
locals {
  common_name = random_string.suffix.id
  # Bracket index instead of the legacy attribute-style ".0" form.
  region = data.alicloud_regions.current_region_ds.regions[0].id
}
39+
40+
# Virtual Private Cloud (VPC) that gives the deployment an isolated network.
resource "alicloud_vpc" "vpc" {
  vpc_name   = "vpc-${local.common_name}"
  cidr_block = "192.168.0.0/16"
}
45+
46+
# VSwitch that carves a /24 subnet out of the VPC. It is placed in the first
# zone returned by the zone query.
resource "alicloud_vswitch" "vswitch" {
  vswitch_name = "vswitch-${local.common_name}"
  vpc_id       = alicloud_vpc.vpc.id
  cidr_block   = "192.168.0.0/24"
  # Bracket index instead of the legacy attribute-style ".0" form.
  zone_id = data.alicloud_zones.default.zones[0].id
}
53+
54+
# Security group acting as a virtual firewall that controls network access
# to the ECS instance.
resource "alicloud_security_group" "security_group" {
  security_group_name = "sg-${local.common_name}"
  vpc_id              = alicloud_vpc.vpc.id
}
59+
60+
# Ingress rule accepting TCP traffic on port 8000. As configured, only sources
# inside 192.168.0.0/24 are admitted; see the note on cidr_ip below for
# opening the port to the public internet.
resource "alicloud_security_group_rule" "allow" {
  security_group_id = alicloud_security_group.security_group.id
  type              = "ingress"
  policy            = "accept"
  priority          = 1
  nic_type          = "intranet"
  ip_protocol       = "tcp"
  port_range        = "8000/8000"
  cidr_ip           = "192.168.0.0/24"
  # To allow access to the ECS instance from the public internet, change
  # cidr_ip to 0.0.0.0/0:
  # cidr_ip = "0.0.0.0/0"
}
73+
74+
# Look up the most recent system-owned image whose name matches the Ubuntu
# 24.04 x64 pattern below.
data "alicloud_images" "default" {
  owners      = "system"
  most_recent = true
  name_regex  = "^ubuntu_24_04_x64_20G_alibase_.*"
  # Alternative image name pattern (disabled):
  # name_regex = "^aliyun_3_x64_20G_alibase_.*"
}
81+
82+
# RAM user intended for later granting the ECS instance access to other
# cloud services.
resource "alicloud_ram_user" "ram_user" {
  name = "ram-user-${local.common_name}"
}
86+
87+
# Access key generated for the RAM user defined in this module.
resource "alicloud_ram_access_key" "ramak" {
  user_name = alicloud_ram_user.ram_user.name
}
91+
92+
# Attach a system policy to the RAM user.
resource "alicloud_ram_user_policy_attachment" "attach_policy_to_user" {
  user_name = alicloud_ram_user.ram_user.name
  # Grants full access to Log Service.
  policy_name = "AliyunLogFullAccess"
  # The policy is a predefined system policy, not a custom one.
  policy_type = "System"
}
100+
101+
# ECS instance (cloud server) on which the sample application is deployed.
resource "alicloud_instance" "ecs_instance" {
  instance_name              = "ecs-${local.common_name}"
  instance_type              = var.ecs_instance_type
  image_id                   = data.alicloud_images.default.images[0].id
  system_disk_category       = "cloud_essd"
  vswitch_id                 = alicloud_vswitch.vswitch.id
  security_groups            = [alicloud_security_group.security_group.id]
  internet_max_bandwidth_out = 5
  password                   = var.ecs_instance_password
}
112+
113+
# Cloud Assistant command: a shell script that exports the ARMS and DashScope
# settings and then pipes a remote installer into bash to deploy the sample
# application, which calls the LLM through the application's API.
resource "alicloud_ecs_command" "run_command" {
  name        = "command-run-${local.common_name}"
  type        = "RunShellScript"
  working_dir = "/root"
  timeout     = 3600
  # The script body is base64-encoded before being handed to the provider.
  command_content = base64encode(<<EOF
#!/bin/bash
export ARMS_APP_NAME=llm_app
export ARMS_REGION_ID=${local.region}
export ARMS_IS_PUBLIC=True
export ARMS_LICENSE_KEY=${var.arms_license_key}
export DASHSCOPE_API_KEY=${var.bai_lian_api_key}

curl -fsSL https://help-static-aliyun-doc.aliyuncs.com/install-script/ai-observable/install.sh | bash # 部署应用

EOF
  )
}
132+
133+
# Run the Cloud Assistant command on the ECS instance.
resource "alicloud_ecs_invocation" "invoke_script" {
  command_id  = alicloud_ecs_command.run_command.id
  instance_id = [alicloud_instance.ecs_instance.id]

  # Allow up to 15 minutes for the invocation to be created.
  timeouts {
    create = "15m"
  }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# ECS Workbench URL for logging in to the instance that hosts the application.
# The URL is built from the deployment region and the instance ID.
# The description previously ended with a dangling, unfinished sentence
# fragment; it has been removed.
output "ecs_login_address" {
  description = "部署应用的ECS实例的登录地址。"
  value       = format("https://ecs-workbench.aliyun.com/?from=ecs&instanceType=ecs&regionId=%s&instanceId=%s&resourceGroupId=", local.region, alicloud_instance.ecs_instance.id)
}
5+
6+
# Public IP address of the ECS instance. The description carries usage notes
# and example curl calls against port 8000 of the deployed application.
output "ecs_public_ip" {
  value       = alicloud_instance.ecs_instance.public_ip
  description = <<EOF
为确保可以从公网访问,请配置ECS的安全组对所有IP开放8000端口(即在main.tf中,将cidr_ip改为"0.0.0.0/0")
接口调用示例(将<ecs_public_ip>用实际值替换):
curl http://<ecs_public_ip>:8000/docs # 查看应用信息
curl -X 'POST' 'http://<ecs_public_ip>:8000/agent/invoke' -H 'Content-Type: application/json' -d '{"input": {"input": "北京天气怎么样?"}}' # 调用大模型,等待返回结果。
EOF
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# ------------------------------------------------------------------------------
2+
# 模块输入变量 (Module Input Variables)
3+
#
4+
# 本文件定义了该 Terraform 模块所有可配置的输入变量。
5+
# 每个变量都包含了详细的 'description',以说明其用途、格式和默认值逻辑。
6+
# 请参考这些描述来正确配置模块。
7+
# ------------------------------------------------------------------------------
8+
9+
# Instance type (specification) of the ECS server to create.
variable "ecs_instance_type" {
  description = "实例类型"
  type        = string
  default     = "ecs.t6-c1m2.large"
}
15+
16+
# Login password for the ECS instance. Marked sensitive; no default is set,
# so a value must be supplied.
variable "ecs_instance_password" {
  description = "服务器登录密码,长度8-30,必须包含三项(大写字母、小写字母、数字、 ()`~!@#$%^&*_-+=|{}[]:;'<>,.?/ 中的特殊符号)"
  type        = string
  sensitive   = true
  # default = ""
}
23+
24+
# Bailian (Model Studio) API key. No default is set, so a value must be
# supplied. Marked sensitive because it is a credential: Terraform then
# redacts it from plan/apply output.
variable "bai_lian_api_key" {
  type        = string
  sensitive   = true
  description = "百炼 API-KEY,需开通百炼模型服务再获取 API-KEY,详情请参考:https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key"
  # default = ""
}
30+
31+
# ARMS license key for the current environment. No default is set, so a value
# must be supplied. Marked sensitive because it is a credential: Terraform
# then redacts it from plan/apply output.
variable "arms_license_key" {
  type        = string
  sensitive   = true
  description = "当前环境 ARMS License Key。可以通过OpenAPI获取,前往<https://api.aliyun.com/api/ARMS/2019-08-08/DescribeTraceLicenseKey>,输入参数中填写RegionId(部署地域),单击发起调用,获取结果中LicenseKey对应的值。"
  # default = ""
}
37+

0 commit comments

Comments
 (0)