Skip to content

Commit 540e9c8

Browse files
author
Feng Xu
committed
add init code for opensearch
1 parent 9696495 commit 540e9c8

File tree

6 files changed

+436
-0
lines changed

6 files changed

+436
-0
lines changed
File renamed without changes.

application/deployment/__init__.py

Whitespace-only changes.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
2+
# Template of sample question/SQL pairs for a custom data profile.
#
# Each entry is a dict with a natural-language "question" and the "sql" that
# answers it. After the list is built, every entry is tagged with the profile
# name it belongs to, and the list is exposed under the 'custom' key of
# ``custom_bulk_questions``.
bulk_questions = [
    # 1. Below is an example of a sample QA pair.
    {"question": "医院按年收入最高的是?",
     "sql": '''
select name, revenue from table_a
order by
CASE
WHEN revenue = '500~1000W' THEN 1
WHEN revenue = '1000~3000W' THEN 2
END
desc
limit 10
'''}
]

for q in bulk_questions:
    # 2. Change <profile_name> so that it matches the Data Profile name.
    q['profile'] = '<profile_name>'

# Same list object (not a copy), keyed by the fixed group name 'custom'.
custom_bulk_questions = {
    'custom': bulk_questions
}
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
# Sample question/SQL pairs for the 'shopping_guide' data profile.
#
# Each entry is a dict with a natural-language "question" and the "sql" that
# answers it, written against the tables `interactions`, `items` and `users`
# referenced in the statements below. After the list is built, every entry is
# tagged with the profile name 'shopping_guide'.
#
# NOTE(review): the SQL uses MySQL-style functions (UNIX_TIMESTAMP, DATE_SUB);
# the fixes marked below assume the samples are meant to run on MySQL.
bulk_questions = [
    {"question": "30岁以下女性用户购买商品的平均价格是多少?",
     "sql": '''SELECT AVG(price)
FROM interactions i
JOIN items it ON i.item_id = it.item_id
JOIN users u ON i.user_id = u.user_id
WHERE u.gender = 'female' AND u.age < 30 AND i.event_type = 'purchase'
'''},
    {"question": "40岁以上男性用户浏览次数最多的前3个商品类别是什么?",
     "sql": '''SELECT category_l1, COUNT(*) AS views
FROM interactions i
JOIN items it ON i.item_id = it.item_id
JOIN users u ON i.user_id = u.user_id
WHERE u.gender = 'male' AND u.age > 40 AND i.event_type = 'view'
GROUP BY category_l1
ORDER BY views DESC
LIMIT 3
'''},
    {"question": "18-25岁用户购买打折商品的数量有多少?",
     "sql": '''SELECT COUNT(DISTINCT item_id)
FROM interactions i
JOIN users u ON i.user_id = u.user_id
WHERE u.age BETWEEN 18 AND 25
AND i.event_type = 'purchase'
AND i.discount != ''
'''},
    {"question": "每个商品类别从浏览到购买的转换率是多少?",
     "sql": '''WITH views AS (
SELECT category_l1, COUNT(*) AS views
FROM interactions i
JOIN items it ON i.item_id = it.item_id
WHERE i.event_type = 'view'
GROUP BY category_l1
),
purchases AS (
SELECT category_l1, COUNT(*) AS purchases
FROM interactions i
JOIN items it ON i.item_id = it.item_id
WHERE i.event_type = 'purchase'
GROUP BY category_l1
)
SELECT v.category_l1, purchases / views AS conversion_rate
FROM views v
JOIN purchases p ON v.category_l1 = p.category_l1
'''},
    {"question": "30岁以下用户浏览次数最多的前5个商品是什么?",
     "sql": '''SELECT item_id, COUNT(*) AS views
FROM interactions i
JOIN users u ON i.user_id = u.user_id
WHERE u.age < 30
AND i.event_type = 'view'
GROUP BY item_id
ORDER BY views DESC
LIMIT 5
'''},
    # Fix: `user_id` exists in both joined tables, so it must be qualified
    # (the unqualified form is rejected as ambiguous).
    {"question": "过去30天内,男性和女性用户中有多少人完成了购买?",
     "sql": '''
SELECT gender, COUNT(DISTINCT i.user_id) AS users
FROM interactions i
JOIN users u ON i.user_id = u.user_id
WHERE i.event_type = 'purchase'
AND i.timestamp >= UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 DAY))
GROUP BY gender
'''},
    {"question": "购买价格在50美元以上的商品,用户年龄分布如何?",
     "sql": '''SELECT age, COUNT(DISTINCT i.user_id) AS users
FROM interactions i
JOIN users u ON i.user_id = u.user_id
JOIN items it ON i.item_id = it.item_id
WHERE i.event_type = 'purchase'
AND it.price > 50
GROUP BY age
'''},
    {"question": "25岁以下女性用户购买打折商品最常见的类别是什么?",
     "sql": '''SELECT category_l1, COUNT(*) AS purchases
FROM interactions i
JOIN items it ON i.item_id = it.item_id
JOIN users u ON i.user_id = u.user_id
WHERE u.gender = 'female' AND u.age < 25
AND i.discount != ''
AND i.event_type = 'purchase'
GROUP BY category_l1
ORDER BY purchases DESC
LIMIT 1
'''},
    {"question": "有多少商品被同一用户购买了多次?",
     "sql": '''SELECT COUNT(*)
FROM (
SELECT item_id, user_id, COUNT(*) AS num_purchases
FROM interactions
WHERE event_type = 'purchase'
GROUP BY item_id, user_id
HAVING num_purchases > 1
) t
'''},
    {"question": "有哪些商品被浏览过但从未被购买?",
     "sql": '''SELECT item_id
FROM interactions i
WHERE i.event_type = 'view'
AND item_id NOT IN (
SELECT item_id
FROM interactions
WHERE event_type = 'purchase'
)
'''},
    {"question": "30岁至40岁用户的购买总收入是多少?",
     "sql": '''SELECT SUM(price) AS total_revenue
FROM interactions i
JOIN items it ON i.item_id = it.item_id
JOIN users u ON i.user_id = u.user_id
WHERE u.age BETWEEN 30 AND 40
AND i.event_type = 'purchase'
'''},
    {"question": "每个商品被加入购物车的平均次数是多少?",
     "sql": '''SELECT item_id, AVG(added_to_cart) AS avg_cart_adds
FROM (
SELECT item_id, COUNT(*) AS added_to_cart
FROM interactions
WHERE event_type = 'add_to_cart'
GROUP BY item_id
) t
GROUP BY item_id
'''},
    # Fix: `SELECT *` over the join put `item_id` into the CTE twice, which
    # is a duplicate-column error; only `user_id` is consumed downstream.
    {"question": "女性用户中低于10美元的购买占所有购买的百分比是多少?",
     "sql": '''WITH purchases AS (
SELECT i.user_id
FROM interactions i
JOIN items it ON i.item_id = it.item_id
WHERE i.event_type = 'purchase' AND price < 10
)

SELECT COUNT(*) / (SELECT COUNT(*) FROM purchases) AS percentage
FROM purchases p
JOIN users u ON p.user_id = u.user_id
WHERE u.gender = 'female'
'''},
    {"question": "从商品浏览到商品详情页面浏览的点击率是多少?",
     "sql": '''WITH product_views AS (
SELECT COUNT(*) AS views
FROM interactions
WHERE event_type = 'view'
),

detail_views AS (
SELECT COUNT(*) AS detail_views
FROM interactions
WHERE event_type = 'detail_view'
)

SELECT detail_views / views AS ctr
FROM product_views, detail_views
'''},
    {"question": "25岁以下用户最常购买的前3个商品类别是什么?每个商品的平均购买次数是多少?",
     "sql": '''SELECT category_l1, COUNT(*) AS purchases
FROM interactions i
JOIN items it ON i.item_id = it.item_id
JOIN users u ON i.user_id = u.user_id
WHERE u.age < 25 AND i.event_type = 'purchase'
GROUP BY category_l1
ORDER BY purchases DESC
LIMIT 3
'''},
    {"question": "每个商品的平均购买次数是多少?",
     "sql": '''SELECT item_id, AVG(purchases) AS avg_purchases
FROM (
SELECT item_id, COUNT(*) AS purchases
FROM interactions
WHERE event_type = 'purchase'
GROUP BY item_id
) t
GROUP BY item_id
'''},
    # Fix: `SELECT *` over the join put `user_id` into the CTE twice, which
    # is a duplicate-column error; only `discount` is consumed downstream.
    {"question": "男性用户中折扣大于30%的购买占所有购买的百分比是多少?",
     "sql": '''WITH male_purchases AS (
SELECT i.discount
FROM interactions i
JOIN users u ON i.user_id = u.user_id
WHERE u.gender = 'male' AND i.event_type = 'purchase'
)

SELECT COUNT(*) / (SELECT COUNT(*) FROM male_purchases) AS percentage
FROM male_purchases
WHERE CAST(discount AS FLOAT) > 0.3
'''},
    {"question": "只浏览过但从未购买商品的用户有多少?",
     "sql": '''SELECT COUNT(DISTINCT user_id)
FROM interactions
WHERE user_id NOT IN (
SELECT DISTINCT user_id
FROM interactions
WHERE event_type = 'purchase'
)
AND event_type = 'view'
'''},
    # Fix: each UNION branch carries its own ORDER BY / LIMIT, so each branch
    # has to be parenthesized; unparenthesized it is a syntax error.
    {"question": "哪些类别的商品从浏览到购买的转换率最高和最低?",
     "sql": '''WITH views AS (
SELECT category_l1, COUNT(*) AS views
FROM interactions i
JOIN items it ON i.item_id = it.item_id
WHERE event_type = 'view'
GROUP BY category_l1
),

purchases AS (
SELECT category_l1, COUNT(*) AS purchases
FROM interactions i
JOIN items it ON i.item_id = it.item_id
WHERE event_type = 'purchase'
GROUP BY category_l1
)

(SELECT v.category_l1, purchases/views AS conversion_rate
FROM views v
JOIN purchases p ON v.category_l1 = p.category_l1
ORDER BY conversion_rate DESC
LIMIT 1)

UNION

(SELECT v.category_l1, purchases/views AS conversion_rate
FROM views v
JOIN purchases p ON v.category_l1 = p.category_l1
ORDER BY conversion_rate ASC
LIMIT 1)
'''},
    {"question": "哪个商品的浏览转化购买率最高?",
     "sql": '''WITH views AS (
SELECT item_id, COUNT(*) AS views
FROM interactions
WHERE event_type = 'view'
GROUP BY item_id
),

purchases AS (
SELECT item_id, COUNT(*) AS purchases
FROM interactions
WHERE event_type = 'purchase'
GROUP BY item_id
)

SELECT v.item_id, purchases/views AS view_to_purchase_pct
FROM views v
JOIN purchases p ON v.item_id = p.item_id
ORDER BY view_to_purchase_pct DESC
LIMIT 1
'''},
]

# Tag every sample with the data profile it belongs to.
for q in bulk_questions:
    q['profile'] = 'shopping_guide'

application/initial_data/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Please download demo data to this folder. For example, run the following command:
2+
```
3+
wget https://github.com/fengxu1211/generative-bi-using-rag/raw/demo_data/application/initial_data/init_mysql_db.sql.zip
4+
```

0 commit comments

Comments
 (0)