From 933a9f5c98917060f3a0886723330a02fa14c905 Mon Sep 17 00:00:00 2001 From: kamikennn Date: Fri, 16 Feb 2024 15:28:49 +0900 Subject: [PATCH] update --- .../models/tla/marts/INSERT_DELETE_1.sql | 24 ++++++++++ .../models/tla/marts/INSERT_DELETE_2.sql | 20 ++++++++ .../models/tla/marts/MERGE_TEST_1.sql | 6 +-- .../models/tla/marts/ONLY_INSERT_1.sql | 18 +++++++ .../analytics/models/tla/marts/models.yml | 5 +- .../WRK_RUN_QUERY_MACRO_WITH_IF_TEST_3.sql | 2 - .../analytics/template/check_row_count.sql | 37 +++++++++++++++ .../analytics/template/delete_all__insert.sql | 33 +++++++++++++ .../template/delete_partial__insert.sql | 39 +++++++++++++++ .../dbtprj/analytics/template/ephemeral.sql | 23 +++++++++ .../analytics/template/get_row_count.sql | 12 +++++ .../template/insert__delete_partial.sql | 20 ++++++++ localenv/dbtprj/analytics/template/merge.sql | 24 ++++++++++ localenv/dbtprj/analytics/template/models.yml | 31 ++++++++++++ .../dbtprj/analytics/template/only_insert.sql | 18 +++++++ .../dbtprj/analytics/template/properties.yml | 27 +++++++++++ localenv/dbtprj/analytics/template/readme.md | 47 +++++++++++++++++++ .../dbtprj/analytics/template/sources.yml | 32 +++++++++++++ 18 files changed, 412 insertions(+), 6 deletions(-) create mode 100644 localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_1.sql create mode 100644 localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_2.sql create mode 100644 localenv/dbtprj/analytics/models/tla/marts/ONLY_INSERT_1.sql create mode 100644 localenv/dbtprj/analytics/template/check_row_count.sql create mode 100644 localenv/dbtprj/analytics/template/delete_all__insert.sql create mode 100644 localenv/dbtprj/analytics/template/delete_partial__insert.sql create mode 100644 localenv/dbtprj/analytics/template/ephemeral.sql create mode 100644 localenv/dbtprj/analytics/template/get_row_count.sql create mode 100644 localenv/dbtprj/analytics/template/insert__delete_partial.sql create mode 100644 localenv/dbtprj/analytics/template/merge.sql create mode 100644 localenv/dbtprj/analytics/template/models.yml create mode 100644 localenv/dbtprj/analytics/template/only_insert.sql create mode 100644 localenv/dbtprj/analytics/template/properties.yml create mode 100644 localenv/dbtprj/analytics/template/readme.md create mode 100644 localenv/dbtprj/analytics/template/sources.yml diff --git a/localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_1.sql b/localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_1.sql new file mode 100644 index 0000000..2ad31cd --- /dev/null +++ b/localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_1.sql @@ -0,0 +1,24 @@ +{{ config( + materialized='incremental', + incremental_strategy = 'merge', + partition_by={ + "field": "ts_create_utc", + "data_type": "timestamp", + "granularity": "hour" + }, + cluster_by = ['dt_create_utc'], + incremental_predicates = ["DBT_INTERNAL_DEST.dt_create_utc >= DATE_SUB('2024-01-02', INTERVAL 1 DAY)"], + ) +}} + +select + * +from + {{ source('TLA__TRVANALYT_RAW', 'TLA__CRYPTO_CANDLES_MINUTE_3') }} + + +{% set query %} + delete from {{ this }} where dt_create_utc = '2024-01-03' +{% endset %} + +{% do run_query(query) %} \ No newline at end of file diff --git a/localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_2.sql b/localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_2.sql new file mode 100644 index 0000000..01bce31 --- /dev/null +++ b/localenv/dbtprj/analytics/models/tla/marts/INSERT_DELETE_2.sql @@ -0,0 +1,20 @@ +{{ config( + materialized='incremental', + incremental_strategy = 'merge', + partition_by={ + "field": "ts_create_utc", + "data_type": "timestamp", + "granularity": "hour" + }, + cluster_by = ['dt_create_utc'], + incremental_predicates = ["DBT_INTERNAL_DEST.dt_create_utc >= DATE_SUB('2024-01-02', INTERVAL 1 DAY)"], + post_hook = [ + "delete from {{ this }} where dt_create_utc = '2024-01-01'" + ] + ) +}} + +select + * +from + {{ source('TLA__TRVANALYT_RAW', 'TLA__CRYPTO_CANDLES_MINUTE_2') }} diff --git a/localenv/dbtprj/analytics/models/tla/marts/MERGE_TEST_1.sql b/localenv/dbtprj/analytics/models/tla/marts/MERGE_TEST_1.sql index 8a49353..51bd5a8 100644 --- a/localenv/dbtprj/analytics/models/tla/marts/MERGE_TEST_1.sql +++ b/localenv/dbtprj/analytics/models/tla/marts/MERGE_TEST_1.sql @@ -19,7 +19,7 @@ 'buytakerquantity', 'tradecount' ], - incremental_predicates = ["DBT_INTERNAL_DEST.dt_create_utc >= DATE_SUB('2024-01-03', INTERVAL 1 DAY)"] + incremental_predicates = ["DBT_INTERNAL_DEST.dt_create_utc >= DATE_SUB('2024-01-02', INTERVAL 1 DAY)"] ) }} @@ -29,7 +29,7 @@ select * from - {{ source('TLA__TRVANALYT_RAW', 'TLA__CRYPTO_CANDLES_MINUTE_3') }} + {{ source('TLA__TRVANALYT_RAW', 'TLA__CRYPTO_CANDLES_MINUTE_1') }} {% if is_incremental() %} -where dt_create_utc >= DATE_SUB('2024-01-03', INTERVAL 1 DAY) + where dt_create_utc >= DATE_SUB('2024-01-02', INTERVAL 1 DAY) {% endif %} diff --git a/localenv/dbtprj/analytics/models/tla/marts/ONLY_INSERT_1.sql b/localenv/dbtprj/analytics/models/tla/marts/ONLY_INSERT_1.sql new file mode 100644 index 0000000..c74dae2 --- /dev/null +++ b/localenv/dbtprj/analytics/models/tla/marts/ONLY_INSERT_1.sql @@ -0,0 +1,18 @@ +{{ config( + materialized='incremental', + incremental_strategy = 'merge', + partition_by={ + "field": "ts_create_utc", + "data_type": "timestamp", + "granularity": "hour" + }, + cluster_by = ['dt_create_utc'], + incremental_predicates = ["DBT_INTERNAL_DEST.dt_create_utc >= DATE_SUB('2024-01-02', INTERVAL 1 DAY)"], + ) +}} + +select + * +from + {{ source('TLA__TRVANALYT_RAW', 'TLA__CRYPTO_CANDLES_MINUTE_3') }} +where dt_create_utc >= DATE_SUB('2024-01-02', INTERVAL 1 DAY) diff --git a/localenv/dbtprj/analytics/models/tla/marts/models.yml b/localenv/dbtprj/analytics/models/tla/marts/models.yml index c72ab18..e4f0e62 100644 --- a/localenv/dbtprj/analytics/models/tla/marts/models.yml +++ b/localenv/dbtprj/analytics/models/tla/marts/models.yml @@ -1,3 +1,5 @@ +version: 2 + models: - name: CRYPTO_CANDLES config: @@ -16,11 +18,12 @@ models: - type: not_null tests: - not_null + mode: "required" - name: low data_type: FLOAT64 - name: high data_type: FLOAT64 - - name: open + - name: open data_type: FLOAT64 - name: close data_type: FLOAT64 diff --git a/localenv/dbtprj/analytics/models/tla/wrk/WRK_RUN_QUERY_MACRO_WITH_IF_TEST_3.sql b/localenv/dbtprj/analytics/models/tla/wrk/WRK_RUN_QUERY_MACRO_WITH_IF_TEST_3.sql index cbfe76e..3f2dc30 100644 --- a/localenv/dbtprj/analytics/models/tla/wrk/WRK_RUN_QUERY_MACRO_WITH_IF_TEST_3.sql +++ b/localenv/dbtprj/analytics/models/tla/wrk/WRK_RUN_QUERY_MACRO_WITH_IF_TEST_3.sql @@ -16,7 +16,5 @@ order by 1 {% else %} {{ exceptions.raise_compiler_error("Invalid `row_counts`. Got: " ~ row_counts) }} {% endif %} -{% else %} - {% set row_counts = 0 %} {% endif %} diff --git a/localenv/dbtprj/analytics/template/check_row_count.sql b/localenv/dbtprj/analytics/template/check_row_count.sql new file mode 100644 index 0000000..347fa42 --- /dev/null +++ b/localenv/dbtprj/analytics/template/check_row_count.sql @@ -0,0 +1,37 @@ +{% set query %} +select + count(distinct id) +from + {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} +order by 1 +{% endset %} + +{% set results = run_query(query) %} + +{% if execute %} + {# Return the first column #} + {% set row_counts = results.columns[0].values()[0] %} + {% if row_counts == 0 %} + {{ exceptions.raise_compiler_error("Invalid `row_counts`. Got: " ~ row_counts) }} + {% else %} + WITH TMP_1 AS ( + SELECT + id, + dt + FROM {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} + ), + TMP_2 AS ( + SELECT + id, + dt + FROM {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} + ), + FINAL AS ( + SELECT * FROM TMP_1 + UNION ALL + SELECT * FROM TMP_2 + ) + SELECT * FROM FINAL + {% endif %} +{% endif %} + diff --git a/localenv/dbtprj/analytics/template/delete_all__insert.sql b/localenv/dbtprj/analytics/template/delete_all__insert.sql new file mode 100644 index 0000000..4214cf4 --- /dev/null +++ b/localenv/dbtprj/analytics/template/delete_all__insert.sql @@ -0,0 +1,33 @@ +{{ config( + materialized='table', + partition_by={ + "field": "dt", + "data_type": "date", + "granularity": "day" + }, + cluster_by = ['dt'], + labels = {'contains_pii': 'no', 'contains_easyid': 'yes'} + ) +}} + +{# This is comment by jinja #} +{# delete all data (drop table) -> insert #} + +WITH TMP_1 AS ( + SELECT + id, + dt + FROM {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} +), +TMP_2 AS ( + SELECT + id, + dt + FROM {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} +), +FINAL AS ( + SELECT * FROM TMP_1 + UNION ALL + SELECT * FROM TMP_2 +) +SELECT * FROM FINAL diff --git a/localenv/dbtprj/analytics/template/delete_partial__insert.sql b/localenv/dbtprj/analytics/template/delete_partial__insert.sql new file mode 100644 index 0000000..12abeab --- /dev/null +++ b/localenv/dbtprj/analytics/template/delete_partial__insert.sql @@ -0,0 +1,39 @@ +{{ config( + materialized='incremental', + incremental_strategy = 'merge' + ) +}} + +{# This is comment by jinja #} +{# delete partial data -> insert #} + + +{# 1. Delete Data #} + +{% set query %} + delete from {{ this }} where dt = CURRENT_DATE +{% endset %} + +{# In the first run, this delete macro will not be run #} +{# If we want to run in the first run, remove "if is_incremental()" block #} +{% if is_incremental() %} + {% do run_query(query) %} +{% endif %} + + + + +{# 2. Insert(append) Data #} +{# In the first run, all source data will be inserted #} +{# From the second run, the where condition will be applied #} +WITH FINAL AS ( + SELECT + * + FROM + {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} + {% if is_incremental() %} + WHERE dt >= DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY) + {% endif %} +) +SELECT * FROM FINAL + diff --git a/localenv/dbtprj/analytics/template/ephemeral.sql b/localenv/dbtprj/analytics/template/ephemeral.sql new file mode 100644 index 0000000..0a063ab --- /dev/null +++ b/localenv/dbtprj/analytics/template/ephemeral.sql @@ -0,0 +1,23 @@ +{{ config( + materialized='ephemeral' +) +}} + +WITH TMP_1 AS ( + SELECT + id, + dt + FROM {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} +), +TMP_2 AS ( + SELECT + id, + dt + FROM {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} +), +FINAL AS ( + SELECT * FROM TMP_1 + UNION ALL + SELECT * FROM TMP_2 +) +SELECT * FROM FINAL diff --git a/localenv/dbtprj/analytics/template/get_row_count.sql b/localenv/dbtprj/analytics/template/get_row_count.sql new file mode 100644 index 0000000..c9fb586 --- /dev/null +++ b/localenv/dbtprj/analytics/template/get_row_count.sql @@ -0,0 +1,12 @@ +{% macro get_row_count(table_name) %} + {% set query %} + SELECT COUNT(*) FROM {{ table_name }} + {% endset %} + + {% set results = run_query(query) %} + + {% if execute %} + {% set row_count = results.columns[0].values()[0] %} + {{ return(row_count) }} + {% endif %} +{% endmacro %} diff --git a/localenv/dbtprj/analytics/template/insert__delete_partial.sql b/localenv/dbtprj/analytics/template/insert__delete_partial.sql new file mode 100644 index 0000000..f15849b --- /dev/null +++ b/localenv/dbtprj/analytics/template/insert__delete_partial.sql @@ -0,0 +1,20 @@ +{{ config( + materialized='incremental', + incremental_strategy = 'merge', + post_hook = [ + "delete from {{ this }} where dt = CURRENT_DATE" + ] + ) +}} + +{# This is comment by jinja #} +{# insert -> delete partial data by post_hook #} + +WITH FINAL AS ( + SELECT + * + FROM + {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} + WHERE dt >= DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY) +) +SELECT * FROM FINAL diff --git a/localenv/dbtprj/analytics/template/merge.sql b/localenv/dbtprj/analytics/template/merge.sql new file mode 100644 index 0000000..e042a97 --- /dev/null +++ b/localenv/dbtprj/analytics/template/merge.sql @@ -0,0 +1,24 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'merge', + unique_key = ['id','dt'], + merge_update_columns = [ 'dt' ], + incremental_predicates = ["DBT_INTERNAL_DEST.dt >= DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY)"] +) }} + +{# This is comment by jinja #} +{# merge opreration #} +{# Columns in the unique_key list are used for condition in merge query #} +{# E.g., "MERGE target_tbl t USING source_tbl s ON s.id = t.id and s.dt = t.dt ..." #} +{# If we set 'merge_update_columns', only the columns in the list will be update. #} + +WITH FINAL AS ( + SELECT + * + FROM + {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} + {% if is_incremental() %} + WHERE dt >= DATE_SUB(CURRENT_DATE,INTERVAL 7 DAY) + {% endif %} +) +SELECT * FROM FINAL diff --git a/localenv/dbtprj/analytics/template/models.yml b/localenv/dbtprj/analytics/template/models.yml new file mode 100644 index 0000000..78ef030 --- /dev/null +++ b/localenv/dbtprj/analytics/template/models.yml @@ -0,0 +1,31 @@ +version: 2 + +models: + - name: F_RESERVATIONS_INFO + config: + contract: + enforced: true + constraints: + - type: primary_key # not enforced -- will warn & include, not check duplication. + columns: [id, dt] + tests: + - unique: + column_name: "id || '-' || dt" + columns: + - name: id + description: easy id + data_type: STRING + constraints: + - type: not_null + - type: unique + - type: primary_key # not enforced -- will warn & include, not check duplication. + tests: + - not_null + - unique + mode: "required" # set required if the column is not_null type + - name: dt + description: created date + data_type: DATE + tests: + - unique + - name: F_RESERVATIONS_INFO_TRANSITION \ No newline at end of file diff --git a/localenv/dbtprj/analytics/template/only_insert.sql b/localenv/dbtprj/analytics/template/only_insert.sql new file mode 100644 index 0000000..83de2f7 --- /dev/null +++ b/localenv/dbtprj/analytics/template/only_insert.sql @@ -0,0 +1,18 @@ +{{ config( + materialized='incremental', + incremental_strategy = 'merge' + ) +}} + +{# This is comment by jinja #} +{# insert (append), no delete #} + +WITH FINAL AS ( + SELECT + * + FROM + {{ source('TRVANALYT_RAW','RESERVATION_D_BOOKINGS') }} + WHERE + dt >= DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY) +) +SELECT * FROM FINAL diff --git a/localenv/dbtprj/analytics/template/properties.yml b/localenv/dbtprj/analytics/template/properties.yml new file mode 100644 index 0000000..a42d3f9 --- /dev/null +++ b/localenv/dbtprj/analytics/template/properties.yml @@ -0,0 +1,27 @@ +version: 2 + +seeds: + - name: F_RESERVATIONS_INFO + description: "CSV file for F_RESERVATIONS_INFO" + docs: + show: true + config: + column_types: + id: STRING + dt: DATE + delimiter: "," + tests: + - unique: + column_name: "id || '-' || dt" + columns: + - name: "id" + description: "reservation id" + meta: + contains_pii: false + quote: false + tests: + - not_null + - name: "dt" + quote: false + tests: + - not_null diff --git a/localenv/dbtprj/analytics/template/readme.md b/localenv/dbtprj/analytics/template/readme.md new file mode 100644 index 0000000..b5eddcf --- /dev/null +++ b/localenv/dbtprj/analytics/template/readme.md @@ -0,0 +1,47 @@ +# DBT model template + +## sources.yml and models.yml + +1. sources.yml is a config for source dataset such as TRVL_RAW, TRVANALYT_RAW + 1. ```sources.yml``` +2. models.yml is a config for models (mart dataset) + 1. ```models.yml``` + +## DML + +1. delete all data (drop table) -> insert + 1. ```delete_all__insert.sql``` +2. delete partial data -> insert + 1. ```delete_partial__insert.sql``` +3. insert (append), no delete (run_query, not pre_hook, but can use pre_hook as well) + 1. ```only_insert.sql``` +4. insert (append) -> delete (post_hook) + 1. ```insert__delete_partial.sql``` +5. delete only + 1. Cannot do this in DBT. + 2. ```If a bteq file has only deletion, it needs to be merged into another one with insertion.``` +6. merge + 1. ```merge.sql``` +7. update + 1. Need to modify to use "merge" query. + +## DML with IF ELSE condition + +1. record count == 0 then do X else do Y + 1. ```check_row_count.sql``` +2. quit if we get unexpected result + 1. ```check_row_count.sql``` + +## Ephemeral model + +- ```ephemeral.sql``` + +## Seeds + +- config file + - ```properties.yml``` + +## Macros + +- sample macro + - ```get_row_count.sql``` \ No newline at end of file diff --git a/localenv/dbtprj/analytics/template/sources.yml b/localenv/dbtprj/analytics/template/sources.yml new file mode 100644 index 0000000..8362903 --- /dev/null +++ b/localenv/dbtprj/analytics/template/sources.yml @@ -0,0 +1,32 @@ +version: 2 + +sources: + - name: TRVANALYT_RAW # arbitrary source name (must be unique in a DBT project) + database: travel-dwh-mart # bigquery project-id + schema: TRVANALYT_RAW # bigquery dataset-name + tables: + - name: RESERVATION_D_BOOKINGS + tests: + - table_exists: + table_name: RESERVATION_D_BOOKINGS + columns: + - name: id + description: easy id + tests: + - dbt_expectations.expect_column_to_exist + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: STRING + - unique + - not_null + - name: dt + description: created date + tests: + - dbt_expectations.expect_column_to_exist + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: DATE + - unique + - name: TRVL_RAW # arbitrary source name (must be unique in a DBT project) + database: travel-dwh-mart # bigquery project-id + schema: TRVL_RAW # bigquery dataset-name + tables: + - name: RESERVATION_D_BOOKINGS \ No newline at end of file