Skip to content

Commit 5b61691

Browse files
As Glue limits comments to 255 characters, we may need to truncate them (#174)
* As Glue limits comments to 255 characters, we may need to truncate them.
* Truncate comments to 255 characters for partition keys.

Co-authored-by: Mikko Leppänen <mikko.leppanen@fonecta.com>
1 parent c831320 commit 5b61691

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

utilities/Hive_metastore_migration/src/hive_metastore_migration.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,14 @@ def transform_df_with_idx(self, df, id_col, idx, payloads_column_name, payload_t
475475
return self.sql_context.createDataFrame(rdd_result, schema)
476476

477477
def transform_ms_partition_keys(self, ms_partition_keys):
478+
def extract_row(row):
    """Build the (name, type, comment) payload for one partition-key row.

    Args:
        row: Mapping-like record exposing the ``PKEY_NAME``, ``PKEY_TYPE``
            and ``PKEY_COMMENT`` fields.

    Returns:
        A 3-tuple ``(PKEY_NAME, PKEY_TYPE, comment)`` where ``comment`` is
        limited to its first 255 characters.
    """
    # Glue limits comments to 255 characters, so longer ones are cut.
    max_comment_length = 255

    def truncate(x):
        # A missing comment (None) has no __getitem__ and passes through
        # untouched; anything sliceable is cut to the limit.
        return x[:max_comment_length] if hasattr(x, "__getitem__") else x

    return (
        row['PKEY_NAME'],
        # The second slot carries the key's type; emitting PKEY_NAME here
        # again would silently replace every partition key type by its name.
        row['PKEY_TYPE'],
        truncate(row['PKEY_COMMENT']),
    )
478486
return self.transform_df_with_idx(
479487
df=ms_partition_keys,
480488
id_col="TBL_ID",
@@ -487,7 +495,7 @@ def transform_ms_partition_keys(self, ms_partition_keys):
487495
StructField(name="comment", dataType=StringType()),
488496
]
489497
),
490-
payload_func=lambda row: (row["PKEY_NAME"], row["PKEY_TYPE"], row["PKEY_COMMENT"]),
498+
payload_func=extract_row,
491499
)
492500

493501
def transform_ms_partition_key_vals(self, ms_partition_key_vals):
@@ -511,6 +519,14 @@ def transform_ms_bucketing_cols(self, ms_bucketing_cols):
511519
)
512520

513521
def transform_ms_columns(self, ms_columns):
522+
def extract_row(row):
    """Return the (name, type, comment) payload tuple for one column row.

    The comment is reduced to its first 255 characters when the value
    supports indexing; a value without ``__getitem__`` (such as ``None``)
    is returned unchanged.
    """
    def clip(value):
        # Non-sliceable values (e.g. a missing comment) pass straight through.
        if not hasattr(value, "__getitem__"):
            return value
        return value[:255]

    # Fields are read in the same order they appear in the resulting tuple.
    column_name = row['COLUMN_NAME']
    type_name = row['TYPE_NAME']
    comment = clip(row['COMMENT'])
    return (column_name, type_name, comment)
514530
return self.transform_df_with_idx(
515531
df=ms_columns,
516532
id_col="CD_ID",
@@ -523,7 +539,7 @@ def transform_ms_columns(self, ms_columns):
523539
StructField(name="comment", dataType=StringType()),
524540
]
525541
),
526-
payload_func=lambda row: (row["COLUMN_NAME"], row["TYPE_NAME"], row["COMMENT"]),
542+
payload_func=extract_row,
527543
)
528544

529545
def transform_ms_skewed_col_names(self, ms_skewed_col_names):

0 commit comments

Comments
 (0)