Skip to content

Commit 18b9a40

Browse files
committed
resolve conflicts for aws-samples#13
1 parent 5b61691 commit 18b9a40

File tree

3 files changed

+56
-2
lines changed

3 files changed

+56
-2
lines changed

utilities/Hive_metastore_migration/README.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,17 @@ as a Glue ETL job, if AWS Glue can directly connect to your Hive metastore.
206206
- `--database-prefix` and `--table-prefix` (optional) to set a string prefix that is applied to the
207207
database and table names. They are empty by default.
208208

209+
- Optionally, you can set `--config_file` to `<path_to_your_config_yaml_file>` which contains configuration parameters (default path: `artifacts/config.yaml`)
210+
- Provide the following configuration parameters in the configuration yaml file:
211+
* mode
212+
* jdbc-url
213+
* jdbc-username
214+
* jdbc-password
215+
* database-prefix
216+
* table-prefix
217+
209218
- Example spark-submit command to migrate Hive metastore to S3, tested on EMR-4.7.1:
210-
```bash
219+
```bash
211220
MYSQL_JAR_PATH=/usr/lib/hadoop/mysql-connector-java-5.1.42-bin.jar
212221
DRIVER_CLASSPATH=/home/hadoop/*:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:$MYSQL_JAR_PATH
213222
spark-submit --driver-class-path $DRIVER_CLASSPATH \
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
mode:
2+
jdbc-url:
3+
jdbc-username:
4+
jdbc-password:
5+
database-prefix:
6+
table-prefix:
7+
output-path:
8+
input-path:

utilities/Hive_metastore_migration/src/hive_metastore_migration.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# except for python 2.7 standard library and Spark 2.1
1010
import sys
1111
from datetime import datetime, timedelta, tzinfo
12+
import yaml
1213
from time import localtime, strftime
1314
from types import MethodType
1415

@@ -1606,6 +1607,39 @@ def parse_arguments(args):
16061607
return options
16071608

16081609

1610+
def parse_arguments_from_yaml_file(args):
    """
    Read migration options from a YAML configuration file.

    Parses the command line for a single ``--config_file`` argument
    (path to the YAML configuration file, default
    ``artifacts/config.yaml``), loads the file, normalizes key names
    (hyphens -> underscores, matching argparse's dest conversion used
    by parse_arguments), and validates the options for the selected
    migration mode.
    ----------
    Return:
    Dictionary of config options
    """
    parser = argparse.ArgumentParser(prog=args[0])
    # required=True would make the default unreachable; keep the flag
    # optional so the documented default path actually applies.
    parser.add_argument(
        '-f', '--config_file', required=False,
        default='artifacts/config.yaml',
        help='Provide yaml configuration file path to read migration '
             'arguments from. Default path: `artifacts/config.yaml`')
    options = get_options(parser, args)
    config_file_path = options['config_file']

    ## read the yaml file
    # safe_load avoids arbitrary Python object construction from the
    # config file (yaml.load without an explicit Loader is unsafe and
    # deprecated in PyYAML >= 5.1).
    with open(config_file_path, 'r') as yaml_file_stream:
        config_options = yaml.safe_load(yaml_file_stream)

    # The config file documents hyphenated keys (e.g. `jdbc-url`) while
    # the rest of this module reads underscore keys (e.g.
    # options['jdbc_url']); normalize so both spellings work, mirroring
    # how argparse converts `--jdbc-url` to dest `jdbc_url`.
    config_options = {
        key.replace('-', '_'): value
        for key, value in config_options.items()
    }

    if config_options['mode'] == FROM_METASTORE:
        validate_options_in_mode(
            options=config_options, mode=FROM_METASTORE,
            required_options=['output_path'],
            not_allowed_options=['input_path']
        )
    elif config_options['mode'] == TO_METASTORE:
        validate_options_in_mode(
            options=config_options, mode=TO_METASTORE,
            required_options=['input_path'],
            not_allowed_options=['output_path']
        )
    else:
        # Must reference config_options here: `options` only contains
        # `config_file`, so options['mode'] would raise KeyError and
        # mask the real problem (an unknown mode value).
        raise AssertionError('unknown mode ' + config_options['mode'])

    return config_options
16091643
def get_spark_env():
16101644
try:
16111645
sc = SparkContext.getOrCreate()
@@ -1733,7 +1767,10 @@ def validate_aws_regions(region):
17331767

17341768

17351769
def main():
1736-
options = parse_arguments(sys.argv)
1770+
# options = parse_arguments(sys.argv)
1771+
1772+
## This now reads options from path to config yaml file
1773+
options = parse_arguments_from_yaml_file(sys.argv)
17371774

17381775
connection = {"url": options["jdbc_url"], "user": options["jdbc_username"], "password": options["jdbc_password"]}
17391776
db_prefix = options.get("database_prefix") or ""

0 commit comments

Comments
 (0)