Initial commit

This commit is contained in:
2019-03-29 13:52:26 +03:00
commit a6cd9ae918
25 changed files with 775 additions and 0 deletions

29
concatenator.sh Normal file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
# Submits the Hive concatenator Spark job (ru.sa2.hive.concatenator.Main)
# under nohup and redirects the command's stdout to concatenator.logs.
# There is no trailing '&', so this script waits for spark-submit to return.

# JVM system properties for the driver, listed in the order they are passed.
driver_java_opts=(
  '-Dapp.tables.filter=db.table'
  '-Dapp.parallel.tasks=15'
  '-Dapp.services.hdfs=hdfs://nameservice1:8020'
  '-Dapp.tables.owner=owner'
  '-Dapp.hiveopts.hive.output.file.extension=.snappy.parquet'
)

# Spark settings; each one is a "--conf key=value" pair.
spark_conf=(
  --conf spark.driver.cores=1
  --conf spark.driver.memory=16G
  --conf spark.yarn.maxAppAttempts=1
  # "${driver_java_opts[*]}" joins the properties with single spaces (default
  # IFS), so the whole key=value pair reaches spark-submit as one argument.
  --conf "spark.driver.extraJavaOptions=${driver_java_opts[*]}"
)

nohup spark-submit --class ru.sa2.hive.concatenator.Main \
  "${spark_conf[@]}" \
  concatenator-assembly-1.0.jar > concatenator.logs
# ////////////////////////////////// V2
#!/usr/bin/env bash
# Submits the Hive concatenator Spark job (ru.sa2.hive.concatenator.Main)
# under nohup and redirects the command's stdout to concatenator_jbt.logs.
# There is no trailing '&', so this script waits for spark-submit to return.

# JVM system properties for the driver, listed in the order they are passed.
driver_java_opts=(
  '-Dapp.tables.filter=^db.table'
  '-Dapp.parallel.tasks=2'
  '-Dapp.task.filter.strategy=default'
  '-Dapp.services.hdfs=hdfs://nameservice1:8020'
  # The double quotes below are literal characters of the property values
  # (they sit inside single quotes); how they are interpreted downstream is
  # not visible here -- presumably Spark's option parser consumes them.
  '-Dapp.hiveopts.0="hive.output.file.extension=.gz.parquet"'
  '-Dapp.hiveopts.1="mapred.job.queue.name=queue"'
)

# Spark settings; each one is a "--conf key=value" pair.
spark_conf=(
  --conf spark.driver.cores=1
  --conf spark.driver.memory=16G
  --conf spark.yarn.maxAppAttempts=1
  # "${driver_java_opts[*]}" joins the properties with single spaces (default
  # IFS), so the whole key=value pair reaches spark-submit as one argument.
  --conf "spark.driver.extraJavaOptions=${driver_java_opts[*]}"
)

nohup spark-submit --class ru.sa2.hive.concatenator.Main \
  "${spark_conf[@]}" \
  concatenator-assembly-1.0.jar > concatenator_jbt.logs