您的当前位置:首页正文

130.Impala基准测试

2024-11-07 来源:个人技术集锦
  • 已使用hive-testbench生成好Hive的基准测试数据
  • 创建好了Hive的外部表
  • Hue查看
  • SQL脚本alltables_parquet.sql
    • 用于生成parquet格式的表
drop database if exists ${VAR:DB} cascade;
create database ${VAR:DB};
use ${VAR:DB};
set parquet_file_size=512M;
set COMPRESSION_CODEC=snappy;
drop table if exists call_center;
create table ${VAR:DB}.call_center
stored as parquet
as select * from ${VAR:HIVE_DB}.call_center;
drop table if exists catalog_page;
create table ${VAR:DB}.catalog_page
stored as parquet
as select * from ${VAR:HIVE_DB}.catalog_page;
drop table if exists catalog_returns;
create table ${VAR:DB}.catalog_returns
stored as parquet
as select * from ${VAR:HIVE_DB}.catalog_returns;
drop table if exists catalog_sales;
create table ${VAR:DB}.catalog_sales
stored as parquet
as select * from ${VAR:HIVE_DB}.catalog_sales;
drop table if exists customer_address;
create table ${VAR:DB}.customer_address
stored as parquet
as select * from ${VAR:HIVE_DB}.customer_address;
drop table if exists customer_demographics;
create table ${VAR:DB}.customer_demographics
stored as parquet
as select * from ${VAR:HIVE_DB}.customer_demographics;
drop table if exists customer;
create table ${VAR:DB}.customer
stored as parquet
as select * from ${VAR:HIVE_DB}.customer;
drop table if exists date_dim;
create table ${VAR:DB}.date_dim
stored as parquet
as select * from ${VAR:HIVE_DB}.date_dim;
drop table if exists household_demographics;
create table ${VAR:DB}.household_demographics
stored as parquet
as select * from ${VAR:HIVE_DB}.household_demographics;
drop table if exists income_band;
create table ${VAR:DB}.income_band
stored as parquet
as select * from ${VAR:HIVE_DB}.income_band;
drop table if exists inventory;
create table ${VAR:DB}.inventory
stored as parquet
as select * from ${VAR:HIVE_DB}.inventory;
drop table if exists item;
create table ${VAR:DB}.item
stored as parquet
as select * from ${VAR:HIVE_DB}.item;
drop table if exists promotion;
create table ${VAR:DB}.promotion
stored as parquet
as select * from ${VAR:HIVE_DB}.promotion;
drop table if exists reason;
create table ${VAR:DB}.reason
stored as parquet
as select * from ${VAR:HIVE_DB}.reason;
drop table if exists ship_mode;
create table ${VAR:DB}.ship_mode
stored as parquet
as select * from ${VAR:HIVE_DB}.ship_mode;
drop table if exists store_returns;
create table ${VAR:DB}.store_returns
stored as parquet
as select * from ${VAR:HIVE_DB}.store_returns;
drop table if exists store_sales;
create table ${VAR:DB}.store_sales
stored as parquet
as select * from ${VAR:HIVE_DB}.store_sales;
drop table if exists store;
create table ${VAR:DB}.store
stored as parquet
as select * from ${VAR:HIVE_DB}.store;
drop table if exists time_dim;
create table ${VAR:DB}.time_dim
stored as parquet
as select * from ${VAR:HIVE_DB}.time_dim;
drop table if exists warehouse;
create table ${VAR:DB}.warehouse
stored as parquet
as select * from ${VAR:HIVE_DB}.warehouse;
drop table if exists web_page;
create table ${VAR:DB}.web_page
stored as parquet
as select * from ${VAR:HIVE_DB}.web_page;
drop table if exists web_returns;
create table ${VAR:DB}.web_returns
stored as parquet
as select * from ${VAR:HIVE_DB}.web_returns;
drop table if exists web_sales;
create table ${VAR:DB}.web_sales
stored as parquet
as select * from ${VAR:HIVE_DB}.web_sales;
drop table if exists web_site;
create table ${VAR:DB}.web_site
stored as parquet
as select * from ${VAR:HIVE_DB}.web_site;

  • Impala基准测试数据生成
# 在Impala Daemon节点执行
[root@ip-xxx-xx-xx-xx ~]# impala-shell -i ip-xxx-xx-xx-xx.ap-southeast-1.compute.internal --var=DB=tpcds_parquet_2 --var=HIVE_DB=tpcds_text_2 -f alltables_parquet.sql

  • Impala查看是否创建成功
  • analyze.sql创建
# 用于统计分析Impala的表
use ${VAR:DB};
compute stats call_center ;
compute stats catalog_page ;
compute stats catalog_returns ;
compute stats catalog_sales ;
compute stats customer_address ;
compute stats customer_demographics ;
compute stats customer ;
compute stats date_dim ;
compute stats household_demographics ;
compute stats income_band ;
compute stats inventory ;
compute stats item ;
compute stats promotion ;
compute stats reason ;
compute stats ship_mode ;
compute stats store_returns ;
compute stats store_sales ;
compute stats store ;
compute stats time_dim ;
compute stats warehouse ;
compute stats web_page ;
compute stats web_returns ;
compute stats web_sales ;
compute stats web_site ;

  • 对Impala的表进行统计分析
[root@ip-xxx-xx-xx-xx ddl-tpcds]# impala-shell -i ip-xxx-xx-xx-xx.ap-southeast-1.compute.internal --var=DB=tpcds_parquet_2 -f analyze.sql

  • 批量运行脚本run_all_queries.sh
# 将结果输出到日志文件
[root@ip-xxx-xx-xx-xx impala-tpcds]# vim run_all_queries.sh 
#!/bin/bash

impala_demon=ip-xxx-xx-xx-xx.ap-southeast-1.compute.internal
database_name=tpcds_parquet_2
current_path=`pwd`
queries_dir=${current_path}/queries
rm -rf logs
mkdir logs
for t in `ls ${queries_dir}`
do
    echo "current query will be ${queries_dir}/${t}"
    impala-shell --database=$database_name -i $impala_demon -f ${queries_dir}/${t} &>logs/${t}.log
done
echo "all queries execution are finished, please check logs for the result!"
# 将脚本中impala_daemon和database_name修改为你自己环境的配置即可
  • 查看执行结果及运行时间


大数据视频推荐:






大数据语音推荐:





显示全文