添加依赖

https://www.elastic.co/guide/en/elasticsearch/hadoop/6.3/hive.html

配置说明

TBLPROPERTIES配置项
https://www.elastic.co/guide/en/elasticsearch/hadoop/6.3/configuration.html

‘es.mapping.names’ = ‘occur_date:timestamp, url:url_123’

es.resource

es.resource.read

  • 只能读取,不能写入。
  • 可以设置多个index和type。

配置样例 

  1. es.resource.write = soc_*/data,logs

es.resource.write

  • 只能写入,不能读取。
  • 不能设置多个index和type。 但能通过设置动态模板,进行分表、分区。
  • 可以根据若干个属性,设置到对应的index和type。

index、type动态配置:
数据

  1. {
  2. "media_type":"game",
  3. "title":"Final Fantasy VI",
  4. "year":"1994"
  5. },
  6. {
  7. "media_type":"book",
  8. "title":"Harry Potter",
  9. "year":"2010"
  10. },
  11. {
  12. "media_type":"music",
  13. "title":"Surfing With The Alien",
  14. "year":"1987"
  15. }

配置

  1. es.resource.write = soc_{year}/{media_type}

结果

index type title
soc_1994 game Final Fantasy VI
soc_2010 book Harry Potter
soc_1987 music Surfing With The Alien

es.mapping.id

  1. es.mapping.id='car_id'

ES 数据的_id字段,配置使用指定的字段

es.resource.write

  1. 'es.resource.write' = 'used_car_d_{stat_date}/data'

ES 数据的_index和_type,配置使用指定的字段

读取ES

  1. add jar hdfs:/data/lib/es/elasticsearch-hadoop-6.3.2.jar;
  2. add jar hdfs:/data/lib/es/commons-httpclient-3.1.jar;

不分区

样例一

  1. create EXTERNAL table es_test(
  2. car_id string,
  3. share_link string
  4. )
  5. STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
  6. TBLPROPERTIES(
  7. 'es.resource' = 'uxin_test_2019-01-04/data',
  8. 'es.nodes' = 'mvp-hadoop40,mvp-hadoop41,mvp-hadoop42',
  9. 'es.port' = '9200',
  10. 'es.nodes.wan.only'='true');

样例二

  1. CREATE EXTERNAL TABLE `bigdata.uxin_car_info_origin_es_20190103` (
  2. `car_id` string COMMENT '车辆ID',
  3. `car_name` string COMMENT '车辆名称'
  4. )
  5. STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
  6. TBLPROPERTIES(
  7. 'es.nodes' = 'mvp-hadoop40,mvp-hadoop41,mvp-hadoop42',
  8. 'es.port' = '9200',
  9. 'es.resource' = 'used_car_r_20190103/data',
  10. 'es.nodes.wan.only'='true');

x-pack认证

  1. CREATE EXTERNAL TABLE `sword_r_es`(
  2. `meta_table_name` string COMMENT 'from deserializer',
  3. `string_time` string COMMENT 'from deserializer')s
  4. STORED BY
  5. 'org.elasticsearch.hadoop.hive.EsStorageHandler'
  6. TBLPROPERTIES (
  7. 'es.nodes'='mvp-hadoop172',
  8. 'es.nodes.wan.only'='true',
  9. 'es.port'='9200',
  10. 'es.resource'='audit/data',
  11. 'es.net.http.auth.user'='elastic',
  12. 'es.net.http.auth.pass'='elastic'
  13. )
  1. alter table table_name set tblproperties('es.net.http.auth.user'='elastic');
  2. alter table table_name set tblproperties('es.net.http.auth.pass'='elastic');

分区读取ES

  1. CREATE EXTERNAL TABLE `bigdata.uxin_car_info_origin_es` (
  2. `occur_timestamp` TIMESTAMP COMMENT '爬取日期',
  3. `car_id` string COMMENT '车辆ID',
  4. `price` BIGINT COMMENT '车辆报价',
  5. `car_name` string COMMENT '车辆名称',
  6. `brand` string COMMENT '车辆品牌',
  7. `mode` string COMMENT '型号',
  8. `year` BIGINT COMMENT '车辆年款',
  9. `gear_box` string COMMENT '变速箱',
  10. `version` string COMMENT '车型',
  11. `down_payment` BIGINT COMMENT '首付',
  12. `month_payment` BIGINT COMMENT '月付',
  13. `nationwide_purchase` BIGINT COMMENT '全国购',
  14. `special_offer` BIGINT COMMENT '超值',
  15. `return_car_3_days` BIGINT COMMENT '三天无理由退车',
  16. `certificate` string COMMENT '认证类型',
  17. `warehouse` string COMMENT '货仓',
  18. `registration_date` TIMESTAMP COMMENT '车辆上牌日期',
  19. `apparent_mileage` string COMMENT '表显里程',
  20. `emission_standards` string COMMENT '排放标准',
  21. `car_color` string COMMENT '颜色',
  22. `engine_intake` string COMMENT '发动机类型',
  23. `lift_time_min` string COMMENT '最快提车时间',
  24. `lift_time_max` string COMMENT '最慢提车时间',
  25. `engine_exhaust` string COMMENT '发动机排量',
  26. `check_people` string COMMENT '检测员',
  27. `check_date` TIMESTAMP COMMENT '检测时间',
  28. `last_maintenance_date` TIMESTAMP COMMENT '最后一次维保时间',
  29. `fix_times` string COMMENT '维修次数',
  30. `maintenance_times` string COMMENT '保养次数',
  31. `accident_times` string COMMENT '事故次数',
  32. `share_link` string COMMENT '分享链接'
  33. )
  34. STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
  35. TBLPROPERTIES(
  36. 'es.nodes' = 'mvp-hadoop40,mvp-hadoop41,mvp-hadoop42',
  37. 'es.port' = '9200',
  38. 'es.resource.read' = 'used_car_r_*/data',
  39. 'es.nodes.wan.only'='true',
  40. 'es.mapping.names' = 'occur_timestamp:timestamp');

写入ES

操作流程

  1. add jar hdfs:/data/lib/es/elasticsearch-hadoop-6.3.2.jar;
  2. add jar hdfs:/data/lib/es/commons-httpclient-3.1.jar;

建表

  1. CREATE EXTERNAL TABLE `bigdata.uxin_car_info_snapshot_es` (
  2. `timestamp` TIMESTAMP COMMENT 'ES分片',
  3. `occur_timestamp` TIMESTAMP COMMENT '爬取日期',
  4. `car_id` string COMMENT '车辆ID',
  5. `stat_date` string COMMENT '日期index',
  6. `meta_table_name` string,
  7. `meta_app_name` string
  8. )
  9. STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
  10. TBLPROPERTIES(
  11. 'es.nodes' = 'mvp-hadoop40,mvp-hadoop41,mvp-hadoop42',
  12. 'es.port' = '9200',
  13. 'es.resource.write' = 'used_car_d_{stat_date}/data',
  14. 'es.nodes.wan.only'='true');

写数据

  1. insert into bigdata.uxin_car_info_snapshot_es
  2. select unix_timestamp('2019-01-09', 'yyyy-MM-dd')*1000,
  3. unix_timestamp(substr(occur_timestamp, 2, 19), 'yyyy-MM-dd HH:mm:ss')*1000,
  4. car_id,price,
  5. '20190109',
  6. 'car_info',
  7. 'uxin'
  8. from bigdata.uxin_car_info_snapshot where dt = '2019-01-09';
文档更新时间: 2019-07-30 07:51   作者:admin