From 59ab83c4038970c2a642f401df397da1b7c61f96 Mon Sep 17 00:00:00 2001 From: heibaiying <31504331+heibaiying@users.noreply.github.com> Date: Sat, 27 Apr 2019 10:32:25 +0800 Subject: [PATCH] =?UTF-8?q?Update=20Hive=E5=88=86=E5=8C=BA=E8=A1=A8?= =?UTF-8?q?=E5=92=8C=E5=88=86=E6=A1=B6=E8=A1=A8.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- notes/Hive分区表和分桶表.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/notes/Hive分区表和分桶表.md b/notes/Hive分区表和分桶表.md index e9f898b..8c18b23 100644 --- a/notes/Hive分区表和分桶表.md +++ b/notes/Hive分区表和分桶表.md @@ -79,7 +79,7 @@ LOAD DATA LOCAL INPATH "/usr/file/emp30.txt" OVERWRITE INTO TABLE emp_partition 在HashMap中,当我们给put()方法传递键和值时,我们先对键调用hashCode()方法,返回的hashCode用于找到bucket(桶)位置,最后将键值对存储在对应桶的链表结构中,链表达到一定阈值后会转换为红黑树(JDK1.8+)。下图为HashMap的数据结构图: -
+
> 图片引用自:[HashMap vs. Hashtable](http://www.itcuties.com/java/hashmap-hashtable/) @@ -113,9 +113,6 @@ LOAD DATA LOCAL INPATH "/usr/file/emp30.txt" OVERWRITE INTO TABLE emp_partition ```sql set hive.enforce.bucketing = true; --Hive 2.x不需要这一步 ``` - -需要在插入分桶的时候hash, **也就是说向分桶表中插入数据的时候必然要执行一次MAPREDUCE,** - 在Hive 0.x and 1.x版本,必须使用设置`hive.enforce.bucketing = true`,表示强制分桶,允许程序根据表结构自动选择正确数量的Reducer和cluster by column来进行分桶。 #### 2. CTAS导入数据 @@ -168,4 +165,4 @@ SELECT * FROM page_view WHERE dt='2009-02-25'; ## 参考资料 -1. [LanguageManual DDL BucketedTables](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL+BucketedTables) \ No newline at end of file +1. [LanguageManual DDL BucketedTables](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL+BucketedTables)