Formatting adjustments
This commit is contained in:
parent 2c37679b55
commit 5758c26d8b

@@ -1,5 +1,35 @@
# Common Hive DDL Operations

<nav>
<a href="#一Database">1. Database</a><br/>
<a href="#11-查看数据列表">1.1 List databases</a><br/>
<a href="#12-使用数据库">1.2 Use a database</a><br/>
<a href="#13-新建数据库">1.3 Create a database</a><br/>
<a href="#14-查看数据库信息">1.4 View database information</a><br/>
<a href="#15-删除数据库">1.5 Drop a database</a><br/>
<a href="#二创建表">2. Creating Tables</a><br/>
<a href="#21-建表语法">2.1 CREATE TABLE syntax</a><br/>
<a href="#22-内部表">2.2 Managed (internal) tables</a><br/>
<a href="#23-外部表">2.3 External tables</a><br/>
<a href="#24-分区表">2.4 Partitioned tables</a><br/>
<a href="#25-分桶表">2.5 Bucketed tables</a><br/>
<a href="#26-倾斜表">2.6 Skewed tables</a><br/>
<a href="#27-临时表">2.7 Temporary tables</a><br/>
<a href="#28-CTAS创建表">2.8 Creating tables with CTAS</a><br/>
<a href="#29-复制表结构">2.9 Copying a table schema</a><br/>
<a href="#210-加载数据到表">2.10 Loading data into a table</a><br/>
<a href="#三修改表">3. Altering Tables</a><br/>
<a href="#31-重命名表">3.1 Rename a table</a><br/>
<a href="#32-修改列">3.2 Modify columns</a><br/>
<a href="#33-新增列">3.3 Add columns</a><br/>
<a href="#四清空表删除表">4. Truncating/Dropping Tables</a><br/>
<a href="#41-清空表">4.1 Truncate a table</a><br/>
<a href="#42-删除表">4.2 Drop a table</a><br/>
<a href="#五其他命令">5. Other Commands</a><br/>
<a href="#51-Describe">5.1 Describe</a><br/>
<a href="#52-Show">5.2 Show</a><br/>
</nav>

## 1. Database

### 1.1 List databases

@@ -8,7 +38,7 @@
show databases;
```

<div align="center"> <img width='700px' src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-show-database.png"/> </div>
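
If you only want the databases whose names match a pattern, `SHOW DATABASES` also accepts a `LIKE` clause with `*` wildcards. A minimal sketch (the `hive*` pattern is purely illustrative):

```sql
-- list only the databases whose names start with "hive"
SHOW DATABASES LIKE 'hive*';
```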

### 1.2 Use a database

@@ -81,7 +111,10 @@ CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name --
  ... [constraint_specification])]  -- column names and column data types
  [COMMENT table_comment]  -- table comment
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]  -- partitioning rule for a partitioned table
  [
    CLUSTERED BY (col_name, col_name, ...)
    [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS
  ]  -- bucketing rule for a bucketed table
  [SKEWED BY (col_name, col_name, ...) ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
    [STORED AS DIRECTORIES]
  ]  -- skewed columns and values

@@ -128,7 +161,7 @@ CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name --

You can view the table's details with the `desc formatted emp_external` command:

<div align="center"> <img width='700px' src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-external-table.png"/> </div>
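
For context, an external table such as `emp_external` is typically declared with the `EXTERNAL` keyword plus an explicit storage location. The sketch below is only an illustrative assumption: the column list, the tab delimiter, and the `/hive/emp_external` path are not taken from this repository.

```sql
CREATE EXTERNAL TABLE IF NOT EXISTS emp_external(
    empno INT,          -- employee number
    ename STRING,       -- employee name
    sal   DECIMAL(7,2)  -- salary
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/hive/emp_external';  -- dropping the table keeps the files under this directory
```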

### 2.4 Partitioned tables

@@ -240,7 +273,7 @@ CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS emp_co LIKE emp
load data local inpath "/usr/file/emp.txt" into table emp;
```

The contents of emp.txt are shown below. You can copy and use it directly, or download the file from this repository's resources directory:

```txt
7369    SMITH   CLERK   7902    1980-12-17 00:00:00     800.00  20

@@ -261,7 +294,7 @@ load data local inpath "/usr/file/emp.txt" into table emp;

After loading, you can query the data in the table:

<div align="center"> <img width='700px' src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-select-emp.png"/> </div>
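
`LOAD DATA` also works with files that are already on HDFS (omit the `LOCAL` keyword) and can replace existing data with `OVERWRITE`. A minimal sketch; the HDFS path is illustrative:

```sql
-- load from HDFS instead of the local file system, replacing any existing data;
-- note that the source file is moved (not copied) into the table's directory
LOAD DATA INPATH '/tmp/emp.txt' OVERWRITE INTO TABLE emp;
```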

@@ -325,7 +358,7 @@ ALTER TABLE emp_temp ADD COLUMNS (address STRING COMMENT 'home address');

```sql
-- clear all data in a table, or in the specified partitions of a table
TRUNCATE TABLE table_name [PARTITION (partition_column = partition_col_value, ...)];
```

+ Currently only managed (internal) tables can be truncated; running TRUNCATE on an external table throws the exception `Cannot truncate non-managed table XXXX`.
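
For example, using the syntax above you can clear a single partition instead of the whole table. A sketch with hypothetical table and partition names:

```sql
-- removes the data files of one partition; the partition definition itself is kept
TRUNCATE TABLE emp_partition PARTITION (deptno=20);
```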

@@ -350,12 +383,6 @@ DROP TABLE [IF EXISTS] table_name [PURGE];

+ External table: only the table's metadata is deleted; the data on HDFS is not removed;
+ Dropping a table referenced by a view gives no warning (but the view becomes invalid and must be dropped or recreated by the user).
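
A concrete usage sketch; `emp_external` here refers to the external table used earlier in this document, so only its metadata would be removed:

```sql
-- for an external table this deletes only the metastore entry; the HDFS files remain
DROP TABLE IF EXISTS emp_external;
```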

## 5. Other Commands

@@ -420,3 +447,6 @@ SHOW CREATE TABLE ([db_name.]table_name|view_name);

## References

[LanguageManual DDL](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL)

@@ -208,72 +208,77 @@ DELETE FROM tablename [WHERE expression]

### 4.2 Example

**1. Modify the configuration**

First, edit `hive-site.xml` and add the following configuration to enable transaction support. The Hive service must be restarted after the configuration change.

```xml
<property>
    <name>hive.support.concurrency</name>
    <value>true</value>
</property>
<property>
    <name>hive.enforce.bucketing</name>
    <value>true</value>
</property>
<property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
</property>
<property>
    <name>hive.txn.manager</name>
    <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
    <name>hive.compactor.initiator.on</name>
    <value>true</value>
</property>
<property>
    <name>hive.in.test</name>
    <value>true</value>
</property>
```
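
Depending on your Hive version, several of these properties can also be set for the current session from the Hive CLI or Beeline instead of `hive-site.xml` (the compactor settings generally belong on the metastore side). A sketch, assuming your version allows session-level overrides:

```sql
SET hive.support.concurrency=true;
SET hive.enforce.bucketing=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
```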

**2. Create a test table**

Create a transactional table for testing. Specifying the table property `transactional = true` at creation time marks the table as transactional. Note that, according to the [official documentation](https://cwiki.apache.org/confluence/display/Hive/Hive+Transactions), transactional tables in Hive currently have the following restrictions:

+ the table must be bucketed;
+ only the ORC file format is supported;
+ LOAD DATA ... statements are not supported.

```sql
CREATE TABLE emp_ts(
    empno int,
    ename String
)
CLUSTERED BY (empno) INTO 2 BUCKETS STORED AS ORC
TBLPROPERTIES ("transactional"="true");
```
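
To double-check that the property was applied, you can list the table properties. A quick sketch using the table just created:

```sql
-- the output should include transactional=true
SHOW TBLPROPERTIES emp_ts;
```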

**3. Insert test data**

```sql
INSERT INTO TABLE emp_ts VALUES (1,"ming"),(2,"hong");
```

Inserting the data relies on a MapReduce job; after it completes successfully the data is as follows:

<div align="center"> <img src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-emp-ts.png"/> </div>
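
If you prefer checking from the CLI rather than the screenshot, a plain query over the two columns defined above works:

```sql
SELECT empno, ename FROM emp_ts;
```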

**4. Test UPDATE and DELETE**

```sql
-- update data
UPDATE emp_ts SET ename = "lan" WHERE empno=1;

-- delete data
DELETE FROM emp_ts WHERE empno=2;
```

Updating and deleting data also rely on MapReduce jobs; after they complete successfully the data is as follows:

<div align="center"> <img src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-emp-ts-2.png"/> </div>
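
One follow-up worth knowing: every INSERT/UPDATE/DELETE on a transactional table writes delta files, which Hive's compactor later merges in the background. Compaction can also be requested manually; a sketch (the 'major' level rewrites the base file, and `SHOW COMPACTIONS` reports progress):

```sql
-- ask the compactor to merge the delta files of emp_ts into a new base file
ALTER TABLE emp_ts COMPACT 'major';

-- check the compaction queue and status
SHOW COMPACTIONS;
```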

@@ -249,7 +249,7 @@ Hive supports inner joins, outer joins, left outer joins, right outer joins, and Cartesian joins

It must be emphasized that the join condition of a JOIN statement has to be specified with ON rather than WHERE; otherwise the Cartesian product is computed first and then filtered, which can give you results you did not expect (the demonstration below illustrates this).

<div align="center"> <img width="600px" src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/sql-join.jpg"/> </div>
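
A sketch of the point above, with illustrative `emp`/`dept` columns rather than the exact dataset used in the demonstration:

```sql
-- join condition in ON: Hive matches rows on deptno while performing the join
SELECT e.ename, d.dname
FROM emp e JOIN dept d ON e.deptno = d.deptno;

-- join condition only in WHERE: the JOIN itself has no condition, so Hive first
-- produces the Cartesian product of emp and dept and only then filters it;
-- this is far more expensive, and for outer joins it changes the result
SELECT e.ename, d.dname
FROM emp e JOIN dept d
WHERE e.deptno = d.deptno;
```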