格式调整
This commit is contained in:
		| @@ -1,5 +1,35 @@ | ||||
| # Hive常用DDL操作 | ||||
|  | ||||
| <nav> | ||||
| <a href="#一Database">一、Database</a><br/> | ||||
|         <a href="#11-查看数据列表">1.1 查看数据列表</a><br/> | ||||
|         <a href="#12-使用数据库">1.2 使用数据库</a><br/> | ||||
|         <a href="#13-新建数据库">1.3 新建数据库</a><br/> | ||||
|         <a href="#14-查看数据库信息">1.4 查看数据库信息</a><br/> | ||||
|         <a href="#15-删除数据库">1.5 删除数据库</a><br/> | ||||
| <a href="#二创建表">二、创建表</a><br/> | ||||
|         <a href="#21-建表语法">2.1 建表语法</a><br/> | ||||
|         <a href="#22-内部表">2.2 内部表</a><br/> | ||||
|         <a href="#23-外部表">2.3 外部表</a><br/> | ||||
|         <a href="#24-分区表">2.4 分区表</a><br/> | ||||
|         <a href="#25-分桶表">2.5 分桶表</a><br/> | ||||
|         <a href="#26-倾斜表">2.6 倾斜表</a><br/> | ||||
|         <a href="#27-临时表">2.7 临时表</a><br/> | ||||
|         <a href="#28-CTAS创建表">2.8 CTAS创建表</a><br/> | ||||
|         <a href="#29-复制表结构">2.9 复制表结构</a><br/> | ||||
|         <a href="#210-加载数据到表">2.10 加载数据到表</a><br/> | ||||
| <a href="#三修改表">三、修改表</a><br/> | ||||
|         <a href="#31-重命名表">3.1 重命名表</a><br/> | ||||
|         <a href="#32-修改列">3.2 修改列</a><br/> | ||||
|         <a href="#33-新增列">3.3 新增列</a><br/> | ||||
| <a href="#四清空表删除表">四、清空表/删除表</a><br/> | ||||
|         <a href="#41-清空表">4.1 清空表</a><br/> | ||||
|         <a href="#42-删除表">4.2 删除表</a><br/> | ||||
| <a href="#五其他命令">五、其他命令</a><br/> | ||||
|         <a href="#51-Describe">5.1 Describe</a><br/> | ||||
|         <a href="#52-Show">5.2 Show</a><br/> | ||||
| </nav> | ||||
|  | ||||
| ## 一、Database | ||||
|  | ||||
| ### 1.1 查看数据列表 | ||||
| @@ -8,7 +38,7 @@ | ||||
| show databases; | ||||
| ``` | ||||
|  | ||||
|  | ||||
| <div align="center"> <img width='700px' src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-show-database.png"/> </div> | ||||
|  | ||||
| ### 1.2 使用数据库 | ||||
|  | ||||
| @@ -81,7 +111,10 @@ CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name     -- | ||||
|     ... [constraint_specification])]  --列名 列数据类型 | ||||
|   [COMMENT table_comment]   --表描述 | ||||
|   [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]  --分区表分区规则 | ||||
|   [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]  --分桶表分桶规则 | ||||
|   [ | ||||
|     CLUSTERED BY (col_name, col_name, ...)  | ||||
|    [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS | ||||
|   ]  --分桶表分桶规则 | ||||
|   [SKEWED BY (col_name, col_name, ...) ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)   | ||||
|    [STORED AS DIRECTORIES]  | ||||
|   ]  --指定倾斜列和值 | ||||
| @@ -128,7 +161,7 @@ CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name     -- | ||||
|  | ||||
| 使用 `desc formatted emp_external` 命令可以查看表的详细信息如下: | ||||
|  | ||||
|  | ||||
| <div align="center"> <img width='700px' src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-external-table.png"/> </div> | ||||
|  | ||||
| ### 2.4 分区表 | ||||
|  | ||||
| @@ -240,7 +273,7 @@ CREATE TEMPORARY EXTERNAL TABLE  IF NOT EXISTS  emp_co  LIKE emp | ||||
| load data local inpath "/usr/file/emp.txt" into table emp; | ||||
| ``` | ||||
|  | ||||
| 其中emp.txt的文件内容如下,你可以直接复制粘贴,也可以到本仓库的resources目录下载对应的文件: | ||||
| 其中emp.txt的内容如下,你可以直接复制使用,也可以到本仓库的resources目录下载: | ||||
|  | ||||
| ```txt | ||||
| 7369	SMITH	CLERK	7902	1980-12-17 00:00:00	800.00		20 | ||||
| @@ -261,7 +294,7 @@ load data local inpath "/usr/file/emp.txt" into table emp; | ||||
|  | ||||
| 加载后可查询表中数据: | ||||
|  | ||||
|  | ||||
| <div align="center"> <img width='700px' src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-select-emp.png"/> </div> | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -325,7 +358,7 @@ ALTER TABLE emp_temp ADD COLUMNS (address STRING COMMENT 'home address'); | ||||
|  | ||||
| ```sql | ||||
| -- 清空整个表或表指定分区中的数据 | ||||
| TRUNCATE TABLE table_name [PARTITION (partition_column = partition_col_value, partition_column = partition_col_value, ...)]; | ||||
| TRUNCATE TABLE table_name [PARTITION (partition_column = partition_col_value,  ...)]; | ||||
| ``` | ||||
|  | ||||
| + 目前只有内部表才能执行TRUNCATE操作,外部表执行时会抛出异常`Cannot truncate non-managed table XXXX`。 | ||||
| @@ -350,12 +383,6 @@ DROP TABLE [IF EXISTS] table_name [PURGE]; | ||||
| + 外部表:只会删除表的元数据,不会删除HDFS上的数据; | ||||
| + 删除视图引用的表时,不会给出警告(但视图已经无效了,必须由用户删除或重新创建)。 | ||||
|  | ||||
| 示例: | ||||
|  | ||||
| ```sql | ||||
| `DROP TABLE [IF EXISTS] table_name [PURGE]; ` | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
| ## 五、其他命令 | ||||
| @@ -420,3 +447,6 @@ SHOW CREATE TABLE ([db_name.]table_name|view_name); | ||||
|  | ||||
|  | ||||
|  | ||||
| ## 参考资料 | ||||
|  | ||||
| [LanguageManual DDL](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL) | ||||
| @@ -208,72 +208,77 @@ DELETE FROM tablename [WHERE expression] | ||||
|  | ||||
| ### 4.2 示例 | ||||
|  | ||||
| 1.  首先需要更改`hive-site.xml`,添加如下配置,开启事务支持,配置完成后需要重启Hive服务。 | ||||
| **1. 修改配置** | ||||
|  | ||||
|    ```xml | ||||
|    <property> | ||||
|        <name>hive.support.concurrency</name> | ||||
|        <value>true</value> | ||||
|    </property> | ||||
|    <property> | ||||
|        <name>hive.enforce.bucketing</name> | ||||
|        <value>true</value> | ||||
|    </property> | ||||
|    <property> | ||||
|        <name>hive.exec.dynamic.partition.mode</name> | ||||
|        <value>nonstrict</value> | ||||
|    </property> | ||||
|    <property> | ||||
|        <name>hive.txn.manager</name> | ||||
|        <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value> | ||||
|    </property> | ||||
|    <property> | ||||
|        <name>hive.compactor.initiator.on</name> | ||||
|        <value>true</value> | ||||
|    </property> | ||||
|    <property> | ||||
|        <name>hive.in.test</name> | ||||
|        <value>true</value> | ||||
|    </property> | ||||
|    ``` | ||||
| 首先需要更改`hive-site.xml`,添加如下配置,开启事务支持,配置完成后需要重启Hive服务。 | ||||
|  | ||||
| 2. 创建用于测试的事务表,建表时候指定属性`transactional = true`则代表该表是事务表。需要注意的是,按照[官方文档](https://cwiki.apache.org/confluence/display/Hive/Hive+Transactions)的说明,目前Hive中的事务表有以下限制: | ||||
|       + 必须是buckets Table; | ||||
|       + 仅支持ORC文件格式; | ||||
|       + 不支持LOAD DATA ...语句。 | ||||
| ```xml | ||||
| <property> | ||||
|     <name>hive.support.concurrency</name> | ||||
|     <value>true</value> | ||||
| </property> | ||||
| <property> | ||||
|     <name>hive.enforce.bucketing</name> | ||||
|     <value>true</value> | ||||
| </property> | ||||
| <property> | ||||
|     <name>hive.exec.dynamic.partition.mode</name> | ||||
|     <value>nonstrict</value> | ||||
| </property> | ||||
| <property> | ||||
|     <name>hive.txn.manager</name> | ||||
|     <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value> | ||||
| </property> | ||||
| <property> | ||||
|     <name>hive.compactor.initiator.on</name> | ||||
|     <value>true</value> | ||||
| </property> | ||||
| <property> | ||||
|     <name>hive.in.test</name> | ||||
|     <value>true</value> | ||||
| </property> | ||||
| ``` | ||||
|  | ||||
|       ```sql | ||||
|       -- 建表语句 | ||||
|       CREATE TABLE emp_ts(   | ||||
|         empno int,   | ||||
|         ename String | ||||
|       ) | ||||
|       CLUSTERED BY (empno) INTO 2 BUCKETS STORED AS ORC | ||||
|       TBLPROPERTIES ("transactional"="true"); | ||||
|       ``` | ||||
| 3. 插入测试数据 | ||||
| **2. 创建测试表** | ||||
|  | ||||
|    ```sql | ||||
|    INSERT INTO TABLE emp_ts  VALUES (1,"ming"),(2,"hong"); | ||||
|    ``` | ||||
| 创建用于测试的事务表,建表时指定属性`transactional = true`即代表该表是事务表。需要注意的是,按照[官方文档](https://cwiki.apache.org/confluence/display/Hive/Hive+Transactions)的说明,目前Hive中的事务表有以下限制: | ||||
|  | ||||
|    插入数据依靠的是MapReduce作业,执行成功后数据如下: | ||||
| + 必须是分桶表(Bucketed Table); | ||||
| + 仅支持ORC文件格式; | ||||
| + 不支持LOAD DATA ...语句。 | ||||
|  | ||||
|    <div align="center"> <img  src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-emp-ts.png"/> </div> | ||||
| ```sql | ||||
| CREATE TABLE emp_ts(   | ||||
|   empno int,   | ||||
|   ename String | ||||
| ) | ||||
| CLUSTERED BY (empno) INTO 2 BUCKETS STORED AS ORC | ||||
| TBLPROPERTIES ("transactional"="true"); | ||||
| ``` | ||||
|  | ||||
| 4. 测试更新和删除 | ||||
| **3. 插入测试数据** | ||||
|  | ||||
|    ```sql | ||||
|    --更新数据 | ||||
|    UPDATE emp_ts SET ename = "lan"  WHERE  empno=1; | ||||
| ```sql | ||||
| INSERT INTO TABLE emp_ts  VALUES (1,"ming"),(2,"hong"); | ||||
| ``` | ||||
|  | ||||
|    --删除数据 | ||||
|    DELETE FROM emp_ts WHERE empno=2; | ||||
|    ``` | ||||
| 插入数据依靠的是MapReduce作业,执行成功后数据如下: | ||||
|  | ||||
|    更新和删除数据依靠的也是MapReduce作业,执行成功后数据如下: | ||||
| <div align="center"> <img  src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-emp-ts.png"/> </div> | ||||
|  | ||||
|    <div align="center"> <img  src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-emp-ts-2.png"/> </div> | ||||
| **4. 测试更新和删除** | ||||
|  | ||||
| ```sql | ||||
| --更新数据 | ||||
| UPDATE emp_ts SET ename = "lan"  WHERE  empno=1; | ||||
|  | ||||
| --删除数据 | ||||
| DELETE FROM emp_ts WHERE empno=2; | ||||
| ``` | ||||
|  | ||||
| 更新和删除数据依靠的也是MapReduce作业,执行成功后数据如下: | ||||
|  | ||||
| <div align="center"> <img  src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/hive-emp-ts-2.png"/> </div> | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -249,7 +249,7 @@ Hive支持内连接,外连接,左外连接,右外连接,笛卡尔连接 | ||||
|  | ||||
| 需要特别强调:JOIN语句的关联条件必须用ON指定,不能用WHERE指定,否则就会先做笛卡尔积,再过滤,这会导致你得不到预期的结果(下面的演示会有说明)。 | ||||
|  | ||||
| <div align="center"> <img width="700px"  src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/sql-join.jpg"/> </div> | ||||
| <div align="center"> <img width="600px"  src="https://github.com/heibaiying/BigData-Notes/blob/master/pictures/sql-join.jpg"/> </div> | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user