spark sql联结操作

This commit is contained in:
罗祥
2019-05-23 16:48:26 +08:00
parent 45846d9573
commit 32b02527de
8 changed files with 287 additions and 139 deletions

View File

@ -11,46 +11,17 @@ object SparkSqlApp {
val spark = SparkSession.builder().appName("aggregations").master("local[2]").getOrCreate()
val empDF = spark.read.json("/usr/file/json/emp.json")
empDF.createOrReplaceTempView("emp")
val df = spark.read.json("/usr/file/json/emp.json")
val deptDF = spark.read.json("/usr/file/json/dept.json")
deptDF.createOrReplaceTempView("dept")
import spark.implicits._
deptDF.printSchema()
df.select($"ename").limit(5).show()
df.sort("sal").limit(3).show()
// 1.定义联结表达式
val joinExpression = empDF.col("deptno") === deptDF.col("deptno")
// 2.联结查询
empDF.join(deptDF, joinExpression).select("ename", "dname").show()
spark.sql("SELECT ename,dname FROM emp JOIN dept ON emp.deptno = dept.deptno").show()
df.orderBy(desc("sal")).limit(3).show()
df.select("deptno").distinct().show()
empDF.join(deptDF, joinExpression, "outer").show()
spark.sql("SELECT * FROM emp FULL OUTER JOIN dept ON emp.deptno = dept.deptno").show()
empDF.join(deptDF, joinExpression, "left_outer").show()
spark.sql("SELECT * FROM emp LEFT OUTER JOIN dept ON emp.deptno = dept.deptno").show()
empDF.join(deptDF, joinExpression, "right_outer").show()
spark.sql("SELECT * FROM emp RIGHT OUTER JOIN dept ON emp.deptno = dept.deptno").show()
empDF.join(deptDF, joinExpression, "left_semi").show()
spark.sql("SELECT * FROM emp LEFT SEMI JOIN dept ON emp.deptno = dept.deptno").show()
empDF.join(deptDF, joinExpression, "left_anti").show()
spark.sql("SELECT * FROM emp LEFT ANTI dept ON emp.deptno = dept.deptno").show()
/*你绝对应该使用交叉连接100确定这是你需要的。 在Spark中定义交叉连接时有一个原因需要明确。 他们很危险!
高级用户可以将会话级配置spark.sql.crossJoin.enable设置为true以便允许交叉连接而不发出警告或者Spark没有尝试为您执行另一个连接。*/
empDF.join(deptDF, joinExpression, "cross").show()
spark.sql("SELECT * FROM emp CROSS JOIN dept ON emp.deptno = dept.deptno").show()
spark.sql("SELECT * FROM graduateProgram NATURAL JOIN person").show()
df.orderBy(desc("deptno"), asc("sal")).show(2)
}
}