Skip to content
Snippets Groups Projects
Commit e2e7b12c authored by Dilip Biswal's avatar Dilip Biswal Committed by gatorsmile
Browse files

[SPARK-18872][SQL][TESTS] New test cases for EXISTS subquery

## What changes were proposed in this pull request?
This PR adds the first set of tests for EXISTS subquery.

File name                        | Brief description
------------------------| -----------------
exists-basic.sql              |Tests EXISTS and NOT EXISTS subqueries with both correlated and local predicates.
exists-within-and-or.sql|Tests EXISTS and NOT EXISTS subqueries embedded in AND or OR expression.

DB2 results are attached here as reference :

[exists-basic-db2.txt](https://github.com/apache/spark/files/733031/exists-basic-db2.txt)
[exists-and-or-db2.txt](https://github.com/apache/spark/files/733030/exists-and-or-db2.txt)

## How was this patch tested?
This patch is adding tests.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #16710 from dilipbiswal/exist-basic.
parent f7c07db8
No related branches found
No related tags found
No related merge requests found
-- Tests EXISTS subquery support. Tests basic form
-- of EXISTS subquery (both EXISTS and NOT EXISTS)
CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(200, "emp 2", date "2003-01-01", 200.00D, 10),
(300, "emp 3", date "2002-01-01", 300.00D, 20),
(400, "emp 4", date "2005-01-01", 400.00D, 30),
(500, "emp 5", date "2001-01-01", 400.00D, NULL),
(600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
(700, "emp 7", date "2010-01-01", 400.00D, 100),
(800, "emp 8", date "2016-01-01", 150.00D, 70)
AS EMP(id, emp_name, hiredate, salary, dept_id);
CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
(10, "dept 1", "CA"),
(20, "dept 2", "NY"),
(30, "dept 3", "TX"),
(40, "dept 4 - unassigned", "OR"),
(50, "dept 5 - unassigned", "NJ"),
(70, "dept 7", "FL")
AS DEPT(dept_id, dept_name, state);
CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
("emp 1", 10.00D),
("emp 1", 20.00D),
("emp 2", 300.00D),
("emp 2", 100.00D),
("emp 3", 300.00D),
("emp 4", 100.00D),
("emp 5", 1000.00D),
("emp 6 - no dept", 500.00D)
AS BONUS(emp_name, bonus_amt);
-- uncorrelated exist query
-- TC.01.01
SELECT *
FROM emp
WHERE EXISTS (SELECT 1
FROM dept
WHERE dept.dept_id > 10
AND dept.dept_id < 30);
-- simple correlated predicate in exist subquery
-- TC.01.02
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id);
-- correlated outer isnull predicate
-- TC.01.03
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id
OR emp.dept_id IS NULL);
-- Simple correlation with a local predicate in outer query
-- TC.01.04
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id)
AND emp.id > 200;
-- Outer references (emp.id) should not be pruned from outer plan
-- TC.01.05
SELECT emp.emp_name
FROM emp
WHERE EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id)
AND emp.id > 200;
-- not exists with correlated predicate
-- TC.01.06
SELECT *
FROM dept
WHERE NOT EXISTS (SELECT emp_name
FROM emp
WHERE emp.dept_id = dept.dept_id);
-- not exists with correlated predicate + local predicate
-- TC.01.07
SELECT *
FROM dept
WHERE NOT EXISTS (SELECT emp_name
FROM emp
WHERE emp.dept_id = dept.dept_id
OR state = 'NJ');
-- not exist both equal and greaterthan predicate
-- TC.01.08
SELECT *
FROM bonus
WHERE NOT EXISTS (SELECT *
FROM emp
WHERE emp.emp_name = emp_name
AND bonus_amt > emp.salary);
-- select employees who have not received any bonus
-- TC 01.09
SELECT emp.*
FROM emp
WHERE NOT EXISTS (SELECT NULL
FROM bonus
WHERE bonus.emp_name = emp.emp_name);
-- Nested exists
-- TC.01.10
SELECT *
FROM bonus
WHERE EXISTS (SELECT emp_name
FROM emp
WHERE bonus.emp_name = emp.emp_name
AND EXISTS (SELECT state
FROM dept
WHERE dept.dept_id = emp.dept_id));
-- Tests EXISTS subquery support. Tests EXISTS
-- subquery within a AND or OR expression.
CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(200, "emp 2", date "2003-01-01", 200.00D, 10),
(300, "emp 3", date "2002-01-01", 300.00D, 20),
(400, "emp 4", date "2005-01-01", 400.00D, 30),
(500, "emp 5", date "2001-01-01", 400.00D, NULL),
(600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
(700, "emp 7", date "2010-01-01", 400.00D, 100),
(800, "emp 8", date "2016-01-01", 150.00D, 70)
AS EMP(id, emp_name, hiredate, salary, dept_id);
CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
(10, "dept 1", "CA"),
(20, "dept 2", "NY"),
(30, "dept 3", "TX"),
(40, "dept 4 - unassigned", "OR"),
(50, "dept 5 - unassigned", "NJ"),
(70, "dept 7", "FL")
AS DEPT(dept_id, dept_name, state);
CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
("emp 1", 10.00D),
("emp 1", 20.00D),
("emp 2", 300.00D),
("emp 2", 100.00D),
("emp 3", 300.00D),
("emp 4", 100.00D),
("emp 5", 1000.00D),
("emp 6 - no dept", 500.00D)
AS BONUS(emp_name, bonus_amt);
-- Or used in conjunction with exists - ExistenceJoin
-- TC.02.01
SELECT emp.emp_name
FROM emp
WHERE EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id)
OR emp.id > 200;
-- all records from emp including the null dept_id
-- TC.02.02
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id)
OR emp.dept_id IS NULL;
-- EXISTS subquery in both LHS and RHS of OR.
-- TC.02.03
SELECT emp.emp_name
FROM emp
WHERE EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id
AND dept.dept_id = 20)
OR EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id
AND dept.dept_id = 30);
;
-- not exists and exists predicate within OR
-- TC.02.04
SELECT *
FROM bonus
WHERE ( NOT EXISTS (SELECT *
FROM emp
WHERE emp.emp_name = emp_name
AND bonus_amt > emp.salary)
OR EXISTS (SELECT *
FROM emp
WHERE emp.emp_name = emp_name
OR bonus_amt < emp.salary) );
-- not exists and in predicate within AND
-- TC.02.05
SELECT * FROM bonus WHERE NOT EXISTS
(
SELECT *
FROM emp
WHERE emp.emp_name = emp_name
AND bonus_amt > emp.salary)
AND
emp_name IN
(
SELECT emp_name
FROM emp
WHERE bonus_amt < emp.salary);
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 13
-- !query 0
CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(200, "emp 2", date "2003-01-01", 200.00D, 10),
(300, "emp 3", date "2002-01-01", 300.00D, 20),
(400, "emp 4", date "2005-01-01", 400.00D, 30),
(500, "emp 5", date "2001-01-01", 400.00D, NULL),
(600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
(700, "emp 7", date "2010-01-01", 400.00D, 100),
(800, "emp 8", date "2016-01-01", 150.00D, 70)
AS EMP(id, emp_name, hiredate, salary, dept_id)
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
(10, "dept 1", "CA"),
(20, "dept 2", "NY"),
(30, "dept 3", "TX"),
(40, "dept 4 - unassigned", "OR"),
(50, "dept 5 - unassigned", "NJ"),
(70, "dept 7", "FL")
AS DEPT(dept_id, dept_name, state)
-- !query 1 schema
struct<>
-- !query 1 output
-- !query 2
CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
("emp 1", 10.00D),
("emp 1", 20.00D),
("emp 2", 300.00D),
("emp 2", 100.00D),
("emp 3", 300.00D),
("emp 4", 100.00D),
("emp 5", 1000.00D),
("emp 6 - no dept", 500.00D)
AS BONUS(emp_name, bonus_amt)
-- !query 2 schema
struct<>
-- !query 2 output
-- !query 3
SELECT *
FROM emp
WHERE EXISTS (SELECT 1
FROM dept
WHERE dept.dept_id > 10
AND dept.dept_id < 30)
-- !query 3 schema
struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int>
-- !query 3 output
100 emp 1 2005-01-01 100.0 10
100 emp 1 2005-01-01 100.0 10
200 emp 2 2003-01-01 200.0 10
300 emp 3 2002-01-01 300.0 20
400 emp 4 2005-01-01 400.0 30
500 emp 5 2001-01-01 400.0 NULL
600 emp 6 - no dept 2001-01-01 400.0 100
700 emp 7 2010-01-01 400.0 100
800 emp 8 2016-01-01 150.0 70
-- !query 4
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id)
-- !query 4 schema
struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int>
-- !query 4 output
100 emp 1 2005-01-01 100.0 10
100 emp 1 2005-01-01 100.0 10
200 emp 2 2003-01-01 200.0 10
300 emp 3 2002-01-01 300.0 20
400 emp 4 2005-01-01 400.0 30
800 emp 8 2016-01-01 150.0 70
-- !query 5
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id
OR emp.dept_id IS NULL)
-- !query 5 schema
struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int>
-- !query 5 output
100 emp 1 2005-01-01 100.0 10
100 emp 1 2005-01-01 100.0 10
200 emp 2 2003-01-01 200.0 10
300 emp 3 2002-01-01 300.0 20
400 emp 4 2005-01-01 400.0 30
500 emp 5 2001-01-01 400.0 NULL
800 emp 8 2016-01-01 150.0 70
-- !query 6
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id)
AND emp.id > 200
-- !query 6 schema
struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int>
-- !query 6 output
300 emp 3 2002-01-01 300.0 20
400 emp 4 2005-01-01 400.0 30
800 emp 8 2016-01-01 150.0 70
-- !query 7
SELECT emp.emp_name
FROM emp
WHERE EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id)
AND emp.id > 200
-- !query 7 schema
struct<emp_name:string>
-- !query 7 output
emp 3
emp 4
emp 8
-- !query 8
SELECT *
FROM dept
WHERE NOT EXISTS (SELECT emp_name
FROM emp
WHERE emp.dept_id = dept.dept_id)
-- !query 8 schema
struct<dept_id:int,dept_name:string,state:string>
-- !query 8 output
40 dept 4 - unassigned OR
50 dept 5 - unassigned NJ
-- !query 9
SELECT *
FROM dept
WHERE NOT EXISTS (SELECT emp_name
FROM emp
WHERE emp.dept_id = dept.dept_id
OR state = 'NJ')
-- !query 9 schema
struct<dept_id:int,dept_name:string,state:string>
-- !query 9 output
40 dept 4 - unassigned OR
-- !query 10
SELECT *
FROM bonus
WHERE NOT EXISTS (SELECT *
FROM emp
WHERE emp.emp_name = emp_name
AND bonus_amt > emp.salary)
-- !query 10 schema
struct<emp_name:string,bonus_amt:double>
-- !query 10 output
emp 1 10.0
emp 1 20.0
emp 2 100.0
emp 4 100.0
-- !query 11
SELECT emp.*
FROM emp
WHERE NOT EXISTS (SELECT NULL
FROM bonus
WHERE bonus.emp_name = emp.emp_name)
-- !query 11 schema
struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int>
-- !query 11 output
700 emp 7 2010-01-01 400.0 100
800 emp 8 2016-01-01 150.0 70
-- !query 12
SELECT *
FROM bonus
WHERE EXISTS (SELECT emp_name
FROM emp
WHERE bonus.emp_name = emp.emp_name
AND EXISTS (SELECT state
FROM dept
WHERE dept.dept_id = emp.dept_id))
-- !query 12 schema
struct<emp_name:string,bonus_amt:double>
-- !query 12 output
emp 1 10.0
emp 1 20.0
emp 2 100.0
emp 2 300.0
emp 3 300.0
emp 4 100.0
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 8
-- !query 0
CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(100, "emp 1", date "2005-01-01", 100.00D, 10),
(200, "emp 2", date "2003-01-01", 200.00D, 10),
(300, "emp 3", date "2002-01-01", 300.00D, 20),
(400, "emp 4", date "2005-01-01", 400.00D, 30),
(500, "emp 5", date "2001-01-01", 400.00D, NULL),
(600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
(700, "emp 7", date "2010-01-01", 400.00D, 100),
(800, "emp 8", date "2016-01-01", 150.00D, 70)
AS EMP(id, emp_name, hiredate, salary, dept_id)
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
(10, "dept 1", "CA"),
(20, "dept 2", "NY"),
(30, "dept 3", "TX"),
(40, "dept 4 - unassigned", "OR"),
(50, "dept 5 - unassigned", "NJ"),
(70, "dept 7", "FL")
AS DEPT(dept_id, dept_name, state)
-- !query 1 schema
struct<>
-- !query 1 output
-- !query 2
CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
("emp 1", 10.00D),
("emp 1", 20.00D),
("emp 2", 300.00D),
("emp 2", 100.00D),
("emp 3", 300.00D),
("emp 4", 100.00D),
("emp 5", 1000.00D),
("emp 6 - no dept", 500.00D)
AS BONUS(emp_name, bonus_amt)
-- !query 2 schema
struct<>
-- !query 2 output
-- !query 3
SELECT emp.emp_name
FROM emp
WHERE EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id)
OR emp.id > 200
-- !query 3 schema
struct<emp_name:string>
-- !query 3 output
emp 1
emp 1
emp 2
emp 3
emp 4
emp 5
emp 6 - no dept
emp 7
emp 8
-- !query 4
SELECT *
FROM emp
WHERE EXISTS (SELECT dept.dept_name
FROM dept
WHERE emp.dept_id = dept.dept_id)
OR emp.dept_id IS NULL
-- !query 4 schema
struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int>
-- !query 4 output
100 emp 1 2005-01-01 100.0 10
100 emp 1 2005-01-01 100.0 10
200 emp 2 2003-01-01 200.0 10
300 emp 3 2002-01-01 300.0 20
400 emp 4 2005-01-01 400.0 30
500 emp 5 2001-01-01 400.0 NULL
800 emp 8 2016-01-01 150.0 70
-- !query 5
SELECT emp.emp_name
FROM emp
WHERE EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id
AND dept.dept_id = 20)
OR EXISTS (SELECT dept.state
FROM dept
WHERE emp.dept_id = dept.dept_id
AND dept.dept_id = 30)
-- !query 5 schema
struct<emp_name:string>
-- !query 5 output
emp 3
emp 4
-- !query 6
SELECT *
FROM bonus
WHERE ( NOT EXISTS (SELECT *
FROM emp
WHERE emp.emp_name = emp_name
AND bonus_amt > emp.salary)
OR EXISTS (SELECT *
FROM emp
WHERE emp.emp_name = emp_name
OR bonus_amt < emp.salary) )
-- !query 6 schema
struct<emp_name:string,bonus_amt:double>
-- !query 6 output
emp 1 10.0
emp 1 20.0
emp 2 100.0
emp 2 300.0
emp 3 300.0
emp 4 100.0
emp 5 1000.0
emp 6 - no dept 500.0
-- !query 7
SELECT * FROM bonus WHERE NOT EXISTS
(
SELECT *
FROM emp
WHERE emp.emp_name = emp_name
AND bonus_amt > emp.salary)
AND
emp_name IN
(
SELECT emp_name
FROM emp
WHERE bonus_amt < emp.salary)
-- !query 7 schema
struct<emp_name:string,bonus_amt:double>
-- !query 7 output
emp 1 10.0
emp 1 20.0
emp 2 100.0
emp 4 100.0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment