Skip to content
Snippets Groups Projects
Commit a78d6ce3 authored by Sameer Agarwal's avatar Sameer Agarwal Committed by Davies Liu
Browse files

[SPARK-15078] [SQL] Add all TPCDS 1.4 benchmark queries for SparkSQL

## What changes were proposed in this pull request?

Now that SparkSQL supports all TPC-DS queries, this patch adds all 99 benchmark queries inside SparkSQL.

## How was this patch tested?

Benchmark only

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13188 from sameeragarwal/tpcds-all.
parent dcac8e6f
No related branches found
No related tags found
No related merge requests found
Showing
with 842 additions and 0 deletions
......@@ -98,3 +98,4 @@ spark-deps-.*
.*csv
.*tsv
org.apache.spark.scheduler.ExternalClusterManager
.*\.sql
WITH customer_total_return AS
( SELECT
sr_customer_sk AS ctr_customer_sk,
sr_store_sk AS ctr_store_sk,
sum(sr_return_amt) AS ctr_total_return
FROM store_returns, date_dim
WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000
GROUP BY sr_customer_sk, sr_store_sk)
SELECT c_customer_id
FROM customer_total_return ctr1, store, customer
WHERE ctr1.ctr_total_return >
(SELECT avg(ctr_total_return) * 1.2
FROM customer_total_return ctr2
WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
AND s_store_sk = ctr1.ctr_store_sk
AND s_state = 'TN'
AND ctr1.ctr_customer_sk = c_customer_sk
ORDER BY c_customer_id
LIMIT 100
SELECT
cd_gender,
cd_marital_status,
cd_education_status,
count(*) cnt1,
cd_purchase_estimate,
count(*) cnt2,
cd_credit_rating,
count(*) cnt3,
cd_dep_count,
count(*) cnt4,
cd_dep_employed_count,
count(*) cnt5,
cd_dep_college_count,
count(*) cnt6
FROM
customer c, customer_address ca, customer_demographics
WHERE
c.c_current_addr_sk = ca.ca_address_sk AND
ca_county IN ('Rush County', 'Toole County', 'Jefferson County',
'Dona Ana County', 'La Porte County') AND
cd_demo_sk = c.c_current_cdemo_sk AND
exists(SELECT *
FROM store_sales, date_dim
WHERE c.c_customer_sk = ss_customer_sk AND
ss_sold_date_sk = d_date_sk AND
d_year = 2002 AND
d_moy BETWEEN 1 AND 1 + 3) AND
(exists(SELECT *
FROM web_sales, date_dim
WHERE c.c_customer_sk = ws_bill_customer_sk AND
ws_sold_date_sk = d_date_sk AND
d_year = 2002 AND
d_moy BETWEEN 1 AND 1 + 3) OR
exists(SELECT *
FROM catalog_sales, date_dim
WHERE c.c_customer_sk = cs_ship_customer_sk AND
cs_sold_date_sk = d_date_sk AND
d_year = 2002 AND
d_moy BETWEEN 1 AND 1 + 3))
GROUP BY cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
ORDER BY cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
LIMIT 100
WITH year_total AS (
SELECT
c_customer_id customer_id,
c_first_name customer_first_name,
c_last_name customer_last_name,
c_preferred_cust_flag customer_preferred_cust_flag,
c_birth_country customer_birth_country,
c_login customer_login,
c_email_address customer_email_address,
d_year dyear,
sum(ss_ext_list_price - ss_ext_discount_amt) year_total,
's' sale_type
FROM customer, store_sales, date_dim
WHERE c_customer_sk = ss_customer_sk
AND ss_sold_date_sk = d_date_sk
GROUP BY c_customer_id
, c_first_name
, c_last_name
, d_year
, c_preferred_cust_flag
, c_birth_country
, c_login
, c_email_address
, d_year
UNION ALL
SELECT
c_customer_id customer_id,
c_first_name customer_first_name,
c_last_name customer_last_name,
c_preferred_cust_flag customer_preferred_cust_flag,
c_birth_country customer_birth_country,
c_login customer_login,
c_email_address customer_email_address,
d_year dyear,
sum(ws_ext_list_price - ws_ext_discount_amt) year_total,
'w' sale_type
FROM customer, web_sales, date_dim
WHERE c_customer_sk = ws_bill_customer_sk
AND ws_sold_date_sk = d_date_sk
GROUP BY
c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country,
c_login, c_email_address, d_year)
SELECT t_s_secyear.customer_preferred_cust_flag
FROM year_total t_s_firstyear
, year_total t_s_secyear
, year_total t_w_firstyear
, year_total t_w_secyear
WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
AND t_s_firstyear.customer_id = t_w_secyear.customer_id
AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
AND t_s_firstyear.sale_type = 's'
AND t_w_firstyear.sale_type = 'w'
AND t_s_secyear.sale_type = 's'
AND t_w_secyear.sale_type = 'w'
AND t_s_firstyear.dyear = 2001
AND t_s_secyear.dyear = 2001 + 1
AND t_w_firstyear.dyear = 2001
AND t_w_secyear.dyear = 2001 + 1
AND t_s_firstyear.year_total > 0
AND t_w_firstyear.year_total > 0
AND CASE WHEN t_w_firstyear.year_total > 0
THEN t_w_secyear.year_total / t_w_firstyear.year_total
ELSE NULL END
> CASE WHEN t_s_firstyear.year_total > 0
THEN t_s_secyear.year_total / t_s_firstyear.year_total
ELSE NULL END
ORDER BY t_s_secyear.customer_preferred_cust_flag
LIMIT 100
SELECT
i_item_desc,
i_category,
i_class,
i_current_price,
sum(ws_ext_sales_price) AS itemrevenue,
sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price))
OVER
(PARTITION BY i_class) AS revenueratio
FROM
web_sales, item, date_dim
WHERE
ws_item_sk = i_item_sk
AND i_category IN ('Sports', 'Books', 'Home')
AND ws_sold_date_sk = d_date_sk
AND d_date BETWEEN cast('1999-02-22' AS DATE)
AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
GROUP BY
i_item_id, i_item_desc, i_category, i_class, i_current_price
ORDER BY
i_category, i_class, i_item_id, i_item_desc, revenueratio
LIMIT 100
SELECT
avg(ss_quantity),
avg(ss_ext_sales_price),
avg(ss_ext_wholesale_cost),
sum(ss_ext_wholesale_cost)
FROM store_sales
, store
, customer_demographics
, household_demographics
, customer_address
, date_dim
WHERE s_store_sk = ss_store_sk
AND ss_sold_date_sk = d_date_sk AND d_year = 2001
AND ((ss_hdemo_sk = hd_demo_sk
AND cd_demo_sk = ss_cdemo_sk
AND cd_marital_status = 'M'
AND cd_education_status = 'Advanced Degree'
AND ss_sales_price BETWEEN 100.00 AND 150.00
AND hd_dep_count = 3
) OR
(ss_hdemo_sk = hd_demo_sk
AND cd_demo_sk = ss_cdemo_sk
AND cd_marital_status = 'S'
AND cd_education_status = 'College'
AND ss_sales_price BETWEEN 50.00 AND 100.00
AND hd_dep_count = 1
) OR
(ss_hdemo_sk = hd_demo_sk
AND cd_demo_sk = ss_cdemo_sk
AND cd_marital_status = 'W'
AND cd_education_status = '2 yr Degree'
AND ss_sales_price BETWEEN 150.00 AND 200.00
AND hd_dep_count = 1
))
AND ((ss_addr_sk = ca_address_sk
AND ca_country = 'United States'
AND ca_state IN ('TX', 'OH', 'TX')
AND ss_net_profit BETWEEN 100 AND 200
) OR
(ss_addr_sk = ca_address_sk
AND ca_country = 'United States'
AND ca_state IN ('OR', 'NM', 'KY')
AND ss_net_profit BETWEEN 150 AND 300
) OR
(ss_addr_sk = ca_address_sk
AND ca_country = 'United States'
AND ca_state IN ('VA', 'TX', 'MS')
AND ss_net_profit BETWEEN 50 AND 250
))
WITH cross_items AS
(SELECT i_item_sk ss_item_sk
FROM item,
(SELECT
iss.i_brand_id brand_id,
iss.i_class_id class_id,
iss.i_category_id category_id
FROM store_sales, item iss, date_dim d1
WHERE ss_item_sk = iss.i_item_sk
AND ss_sold_date_sk = d1.d_date_sk
AND d1.d_year BETWEEN 1999 AND 1999 + 2
INTERSECT
SELECT
ics.i_brand_id,
ics.i_class_id,
ics.i_category_id
FROM catalog_sales, item ics, date_dim d2
WHERE cs_item_sk = ics.i_item_sk
AND cs_sold_date_sk = d2.d_date_sk
AND d2.d_year BETWEEN 1999 AND 1999 + 2
INTERSECT
SELECT
iws.i_brand_id,
iws.i_class_id,
iws.i_category_id
FROM web_sales, item iws, date_dim d3
WHERE ws_item_sk = iws.i_item_sk
AND ws_sold_date_sk = d3.d_date_sk
AND d3.d_year BETWEEN 1999 AND 1999 + 2) x
WHERE i_brand_id = brand_id
AND i_class_id = class_id
AND i_category_id = category_id
),
avg_sales AS
(SELECT avg(quantity * list_price) average_sales
FROM (
SELECT
ss_quantity quantity,
ss_list_price list_price
FROM store_sales, date_dim
WHERE ss_sold_date_sk = d_date_sk
AND d_year BETWEEN 1999 AND 2001
UNION ALL
SELECT
cs_quantity quantity,
cs_list_price list_price
FROM catalog_sales, date_dim
WHERE cs_sold_date_sk = d_date_sk
AND d_year BETWEEN 1999 AND 1999 + 2
UNION ALL
SELECT
ws_quantity quantity,
ws_list_price list_price
FROM web_sales, date_dim
WHERE ws_sold_date_sk = d_date_sk
AND d_year BETWEEN 1999 AND 1999 + 2) x)
SELECT
channel,
i_brand_id,
i_class_id,
i_category_id,
sum(sales),
sum(number_sales)
FROM (
SELECT
'store' channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ss_quantity * ss_list_price) sales,
count(*) number_sales
FROM store_sales, item, date_dim
WHERE ss_item_sk IN (SELECT ss_item_sk
FROM cross_items)
AND ss_item_sk = i_item_sk
AND ss_sold_date_sk = d_date_sk
AND d_year = 1999 + 2
AND d_moy = 11
GROUP BY i_brand_id, i_class_id, i_category_id
HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales
FROM avg_sales)
UNION ALL
SELECT
'catalog' channel,
i_brand_id,
i_class_id,
i_category_id,
sum(cs_quantity * cs_list_price) sales,
count(*) number_sales
FROM catalog_sales, item, date_dim
WHERE cs_item_sk IN (SELECT ss_item_sk
FROM cross_items)
AND cs_item_sk = i_item_sk
AND cs_sold_date_sk = d_date_sk
AND d_year = 1999 + 2
AND d_moy = 11
GROUP BY i_brand_id, i_class_id, i_category_id
HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales)
UNION ALL
SELECT
'web' channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ws_quantity * ws_list_price) sales,
count(*) number_sales
FROM web_sales, item, date_dim
WHERE ws_item_sk IN (SELECT ss_item_sk
FROM cross_items)
AND ws_item_sk = i_item_sk
AND ws_sold_date_sk = d_date_sk
AND d_year = 1999 + 2
AND d_moy = 11
GROUP BY i_brand_id, i_class_id, i_category_id
HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales
FROM avg_sales)
) y
GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id)
ORDER BY channel, i_brand_id, i_class_id, i_category_id
LIMIT 100
WITH cross_items AS
(SELECT i_item_sk ss_item_sk
FROM item,
(SELECT
iss.i_brand_id brand_id,
iss.i_class_id class_id,
iss.i_category_id category_id
FROM store_sales, item iss, date_dim d1
WHERE ss_item_sk = iss.i_item_sk
AND ss_sold_date_sk = d1.d_date_sk
AND d1.d_year BETWEEN 1999 AND 1999 + 2
INTERSECT
SELECT
ics.i_brand_id,
ics.i_class_id,
ics.i_category_id
FROM catalog_sales, item ics, date_dim d2
WHERE cs_item_sk = ics.i_item_sk
AND cs_sold_date_sk = d2.d_date_sk
AND d2.d_year BETWEEN 1999 AND 1999 + 2
INTERSECT
SELECT
iws.i_brand_id,
iws.i_class_id,
iws.i_category_id
FROM web_sales, item iws, date_dim d3
WHERE ws_item_sk = iws.i_item_sk
AND ws_sold_date_sk = d3.d_date_sk
AND d3.d_year BETWEEN 1999 AND 1999 + 2) x
WHERE i_brand_id = brand_id
AND i_class_id = class_id
AND i_category_id = category_id
),
avg_sales AS
(SELECT avg(quantity * list_price) average_sales
FROM (SELECT
ss_quantity quantity,
ss_list_price list_price
FROM store_sales, date_dim
WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2
UNION ALL
SELECT
cs_quantity quantity,
cs_list_price list_price
FROM catalog_sales, date_dim
WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2
UNION ALL
SELECT
ws_quantity quantity,
ws_list_price list_price
FROM web_sales, date_dim
WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x)
SELECT *
FROM
(SELECT
'store' channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ss_quantity * ss_list_price) sales,
count(*) number_sales
FROM store_sales, item, date_dim
WHERE ss_item_sk IN (SELECT ss_item_sk
FROM cross_items)
AND ss_item_sk = i_item_sk
AND ss_sold_date_sk = d_date_sk
AND d_week_seq = (SELECT d_week_seq
FROM date_dim
WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11)
GROUP BY i_brand_id, i_class_id, i_category_id
HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales
FROM avg_sales)) this_year,
(SELECT
'store' channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ss_quantity * ss_list_price) sales,
count(*) number_sales
FROM store_sales, item, date_dim
WHERE ss_item_sk IN (SELECT ss_item_sk
FROM cross_items)
AND ss_item_sk = i_item_sk
AND ss_sold_date_sk = d_date_sk
AND d_week_seq = (SELECT d_week_seq
FROM date_dim
WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11)
GROUP BY i_brand_id, i_class_id, i_category_id
HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales
FROM avg_sales)) last_year
WHERE this_year.i_brand_id = last_year.i_brand_id
AND this_year.i_class_id = last_year.i_class_id
AND this_year.i_category_id = last_year.i_category_id
ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id
LIMIT 100
SELECT
ca_zip,
sum(cs_sales_price)
FROM catalog_sales, customer, customer_address, date_dim
WHERE cs_bill_customer_sk = c_customer_sk
AND c_current_addr_sk = ca_address_sk
AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475',
'85392', '85460', '80348', '81792')
OR ca_state IN ('CA', 'WA', 'GA')
OR cs_sales_price > 500)
AND cs_sold_date_sk = d_date_sk
AND d_qoy = 2 AND d_year = 2001
GROUP BY ca_zip
ORDER BY ca_zip
LIMIT 100
SELECT
count(DISTINCT cs_order_number) AS `order count `,
sum(cs_ext_ship_cost) AS `total shipping cost `,
sum(cs_net_profit) AS `total net profit `
FROM
catalog_sales cs1, date_dim, customer_address, call_center
WHERE
d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days)
AND cs1.cs_ship_date_sk = d_date_sk
AND cs1.cs_ship_addr_sk = ca_address_sk
AND ca_state = 'GA'
AND cs1.cs_call_center_sk = cc_call_center_sk
AND cc_county IN
('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County')
AND EXISTS(SELECT *
FROM catalog_sales cs2
WHERE cs1.cs_order_number = cs2.cs_order_number
AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
AND NOT EXISTS(SELECT *
FROM catalog_returns cr1
WHERE cs1.cs_order_number = cr1.cr_order_number)
ORDER BY count(DISTINCT cs_order_number)
LIMIT 100
SELECT
i_item_id,
i_item_desc,
s_state,
count(ss_quantity) AS store_sales_quantitycount,
avg(ss_quantity) AS store_sales_quantityave,
stddev_samp(ss_quantity) AS store_sales_quantitystdev,
stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov,
count(sr_return_quantity) as_store_returns_quantitycount,
avg(sr_return_quantity) as_store_returns_quantityave,
stddev_samp(sr_return_quantity) as_store_returns_quantitystdev,
stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov,
count(cs_quantity) AS catalog_sales_quantitycount,
avg(cs_quantity) AS catalog_sales_quantityave,
stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitystdev,
stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov
FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item
WHERE d1.d_quarter_name = '2001Q1'
AND d1.d_date_sk = ss_sold_date_sk
AND i_item_sk = ss_item_sk
AND s_store_sk = ss_store_sk
AND ss_customer_sk = sr_customer_sk
AND ss_item_sk = sr_item_sk
AND ss_ticket_number = sr_ticket_number
AND sr_returned_date_sk = d2.d_date_sk
AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3')
AND sr_customer_sk = cs_bill_customer_sk
AND sr_item_sk = cs_item_sk
AND cs_sold_date_sk = d3.d_date_sk
AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3')
GROUP BY i_item_id, i_item_desc, s_state
ORDER BY i_item_id, i_item_desc, s_state
LIMIT 100
SELECT
i_item_id,
ca_country,
ca_state,
ca_county,
avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1,
avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2,
avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3,
avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4,
avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5,
avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6,
avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7
FROM catalog_sales, customer_demographics cd1,
customer_demographics cd2, customer, customer_address, date_dim, item
WHERE cs_sold_date_sk = d_date_sk AND
cs_item_sk = i_item_sk AND
cs_bill_cdemo_sk = cd1.cd_demo_sk AND
cs_bill_customer_sk = c_customer_sk AND
cd1.cd_gender = 'F' AND
cd1.cd_education_status = 'Unknown' AND
c_current_cdemo_sk = cd2.cd_demo_sk AND
c_current_addr_sk = ca_address_sk AND
c_birth_month IN (1, 6, 8, 9, 12, 2) AND
d_year = 1998 AND
ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS')
GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county)
ORDER BY ca_country, ca_state, ca_county, i_item_id
LIMIT 100
SELECT
i_brand_id brand_id,
i_brand brand,
i_manufact_id,
i_manufact,
sum(ss_ext_sales_price) ext_price
FROM date_dim, store_sales, item, customer, customer_address, store
WHERE d_date_sk = ss_sold_date_sk
AND ss_item_sk = i_item_sk
AND i_manager_id = 8
AND d_moy = 11
AND d_year = 1998
AND ss_customer_sk = c_customer_sk
AND c_current_addr_sk = ca_address_sk
AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
AND ss_store_sk = s_store_sk
GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact
ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact
LIMIT 100
WITH wscs AS
( SELECT
sold_date_sk,
sales_price
FROM (SELECT
ws_sold_date_sk sold_date_sk,
ws_ext_sales_price sales_price
FROM web_sales) x
UNION ALL
(SELECT
cs_sold_date_sk sold_date_sk,
cs_ext_sales_price sales_price
FROM catalog_sales)),
wswscs AS
( SELECT
d_week_seq,
sum(CASE WHEN (d_day_name = 'Sunday')
THEN sales_price
ELSE NULL END)
sun_sales,
sum(CASE WHEN (d_day_name = 'Monday')
THEN sales_price
ELSE NULL END)
mon_sales,
sum(CASE WHEN (d_day_name = 'Tuesday')
THEN sales_price
ELSE NULL END)
tue_sales,
sum(CASE WHEN (d_day_name = 'Wednesday')
THEN sales_price
ELSE NULL END)
wed_sales,
sum(CASE WHEN (d_day_name = 'Thursday')
THEN sales_price
ELSE NULL END)
thu_sales,
sum(CASE WHEN (d_day_name = 'Friday')
THEN sales_price
ELSE NULL END)
fri_sales,
sum(CASE WHEN (d_day_name = 'Saturday')
THEN sales_price
ELSE NULL END)
sat_sales
FROM wscs, date_dim
WHERE d_date_sk = sold_date_sk
GROUP BY d_week_seq)
SELECT
d_week_seq1,
round(sun_sales1 / sun_sales2, 2),
round(mon_sales1 / mon_sales2, 2),
round(tue_sales1 / tue_sales2, 2),
round(wed_sales1 / wed_sales2, 2),
round(thu_sales1 / thu_sales2, 2),
round(fri_sales1 / fri_sales2, 2),
round(sat_sales1 / sat_sales2, 2)
FROM
(SELECT
wswscs.d_week_seq d_week_seq1,
sun_sales sun_sales1,
mon_sales mon_sales1,
tue_sales tue_sales1,
wed_sales wed_sales1,
thu_sales thu_sales1,
fri_sales fri_sales1,
sat_sales sat_sales1
FROM wswscs, date_dim
WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y,
(SELECT
wswscs.d_week_seq d_week_seq2,
sun_sales sun_sales2,
mon_sales mon_sales2,
tue_sales tue_sales2,
wed_sales wed_sales2,
thu_sales thu_sales2,
fri_sales fri_sales2,
sat_sales sat_sales2
FROM wswscs, date_dim
WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z
WHERE d_week_seq1 = d_week_seq2 - 53
ORDER BY d_week_seq1
SELECT
i_item_desc,
i_category,
i_class,
i_current_price,
sum(cs_ext_sales_price) AS itemrevenue,
sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price))
OVER
(PARTITION BY i_class) AS revenueratio
FROM catalog_sales, item, date_dim
WHERE cs_item_sk = i_item_sk
AND i_category IN ('Sports', 'Books', 'Home')
AND cs_sold_date_sk = d_date_sk
AND d_date BETWEEN cast('1999-02-22' AS DATE)
AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price
ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio
LIMIT 100
SELECT *
FROM (
SELECT
w_warehouse_name,
i_item_id,
sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE))
THEN inv_quantity_on_hand
ELSE 0 END) AS inv_before,
sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE))
THEN inv_quantity_on_hand
ELSE 0 END) AS inv_after
FROM inventory, warehouse, item, date_dim
WHERE i_current_price BETWEEN 0.99 AND 1.49
AND i_item_sk = inv_item_sk
AND inv_warehouse_sk = w_warehouse_sk
AND inv_date_sk = d_date_sk
AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days)
AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days)
GROUP BY w_warehouse_name, i_item_id) x
WHERE (CASE WHEN inv_before > 0
THEN inv_after / inv_before
ELSE NULL
END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0
ORDER BY w_warehouse_name, i_item_id
LIMIT 100
SELECT
i_product_name,
i_brand,
i_class,
i_category,
avg(inv_quantity_on_hand) qoh
FROM inventory, date_dim, item, warehouse
WHERE inv_date_sk = d_date_sk
AND inv_item_sk = i_item_sk
AND inv_warehouse_sk = w_warehouse_sk
AND d_month_seq BETWEEN 1200 AND 1200 + 11
GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category)
ORDER BY qoh, i_product_name, i_brand, i_class, i_category
LIMIT 100
WITH frequent_ss_items AS
(SELECT
substr(i_item_desc, 1, 30) itemdesc,
i_item_sk item_sk,
d_date solddate,
count(*) cnt
FROM store_sales, date_dim, item
WHERE ss_sold_date_sk = d_date_sk
AND ss_item_sk = i_item_sk
AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
HAVING count(*) > 4),
max_store_sales AS
(SELECT max(csales) tpcds_cmax
FROM (SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) csales
FROM store_sales, customer, date_dim
WHERE ss_customer_sk = c_customer_sk
AND ss_sold_date_sk = d_date_sk
AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
GROUP BY c_customer_sk) x),
best_ss_customer AS
(SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) ssales
FROM store_sales, customer
WHERE ss_customer_sk = c_customer_sk
GROUP BY c_customer_sk
HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
(SELECT *
FROM max_store_sales))
SELECT sum(sales)
FROM ((SELECT cs_quantity * cs_list_price sales
FROM catalog_sales, date_dim
WHERE d_year = 2000
AND d_moy = 2
AND cs_sold_date_sk = d_date_sk
AND cs_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND cs_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer))
UNION ALL
(SELECT ws_quantity * ws_list_price sales
FROM web_sales, date_dim
WHERE d_year = 2000
AND d_moy = 2
AND ws_sold_date_sk = d_date_sk
AND ws_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND ws_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer))) y
LIMIT 100
WITH frequent_ss_items AS
(SELECT
substr(i_item_desc, 1, 30) itemdesc,
i_item_sk item_sk,
d_date solddate,
count(*) cnt
FROM store_sales, date_dim, item
WHERE ss_sold_date_sk = d_date_sk
AND ss_item_sk = i_item_sk
AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
HAVING count(*) > 4),
max_store_sales AS
(SELECT max(csales) tpcds_cmax
FROM (SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) csales
FROM store_sales, customer, date_dim
WHERE ss_customer_sk = c_customer_sk
AND ss_sold_date_sk = d_date_sk
AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
GROUP BY c_customer_sk) x),
best_ss_customer AS
(SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) ssales
FROM store_sales
, customer
WHERE ss_customer_sk = c_customer_sk
GROUP BY c_customer_sk
HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
(SELECT *
FROM max_store_sales))
SELECT
c_last_name,
c_first_name,
sales
FROM ((SELECT
c_last_name,
c_first_name,
sum(cs_quantity * cs_list_price) sales
FROM catalog_sales, customer, date_dim
WHERE d_year = 2000
AND d_moy = 2
AND cs_sold_date_sk = d_date_sk
AND cs_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND cs_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer)
AND cs_bill_customer_sk = c_customer_sk
GROUP BY c_last_name, c_first_name)
UNION ALL
(SELECT
c_last_name,
c_first_name,
sum(ws_quantity * ws_list_price) sales
FROM web_sales, customer, date_dim
WHERE d_year = 2000
AND d_moy = 2
AND ws_sold_date_sk = d_date_sk
AND ws_item_sk IN (SELECT item_sk
FROM frequent_ss_items)
AND ws_bill_customer_sk IN (SELECT c_customer_sk
FROM best_ss_customer)
AND ws_bill_customer_sk = c_customer_sk
GROUP BY c_last_name, c_first_name)) y
ORDER BY c_last_name, c_first_name, sales
LIMIT 100
WITH ssales AS
(SELECT
c_last_name,
c_first_name,
s_store_name,
ca_state,
s_state,
i_color,
i_current_price,
i_manager_id,
i_units,
i_size,
sum(ss_net_paid) netpaid
FROM store_sales, store_returns, store, item, customer, customer_address
WHERE ss_ticket_number = sr_ticket_number
AND ss_item_sk = sr_item_sk
AND ss_customer_sk = c_customer_sk
AND ss_item_sk = i_item_sk
AND ss_store_sk = s_store_sk
AND c_birth_country = upper(ca_country)
AND s_zip = ca_zip
AND s_market_id = 8
GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color,
i_current_price, i_manager_id, i_units, i_size)
SELECT
c_last_name,
c_first_name,
s_store_name,
sum(netpaid) paid
FROM ssales
WHERE i_color = 'pale'
GROUP BY c_last_name, c_first_name, s_store_name
HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid)
FROM ssales)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment