【发布时间】:2019-04-11 17:09:38
【问题描述】:
我在 Qubole 中有一个代码需要将近 3 个小时才能执行。我正在寻找一些减少代码执行时间的建议。
WITH
-- Get latest date - 10 days before as day
d
AS (
SELECT CAST(CONCAT (
SUBSTR(CAST(DATE_ADD('day', - 10, CAST(CURRENT_TIMESTAMP AS DATE)) AS VARCHAR), 1, 4),
SUBSTR(CAST(DATE_ADD('day', - 10, CAST(CURRENT_TIMESTAMP AS DATE)) AS VARCHAR), 6, 2),
SUBSTR(CAST(DATE_ADD('day', - 10, CAST(CURRENT_TIMESTAMP AS DATE)) AS VARCHAR), 9, 2)
) AS BIGINT) AS day,
CAST(CONCAT (
SUBSTR(CAST(DATE_ADD('day', - 10, CAST(CURRENT_TIMESTAMP AS DATE)) AS VARCHAR), 1, 4),
'-',
SUBSTR(CAST(DATE_ADD('day', - 10, CAST(CURRENT_TIMESTAMP AS DATE)) AS VARCHAR), 6, 2),
'-',
SUBSTR(CAST(DATE_ADD('day', - 10, CAST(CURRENT_TIMESTAMP AS DATE)) AS VARCHAR), 9, 2)
) AS DATE) AS DATE,
'FR' AS country
)
SELECT 'Streaming' AS TRANSACTION,
'Spotify' AS account,
p_day,
access,
COUNT(DISTINCT customer_id) AS users,
COUNT(*) AS units
FROM temp_1
WHERE day >= (
SELECT day
FROM d
)
AND country_code = (
SELECT country
FROM d
)
GROUP BY 1,
2,
3,
4
UNION ALL
SELECT 'Streaming' AS TRANSACTION,
'Deezer' AS account,
p_day,
CASE
WHEN offer_code IN ('APP', 'BAO', 'BDP', 'BDS', 'BMO', 'BMS', 'BMW', 'BPF', 'BPP', 'BPR', 'BSO', 'BWE', 'BWP', 'BWS', 'DEE', 'DEP', 'ETT', 'EXT', 'FFX', 'IOS', 'OT1', 'PBH', 'PE1', 'PE2', 'PEM', 'PLS', 'PRM', 'PSC', 'PTP', 'SDP', 'SMG', 'SPF', 'SPP', 'SPR', 'SUP', 'SWE', 'SWP', '3M', 'FAM', 'GOO', 'GOF', 'HFP', 'HFF', 'HFI')
THEN 'premium'
WHEN offer_code IN ('BFR', 'MFS', 'MOD', 'SMR')
THEN 'free'
ELSE NULL
END AS access,
COUNT(DISTINCT masked_consumer_id) AS users,
SUM(units_sold_streams) AS streams
FROM temp_2
WHERE day >= (
SELECT day
FROM d
)
AND country_code = (
SELECT country
FROM d
)
GROUP BY 1,
2,
3,
4
UNION ALL
SELECT 'Streaming' AS TRANSACTION,
'Apple Music' AS account,
ingest_datestamp AS p_day,
'premium' AS access,
COUNT(DISTINCT anonymized_person_id) AS users,
COUNT(*) AS streams
FROM temp_streams1
WHERE ingest_datestamp >= (
SELECT DATE
FROM d
)
AND country_code = (
SELECT country
FROM d
)
GROUP BY 1,
2,
3,
4
【问题讨论】:
-
您至少应该include the actual Execution Plan,您可以使用Paste the Plan 并在您的问题中分享链接。另外try to read it yourself,也许您可以通过查询找出性能问题。最后包括schema DDL。
-
您使用什么数据库和版本?是普雷斯托吗?这似乎是 presto 语法。还是mysql...
-
GROUP BY 1, 2, 3, 4是不推荐使用的 SQL 标准语法,我相信这是不推荐使用的 sinds SQL 1999 ......而且这很容易出错,而且不容易阅读。
标签: sql performance query-optimization qubole