-- =============================================
-- OPTIMIZED SNOWFLAKE QUERY PERFORMANCE ANALYZER
-- =============================================
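-- Note: reading SNOWFLAKE.ACCOUNT_USAGE requires imported privileges on the
-- SNOWFLAKE database, e.g.: GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <your_role>;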
-- Step 1: Create a table to hold the refreshable analysis results
CREATE OR REPLACE TABLE QUERY_PERFORMANCE_ANALYSIS (
USER_NAME VARCHAR(255),
TOTAL_QUERIES NUMBER,
WAREHOUSES_USED NUMBER,
DATABASES_ACCESSED NUMBER,
TOTAL_CREDITS NUMBER(10,2),
AVG_EXECUTION_TIME_MS NUMBER(10,2),
AVG_BYTES_PER_ROW NUMBER(15,2),
TOTAL_DATA_SCANNED_GB NUMBER(15,2),
FAILURE_CANCELLATION_RATE_PCT NUMBER(5,2),
SPILLED_QUERIES NUMBER,
OVER_PROVISIONED_QUERIES NUMBER,
PEAK_HOUR_LONG_RUNNING_QUERIES NUMBER,
SELECT_STAR_QUERIES NUMBER,
UNPARTITIONED_SCAN_QUERIES NUMBER,
REPEATED_QUERIES NUMBER,
COMPLEX_JOIN_QUERIES NUMBER,
ZERO_RESULT_QUERIES NUMBER,
HIGH_COMPILE_QUERIES NUMBER,
UNTAGGED_QUERIES NUMBER,
UNLIMITED_ORDER_BY_QUERIES NUMBER,
SLOW_QUERIES NUMBER,
EXPENSIVE_DISTINCT_QUERIES NUMBER,
INEFFICIENT_LIKE_QUERIES NUMBER,
NO_RESULTS_WITH_SCAN_QUERIES NUMBER,
HIGH_COMPILE_RATIO_QUERIES NUMBER,
-- Normalized scores (1-100)
SPILLED_SCORE NUMBER(5,2),
OVER_PROVISIONED_SCORE NUMBER(5,2),
PEAK_HOUR_SCORE NUMBER(5,2),
SELECT_STAR_SCORE NUMBER(5,2),
UNPARTITIONED_SCORE NUMBER(5,2),
REPEATED_SCORE NUMBER(5,2),
COMPLEX_JOIN_SCORE NUMBER(5,2),
ZERO_RESULT_SCORE NUMBER(5,2),
HIGH_COMPILE_SCORE NUMBER(5,2),
SLOW_QUERY_SCORE NUMBER(5,2),
EXPENSIVE_DISTINCT_SCORE NUMBER(5,2),
INEFFICIENT_LIKE_SCORE NUMBER(5,2),
NO_RESULTS_SCAN_SCORE NUMBER(5,2),
HIGH_COMPILE_RATIO_SCORE NUMBER(5,2),
OVERALL_OPTIMIZATION_SCORE NUMBER(5,2), -- 1 = optimized, 100 = most unoptimized
COST_STATUS VARCHAR(50),
RECOMMENDATIONS ARRAY,
QUERY_SAMPLES OBJECT,
ANALYSIS_DATE TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Step 2: Create procedure to refresh the analysis
CREATE OR REPLACE PROCEDURE REFRESH_QUERY_PERFORMANCE_ANALYSIS(
START_DATE STRING,
END_DATE STRING,
OBJECT_FILTER STRING DEFAULT '' -- Reserved for future object-level filtering; not yet applied below
)
RETURNS STRING
LANGUAGE SQL
AS
$$
BEGIN
-- Clear existing data
DELETE FROM QUERY_PERFORMANCE_ANALYSIS;
-- Insert new analysis
INSERT INTO QUERY_PERFORMANCE_ANALYSIS
WITH percentile_reference AS (
SELECT
COALESCE(warehouse_size, 'UNKNOWN') as warehouse_size,
PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY COALESCE(bytes_scanned, 0)) AS bytes_scanned_p10,
PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_elapsed_time) AS execution_time_p90
FROM snowflake.account_usage.query_history
WHERE start_time >= TRY_TO_TIMESTAMP_NTZ(:START_DATE)
AND start_time <= TRY_TO_TIMESTAMP_NTZ(:END_DATE || ' 23:59:59')
AND user_name IS NOT NULL
AND query_type NOT IN ('DESCRIBE', 'SHOW', 'USE', 'CREATE', 'DROP', 'ALTER', 'GRANT', 'REVOKE')
AND total_elapsed_time >= 2000 -- Minimum 2 seconds
GROUP BY COALESCE(warehouse_size, 'UNKNOWN')
),
query_flags AS (
SELECT
qh.query_id,
qh.warehouse_size,
qh.bytes_scanned,
qh.total_elapsed_time AS execution_time_ms,
qh.compilation_time,
qh.start_time,
qh.query_text, -- Full query text, no truncation
qh.query_hash,
qh.query_tag,
qh.error_code,
qh.execution_status,
qh.partitions_scanned,
qh.partitions_total,
qh.bytes_spilled_to_local_storage,
qh.bytes_spilled_to_remote_storage,
qh.user_name,
qh.database_name,
qh.schema_name,
qh.warehouse_name,
COALESCE(qh.credits_used_cloud_services, 0) AS credits_used_cloud_services,
qh.rows_produced,
-- Optimized flag calculations
CASE
WHEN UPPER(qh.warehouse_size) IN ('MEDIUM', 'LARGE', 'X-LARGE', '2X-LARGE', '3X-LARGE', '4X-LARGE')
AND qh.bytes_scanned < pr.bytes_scanned_p10 THEN 1 ELSE 0
END AS over_provisioned,
CASE
WHEN EXTRACT(HOUR FROM qh.start_time) BETWEEN 9 AND 17 -- business hours; adjust to your time zone
AND qh.total_elapsed_time > 300000 THEN 1 ELSE 0
END AS peak_hour_long_running,
CASE
WHEN REGEXP_LIKE(qh.query_text, '\\s*SELECT\\s+\\*.*', 'is') THEN 1 ELSE 0 -- queries that start with SELECT *
END AS select_star,
CASE
WHEN qh.partitions_total > 0 AND qh.partitions_scanned = qh.partitions_total THEN 1 ELSE 0
END AS unpartitioned_scan,
-- (repeated-query counts are derived from query_hash in user_metrics below)
CASE
WHEN REGEXP_COUNT(qh.query_text, '\\bJOIN\\b', 1, 'i') >= 2
OR REGEXP_COUNT(qh.query_text, 'OVER\\s*\\(', 1, 'i') > 0 THEN 1 ELSE 0
END AS complex_query,
CASE
WHEN qh.error_code IS NOT NULL OR qh.execution_status IN ('FAILED', 'CANCELLED') THEN 1 ELSE 0
END AS failed_cancelled,
CASE
WHEN qh.bytes_spilled_to_local_storage > 0 OR qh.bytes_spilled_to_remote_storage > 0 THEN 1 ELSE 0
END AS spilled,
CASE
WHEN qh.rows_produced = 0 AND qh.bytes_scanned > 1000000 THEN 1 ELSE 0
END AS zero_result_query,
CASE
WHEN qh.compilation_time > 5000 THEN 1 ELSE 0
END AS high_compile_time,
CASE
WHEN qh.query_tag IS NULL THEN 1 ELSE 0
END AS untagged_query,
CASE
WHEN UPPER(qh.query_text) LIKE '%ORDER BY%' AND UPPER(qh.query_text) NOT LIKE '%LIMIT%' THEN 1 ELSE 0
END AS unlimited_order_by,
CASE
WHEN qh.total_elapsed_time > pr.execution_time_p90 THEN 1 ELSE 0
END AS slow_query,
CASE
WHEN UPPER(qh.query_text) LIKE '%DISTINCT%' AND qh.bytes_scanned > 100000000 THEN 1 ELSE 0
END AS expensive_distinct,
CASE
WHEN REGEXP_COUNT(qh.query_text, 'LIKE\\s+''%', 1, 'i') > 0 THEN 1 ELSE 0 -- leading-wildcard LIKE defeats pruning
END AS inefficient_like,
CASE
WHEN qh.bytes_scanned > 0 AND qh.rows_produced = 0 THEN 1 ELSE 0
END AS no_results_with_scan,
CASE
WHEN qh.total_elapsed_time > 0 AND qh.compilation_time / qh.total_elapsed_time > 0.5 THEN 1 ELSE 0
END AS high_compile_ratio
FROM snowflake.account_usage.query_history qh
LEFT JOIN percentile_reference pr ON COALESCE(qh.warehouse_size, 'UNKNOWN') = pr.warehouse_size
WHERE
qh.start_time >= TRY_TO_TIMESTAMP_NTZ(:START_DATE)
AND qh.start_time <= TRY_TO_TIMESTAMP_NTZ(:END_DATE || ' 23:59:59')
AND qh.query_type NOT IN ('DESCRIBE', 'SHOW', 'USE', 'CREATE', 'DROP', 'ALTER', 'GRANT', 'REVOKE')
AND qh.user_name IS NOT NULL
AND qh.total_elapsed_time >= 2000 -- Minimum 2 seconds
),
-- Optimized sample queries collection
sample_queries AS (
SELECT
user_name,
OBJECT_CONSTRUCT(
'over_provisioned', ARRAY_SLICE(ARRAY_AGG(
CASE WHEN over_provisioned = 1 THEN
OBJECT_CONSTRUCT(
'query_id', query_id,
'query_text', query_text, -- Full query text
'execution_time_ms', execution_time_ms,
'bytes_scanned', bytes_scanned,
'warehouse_size', warehouse_size,
'start_time', start_time
)
END
) WITHIN GROUP (ORDER BY start_time DESC), 0, 5), -- keep 5 samples per flag to stay under the 16 MB VARIANT limit
'peak_hour_long_running', ARRAY_SLICE(ARRAY_AGG(
CASE WHEN peak_hour_long_running = 1 THEN
OBJECT_CONSTRUCT(
'query_id', query_id,
'query_text', query_text,
'execution_time_ms', execution_time_ms,
'bytes_scanned', bytes_scanned,
'warehouse_size', warehouse_size,
'start_time', start_time
)
END
) WITHIN GROUP (ORDER BY execution_time_ms DESC), 0, 5),
'select_star', ARRAY_SLICE(ARRAY_AGG(
CASE WHEN select_star = 1 THEN
OBJECT_CONSTRUCT(
'query_id', query_id,
'query_text', query_text,
'execution_time_ms', execution_time_ms,
'bytes_scanned', bytes_scanned,
'warehouse_size', warehouse_size,
'start_time', start_time
)
END
) WITHIN GROUP (ORDER BY bytes_scanned DESC), 0, 5),
'spilled', ARRAY_SLICE(ARRAY_AGG(
CASE WHEN spilled = 1 THEN
OBJECT_CONSTRUCT(
'query_id', query_id,
'query_text', query_text,
'execution_time_ms', execution_time_ms,
'bytes_spilled_to_local_storage', bytes_spilled_to_local_storage,
'bytes_spilled_to_remote_storage', bytes_spilled_to_remote_storage,
'warehouse_size', warehouse_size,
'start_time', start_time
)
END
) WITHIN GROUP (ORDER BY (COALESCE(bytes_spilled_to_local_storage, 0) + COALESCE(bytes_spilled_to_remote_storage, 0)) DESC), 0, 5),
'slow_query', ARRAY_SLICE(ARRAY_AGG(
CASE WHEN slow_query = 1 THEN
OBJECT_CONSTRUCT(
'query_id', query_id,
'query_text', query_text,
'execution_time_ms', execution_time_ms,
'bytes_scanned', bytes_scanned,
'warehouse_size', warehouse_size,
'start_time', start_time
)
END
) WITHIN GROUP (ORDER BY execution_time_ms DESC), 0, 5)
) AS query_samples
FROM query_flags
GROUP BY user_name
),
user_metrics AS (
SELECT
user_name,
COUNT(DISTINCT query_id) AS total_queries,
COUNT(DISTINCT warehouse_name) AS warehouses_used,
COUNT(DISTINCT database_name) AS databases_accessed,
ROUND(SUM(credits_used_cloud_services), 2) AS total_credits, -- cloud services credits only; warehouse compute credits live in WAREHOUSE_METERING_HISTORY
ROUND(AVG(execution_time_ms), 2) AS avg_execution_time_ms,
ROUND(AVG(NULLIF(bytes_scanned, 0) / NULLIF(rows_produced, 0)), 2) AS avg_bytes_per_row,
ROUND(SUM(bytes_scanned) / POWER(1024, 3), 2) AS total_data_scanned_gb,
ROUND(SUM(failed_cancelled) * 100.0 / NULLIF(COUNT(*), 0), 2) AS failure_cancellation_rate_pct,
SUM(spilled) AS spilled_queries,
SUM(over_provisioned) AS over_provisioned_queries,
SUM(peak_hour_long_running) AS peak_hour_long_running_queries,
SUM(select_star) AS select_star_queries,
SUM(unpartitioned_scan) AS unpartitioned_scan_queries,
COUNT(query_hash) - COUNT(DISTINCT query_hash) AS repeated_queries, -- executions beyond the first per query_hash
SUM(complex_query) AS complex_join_queries,
SUM(zero_result_query) AS zero_result_queries,
SUM(high_compile_time) AS high_compile_queries,
SUM(untagged_query) AS untagged_queries,
SUM(unlimited_order_by) AS unlimited_order_by_queries,
SUM(slow_query) AS slow_queries,
SUM(expensive_distinct) AS expensive_distinct_queries,
SUM(inefficient_like) AS inefficient_like_queries,
SUM(no_results_with_scan) AS no_results_with_scan_queries,
SUM(high_compile_ratio) AS high_compile_ratio_queries
FROM query_flags
GROUP BY user_name
),
-- Per-user maxima (plus a cost percentile) for score normalization
normalization_stats AS (
SELECT
MAX(spilled_queries) AS max_spilled,
MAX(over_provisioned_queries) AS max_over_provisioned,
MAX(peak_hour_long_running_queries) AS max_peak_hour,
MAX(select_star_queries) AS max_select_star,
MAX(unpartitioned_scan_queries) AS max_unpartitioned,
MAX(repeated_queries) AS max_repeated,
MAX(complex_join_queries) AS max_complex,
MAX(zero_result_queries) AS max_zero_result,
MAX(high_compile_queries) AS max_high_compile,
MAX(slow_queries) AS max_slow,
MAX(expensive_distinct_queries) AS max_expensive_distinct,
MAX(inefficient_like_queries) AS max_inefficient_like,
MAX(no_results_with_scan_queries) AS max_no_results_scan,
MAX(high_compile_ratio_queries) AS max_high_compile_ratio,
PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_credits) AS cost_p90
FROM user_metrics
)
SELECT
um.user_name,
um.total_queries,
um.warehouses_used,
um.databases_accessed,
um.total_credits,
um.avg_execution_time_ms,
um.avg_bytes_per_row,
um.total_data_scanned_gb,
um.failure_cancellation_rate_pct,
um.spilled_queries,
um.over_provisioned_queries,
um.peak_hour_long_running_queries,
um.select_star_queries,
um.unpartitioned_scan_queries,
um.repeated_queries,
um.complex_join_queries,
um.zero_result_queries,
um.high_compile_queries,
um.untagged_queries,
um.unlimited_order_by_queries,
um.slow_queries,
um.expensive_distinct_queries,
um.inefficient_like_queries,
um.no_results_with_scan_queries,
um.high_compile_ratio_queries,
-- Normalized scores (1-100 scale); COALESCE falls back to 1 (best) when a divisor is zero
COALESCE(GREATEST(1, LEAST(100, ROUND((um.spilled_queries * 100.0) / NULLIF(ns.max_spilled, 0), 2))), 1) AS spilled_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.over_provisioned_queries * 100.0) / NULLIF(ns.max_over_provisioned, 0), 2))), 1) AS over_provisioned_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.peak_hour_long_running_queries * 100.0) / NULLIF(ns.max_peak_hour, 0), 2))), 1) AS peak_hour_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.select_star_queries * 100.0) / NULLIF(ns.max_select_star, 0), 2))), 1) AS select_star_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.unpartitioned_scan_queries * 100.0) / NULLIF(ns.max_unpartitioned, 0), 2))), 1) AS unpartitioned_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.repeated_queries * 100.0) / NULLIF(ns.max_repeated, 0), 2))), 1) AS repeated_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.complex_join_queries * 100.0) / NULLIF(ns.max_complex, 0), 2))), 1) AS complex_join_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.zero_result_queries * 100.0) / NULLIF(ns.max_zero_result, 0), 2))), 1) AS zero_result_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.high_compile_queries * 100.0) / NULLIF(ns.max_high_compile, 0), 2))), 1) AS high_compile_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.slow_queries * 100.0) / NULLIF(ns.max_slow, 0), 2))), 1) AS slow_query_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.expensive_distinct_queries * 100.0) / NULLIF(ns.max_expensive_distinct, 0), 2))), 1) AS expensive_distinct_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.inefficient_like_queries * 100.0) / NULLIF(ns.max_inefficient_like, 0), 2))), 1) AS inefficient_like_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.no_results_with_scan_queries * 100.0) / NULLIF(ns.max_no_results_scan, 0), 2))), 1) AS no_results_scan_score,
COALESCE(GREATEST(1, LEAST(100, ROUND((um.high_compile_ratio_queries * 100.0) / NULLIF(ns.max_high_compile_ratio, 0), 2))), 1) AS high_compile_ratio_score,
-- Overall optimization score (weighted average of the heaviest cost drivers; weights sum to 1.0)
GREATEST(1, LEAST(100, ROUND(
(COALESCE(GREATEST(1, LEAST(100, (um.spilled_queries * 100.0) / NULLIF(ns.max_spilled, 0))), 1) * 0.15 +
COALESCE(GREATEST(1, LEAST(100, (um.over_provisioned_queries * 100.0) / NULLIF(ns.max_over_provisioned, 0))), 1) * 0.12 +
COALESCE(GREATEST(1, LEAST(100, (um.peak_hour_long_running_queries * 100.0) / NULLIF(ns.max_peak_hour, 0))), 1) * 0.12 +
COALESCE(GREATEST(1, LEAST(100, (um.select_star_queries * 100.0) / NULLIF(ns.max_select_star, 0))), 1) * 0.10 +
COALESCE(GREATEST(1, LEAST(100, (um.unpartitioned_scan_queries * 100.0) / NULLIF(ns.max_unpartitioned, 0))), 1) * 0.12 +
COALESCE(GREATEST(1, LEAST(100, (um.repeated_queries * 100.0) / NULLIF(ns.max_repeated, 0))), 1) * 0.08 +
COALESCE(GREATEST(1, LEAST(100, (um.zero_result_queries * 100.0) / NULLIF(ns.max_zero_result, 0))), 1) * 0.15 +
COALESCE(GREATEST(1, LEAST(100, (um.slow_queries * 100.0) / NULLIF(ns.max_slow, 0))), 1) * 0.16), 2
))) AS overall_optimization_score,
CASE
WHEN um.total_credits > ns.cost_p90 THEN 'High Cost'
ELSE 'Normal'
END AS cost_status,
ARRAY_CONSTRUCT_COMPACT(
CASE WHEN um.spilled_queries > 5 THEN 'Optimize memory usage or increase warehouse size.' END,
CASE WHEN um.over_provisioned_queries > 5 THEN 'Use smaller warehouses for simple queries.' END,
CASE WHEN um.peak_hour_long_running_queries > 5 THEN 'Schedule long queries off-peak.' END,
CASE WHEN um.select_star_queries > 5 THEN 'Specify columns instead of SELECT *.' END,
CASE WHEN um.unpartitioned_scan_queries > 5 THEN 'Implement partitioning or clustering.' END,
CASE WHEN um.repeated_queries > 10 THEN 'Review frequently executed queries.' END,
CASE WHEN um.zero_result_queries > 3 THEN 'Avoid queries that return no data.' END,
CASE WHEN um.slow_queries > 3 THEN 'Optimize slow-running queries.' END,
CASE WHEN um.expensive_distinct_queries > 3 THEN 'Replace DISTINCT with GROUP BY where possible.' END,
CASE WHEN um.no_results_with_scan_queries > 3 THEN 'Add WHERE clauses to prevent unnecessary scans.' END
) AS recommendations,
COALESCE(sq.query_samples, OBJECT_CONSTRUCT()) AS query_samples,
CURRENT_TIMESTAMP() AS analysis_date
FROM user_metrics um
CROSS JOIN normalization_stats ns
LEFT JOIN sample_queries sq ON um.user_name = sq.user_name
ORDER BY overall_optimization_score DESC;
RETURN 'Analysis refreshed successfully for ' || START_DATE || ' to ' || END_DATE;
END;
$$;
-- Step 3: Query to refresh and view results
-- Usage example:
-- CALL REFRESH_QUERY_PERFORMANCE_ANALYSIS('2024-01-01', '2024-01-31');
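-- Optional: keep the table current with a scheduled task. A minimal sketch follows;
-- the task name, warehouse (MY_WH), 30-day window, and cron schedule are illustrative
-- assumptions, not part of the original script.
-- CREATE OR REPLACE TASK REFRESH_QUERY_PERFORMANCE_TASK
--   WAREHOUSE = MY_WH
--   SCHEDULE = 'USING CRON 0 6 * * * UTC' -- daily at 06:00 UTC
-- AS
--   CALL REFRESH_QUERY_PERFORMANCE_ANALYSIS(
--     TO_VARCHAR(DATEADD(DAY, -30, CURRENT_DATE()), 'YYYY-MM-DD'),
--     TO_VARCHAR(CURRENT_DATE(), 'YYYY-MM-DD')
--   );
-- ALTER TASK REFRESH_QUERY_PERFORMANCE_TASK RESUME; -- tasks are created suspended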
-- Step 4: Debug view to check data availability
-- Run this first to confirm there is data in your date range; note that
-- SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY can lag live activity by up to 45 minutes
CREATE OR REPLACE VIEW DEBUG_QUERY_DATA AS
SELECT
COUNT(*) as total_records,
COUNT(DISTINCT user_name) as unique_users,
MIN(start_time) as earliest_query,
MAX(start_time) as latest_query,
COUNT(CASE WHEN total_elapsed_time >= 2000 THEN 1 END) as queries_over_2sec,
COUNT(CASE WHEN query_type NOT IN ('DESCRIBE', 'SHOW', 'USE') THEN 1 END) as non_metadata_queries,
ARRAY_AGG(DISTINCT query_type) as query_types_found,
ARRAY_SLICE(ARRAY_AGG(DISTINCT user_name) WITHIN GROUP (ORDER BY user_name), 0, 10) as sample_users
FROM snowflake.account_usage.query_history
WHERE start_time >= CURRENT_DATE - 30 -- Last 30 days
AND user_name IS NOT NULL;
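-- Usage (sanity check before calling the refresh procedure):
-- SELECT * FROM DEBUG_QUERY_DATA;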
-- Step 5: Simple test procedure to verify data
CREATE OR REPLACE PROCEDURE TEST_QUERY_DATA(
START_DATE STRING,
END_DATE STRING
)
RETURNS VARIANT
LANGUAGE SQL
AS
$$
DECLARE
result VARIANT;
BEGIN
SELECT
OBJECT_CONSTRUCT(
'total_queries', COUNT(*),
'unique_users', COUNT(DISTINCT user_name),
'date_range_start', MIN(start_time),
'date_range_end', MAX(start_time),
'queries_over_2sec', COUNT(CASE WHEN total_elapsed_time >= 2000 THEN 1 END),
'sample_users', ARRAY_SLICE(ARRAY_AGG(DISTINCT user_name) WITHIN GROUP (ORDER BY user_name), 0, 10),
'query_types', ARRAY_AGG(DISTINCT query_type) WITHIN GROUP (ORDER BY query_type)
)
INTO :result
FROM snowflake.account_usage.query_history
WHERE start_time >= TRY_TO_TIMESTAMP_NTZ(:START_DATE)
AND start_time <= TRY_TO_TIMESTAMP_NTZ(:END_DATE || ' 23:59:59')
AND user_name IS NOT NULL;
RETURN result;
END;
$$;
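-- Usage example (dates are illustrative):
-- CALL TEST_QUERY_DATA('2024-01-01', '2024-01-31');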
-- Step 6: View the results
-- SELECT * FROM QUERY_PERFORMANCE_ANALYSIS ORDER BY OVERALL_OPTIMIZATION_SCORE DESC;
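-- Optional drill-downs (sketches; 'SOME_USER' is a placeholder user name): unpack
-- the RECOMMENDATIONS array and the 'slow_query' samples stored in QUERY_SAMPLES.
-- SELECT user_name, r.value::STRING AS recommendation
-- FROM QUERY_PERFORMANCE_ANALYSIS,
-- LATERAL FLATTEN(input => RECOMMENDATIONS) r
-- WHERE user_name = 'SOME_USER';
-- SELECT user_name,
-- s.value:query_id::STRING AS query_id,
-- s.value:execution_time_ms::NUMBER AS execution_time_ms
-- FROM QUERY_PERFORMANCE_ANALYSIS,
-- LATERAL FLATTEN(input => QUERY_SAMPLES:slow_query) s
-- WHERE user_name = 'SOME_USER'
-- ORDER BY execution_time_ms DESC;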