-- Source: DEV Community article by Armaan Khan.
-- (Article header text converted to a comment so this file parses as SQL.)
-- Per-warehouse daily dashboard: query counts bucketed by duration, queue
-- time, credit usage, spilling and status, plus the matching query-id lists,
-- joined to metering credits. One row per warehouse (last 1 day).
CREATE OR REPLACE TABLE WAREHOUSE_ANALYTICS_DASHBOARD_with_queries AS
WITH warehouse_info AS (
    -- Warehouse metadata observed in the events feed over the window.
    -- NOTE(review): SELECT DISTINCT yields one row per distinct
    -- SIZE/TYPE/CLUSTER_COUNT combination, so a warehouse resized during the
    -- day produces several rows and fans out the FULL OUTER JOIN below —
    -- confirm whether only the latest event per warehouse should be kept.
    SELECT DISTINCT
        wh.WAREHOUSE_ID,
        wh.WAREHOUSE_NAME,
        wh.SIZE,
        wh.WAREHOUSE_TYPE,
        wh.CLUSTER_COUNT,
        -- Placeholders: these attributes are not exposed by
        -- WAREHOUSE_EVENTS_HISTORY; kept so the dashboard schema is stable.
        NULL as SUSPEND_POLICY,
        NULL as MIN_CLUSTER_COUNT,
        NULL as MAX_CLUSTER_COUNT
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY wh
    WHERE wh.TIMESTAMP >= CURRENT_DATE - 1
),

query_buckets AS (
    -- One row per qualifying query, with classification buckets attached.
    SELECT
        q.WAREHOUSE_ID,
        q.WAREHOUSE_NAME,
        q.QUERY_ID,
        q.TOTAL_ELAPSED_TIME,
        q.EXECUTION_STATUS,
        q.CREDITS_USED_CLOUD_SERVICES,
        q.BYTES_SPILLED_TO_LOCAL_STORAGE,
        q.BYTES_SPILLED_TO_REMOTE_STORAGE,
        q.QUERY_TYPE,

        -- Elapsed-time buckets; TOTAL_ELAPSED_TIME is in milliseconds.
        -- The first bucket also absorbs sub-second queries (0-10s).
        CASE
            WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 20000 THEN '10-20 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN '20-60 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 180000 THEN '1-3 minutes'
            WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN '3-5 minutes'
            ELSE '5+ minutes'
        END as DURATION_BUCKET,

        -- Total queue time = provisioning + repair + overload (milliseconds).
        -- BUG FIX: the original CASE had no zero branch, so every query that
        -- never queued at all (queue time = 0) landed in '1-2 minutes',
        -- inflating QUEUED_1_2_MIN and the queued_1-2_min_ids list. Queries
        -- with zero queue time are now classified 'Not queued' and excluded
        -- from all QUEUED_* counts.
        CASE
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) = 0 THEN 'Not queued'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 120000 THEN '1-2 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 300000 THEN '2-5 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 600000 THEN '5-10 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 1200000 THEN '10-20 minutes'
            ELSE '20+ minutes'
        END as QUEUED_BUCKET,

        -- Cloud-services credits per query. NOTE(review): the labels say
        -- "cents" but the values are Snowflake credits, not currency —
        -- confirm the intended unit (and any credit-to-dollar conversion).
        CASE
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.2 THEN '0-20 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.4 THEN '20-40 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.6 THEN '40-60 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.8 THEN '60-80 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 1.0 THEN '80-100 cents'
            ELSE '100+ cents'
        END as CREDIT_UTILIZATION_BUCKET

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
    WHERE q.START_TIME >= CURRENT_DATE - 1
        AND q.WAREHOUSE_ID IS NOT NULL
        AND q.USER_NAME NOT IN ('SNOWFLAKE', 'SNOWFLAKE_MONITOR')
        AND q.QUERY_TYPE NOT IN ('SHOW', 'DESCRIBE', 'USE', 'CREATE', 'DROP', 'ALTER')
        AND q.QUERY_TEXT IS NOT NULL
),

warehouse_metrics AS (
    -- Metered credit totals per warehouse over the same window.
    SELECT
        wm.WAREHOUSE_ID,
        wm.WAREHOUSE_NAME,
        SUM(wm.CREDITS_USED) as TOTAL_CREDITS_USED,
        SUM(wm.CREDITS_USED_COMPUTE) as TOTAL_COMPUTE_CREDITS,
        SUM(wm.CREDITS_USED_CLOUD_SERVICES) as TOTAL_CLOUD_SERVICES_CREDITS
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY wm
    WHERE wm.START_TIME >= CURRENT_DATE - 1
    GROUP BY wm.WAREHOUSE_ID, wm.WAREHOUSE_NAME
)

SELECT
    -- FULL OUTER JOINs keep warehouses that appear in only one of the three
    -- sources, so every identifier is COALESCEd across them.
    COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID) as WAREHOUSE_ID,
    COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME) as WAREHOUSE_NAME,
    wi.SIZE as WAREHOUSE_SIZE,
    wi.WAREHOUSE_TYPE,
    wi.CLUSTER_COUNT,
    wi.SUSPEND_POLICY,
    wi.MIN_CLUSTER_COUNT,
    wi.MAX_CLUSTER_COUNT,

    -- Query counts per duration bucket.
    COUNT(CASE WHEN qb.DURATION_BUCKET = '1-10 seconds' THEN 1 END) as QUERIES_1_10_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '10-20 seconds' THEN 1 END) as QUERIES_10_20_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '20-60 seconds' THEN 1 END) as QUERIES_20_60_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '1-3 minutes' THEN 1 END) as QUERIES_1_3_MIN,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '3-5 minutes' THEN 1 END) as QUERIES_3_5_MIN,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '5+ minutes' THEN 1 END) as QUERIES_5_PLUS_MIN,

    -- Queue-time counts; 'Not queued' rows intentionally match none of these.
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '1-2 minutes' THEN 1 END) as QUEUED_1_2_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '2-5 minutes' THEN 1 END) as QUEUED_2_5_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '5-10 minutes' THEN 1 END) as QUEUED_5_10_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '10-20 minutes' THEN 1 END) as QUEUED_10_20_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '20+ minutes' THEN 1 END) as QUEUED_20_PLUS_MIN,

    -- Spill activity.
    COUNT(CASE WHEN qb.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_LOCAL,
    COUNT(CASE WHEN qb.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_REMOTE,
    SUM(qb.BYTES_SPILLED_TO_LOCAL_STORAGE) as TOTAL_BYTES_SPILLED_LOCAL,
    SUM(qb.BYTES_SPILLED_TO_REMOTE_STORAGE) as TOTAL_BYTES_SPILLED_REMOTE,

    -- NOTE(review): ACCOUNT_USAGE.QUERY_HISTORY documents statuses such as
    -- 'SUCCESS', 'FAIL', 'INCIDENT'; 'RUNNING' may never match in this view —
    -- verify against the Snowflake documentation for your edition.
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'FAIL' THEN 1 END) as FAILED_QUERIES,
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'SUCCESS' THEN 1 END) as SUCCESSFUL_QUERIES,
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'RUNNING' THEN 1 END) as RUNNING_QUERIES,

    -- Credit-bucket counts (see unit caveat in query_buckets).
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '0-20 cents' THEN 1 END) as QUERIES_0_20_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '20-40 cents' THEN 1 END) as QUERIES_20_40_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '40-60 cents' THEN 1 END) as QUERIES_40_60_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '60-80 cents' THEN 1 END) as QUERIES_60_80_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '80-100 cents' THEN 1 END) as QUERIES_80_100_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '100+ cents' THEN 1 END) as QUERIES_100_PLUS_CENTS,

    -- Drill-down lists of query ids per bucket. Snowflake ARRAY_AGG drops
    -- NULLs, so each array holds only the ids whose CASE matched.
    OBJECT_CONSTRUCT(
        '1-10_sec_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '1-10 seconds' THEN qb.QUERY_ID END),
        '10-20_sec_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '10-20 seconds' THEN qb.QUERY_ID END),
        '20-60_sec_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '20-60 seconds' THEN qb.QUERY_ID END),
        '1-3_min_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '1-3 minutes' THEN qb.QUERY_ID END),
        '3-5_min_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '3-5 minutes' THEN qb.QUERY_ID END),
        '5_plus_min_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '5+ minutes' THEN qb.QUERY_ID END),
        'queued_1-2_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '1-2 minutes' THEN qb.QUERY_ID END),
        'queued_2-5_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '2-5 minutes' THEN qb.QUERY_ID END),
        'queued_5-10_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '5-10 minutes' THEN qb.QUERY_ID END),
        'queued_10-20_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '10-20 minutes' THEN qb.QUERY_ID END),
        'queued_20_plus_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '20+ minutes' THEN qb.QUERY_ID END),
        'spilled_local_ids', ARRAY_AGG(CASE WHEN qb.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN qb.QUERY_ID END),
        'spilled_remote_ids', ARRAY_AGG(CASE WHEN qb.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN qb.QUERY_ID END),
        'failed_queries_ids', ARRAY_AGG(CASE WHEN qb.EXECUTION_STATUS = 'FAIL' THEN qb.QUERY_ID END),
        'successful_queries_ids', ARRAY_AGG(CASE WHEN qb.EXECUTION_STATUS = 'SUCCESS' THEN qb.QUERY_ID END),
        'running_queries_ids', ARRAY_AGG(CASE WHEN qb.EXECUTION_STATUS = 'RUNNING' THEN qb.QUERY_ID END),
        'credit_0-20_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '0-20 cents' THEN qb.QUERY_ID END),
        'credit_20-40_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '20-40 cents' THEN qb.QUERY_ID END),
        'credit_40-60_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '40-60 cents' THEN qb.QUERY_ID END),
        'credit_60-80_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '60-80 cents' THEN qb.QUERY_ID END),
        'credit_80-100_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '80-100 cents' THEN qb.QUERY_ID END),
        'credit_100_plus_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '100+ cents' THEN qb.QUERY_ID END)
    ) as QUERY_IDS,

    -- COUNT(col) counts non-NULL query ids only.
    COUNT(qb.QUERY_ID) as TOTAL_QUERIES,
    COALESCE(wm.TOTAL_CREDITS_USED, 0) as TOTAL_CREDITS_USED,
    COALESCE(wm.TOTAL_COMPUTE_CREDITS, 0) as TOTAL_COMPUTE_CREDITS,
    COALESCE(wm.TOTAL_CLOUD_SERVICES_CREDITS, 0) as TOTAL_CLOUD_SERVICES_CREDITS,

    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM warehouse_info wi
FULL OUTER JOIN query_buckets qb
    ON wi.WAREHOUSE_ID = qb.WAREHOUSE_ID
FULL OUTER JOIN warehouse_metrics wm
    ON COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID) = wm.WAREHOUSE_ID

GROUP BY
    COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID),
    COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME),
    wi.SIZE,
    wi.WAREHOUSE_TYPE,
    wi.CLUSTER_COUNT,
    wi.SUSPEND_POLICY,
    wi.MIN_CLUSTER_COUNT,
    wi.MAX_CLUSTER_COUNT,
    wm.TOTAL_CREDITS_USED,
    wm.TOTAL_COMPUTE_CREDITS,
    wm.TOTAL_CLOUD_SERVICES_CREDITS

ORDER BY TOTAL_QUERIES DESC;


-- End of WAREHOUSE_ANALYTICS_DASHBOARD_with_queries statement.

-- Flat one-row-per-query summary of the last day of query history, with
-- pre-computed classification buckets (duration / credit / spill / queue)
-- so dashboards can filter without recomputing CASE logic.
-- NOTE(review): the window START_TIME >= CURRENT_DATE - 1 spans yesterday
-- AND today-so-far, while ANALYSIS_DATE below is labeled as yesterday only —
-- confirm the intended window.
CREATE OR REPLACE TABLE QUERY_HISTORY_SUMMARY AS
SELECT 
    -- Query Identification
    q.QUERY_ID,
    q.QUERY_HASH,
    q.QUERY_PARAMETERIZED_HASH,
    -- Truncated preview keeps the table narrow; full text stays in the source view.
    LEFT(q.QUERY_TEXT, 100) as QUERY_TEXT_PREVIEW,
    q.QUERY_TYPE,
    q.QUERY_TAG,

    -- Timing Information (milliseconds)
    q.START_TIME,
    q.END_TIME,
    q.TOTAL_ELAPSED_TIME,
    q.COMPILATION_TIME,
    q.EXECUTION_TIME,

    -- User and Session Info
    q.USER_NAME,
    q.USER_TYPE,
    q.ROLE_NAME,
    q.ROLE_TYPE,
    q.SESSION_ID,

    -- Warehouse Information
    q.WAREHOUSE_ID,
    q.WAREHOUSE_NAME,
    q.WAREHOUSE_SIZE,
    q.WAREHOUSE_TYPE,
    q.CLUSTER_NUMBER,

    -- Database Context
    q.DATABASE_ID,
    q.DATABASE_NAME,
    q.SCHEMA_ID,
    q.SCHEMA_NAME,
    q.USER_DATABASE_NAME,
    q.USER_SCHEMA_NAME,

    -- Execution Status
    q.EXECUTION_STATUS,
    q.ERROR_CODE,
    LEFT(q.ERROR_MESSAGE, 200) as ERROR_MESSAGE_PREVIEW,

    -- Performance Metrics
    q.BYTES_SCANNED,
    q.PERCENTAGE_SCANNED_FROM_CACHE,
    q.BYTES_WRITTEN,
    q.ROWS_PRODUCED,
    q.ROWS_INSERTED,
    q.ROWS_UPDATED,
    q.ROWS_DELETED,

    -- Resource Usage
    q.CREDITS_USED_CLOUD_SERVICES,
    q.BYTES_SPILLED_TO_LOCAL_STORAGE,
    q.BYTES_SPILLED_TO_REMOTE_STORAGE,
    q.PARTITIONS_SCANNED,
    q.PARTITIONS_TOTAL,

    -- Queue Times (milliseconds)
    q.QUEUED_PROVISIONING_TIME,
    q.QUEUED_REPAIR_TIME,
    q.QUEUED_OVERLOAD_TIME,
    q.TRANSACTION_BLOCKED_TIME,

    -- Classification Buckets for easy filtering.
    -- First bucket also absorbs sub-second queries (0-10s).
    CASE 
        WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10 seconds'
        WHEN q.TOTAL_ELAPSED_TIME <= 20000 THEN '10-20 seconds' 
        WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN '20-60 seconds'
        WHEN q.TOTAL_ELAPSED_TIME <= 180000 THEN '1-3 minutes'
        WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN '3-5 minutes'
        ELSE '5+ minutes'
    END as DURATION_BUCKET,

    -- NOTE(review): labels say "cents" but the values are Snowflake credits,
    -- not currency — confirm intended unit.
    CASE 
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.2 THEN '0-20 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.4 THEN '20-40 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.6 THEN '40-60 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.8 THEN '60-80 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 1.0 THEN '80-100 cents'
        ELSE '100+ cents'
    END as CREDIT_BUCKET,

    -- Any spill (local or remote) marks the query SPILLED.
    CASE 
        WHEN q.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 OR q.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 'SPILLED'
        ELSE 'NO_SPILL'
    END as SPILL_STATUS,

    -- Any nonzero queue component marks the query QUEUED.
    CASE 
        WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) > 0 THEN 'QUEUED'
        ELSE 'NOT_QUEUED'
    END as QUEUE_STATUS,

    -- Analysis metadata
    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
WHERE q.START_TIME >= CURRENT_DATE - 1
    AND q.QUERY_ID IS NOT NULL
-- NOTE(review): ORDER BY in a CTAS does not guarantee retrieval order of the
-- stored table; it can influence clustering only.
ORDER BY q.START_TIME DESC;

-- =====================================================
-- TABLE 2: QUERY_DETAILS_COMPLETE
-- This table provides complete details for any specific query
-- =====================================================

-- Full-fidelity per-query detail table: every QUERY_HISTORY column of
-- interest plus derived ratios and performance classifications, for
-- drill-down from the summary/dashboard tables.
-- NOTE(review): filters differ from QUERY_HISTORY_SUMMARY (this one excludes
-- system users and metadata query types) — confirm that is intentional.
CREATE OR REPLACE TABLE QUERY_DETAILS_COMPLETE AS
WITH filtered_queries AS (
    SELECT
        q.*,
        -- Calculate partition scan percentage (100 = full-table scan).
        CASE
            WHEN q.PARTITIONS_TOTAL > 0 THEN
                ROUND((q.PARTITIONS_SCANNED::FLOAT / q.PARTITIONS_TOTAL::FLOAT) * 100, 2)
            ELSE 0
        END as PARTITION_SCAN_PERCENTAGE,

        -- Calculate compilation time as a percentage of total elapsed time.
        CASE
            WHEN q.TOTAL_ELAPSED_TIME > 0 THEN
                ROUND((q.COMPILATION_TIME::FLOAT / q.TOTAL_ELAPSED_TIME::FLOAT) * 100, 2)
            ELSE 0
        END as COMPILATION_TIME_PERCENTAGE,

        -- Calculate execution time as a percentage of total elapsed time.
        CASE
            WHEN q.TOTAL_ELAPSED_TIME > 0 THEN
                ROUND((q.EXECUTION_TIME::FLOAT / q.TOTAL_ELAPSED_TIME::FLOAT) * 100, 2)
            ELSE 0
        END as EXECUTION_TIME_PERCENTAGE,

        -- Calculate rows produced per MB scanned (scan efficiency).
        -- NOTE(review): yields NULL (not 0) when ROWS_PRODUCED is NULL,
        -- e.g. for DML-only statements — confirm whether NULL is acceptable.
        CASE
            WHEN q.BYTES_SCANNED > 0 THEN
                ROUND(q.ROWS_PRODUCED::FLOAT / (q.BYTES_SCANNED::FLOAT / 1024 / 1024), 2)
            ELSE 0
        END as ROWS_PER_MB_SCANNED,

        -- Classify performance by elapsed time (milliseconds).
        CASE
            WHEN q.TOTAL_ELAPSED_TIME <= 1000 THEN 'VERY_FAST'
            WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN 'FAST'
            WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN 'MODERATE'
            WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN 'SLOW'
            ELSE 'VERY_SLOW'
        END as PERFORMANCE_CATEGORY,

        -- Classify cache efficiency from the percentage scanned from cache.
        CASE
            WHEN q.PERCENTAGE_SCANNED_FROM_CACHE >= 90 THEN 'HIGH_CACHE_HIT'
            WHEN q.PERCENTAGE_SCANNED_FROM_CACHE >= 50 THEN 'MEDIUM_CACHE_HIT'
            WHEN q.PERCENTAGE_SCANNED_FROM_CACHE > 0 THEN 'LOW_CACHE_HIT'
            ELSE 'NO_CACHE_HIT'
        END as CACHE_EFFICIENCY,

        -- Classify spilling; ordered so BOTH takes precedence over either alone.
        CASE
            WHEN q.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 AND q.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 'BOTH_SPILL'
            WHEN q.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 'REMOTE_SPILL'
            WHEN q.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 'LOCAL_SPILL'
            ELSE 'NO_SPILL'
        END as SPILL_CLASSIFICATION

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
    WHERE q.START_TIME >= CURRENT_DATE - 1
        AND q.QUERY_ID IS NOT NULL
        AND q.USER_NAME NOT IN ('SNOWFLAKE', 'SNOWFLAKE_MONITOR')
        AND q.QUERY_TYPE NOT IN ('SHOW', 'DESCRIBE', 'USE', 'CREATE', 'DROP', 'ALTER')
)
SELECT 
    -- Core Query Information
    QUERY_ID,
    QUERY_TEXT,
    QUERY_HASH,
    QUERY_HASH_VERSION,
    QUERY_PARAMETERIZED_HASH,
    QUERY_PARAMETERIZED_HASH_VERSION,
    QUERY_TYPE,
    QUERY_TAG,

    -- Timing Details (all in milliseconds)
    START_TIME,
    END_TIME,
    TOTAL_ELAPSED_TIME,
    COMPILATION_TIME,
    EXECUTION_TIME,
    QUEUED_PROVISIONING_TIME,
    QUEUED_REPAIR_TIME,
    QUEUED_OVERLOAD_TIME,
    TRANSACTION_BLOCKED_TIME,
    CHILD_QUERIES_WAIT_TIME,
    QUERY_RETRY_TIME,
    QUERY_RETRY_CAUSE,
    FAULT_HANDLING_TIME,
    LIST_EXTERNAL_FILES_TIME,

    -- User and Authentication
    USER_NAME,
    USER_TYPE,
    ROLE_NAME,
    ROLE_TYPE,
    SECONDARY_ROLE_STATS,
    SESSION_ID,

    -- Warehouse and Compute
    WAREHOUSE_ID,
    WAREHOUSE_NAME,
    WAREHOUSE_SIZE,
    WAREHOUSE_TYPE,
    CLUSTER_NUMBER,
    QUERY_LOAD_PERCENT,

    -- Database Context
    DATABASE_ID,
    DATABASE_NAME,
    SCHEMA_ID,
    SCHEMA_NAME,
    USER_DATABASE_ID,
    USER_DATABASE_NAME,
    USER_SCHEMA_ID,
    USER_SCHEMA_NAME,

    -- Execution Results
    EXECUTION_STATUS,
    ERROR_CODE,
    ERROR_MESSAGE,
    IS_CLIENT_GENERATED_STATEMENT,

    -- Data Processing Metrics
    BYTES_SCANNED,
    PERCENTAGE_SCANNED_FROM_CACHE,
    BYTES_WRITTEN,
    BYTES_WRITTEN_TO_RESULT,
    BYTES_READ_FROM_RESULT,
    ROWS_PRODUCED,
    ROWS_WRITTEN_TO_RESULT,
    ROWS_INSERTED,
    ROWS_UPDATED,
    ROWS_DELETED,
    ROWS_UNLOADED,
    BYTES_DELETED,

    -- Partitioning
    PARTITIONS_SCANNED,
    PARTITIONS_TOTAL,
    PARTITION_SCAN_PERCENTAGE,

    -- Memory and Spilling
    BYTES_SPILLED_TO_LOCAL_STORAGE,
    BYTES_SPILLED_TO_REMOTE_STORAGE,
    BYTES_SENT_OVER_THE_NETWORK,

    -- Credits and Cost
    CREDITS_USED_CLOUD_SERVICES,

    -- Data Transfer
    OUTBOUND_DATA_TRANSFER_CLOUD,
    OUTBOUND_DATA_TRANSFER_REGION,
    OUTBOUND_DATA_TRANSFER_BYTES,
    INBOUND_DATA_TRANSFER_CLOUD,
    INBOUND_DATA_TRANSFER_REGION,
    INBOUND_DATA_TRANSFER_BYTES,

    -- External Functions
    EXTERNAL_FUNCTION_TOTAL_INVOCATIONS,
    EXTERNAL_FUNCTION_TOTAL_SENT_ROWS,
    EXTERNAL_FUNCTION_TOTAL_RECEIVED_ROWS,
    EXTERNAL_FUNCTION_TOTAL_SENT_BYTES,
    EXTERNAL_FUNCTION_TOTAL_RECEIVED_BYTES,

    -- Query Acceleration
    QUERY_ACCELERATION_BYTES_SCANNED,
    QUERY_ACCELERATION_PARTITIONS_SCANNED,
    QUERY_ACCELERATION_UPPER_LIMIT_SCALE_FACTOR,

    -- Transaction Information
    TRANSACTION_ID,

    -- System Information
    RELEASE_VERSION,

    -- Performance Ratios and Calculated Fields (derived in filtered_queries)
    COMPILATION_TIME_PERCENTAGE,
    EXECUTION_TIME_PERCENTAGE,
    ROWS_PER_MB_SCANNED,

    -- Performance Classifications (derived in filtered_queries)
    PERFORMANCE_CATEGORY,
    CACHE_EFFICIENCY,
    SPILL_CLASSIFICATION,

    -- Analysis metadata
    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM filtered_queries
-- NOTE(review): ORDER BY in a CTAS does not guarantee retrieval order.
ORDER BY START_TIME DESC;

-- End of QUERY_DETAILS_COMPLETE statement.
CREATE OR REPLACE TABLE user_query_performance_report AS
WITH percentile_reference AS (
    SELECT
        warehouse_size,
        PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY bytes_scanned) AS bytes_scanned_p10,
        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_elapsed_time) AS execution_time_p90
    FROM snowflake.account_usage.query_history
   WHERE START_TIME >= DATEADD(DAY, -7, CURRENT_DATE)
  AND START_TIME <= CURRENT_DATE

      AND user_name IS NOT NULL
      AND query_type NOT IN ('DESCRIBE', 'SHOW', 'USE')
    GROUP BY warehouse_size
),
query_flags AS (
    SELECT
        qh.query_id,
        qh.warehouse_size,
        qh.bytes_scanned,
        qh.total_elapsed_time AS execution_time_ms,
        qh.compilation_time,
        qh.start_time,
        qh.query_text,
        qh.query_hash,
        qh.query_tag,
        qh.error_code,
        qh.execution_status,
        qh.partitions_scanned,
        qh.partitions_total,
        qh.bytes_spilled_to_local_storage,
        qh.bytes_spilled_to_remote_storage,
        qh.user_name,
        qh.database_name,
        qh.schema_name,
        qh.warehouse_name,
        COALESCE(qh.credits_used_cloud_services, 0) AS credits_used_cloud_services,
        qh.rows_produced,
        qh.rows_deleted,
        qh.rows_inserted,
        qh.rows_updated,
        CASE WHEN qh.warehouse_size IN ('MEDIUM', 'LARGE', 'X-LARGE', '2X-LARGE', '3X-LARGE', '4X-LARGE') AND qh.bytes_scanned < pr.bytes_scanned_p10 THEN 1 ELSE 0 END AS over_provisioned,
        CASE WHEN EXTRACT(HOUR FROM qh.start_time) BETWEEN 9 AND 17 AND qh.total_elapsed_time > 300000 THEN 1 ELSE 0 END AS peak_hour_long_running,
        CASE WHEN qh.query_text ILIKE 'SELECT *%' THEN 1 ELSE 0 END AS select_star,
        CASE WHEN qh.partitions_total > 0 AND qh.partitions_scanned = qh.partitions_total THEN 1 ELSE 0 END AS unpartitioned_scan,
        CASE WHEN qh.query_hash IS NOT NULL THEN 1 ELSE 0 END AS repeated_query,
        CASE WHEN (qh.query_text ILIKE '%JOIN%' AND qh.query_text ILIKE '%JOIN%') OR qh.query_text ILIKE '%WINDOW%' THEN 1 ELSE 0 END AS complex_query,
        CASE WHEN qh.error_code IS NOT NULL OR qh.execution_status IN ('FAILED', 'CANCELLED') THEN 1 ELSE 0 END AS failed_cancelled,
        CASE WHEN qh.bytes_spilled_to_local_storage > 0 OR qh.bytes_spilled_to_remote_storage > 0 THEN 1 ELSE 0 END AS spilled,
        CASE WHEN qh.rows_produced = 0 AND qh.bytes_scanned > 1000000 THEN 1 ELSE 0 END AS zero_result_query,
        CASE WHEN qh.compilation_time > 5000 THEN 1 ELSE 0 END AS high_compile_time,
        CASE WHEN qh.query_tag IS NULL THEN 1 ELSE 0 END AS untagged_query,
        CASE WHEN qh.query_text ILIKE '%ORDER BY%' AND qh.query_text NOT ILIKE '%LIMIT%' THEN 1 ELSE 0 END AS unlimited_order_by,
        CASE WHEN qh.query_text ILIKE '%GROUP BY%' AND qh.rows_produced > 1000000 THEN 1 ELSE 0 END AS large_group_by,
        CASE WHEN qh.total_elapsed_time > pr.execution_time_p90 THEN 1 ELSE 0 END AS slow_query,
        CASE WHEN qh.query_text ILIKE '%DISTINCT%' AND qh.bytes_scanned > 100000000 THEN 1 ELSE 0 END AS expensive_distinct,
        CASE WHEN qh.query_text ILIKE '%LIKE%' AND qh.query_text NOT ILIKE '%INDEX%' THEN 1 ELSE 0 END AS inefficient_like,
        CASE WHEN qh.bytes_scanned > 0 AND qh.rows_produced = 0 THEN 1 ELSE 0 END AS no_results_with_scan,
        CASE WHEN qh.query_text ILIKE '%CROSS JOIN%' OR (qh.query_text ILIKE '%JOIN%' AND qh.query_text NOT ILIKE '%ON%') THEN 1 ELSE 0 END AS cartesian_join,
        CASE WHEN qh.total_elapsed_time > 0 AND qh.compilation_time / qh.total_elapsed_time > 0.5 THEN 1 ELSE 0 END AS high_compile_ratio
    FROM snowflake.account_usage.query_history qh
    LEFT JOIN percentile_reference pr ON qh.warehouse_size = pr.warehouse_size
    WHERE START_TIME >= DATEADD(DAY, -7, CURRENT_DATE)
  AND START_TIME <= CURRENT_DATE

        AND qh.query_type NOT IN ('DESCRIBE', 'SHOW', 'USE')
        AND qh.user_name IS NOT NULL
        AND qh.total_elapsed_time > 1000
),
sample_queries AS (
    SELECT
        user_name,
        'over_provisioned' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY start_time DESC) AS sample_queries
    FROM query_flags WHERE over_provisioned = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'over_provisioned', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(over_provisioned) = 0
    UNION ALL
    SELECT
        user_name,
        'peak_hour_long_running' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY execution_time_ms DESC) AS sample_queries
    FROM query_flags WHERE peak_hour_long_running = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'peak_hour_long_running', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(peak_hour_long_running) = 0
    UNION ALL
    SELECT
        user_name,
        'select_star' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE select_star = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'select_star', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(select_star) = 0
    UNION ALL
    SELECT
        user_name,
        'unpartitioned_scan' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'partitions_scanned', partitions_scanned, 'partitions_total', partitions_total, 'start_time', start_time)) WITHIN GROUP (ORDER BY partitions_scanned DESC) AS sample_queries
    FROM query_flags WHERE unpartitioned_scan = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'unpartitioned_scan', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(unpartitioned_scan) = 0
    UNION ALL
    SELECT
        user_name,
        'spilled' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_spilled_to_local_storage', bytes_spilled_to_local_storage, 'bytes_spilled_to_remote_storage', bytes_spilled_to_remote_storage, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY (bytes_spilled_to_local_storage + bytes_spilled_to_remote_storage) DESC) AS sample_queries
    FROM query_flags WHERE spilled = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'spilled', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(spilled) = 0
    UNION ALL
    SELECT
        user_name,
        'failed_cancelled' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'error_code', error_code, 'execution_status', execution_status, 'start_time', start_time)) WITHIN GROUP (ORDER BY start_time DESC) AS sample_queries
    FROM query_flags WHERE failed_cancelled = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'failed_cancelled', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(failed_cancelled) = 0
    UNION ALL
    SELECT
        user_name,
        'zero_result_query' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'bytes_scanned', bytes_scanned, 'execution_time_ms', execution_time_ms, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE zero_result_query = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'zero_result_query', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(zero_result_query) = 0
    UNION ALL
    SELECT
        user_name,
        'high_compile_time' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'compilation_time', compilation_time, 'execution_time_ms', execution_time_ms, 'start_time', start_time)) WITHIN GROUP (ORDER BY compilation_time DESC) AS sample_queries
    FROM query_flags WHERE high_compile_time = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'high_compile_time', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(high_compile_time) = 0
    UNION ALL
    SELECT
        user_name,
        'slow_query' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY execution_time_ms DESC) AS sample_queries
    FROM query_flags WHERE slow_query = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'slow_query', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(slow_query) = 0
    UNION ALL
    SELECT
        user_name,
        'cartesian_join' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'rows_produced', rows_produced, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE cartesian_join = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'cartesian_join', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(cartesian_join) = 0
    UNION ALL
    SELECT
        user_name,
        'unlimited_order_by' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE unlimited_order_by = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'unlimited_order_by', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(unlimited_order_by) = 0
    UNION ALL
    SELECT
        user_name,
        'large_group_by' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'rows_produced', rows_produced, 'start_time', start_time)) WITHIN GROUP (ORDER BY rows_produced DESC) AS sample_queries
    FROM query_flags WHERE large_group_by = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'large_group_by', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(large_group_by) = 0
    UNION ALL
    SELECT
        user_name,
        'expensive_distinct' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE expensive_distinct = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'expensive_distinct', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(expensive_distinct) = 0
    UNION ALL
    SELECT
        user_name,
        'inefficient_like' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE inefficient_like = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'inefficient_like', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(inefficient_like) = 0
    UNION ALL
    SELECT
        user_name,
        'no_results_with_scan' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE no_results_with_scan = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'no_results_with_scan', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(no_results_with_scan) = 0
    UNION ALL
    SELECT
        user_name,
        'high_compile_ratio' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'compilation_time', compilation_time, 'execution_time_ms', execution_time_ms, 'start_time', start_time)) WITHIN GROUP (ORDER BY compilation_time DESC) AS sample_queries
    FROM query_flags WHERE high_compile_ratio = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'high_compile_ratio', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(high_compile_ratio) = 0
),
pivoted_samples AS (
    SELECT
        user_name,
        OBJECT_AGG(flag_type, sample_queries) AS query_samples
    FROM sample_queries
    GROUP BY user_name
),
-- Per-user rollup of activity, cost, and anti-pattern counts, plus a single
-- weighted badness score used for ranking in the final SELECT.
-- NOTE(review): assumes every *_flag column in query_flags is a 0/1
-- indicator (so SUM(flag) = count of flagged queries) — confirm against the
-- query_flags CTE defined earlier in the file.
user_metrics AS (
    SELECT
        user_name,
        COUNT(DISTINCT query_id) AS total_queries,
        COUNT(DISTINCT warehouse_name) AS warehouses_used,
        COUNT(DISTINCT database_name) AS databases_accessed,
        -- Cloud-services credits only; warehouse compute credits are not
        -- included in this figure.
        ROUND(SUM(credits_used_cloud_services), 2) AS total_credits,
        ROUND(AVG(execution_time_ms), 2) AS avg_execution_time_ms,
        -- NULLIF guards division by zero; rows with rows_produced = 0
        -- contribute NULL and are therefore excluded from the AVG.
        ROUND(AVG(bytes_scanned / NULLIF(rows_produced, 0)), 2) AS avg_bytes_per_row,
        ROUND(SUM(bytes_scanned) / POWER(1024, 3), 2) AS total_data_scanned_gb,
        -- Percentage of this user's queries that failed or were cancelled.
        ROUND(SUM(failed_cancelled) * 1.0 / NULLIF(COUNT(*), 0) * 100, 2) AS failure_cancellation_rate_pct,
        SUM(spilled) AS spilled_queries,
        SUM(over_provisioned) AS over_provisioned_queries,
        SUM(peak_hour_long_running) AS peak_hour_long_running_queries,
        SUM(select_star) AS select_star_queries,
        SUM(unpartitioned_scan) AS unpartitioned_scan_queries,
        -- Queries sharing a query_hash with another query are "repeats".
        -- NOTE(review): COUNT(DISTINCT query_hash) ignores NULL hashes, so
        -- rows with NULL query_hash inflate this count — verify query_hash
        -- is always populated upstream.
        COUNT(*) - COUNT(DISTINCT query_hash) AS repeated_queries,
        SUM(complex_query) AS complex_join_queries,
        SUM(zero_result_query) AS zero_result_queries,
        SUM(high_compile_time) AS high_compile_queries,
        SUM(untagged_query) AS untagged_queries,
        SUM(unlimited_order_by) AS unlimited_order_by_queries,
        SUM(large_group_by) AS large_group_by_queries,
        SUM(slow_query) AS slow_queries,
        SUM(expensive_distinct) AS expensive_distinct_queries,
        SUM(inefficient_like) AS inefficient_like_queries,
        SUM(no_results_with_scan) AS no_results_with_scan_queries,
        SUM(cartesian_join) AS cartesian_join_queries,
        SUM(high_compile_ratio) AS high_compile_ratio_queries,
        -- Weighted badness score: each anti-pattern count times a severity
        -- weight (2 = minor hygiene ... 6 = cartesian join, the worst).
        -- NOTE(review): the weights are magic numbers with no documented
        -- rationale here — consider moving them to a reference table or at
        -- minimum documenting how they were chosen.
        ROUND(
            SUM(spilled) * 5 +
            SUM(over_provisioned) * 4 +
            SUM(peak_hour_long_running) * 4 +
            SUM(select_star) * 3 +
            SUM(unpartitioned_scan) * 4 +
            (COUNT(*) - COUNT(DISTINCT query_hash)) * 2 +
            SUM(complex_query) * 3 +
            SUM(failed_cancelled) * 3 +
            SUM(zero_result_query) * 5 +
            SUM(high_compile_time) * 3 +
            SUM(untagged_query) * 2 +
            SUM(unlimited_order_by) * 3 +
            SUM(large_group_by) * 4 +
            SUM(slow_query) * 4 +
            SUM(expensive_distinct) * 4 +
            SUM(inefficient_like) * 2 +
            SUM(no_results_with_scan) * 5 +
            SUM(cartesian_join) * 6 +
            SUM(high_compile_ratio) * 3, 2
        ) AS weighted_score
    FROM query_flags
    GROUP BY user_name
),
-- Account-wide thresholds, each a single-row CTE cross-joined onto every
-- user row below: P90 of per-user credits, P90 of avg bytes/row, and P80 of
-- failure/cancellation rate.
-- NOTE(review): these three could be a single one-row CTE (one pass over
-- user_metrics, one CROSS JOIN instead of three) — left as-is here because
-- the final SELECT references them by CTE name.
cost_percentiles AS (
    SELECT PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_credits) AS cost_p90 FROM user_metrics
),
bytes_percentiles AS (
    SELECT PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY avg_bytes_per_row) AS bytes_p90 FROM user_metrics
),
failure_percentiles AS (
    SELECT PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY failure_cancellation_rate_pct) AS failure_p80 FROM user_metrics
)
-- Final projection: one row per user with all metrics, a High Cost/Normal
-- status relative to the account P90, a compacted array of plain-English
-- recommendations, and the per-flag sample-query object.
SELECT
    um.user_name,
    um.total_queries,
    um.warehouses_used,
    um.databases_accessed,
    um.total_credits,
    um.avg_execution_time_ms,
    um.avg_bytes_per_row,
    um.total_data_scanned_gb,
    um.failure_cancellation_rate_pct,
    um.spilled_queries,
    um.over_provisioned_queries,
    um.peak_hour_long_running_queries,
    um.select_star_queries,
    um.unpartitioned_scan_queries,
    um.repeated_queries,
    um.complex_join_queries,
    um.zero_result_queries,
    um.high_compile_queries,
    um.untagged_queries,
    um.unlimited_order_by_queries,
    um.large_group_by_queries,
    um.slow_queries,
    um.expensive_distinct_queries,
    um.inefficient_like_queries,
    um.no_results_with_scan_queries,
    um.cartesian_join_queries,
    um.high_compile_ratio_queries,
    um.weighted_score,
    -- Relative flag: user is "High Cost" only above the account-wide P90.
    CASE WHEN um.total_credits > cost_percentiles.cost_p90 THEN 'High Cost' ELSE 'Normal' END AS cost_status,
    -- ARRAY_CONSTRUCT_COMPACT drops the NULL branches, so the array contains
    -- only the recommendations whose threshold was exceeded.
    -- NOTE(review): the per-flag thresholds (> 10, > 5, > 0) are absolute
    -- counts, not rates — a heavy user trips them more easily than a light
    -- one; consider normalizing by total_queries.
    ARRAY_CONSTRUCT_COMPACT(
        CASE WHEN um.avg_bytes_per_row > bytes_percentiles.bytes_p90 THEN 'Reduce data scanned with clustering or selective columns.' ELSE NULL END,
        CASE WHEN um.failure_cancellation_rate_pct > failure_percentiles.failure_p80 THEN 'Debug query syntax and logic.' ELSE NULL END,
        CASE WHEN um.spilled_queries > 10 THEN 'Optimize memory usage or increase warehouse size.' ELSE NULL END,
        CASE WHEN um.over_provisioned_queries > 10 THEN 'Use smaller warehouses for simple queries.' ELSE NULL END,
        CASE WHEN um.peak_hour_long_running_queries > 10 THEN 'Schedule long queries off-peak.' ELSE NULL END,
        CASE WHEN um.select_star_queries > 10 THEN 'Specify columns instead of SELECT *.' ELSE NULL END,
        CASE WHEN um.unpartitioned_scan_queries > 10 THEN 'Implement partitioning or clustering.' ELSE NULL END,
        CASE WHEN um.repeated_queries > 10 THEN 'Review frequently executed queries.' ELSE NULL END,
        CASE WHEN um.complex_join_queries > 10 THEN 'Simplify complex JOINs or window functions.' ELSE NULL END,
        CASE WHEN um.zero_result_queries > 10 THEN 'Avoid queries that return no data.' ELSE NULL END,
        CASE WHEN um.high_compile_queries > 10 THEN 'Refactor complex SQL to reduce compile time.' ELSE NULL END,
        CASE WHEN um.untagged_queries > 10 THEN 'Apply query tags for cost attribution.' ELSE NULL END,
        CASE WHEN um.unlimited_order_by_queries > 5 THEN 'Add LIMIT clauses to ORDER BY queries.' ELSE NULL END,
        CASE WHEN um.large_group_by_queries > 5 THEN 'Optimize GROUP BY operations with pre-aggregation.' ELSE NULL END,
        CASE WHEN um.slow_queries > 5 THEN 'Optimize slow-running queries.' ELSE NULL END,
        CASE WHEN um.expensive_distinct_queries > 5 THEN 'Replace DISTINCT with GROUP BY where possible.' ELSE NULL END,
        CASE WHEN um.inefficient_like_queries > 10 THEN 'Use proper indexing for LIKE operations.' ELSE NULL END,
        CASE WHEN um.no_results_with_scan_queries > 5 THEN 'Add WHERE clauses to prevent unnecessary scans.' ELSE NULL END,
        -- Zero tolerance for cartesian joins: any occurrence triggers this.
        CASE WHEN um.cartesian_join_queries > 0 THEN 'Review JOIN conditions to avoid Cartesian products.' ELSE NULL END,
        CASE WHEN um.high_compile_ratio_queries > 5 THEN 'Simplify query complexity to reduce compilation overhead.' ELSE NULL END
    ) AS recommendations,
    -- Empty object (not NULL) for users with no flagged sample queries, so
    -- consumers can always key into query_samples safely.
    COALESCE(ps.query_samples, OBJECT_CONSTRUCT()) AS query_samples
FROM user_metrics um
-- Single-row threshold CTEs: each CROSS JOIN adds the account-wide
-- percentile columns to every user row without multiplying rows.
CROSS JOIN cost_percentiles
CROSS JOIN bytes_percentiles
CROSS JOIN failure_percentiles
-- LEFT JOIN keeps users who have metrics but no sampled flagged queries.
LEFT JOIN pivoted_samples ps ON um.user_name = ps.user_name
ORDER BY weighted_score DESC;

Enter fullscreen mode Exit fullscreen mode

Top comments (0)