DEV Community

Armaan Khan
Armaan Khan

Posted on

qureus

CREATE OR REPLACE TABLE ANALYTICS.USER_SUMMARY AS
WITH user_warehouse_stats AS (
    SELECT 
        qh.USER_NAME,
        qh.WAREHOUSE_ID,
        qh.WAREHOUSE_NAME,
        COUNT(DISTINCT qh.QUERY_ID) as QUERIES_COUNT,
        COUNT(DISTINCT DATE(qh.START_TIME)) as ACTIVE_DAYS,
        ROUND(AVG(qh.TOTAL_ELAPSED_TIME/1000), 2) as AVG_DURATION_SEC,
        ROUND(SUM(qh.BYTES_SCANNED)/1024/1024/1024, 2) as TOTAL_GB_SCANNED,
        SUM(qh.ROWS_PRODUCED) as TOTAL_ROWS_PRODUCED,
        SUM(CASE WHEN qh.EXECUTION_STATUS = 'SUCCESS' THEN 1 ELSE 0 END) as SUCCESSFUL_QUERIES,
        SUM(CASE WHEN qh.EXECUTION_STATUS IN ('FAIL', 'CANCELLED') THEN 1 ELSE 0 END) as FAILED_QUERIES,
        MIN(qh.START_TIME) as FIRST_QUERY_TIME,
        MAX(qh.START_TIME) as LAST_QUERY_TIME
    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh
    WHERE qh.START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
        AND qh.USER_NAME IS NOT NULL
    GROUP BY qh.USER_NAME, qh.WAREHOUSE_ID, qh.WAREHOUSE_NAME
),
user_credits AS (
    -- Calculate credits per user per warehouse per day to avoid duplication
    SELECT 
        qh.USER_NAME,
        qh.WAREHOUSE_ID,
        DATE(qh.START_TIME) as USAGE_DATE,
        COUNT(DISTINCT qh.QUERY_ID) as DAILY_QUERIES,
        -- Estimate user's share of warehouse credits based on query proportion
        SUM(wh.CREDITS_USED) * COUNT(DISTINCT qh.QUERY_ID)::FLOAT / 
        SUM(COUNT(DISTINCT qh.QUERY_ID)) OVER (PARTITION BY qh.WAREHOUSE_ID, DATE(qh.START_TIME)) as ESTIMATED_DAILY_CREDITS
    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh
    JOIN SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY wh 
        ON qh.WAREHOUSE_ID = wh.WAREHOUSE_ID 
        AND DATE(qh.START_TIME) = DATE(wh.START_TIME)
    WHERE qh.START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
        AND qh.USER_NAME IS NOT NULL
    GROUP BY qh.USER_NAME, qh.WAREHOUSE_ID, DATE(qh.START_TIME)
)
SELECT 
    uws.USER_NAME,
    COUNT(DISTINCT uws.WAREHOUSE_ID) as WAREHOUSES_USED,
    SUM(uws.QUERIES_COUNT) as TOTAL_QUERIES,
    ROUND(SUM(uc.ESTIMATED_DAILY_CREDITS), 3) as ESTIMATED_TOTAL_CREDITS,
    SUM(uws.ACTIVE_DAYS) as TOTAL_ACTIVE_DAYS,
    ROUND(AVG(uws.AVG_DURATION_SEC), 2) as OVERALL_AVG_DURATION_SEC,
    ROUND(SUM(uws.TOTAL_GB_SCANNED), 2) as TOTAL_GB_SCANNED,
    SUM(uws.TOTAL_ROWS_PRODUCED) as TOTAL_ROWS_PRODUCED,
    SUM(uws.SUCCESSFUL_QUERIES) as SUCCESSFUL_QUERIES,
    SUM(uws.FAILED_QUERIES) as FAILED_QUERIES,
    ROUND(SUM(uws.SUCCESSFUL_QUERIES)::FLOAT / SUM(uws.QUERIES_COUNT) * 100, 2) as SUCCESS_RATE_PCT,
    MIN(uws.FIRST_QUERY_TIME) as FIRST_QUERY_TIME,
    MAX(uws.LAST_QUERY_TIME) as LAST_QUERY_TIME,
    CURRENT_TIMESTAMP() as ANALYSIS_TIMESTAMP
FROM user_warehouse_stats uws
LEFT JOIN user_credits uc ON uws.USER_NAME = uc.USER_NAME AND uws.WAREHOUSE_ID = uc.WAREHOUSE_ID
GROUP BY uws.USER_NAME
ORDER BY ESTIMATED_TOTAL_CREDITS DESC;

-- 3. QUERY SUMMARY TABLE
CREATE OR REPLACE TABLE ANALYTICS.QUERY_SUMMARY AS
SELECT 
    qh.QUERY_ID,
    qh.QUERY_HASH,
    qh.USER_NAME,
    qh.ROLE_NAME,
    qh.WAREHOUSE_ID,
    qh.WAREHOUSE_NAME,
    qh.WAREHOUSE_SIZE,
    qh.DATABASE_NAME,
    qh.SCHEMA_NAME,
    qh.QUERY_TYPE,
    qh.START_TIME,
    qh.END_TIME,
    qh.EXECUTION_STATUS,
    qh.ERROR_CODE,
    qh.ERROR_MESSAGE,

    -- Performance Metrics
    ROUND(qh.TOTAL_ELAPSED_TIME/1000, 3) as TOTAL_DURATION_SEC,
    ROUND(qh.COMPILATION_TIME/1000, 3) as COMPILATION_SEC,
    ROUND(qh.EXECUTION_TIME/1000, 3) as EXECUTION_SEC,
    ROUND(qh.QUEUED_PROVISIONING_TIME/1000, 3) as QUEUED_PROVISIONING_SEC,
    ROUND(qh.QUEUED_OVERLOAD_TIME/1000, 3) as QUEUED_OVERLOAD_SEC,

    -- Data Metrics
    ROUND(qh.BYTES_SCANNED/1024/1024/1024, 3) as GB_SCANNED,
    ROUND(qh.BYTES_WRITTEN/1024/1024/1024, 3) as GB_WRITTEN,
    qh.ROWS_PRODUCED,
    qh.ROWS_INSERTED,
    qh.ROWS_UPDATED,
    qh.ROWS_DELETED,
    qh.PARTITIONS_SCANNED,
    qh.PARTITIONS_TOTAL,
    qh.PERCENTAGE_SCANNED_FROM_CACHE,

    -- Resource Usage
    ROUND(qh.BYTES_SPILLED_TO_LOCAL_STORAGE/1024/1024/1024, 3) as GB_SPILLED_LOCAL,
    ROUND(qh.BYTES_SPILLED_TO_REMOTE_STORAGE/1024/1024/1024, 3) as GB_SPILLED_REMOTE,
    qh.QUERY_LOAD_PERCENT,
    qh.CREDITS_USED_CLOUD_SERVICES,

    -- Query Classification
    CASE 
        WHEN qh.TOTAL_ELAPSED_TIME <= 1000 THEN '0-1sec'
        WHEN qh.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10sec'
        WHEN qh.TOTAL_ELAPSED_TIME <= 30000 THEN '10-30sec'
        WHEN qh.TOTAL_ELAPSED_TIME <= 60000 THEN '30-60sec'
        WHEN qh.TOTAL_ELAPSED_TIME <= 300000 THEN '1-5min'
        ELSE '5min+'
    END as DURATION_BUCKET,

    CASE 
        WHEN qh.QUERY_LOAD_PERCENT BETWEEN 0 AND 20 THEN 'Low (0-20%)'
        WHEN qh.QUERY_LOAD_PERCENT BETWEEN 21 AND 40 THEN 'Medium-Low (21-40%)'
        WHEN qh.QUERY_LOAD_PERCENT BETWEEN 41 AND 60 THEN 'Medium (41-60%)'
        WHEN qh.QUERY_LOAD_PERCENT BETWEEN 61 AND 80 THEN 'Medium-High (61-80%)'
        WHEN qh.QUERY_LOAD_PERCENT BETWEEN 81 AND 100 THEN 'High (81-100%)'
        ELSE 'Unknown'
    END as UTILIZATION_BUCKET,

    -- Issue Flags
    CASE WHEN qh.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN TRUE ELSE FALSE END as HAS_LOCAL_SPILL,
    CASE WHEN qh.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN TRUE ELSE FALSE END as HAS_REMOTE_SPILL,
    CASE WHEN UPPER(qh.QUERY_TEXT) LIKE '%SELECT *%' THEN TRUE ELSE FALSE END as HAS_SELECT_STAR,
    CASE WHEN qh.PARTITIONS_TOTAL > 0 AND qh.PERCENTAGE_SCANNED_FROM_CACHE < 10 THEN TRUE ELSE FALSE END as HAS_POOR_PRUNING,
    CASE WHEN qh.ROWS_PRODUCED = 0 AND qh.BYTES_SCANNED > 0 THEN TRUE ELSE FALSE END as HAS_ZERO_RESULTS,
    CASE WHEN qh.COMPILATION_TIME > 5000 THEN TRUE ELSE FALSE END as HAS_SLOW_COMPILE,

    LEFT(qh.QUERY_TEXT, 500) as QUERY_TEXT_PREVIEW,
    CURRENT_TIMESTAMP() as ANALYSIS_TIMESTAMP

FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh
WHERE qh.START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
ORDER BY qh.START_TIME DESC;

-- 4. QUERY DETAILS TABLE (Full query text and metadata)
CREATE OR REPLACE TABLE ANALYTICS.QUERY_DETAILS AS
SELECT 
    qh.QUERY_ID,
    qh.QUERY_TEXT,
    qh.SESSION_ID,
    qh.TRANSACTION_ID,
    qh.QUERY_TAG,
    qh.RELEASE_VERSION,
    qh.QUERY_HASH,
    qh.QUERY_HASH_VERSION,
    qh.QUERY_PARAMETERIZED_HASH,

    -- External Functions (if any)
    qh.EXTERNAL_FUNCTION_TOTAL_INVOCATIONS,
    qh.EXTERNAL_FUNCTION_TOTAL_SENT_ROWS,
    qh.EXTERNAL_FUNCTION_TOTAL_RECEIVED_ROWS,
    ROUND(qh.EXTERNAL_FUNCTION_TOTAL_SENT_BYTES/1024/1024, 2) as EXTERNAL_FUNCTION_SENT_MB,
    ROUND(qh.EXTERNAL_FUNCTION_TOTAL_RECEIVED_BYTES/1024/1024, 2) as EXTERNAL_FUNCTION_RECEIVED_MB,

    -- Data Transfer
    qh.OUTBOUND_DATA_TRANSFER_CLOUD,
    qh.OUTBOUND_DATA_TRANSFER_REGION,
    ROUND(qh.OUTBOUND_DATA_TRANSFER_BYTES/1024/1024, 2) as OUTBOUND_TRANSFER_MB,
    qh.INBOUND_DATA_TRANSFER_CLOUD,
    qh.INBOUND_DATA_TRANSFER_REGION,
    ROUND(qh.INBOUND_DATA_TRANSFER_BYTES/1024/1024, 2) as INBOUND_TRANSFER_MB,

    -- Additional Timing
    ROUND(qh.LIST_EXTERNAL_FILES_TIME/1000, 3) as LIST_FILES_SEC,
    ROUND(qh.CHILD_QUERIES_WAIT_TIME/1000, 3) as CHILD_QUERIES_WAIT_SEC,
    ROUND(qh.TRANSACTION_BLOCKED_TIME/1000, 3) as TRANSACTION_BLOCKED_SEC,
    ROUND(qh.QUERY_RETRY_TIME/1000, 3) as QUERY_RETRY_SEC,
    qh.QUERY_RETRY_CAUSE,
    ROUND(qh.FAULT_HANDLING_TIME/1000, 3) as FAULT_HANDLING_SEC,

    CURRENT_TIMESTAMP() as ANALYSIS_TIMESTAMP

FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh
WHERE qh.START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
ORDER BY qh.START_TIME DESC;
Enter fullscreen mode Exit fullscreen mode
-- 1. WAREHOUSE ANALYTICS TABLE
CREATE OR REPLACE TABLE ANALYTICS.WAREHOUSE_SUMMARY AS
WITH daily_warehouse_credits AS (
    -- Pre-aggregate warehouse credits by day to avoid duplication
    SELECT 
        WAREHOUSE_ID,
        WAREHOUSE_NAME,
        DATE(START_TIME) as USAGE_DATE,
        SUM(CREDITS_USED) as DAILY_CREDITS_USED,
        SUM(CREDITS_USED_COMPUTE) as DAILY_CREDITS_COMPUTE,
        SUM(CREDITS_USED_CLOUD_SERVICES) as DAILY_CREDITS_CLOUD_SERVICES
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY
    WHERE START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
    GROUP BY WAREHOUSE_ID, WAREHOUSE_NAME, DATE(START_TIME)
),
warehouse_config AS (
    -- Get warehouse configuration details
    SELECT 
        we.WAREHOUSE_ID,
        we.WAREHOUSE_NAME,
        we.SIZE,
        we.CLUSTER_COUNT,
        we.WAREHOUSE_TYPE,
        MIN(we.TIMESTAMP) as FIRST_SEEN,
        MAX(we.TIMESTAMP) as LAST_SEEN
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY we
    WHERE we.TIMESTAMP >= DATEADD('day', -7, CURRENT_TIMESTAMP())
    GROUP BY we.WAREHOUSE_ID, we.WAREHOUSE_NAME, we.SIZE, we.CLUSTER_COUNT, we.WAREHOUSE_TYPE
    QUALIFY ROW_NUMBER() OVER (PARTITION BY we.WAREHOUSE_ID ORDER BY we.TIMESTAMP DESC) = 1
),
query_stats AS (
    -- Aggregate query statistics properly
    SELECT 
        qh.WAREHOUSE_ID,
        COUNT(DISTINCT qh.QUERY_ID) as TOTAL_QUERIES,
        COUNT(DISTINCT qh.USER_NAME) as UNIQUE_USERS,
        COUNT(DISTINCT DATE(qh.START_TIME)) as ACTIVE_DAYS,
        COUNT(DISTINCT HOUR(qh.START_TIME)) as ACTIVE_HOURS,

        -- Performance Buckets
        SUM(CASE WHEN qh.TOTAL_ELAPSED_TIME <= 1000 THEN 1 ELSE 0 END) as QUERIES_0_TO_1_SEC,
        SUM(CASE WHEN qh.TOTAL_ELAPSED_TIME > 1000 AND qh.TOTAL_ELAPSED_TIME <= 10000 THEN 1 ELSE 0 END) as QUERIES_1_TO_10_SEC,
        SUM(CASE WHEN qh.TOTAL_ELAPSED_TIME > 10000 AND qh.TOTAL_ELAPSED_TIME <= 30000 THEN 1 ELSE 0 END) as QUERIES_10_TO_30_SEC,
        SUM(CASE WHEN qh.TOTAL_ELAPSED_TIME > 30000 AND qh.TOTAL_ELAPSED_TIME <= 60000 THEN 1 ELSE 0 END) as QUERIES_30_TO_60_SEC,
        SUM(CASE WHEN qh.TOTAL_ELAPSED_TIME > 60000 AND qh.TOTAL_ELAPSED_TIME <= 300000 THEN 1 ELSE 0 END) as QUERIES_1_TO_5_MIN,
        SUM(CASE WHEN qh.TOTAL_ELAPSED_TIME > 300000 THEN 1 ELSE 0 END) as QUERIES_5_MIN_PLUS,

        -- Utilization Buckets (based on concurrent queries)
        COUNT(CASE WHEN qh.QUERY_LOAD_PERCENT BETWEEN 0 AND 20 THEN 1 END) as LOW_UTILIZATION_QUERIES,
        COUNT(CASE WHEN qh.QUERY_LOAD_PERCENT BETWEEN 21 AND 40 THEN 1 END) as MEDIUM_LOW_UTILIZATION_QUERIES,
        COUNT(CASE WHEN qh.QUERY_LOAD_PERCENT BETWEEN 41 AND 60 THEN 1 END) as MEDIUM_UTILIZATION_QUERIES,
        COUNT(CASE WHEN qh.QUERY_LOAD_PERCENT BETWEEN 61 AND 80 THEN 1 END) as MEDIUM_HIGH_UTILIZATION_QUERIES,
        COUNT(CASE WHEN qh.QUERY_LOAD_PERCENT BETWEEN 81 AND 100 THEN 1 END) as HIGH_UTILIZATION_QUERIES,

        -- Bad Practice Categories
        SUM(CASE WHEN qh.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 1 ELSE 0 END) as SPILLED_TO_LOCAL_QUERIES,
        SUM(CASE WHEN qh.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 1 ELSE 0 END) as SPILLED_TO_REMOTE_QUERIES,
        SUM(CASE WHEN UPPER(qh.QUERY_TEXT) LIKE '%SELECT *%' THEN 1 ELSE 0 END) as SELECT_STAR_QUERIES,
        SUM(CASE WHEN qh.PARTITIONS_TOTAL > 0 AND qh.PERCENTAGE_SCANNED_FROM_CACHE < 10 THEN 1 ELSE 0 END) as UNPARTITIONED_SCAN_QUERIES,
        SUM(CASE WHEN qh.ROWS_PRODUCED = 0 AND qh.BYTES_SCANNED > 0 THEN 1 ELSE 0 END) as ZERO_RESULT_QUERIES,
        SUM(CASE WHEN qh.EXECUTION_STATUS IN ('FAIL', 'CANCELLED') THEN 1 ELSE 0 END) as FAILED_CANCELLED_QUERIES,
        SUM(CASE WHEN UPPER(qh.QUERY_TEXT) LIKE '%ORDER BY%' AND UPPER(qh.QUERY_TEXT) NOT LIKE '%LIMIT%' AND qh.ROWS_PRODUCED > 100000 THEN 1 ELSE 0 END) as UNLIMITED_ORDER_BY_QUERIES,
        SUM(CASE WHEN qh.COMPILATION_TIME > 5000 THEN 1 ELSE 0 END) as HIGH_COMPILE_TIME_QUERIES,
        SUM(CASE WHEN DAYOFWEEK(qh.START_TIME) IN (1, 7) THEN 1 ELSE 0 END) as WEEKEND_QUERIES,
        SUM(CASE WHEN HOUR(qh.START_TIME) NOT BETWEEN 6 AND 22 THEN 1 ELSE 0 END) as OFF_HOURS_QUERIES,

        -- Performance Metrics
        ROUND(AVG(qh.TOTAL_ELAPSED_TIME/1000), 2) as AVG_QUERY_DURATION_SEC,
        ROUND(SUM(qh.BYTES_SCANNED)/1024/1024/1024, 2) as TOTAL_GB_SCANNED,
        SUM(qh.ROWS_PRODUCED) as TOTAL_ROWS_PRODUCED,
        ROUND(AVG(qh.QUERY_LOAD_PERCENT), 2) as AVG_WAREHOUSE_UTILIZATION_PCT

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh
    WHERE qh.START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
        AND qh.WAREHOUSE_ID IS NOT NULL
    GROUP BY qh.WAREHOUSE_ID
)
SELECT 
    wc.WAREHOUSE_ID,
    wc.WAREHOUSE_NAME,
    wc.SIZE as WAREHOUSE_SIZE,
    wc.CLUSTER_COUNT as MAX_CLUSTER_COUNT,
    wc.WAREHOUSE_TYPE,

    -- Credit Information (properly aggregated)
    ROUND(SUM(dwc.DAILY_CREDITS_USED), 3) as TOTAL_CREDITS_CONSUMED,
    ROUND(SUM(dwc.DAILY_CREDITS_COMPUTE), 3) as COMPUTE_CREDITS,
    ROUND(SUM(dwc.DAILY_CREDITS_CLOUD_SERVICES), 3) as CLOUD_SERVICES_CREDITS,
    ROUND(AVG(dwc.DAILY_CREDITS_USED), 3) as AVG_DAILY_CREDITS,

    -- Query Statistics
    qs.TOTAL_QUERIES,
    qs.UNIQUE_USERS,
    qs.ACTIVE_DAYS,
    ROUND(qs.ACTIVE_HOURS::FLOAT / qs.ACTIVE_DAYS, 1) as ACTIVE_HOURS_PER_DAY,

    -- Performance Buckets
    qs.QUERIES_0_TO_1_SEC,
    qs.QUERIES_1_TO_10_SEC,
    qs.QUERIES_10_TO_30_SEC,
    qs.QUERIES_30_TO_60_SEC,
    qs.QUERIES_1_TO_5_MIN,
    qs.QUERIES_5_MIN_PLUS,

    -- Utilization Buckets
    qs.LOW_UTILIZATION_QUERIES,
    qs.MEDIUM_LOW_UTILIZATION_QUERIES,
    qs.MEDIUM_UTILIZATION_QUERIES,
    qs.MEDIUM_HIGH_UTILIZATION_QUERIES,
    qs.HIGH_UTILIZATION_QUERIES,

    -- Bad Practices
    qs.SPILLED_TO_LOCAL_QUERIES,
    qs.SPILLED_TO_REMOTE_QUERIES,
    qs.SELECT_STAR_QUERIES,
    qs.UNPARTITIONED_SCAN_QUERIES,
    qs.ZERO_RESULT_QUERIES,
    qs.FAILED_CANCELLED_QUERIES,
    qs.UNLIMITED_ORDER_BY_QUERIES,
    qs.HIGH_COMPILE_TIME_QUERIES,
    qs.WEEKEND_QUERIES,
    qs.OFF_HOURS_QUERIES,

    -- Performance Metrics
    qs.AVG_QUERY_DURATION_SEC,
    qs.TOTAL_GB_SCANNED,
    qs.TOTAL_ROWS_PRODUCED,
    qs.AVG_WAREHOUSE_UTILIZATION_PCT,

    -- Efficiency Metrics
    ROUND(qs.TOTAL_QUERIES::FLOAT / SUM(dwc.DAILY_CREDITS_USED), 2) as QUERIES_PER_CREDIT,
    ROUND(qs.TOTAL_GB_SCANNED::FLOAT / SUM(dwc.DAILY_CREDITS_USED), 2) as GB_SCANNED_PER_CREDIT,

    -- Timestamps
    wc.FIRST_SEEN,
    wc.LAST_SEEN,
    CURRENT_TIMESTAMP() as ANALYSIS_TIMESTAMP

FROM warehouse_config wc
LEFT JOIN daily_warehouse_credits dwc ON wc.WAREHOUSE_ID = dwc.WAREHOUSE_ID
LEFT JOIN query_stats qs ON wc.WAREHOUSE_ID = qs.WAREHOUSE_ID
WHERE dwc.WAREHOUSE_ID IS NOT NULL  -- Only warehouses with actual usage
GROUP BY 
    wc.WAREHOUSE_ID, wc.WAREHOUSE_NAME, wc.SIZE, wc.CLUSTER_COUNT, wc.WAREHOUSE_TYPE,
    qs.TOTAL_QUERIES, qs.UNIQUE_USERS, qs.ACTIVE_DAYS, qs.ACTIVE_HOURS,
    qs.QUERIES_0_TO_1_SEC, qs.QUERIES_1_TO_10_SEC, qs.QUERIES_10_TO_30_SEC,
    qs.QUERIES_30_TO_60_SEC, qs.QUERIES_1_TO_5_MIN, qs.QUERIES_5_MIN_PLUS,
    qs.LOW_UTILIZATION_QUERIES, qs.MEDIUM_LOW_UTILIZATION_QUERIES, qs.MEDIUM_UTILIZATION_QUERIES,
    qs.MEDIUM_HIGH_UTILIZATION_QUERIES, qs.HIGH_UTILIZATION_QUERIES,
    qs.SPILLED_TO_LOCAL_QUERIES, qs.SPILLED_TO_REMOTE_QUERIES, qs.SELECT_STAR_QUERIES,
    qs.UNPARTITIONED_SCAN_QUERIES, qs.ZERO_RESULT_QUERIES, qs.FAILED_CANCELLED_QUERIES,
    qs.UNLIMITED_ORDER_BY_QUERIES, qs.HIGH_COMPILE_TIME_QUERIES, qs.WEEKEND_QUERIES, qs.OFF_HOURS_QUERIES,
    qs.AVG_QUERY_DURATION_SEC, qs.TOTAL_GB_SCANNED, qs.TOTAL_ROWS_PRODUCED, qs.AVG_WAREHOUSE_UTILIZATION_PCT,
    wc.FIRST_SEEN, wc.LAST_SEEN
ORDER BY TOTAL_CREDITS_CONSUMED DESC;
Enter fullscreen mode Exit fullscreen mode
CREATE OR REPLACE TABLE ANALYTICS.WAREHOUSE_SUMMARY AS
WITH 
-- Pre-aggregate warehouse metering to avoid many-to-many
warehouse_daily_credits AS (
    SELECT 
        WAREHOUSE_ID,
        WAREHOUSE_NAME,
        DATE(START_TIME) as USAGE_DATE,
        SUM(CREDITS_USED) as DAILY_CREDITS,
        SUM(CREDITS_USED_COMPUTE) as DAILY_CREDITS_COMPUTE,
        SUM(CREDITS_USED_CLOUD_SERVICES) as DAILY_CREDITS_CLOUD_SERVICES
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY
    WHERE START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
    GROUP BY WAREHOUSE_ID, WAREHOUSE_NAME, DATE(START_TIME)
),

-- Get latest warehouse configuration (avoid grouping by changing attributes)
warehouse_config AS (
    SELECT 
        WAREHOUSE_ID,
        WAREHOUSE_NAME,
        FIRST_VALUE(WAREHOUSE_SIZE) OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) as CURRENT_SIZE,
        FIRST_VALUE(MIN_CLUSTER_COUNT) OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) as MIN_CLUSTER_COUNT,
        FIRST_VALUE(MAX_CLUSTER_COUNT) OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) as MAX_CLUSTER_COUNT,
        FIRST_VALUE(AUTO_SUSPEND) OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) as AUTO_SUSPEND_SECONDS,
        FIRST_VALUE(AUTO_RESUME) OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) as AUTO_RESUME_ENABLED,
        FIRST_VALUE(WAREHOUSE_TYPE) OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) as WAREHOUSE_TYPE,
        MIN(CREATED) as FIRST_CREATED,
        MAX(LAST_ALTERED) as LAST_MODIFIED
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSES
    WHERE DELETED IS NULL
    QUALIFY ROW_NUMBER() OVER (PARTITION BY WAREHOUSE_ID ORDER BY LAST_ALTERED DESC) = 1
),

-- Clean query metrics (SUCCESS queries only, no duplicates)
query_metrics AS (
    SELECT 
        WAREHOUSE_ID,
        COUNT(DISTINCT QUERY_ID) as TOTAL_QUERIES,
        COUNT(DISTINCT USER_NAME) as UNIQUE_USERS,
        COUNT(DISTINCT DATE(START_TIME)) as ACTIVE_DAYS,
        COUNT(DISTINCT HOUR(START_TIME)) as UNIQUE_HOURS,

        -- Performance Buckets (SUCCESS queries only)
        SUM(CASE WHEN TOTAL_ELAPSED_TIME <= 1000 THEN 1 ELSE 0 END) as QUERIES_0_TO_1_SEC,
        SUM(CASE WHEN TOTAL_ELAPSED_TIME > 1000 AND TOTAL_ELAPSED_TIME <= 10000 THEN 1 ELSE 0 END) as QUERIES_1_TO_10_SEC,
        SUM(CASE WHEN TOTAL_ELAPSED_TIME > 10000 AND TOTAL_ELAPSED_TIME <= 30000 THEN 1 ELSE 0 END) as QUERIES_10_TO_30_SEC,
        SUM(CASE WHEN TOTAL_ELAPSED_TIME > 30000 AND TOTAL_ELAPSED_TIME <= 60000 THEN 1 ELSE 0 END) as QUERIES_30_TO_60_SEC,
        SUM(CASE WHEN TOTAL_ELAPSED_TIME > 60000 AND TOTAL_ELAPSED_TIME <= 300000 THEN 1 ELSE 0 END) as QUERIES_1_TO_5_MIN,
        SUM(CASE WHEN TOTAL_ELAPSED_TIME > 300000 THEN 1 ELSE 0 END) as QUERIES_5_MIN_PLUS,

        -- Utilization Buckets (based on QUERY_LOAD_PERCENT)
        SUM(CASE WHEN QUERY_LOAD_PERCENT <= 20 THEN 1 ELSE 0 END) as LOW_UTILIZATION_QUERIES,      -- 0-20%
        SUM(CASE WHEN QUERY_LOAD_PERCENT > 20 AND QUERY_LOAD_PERCENT <= 40 THEN 1 ELSE 0 END) as MEDIUM_UTILIZATION_QUERIES,  -- 21-40%
        SUM(CASE WHEN QUERY_LOAD_PERCENT > 40 AND QUERY_LOAD_PERCENT <= 60 THEN 1 ELSE 0 END) as HIGH_UTILIZATION_QUERIES,    -- 41-60%
        SUM(CASE WHEN QUERY_LOAD_PERCENT > 60 AND QUERY_LOAD_PERCENT <= 80 THEN 1 ELSE 0 END) as VERY_HIGH_UTILIZATION_QUERIES, -- 61-80%
        SUM(CASE WHEN QUERY_LOAD_PERCENT > 80 THEN 1 ELSE 0 END) as PEAK_UTILIZATION_QUERIES,    -- 81-100%

        -- Bad Practice Categories
        SUM(CASE WHEN BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 1 ELSE 0 END) as SPILLED_TO_LOCAL_QUERIES,
        SUM(CASE WHEN BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 1 ELSE 0 END) as SPILLED_TO_REMOTE_QUERIES,
        SUM(CASE WHEN UPPER(QUERY_TEXT) LIKE '%SELECT *%' THEN 1 ELSE 0 END) as SELECT_STAR_QUERIES,
        SUM(CASE WHEN PARTITIONS_TOTAL > 0 AND PERCENTAGE_SCANNED_FROM_CACHE < 10 THEN 1 ELSE 0 END) as UNPARTITIONED_SCAN_QUERIES,
        SUM(CASE WHEN ROWS_PRODUCED = 0 AND BYTES_SCANNED > 0 THEN 1 ELSE 0 END) as ZERO_RESULT_QUERIES,
        SUM(CASE WHEN COMPILATION_TIME > 5000 THEN 1 ELSE 0 END) as HIGH_COMPILE_TIME_QUERIES,
        SUM(CASE WHEN DAYOFWEEK(START_TIME) IN (1, 7) THEN 1 ELSE 0 END) as WEEKEND_QUERIES,
        SUM(CASE WHEN HOUR(START_TIME) < 6 OR HOUR(START_TIME) > 22 THEN 1 ELSE 0 END) as OFF_HOURS_QUERIES,

        -- Resource Metrics
        ROUND(SUM(BYTES_SCANNED)/1024/1024/1024, 2) as TOTAL_GB_SCANNED,
        ROUND(SUM(BYTES_WRITTEN)/1024/1024/1024, 2) as TOTAL_GB_WRITTEN,
        SUM(ROWS_PRODUCED) as TOTAL_ROWS_PRODUCED,
        ROUND(AVG(TOTAL_ELAPSED_TIME)/1000, 2) as AVG_QUERY_DURATION_SEC,
        ROUND(AVG(QUERY_LOAD_PERCENT), 2) as AVG_WAREHOUSE_UTILIZATION_PCT

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
    WHERE START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
        AND EXECUTION_STATUS = 'SUCCESS'
        AND WAREHOUSE_ID IS NOT NULL
    GROUP BY WAREHOUSE_ID
),

-- Failed/Cancelled query metrics
failed_query_metrics AS (
    SELECT 
        WAREHOUSE_ID,
        COUNT(DISTINCT QUERY_ID) as FAILED_QUERIES,
        COUNT(DISTINCT CASE WHEN EXECUTION_STATUS = 'CANCELLED' THEN QUERY_ID END) as CANCELLED_QUERIES,
        COUNT(DISTINCT CASE WHEN EXECUTION_STATUS = 'FAIL' THEN QUERY_ID END) as ERROR_QUERIES
    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
    WHERE START_TIME >= DATEADD('day', -7, CURRENT_TIMESTAMP())
        AND EXECUTION_STATUS IN ('FAIL', 'CANCELLED')
        AND WAREHOUSE_ID IS NOT NULL
    GROUP BY WAREHOUSE_ID
)

SELECT 
    wc.WAREHOUSE_ID,
    wc.WAREHOUSE_NAME,
    wc.CURRENT_SIZE,
    wc.MIN_CLUSTER_COUNT,
    wc.MAX_CLUSTER_COUNT,
    wc.AUTO_SUSPEND_SECONDS,
    wc.AUTO_RESUME_ENABLED,
    wc.WAREHOUSE_TYPE,
    wc.FIRST_CREATED,
    wc.LAST_MODIFIED,

    -- Credit Metrics (from pre-aggregated data)
    ROUND(SUM(wdc.DAILY_CREDITS), 2) as TOTAL_CREDITS_CONSUMED,
    ROUND(SUM(wdc.DAILY_CREDITS_COMPUTE), 2) as COMPUTE_CREDITS,
    ROUND(SUM(wdc.DAILY_CREDITS_CLOUD_SERVICES), 2) as CLOUD_SERVICES_CREDITS,
    ROUND(AVG(wdc.DAILY_CREDITS), 2) as AVG_CREDITS_PER_DAY,

    -- Query Metrics
    COALESCE(qm.TOTAL_QUERIES, 0) as TOTAL_QUERIES,
    COALESCE(qm.UNIQUE_USERS, 0) as UNIQUE_USERS,
    COALESCE(qm.ACTIVE_DAYS, 0) as ACTIVE_DAYS,
    ROUND(COALESCE(qm.UNIQUE_HOURS, 0) / GREATEST(COALESCE(qm.ACTIVE_DAYS, 1), 1), 1) as ACTIVE_HOURS_PER_DAY,

    -- Performance Buckets
    COALESCE(qm.QUERIES_0_TO_1_SEC, 0) as QUERIES_0_TO_1_SEC,
    COALESCE(qm.QUERIES_1_TO_10_SEC, 0) as QUERIES_1_TO_10_SEC,
    COALESCE(qm.QUERIES_10_TO_30_SEC, 0) as QUERIES_10_TO_30_SEC,
    COALESCE(qm.QUERIES_30_TO_60_SEC, 0) as QUERIES_30_TO_60_SEC,
    COALESCE(qm.QUERIES_1_TO_5_MIN, 0) as QUERIES_1_TO_5_MIN,
    COALESCE(qm.QUERIES_5_MIN_PLUS, 0) as QUERIES_5_MIN_PLUS,

    -- Utilization Buckets
    COALESCE(qm.LOW_UTILIZATION_QUERIES, 0) as LOW_UTILIZATION_QUERIES,
    COALESCE(qm.MEDIUM_UTILIZATION_QUERIES, 0) as MEDIUM_UTILIZATION_QUERIES,
    COALESCE(qm.HIGH_UTILIZATION_QUERIES, 0) as HIGH_UTILIZATION_QUERIES,
    COALESCE(qm.VERY_HIGH_UTILIZATION_QUERIES, 0) as VERY_HIGH_UTILIZATION_QUERIES,
    COALESCE(qm.PEAK_UTILIZATION_QUERIES, 0) as PEAK_UTILIZATION_QUERIES,

    -- Bad Practices
    COALESCE(qm.SPILLED_TO_LOCAL_QUERIES, 0) as SPILLED_TO_LOCAL_QUERIES,
    COALESCE(qm.SPILLED_TO_REMOTE_QUERIES, 0) as SPILLED_TO_REMOTE_QUERIES,
    COALESCE(qm.SELECT_STAR_QUERIES, 0) as SELECT_STAR_QUERIES,
    COALESCE(qm.UNPARTITIONED_SCAN_QUERIES, 0) as UNPARTITIONED_SCAN_QUERIES,
    COALESCE(qm.ZERO_RESULT_QUERIES, 0) as ZERO_RESULT_QUERIES,
    COALESCE(qm.HIGH_COMPILE_TIME_QUERIES, 0) as HIGH_COMPILE_TIME_QUERIES,
    COALESCE(qm.WEEKEND_QUERIES, 0) as WEEKEND_QUERIES,
    COALESCE(qm.OFF_HOURS_QUERIES, 0) as OFF_HOURS_QUERIES,

    -- Failed Queries
    COALESCE(fqm.FAILED_QUERIES, 0) as FAILED_QUERIES,
    COALESCE(fqm.CANCELLED_QUERIES, 0) as CANCELLED_QUERIES,
    COALESCE(fqm.ERROR_QUERIES, 0) as ERROR_QUERIES,

    -- Resource Usage
    COALESCE(qm.TOTAL_GB_SCANNED, 0) as TOTAL_GB_SCANNED,
    COALESCE(qm.TOTAL_GB_WRITTEN, 0) as TOTAL_GB_WRITTEN,
    COALESCE(qm.TOTAL_ROWS_PRODUCED, 0) as TOTAL_ROWS_PRODUCED,
    COALESCE(qm.AVG_QUERY_DURATION_SEC, 0) as AVG_QUERY_DURATION_SEC,
    COALESCE(qm.AVG_WAREHOUSE_UTILIZATION_PCT, 0) as AVG_WAREHOUSE_UTILIZATION_PCT,

    CURRENT_TIMESTAMP() as LAST_UPDATED

FROM warehouse_config wc
LEFT JOIN warehouse_daily_credits wdc ON wc.WAREHOUSE_ID = wdc.WAREHOUSE_ID 
LEFT JOIN query_metrics qm ON wc.WAREHOUSE_ID = qm.WAREHOUSE_ID
LEFT JOIN failed_query_metrics fqm ON wc.WAREHOUSE_ID = fqm.WAREHOUSE_ID
GROUP BY 
    wc.WAREHOUSE_ID, wc.WAREHOUSE_NAME, wc.CURRENT_SIZE, wc.MIN_CLUSTER_COUNT, 
    wc.MAX_CLUSTER_COUNT, wc.AUTO_SUSPEND_SECONDS, wc.AUTO_RESUME_ENABLED, 
    wc.WAREHOUSE_TYPE, wc.FIRST_CREATED, wc.LAST_MODIFIED,
    qm.TOTAL_QUERIES, qm.UNIQUE_USERS, qm.ACTIVE_DAYS, qm.UNIQUE_HOURS,
    qm.QUERIES_0_TO_1_SEC, qm.QUERIES_1_TO_10_SEC, qm.QUERIES_10_TO_30_SEC,
    qm.QUERIES_30_TO_60_SEC, qm.QUERIES_1_TO_5_MIN, qm.QUERIES_5_MIN_PLUS,
    qm.LOW_UTILIZATION_QUERIES, qm.MEDIUM_UTILIZATION_QUERIES, qm.HIGH_UTILIZATION_QUERIES,
    qm.VERY_HIGH_UTILIZATION_QUERIES, qm.PEAK_UTILIZATION_QUERIES,
    qm.SPILLED_TO_LOCAL_QUERIES, qm.SPILLED_TO_REMOTE_QUERIES, qm.SELECT_STAR_QUERIES,
    qm.UNPARTITIONED_SCAN_QUERIES, qm.ZERO_RESULT_QUERIES, qm.HIGH_COMPILE_TIME_QUERIES,
    qm.WEEKEND_QUERIES, qm.OFF_HOURS_QUERIES, qm.TOTAL_GB_SCANNED, qm.TOTAL_GB_WRITTEN,
    qm.TOTAL_ROWS_PRODUCED, qm.AVG_QUERY_DURATION_SEC, qm.AVG_WAREHOUSE_UTILIZATION_PCT,
    fqm.FAILED_QUERIES, fqm.CANCELLED_QUERIES, fqm.ERROR_QUERIES;
Enter fullscreen mode Exit fullscreen mode

Top comments (0)