DEV Community

Armaan Khan
Armaan Khan

Posted on

new queries

-- Create comprehensive warehouse analytics table
CREATE OR REPLACE TABLE WAREHOUSE_ANALYTICS_DASHBOARD AS
WITH warehouse_info AS (
    -- Get warehouse metadata (note: warehouse details need to be extracted from query history and warehouse events)
    SELECT DISTINCT
        wh.WAREHOUSE_ID,
        wh.WAREHOUSE_NAME,
        wh.SIZE,
        wh.WAREHOUSE_TYPE,
        wh.CLUSTER_COUNT,
        -- Note: SUSPEND_POLICY, MIN_CLUSTER_COUNT, MAX_CLUSTER_COUNT are not in the provided schema
        -- These would typically come from SHOW WAREHOUSES command results
        NULL as SUSPEND_POLICY,
        NULL as MIN_CLUSTER_COUNT, 
        NULL as MAX_CLUSTER_COUNT
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY wh
    WHERE wh.TIMESTAMP >= CURRENT_DATE - 1
),

query_buckets AS (
    SELECT 
        q.WAREHOUSE_ID,
        q.WAREHOUSE_NAME,
        q.QUERY_ID,
        q.TOTAL_ELAPSED_TIME,
        q.EXECUTION_STATUS,
        q.CREDITS_USED_CLOUD_SERVICES,
        q.BYTES_SPILLED_TO_LOCAL_STORAGE,
        q.BYTES_SPILLED_TO_REMOTE_STORAGE,

        -- Query duration buckets (in milliseconds to seconds)
        CASE 
            WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 20000 THEN '10-20 seconds' 
            WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN '20-60 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 180000 THEN '1-3 minutes'
            WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN '3-5 minutes'
            ELSE '5+ minutes'
        END as DURATION_BUCKET,

        -- Query duration buckets for queued queries (in milliseconds)
        CASE 
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 120000 THEN '1-2 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 300000 THEN '2-5 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 600000 THEN '5-10 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 1200000 THEN '10-20 minutes'
            ELSE '20+ minutes'
        END as QUEUED_BUCKET,

        -- Credit utilization buckets
        CASE 
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.2 THEN '0-20 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.4 THEN '20-40 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.6 THEN '40-60 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.8 THEN '60-80 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 1.0 THEN '80-100 cents'
            ELSE '100+ cents'
        END as CREDIT_UTILIZATION_BUCKET

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
    WHERE q.START_TIME >= CURRENT_DATE - 1
        AND q.WAREHOUSE_ID IS NOT NULL
),

warehouse_metrics AS (
    SELECT 
        wm.WAREHOUSE_ID,
        wm.WAREHOUSE_NAME,
        SUM(wm.CREDITS_USED) as TOTAL_CREDITS_USED,
        SUM(wm.CREDITS_USED_COMPUTE) as TOTAL_COMPUTE_CREDITS,
        SUM(wm.CREDITS_USED_CLOUD_SERVICES) as TOTAL_CLOUD_SERVICES_CREDITS
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY wm
    WHERE wm.START_TIME >= CURRENT_DATE - 1
    GROUP BY wm.WAREHOUSE_ID, wm.WAREHOUSE_NAME
)

SELECT 
    -- Warehouse Information
    COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID) as WAREHOUSE_ID,
    COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME) as WAREHOUSE_NAME,
    wi.SIZE as WAREHOUSE_SIZE,
    wi.WAREHOUSE_TYPE,
    wi.CLUSTER_COUNT,
    wi.SUSPEND_POLICY,
    wi.MIN_CLUSTER_COUNT,
    wi.MAX_CLUSTER_COUNT,

    -- Query Duration Buckets
    COUNT(CASE WHEN qb.DURATION_BUCKET = '1-10 seconds' THEN 1 END) as QUERIES_1_10_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '10-20 seconds' THEN 1 END) as QUERIES_10_20_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '20-60 seconds' THEN 1 END) as QUERIES_20_60_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '1-3 minutes' THEN 1 END) as QUERIES_1_3_MIN,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '3-5 minutes' THEN 1 END) as QUERIES_3_5_MIN,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '5+ minutes' THEN 1 END) as QUERIES_5_PLUS_MIN,

    -- Queued Query Buckets
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '1-2 minutes' THEN 1 END) as QUEUED_1_2_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '2-5 minutes' THEN 1 END) as QUEUED_2_5_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '5-10 minutes' THEN 1 END) as QUEUED_5_10_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '10-20 minutes' THEN 1 END) as QUEUED_10_20_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '20+ minutes' THEN 1 END) as QUEUED_20_PLUS_MIN,

    -- Spilled Queries
    COUNT(CASE WHEN qb.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_LOCAL,
    COUNT(CASE WHEN qb.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_REMOTE,
    SUM(qb.BYTES_SPILLED_TO_LOCAL_STORAGE) as TOTAL_BYTES_SPILLED_LOCAL,
    SUM(qb.BYTES_SPILLED_TO_REMOTE_STORAGE) as TOTAL_BYTES_SPILLED_REMOTE,

    -- Failed Queries
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'FAIL' THEN 1 END) as FAILED_QUERIES,
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'SUCCESS' THEN 1 END) as SUCCESSFUL_QUERIES,
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'RUNNING' THEN 1 END) as RUNNING_QUERIES,

    -- Credit Utilization Buckets
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '0-20 cents' THEN 1 END) as QUERIES_0_20_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '20-40 cents' THEN 1 END) as QUERIES_20_40_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '40-60 cents' THEN 1 END) as QUERIES_40_60_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '60-80 cents' THEN 1 END) as QUERIES_60_80_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '80-100 cents' THEN 1 END) as QUERIES_80_100_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '100+ cents' THEN 1 END) as QUERIES_100_PLUS_CENTS,

    -- Overall Metrics
    COUNT(qb.QUERY_ID) as TOTAL_QUERIES,
    COALESCE(wm.TOTAL_CREDITS_USED, 0) as TOTAL_CREDITS_USED,
    COALESCE(wm.TOTAL_COMPUTE_CREDITS, 0) as TOTAL_COMPUTE_CREDITS,
    COALESCE(wm.TOTAL_CLOUD_SERVICES_CREDITS, 0) as TOTAL_CLOUD_SERVICES_CREDITS,

    -- Analysis timestamp
    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM warehouse_info wi
FULL OUTER JOIN query_buckets qb 
    ON wi.WAREHOUSE_ID = qb.WAREHOUSE_ID
FULL OUTER JOIN warehouse_metrics wm 
    ON COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID) = wm.WAREHOUSE_ID

GROUP BY 
    COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID),
    COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME),
    wi.SIZE,
    wi.WAREHOUSE_TYPE,
    wi.CLUSTER_COUNT,
    wi.SUSPEND_POLICY,
    wi.MIN_CLUSTER_COUNT,
    wi.MAX_CLUSTER_COUNT,
    wm.TOTAL_CREDITS_USED,
    wm.TOTAL_COMPUTE_CREDITS,
    wm.TOTAL_CLOUD_SERVICES_CREDITS

ORDER BY TOTAL_QUERIES DESC;

-- Create a procedure to refresh the table data
CREATE OR REPLACE PROCEDURE REFRESH_WAREHOUSE_ANALYTICS()
RETURNS STRING
LANGUAGE SQL
AS
$$
BEGIN
    -- Recreate the table with fresh data
    CREATE OR REPLACE TABLE WAREHOUSE_ANALYTICS_DASHBOARD AS
    WITH warehouse_info AS (
        SELECT DISTINCT
            wh.WAREHOUSE_ID,
            wh.WAREHOUSE_NAME,
            wh.SIZE,
            wh.WAREHOUSE_TYPE,
            wh.CLUSTER_COUNT,
            NULL as SUSPEND_POLICY,
            NULL as MIN_CLUSTER_COUNT, 
            NULL as MAX_CLUSTER_COUNT
        FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY wh
        WHERE wh.TIMESTAMP >= CURRENT_DATE - 1
    ),

    query_buckets AS (
        SELECT 
            q.WAREHOUSE_ID,
            q.WAREHOUSE_NAME,
            q.QUERY_ID,
            q.TOTAL_ELAPSED_TIME,
            q.EXECUTION_STATUS,
            q.CREDITS_USED_CLOUD_SERVICES,
            q.BYTES_SPILLED_TO_LOCAL_STORAGE,
            q.BYTES_SPILLED_TO_REMOTE_STORAGE,

            CASE 
                WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10 seconds'
                WHEN q.TOTAL_ELAPSED_TIME <= 20000 THEN '10-20 seconds' 
                WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN '20-60 seconds'
                WHEN q.TOTAL_ELAPSED_TIME <= 180000 THEN '1-3 minutes'
                WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN '3-5 minutes'
                ELSE '5+ minutes'
            END as DURATION_BUCKET,

            CASE 
                WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 120000 THEN '1-2 minutes'
                WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 300000 THEN '2-5 minutes'
                WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 600000 THEN '5-10 minutes'
                WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 1200000 THEN '10-20 minutes'
                ELSE '20+ minutes'
            END as QUEUED_BUCKET,

            CASE 
                WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.2 THEN '0-20 cents'
                WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.4 THEN '20-40 cents'
                WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.6 THEN '40-60 cents'
                WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.8 THEN '60-80 cents'
                WHEN q.CREDITS_USED_CLOUD_SERVICES <= 1.0 THEN '80-100 cents'
                ELSE '100+ cents'
            END as CREDIT_UTILIZATION_BUCKET

        FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
        WHERE q.START_TIME >= CURRENT_DATE - 1
            AND q.WAREHOUSE_ID IS NOT NULL
    ),

    warehouse_metrics AS (
        SELECT 
            wm.WAREHOUSE_ID,
            wm.WAREHOUSE_NAME,
            SUM(wm.CREDITS_USED) as TOTAL_CREDITS_USED,
            SUM(wm.CREDITS_USED_COMPUTE) as TOTAL_COMPUTE_CREDITS,
            SUM(wm.CREDITS_USED_CLOUD_SERVICES) as TOTAL_CLOUD_SERVICES_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY wm
        WHERE wm.START_TIME >= CURRENT_DATE - 1
        GROUP BY wm.WAREHOUSE_ID, wm.WAREHOUSE_NAME
    )

    SELECT 
        COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID) as WAREHOUSE_ID,
        COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME) as WAREHOUSE_NAME,
        wi.SIZE as WAREHOUSE_SIZE,
        wi.WAREHOUSE_TYPE,
        wi.CLUSTER_COUNT,
        wi.SUSPEND_POLICY,
        wi.MIN_CLUSTER_COUNT,
        wi.MAX_CLUSTER_COUNT,

        COUNT(CASE WHEN qb.DURATION_BUCKET = '1-10 seconds' THEN 1 END) as QUERIES_1_10_SEC,
        COUNT(CASE WHEN qb.DURATION_BUCKET = '10-20 seconds' THEN 1 END) as QUERIES_10_20_SEC,
        COUNT(CASE WHEN qb.DURATION_BUCKET = '20-60 seconds' THEN 1 END) as QUERIES_20_60_SEC,
        COUNT(CASE WHEN qb.DURATION_BUCKET = '1-3 minutes' THEN 1 END) as QUERIES_1_3_MIN,
        COUNT(CASE WHEN qb.DURATION_BUCKET = '3-5 minutes' THEN 1 END) as QUERIES_3_5_MIN,
        COUNT(CASE WHEN qb.DURATION_BUCKET = '5+ minutes' THEN 1 END) as QUERIES_5_PLUS_MIN,

        COUNT(CASE WHEN qb.QUEUED_BUCKET = '1-2 minutes' THEN 1 END) as QUEUED_1_2_MIN,
        COUNT(CASE WHEN qb.QUEUED_BUCKET = '2-5 minutes' THEN 1 END) as QUEUED_2_5_MIN,
        COUNT(CASE WHEN qb.QUEUED_BUCKET = '5-10 minutes' THEN 1 END) as QUEUED_5_10_MIN,
        COUNT(CASE WHEN qb.QUEUED_BUCKET = '10-20 minutes' THEN 1 END) as QUEUED_10_20_MIN,
        COUNT(CASE WHEN qb.QUEUED_BUCKET = '20+ minutes' THEN 1 END) as QUEUED_20_PLUS_MIN,

        COUNT(CASE WHEN qb.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_LOCAL,
        COUNT(CASE WHEN qb.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_REMOTE,
        SUM(qb.BYTES_SPILLED_TO_LOCAL_STORAGE) as TOTAL_BYTES_SPILLED_LOCAL,
        SUM(qb.BYTES_SPILLED_TO_REMOTE_STORAGE) as TOTAL_BYTES_SPILLED_REMOTE,

        COUNT(CASE WHEN qb.EXECUTION_STATUS = 'FAIL' THEN 1 END) as FAILED_QUERIES,
        COUNT(CASE WHEN qb.EXECUTION_STATUS = 'SUCCESS' THEN 1 END) as SUCCESSFUL_QUERIES,
        COUNT(CASE WHEN qb.EXECUTION_STATUS = 'RUNNING' THEN 1 END) as RUNNING_QUERIES,

        COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '0-20 cents' THEN 1 END) as QUERIES_0_20_CENTS,
        COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '20-40 cents' THEN 1 END) as QUERIES_20_40_CENTS,
        COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '40-60 cents' THEN 1 END) as QUERIES_40_60_CENTS,
        COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '60-80 cents' THEN 1 END) as QUERIES_60_80_CENTS,
        COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '80-100 cents' THEN 1 END) as QUERIES_80_100_CENTS,
        COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '100+ cents' THEN 1 END) as QUERIES_100_PLUS_CENTS,

        COUNT(qb.QUERY_ID) as TOTAL_QUERIES,
        COALESCE(wm.TOTAL_CREDITS_USED, 0) as TOTAL_CREDITS_USED,
        COALESCE(wm.TOTAL_COMPUTE_CREDITS, 0) as TOTAL_COMPUTE_CREDITS,
        COALESCE(wm.TOTAL_CLOUD_SERVICES_CREDITS, 0) as TOTAL_CLOUD_SERVICES_CREDITS,

        CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
        CURRENT_DATE - 1 as ANALYSIS_DATE

    FROM warehouse_info wi
    FULL OUTER JOIN query_buckets qb 
        ON wi.WAREHOUSE_ID = qb.WAREHOUSE_ID
    FULL OUTER JOIN warehouse_metrics wm 
        ON COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID) = wm.WAREHOUSE_ID

    GROUP BY 
        COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID),
        COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME),
        wi.SIZE,
        wi.WAREHOUSE_TYPE,
        wi.CLUSTER_COUNT,
        wi.SUSPEND_POLICY,
        wi.MIN_CLUSTER_COUNT,
        wi.MAX_CLUSTER_COUNT,
        wm.TOTAL_CREDITS_USED,
        wm.TOTAL_COMPUTE_CREDITS,
        wm.TOTAL_CLOUD_SERVICES_CREDITS

    ORDER BY TOTAL_QUERIES DESC;

    RETURN 'WAREHOUSE_ANALYTICS_DASHBOARD table refreshed successfully at ' || CURRENT_TIMESTAMP;
END;
$$;

-- To refresh the data anytime, simply run:
-- CALL REFRESH_WAREHOUSE_ANALYTICS();

-- Query to view the results
SELECT * FROM WAREHOUSE_ANALYTICS_DASHBOARD
ORDER BY TOTAL_QUERIES DESC;
Enter fullscreen mode Exit fullscreen mode

Top comments (0)