DEV Community

Armaan Khan
Armaan Khan

Posted on

Queries

For example: you have all of this data, already extracted in Python — I gave you the query and you extracted all the data from the table. Now, for the warehouse table (which is keyed by query ID): when the warehouse table is shown in React and I click on, say, "spilled to remote" queries, I want one function that takes the column name, reads the IDs for that particular query type from the warehouse table's QUERY_IDS column object, looks them up in the query summary table, and then — based on those IDs — groups by user and renders a table showing each user who ran those spilled queries, together with a count.


If I pass the query ID array to this: I want a function that takes the output of this query as a DataFrame plus the query IDs, and returns a result grouped by user — e.g. this user ran 24 of them, another user ran the rest — as a DataFrame with columns: user name, query count, and an array of all the query IDs for that user (this last column will be hidden). Then a second function that takes an ID and shows the query text and ID in a table with a "click to view full details" button; when that button is clicked, we pass the ID on to the details function.
-- =====================================================
-- TABLE 1: QUERY_HISTORY_SUMMARY 
-- This table provides all query overview data for drilling down
-- =====================================================
-- Lightweight snapshot (CTAS) of SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY for
-- queries started since yesterday. Long text fields are truncated so the
-- table stays cheap to scan; full text lives in QUERY_DETAILS_COMPLETE.
-- NOTE(review): ACCOUNT_USAGE views lag real time (typically up to ~45 min),
-- so the newest queries may be missing from each refresh — confirm the
-- dashboard's freshness expectations.

CREATE OR REPLACE TABLE QUERY_HISTORY_SUMMARY AS
SELECT 
    -- Query Identification
    q.QUERY_ID,
    q.QUERY_HASH,
    q.QUERY_PARAMETERIZED_HASH,
    LEFT(q.QUERY_TEXT, 100) as QUERY_TEXT_PREVIEW,  -- first 100 chars only; full text in QUERY_DETAILS_COMPLETE
    q.QUERY_TYPE,
    q.QUERY_TAG,

    -- Timing Information (durations are in milliseconds)
    q.START_TIME,
    q.END_TIME,
    q.TOTAL_ELAPSED_TIME,
    q.COMPILATION_TIME,
    q.EXECUTION_TIME,

    -- User and Session Info
    q.USER_NAME,
    q.USER_TYPE,
    q.ROLE_NAME,
    q.ROLE_TYPE,
    q.SESSION_ID,

    -- Warehouse Information
    q.WAREHOUSE_ID,
    q.WAREHOUSE_NAME,
    q.WAREHOUSE_SIZE,
    q.WAREHOUSE_TYPE,
    q.CLUSTER_NUMBER,

    -- Database Context
    q.DATABASE_ID,
    q.DATABASE_NAME,
    q.SCHEMA_ID,
    q.SCHEMA_NAME,
    q.USER_DATABASE_NAME,
    q.USER_SCHEMA_NAME,

    -- Execution Status
    q.EXECUTION_STATUS,
    q.ERROR_CODE,
    LEFT(q.ERROR_MESSAGE, 200) as ERROR_MESSAGE_PREVIEW,  -- truncated; see details table for full message

    -- Performance Metrics
    q.BYTES_SCANNED,
    q.PERCENTAGE_SCANNED_FROM_CACHE,
    q.BYTES_WRITTEN,
    q.ROWS_PRODUCED,
    q.ROWS_INSERTED,
    q.ROWS_UPDATED,
    q.ROWS_DELETED,

    -- Resource Usage
    q.CREDITS_USED_CLOUD_SERVICES,
    q.BYTES_SPILLED_TO_LOCAL_STORAGE,
    q.BYTES_SPILLED_TO_REMOTE_STORAGE,
    q.PARTITIONS_SCANNED,
    q.PARTITIONS_TOTAL,

    -- Queue Times
    q.QUEUED_PROVISIONING_TIME,
    q.QUEUED_REPAIR_TIME,
    q.QUEUED_OVERLOAD_TIME,
    q.TRANSACTION_BLOCKED_TIME,

    -- Classification Buckets for easy filtering
    -- Thresholds are milliseconds (10000 ms = 10 s, 300000 ms = 5 min).
    CASE 
        WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10 seconds'
        WHEN q.TOTAL_ELAPSED_TIME <= 20000 THEN '10-20 seconds' 
        WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN '20-60 seconds'
        WHEN q.TOTAL_ELAPSED_TIME <= 180000 THEN '1-3 minutes'
        WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN '3-5 minutes'
        ELSE '5+ minutes'
    END as DURATION_BUCKET,

    -- NOTE(review): labels assume 1 credit ~= $1 (so 0.2 credits = 20 cents);
    -- confirm against the account's actual credit price before showing as money.
    CASE 
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.2 THEN '0-20 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.4 THEN '20-40 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.6 THEN '40-60 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.8 THEN '60-80 cents'
        WHEN q.CREDITS_USED_CLOUD_SERVICES <= 1.0 THEN '80-100 cents'
        ELSE '100+ cents'
    END as CREDIT_BUCKET,

    -- Any spilling at all (local or remote) marks the query as SPILLED.
    CASE 
        WHEN q.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 OR q.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 'SPILLED'
        ELSE 'NO_SPILL'
    END as SPILL_STATUS,

    -- Any non-zero wait (provisioning + repair + overload) marks the query QUEUED.
    CASE 
        WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) > 0 THEN 'QUEUED'
        ELSE 'NOT_QUEUED'
    END as QUEUE_STATUS,

    -- Analysis metadata
    -- NOTE(review): ANALYSIS_DATE is "yesterday" but the filter below also
    -- admits queries started today — the label understates the window.
    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
WHERE q.START_TIME >= CURRENT_DATE - 1
    AND q.QUERY_ID IS NOT NULL
ORDER BY q.START_TIME DESC;

-- =====================================================
-- TABLE 2: QUERY_DETAILS_COMPLETE
-- This table provides complete details for any specific query
-- =====================================================
-- Full-width companion to QUERY_HISTORY_SUMMARY: every QUERY_HISTORY column
-- (including full QUERY_TEXT and ERROR_MESSAGE) plus derived ratios and
-- classifications, for the "view full details" drill-down keyed by QUERY_ID.
-- Unlike table 1, this one also excludes system users and DDL-ish query types.

CREATE OR REPLACE TABLE QUERY_DETAILS_COMPLETE AS
WITH filtered_queries AS (
    -- One row per business query from the last day, with derived metrics.
    SELECT
        q.*,
        -- Calculate partition scan percentage
        -- (how much of the table's partitions the query actually touched)
        CASE
            WHEN q.PARTITIONS_TOTAL > 0 THEN
                ROUND((q.PARTITIONS_SCANNED::FLOAT / q.PARTITIONS_TOTAL::FLOAT) * 100, 2)
            ELSE 0
        END as PARTITION_SCAN_PERCENTAGE,

        -- Calculate compilation time percentage
        -- (share of total elapsed time spent compiling; both in ms)
        CASE
            WHEN q.TOTAL_ELAPSED_TIME > 0 THEN
                ROUND((q.COMPILATION_TIME::FLOAT / q.TOTAL_ELAPSED_TIME::FLOAT) * 100, 2)
            ELSE 0
        END as COMPILATION_TIME_PERCENTAGE,

        -- Calculate execution time percentage
        CASE
            WHEN q.TOTAL_ELAPSED_TIME > 0 THEN
                ROUND((q.EXECUTION_TIME::FLOAT / q.TOTAL_ELAPSED_TIME::FLOAT) * 100, 2)
            ELSE 0
        END as EXECUTION_TIME_PERCENTAGE,

        -- Calculate rows per MB scanned (crude scan-efficiency measure)
        CASE
            WHEN q.BYTES_SCANNED > 0 THEN
                ROUND(q.ROWS_PRODUCED::FLOAT / (q.BYTES_SCANNED::FLOAT / 1024 / 1024), 2)
            ELSE 0
        END as ROWS_PER_MB_SCANNED,

        -- Classify performance (thresholds in milliseconds)
        CASE
            WHEN q.TOTAL_ELAPSED_TIME <= 1000 THEN 'VERY_FAST'
            WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN 'FAST'
            WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN 'MODERATE'
            WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN 'SLOW'
            ELSE 'VERY_SLOW'
        END as PERFORMANCE_CATEGORY,

        -- Classify cache efficiency (PERCENTAGE_SCANNED_FROM_CACHE is 0-100)
        CASE
            WHEN q.PERCENTAGE_SCANNED_FROM_CACHE >= 90 THEN 'HIGH_CACHE_HIT'
            WHEN q.PERCENTAGE_SCANNED_FROM_CACHE >= 50 THEN 'MEDIUM_CACHE_HIT'
            WHEN q.PERCENTAGE_SCANNED_FROM_CACHE > 0 THEN 'LOW_CACHE_HIT'
            ELSE 'NO_CACHE_HIT'
        END as CACHE_EFFICIENCY,

        -- Classify spilling (remote spill is the more expensive symptom,
        -- so it is checked before local)
        CASE
            WHEN q.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 AND q.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 'BOTH_SPILL'
            WHEN q.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 'REMOTE_SPILL'
            WHEN q.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 'LOCAL_SPILL'
            ELSE 'NO_SPILL'
        END as SPILL_CLASSIFICATION

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
    WHERE q.START_TIME >= CURRENT_DATE - 1
        AND q.QUERY_ID IS NOT NULL
        AND q.USER_NAME NOT IN ('SNOWFLAKE', 'SNOWFLAKE_MONITOR')
        -- NOTE(review): QUERY_TYPE values are specific (e.g. 'CREATE_TABLE',
        -- 'ALTER_SESSION'), so the bare literals below may not match all DDL —
        -- verify against SELECT DISTINCT QUERY_TYPE on this account.
        AND q.QUERY_TYPE NOT IN ('SHOW', 'DESCRIBE', 'USE', 'CREATE', 'DROP', 'ALTER')
)
SELECT 
    -- Core Query Information
    QUERY_ID,
    QUERY_TEXT,
    QUERY_HASH,
    QUERY_HASH_VERSION,
    QUERY_PARAMETERIZED_HASH,
    QUERY_PARAMETERIZED_HASH_VERSION,
    QUERY_TYPE,
    QUERY_TAG,

    -- Timing Details (all in milliseconds)
    START_TIME,
    END_TIME,
    TOTAL_ELAPSED_TIME,
    COMPILATION_TIME,
    EXECUTION_TIME,
    QUEUED_PROVISIONING_TIME,
    QUEUED_REPAIR_TIME,
    QUEUED_OVERLOAD_TIME,
    TRANSACTION_BLOCKED_TIME,
    CHILD_QUERIES_WAIT_TIME,
    QUERY_RETRY_TIME,
    QUERY_RETRY_CAUSE,
    FAULT_HANDLING_TIME,
    LIST_EXTERNAL_FILES_TIME,

    -- User and Authentication
    USER_NAME,
    USER_TYPE,
    ROLE_NAME,
    ROLE_TYPE,
    SECONDARY_ROLE_STATS,
    SESSION_ID,

    -- Warehouse and Compute
    WAREHOUSE_ID,
    WAREHOUSE_NAME,
    WAREHOUSE_SIZE,
    WAREHOUSE_TYPE,
    CLUSTER_NUMBER,
    QUERY_LOAD_PERCENT,

    -- Database Context
    DATABASE_ID,
    DATABASE_NAME,
    SCHEMA_ID,
    SCHEMA_NAME,
    USER_DATABASE_ID,
    USER_DATABASE_NAME,
    USER_SCHEMA_ID,
    USER_SCHEMA_NAME,

    -- Execution Results
    EXECUTION_STATUS,
    ERROR_CODE,
    ERROR_MESSAGE,
    IS_CLIENT_GENERATED_STATEMENT,

    -- Data Processing Metrics
    BYTES_SCANNED,
    PERCENTAGE_SCANNED_FROM_CACHE,
    BYTES_WRITTEN,
    BYTES_WRITTEN_TO_RESULT,
    BYTES_READ_FROM_RESULT,
    ROWS_PRODUCED,
    ROWS_WRITTEN_TO_RESULT,
    ROWS_INSERTED,
    ROWS_UPDATED,
    ROWS_DELETED,
    ROWS_UNLOADED,
    BYTES_DELETED,

    -- Partitioning
    PARTITIONS_SCANNED,
    PARTITIONS_TOTAL,
    PARTITION_SCAN_PERCENTAGE,

    -- Memory and Spilling
    BYTES_SPILLED_TO_LOCAL_STORAGE,
    BYTES_SPILLED_TO_REMOTE_STORAGE,
    BYTES_SENT_OVER_THE_NETWORK,

    -- Credits and Cost (cloud-services only; warehouse compute credits are
    -- per-warehouse, see WAREHOUSE_METERING_HISTORY in table 3)
    CREDITS_USED_CLOUD_SERVICES,

    -- Data Transfer
    OUTBOUND_DATA_TRANSFER_CLOUD,
    OUTBOUND_DATA_TRANSFER_REGION,
    OUTBOUND_DATA_TRANSFER_BYTES,
    INBOUND_DATA_TRANSFER_CLOUD,
    INBOUND_DATA_TRANSFER_REGION,
    INBOUND_DATA_TRANSFER_BYTES,

    -- External Functions
    EXTERNAL_FUNCTION_TOTAL_INVOCATIONS,
    EXTERNAL_FUNCTION_TOTAL_SENT_ROWS,
    EXTERNAL_FUNCTION_TOTAL_RECEIVED_ROWS,
    EXTERNAL_FUNCTION_TOTAL_SENT_BYTES,
    EXTERNAL_FUNCTION_TOTAL_RECEIVED_BYTES,

    -- Query Acceleration
    QUERY_ACCELERATION_BYTES_SCANNED,
    QUERY_ACCELERATION_PARTITIONS_SCANNED,
    QUERY_ACCELERATION_UPPER_LIMIT_SCALE_FACTOR,

    -- Transaction Information
    TRANSACTION_ID,

    -- System Information
    RELEASE_VERSION,

    -- Performance Ratios and Calculated Fields (derived in the CTE above)
    COMPILATION_TIME_PERCENTAGE,
    EXECUTION_TIME_PERCENTAGE,
    ROWS_PER_MB_SCANNED,

    -- Performance Classifications (derived in the CTE above)
    PERFORMANCE_CATEGORY,
    CACHE_EFFICIENCY,
    SPILL_CLASSIFICATION,

    -- Analysis metadata
    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM filtered_queries
ORDER BY START_TIME DESC;

-- =====================================================
-- REFRESH PROCEDURES
-- =====================================================



-- Table 3 Warehouse 


-- One row per warehouse: query counts bucketed by duration, queue time,
-- spilling, execution status and cloud-services credits, plus a QUERY_IDS
-- object whose arrays carry the raw QUERY_IDs behind each bucket so the UI
-- can drill from a bucket count into QUERY_HISTORY_SUMMARY /
-- QUERY_DETAILS_COMPLETE.
CREATE OR REPLACE TABLE WAREHOUSE_ANALYTICS_DASHBOARD_with_queries AS
WITH warehouse_info AS (
    -- Latest known metadata per warehouse.
    -- FIX: the previous SELECT DISTINCT could emit several rows per
    -- warehouse (one per SIZE / CLUSTER_COUNT combination seen in the event
    -- history), which fanned out the join below and inflated every count
    -- and sum. QUALIFY keeps exactly one row — the most recent event.
    SELECT
        wh.WAREHOUSE_ID,
        wh.WAREHOUSE_NAME,
        wh.SIZE,
        wh.WAREHOUSE_TYPE,
        wh.CLUSTER_COUNT,
        NULL as SUSPEND_POLICY,      -- not exposed by WAREHOUSE_EVENTS_HISTORY;
        NULL as MIN_CLUSTER_COUNT,   -- placeholders kept for schema stability
        NULL as MAX_CLUSTER_COUNT
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY wh
    WHERE wh.TIMESTAMP >= CURRENT_DATE - 1
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY wh.WAREHOUSE_ID
        ORDER BY wh.TIMESTAMP DESC
    ) = 1
),

query_buckets AS (
    -- One row per business query from the last day, classified into
    -- duration, queue-time and credit buckets (all times in milliseconds).
    SELECT
        q.WAREHOUSE_ID,
        q.WAREHOUSE_NAME,
        q.QUERY_ID,
        q.TOTAL_ELAPSED_TIME,
        q.EXECUTION_STATUS,
        q.CREDITS_USED_CLOUD_SERVICES,
        q.BYTES_SPILLED_TO_LOCAL_STORAGE,
        q.BYTES_SPILLED_TO_REMOTE_STORAGE,
        q.QUERY_TYPE,

        CASE
            WHEN q.TOTAL_ELAPSED_TIME <= 10000 THEN '1-10 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 20000 THEN '10-20 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 60000 THEN '20-60 seconds'
            WHEN q.TOTAL_ELAPSED_TIME <= 180000 THEN '1-3 minutes'
            WHEN q.TOTAL_ELAPSED_TIME <= 300000 THEN '3-5 minutes'
            ELSE '5+ minutes'
        END as DURATION_BUCKET,

        -- FIX: queries that never queued (total wait = 0 ms) previously fell
        -- into '1-2 minutes' (0 <= 120000), so QUEUED_1_2_MIN counted nearly
        -- every query. Classify them separately so the QUEUED_* counts below
        -- cover only queries that actually waited.
        CASE
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) = 0 THEN 'Not queued'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 120000 THEN '1-2 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 300000 THEN '2-5 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 600000 THEN '5-10 minutes'
            WHEN (q.QUEUED_PROVISIONING_TIME + q.QUEUED_REPAIR_TIME + q.QUEUED_OVERLOAD_TIME) <= 1200000 THEN '10-20 minutes'
            ELSE '20+ minutes'
        END as QUEUED_BUCKET,

        -- NOTE(review): labels assume 1 credit ~= $1 (0.2 credits = 20 cents);
        -- confirm against the account's credit price before showing as money.
        CASE
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.2 THEN '0-20 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.4 THEN '20-40 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.6 THEN '40-60 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 0.8 THEN '60-80 cents'
            WHEN q.CREDITS_USED_CLOUD_SERVICES <= 1.0 THEN '80-100 cents'
            ELSE '100+ cents'
        END as CREDIT_UTILIZATION_BUCKET

    FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
    WHERE q.START_TIME >= CURRENT_DATE - 1
        AND q.WAREHOUSE_ID IS NOT NULL
        AND q.USER_NAME NOT IN ('SNOWFLAKE', 'SNOWFLAKE_MONITOR')
        -- NOTE(review): QUERY_TYPE values are specific (e.g. 'CREATE_TABLE');
        -- verify these bare literals actually match DDL on this account.
        AND q.QUERY_TYPE NOT IN ('SHOW', 'DESCRIBE', 'USE', 'CREATE', 'DROP', 'ALTER')
        AND q.QUERY_TEXT IS NOT NULL
),

warehouse_metrics AS (
    -- Metered credit totals per warehouse over the same window.
    SELECT
        wm.WAREHOUSE_ID,
        wm.WAREHOUSE_NAME,
        SUM(wm.CREDITS_USED) as TOTAL_CREDITS_USED,
        SUM(wm.CREDITS_USED_COMPUTE) as TOTAL_COMPUTE_CREDITS,
        SUM(wm.CREDITS_USED_CLOUD_SERVICES) as TOTAL_CLOUD_SERVICES_CREDITS
    FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY wm
    WHERE wm.START_TIME >= CURRENT_DATE - 1
    GROUP BY wm.WAREHOUSE_ID, wm.WAREHOUSE_NAME
)

SELECT
    -- FULL OUTER joins mean a warehouse may appear in any of the three
    -- sources; COALESCE picks whichever ID/name is present.
    COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID) as WAREHOUSE_ID,
    COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME) as WAREHOUSE_NAME,
    wi.SIZE as WAREHOUSE_SIZE,
    wi.WAREHOUSE_TYPE,
    wi.CLUSTER_COUNT,
    wi.SUSPEND_POLICY,
    wi.MIN_CLUSTER_COUNT,
    wi.MAX_CLUSTER_COUNT,

    -- Duration histogram
    COUNT(CASE WHEN qb.DURATION_BUCKET = '1-10 seconds' THEN 1 END) as QUERIES_1_10_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '10-20 seconds' THEN 1 END) as QUERIES_10_20_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '20-60 seconds' THEN 1 END) as QUERIES_20_60_SEC,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '1-3 minutes' THEN 1 END) as QUERIES_1_3_MIN,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '3-5 minutes' THEN 1 END) as QUERIES_3_5_MIN,
    COUNT(CASE WHEN qb.DURATION_BUCKET = '5+ minutes' THEN 1 END) as QUERIES_5_PLUS_MIN,

    -- Queue-time histogram (queries with zero queue time are 'Not queued'
    -- and intentionally excluded from all QUEUED_* counts)
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '1-2 minutes' THEN 1 END) as QUEUED_1_2_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '2-5 minutes' THEN 1 END) as QUEUED_2_5_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '5-10 minutes' THEN 1 END) as QUEUED_5_10_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '10-20 minutes' THEN 1 END) as QUEUED_10_20_MIN,
    COUNT(CASE WHEN qb.QUEUED_BUCKET = '20+ minutes' THEN 1 END) as QUEUED_20_PLUS_MIN,

    -- Spilling
    COUNT(CASE WHEN qb.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_LOCAL,
    COUNT(CASE WHEN qb.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN 1 END) as QUERIES_SPILLED_REMOTE,
    SUM(qb.BYTES_SPILLED_TO_LOCAL_STORAGE) as TOTAL_BYTES_SPILLED_LOCAL,
    SUM(qb.BYTES_SPILLED_TO_REMOTE_STORAGE) as TOTAL_BYTES_SPILLED_REMOTE,

    -- Execution status
    -- NOTE(review): confirm the literal status values ('FAIL'/'SUCCESS'/
    -- 'RUNNING') against SELECT DISTINCT EXECUTION_STATUS on this account;
    -- table 4 in this file tests for 'FAILED'/'CANCELLED', so at most one
    -- of the two spellings can be right.
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'FAIL' THEN 1 END) as FAILED_QUERIES,
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'SUCCESS' THEN 1 END) as SUCCESSFUL_QUERIES,
    COUNT(CASE WHEN qb.EXECUTION_STATUS = 'RUNNING' THEN 1 END) as RUNNING_QUERIES,

    -- Credit histogram
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '0-20 cents' THEN 1 END) as QUERIES_0_20_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '20-40 cents' THEN 1 END) as QUERIES_20_40_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '40-60 cents' THEN 1 END) as QUERIES_40_60_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '60-80 cents' THEN 1 END) as QUERIES_60_80_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '80-100 cents' THEN 1 END) as QUERIES_80_100_CENTS,
    COUNT(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '100+ cents' THEN 1 END) as QUERIES_100_PLUS_CENTS,

    -- Drill-down IDs: one array per bucket. The CASE yields NULL for
    -- non-matching rows and ARRAY_AGG drops NULLs, so each array holds
    -- exactly the QUERY_IDs counted in the matching column above.
    OBJECT_CONSTRUCT(
        '1-10_sec_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '1-10 seconds' THEN qb.QUERY_ID END),
        '10-20_sec_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '10-20 seconds' THEN qb.QUERY_ID END),
        '20-60_sec_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '20-60 seconds' THEN qb.QUERY_ID END),
        '1-3_min_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '1-3 minutes' THEN qb.QUERY_ID END),
        '3-5_min_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '3-5 minutes' THEN qb.QUERY_ID END),
        '5_plus_min_ids', ARRAY_AGG(CASE WHEN qb.DURATION_BUCKET = '5+ minutes' THEN qb.QUERY_ID END),
        'queued_1-2_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '1-2 minutes' THEN qb.QUERY_ID END),
        'queued_2-5_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '2-5 minutes' THEN qb.QUERY_ID END),
        'queued_5-10_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '5-10 minutes' THEN qb.QUERY_ID END),
        'queued_10-20_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '10-20 minutes' THEN qb.QUERY_ID END),
        'queued_20_plus_min_ids', ARRAY_AGG(CASE WHEN qb.QUEUED_BUCKET = '20+ minutes' THEN qb.QUERY_ID END),
        'spilled_local_ids', ARRAY_AGG(CASE WHEN qb.BYTES_SPILLED_TO_LOCAL_STORAGE > 0 THEN qb.QUERY_ID END),
        'spilled_remote_ids', ARRAY_AGG(CASE WHEN qb.BYTES_SPILLED_TO_REMOTE_STORAGE > 0 THEN qb.QUERY_ID END),
        'failed_queries_ids', ARRAY_AGG(CASE WHEN qb.EXECUTION_STATUS = 'FAIL' THEN qb.QUERY_ID END),
        'successful_queries_ids', ARRAY_AGG(CASE WHEN qb.EXECUTION_STATUS = 'SUCCESS' THEN qb.QUERY_ID END),
        'running_queries_ids', ARRAY_AGG(CASE WHEN qb.EXECUTION_STATUS = 'RUNNING' THEN qb.QUERY_ID END),
        'credit_0-20_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '0-20 cents' THEN qb.QUERY_ID END),
        'credit_20-40_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '20-40 cents' THEN qb.QUERY_ID END),
        'credit_40-60_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '40-60 cents' THEN qb.QUERY_ID END),
        'credit_60-80_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '60-80 cents' THEN qb.QUERY_ID END),
        'credit_80-100_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '80-100 cents' THEN qb.QUERY_ID END),
        'credit_100_plus_cents_ids', ARRAY_AGG(CASE WHEN qb.CREDIT_UTILIZATION_BUCKET = '100+ cents' THEN qb.QUERY_ID END)
    ) as QUERY_IDS,

    COUNT(qb.QUERY_ID) as TOTAL_QUERIES,
    COALESCE(wm.TOTAL_CREDITS_USED, 0) as TOTAL_CREDITS_USED,
    COALESCE(wm.TOTAL_COMPUTE_CREDITS, 0) as TOTAL_COMPUTE_CREDITS,
    COALESCE(wm.TOTAL_CLOUD_SERVICES_CREDITS, 0) as TOTAL_CLOUD_SERVICES_CREDITS,

    CURRENT_TIMESTAMP as ANALYSIS_TIMESTAMP,
    CURRENT_DATE - 1 as ANALYSIS_DATE

FROM warehouse_info wi
FULL OUTER JOIN query_buckets qb
    ON wi.WAREHOUSE_ID = qb.WAREHOUSE_ID
FULL OUTER JOIN warehouse_metrics wm
    ON COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID) = wm.WAREHOUSE_ID

GROUP BY
    COALESCE(wi.WAREHOUSE_ID, qb.WAREHOUSE_ID, wm.WAREHOUSE_ID),
    COALESCE(wi.WAREHOUSE_NAME, qb.WAREHOUSE_NAME, wm.WAREHOUSE_NAME),
    wi.SIZE,
    wi.WAREHOUSE_TYPE,
    wi.CLUSTER_COUNT,
    wi.SUSPEND_POLICY,
    wi.MIN_CLUSTER_COUNT,
    wi.MAX_CLUSTER_COUNT,
    wm.TOTAL_CREDITS_USED,
    wm.TOTAL_COMPUTE_CREDITS,
    wm.TOTAL_CLOUD_SERVICES_CREDITS

ORDER BY TOTAL_QUERIES DESC;






-- table 4 user 


CREATE OR REPLACE TABLE user_query_performance_report AS
WITH percentile_reference AS (
    SELECT
        warehouse_size,
        PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY bytes_scanned) AS bytes_scanned_p10,
        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_elapsed_time) AS execution_time_p90
    FROM snowflake.account_usage.query_history
   WHERE START_TIME >= DATEADD(DAY, -7, CURRENT_DATE)
  AND START_TIME <= CURRENT_DATE

      AND user_name IS NOT NULL
      AND query_type NOT IN ('DESCRIBE', 'SHOW', 'USE')
    GROUP BY warehouse_size
),
query_flags AS (
    SELECT
        qh.query_id,
        qh.warehouse_size,
        qh.bytes_scanned,
        qh.total_elapsed_time AS execution_time_ms,
        qh.compilation_time,
        qh.start_time,
        qh.query_text,
        qh.query_hash,
        qh.query_tag,
        qh.error_code,
        qh.execution_status,
        qh.partitions_scanned,
        qh.partitions_total,
        qh.bytes_spilled_to_local_storage,
        qh.bytes_spilled_to_remote_storage,
        qh.user_name,
        qh.database_name,
        qh.schema_name,
        qh.warehouse_name,
        COALESCE(qh.credits_used_cloud_services, 0) AS credits_used_cloud_services,
        qh.rows_produced,
        qh.rows_deleted,
        qh.rows_inserted,
        qh.rows_updated,
        CASE WHEN qh.warehouse_size IN ('MEDIUM', 'LARGE', 'X-LARGE', '2X-LARGE', '3X-LARGE', '4X-LARGE') AND qh.bytes_scanned < pr.bytes_scanned_p10 THEN 1 ELSE 0 END AS over_provisioned,
        CASE WHEN EXTRACT(HOUR FROM qh.start_time) BETWEEN 9 AND 17 AND qh.total_elapsed_time > 300000 THEN 1 ELSE 0 END AS peak_hour_long_running,
        CASE WHEN qh.query_text ILIKE 'SELECT *%' THEN 1 ELSE 0 END AS select_star,
        CASE WHEN qh.partitions_total > 0 AND qh.partitions_scanned = qh.partitions_total THEN 1 ELSE 0 END AS unpartitioned_scan,
        CASE WHEN qh.query_hash IS NOT NULL THEN 1 ELSE 0 END AS repeated_query,
        CASE WHEN (qh.query_text ILIKE '%JOIN%' AND qh.query_text ILIKE '%JOIN%') OR qh.query_text ILIKE '%WINDOW%' THEN 1 ELSE 0 END AS complex_query,
        CASE WHEN qh.error_code IS NOT NULL OR qh.execution_status IN ('FAILED', 'CANCELLED') THEN 1 ELSE 0 END AS failed_cancelled,
        CASE WHEN qh.bytes_spilled_to_local_storage > 0 OR qh.bytes_spilled_to_remote_storage > 0 THEN 1 ELSE 0 END AS spilled,
        CASE WHEN qh.rows_produced = 0 AND qh.bytes_scanned > 1000000 THEN 1 ELSE 0 END AS zero_result_query,
        CASE WHEN qh.compilation_time > 5000 THEN 1 ELSE 0 END AS high_compile_time,
        CASE WHEN qh.query_tag IS NULL THEN 1 ELSE 0 END AS untagged_query,
        CASE WHEN qh.query_text ILIKE '%ORDER BY%' AND qh.query_text NOT ILIKE '%LIMIT%' THEN 1 ELSE 0 END AS unlimited_order_by,
        CASE WHEN qh.query_text ILIKE '%GROUP BY%' AND qh.rows_produced > 1000000 THEN 1 ELSE 0 END AS large_group_by,
        CASE WHEN qh.total_elapsed_time > pr.execution_time_p90 THEN 1 ELSE 0 END AS slow_query,
        CASE WHEN qh.query_text ILIKE '%DISTINCT%' AND qh.bytes_scanned > 100000000 THEN 1 ELSE 0 END AS expensive_distinct,
        CASE WHEN qh.query_text ILIKE '%LIKE%' AND qh.query_text NOT ILIKE '%INDEX%' THEN 1 ELSE 0 END AS inefficient_like,
        CASE WHEN qh.bytes_scanned > 0 AND qh.rows_produced = 0 THEN 1 ELSE 0 END AS no_results_with_scan,
        CASE WHEN qh.query_text ILIKE '%CROSS JOIN%' OR (qh.query_text ILIKE '%JOIN%' AND qh.query_text NOT ILIKE '%ON%') THEN 1 ELSE 0 END AS cartesian_join,
        CASE WHEN qh.total_elapsed_time > 0 AND qh.compilation_time / qh.total_elapsed_time > 0.5 THEN 1 ELSE 0 END AS high_compile_ratio
    FROM snowflake.account_usage.query_history qh
    LEFT JOIN percentile_reference pr ON qh.warehouse_size = pr.warehouse_size
    WHERE START_TIME >= DATEADD(DAY, -7, CURRENT_DATE)
  AND START_TIME <= CURRENT_DATE

        AND qh.query_type NOT IN ('DESCRIBE', 'SHOW', 'USE')
        AND qh.user_name IS NOT NULL
        AND qh.total_elapsed_time > 1000
),
sample_queries AS (
    SELECT
        user_name,
        'over_provisioned' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY start_time DESC) AS sample_queries
    FROM query_flags WHERE over_provisioned = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'over_provisioned', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(over_provisioned) = 0
    UNION ALL
    SELECT
        user_name,
        'peak_hour_long_running' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY execution_time_ms DESC) AS sample_queries
    FROM query_flags WHERE peak_hour_long_running = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'peak_hour_long_running', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(peak_hour_long_running) = 0
    UNION ALL
    SELECT
        user_name,
        'select_star' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE select_star = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'select_star', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(select_star) = 0
    UNION ALL
    SELECT
        user_name,
        'unpartitioned_scan' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'partitions_scanned', partitions_scanned, 'partitions_total', partitions_total, 'start_time', start_time)) WITHIN GROUP (ORDER BY partitions_scanned DESC) AS sample_queries
    FROM query_flags WHERE unpartitioned_scan = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'unpartitioned_scan', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(unpartitioned_scan) = 0
    UNION ALL
    SELECT
        user_name,
        'spilled' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_spilled_to_local_storage', bytes_spilled_to_local_storage, 'bytes_spilled_to_remote_storage', bytes_spilled_to_remote_storage, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY (bytes_spilled_to_local_storage + bytes_spilled_to_remote_storage) DESC) AS sample_queries
    FROM query_flags WHERE spilled = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'spilled', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(spilled) = 0
    UNION ALL
    SELECT
        user_name,
        'failed_cancelled' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'error_code', error_code, 'execution_status', execution_status, 'start_time', start_time)) WITHIN GROUP (ORDER BY start_time DESC) AS sample_queries
    FROM query_flags WHERE failed_cancelled = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'failed_cancelled', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(failed_cancelled) = 0
    UNION ALL
    SELECT
        user_name,
        'zero_result_query' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'bytes_scanned', bytes_scanned, 'execution_time_ms', execution_time_ms, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE zero_result_query = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'zero_result_query', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(zero_result_query) = 0
    UNION ALL
    SELECT
        user_name,
        'high_compile_time' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'compilation_time', compilation_time, 'execution_time_ms', execution_time_ms, 'start_time', start_time)) WITHIN GROUP (ORDER BY compilation_time DESC) AS sample_queries
    FROM query_flags WHERE high_compile_time = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'high_compile_time', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(high_compile_time) = 0
    UNION ALL
    SELECT
        user_name,
        'slow_query' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'warehouse_size', warehouse_size, 'start_time', start_time)) WITHIN GROUP (ORDER BY execution_time_ms DESC) AS sample_queries
    FROM query_flags WHERE slow_query = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'slow_query', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(slow_query) = 0
    UNION ALL
    SELECT
        user_name,
        'cartesian_join' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'rows_produced', rows_produced, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE cartesian_join = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'cartesian_join', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(cartesian_join) = 0
    UNION ALL
    SELECT
        user_name,
        'unlimited_order_by' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE unlimited_order_by = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'unlimited_order_by', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(unlimited_order_by) = 0
    UNION ALL
    SELECT
        user_name,
        'large_group_by' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'rows_produced', rows_produced, 'start_time', start_time)) WITHIN GROUP (ORDER BY rows_produced DESC) AS sample_queries
    FROM query_flags WHERE large_group_by = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'large_group_by', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(large_group_by) = 0
    UNION ALL
    SELECT
        user_name,
        'expensive_distinct' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE expensive_distinct = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'expensive_distinct', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(expensive_distinct) = 0
    UNION ALL
    SELECT
        user_name,
        'inefficient_like' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE inefficient_like = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'inefficient_like', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(inefficient_like) = 0
    UNION ALL
    SELECT
        user_name,
        'no_results_with_scan' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'execution_time_ms', execution_time_ms, 'bytes_scanned', bytes_scanned, 'start_time', start_time)) WITHIN GROUP (ORDER BY bytes_scanned DESC) AS sample_queries
    FROM query_flags WHERE no_results_with_scan = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'no_results_with_scan', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(no_results_with_scan) = 0
    UNION ALL
    SELECT
        user_name,
        'high_compile_ratio' AS flag_type,
        ARRAY_AGG(OBJECT_CONSTRUCT('query_id', query_id, 'query_text', query_text, 'compilation_time', compilation_time, 'execution_time_ms', execution_time_ms, 'start_time', start_time)) WITHIN GROUP (ORDER BY compilation_time DESC) AS sample_queries
    FROM query_flags WHERE high_compile_ratio = 1 GROUP BY user_name HAVING COUNT(*) > 0
    UNION ALL SELECT user_name, 'high_compile_ratio', ARRAY_CONSTRUCT() FROM query_flags GROUP BY user_name HAVING SUM(high_compile_ratio) = 0
),
-- Pivot the one-row-per-(user, flag_type) sample rows into a single object
-- per user: { flag_type -> array of sample-query objects }.
pivoted_samples AS (
    SELECT user_name,
           OBJECT_AGG(flag_type, sample_queries) AS query_samples
    FROM sample_queries
    GROUP BY user_name
),
-- Per-user rollup of query volume, cost, and every anti-pattern flag from
-- query_flags, plus a single weighted score used to rank users at the end.
-- NOTE(review): assumes each flag column in query_flags is a non-NULL 0/1
-- indicator; a NULL flag value would make the corresponding SUM() (and the
-- weighted score) NULL -- confirm upstream CTE guarantees this.
user_metrics AS (
    SELECT
        user_name,
        COUNT(DISTINCT query_id) AS total_queries,
        COUNT(DISTINCT warehouse_name) AS warehouses_used,
        COUNT(DISTINCT database_name) AS databases_accessed,
        ROUND(SUM(credits_used_cloud_services), 2) AS total_credits,
        ROUND(AVG(execution_time_ms), 2) AS avg_execution_time_ms,
        -- Per-query scan efficiency. NULLIF avoids division by zero; AVG then
        -- silently skips queries that produced zero rows (their ratio is NULL).
        ROUND(AVG(bytes_scanned / NULLIF(rows_produced, 0)), 2) AS avg_bytes_per_row,
        ROUND(SUM(bytes_scanned) / POWER(1024, 3), 2) AS total_data_scanned_gb,
        -- NULLIF(COUNT(*), 0) is purely defensive: a GROUP BY group always
        -- contains at least one row, so the denominator can never be zero.
        ROUND(SUM(failed_cancelled) * 1.0 / NULLIF(COUNT(*), 0) * 100, 2) AS failure_cancellation_rate_pct,
        SUM(spilled) AS spilled_queries,
        SUM(over_provisioned) AS over_provisioned_queries,
        SUM(peak_hour_long_running) AS peak_hour_long_running_queries,
        SUM(select_star) AS select_star_queries,
        SUM(unpartitioned_scan) AS unpartitioned_scan_queries,
        -- Repeats = total rows minus distinct hashes. NOTE(review): rows with a
        -- NULL query_hash are ignored by COUNT(DISTINCT) and therefore counted
        -- as "repeats" here -- confirm that is intended.
        COUNT(*) - COUNT(DISTINCT query_hash) AS repeated_queries,
        SUM(complex_query) AS complex_join_queries,
        SUM(zero_result_query) AS zero_result_queries,
        SUM(high_compile_time) AS high_compile_queries,
        SUM(untagged_query) AS untagged_queries,
        SUM(unlimited_order_by) AS unlimited_order_by_queries,
        SUM(large_group_by) AS large_group_by_queries,
        SUM(slow_query) AS slow_queries,
        SUM(expensive_distinct) AS expensive_distinct_queries,
        SUM(inefficient_like) AS inefficient_like_queries,
        SUM(no_results_with_scan) AS no_results_with_scan_queries,
        SUM(cartesian_join) AS cartesian_join_queries,
        SUM(high_compile_ratio) AS high_compile_ratio_queries,
        -- Severity-weighted composite score: each flag count times a hand-tuned
        -- weight (cartesian joins heaviest at 6; tagging/LIKE issues lightest
        -- at 2). Weights are magic numbers -- keep in sync with the flag list
        -- above if flags are added or removed.
        ROUND(
            SUM(spilled) * 5 +
            SUM(over_provisioned) * 4 +
            SUM(peak_hour_long_running) * 4 +
            SUM(select_star) * 3 +
            SUM(unpartitioned_scan) * 4 +
            (COUNT(*) - COUNT(DISTINCT query_hash)) * 2 +
            SUM(complex_query) * 3 +
            SUM(failed_cancelled) * 3 +
            SUM(zero_result_query) * 5 +
            SUM(high_compile_time) * 3 +
            SUM(untagged_query) * 2 +
            SUM(unlimited_order_by) * 3 +
            SUM(large_group_by) * 4 +
            SUM(slow_query) * 4 +
            SUM(expensive_distinct) * 4 +
            SUM(inefficient_like) * 2 +
            SUM(no_results_with_scan) * 5 +
            SUM(cartesian_join) * 6 +
            SUM(high_compile_ratio) * 3, 2
        ) AS weighted_score
    FROM query_flags
    GROUP BY user_name
),
-- Single-row CTE: 90th-percentile credit spend across users, used below to
-- label the 'High Cost' outliers.
cost_percentiles AS (
    SELECT
        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_credits) AS cost_p90
    FROM user_metrics
),
-- Single-row CTE: 90th-percentile bytes-scanned-per-row across users, the
-- threshold for the "reduce data scanned" recommendation.
bytes_percentiles AS (
    SELECT
        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY avg_bytes_per_row) AS bytes_p90
    FROM user_metrics
),
-- Single-row CTE: 80th-percentile failure/cancellation rate across users, the
-- threshold for the "debug query syntax" recommendation.
failure_percentiles AS (
    SELECT
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY failure_cancellation_rate_pct) AS failure_p80
    FROM user_metrics
)
-- Final projection: one row per user, worst offenders first by weighted_score.
SELECT
    um.user_name,
    um.total_queries,
    um.warehouses_used,
    um.databases_accessed,
    um.total_credits,
    um.avg_execution_time_ms,
    um.avg_bytes_per_row,
    um.total_data_scanned_gb,
    um.failure_cancellation_rate_pct,
    um.spilled_queries,
    um.over_provisioned_queries,
    um.peak_hour_long_running_queries,
    um.select_star_queries,
    um.unpartitioned_scan_queries,
    um.repeated_queries,
    um.complex_join_queries,
    um.zero_result_queries,
    um.high_compile_queries,
    um.untagged_queries,
    um.unlimited_order_by_queries,
    um.large_group_by_queries,
    um.slow_queries,
    um.expensive_distinct_queries,
    um.inefficient_like_queries,
    um.no_results_with_scan_queries,
    um.cartesian_join_queries,
    um.high_compile_ratio_queries,
    um.weighted_score,
    -- Users above the 90th percentile of credit spend are labelled outliers.
    CASE WHEN um.total_credits > cost_percentiles.cost_p90 THEN 'High Cost' ELSE 'Normal' END AS cost_status,
    -- ARRAY_CONSTRUCT_COMPACT drops NULL elements, so only the recommendations
    -- whose threshold was crossed survive. The thresholds (>10, >5, >0) are
    -- hand-picked magic numbers -- tune alongside the weights in user_metrics.
    ARRAY_CONSTRUCT_COMPACT(
        CASE WHEN um.avg_bytes_per_row > bytes_percentiles.bytes_p90 THEN 'Reduce data scanned with clustering or selective columns.' ELSE NULL END,
        CASE WHEN um.failure_cancellation_rate_pct > failure_percentiles.failure_p80 THEN 'Debug query syntax and logic.' ELSE NULL END,
        CASE WHEN um.spilled_queries > 10 THEN 'Optimize memory usage or increase warehouse size.' ELSE NULL END,
        CASE WHEN um.over_provisioned_queries > 10 THEN 'Use smaller warehouses for simple queries.' ELSE NULL END,
        CASE WHEN um.peak_hour_long_running_queries > 10 THEN 'Schedule long queries off-peak.' ELSE NULL END,
        CASE WHEN um.select_star_queries > 10 THEN 'Specify columns instead of SELECT *.' ELSE NULL END,
        CASE WHEN um.unpartitioned_scan_queries > 10 THEN 'Implement partitioning or clustering.' ELSE NULL END,
        CASE WHEN um.repeated_queries > 10 THEN 'Review frequently executed queries.' ELSE NULL END,
        CASE WHEN um.complex_join_queries > 10 THEN 'Simplify complex JOINs or window functions.' ELSE NULL END,
        CASE WHEN um.zero_result_queries > 10 THEN 'Avoid queries that return no data.' ELSE NULL END,
        CASE WHEN um.high_compile_queries > 10 THEN 'Refactor complex SQL to reduce compile time.' ELSE NULL END,
        CASE WHEN um.untagged_queries > 10 THEN 'Apply query tags for cost attribution.' ELSE NULL END,
        CASE WHEN um.unlimited_order_by_queries > 5 THEN 'Add LIMIT clauses to ORDER BY queries.' ELSE NULL END,
        CASE WHEN um.large_group_by_queries > 5 THEN 'Optimize GROUP BY operations with pre-aggregation.' ELSE NULL END,
        CASE WHEN um.slow_queries > 5 THEN 'Optimize slow-running queries.' ELSE NULL END,
        CASE WHEN um.expensive_distinct_queries > 5 THEN 'Replace DISTINCT with GROUP BY where possible.' ELSE NULL END,
        CASE WHEN um.inefficient_like_queries > 10 THEN 'Use proper indexing for LIKE operations.' ELSE NULL END,
        CASE WHEN um.no_results_with_scan_queries > 5 THEN 'Add WHERE clauses to prevent unnecessary scans.' ELSE NULL END,
        CASE WHEN um.cartesian_join_queries > 0 THEN 'Review JOIN conditions to avoid Cartesian products.' ELSE NULL END,
        CASE WHEN um.high_compile_ratio_queries > 5 THEN 'Simplify query complexity to reduce compilation overhead.' ELSE NULL END
    ) AS recommendations,
    -- Users with no flagged queries have no pivoted_samples row; the LEFT JOIN
    -- yields NULL there, so fall back to an empty object for consumers.
    COALESCE(ps.query_samples, OBJECT_CONSTRUCT()) AS query_samples
-- Each *_percentiles CTE is exactly one row, so these CROSS JOINs simply
-- attach the three scalar thresholds to every user row.
FROM user_metrics um
CROSS JOIN cost_percentiles
CROSS JOIN bytes_percentiles
CROSS JOIN failure_percentiles
LEFT JOIN pivoted_samples ps ON um.user_name = ps.user_name
ORDER BY weighted_score DESC;

Enter fullscreen mode Exit fullscreen mode

Top comments (0)