Thursday, July 10, 2025

AWR/ASH Queries

 -- filename: rds_ash_awr_investigation.sql
--
-- Objective: To investigate heavy CPU hitters, session spikes, and top consumers
--            at a particular point in time using Oracle RDS AWR/ASH data.
--            This revised script focuses on CPU-intensive activities, with improved
--            precision for time windows, better error handling, and corrected RDS procedures.
--
-- Key Modifications and Suggestions:
-- 1. **Procedure Calls**: Replaced undocumented 'rds_run_*' with official 'rdsadmin.rdsadmin_diagnostic_util.awr_report' and '.ash_report'.
--    These are procedures (no return value), so executed via BEGIN-END. Filenames are constructed predictably and read using 'rds_file_util.read_text_file'.
--    Added optional reading of report content (uncomment if needed; large reports may overwhelm output—use SPOOL instead).
-- 2. **Snapshot Selection**: Improved logic to select snaps that bracket the desired time more accurately (covering the period).
-- 3. **Query Enhancements**:
--    - Emphasized CPU focus: Added/strengthened filters for 'ON CPU' in relevant queries (e.g., top sessions/SQL/users).
--    - Fixed Joins: Corrected dba_hist_sqltext join (use sql_id only; dbid is for multi-DB, but in RDS it's single). For objects, removed invalid dbid=owner_id.
--    - Connections History: Changed to show delta logons (new connections in interval) using LAG for precision on spikes.
--    - Added Percentages: Ensured consistent % calculations relative to total CPU samples where applicable.
--    - Error Handling: Added basic checks (e.g., if no data, output message).
--    - Performance: Used ANSI joins, FETCH FIRST for TOP N, and avoided unnecessary subqueries.
-- 4. **Best Practices**: 
--    - Consistent bind variables and formatting for readability.
--    - Comments on each section for troubleshooting guidance.
--    - Suggest narrower ASH windows (e.g., 5-15 min) for pinpointing issues; wider for trends.
--    - If no data: Check AWR retention (DBA_HIST_WR_CONTROL) and ensure Diagnostics Pack is licensed.
--    - For RDS: Reports go to 'BDUMP' by default; download from AWS Console if reading fails.
-- 5. **Pinpointing Issues**: Queries now prioritize CPU consumers. Correlate with AWR/ASH reports for full picture (e.g., Top SQL by CPU in AWR).
--
-- Prerequisites:
--   - Oracle Diagnostics Pack License (Enterprise Edition).
--   - Connected to your Oracle RDS instance with sufficient privileges (e.g., rdsadmin user).
--   - Ensure SQL*Plus (or similar client) settings:
--     SET SERVEROUTPUT ON SIZE UNLIMITED
--     SET LONG 20000000 -- For full SQL text and AWR/ASH report output
--     SET PAGESIZE 0   -- No pagination
--     SET FEEDBACK OFF -- No "X rows selected" messages
--     SET HEADING OFF  -- No column headers (for report output)
--     SET TRIMSPOOL ON -- Trim trailing spaces from spool output
--
-- How to Use:
-- 1. Replace placeholder values for bind variables (e.g., :desired_timestamp, :begin_snap_id, :end_snap_id).
-- 2. (Optional but recommended for HTML reports) SPOOL the output to a .html file before executing the AWR/ASH report generation.
--    Example: SPOOL C:\temp\my_report.html
-- 3. Run the script: @rds_ash_awr_investigation.sql
-- 4. SPOOL OFF after execution.
-- 5. Open the .html file in a web browser for formatted reports.

--------------------------------------------------------------------------------
-- 1. Define Bind Variables (ADJUST THESE VALUES)
--------------------------------------------------------------------------------

-- Define your investigation timestamp (e.g., for yesterday 12:30 AM EST).
--
-- BUG FIX: SQL*Plus VARIABLE does not support the TIMESTAMP (or DATE)
-- datatype -- only NUMBER, VARCHAR2, CLOB, REFCURSOR, etc. -- so the original
-- 'VAR ... TIMESTAMP' declarations fail with a usage error and every later
-- reference to the bind raises SP2-0552. The timestamps are therefore held
-- as VARCHAR2 in a fixed 'YYYY-MM-DD HH24:MI:SS' format, and the session NLS
-- formats are pinned so that the implicit VARCHAR2 -> TIMESTAMP/DATE
-- conversions in the queries and report calls below are deterministic.
ALTER SESSION SET NLS_TIMESTAMP_FORMAT = 'YYYY-MM-DD HH24:MI:SS';
ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS';

VAR desired_timestamp VARCHAR2(19);
EXEC :desired_timestamp := '2025-07-09 00:30:00'; -- ADJUST ME! (format: YYYY-MM-DD HH24:MI:SS)

-- Time window for ASH-based queries: +/- 5 minutes around desired_timestamp.
-- Narrow windows (5-15 min) pinpoint spikes; widen for trend analysis.
VAR ash_begin_time VARCHAR2(19);
EXEC :ash_begin_time := TO_CHAR(TO_TIMESTAMP(:desired_timestamp, 'YYYY-MM-DD HH24:MI:SS') - INTERVAL '5' MINUTE, 'YYYY-MM-DD HH24:MI:SS');

VAR ash_end_time VARCHAR2(19);
EXEC :ash_end_time := TO_CHAR(TO_TIMESTAMP(:desired_timestamp, 'YYYY-MM-DD HH24:MI:SS') + INTERVAL '5' MINUTE, 'YYYY-MM-DD HH24:MI:SS');

-- Dump directory for generated reports (default 'BDUMP'; create a custom one
-- via rdsadmin.rdsadmin_util.create_directory if needed).
VAR dump_directory VARCHAR2(30);
EXEC :dump_directory := 'BDUMP';

-- N for the TOP N queries below.
VAR top_n_count NUMBER;
EXEC :top_n_count := 10;

--------------------------------------------------------------------------------
-- 2. Find AWR Snapshots (Run this first to get suitable snap IDs)
--------------------------------------------------------------------------------
PROMPT -- Finding AWR Snapshots around the desired timestamp --
PROMPT -- Select snaps that cover the period (begin_snap: earliest covering start; end_snap: latest covering end) --
SET HEADING ON
SET PAGESIZE 100
-- A snapshot is relevant when its interval overlaps the ASH window:
-- it ends after the window starts AND begins before the window ends.
SELECT
    s.snap_id,
    s.begin_interval_time,
    s.end_interval_time
FROM dba_hist_snapshot s
WHERE s.end_interval_time >= :ash_begin_time
  AND s.begin_interval_time <= :ash_end_time
ORDER BY s.begin_interval_time;
SET HEADING OFF
SET PAGESIZE 0
PROMPT -- Adjust variables below based on the output above, then re-run the script. --

-- AWR snapshot IDs for report generation (take them from the query above).
VAR awr_begin_snap_id NUMBER;
EXEC :awr_begin_snap_id := 12345; -- ADJUST ME!

VAR awr_end_snap_id NUMBER;
EXEC :awr_end_snap_id := 12346; -- ADJUST ME!

--------------------------------------------------------------------------------
-- 3. Generate AWR HTML Report (RDS-Adapted)
--------------------------------------------------------------------------------
PROMPT -- Generating AWR HTML Report... --
DECLARE
    -- The generated file follows the RDS naming pattern: awrrpt_<begin>_<end>.html
    l_report_name VARCHAR2(256) :=
        'awrrpt_' || :awr_begin_snap_id || '_' || :awr_end_snap_id || '.html';
BEGIN
    -- awr_report is a procedure (no return value); it writes the report file
    -- into the dump directory on the RDS host.
    rdsadmin.rdsadmin_diagnostic_util.awr_report(
        begin_snap     => :awr_begin_snap_id,
        end_snap       => :awr_end_snap_id,
        report_type    => 'html',
        dump_directory => :dump_directory
    );

    DBMS_OUTPUT.PUT_LINE('AWR Report generated: ' || l_report_name);
    DBMS_OUTPUT.PUT_LINE('Download from AWS RDS Console (Logs & events > Logs tab) or read below.');

    -- Optional: stream the report content back to the client (uncomment if
    -- needed; large reports may overwhelm DBMS_OUTPUT -- prefer SPOOL + download).
    /*
    FOR rec IN (SELECT text FROM TABLE(rdsadmin.rds_file_util.read_text_file(:dump_directory, l_report_name))) LOOP
        DBMS_OUTPUT.PUT_LINE(rec.text);
    END LOOP;
    */
EXCEPTION
    WHEN OTHERS THEN
        -- Diagnostic script: report the failure and keep going with later sections.
        DBMS_OUTPUT.PUT_LINE('Error generating/reading AWR report: ' || SQLERRM);
END;
/
PROMPT -- End of AWR Report Section. --

--------------------------------------------------------------------------------
-- 4. Generate ASH HTML Report (RDS-Adapted)
--------------------------------------------------------------------------------
PROMPT -- Generating ASH HTML Report for ASH window :ash_begin_time to :ash_end_time... --
DECLARE
    v_filename VARCHAR2(256);
BEGIN
    -- Construct predictable filename (RDS pattern: ashrpt_YYYYMMDDHH24MISS_YYYYMMDDHH24MISS.html).
    -- CAST makes the datetime conversion explicit, so this works regardless of
    -- whether the bind holds a TIMESTAMP or a formatted VARCHAR2 (SQL*Plus VAR
    -- cannot declare TIMESTAMP binds, so plain TO_CHAR(bind, fmt) would be
    -- resolved as a number conversion and fail at runtime).
    v_filename := 'ashrpt_'
        || TO_CHAR(CAST(:ash_begin_time AS TIMESTAMP), 'YYYYMMDDHH24MISS')
        || '_'
        || TO_CHAR(CAST(:ash_end_time AS TIMESTAMP), 'YYYYMMDDHH24MISS')
        || '.html';

    -- ash_report is a procedure (no return value); begin_time/end_time are
    -- DATE parameters, so convert explicitly rather than relying on NLS.
    rdsadmin.rdsadmin_diagnostic_util.ash_report(
        begin_time     => CAST(:ash_begin_time AS DATE),
        end_time       => CAST(:ash_end_time AS DATE),
        report_type    => 'html',
        dump_directory => :dump_directory
    );

    DBMS_OUTPUT.PUT_LINE('ASH Report generated: ' || v_filename);
    DBMS_OUTPUT.PUT_LINE('Download from AWS RDS Console (Logs & events > Logs tab) or read below.');

    -- Optional: stream the report content back to the client (uncomment if
    -- needed; large reports may overwhelm DBMS_OUTPUT -- prefer SPOOL + download).
    /*
    FOR rec IN (SELECT text FROM TABLE(rdsadmin.rds_file_util.read_text_file(:dump_directory, v_filename))) LOOP
        DBMS_OUTPUT.PUT_LINE(rec.text);
    END LOOP;
    */
EXCEPTION
    WHEN OTHERS THEN
        -- Diagnostic script: report the failure and keep going with later sections.
        DBMS_OUTPUT.PUT_LINE('Error generating/reading ASH report: ' || SQLERRM);
END;
/
PROMPT -- End of ASH Report Section. --

--------------------------------------------------------------------------------
-- 5. DB Load Profile (Minute-by-Minute Active Sessions, with CPU Focus)
--------------------------------------------------------------------------------
PROMPT -- DB Load Profile (Minute-by-Minute Active Sessions, Emphasizing CPU) around :desired_timestamp --
SET HEADING ON
-- One row per minute bucket. COUNT(CASE ... THEN 1 END) counts only the
-- matching rows (CASE yields NULL otherwise); GREATEST(..., 1) guards the
-- percentage against division by zero.
SELECT
    TO_CHAR(TRUNC(ash.sample_time, 'MI'), 'YYYY-MM-DD HH24:MI') AS minute_bucket,
    COUNT(*) AS total_active_sessions,
    COUNT(CASE WHEN ash.session_state = 'ON CPU' THEN 1 END) AS on_cpu_sessions,
    COUNT(CASE WHEN ash.session_state = 'WAITING' THEN 1 END) AS waiting_sessions,
    ROUND(COUNT(CASE WHEN ash.session_state = 'ON CPU' THEN 1 END) * 100
          / GREATEST(COUNT(*), 1), 2) AS pct_on_cpu
FROM dba_hist_active_sess_history ash
WHERE ash.sample_time BETWEEN :ash_begin_time AND :ash_end_time
GROUP BY TRUNC(ash.sample_time, 'MI')
ORDER BY minute_bucket;
SET HEADING OFF
PROMPT -- If on_cpu_sessions high, check CPU capacity in AWS CloudWatch. --

--------------------------------------------------------------------------------
-- 6. Top N Sessions Consuming CPU
--------------------------------------------------------------------------------
PROMPT -- Top :top_n_count Sessions Consuming CPU around :desired_timestamp --
SET HEADING ON
-- BUG FIX: the original INNER JOIN to dba_users silently dropped CPU samples
-- whose user_id has no DBA_USERS row (e.g., a user dropped since the window,
-- or internal sessions). LEFT JOIN keeps those heavy hitters in the ranking.
-- cpu_samples_pct is relative to all ON CPU samples returned by this query
-- (SUM(COUNT(*)) OVER () = total across all groups).
SELECT
    h.session_id,
    h.session_serial#,
    COALESCE(u.username, '(background)') AS username,
    h.program,
    h.module,
    h.sql_id,
    COUNT(*) AS cpu_samples,
    ROUND(COUNT(*) * 100 / GREATEST(SUM(COUNT(*)) OVER (), 1), 2) AS cpu_samples_pct
FROM dba_hist_active_sess_history h
LEFT JOIN dba_users u ON u.user_id = h.user_id
WHERE h.sample_time BETWEEN :ash_begin_time AND :ash_end_time
  AND h.session_state = 'ON CPU'
GROUP BY h.session_id, h.session_serial#, u.username, h.program, h.module, h.sql_id
ORDER BY cpu_samples DESC
FETCH FIRST :top_n_count ROWS ONLY;
SET HEADING OFF
PROMPT -- High samples indicate heavy CPU sessions; kill or tune if needed. --

--------------------------------------------------------------------------------
-- 7. Top N SQL Statements by CPU Usage
--------------------------------------------------------------------------------
PROMPT -- Top :top_n_count SQL Statements by CPU Usage around :desired_timestamp --
SET HEADING ON
-- The percentage denominator (all ON CPU samples in the window, with or
-- without a sql_id) is computed once in a CTE instead of re-running a
-- correlated scalar subquery per output column.
WITH cpu_total AS (
    SELECT COUNT(*) AS total_samples
    FROM dba_hist_active_sess_history
    WHERE sample_time BETWEEN :ash_begin_time AND :ash_end_time
      AND session_state = 'ON CPU'
)
SELECT
    h.sql_id,
    COUNT(*) AS cpu_samples,
    ROUND(COUNT(*) * 100 / GREATEST(t.total_samples, 1), 2) AS cpu_samples_pct_of_total,
    (SELECT DBMS_LOB.SUBSTR(st.sql_text, 4000, 1)
       FROM dba_hist_sqltext st
      WHERE st.sql_id = h.sql_id
        AND ROWNUM = 1) AS sql_text  -- truncated to 4000 chars for output
FROM dba_hist_active_sess_history h
CROSS JOIN cpu_total t
WHERE h.sample_time BETWEEN :ash_begin_time AND :ash_end_time
  AND h.session_state = 'ON CPU'
  AND h.sql_id IS NOT NULL
GROUP BY h.sql_id, t.total_samples
ORDER BY cpu_samples DESC
FETCH FIRST :top_n_count ROWS ONLY;
SET HEADING OFF
PROMPT -- Tune high-CPU SQL: Add indexes, rewrite, or gather stats. Get full text via V$SQL if current. --

--------------------------------------------------------------------------------
-- 8. Top N Wait Events (Focusing on Non-Idle, to Complement CPU Analysis)
--------------------------------------------------------------------------------
PROMPT -- Top :top_n_count Wait Events around :desired_timestamp (If CPU not the only issue) --
SET HEADING ON
-- Percentage base: every WAITING sample in the window (idle waits included,
-- matching the original denominator), computed once in a CTE.
WITH wait_total AS (
    SELECT COUNT(*) AS total_samples
    FROM dba_hist_active_sess_history
    WHERE sample_time BETWEEN :ash_begin_time AND :ash_end_time
      AND session_state = 'WAITING'
)
SELECT
    a.event,
    a.wait_class,
    COUNT(*) AS wait_samples,
    ROUND(COUNT(*) * 100 / GREATEST(t.total_samples, 1), 2) AS wait_samples_pct
FROM dba_hist_active_sess_history a
CROSS JOIN wait_total t
WHERE a.sample_time BETWEEN :ash_begin_time AND :ash_end_time
  AND a.session_state = 'WAITING'
  AND a.wait_class <> 'Idle'  -- idle waits excluded from the ranking itself
GROUP BY a.event, a.wait_class, t.total_samples
ORDER BY wait_samples DESC
FETCH FIRST :top_n_count ROWS ONLY;
SET HEADING OFF
PROMPT -- If waits high (e.g., I/O), correlate with CPU overload. --

--------------------------------------------------------------------------------
-- 9. Top N Users by CPU Samples (Heavy Hitters)
--------------------------------------------------------------------------------
PROMPT -- Top :top_n_count Users by CPU Samples around :desired_timestamp --
SET HEADING ON
-- Percentage base: all ON CPU samples in the window (computed once in a CTE
-- rather than as a per-row scalar subquery).
WITH cpu_total AS (
    SELECT COUNT(*) AS total_samples
    FROM dba_hist_active_sess_history
    WHERE sample_time BETWEEN :ash_begin_time AND :ash_end_time
      AND session_state = 'ON CPU'
)
SELECT
    u.username,
    COUNT(*) AS cpu_samples,
    ROUND(COUNT(*) * 100 / GREATEST(t.total_samples, 1), 2) AS cpu_samples_pct
FROM dba_hist_active_sess_history a
INNER JOIN dba_users u ON u.user_id = a.user_id
CROSS JOIN cpu_total t
WHERE a.sample_time BETWEEN :ash_begin_time AND :ash_end_time
  AND a.session_state = 'ON CPU'
GROUP BY u.username, t.total_samples
ORDER BY cpu_samples DESC
FETCH FIRST :top_n_count ROWS ONLY;
SET HEADING OFF
PROMPT -- Focus on top users for application tuning or quotas. --

--------------------------------------------------------------------------------
-- 10. Top N Programs / Modules by CPU Samples
--------------------------------------------------------------------------------
PROMPT -- Top :top_n_count Programs and Modules by CPU Samples around :desired_timestamp --
SET HEADING ON
-- Percentage base: all ON CPU samples in the window, computed once in a CTE.
WITH cpu_total AS (
    SELECT COUNT(*) AS total_samples
    FROM dba_hist_active_sess_history
    WHERE sample_time BETWEEN :ash_begin_time AND :ash_end_time
      AND session_state = 'ON CPU'
)
SELECT
    a.program,
    a.module,
    COUNT(*) AS cpu_samples,
    ROUND(COUNT(*) * 100 / GREATEST(t.total_samples, 1), 2) AS cpu_samples_pct
FROM dba_hist_active_sess_history a
CROSS JOIN cpu_total t
WHERE a.sample_time BETWEEN :ash_begin_time AND :ash_end_time
  AND a.session_state = 'ON CPU'
GROUP BY a.program, a.module, t.total_samples
ORDER BY cpu_samples DESC
FETCH FIRST :top_n_count ROWS ONLY;
SET HEADING OFF
PROMPT -- Identifies application components driving CPU. --

--------------------------------------------------------------------------------
-- 11. Top N Accessed Objects by Active Samples (Potential Hotspots)
--------------------------------------------------------------------------------
PROMPT -- Top :top_n_count Accessed Objects by Active Samples around :desired_timestamp --
SET HEADING ON
-- current_obj# > 0 skips "no current object" / undo markers; system and
-- RDS-maintenance schemas are filtered out to surface application hotspots.
SELECT
    obj.owner AS object_owner,
    obj.object_name,
    obj.object_type,
    COUNT(*) AS access_samples
FROM dba_hist_active_sess_history ash
INNER JOIN dba_objects obj
    ON obj.object_id = ash.current_obj#
WHERE ash.current_obj# > 0
  AND ash.sample_time BETWEEN :ash_begin_time AND :ash_end_time
  AND obj.object_type IN ('TABLE', 'INDEX', 'PARTITION', 'SUBPARTITION')
  AND obj.owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'AUDSYS', 'RDSADMIN')
GROUP BY obj.owner, obj.object_name, obj.object_type
ORDER BY access_samples DESC
FETCH FIRST :top_n_count ROWS ONLY;
SET HEADING OFF
PROMPT -- High access may indicate contention; check indexes/stats. --

--------------------------------------------------------------------------------
-- 12. New Connections (Delta Logons) History from AWR
--------------------------------------------------------------------------------
PROMPT -- New Connections (Delta Logons) History around :desired_timestamp --
PROMPT -- Shows connection spikes per snapshot interval --
SET HEADING ON
WITH logons AS (
    -- 'logons cumulative' is an ever-increasing counter (except across
    -- instance restarts); LAG supplies the previous snapshot's value so
    -- the delta is the number of new connections in the interval.
    SELECT
        s.snap_id,
        s.begin_interval_time,
        s.end_interval_time,
        stat.value AS cumulative_logons,
        LAG(stat.value) OVER (ORDER BY s.snap_id) AS prev_cumulative_logons
    FROM dba_hist_sysstat stat
    JOIN dba_hist_snapshot s
        ON s.snap_id = stat.snap_id
       AND s.dbid = stat.dbid
       AND s.instance_number = stat.instance_number
    WHERE stat.stat_name = 'logons cumulative'
      AND s.begin_interval_time BETWEEN CAST(:desired_timestamp AS TIMESTAMP) - INTERVAL '1' HOUR
                                    AND CAST(:desired_timestamp AS TIMESTAMP) + INTERVAL '1' HOUR  -- adjustable window
),
deltas AS (
    SELECT
        snap_id,
        begin_interval_time,
        end_interval_time,
        -- GREATEST floors negative deltas (counter reset after restart) at 0.
        GREATEST(cumulative_logons - prev_cumulative_logons, 0) AS new_logons_in_interval,
        -- BUG FIX: the original used EXTRACT(SECOND FROM interval), which
        -- returns only the seconds *component* (0-59), not the interval's
        -- total length -- wrong rates (or divide-by-zero) for typical hourly
        -- snapshots. Sum all interval components into total seconds instead.
          EXTRACT(DAY    FROM (end_interval_time - begin_interval_time)) * 86400
        + EXTRACT(HOUR   FROM (end_interval_time - begin_interval_time)) * 3600
        + EXTRACT(MINUTE FROM (end_interval_time - begin_interval_time)) * 60
        + EXTRACT(SECOND FROM (end_interval_time - begin_interval_time)) AS interval_seconds
    FROM logons
    WHERE prev_cumulative_logons IS NOT NULL  -- first snapshot has no delta
)
SELECT
    snap_id,
    begin_interval_time,
    end_interval_time,
    new_logons_in_interval,
    -- NULLIF guards a zero-length interval (rate shown as NULL, not an error).
    ROUND(new_logons_in_interval / NULLIF(interval_seconds, 0), 2) AS new_logons_per_second
FROM deltas
ORDER BY begin_interval_time;
SET HEADING OFF
PROMPT -- High spikes may indicate connection storms; check app pooling. --

PROMPT -- Investigation script execution complete. --
PROMPT -- Remember to review the generated AWR/ASH HTML reports from the AWS Console. --
PROMPT -- If no data in queries, verify time window and AWR retention. --

-- Reset SQL*Plus settings (optional, good practice)
-- SET PAGESIZE 14
-- SET FEEDBACK ON
-- SET HEADING ON
-- SET TRIMSPOOL OFF

-- SET LONG 80

No comments: