Tuesday, November 6, 2012

Reduced consistent gets - II


This will be the prolongation of the previous post.

create table t
(x  int primary key,
 y  varchar2(80) )
 /

insert into t(x,y)
select level, rpad('*',80,'*')
from dual
connect by level <= 1000
/
commit;

exec dbms_stats.gather_table_stats(user,'T');

create or replace function get_stats
return number as
 l_val number ;
begin
 select s2.value
 into l_val
 from v$sysstat s1,
 v$mystat s2
 where s1.statistic# = s2.statistic#
 and s1.name like 'consistent gets' ;

 return l_val;
end;
/

Lets do a simple select and measure the consistent gets in both 10g and 11g database.

rajesh@ORA10G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select x from t, (select mod(level,1000)+1 as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /
 Total Consistent gets = 101001

PL/SQL procedure successfully completed.

rajesh@ORA11G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select x from t, (select mod(level,1000)+1 as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /
 Total Consistent gets = 2602

PL/SQL procedure successfully completed.

This is some what an expected behaviour, since oracle 11g decided to cache block after each lookup in the hope it re-uses its again. Instead of hitting the buffer cache again.

As soon as we start reading the data randomly, this effect disappered.

rajesh@ORA10G> /* Reading PK Randomly */
rajesh@ORA10G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select x from t, (select trunc(dbms_random.value(1,1000)) as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /
Total Consistent gets = 101001

PL/SQL procedure successfully completed.

rajesh@ORA11G> /* Reading PK Randomly */
rajesh@ORA11G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select x from t, (select trunc(dbms_random.value(1,1000)) as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /

Total Consistent gets = 102262

PL/SQL procedure successfully completed.
Table Access by Index Rowid

rajesh@ORA10G>
rajesh@ORA10G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select Y from t, (select mod(level,1000)+1 as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /
Total Consistent gets = 201001

PL/SQL procedure successfully completed.

What we are doing here is reading data from table itself (not from the index). We have to do an Index unique scan followed by an Table access by rowid, resulting in Two logical IO's per row. Now lets see what happens in Oracle 11g.

rajesh@ORA11G>
rajesh@ORA11G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select Y from t, (select mod(level,1000)+1 as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /
Total Consistent gets = 102602

PL/SQL procedure successfully completed.

you can see we got only single IO per row. In other words Index acess were neutralized. Optimization still works for index even though we acess tables inbetween them.

Table Self Join

When we add the same table twice in join, it adds up in Oracle 10g but not in Oracle 11g.

rajesh@ORA10G> /* How about Same Table Twice */
rajesh@ORA10G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select t.x from t, (select mod(level,1000)+1 as l
  6      from dual
  7      connect by level <= 100000 ) t1, t t3
  8      where t.x = t1.l
  9      and t3.x = t1.l )
 10   loop
 11    null;
 12   end loop;
 13 
 14   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 15  end;
 16  /
Total Consistent gets = 202002

PL/SQL procedure successfully completed.

rajesh@ORA11G> /* How about Same Table Twice */
rajesh@ORA11G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select t.x from t, (select mod(level,1000)+1 as l
  6      from dual
  7      connect by level <= 100000 ) t1, t t3
  8      where t.x = t1.l
  9      and t3.x = t1.l )
 10   loop
 11    null;
 12   end loop;
 13 
 14   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 15  end;
 16  /
Total Consistent gets = 2602

PL/SQL procedure successfully completed.
Its still equivalent to having table ( 'T' ) only once in the Self join.


Across Fetches

But this optimization doesn't spanned across different fetches.

rajesh@ORA11G> /* Reading same value across fetches */
rajesh@ORA11G> declare
  2   l_stats number;
  3  begin
  4   l_stats := get_stats ;
  5   for x in ( select x from t, (select 1 as l
  6      from dual
  7      connect by level <= 100000 ) t1
  8      where t.x = t1.l )
  9   loop
 10    null;
 11   end loop;
 12 
 13   dbms_output.put_line (' Total Consistent gets = ' || ( get_stats - l_stats )  );
 14  end;
 15  /
Total Consistent gets = 1008

PL/SQL procedure successfully completed.

Here we join the data set 'T1' with table 'T'  for the value x = 1 for 100 K iterations.  but the join become sucess for 1000 iterations. so we get consistent gets close to that.

By default cursor for loop does implicit array fetch of 100 rows and looks like consistent gets cannot operate across fetches.


In overall it looks like a nice feature and it seems Oracle 11g have introduced these little tricks and features all over the place
 

Saturday, November 3, 2012

Reduced consistent gets in 11g

Oracle database 11g has as Optimization with consistent gets involved in Nested loop joins.

drop table t1 purge;
drop table t2 purge;

create table t1
nologging as
select a.*,rownum as id
from all_objects a
where rownum <= 10000;

create table t2
nologging as
select * from t1;

begin
 dbms_stats.gather_table_stats(user,'T1');
 dbms_stats.gather_table_stats(user,'T2');
end;
/

create index t1_ind on t1(id);
select /*+ use_nl(t2,t1) */ count(*)
from t1, t2
where t1.id = t2.id
/

Tkprof from Oracle 10g database shows me this.


********************************************************************************
select /*+ use_nl(t2,t1) */ count(*)
from t1, t2
where t1.id = t2.id

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.02          0          0          0           0
Fetch        2      0.04       0.04          0      10157          0           1
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        4      0.04       0.06          0      10157          0           1

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 61 

Rows     Row Source Operation
-------  ---------------------------------------------------
      1  SORT AGGREGATE (cr=10157 pr=0 pw=0 time=46412 us)
  10000   NESTED LOOPS  (cr=10157 pr=0 pw=0 time=50094 us)
  10000    TABLE ACCESS FULL T2 (cr=135 pr=0 pw=0 time=77 us)
  10000    INDEX RANGE SCAN T1_IND (cr=10022 pr=0 pw=0 time=36338 us)(object id 53690)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                       2        0.00          0.00
  SQL*Net message from client                     2        0.00          0.00
********************************************************************************


Where as Tkprof from Oracle 11g database shows me this.


********************************************************************************
select /*+ use_nl(t2,t1) */ count(*)
from t1, t2
where t1.id = t2.id

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch        2      0.03       0.02          0        288          0           1
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        4      0.03       0.02          0        288          0           1

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 84 

Rows     Row Source Operation
-------  ---------------------------------------------------
      1  SORT AGGREGATE (cr=288 pr=0 pw=0 time=0 us)
  10000   NESTED LOOPS  (cr=288 pr=0 pw=0 time=26199 us cost=10047 size=80000 card=10000)
  10000    TABLE ACCESS FULL T2 (cr=140 pr=0 pw=0 time=2657 us cost=42 size=40000 card=10000)
  10000    INDEX RANGE SCAN T1_IND (cr=148 pr=0 pw=0 time=0 us cost=1 size=4 card=1)(object id 74845)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                       2        0.00          0.00
  SQL*Net message from client                     2        0.00          0.00
********************************************************************************


We are seeing a reduced consistent gets in Oracle 11g because the dedicated server decide in keeping the index block after each lookup in the hopes it could use it again right away. Instead of reading and re-reading the block time after time. The dedicated server is caching the block in your pga, so that it doesn't have to get and re-get from buffer cache.


drop table t1 purge;
drop table t2 purge;

create table t1
nologging as
select *
from (
select a.*,rownum as id
from all_objects a
where rownum <= 10000
  )
order by dbms_random.random;

create table t2
nologging as
select * from t1;

begin
 dbms_stats.gather_table_stats(user,'T1');
 dbms_stats.gather_table_stats(user,'T2');
end;
/

create index t1_ind on t1(id);

********************************************************************************
select /*+ use_nl(t2,t1) */ count(*)
from t1, t2
where t1.id = t2.id
call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch        2      0.03       0.03          0      10162          0           1
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        4      0.03       0.03          0      10162          0           1
Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 84 
Rows     Row Source Operation
-------  ---------------------------------------------------
      1  SORT AGGREGATE (cr=10162 pr=0 pw=0 time=0 us)
  10000   NESTED LOOPS  (cr=10162 pr=0 pw=0 time=33540 us cost=10047 size=80000 card=10000)
  10000    TABLE ACCESS FULL T2 (cr=140 pr=0 pw=0 time=2911 us cost=42 size=40000 card=10000)
  10000    INDEX RANGE SCAN T1_IND (cr=10022 pr=0 pw=0 time=0 us cost=1 size=4 card=1)(object id 74848)

Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                       2        0.00          0.00
  SQL*Net message from client                     2        0.00          0.00
********************************************************************************

As soon as we start reading the index in random fashion the effect well disappered. Thus far optimization seems to be kicking in only if we repeaditly accessing the same block over and over again