Goal

To show how IN and EXISTS Correlated Subquery works, and how they changed in Oracle 11g CBO SQL Engine. Also covered the NOT IN and NOT EXISTS difference.

Setup

drop table t1 cascade constraints purge;
drop table t2 purge;

create table t1(x number);
create table t2(y number);

insert into t1(x) values(1);
insert into t1(x) values(2);
insert into t1(x) values(Null);

insert into t2(y) values(1);
insert into t2(y) values(Null);
commit;

create index t1_x on t1(x);
create index t2_y on t2(y);

IN

t2 is small, index on t1.x, the subquery ( select y from T2 ) is small, probable full scan t2

Select * from T1 where x in
 ( select y from T2 );
=
select * 
  from t1, ( select distinct y from t2 ) t2
 where t1.x = t2.y;

Small t2,

set autot trace exp

Select /*+ cardinality(t1,50000) */ * from T1 where x in
 ( select /*+ cardinality(t2, 200) */ y from T2 );
------------------------------------------
| Id  | Operation         | Name | Rows  |
------------------------------------------
|   0 | SELECT STATEMENT  |      |   224 |
|   1 |  NESTED LOOPS     |      |   224 |
|   2 |   SORT UNIQUE     |      |   200 |
|   3 |    INDEX FULL SCAN| T2_Y |   200 |
|*  4 |   INDEX RANGE SCAN| T1_X |     1 |
------------------------------------------

Small t1

Select /*+ cardinality(t1,500) */ * from T1 where x in
 ( select /*+ cardinality(t2, 20000) */ y from T2 );
------------------------------------------
| Id  | Operation         | Name | Rows  |
------------------------------------------
|   0 | SELECT STATEMENT  |      |   500 |
|   1 |  NESTED LOOPS SEMI|      |   500 |
|   2 |   INDEX FULL SCAN | T1_X |   500 |
|*  3 |   INDEX RANGE SCAN| T2_Y | 20000 |
------------------------------------------

Exists

t1 is small, index on t2(y), full scan t1,

select * from t1 
 where exists ( select null from t2 where y = x );
=
   for x in ( select * from t1 )
   loop
      if ( exists ( select null from t2 where y = x.x )
      then 
         OUTPUT THE RECORD
      end if
   end loop

Small t2, big t1.

select /*+ cardinality(t1,50000) */ * from t1 
 where exists ( select /*+ cardinality(t2, 700) */ null from t2 where y = x );
------------------------------------------
| Id  | Operation         | Name | Rows  |
------------------------------------------
|   0 | SELECT STATEMENT  |      |   782 |
|   1 |  NESTED LOOPS     |      |   782 |
|   2 |   SORT UNIQUE     |      |   700 |
|   3 |    INDEX FULL SCAN| T2_Y |   700 |
|*  4 |   INDEX RANGE SCAN| T1_X |     1 |
------------------------------------------

Small t1, big t2.

select /*+ cardinality(t1,500) */ * from t1 
 where exists ( select /*+ cardinality(t2, 70000) */ null from t2 where y = x );
------------------------------------------
| Id  | Operation         | Name | Rows  |
------------------------------------------
|   0 | SELECT STATEMENT  |      |   500 |
|   1 |  NESTED LOOPS SEMI|      |   500 |
|   2 |   INDEX FULL SCAN | T1_X |   500 |
|*  3 |   INDEX RANGE SCAN| T2_Y | 70000 |
------------------------------------------

If both the subquery and the outer table are huge — either might work as well as the
other — depends on the indexes and other factors.

Not in and Not exists are different.

select * from t1 outer
where outer.x not in (select y from t2);

is NOT the same as

select * from t1 outer
where not exists (select null from t2 where t2.y = outer.x);

UNLESS the expression "y" is not null. That said:

select * from t1 outer
where outer.x not in (select y from t2 where y is not null);

is the same as

select * from t1 outer
where not exists (select null from t2 where t2.y = outer.x);

If t2.y is null-able,

select * from t1 outer
where outer.x not in (select y from t2 where y is null);

== equals ==


select * from t1 outer
where outer.x not in (2,3,Null);

return no rows.

select * from t1 outer
where outer.x not in (2,3);

Returns some rows.

IN & EXISTS, AskTom, http://bit.ly/aLvOeS

Set Stats

declare
 m_distcnt  number;
 m_density  number;
 m_nullcnt  number;
 m_avgclen  number;

begin

 m_distcnt := 200100;
 m_density := 0.002;
 m_nullcnt := 0;
 m_avgclen := 5;

 dbms_stats.set_column_stats(
  ownname  => user,
  tabname  => 't1',
  colname  => 'x',
  distcnt  => m_distcnt,
  density  => m_density,
  nullcnt  => m_nullcnt,
  avgclen  => m_avgclen
 );

 dbms_stats.set_column_stats(
  ownname  => user,
  tabname  => 't2',
  colname  => 'y',
  distcnt  => m_distcnt,
  density  => m_density,
  nullcnt  => m_nullcnt,
  avgclen  => m_avgclen
 );

  dbms_stats.set_table_stats( user, 't1', numrows => 2000000, numblks => 1000000);
  dbms_stats.set_table_stats( user, 't2', numrows => 2000000, numblks => 1000000);

end;
/

select  table_name,
 num_rows,
 blocks
from
 user_tables
where
 table_name in ('T1','T2')
;

select
 num_distinct,
 low_value,
 high_value,
 density,
 num_nulls,
 num_buckets,
 histogram
from
 user_tab_columns
where
 table_name in ('T1','T2')
and column_name in ('X','Y')
;

Developer 木匠

Monday, August 16, 2010

IN_EXISTS Correlated Subquery