Goal
To show how IN and EXISTS Correlated Subquery works, and how they changed in Oracle 11g CBO SQL Engine. Also covered the NOT IN and NOT EXISTS difference.Setup
drop table t1 cascade constraints purge; drop table t2 purge; create table t1(x number); create table t2(y number); insert into t1(x) values(1); insert into t1(x) values(2); insert into t1(x) values(Null); insert into t2(y) values(1); insert into t2(y) values(Null); commit; create index t1_x on t1(x); create index t2_y on t2(y);
IN
t2 is small, index on t1.x, the subquery ( select y from T2 ) is small, probable full scan t2Select * from T1 where x in ( select y from T2 ); = select * from t1, ( select distinct y from t2 ) t2 where t1.x = t2.y;
- Small t2,
set autot trace exp Select /*+ cardinality(t1,50000) */ * from T1 where x in ( select /*+ cardinality(t2, 200) */ y from T2 ); ------------------------------------------ | Id | Operation | Name | Rows | ------------------------------------------ | 0 | SELECT STATEMENT | | 224 | | 1 | NESTED LOOPS | | 224 | | 2 | SORT UNIQUE | | 200 | | 3 | INDEX FULL SCAN| T2_Y | 200 | |* 4 | INDEX RANGE SCAN| T1_X | 1 | ------------------------------------------
- Small t1
Select /*+ cardinality(t1,500) */ * from T1 where x in ( select /*+ cardinality(t2, 20000) */ y from T2 ); ------------------------------------------ | Id | Operation | Name | Rows | ------------------------------------------ | 0 | SELECT STATEMENT | | 500 | | 1 | NESTED LOOPS SEMI| | 500 | | 2 | INDEX FULL SCAN | T1_X | 500 | |* 3 | INDEX RANGE SCAN| T2_Y | 20000 | ------------------------------------------
Exists
t1 is small, index on t2(y), full scan t1,select * from t1 where exists ( select null from t2 where y = x ); = for x in ( select * from t1 ) loop if ( exists ( select null from t2 where y = x.x ) then OUTPUT THE RECORD end if end loop
- Small t2, big t1.
select /*+ cardinality(t1,50000) */ * from t1
where exists ( select /*+ cardinality(t2, 700) */ null from t2 where y = x );
------------------------------------------
| Id | Operation | Name | Rows |
------------------------------------------
| 0 | SELECT STATEMENT | | 782 |
| 1 | NESTED LOOPS | | 782 |
| 2 | SORT UNIQUE | | 700 |
| 3 | INDEX FULL SCAN| T2_Y | 700 |
|* 4 | INDEX RANGE SCAN| T1_X | 1 |
------------------------------------------
- Small t1, big t2.
select /*+ cardinality(t1,500) */ * from t1
where exists ( select /*+ cardinality(t2, 70000) */ null from t2 where y = x );
------------------------------------------
| Id | Operation | Name | Rows |
------------------------------------------
| 0 | SELECT STATEMENT | | 500 |
| 1 | NESTED LOOPS SEMI| | 500 |
| 2 | INDEX FULL SCAN | T1_X | 500 |
|* 3 | INDEX RANGE SCAN| T2_Y | 70000 |
------------------------------------------
If both the subquery and the outer table are huge — either might work as well as the
other — depends on the indexes and other factors.
Not in and Not exists are different.
select * from t1 outer
where outer.x not in (select y from t2);
is NOT the same asselect * from t1 outer
where not exists (select null from t2 where t2.y = outer.x);
UNLESS the expression "y" is not null. That said:select * from t1 outer
where outer.x not in (select y from t2 where y is not null);
is the same asselect * from t1 outer
where not exists (select null from t2 where t2.y = outer.x);
If t2.y is null-able,
select * from t1 outer
where outer.x not in (select y from t2 where y is null);
== equals ==
select * from t1 outer
where outer.x not in (2,3,Null);
return no rows.
select * from t1 outer
where outer.x not in (2,3);
Returns some rows.
IN & EXISTS, AskTom, http://bit.ly/aLvOeS
Set Stats
declare
m_distcnt number;
m_density number;
m_nullcnt number;
m_avgclen number;
begin
m_distcnt := 200100;
m_density := 0.002;
m_nullcnt := 0;
m_avgclen := 5;
dbms_stats.set_column_stats(
ownname => user,
tabname => 't1',
colname => 'x',
distcnt => m_distcnt,
density => m_density,
nullcnt => m_nullcnt,
avgclen => m_avgclen
);
dbms_stats.set_column_stats(
ownname => user,
tabname => 't2',
colname => 'y',
distcnt => m_distcnt,
density => m_density,
nullcnt => m_nullcnt,
avgclen => m_avgclen
);
dbms_stats.set_table_stats( user, 't1', numrows => 2000000, numblks => 1000000);
dbms_stats.set_table_stats( user, 't2', numrows => 2000000, numblks => 1000000);
end;
/
select table_name,
num_rows,
blocks
from
user_tables
where
table_name in ('T1','T2')
;
select
num_distinct,
low_value,
high_value,
density,
num_nulls,
num_buckets,
histogram
from
user_tab_columns
where
table_name in ('T1','T2')
and column_name in ('X','Y')
;