Thursday, September 30, 2010

Throttle big result set

Goal

To process a big query result set from database, many a time the application server is limited by memory footprint, so we need to throttle the output chunk by chunk.

Solution
Q: How do you eat an elephant? A: One piece at a time.
The staging table will put into a tablespace with EXTENT MANAGEMENT LOCAL UNIFORM SIZE 4M. you may choose other uniform size based on your result size limit.
And then we will read data one extent at a time, after finish one extent, mark it as processed.
When client application crashed or failed, we can restart at failed extent.

We may slow make the chunk across 2 or many extents to make it more flexible.

Reference
On the use of DBMS_ROWID.rowid_create
[http://bit.ly/bphHJb]

Setup
drop table rowid_range_job purge;

create table rowid_range_job
tablespace data_auto nologging
as
select e.extent_id, e.block_id, e.block_id+blocks-1 block_id_end,
 cast(dbms_rowid.rowid_create( 1, o.data_object_id, e.file_id, e.block_id, 0 ) as urowid) min_rowid,
 cast(dbms_rowid.rowid_create( 1, o.data_object_id, e.file_id, e.block_id+e.blocks-1, 10000 ) as urowid) max_rowid,
 cast(o.object_name as varchar2(30)) table_name,
 cast(null as varchar2(30)) partition_name,
 cast('isbn_extract' as varchar2(30)) job_name,
 cast(null as date)         process_date,
 cast(null as number(1))    is_processed
from dba_extents e, user_objects o
 where o.object_name = 'T'
   and e.segment_name = 'T'
   and e.owner = user
   and e.segment_type = 'TABLE'
 order by e.extent_id
;

drop table t purge;
create table t
(
n1 number(10),
d1 date,
c1 varchar2(2000)
);

insert --+ append
 into t(n1,d1,c1)
select rownum, sysdate, rpad('a',1200)
from dual
connect by level <= 10000;
commit;

Generate and check rowid range split,





with data as
(
select e.extent_id, e.block_id, e.block_id+blocks-1,
       dbms_rowid.rowid_create( 1, o.data_object_id, e.file_id, e.block_id, 0 ) min_rowid,
       dbms_rowid.rowid_create( 1, o.data_object_id, e.file_id, e.block_id+e.blocks-1, 10000 ) max_rowid
  from dba_extents e,
       user_objects o
 where o.object_name = 'T'
   and e.segment_name = o.object_name
   and e.owner = user
   and e.segment_type = 'TABLE'
)
select extent_id, count(*) cnt
  from data, T t
 where t.rowid between data.min_rowid and data.max_rowid
 group by rollup (extent_id)
;

delete rowid_range_job where job_name = 'test_output_job';

INSERT INTO rowid_range_job
   SELECT   e.extent_id, e.block_id, e.block_id + blocks - 1 block_id_end,
            CAST
               (DBMS_ROWID.rowid_create (1,
                                         o.data_object_id,
                                         e.file_id,
                                         e.block_id,
                                         0
                                        ) AS UROWID
               ) min_rowid,
            CAST
               (DBMS_ROWID.rowid_create (1,
                                         o.data_object_id,
                                         e.file_id,
                                         e.block_id + e.blocks - 1,
                                         32000
                                        ) AS UROWID
               ) max_rowid,
            CAST (o.object_name AS VARCHAR2 (30)) table_name,
            CAST (NULL AS VARCHAR2 (30)) partition_name,
            CAST ('test_output_job' AS VARCHAR2 (30)) job_name,
            CAST (NULL AS DATE) process_date, 0 is_processed
       FROM dba_extents e, user_objects o
      WHERE o.object_name LIKE 'T'
        AND e.segment_name = o.object_name
        AND e.owner = USER
        AND e.segment_type = 'TABLE'
   ORDER BY o.object_name, e.extent_id;

commit;

Query rowid extent range split metadata





select EXTENT_ID,
BLOCK_ID,
BLOCK_ID_END,
MIN_ROWID,
MAX_ROWID
--,TABLE_NAME
--,PARTITION_NAME
--,JOB_NAME
--,PROCESS_DATE
--,IS_PROCESSED 
from invdb.rowid_range_job where job_name = 'test_output_job';

with data as
(
  select extent_id, block_id, block_id_end, min_rowid, max_rowid, table_name, job_name, process_date, is_processed
  from rowid_range_job
  where job_name = 'test_output_job'
    and table_name = 'T'
)
select extent_id, count(*) cnt
  from data, T t
 where t.rowid between data.min_rowid and data.max_rowid
 group by rollup (extent_id)
/

No comments: