Goal
To process a big query result set from the database, the application server is often limited by its memory footprint, so we need to throttle the output and consume it chunk by chunk.
Solution
Q: How do you eat an elephant? A: One piece at a time.
The staging table is placed in a tablespace created with EXTENT MANAGEMENT LOCAL UNIFORM SIZE 4M; you may choose another uniform size based on your result-size limit.
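For reference, a minimal sketch of creating such a tablespace; the datafile path and sizes here are illustrative assumptions, not part of the original setup:

-- Sketch only: datafile path and sizes are placeholders.
-- With UNIFORM SIZE 4M, every extent (and so every chunk) maps to
-- exactly 4 MB of storage.
create tablespace data_auto
  datafile '/u01/oradata/orcl/data_auto01.dbf' size 512m autoextend on next 64m
  extent management local
  uniform size 4m;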
We then read the data one extent at a time; after finishing an extent, we mark it as processed.
If the client application crashes or fails, we can restart at the failed extent.
We may also let a chunk span two or more extents to make the chunk size more flexible; a sketch of grouping extents into larger chunks appears after the metadata queries below.
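To make the read-mark-restart cycle concrete, here is a minimal PL/SQL sketch of a driver loop over the rowid_range_job table built below. The loop structure and the count(*) placeholder are assumptions about how a client would consume the ranges, not code from the original post:

declare
  l_cnt pls_integer;
begin
  for job in (select rowid as job_rid, min_rowid, max_rowid
                from rowid_range_job
               where job_name = 'test_output_job'
                 and is_processed = 0
               order by extent_id)
  loop
    -- Placeholder work: replace this count with the real extract logic.
    select count(*)
      into l_cnt
      from t
     where rowid between job.min_rowid and job.max_rowid;

    -- Mark the chunk done and commit, so that after a crash the loop
    -- restarts at the first extent still flagged is_processed = 0.
    update rowid_range_job
       set is_processed = 1,
           process_date = sysdate
     where rowid = job.job_rid;
    commit;
  end loop;
end;
/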
Reference
On the use of DBMS_ROWID.rowid_create
[http://bit.ly/bphHJb]
Setup
-- Create and populate the source table first, so that its extents
-- exist when the job table is built from dba_extents.
drop table t purge;

create table t
( n1 number(10),
  d1 date,
  c1 varchar2(2000)
);

insert --+ append
  into t(n1, d1, c1)
select rownum, sysdate, rpad('a',1200)
  from dual
connect by level <= 10000;

commit;

-- Build the job table: one row per extent of T, with the rowid range
-- that brackets all rows in that extent.
-- Note: dbms_rowid.rowid_create expects the relative file number, so
-- dba_extents.relative_fno is used here; it can differ from file_id
-- in databases with many datafiles.
drop table rowid_range_job purge;

create table rowid_range_job
tablespace data_auto nologging
as
select e.extent_id,
       e.block_id,
       e.block_id + e.blocks - 1 block_id_end,
       cast(dbms_rowid.rowid_create(1, o.data_object_id, e.relative_fno,
                                    e.block_id, 0) as urowid) min_rowid,
       cast(dbms_rowid.rowid_create(1, o.data_object_id, e.relative_fno,
                                    e.block_id + e.blocks - 1, 10000) as urowid) max_rowid,
       cast(o.object_name as varchar2(30))  table_name,
       cast(null as varchar2(30))           partition_name,
       cast('isbn_extract' as varchar2(30)) job_name,
       cast(null as date)                   process_date,
       cast(null as number(1))              is_processed
  from dba_extents e, user_objects o
 where o.object_name  = 'T'
   and e.segment_name = 'T'
   and e.owner        = user
   and e.segment_type = 'TABLE'
 order by e.extent_id;
Generate and check the rowid range split:
-- Cross-check: every row of T should fall into exactly one extent's
-- rowid range; the rollup total should equal the table's row count.
with data as (
  select e.extent_id,
         e.block_id,
         e.block_id + e.blocks - 1 block_id_end,
         dbms_rowid.rowid_create(1, o.data_object_id, e.relative_fno,
                                 e.block_id, 0) min_rowid,
         dbms_rowid.rowid_create(1, o.data_object_id, e.relative_fno,
                                 e.block_id + e.blocks - 1, 10000) max_rowid
    from dba_extents e, user_objects o
   where o.object_name  = 'T'
     and e.segment_name = o.object_name
     and e.owner        = user
     and e.segment_type = 'TABLE'
)
select extent_id, count(*) cnt
  from data, t
 where t.rowid between data.min_rowid and data.max_rowid
 group by rollup (extent_id);

-- Register the ranges as a named job.
delete rowid_range_job where job_name = 'test_output_job';

insert into rowid_range_job
select e.extent_id,
       e.block_id,
       e.block_id + e.blocks - 1 block_id_end,
       cast(dbms_rowid.rowid_create(1, o.data_object_id, e.relative_fno,
                                    e.block_id, 0) as urowid) min_rowid,
       cast(dbms_rowid.rowid_create(1, o.data_object_id, e.relative_fno,
                                    e.block_id + e.blocks - 1, 32000) as urowid) max_rowid,
       cast(o.object_name as varchar2(30))     table_name,
       cast(null as varchar2(30))              partition_name,
       cast('test_output_job' as varchar2(30)) job_name,
       cast(null as date)                      process_date,
       0                                       is_processed
  from dba_extents e, user_objects o
 where o.object_name  like 'T'
   and e.segment_name = o.object_name
   and e.owner        = user
   and e.segment_type = 'TABLE'
 order by o.object_name, e.extent_id;

commit;
Query the rowid extent-range split metadata:
select extent_id,
       block_id,
       block_id_end,
       min_rowid,
       max_rowid
       --, table_name
       --, partition_name
       --, job_name
       --, process_date
       --, is_processed
  from rowid_range_job
 where job_name = 'test_output_job';

-- Same per-extent row-count check, this time driven by the stored metadata.
with data as (
  select extent_id, block_id, block_id_end,
         min_rowid, max_rowid,
         table_name, job_name, process_date, is_processed
    from rowid_range_job
   where job_name   = 'test_output_job'
     and table_name = 'T'
)
select extent_id, count(*) cnt
  from data, t
 where t.rowid between data.min_rowid and data.max_rowid
 group by rollup (extent_id)
/
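As noted in the solution, a chunk can also span several extents. Below is a hedged sketch of grouping every N consecutive extents into one chunk; :extents_per_chunk is an illustrative bind variable, and the sketch assumes extent_id values are contiguous per job, as produced by the setup above:

-- Group consecutive extents into chunks of :extents_per_chunk extents,
-- then take the rowid bounds from the first and last extent of each group.
select g.chunk_id, lo.min_rowid, hi.max_rowid
  from (select floor(extent_id / :extents_per_chunk) chunk_id,
               min(extent_id) lo_ext,
               max(extent_id) hi_ext
          from rowid_range_job
         where job_name = 'test_output_job'
         group by floor(extent_id / :extents_per_chunk)) g,
       rowid_range_job lo,
       rowid_range_job hi
 where lo.job_name = 'test_output_job' and lo.extent_id = g.lo_ext
   and hi.job_name = 'test_output_job' and hi.extent_id = g.hi_ext
 order by g.chunk_id;

This keeps the bookkeeping in rowid_range_job unchanged; only the consumer decides how many extents make up one unit of work.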