Monday, January 12, 2009

How to generate testing data

Goal
----
Generate testing data

Interpret
---------
Many times you need to build a test case to prove your ideas or prototype, benchmark, ask questions ... etc.

Tom: You would better supply very very simple create tables and insert statements.
The SMALLEST create table possible (no tablespaces, no schema names, just like I do in my examples for you)

Solution
--------

*) 500 distinct value, to get centain data distribution

Mod(rownum,500)
dbms_random.value(1,500)
dbms_random.normal()

*) string

rpad('x',500,'x')
lpad(RowNum,200,'*')
dbms_random.string('l',500)
# 'u', 'U' - returning string in uppercase alpha characters
# 'l', 'L' - returning string in lowercase alpha characters
# 'p': any printable char. this one is slow

*) source

# dual connect by level <= 3000
# all_objects
# WITH subquery factor

*) Random order by

ORDER BY dbms_random.random;

*) Example

SQL>
--Resets the seed
exec dbms_random.seed(1);

drop table t1;
create table t1
nologging -- adjust as necessary
as
with g as (
select -- materialize
level
from dual
connect by level <= 2000
)
select
/*x leading(v1,v2,v3) use_merge(v2) use_merge(v3) */
/*+ leading(v1,v2,v3) use_nl(v2) use_nl(v3) */
rownum id,
Round(dbms_random.normal * 1000,3) n1,
Mod(rownum,500) n5,
Mod(rownum,400) n4,
Round(dbms_random.value(1,700)) n7b
,rpad(rownum,500,'x') s5
-- ,dbms_random.string('l',20) s5b
,Trunc(sysdate,'YYYY') + Mod(rownum,1440)/1440 update_date
from
g v1
,g v2
,g v3
where
rownum <= 111222
-- order by dbms_random.random
;

create index t1_i1 on t1(n5) nologging ;

exec dbms_stats.gather_table_stats(user,'T1');

# MuJiang

DROP TABLE abelisting.book_delete_stage_part;

CREATE TABLE abelisting.book_delete_stage_part (
listingsid number(20),
qty number(5),
delete_flag number(1),
update_date timestamp(3) default SysTimestamp,
part_mi number(2) default mod(to_number(to_char(sysdate,'MI')),10)
)
PARTITION BY LIST (part_mi) (
PARTITION data_part_0 VALUES (0),
PARTITION data_part_1 VALUES (1),
PARTITION data_part_2 VALUES (2),
PARTITION data_part_3 VALUES (3),
PARTITION data_part_4 VALUES (4),
PARTITION data_part_5 VALUES (5),
PARTITION data_part_6 VALUES (6),
PARTITION data_part_7 VALUES (7),
PARTITION data_part_8 VALUES (8),
PARTITION data_part_9 VALUES (9),
PARTITION dummy_null VALUES (NULL),
PARTITION dummy_other VALUES (DEFAULT)
)
tablespace data_auto;

-- 5000 distinct values, 2000 duplicate values
insert into abelisting.book_delete_stage_part(listingsid,delete_flag,update_date, part_mi)
select trunc(dbms_random.value(1,5000)), mod(level,2), trunc(sysdate,'DD') + mod(level,60)/1440,
mod(level,10)
from dual
connect by level <= 7000;


Reference
---------

# Christian Antognini
drop table t;
CREATE TABLE t
AS
SELECT rownum AS id,
round(5678+dbms_random.normal*1234) AS n1,
mod(255+trunc(dbms_random.normal*1000),255) AS n2,
dbms_random.string('p',255) AS pad
FROM dual
CONNECT BY level <= 10000
ORDER BY dbms_random.value;

# Jonathan Lewis

drop table t1;
create table t1
nologging -- adjust as necessary
as
with generator as (
select --+ materialize
rownum id,
substr(dbms_random.string('U',4),1,4) sortcode
from all_objects
where rownum <= 5000
)
select
/*+ ordered use_nl(v2) */
substr(v2.sortcode,1,4) || substr(v1.sortcode,1,2) sortcode,
substr(v1.sortcode,2,2) v2,
substr(v2.sortcode,2,3) v3
from
generator v1,
generator v2
where
rownum <= 1048576
;


# Tanel Põder
Generating lots of rows using connect by - safely!

select rownum r
from
(select rownum r from dual connect by rownum <= 1000) a,
(select rownum r from dual connect by rownum <= 1000) b,
(select rownum r from dual connect by rownum <= 1000) c
where rownum <= 100000000;

AskTom

AskTom

ops$tkyte@ORA9IR2>

CREATE TABLE t1
(
dt date,
x int,
y varchar2(25)
)
PARTITION BY RANGE (dt)
subpartition by hash(x) subpartitions 8
(
PARTITION part1 VALUES LESS THAN (to_date('13-mar-2003','dd-mon-yyyy')) ,
PARTITION part2 VALUES LESS THAN (to_date('14-mar-2003','dd-mon-yyyy')) ,
PARTITION junk VALUES LESS THAN (MAXVALUE)
);

insert /*+ APPEND */ into t1
select to_date('12-mar-2003','dd-mon-yyyy')+mod(rownum,4), rownum, rpad('*',25,'*')
from all_objects;

drop table t;
create table t as select * from all_objects;
create index t_idx on t(object_name);

No comments: