AWS Reshift Insert into Table without S3

To generate random data into a table without using S3 for doing some quick tests

drop table if exists seed;

create table seed ( n int8 );

insert into seed (
SELECT
p0.n
+ p1.n*2
+ p2.n * POWER(2,2)
+ p3.n * POWER(2,3)
+ p4.n * POWER(2,4)
+ p5.n * POWER(2,5)
+ p6.n * POWER(2,6)
+ p7.n * POWER(2,7)
as number
FROM
(SELECT 0 as n UNION SELECT 1) p0,
(SELECT 0 as n UNION SELECT 1) p1,
(SELECT 0 as n UNION SELECT 1) p2,
(SELECT 0 as n UNION SELECT 1) p3,
(SELECT 0 as n UNION SELECT 1) p4,
(SELECT 0 as n UNION SELECT 1) p5,
(SELECT 0 as n UNION SELECT 1) p6,
(SELECT 0 as n UNION SELECT 1) p7
Order by 1
);

commit;

drop table if exists test_table;

create table test_table(
ingest_time timestamp encode zstd,
doi date encode zstd,
id int encode bytedict,
value float encode zstd,
data_sig varchar(32) encode zstd
) DISTKEY(id) SORTKEY(ingest_time);

commit;

insert into test_table (
select dateadd(‘msec’, – 10n , getdate() ) as ingest_time, trunc(dateadd(‘msec’, – 10n , getdate() )) as doi,id,
n::float / 1000000 as value, ‘sig-‘ || to_hex(n % 16) as data_sig
FROM (select (a.n + b.n + c.n + d.n) as n, (random() * 1000)::int as id from seed a cross join (select n256 as n from seed) b cross join (select n65536 as n from seed) c
cross join (select n*16777216 as n from ( select distinct (n/16)::int as n from seed ) ) d)
) order by ingest_time;

commit;

analyze test_table;

select count(*) from test_table;

–Consecutive run on of above insert query will 268 million rows for each execution–

You can create a table with about 1 billion rows in 8 minutes on a ds2.xlarge cluster

Advertisement

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s