select array_to_string(array_agg(chr(a[s])),'') "I wish you" from (
select a, generate_series(1,array_length(foo.a,1)) s from (
select array[72,97,112,112,121,32,88,109,97,115,33] as a
) foo
) bar;
Many thanks to all the people who developed and are developing PostgreSQL!
about me and my world
Mittwoch, 23. Dezember 2009
Sonntag, 15. November 2009
writeable CTE - a short performance test
Inspired by the talk from David Fetter and Mirko Tiikkaja performed on the PGDAY 2009 in Paris about writeable CTE i want to analyse a special aspect: minimizing I/O-operations with query clustering.
The talk shows an example that contains 7 tables: one person-table, 3 tables for details for a person (street, im and phone), which contains contact-data for the person, and 3 additional tables to map each person to multiple streets, im and phone.
Assumed, there are 2 entries for street, im and phone for a person, this results in 1 insert into the person-table, respectively 2 inserts into street, im and phone. In addition it needs 2 inserts for every joining-table.
With the new feature 'writeable CTE', the effort is reduced to single, (but complex) SQL-command, which contains - among others - all inserts. Of course, this procedure is complex.
In the following posting i want to analyse, if this single, complex command is faster than many simple traditional commands.
For this purpose i wrote 2 plpgsql-functions, which generate proper SQL-commands, one for the old style with many inserts
and one for the new syntax, both functions with exactly the same amount of data for the inserts.
Here are the two functions:
And:
And here the output of this functions (for example, for one person):
And, with writeable CTE:
The talk shows an example that contains 7 tables: one person-table, 3 tables for details for a person (street, im and phone), which contains contact-data for the person, and 3 additional tables to map each person to multiple streets, im and phone.
Assumed, there are 2 entries for street, im and phone for a person, this results in 1 insert into the person-table, respectively 2 inserts into street, im and phone. In addition it needs 2 inserts for every joining-table.
With the new feature 'writeable CTE', the effort is reduced to single, (but complex) SQL-command, which contains - among others - all inserts. Of course, this procedure is complex.
In the following posting i want to analyse, if this single, complex command is faster than many simple traditional commands.
For this purpose i wrote 2 plpgsql-functions, which generate proper SQL-commands, one for the old style with many inserts
and one for the new syntax, both functions with exactly the same amount of data for the inserts.
Here are the two functions:
create or replace function fill_tables(IN c int, OUT sql text) returns text as $$ declare c_im int; c_phone int; c_street int; count int; begin c_im:= 1 + strpos(md5(c::text),'a'); c_phone := 1 + strpos(md5(c::text),'b'); c_street := 1 + strpos(md5(c::text),'c'); sql := E'INSERT INTO person (first_name, last_name, birthdate) VALUES (\'' || md5(c::text) || E'\', \'' || md5((c+1)::text) || E'\', \'' || ('1970-01-01'::date + ('0' || regexp_replace(substring(md5((c)::text),0,8),'[^0-9]','','g'))::int * '1 minute'::interval)::date || E'\');\n'; for count in 0 .. c_im loop sql := sql || E'INSERT INTO im (provider, handle) VALUES (\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\');\n'; sql := sql || E'INSERT INTO person_im(person_id, im_id) VALUES (' || E'currval(\'person_id_seq\'),currval(\'im_id_seq\'));\n'; end loop; for count in 0 .. c_phone loop sql := sql || E'INSERT INTO phone (country_code, phone_number, extension) VALUES (\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\');\n'; sql := sql || E'INSERT INTO person_phone(person_id, phone_id) VALUES (' || E'currval(\'person_id_seq\'),currval(\'phone_id_seq\'));\n'; end loop; for count in 0 .. c_street loop sql = sql || E'INSERT INTO street (street1,city,state,country,post_code) VALUES (\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\');\n'; sql := sql || E'INSERT INTO person_street(person_id, street_id) VALUES (' || E'currval(\'person_id_seq\'),currval(\'street_id_seq\'));\n'; end loop; end; $$ language plpgsql volatile;
And:
create or replace function fill_tables_new(IN c int, OUT sql text) returns text as $$ declare c_im int; c_phone int; c_street int; count int; begin c_im:= 1 + strpos(md5(c::text),'a'); c_phone := 1 + strpos(md5(c::text),'b'); c_street := 1 + strpos(md5(c::text),'c'); sql := E'WITH t_person AS (\n ' || E'INSERT INTO person (first_name, last_name, birthdate) VALUES (\n \'' || md5(c::text) || E'\', \'' || md5((c+1)::text) || E'\', \'' || ('1970-01-01'::date + ('0' || regexp_replace(substring(md5((c)::text),0,10),'[^0-9]','','g'))::int * '1 minute'::interval)::date || E'\') RETURNING id),\n sql := sql || E't_im AS (INSERT INTO im (provider, handle) VALUES '; for count in 0 .. c_im loop sql := sql || E'\n (\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\'),'; end loop; sql := trim(sql,','); sql := sql || E' RETURNING id),\n'; sql := sql || E't_person_im AS (INSERT INTO person_im SELECT * FROM t_person CROSS JOIN t_im),\n'; sql := sql || E't_phone AS (INSERT INTO phone (country_code, phone_number, extension) VALUES '; for count in 0 .. c_phone loop sql := sql || E'\n (\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\'),'; end loop; sql := trim(sql,','); sql := sql || E' RETURNING id),\n'; sql := sql || E't_person_phone AS (INSERT INTO person_phone SELECT * FROM t_person CROSS JOIN t_phone),\n'; sql := sql || E't_street AS (INSERT INTO street (street1, city, state, country, post_code) VALUES '; for count in 0 .. c_street loop sql := sql || E'\n (\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\',\'' || md5((c+count)::text) || E'\'),'; end loop; sql := trim(sql,','); sql := sql || E' RETURNING id),\n'; sql := sql || E't_person_street AS (INSERT INTO person_street SELECT * FROM t_person CROSS JOIN t_street)\n'; sql := sql || 'VALUES (true);'; end; $$ language plpgsql volatile;
And here the output of this functions (for example, for one person):
INSERT INTO person (first_name, last_name, birthdate) VALUES ('c4ca4238a0b923820dcc509a6f75849b', 'c81e728d9d4c2f636f067f89cc14862c', '1970-01-04'); INSERT INTO im (provider, handle) VALUES ('c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b'); INSERT INTO person_im(person_id, im_id) VALUES (currval('person_id_seq'),currval('im_id_seq')); INSERT INTO im (provider, handle) VALUES ('c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c'); INSERT INTO person_im(person_id, im_id) VALUES (currval('person_id_seq'),currval('im_id_seq')); INSERT INTO im (provider, handle) VALUES ('eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3'); INSERT INTO person_im(person_id, im_id) VALUES (currval('person_id_seq'),currval('im_id_seq')); INSERT INTO im (provider, handle) VALUES ('a87ff679a2f3e71d9181a67b7542122c','a87ff679a2f3e71d9181a67b7542122c'); INSERT INTO person_im(person_id, im_id) VALUES (currval('person_id_seq'),currval('im_id_seq')); INSERT INTO im (provider, handle) VALUES ('e4da3b7fbbce2345d7772b0674a318d5','e4da3b7fbbce2345d7772b0674a318d5'); INSERT INTO person_im(person_id, im_id) VALUES (currval('person_id_seq'),currval('im_id_seq')); INSERT INTO im (provider, handle) VALUES ('1679091c5a880faf6fb5e6087eb1b2dc','1679091c5a880faf6fb5e6087eb1b2dc'); INSERT INTO person_im(person_id, im_id) VALUES (currval('person_id_seq'),currval('im_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('a87ff679a2f3e71d9181a67b7542122c','a87ff679a2f3e71d9181a67b7542122c','a87ff679a2f3e71d9181a67b7542122c'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('e4da3b7fbbce2345d7772b0674a318d5','e4da3b7fbbce2345d7772b0674a318d5','e4da3b7fbbce2345d7772b0674a318d5'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('1679091c5a880faf6fb5e6087eb1b2dc','1679091c5a880faf6fb5e6087eb1b2dc','1679091c5a880faf6fb5e6087eb1b2dc'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('8f14e45fceea167a5a36dedd4bea2543','8f14e45fceea167a5a36dedd4bea2543','8f14e45fceea167a5a36dedd4bea2543'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('c9f0f895fb98ab9159f51fd0297e236d','c9f0f895fb98ab9159f51fd0297e236d','c9f0f895fb98ab9159f51fd0297e236d'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('45c48cce2e2d7fbdea1afc51c7c6ad26','45c48cce2e2d7fbdea1afc51c7c6ad26','45c48cce2e2d7fbdea1afc51c7c6ad26'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('d3d9446802a44259755d38e6d163e820','d3d9446802a44259755d38e6d163e820','d3d9446802a44259755d38e6d163e820'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('6512bd43d9caa6e02c990b0a82652dca','6512bd43d9caa6e02c990b0a82652dca','6512bd43d9caa6e02c990b0a82652dca'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('c20ad4d76fe97759aa27a0c99bff6710','c20ad4d76fe97759aa27a0c99bff6710','c20ad4d76fe97759aa27a0c99bff6710'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO phone (country_code, phone_number, extension) VALUES ('c51ce410c124a10e0db5e4b97fc2af39','c51ce410c124a10e0db5e4b97fc2af39','c51ce410c124a10e0db5e4b97fc2af39'); INSERT INTO person_phone(person_id, phone_id) VALUES (currval('person_id_seq'),currval('phone_id_seq')); INSERT INTO street (street1,city,state,country,post_code) VALUES ('c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b'); INSERT INTO person_street(person_id, street_id) VALUES (currval('person_id_seq'),currval('street_id_seq')); INSERT INTO street (street1,city,state,country,post_code) VALUES ('c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c'); INSERT INTO person_street(person_id, street_id) VALUES (currval('person_id_seq'),currval('street_id_seq')); INSERT INTO street (street1,city,state,country,post_code) VALUES ('eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3'); INSERT INTO person_street(person_id, street_id) VALUES (currval('person_id_seq'),currval('street_id_seq'));
And, with writeable CTE:
WITH t_person AS ( INSERT INTO person (first_name, last_name, birthdate) VALUES ( 'c4ca4238a0b923820dcc509a6f75849b', 'c81e728d9d4c2f636f067f89cc14862c', '1970-01-31') RETURNING id), t_im AS (INSERT INTO im (provider, handle) VALUES ('c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b'), ('c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c'), ('eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3'), ('a87ff679a2f3e71d9181a67b7542122c','a87ff679a2f3e71d9181a67b7542122c'), ('e4da3b7fbbce2345d7772b0674a318d5','e4da3b7fbbce2345d7772b0674a318d5'), ('1679091c5a880faf6fb5e6087eb1b2dc','1679091c5a880faf6fb5e6087eb1b2dc') RETURNING id), t_person_im AS (INSERT INTO person_im SELECT * FROM t_person CROSS JOIN t_im), t_phone AS (INSERT INTO phone (country_code, phone_number, extension) VALUES ('c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b'), ('c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c'), ('eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3'), ('a87ff679a2f3e71d9181a67b7542122c','a87ff679a2f3e71d9181a67b7542122c','a87ff679a2f3e71d9181a67b7542122c'), ('e4da3b7fbbce2345d7772b0674a318d5','e4da3b7fbbce2345d7772b0674a318d5','e4da3b7fbbce2345d7772b0674a318d5'), ('1679091c5a880faf6fb5e6087eb1b2dc','1679091c5a880faf6fb5e6087eb1b2dc','1679091c5a880faf6fb5e6087eb1b2dc'), ('8f14e45fceea167a5a36dedd4bea2543','8f14e45fceea167a5a36dedd4bea2543','8f14e45fceea167a5a36dedd4bea2543'), ('c9f0f895fb98ab9159f51fd0297e236d','c9f0f895fb98ab9159f51fd0297e236d','c9f0f895fb98ab9159f51fd0297e236d'), ('45c48cce2e2d7fbdea1afc51c7c6ad26','45c48cce2e2d7fbdea1afc51c7c6ad26','45c48cce2e2d7fbdea1afc51c7c6ad26'), ('d3d9446802a44259755d38e6d163e820','d3d9446802a44259755d38e6d163e820','d3d9446802a44259755d38e6d163e820'), ('6512bd43d9caa6e02c990b0a82652dca','6512bd43d9caa6e02c990b0a82652dca','6512bd43d9caa6e02c990b0a82652dca'), ('c20ad4d76fe97759aa27a0c99bff6710','c20ad4d76fe97759aa27a0c99bff6710','c20ad4d76fe97759aa27a0c99bff6710'), ('c51ce410c124a10e0db5e4b97fc2af39','c51ce410c124a10e0db5e4b97fc2af39','c51ce410c124a10e0db5e4b97fc2af39') RETURNING id), t_person_phone AS (INSERT INTO person_phone SELECT * FROM t_person CROSS JOIN t_phone), t_street AS (INSERT INTO street (street1, city, state, country, post_code) VALUES ('c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b','c4ca4238a0b923820dcc509a6f75849b'), ('c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c','c81e728d9d4c2f636f067f89cc14862c'), ('eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3','eccbc87e4b5ce2fe28308fd9f2a7baf3') RETURNING id), t_person_street AS (INSERT INTO person_street SELECT * FROM t_person CROSS JOIN t_street) VALUES (true);
First i created 2 sql-files with the output of my functions, each contains 10.000 rows for persons, in total for all tables 727104 rows.kretschmer@tux:~/cte$ psql -X -t test -c "select fill_tables(s) from generate_series(1,10000) s;" > data_old.sql kretschmer@tux:~/cte$ psql -X -t test -c "select fill_tables_new(s) from generate_series(1,10000) s;" > data_new.sql kretschmer@tux:~/cte$ ls -l data* -rw-r--r-- 1 kretschmer kretschmer 51113563 Nov 13 18:43 data_new.sql -rw-r--r-- 1 kretschmer kretschmer 102391561 Nov 13 18:43 data_old.sqlThe file with the new syntax is just half the size. Okay, lets rock'n'roll, my 8.5devel is listening on port 5434, i'm using the default configuration on a quadcore with 4 GByte RAM, the operating system is Ubuntu. I'm executing the files with the following command(s):time /usr/local/pgsql/bin/psql -X -p 5434 -h localhost test -f data_old.sql real 9m2.466s user 0m10.789s sys 0m11.357s time /usr/local/pgsql/bin/psql -X -p 5434 -h localhost test -f data_new.sql real 0m34.294s user 0m1.620s sys 0m0.528sVery impressive, isn't it? (i know, i could optimize both versions with prepared statements etc.) I hope, the upcoming 8.5 will contain this feature. This feature has a small disadvantage: it's not portable, since no other RDBMS knows about writable CTE! Many thanks to all the people who developed and are developing PostgreSQL!
Samstag, 14. November 2009
Mit Freunden in Paris
Wir, also meine Frau sowie Katharina und Tamara, verbrachten einige Tage in Paris.
Zum einen, um den PGDay Europe 2009 zu besuchen, zum anderen, um mit Freunden Paris zu erkunden.
Zu sehen sind hier Andreas, Andrea, David Fetter, meine Frau, ich sowie die Kinder Pascal und Tamara, Katharina machte das Bild.
Leider war die Zeit deutlich zu kurz, um all die vielen tollen Dinge in Paris zu erkunden. Es reichte aber für den Louvre, den Eiffelturm, eine Fahrt auf der Saine, Notre Dame.
Nun ja, was bleibt sind die Erinnerungen an schöne Tage, an denen ich viele mir bis dahin nur via eMail & IRC bekannte Leute mal kennenlernte und mehrere hundert Bilder, die darauf warten, aufbereitet zu werden.
Not only David: see you in Amsterdam (maybe Brussels) next year!
Andreas
Zum einen, um den PGDay Europe 2009 zu besuchen, zum anderen, um mit Freunden Paris zu erkunden.
Zu sehen sind hier Andreas, Andrea, David Fetter, meine Frau, ich sowie die Kinder Pascal und Tamara, Katharina machte das Bild.
Leider war die Zeit deutlich zu kurz, um all die vielen tollen Dinge in Paris zu erkunden. Es reichte aber für den Louvre, den Eiffelturm, eine Fahrt auf der Saine, Notre Dame.
Nun ja, was bleibt sind die Erinnerungen an schöne Tage, an denen ich viele mir bis dahin nur via eMail & IRC bekannte Leute mal kennenlernte und mehrere hundert Bilder, die darauf warten, aufbereitet zu werden.
Not only David: see you in Amsterdam (maybe Brussels) next year!
Andreas
Montag, 12. Oktober 2009
alternating sort order
I know, i'm lazy, i haven't written new blog-entries for a long, long time.
Someone ask in the german Postgresql-Forum: how can i sort in alternating order?
Okay, after some hours i found this solution:
Image, you have this table:
And now my solution:
The first subquery returns the first half of the table with a new numbering column, starting
with 1 increment by 1.
The second returns the other half. Because the row_number() - function would start with the
offset, i have moved the row_number() function out of this subquery and created a other subquery around, to start my row_number with 1. After that, i added 0.5 to this (of course, i have to cast the int to a float to do that)
Yeah, now i'm able to sort in alternating order.
And yes, sorry about my bad english...
Someone ask in the german Postgresql-Forum: how can i sort in alternating order?
Okay, after some hours i found this solution:
Image, you have this table:
test=*# select * from foo;
i
----
1
2
3
4
5
6
7
8
9
10
(10 rows)
And now my solution:
test=# with
f1 as (
select i, row_number() over (order by i)::float as nr from foo limit (select count(*) from foo)/2),
f2 as (
select i, row_number() over()::float+0.5 from (
select i from foo offset (select count(*) from foo)/2)x
)
select * from f1 union select * from f2 order by nr;
i | nr
----+-----
1 | 1
6 | 1.5
2 | 2
7 | 2.5
3 | 3
8 | 3.5
4 | 4
9 | 4.5
5 | 5
10 | 5.5
(10 rows)
The first subquery returns the first half of the table with a new numbering column, starting
with 1 increment by 1.
The second returns the other half. Because the row_number() - function would start with the
offset, i have moved the row_number() function out of this subquery and created a other subquery around, to start my row_number with 1. After that, i added 0.5 to this (of course, i have to cast the int to a float to do that)
Yeah, now i'm able to sort in alternating order.
And yes, sorry about my bad english...
Abonnieren
Posts (Atom)
Labels
- Linux (1)
- PostgreSQL (8)
- Tamara (2)