Practical highlights in my opinion:
- It's important to know about data padding in PG.
- Be conscious when modelling data tables about columns ordering, but don't be pure-school and do it in a best-effort basis.
- Gains up to 25% in wasted storage are impressive but always keep in mind the scope of the system. For me, gains are not worth it in the short-term. Whenever a system grows, it is possible to migrate data to more storage-efficient tables but mind the operative burder.
Here follows my own commands on trying the article points. I added - pg_column_size(row())
on each projection to have clear absolute sizes.
-- How does row function work?
SELECT pg_column_size(row()) AS empty,
pg_column_size(row(0::SMALLINT)) AS byte2,
pg_column_size(row(0::BIGINT)) AS byte8,
pg_column_size(row(0::SMALLINT, 0::BIGINT)) AS byte16,
pg_column_size(row(''::TEXT)) AS text0,
pg_column_size(row('hola'::TEXT)) AS text4,
0 AS term
;
-- My own take on that
SELECT pg_column_size(row()) AS empty,
pg_column_size(row(uuid_generate_v4())) AS uuid_type,
pg_column_size(row('hola mundo'::TEXT)) AS text_type,
pg_column_size(row(uuid_generate_v4(), 'hola mundo'::TEXT)) AS uuid_text_type,
pg_column_size(row('hola mundo'::TEXT, uuid_generate_v4())) AS text_uuid_type,
0 AS term
;
CREATE TABLE user_order (
is_shipped BOOLEAN NOT NULL DEFAULT false,
user_id BIGINT NOT NULL,
order_total NUMERIC NOT NULL,
order_dt TIMESTAMPTZ NOT NULL,
order_type SMALLINT NOT NULL,
ship_dt TIMESTAMPTZ,
item_ct INT NOT NULL,
ship_cost NUMERIC,
receive_dt TIMESTAMPTZ,
tracking_cd TEXT,
id BIGSERIAL PRIMARY KEY NOT NULL
);
SELECT a.attname, t.typname, t.typalign, t.typlen
FROM pg_class c
JOIN pg_attribute a ON (a.attrelid = c.oid)
JOIN pg_type t ON (t.oid = a.atttypid)
WHERE c.relname = 'user_order'
AND a.attnum >= 0
ORDER BY a.attnum;
-- What is it about pg_class, pg_attribute and pg_type tables? For future investigation.
-- SELECT sum(t.typlen)
-- SELECT t.typlen
SELECT a.attname, t.typname, t.typalign, t.typlen
FROM pg_class c
JOIN pg_attribute a ON (a.attrelid = c.oid)
JOIN pg_type t ON (t.oid = a.atttypid)
WHERE c.relname = 'user_order'
AND a.attnum >= 0
ORDER BY a.attnum
;
-- Whoa! I need to master mocking data directly into db.
INSERT INTO user_order (
is_shipped, user_id, order_total, order_dt, order_type,
ship_dt, item_ct, ship_cost, receive_dt, tracking_cd
)
SELECT true, 1000, 500.00, now() - INTERVAL '7 days',
3, now() - INTERVAL '5 days', 10, 4.99,
now() - INTERVAL '3 days', 'X5901324123479RROIENSTBKCV4'
FROM generate_series(1, 1000000);
-- New item to learn, pg_relation_size.
SELECT pg_relation_size('user_order') AS size_bytes,
pg_size_pretty(pg_relation_size('user_order')) AS size_pretty;
SELECT * FROM user_order LIMIT 1;
SELECT pg_column_size(row(0::NUMERIC)) - pg_column_size(row()) AS zero_num,
pg_column_size(row(1::NUMERIC)) - pg_column_size(row()) AS one_num,
pg_column_size(row(9.9::NUMERIC)) - pg_column_size(row()) AS nine_point_nine_num,
pg_column_size(row(1::INT2)) - pg_column_size(row()) AS int2,
pg_column_size(row(1::INT4)) - pg_column_size(row()) AS int4,
pg_column_size(row(1::INT2, 1::NUMERIC)) - pg_column_size(row()) AS int2_one_num,
pg_column_size(row(1::INT4, 1::NUMERIC)) - pg_column_size(row()) AS int4_one_num,
pg_column_size(row(1::NUMERIC, 1::INT4)) - pg_column_size(row()) AS one_num_int4,
0 AS term
;
SELECT pg_column_size(row(''::TEXT)) - pg_column_size(row()) AS empty_text,
pg_column_size(row('a'::TEXT)) - pg_column_size(row()) AS len1_text,
pg_column_size(row('abcd'::TEXT)) - pg_column_size(row()) AS len4_text,
pg_column_size(row('abcde'::TEXT)) - pg_column_size(row()) AS len5_text,
pg_column_size(row('abcdefgh'::TEXT)) - pg_column_size(row()) AS len8_text,
pg_column_size(row('abcdefghi'::TEXT)) - pg_column_size(row()) AS len9_text,
0 AS term
;
SELECT pg_column_size(row(''::TEXT, 1::INT4)) - pg_column_size(row()) AS empty_text_int4,
pg_column_size(row('a'::TEXT, 1::INT4)) - pg_column_size(row()) AS len1_text_int4,
pg_column_size(row('abcd'::TEXT, 1::INT4)) - pg_column_size(row()) AS len4_text_int4,
pg_column_size(row('abcde'::TEXT, 1::INT4)) - pg_column_size(row()) AS len5_text_int4,
pg_column_size(row('abcdefgh'::TEXT, 1::INT4)) - pg_column_size(row()) AS len8_text_int4,
pg_column_size(row('abcdefghi'::TEXT, 1::INT4)) - pg_column_size(row()) AS len9_text_int4,
0 AS term
;
SELECT pg_column_size(row(1::INT4, ''::TEXT)) - pg_column_size(row()) AS int4_empty_text,
pg_column_size(row(1::INT4, 'a'::TEXT)) - pg_column_size(row()) AS int4_len1_text,
pg_column_size(row(1::INT4, 'abcd'::TEXT)) - pg_column_size(row()) AS int4_len4_text,
pg_column_size(row(1::INT4, 'abcde'::TEXT)) - pg_column_size(row()) AS int4_len5_text,
pg_column_size(row(1::INT4, 'abcdefgh'::TEXT)) - pg_column_size(row()) AS int4_len8_text,
pg_column_size(row(1::INT4, 'abcdefghi'::TEXT)) - pg_column_size(row()) AS int4_len9_text,
0 AS term
;
SELECT pg_column_size(row()) - pg_column_size(row()) AS empty_row,
pg_column_size(row(''::TEXT)) - pg_column_size(row()) AS no_text,
pg_column_size(row('a'::TEXT)) - pg_column_size(row()) AS min_text,
pg_column_size(row(1::INT4, 'a'::TEXT)) - pg_column_size(row()) AS two_col,
pg_column_size(row('a'::TEXT, 1::INT4)) - pg_column_size(row()) AS round4;
SELECT pg_column_size(row()) - pg_column_size(row()) AS empty_row,
pg_column_size(row(1::SMALLINT)) - pg_column_size(row()) AS int2,
pg_column_size(row(1::INT)) - pg_column_size(row()) AS int4,
pg_column_size(row(1::BIGINT)) - pg_column_size(row()) AS int8,
pg_column_size(row(1::SMALLINT, 1::BIGINT)) - pg_column_size(row()) AS padded,
pg_column_size(row(1::INT, 1::INT, 1::BIGINT)) - pg_column_size(row()) AS not_padded;
SELECT a.attname, t.typname, t.typalign, t.typlen
FROM pg_class c
JOIN pg_attribute a ON (a.attrelid = c.oid)
JOIN pg_type t ON (t.oid = a.atttypid)
WHERE c.relname = 'user_order'
AND a.attnum >= 0
ORDER BY t.typlen DESC;
DROP TABLE user_order;
CREATE TABLE user_order (
id BIGSERIAL PRIMARY KEY NOT NULL,
user_id BIGINT NOT NULL,
order_dt TIMESTAMPTZ NOT NULL,
ship_dt TIMESTAMPTZ,
receive_dt TIMESTAMPTZ,
item_ct INT NOT NULL,
order_type SMALLINT NOT NULL,
is_shipped BOOLEAN NOT NULL DEFAULT false,
order_total NUMERIC NOT NULL,
ship_cost NUMERIC,
tracking_cd TEXT
);
-- And, what about other varying size types as JSONB?
SELECT pg_column_size(row('{}'::JSONB)) - pg_column_size(row()) AS empty_jsonb,
pg_column_size(row('{}'::JSONB, 0::INT4)) - pg_column_size(row()) AS empty_jsonb_int4,
pg_column_size(row(0::INT4, '{}'::JSONB)) - pg_column_size(row()) AS int4_empty_jsonb,
pg_column_size(row('{"a": 1}'::JSONB)) - pg_column_size(row()) AS basic_jsonb,
pg_column_size(row('{"a": 1}'::JSONB, 0::INT4)) - pg_column_size(row()) AS basic_jsonb_int4,
pg_column_size(row(0::INT4, '{"a": 1}'::JSONB)) - pg_column_size(row()) AS int4_basic_jsonb,
0 AS term;