In this blog, I’ll demonstrate how to check if the PostgreSQL tables of the same structure have identical data using aggregated hash and md5.
Create two tables with the same structure: some_table1 and some_table2:
create table some_table1(val1 serial, val2 varchar(10), val3 timestamp);
create table some_table2(val1 serial, val2 varchar(10), val3 timestamp);
Create a primary key on the tables to ensure the val1 column identifies the table row uniquely:
alter table some_table1 add primary key (val1);
alter table some_table2 add primary key (val1);
Check the structure of the tables:
postgres=# \d some_table1
Table "public.some_table1"
Column | Type | Collation | Nullable | Default
--------+-----------------------------+-----------+----------+-------------------------------------------
val1 | integer | | not null | nextval('some_table1_val1_seq'::regclass)
val2 | character varying(10) | | |
val3 | timestamp without time zone | | |
Indexes:
"some_table1_pkey" PRIMARY KEY, btree (val1)
postgres=# \d some_table2
Table "public.some_table2"
Column | Type | Collation | Nullable | Default
--------+-----------------------------+-----------+----------+-------------------------------------------
val1 | integer | | not null | nextval('some_table2_val1_seq'::regclass)
val2 | character varying(10) | | |
val3 | timestamp without time zone | | |
Indexes:
"some_table2_pkey" PRIMARY KEY, btree (val1)
postgres=#
Populate the tables with some sample data:
insert into some_table1 values(default, 'One', '2022-12-11 19:07:00');
insert into some_table1 values(default, 'Two', '2022-12-11 19:08:00');
insert into some_table1 values(default, 'Three', '2022-12-11 19:09:00');
insert into some_table2 values(default, 'One', '2022-12-11 19:07:00');
insert into some_table2 values(default, 'Two', '2022-12-11 19:08:00');
insert into some_table2 values(default, 'Three', '2022-12-11 19:09:00');
Check if the tables are identical:
with t1 as (select md5(array_agg(md5((t.*)::text))::text) chk_sum
from (select *
from some_table1
order by 1) t
),
t2 as (select md5(array_agg(md5((t.*)::text))::text) chk_sum
from (select *
from some_table2
order by 1) t
)
select
case
when
t1.chk_sum = t2.chk_sum
then
'Identical'
else
'Non-Identical'
end the_answer
from t1, t2;
Check the case when the tables are not identical:
postgres=# insert into some_table2 values(default, 'Four', '2022-12-11 19:10:00');
INSERT 0 1
postgres=#
postgres=# select * from some_table1;
val1 | val2 | val3
------+-------+---------------------
1 | One | 2022-12-11 19:07:00
2 | Two | 2022-12-11 19:08:00
3 | Three | 2022-12-11 19:09:00
(3 rows)
postgres=# select * from some_table2;
val1 | val2 | val3
------+-------+---------------------
1 | One | 2022-12-11 19:07:00
2 | Two | 2022-12-11 19:08:00
3 | Three | 2022-12-11 19:09:00
4 | Four | 2022-12-11 19:10:00
(4 rows)
postgres=#
with t1 as (select md5(array_agg(md5((t.*)::text))::text) chk_sum
from (select *
from some_table1
order by 1) t
),
t2 as (select md5(array_agg(md5((t.*)::text))::text) chk_sum
from (select *
from some_table2
order by 1) t
)
select
case
when
t1.chk_sum = t2.chk_sum
then
'Identical'
else
'Non-Identical'
end the_answer
from t1, t2;
There is another version of the query:
select
t1.chk_sum chk_sum_table1,
t2.chk_sum chk_sum_table2,
case
when
t1.chk_sum = t2.chk_sum
then
'Identical'
else
'Non-Identical'
end the_answer
from (select md5(array_agg(md5((t.*)::text))::text) chk_sum
from (select *
from some_table1
order by 1) t
) t1,
(select md5(array_agg(md5((t.*)::text))::text) chk_sum
from (select *
from some_table2
order by 1) t
) t2;
Conclusion:
In this blog, I’ve demonstrated SQL query to check if the PostgreSQL tables of the same structure have identical data using aggregated hash and md5.
Top comments (0)