在资料库的日常使用,会有将schema下的table结构复制到另一schema下,供做测试,或是做移转等需求.
为方便说明,将建立三个 schema
create schema myfunc;
create schema s0822;
create schema t0822;
create table s0822.t001 (
id int generated always as identity primary key
, txt text not null
);
create table s0822.t002 (
id int generated always as identity primary key
, t001_id int not null references s0822.t001(id)
, txt text not null
);
create or replace function myfunc.f_get_schema_tables (
in in_schema text
, out relname text
)
returns setof text
language sql as
$code$
select c.relname::text
from pg_class c
join pg_namespace n
on c.relnamespace = n.oid
where n.nspname::text = in_schema
and c.relkind = 'r'
order by 1;
$code$;
create or replace procedure myfunc.p_clone_schmea_tables (
in source_schema text
, in target_schema text
)
language plpgsql as
$code$
declare
curs cursor for select relname from myfunc.f_get_schema_tables(source_schema);
rel text;
begin
open curs;
loop
fetch curs into rel;
exit when not found;
execute 'create table if not exists '
|| target_schema || '.' || rel
|| ' (like ' || source_schema || '.' || rel
|| ' including all)';
end loop;
close curs;
end;
$code$;
call myfunc.p_clone_schmea_tables('s0822', 't0822');
\dt s0822.*
List of relations
Schema | Name | Type | Owner
--------+------+-------+-------
s0822 | t001 | table | nana
s0822 | t002 | table | nana
(2 rows)
\dt t0822.*
List of relations
Schema | Name | Type | Owner
--------+------+-------+-------
t0822 | t001 | table | nana
t0822 | t002 | table | nana
(2 rows)
\d s0822.t001
Table "s0822.t001"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+------------------------------
id | integer | | not null | generated always as identity
txt | text | | not null |
Indexes:
"t001_pkey" PRIMARY KEY, btree (id)
Referenced by:
TABLE "s0822.t002" CONSTRAINT "t002_t001_id_fkey" FOREIGN KEY (t001_id) REFERENCES s0822.t001(id)
\d t0822.t001
Table "t0822.t001"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+------------------------------
id | integer | | not null | generated always as identity
txt | text | | not null |
Indexes:
"t001_pkey" PRIMARY KEY, btree (id)
\d s0822.t002
Table "s0822.t002"
Column | Type | Collation | Nullable | Default
---------+---------+-----------+----------+------------------------------
id | integer | | not null | generated always as identity
t001_id | integer | | not null |
txt | text | | not null |
Indexes:
"t002_pkey" PRIMARY KEY, btree (id)
Foreign-key constraints:
"t002_t001_id_fkey" FOREIGN KEY (t001_id) REFERENCES s0822.t001(id)
\d t0822.t002
Table "t0822.t002"
Column | Type | Collation | Nullable | Default
---------+---------+-----------+----------+------------------------------
id | integer | | not null | generated always as identity
t001_id | integer | | not null |
txt | text | | not null |
Indexes:
"t002_pkey" PRIMARY KEY, btree (id)
注意到除了 Foreign key constraints 是都复制了.
因为我们是使用了 like INCLUDING ALL
细节可以参考官网文件
https://www.postgresql.org/docs/current/sql-createtable.html
create table s0822.t (
a integer
, b text
, c boolean
);
select setseed(0.5);
insert into s0822.t
with r as (
select id
, chr((32+random()*94)::integer)
, random() < 0.01
from generate_series(1, 1e5) as s(id)
)
select *
from r
order by random();
analyze s0822.t;
select attname
, correlation
from pg_stats
where schemaname = 's0822'
and tablename = 't';
attname | correlation
---------+--------------
a | 0.0044361027
b | 0.0063023297
c | 0.9826269
(3 rows)
使用 like 方法, 但不使用 including all
create table t0822.t1 (
like s0822.t
);
create table t0822.t2 (
like s0822.t
);
create table t0822.t3 (
like s0822.t
);
create table t0822.t4 (
like s0822.t
);
https://www.postgresql.org/docs/current/sql-select.html
TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ]
insert into t0822.t1
select *
from s0822.t
tablesample system(10);
insert into t0822.t2
select *
from s0822.t
tablesample bernoulli(10);
INSERT 0 9987
Time: 17.633 ms
analyze t0822.t1;
analyze t0822.t2;
select attname
, correlation
from pg_stats
where schemaname = 't0822'
and tablename = 't1'
order by 1;
attname | correlation
---------+---------------
a | -0.0044052443
b | 0.0011416585
c | 0.9800325
(3 rows)
select attname
, correlation
from pg_stats
where schemaname = 't0822'
and tablename = 't2'
order by 1;
attname | correlation
---------+---------------
a | 0.00066867325
b | 0.023340156
c | 0.98007065
(3 rows)
a 是 id,递增, 取样後相关性就差异很大,是正常现象.
b, c 栏位的相关性, c 原本是 random() < 0.01 方式产生,
b 原本是 chr((32+random()*94)::integer) 方式产生,
可以观察到两种取样方式与原始table在统计值上的接近程度.
但是要知道 pg_stat 的相关性,也是抽样的,并不是 100%.
以官方文件的说法, bernoulli method 会慢一些,但是效果会
好一些.另外可以参考官网文件以及古博士的分享.
https://island.postgresql.tw/2018/07/24/tablesample.html
另外还有 REPEATABLE, 这是让我们可以设定取样时的乱数种子,进而可以重现.
insert into t0822.t3
select *
from s0822.t
tablesample bernoulli(20) REPEATABLE(100);
INSERT 0 20075
Time: 23.151 ms
insert into t0822.t4
select *
from s0822.t
tablesample bernoulli(20) REPEATABLE(100);
INSERT 0 20075
Time: 22.981 ms
---
analyze t0822.t3;
analyze t0822.t4;
select attname
, correlation
from pg_stats
where schemaname = 't0822'
and tablename = 't3'
order by 1;
attname | correlation
---------+--------------
a | 0.0013938936
b | 0.002332309
c | 0.98037195
(3 rows)
select attname
, correlation
from pg_stats
where schemaname = 't0822'
and tablename = 't4'
order by 1;
attname | correlation
---------+--------------
a | 0.0013938936
b | 0.002332309
c | 0.98037195
(3 rows)
可以观察到,使用了同样乱数种子,会取样相同.
透过今天介绍的功能,对我们要做资料库的资料移转测试,验证查询方式的改善测试,结合运用,能够方便及有效验证.
>>: 列举子目录所有档案Python os.walk + filter
在开发程序时,有时候想要测试一点小功能,确认说这个功能可不可以使用,如果说每次都要为了测试这点功能就...
「鲑鱼均,因为一场鲑鱼之乱被主管称为鲑鱼世代,广义来说以年龄和脸蛋分类的话这应该算是一种 KNN 的...
大家安安 突然发现有人关注 但是~ 前面的坑还没有补好 後面很难写 不过这篇我预计会写"算...
千变万化的字串变数 他为什麽千变万化呢?因为字串这种型别,别人给他什麽他就是什麽 就如同一位认真向学...
前言 前面介绍了记忆体分页的管理机制,分页管理让记忆体管理不再以行程为管理的单位,而是以页为单位作为...