create table it201011a (
gal text not null
);
-- 输入一亿笔
insert into it201011a
select (array['小岛南', '初川南', '相沢南', '架乃ゆら', '山岸逢花', '枫カレン', '坂道みる', '桥本ありな', '葵つかさ', '天使もえ', '水卜さくら'])[floor(random() * 11) + 1::int]
from generate_series(1, 1e8);
create index on it201011a(gal);
--
要找出栏位distinct , 一般直接的方式.
select distinct gal
from it201011a;
这样会很慢....
我们只看一下 评估成本,就不用 timing.
explain (costs)
select distinct gal
from it201011a;
+------------------------------------------------------------------------------+
| QUERY PLAN |
+------------------------------------------------------------------------------+
| HashAggregate (cost=1790541.00..1790541.11 rows=11 width=12) |
| Group Key: gal |
| -> Seq Scan on it201011a (cost=0.00..1540541.00 rows=100000000 width=12) |
+------------------------------------------------------------------------------+
--
with recursive t1 as (
select min(t.gal) gal
from it201011a t
where t.gal is not null
union all
select (select min(gal)
from it201011a a
where a.gal > b.gal
and a.gal is not null)
from t1 b
where b.gal is not null
)
select *
from t1
where gal is not null;
+------------+
| gal |
+------------+
| 初川南 |
| 小岛南 |
| 相沢南 |
| 坂道みる |
| 天使もえ |
| 山岸逢花 |
| 架乃ゆら |
| 枫カレン |
| 葵つかさ |
| 桥本ありな |
| 水卜さくら |
+------------+
(11 rows)
Time: 4.377 ms
速度超快!! 来看一下执行计画
explain (analyze,timing,costs)
with recursive t1 as (
select min(t.gal) gal
from it201011a t
where t.gal is not null
union all
select (select min(gal)
from it201011a a
where a.gal > b.gal
and a.gal is not null)
from t1 b
where b.gal is not null
)
select *
from t1
where gal is not null;
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| QUERY PLAN |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| CTE Scan on t1 (cost=241.33..243.35 rows=100 width=32) (actual time=0.147..3.688 rows=11 loops=1) |
| Filter: (gal IS NOT NULL) |
| Rows Removed by Filter: 1 |
| CTE t1 |
| -> Recursive Union (cost=2.33..241.33 rows=101 width=32) (actual time=0.141..3.666 rows=12 loops=1) |
| -> Result (cost=2.33..2.34 rows=1 width=32) (actual time=0.107..0.109 rows=1 loops=1) |
| InitPlan 3 (returns $1) |
| -> Limit (cost=0.57..2.33 rows=1 width=12) (actual time=0.100..0.101 rows=1 loops=1) |
| -> Index Only Scan using it201011a_gal_idx on it201011a t (cost=0.57..176009683.02 rows=100000000 width=12) (actual time=0.098..0.099 rows=1 loops=1) |
| Index Cond: (gal IS NOT NULL) |
| Heap Fetches: 1 |
| -> WorkTable Scan on t1 b (cost=0.00..23.70 rows=10 width=32) (actual time=0.291..0.292 rows=1 loops=12) |
| Filter: (gal IS NOT NULL) |
| Rows Removed by Filter: 0 |
| SubPlan 2 |
| -> Result (cost=2.34..2.35 rows=1 width=32) (actual time=0.315..0.315 rows=1 loops=11) |
| InitPlan 1 (returns $3) |
| -> Limit (cost=0.57..2.34 rows=1 width=12) (actual time=0.313..0.313 rows=1 loops=11) |
| -> Index Only Scan using it201011a_gal_idx on it201011a a (cost=0.57..59072943.18 rows=33333333 width=12) (actual time=0.312..0.312 rows=1 loops=11) |
| Index Cond: ((gal > b.gal) AND (gal IS NOT NULL)) |
| Heap Fetches: 10 |
| Planning Time: 0.437 ms |
| Execution Time: 3.823 ms |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
其实就一个loop,依次比较,且逐步减少需要聚合的量.
-----------
接着来看第二个例子. 有一种情境是 一个 table 存放 device 相关资料,device 可以是 sensor, 车辆 等等;
另一个 table 存放 device 的纪录, 例如感应数据,或是行车状况等等.
为了简便起见,以下的例子就不加上日期.
create table itdevice (
id int not null primary key
);
insert into itdevice
select generate_series(1,1000);
create table itdevval (
id int generated always as identity
, devid int not null
, ts timestamp not null
);
输入600万笔资料
insert into itdevval(devid, ts)
select b.devid
, clock_timestamp()
from (select generate_series(1,1e4)) a
, (select generate_series(1,600) devid) b;
建立 itdevval 中 devid 栏位的 index.
create index on itdevval(devid);
----
当我们想要查询哪些device,没有数据资料,例如设备故障等情况.
因为两边都有 device id 的 index, 一般直接查询.
基本的 not in
select *
from itdevice
where id not in (
select distinct devid
from itdevval);
Time: 1372.260 ms (00:01.372)
select *
from itdevice
where id not in (
select devid
from itdevval);
Time: 249832.184 ms (04:09.832)
来看一下执行计画
explain (costs)
select *
from itdevice
where id not in (
select distinct devid
from itdevval);
+--------------------------------------------------------------------------------+
| QUERY PLAN |
+--------------------------------------------------------------------------------+
| Seq Scan on itdevice (cost=107440.50..107458.00 rows=500 width=4) |
| Filter: (NOT (hashed SubPlan 1)) |
| SubPlan 1 |
| -> HashAggregate (cost=107433.00..107439.00 rows=600 width=4) |
| Group Key: itdevval.devid |
| -> Seq Scan on itdevval (cost=0.00..92433.00 rows=6000000 width=4) |
+--------------------------------------------------------------------------------+
explain (costs)
select *
from itdevice
where id not in (
select devid
from itdevval);
+--------------------------------------------------------------------------------+
| QUERY PLAN |
+--------------------------------------------------------------------------------+
| Seq Scan on itdevice (cost=0.00..80435517.50 rows=500 width=4) |
| Filter: (NOT (SubPlan 1)) |
| SubPlan 1 |
| -> Materialize (cost=0.00..145871.00 rows=6000000 width=4) |
| -> Seq Scan on itdevval (cost=0.00..92433.00 rows=6000000 width=4) |
+--------------------------------------------------------------------------------+
distinct 有聚合, HashAggregate Group Key: itdevval.devid
但是都是 Seq Scan
使用 outer join 的方式
select a.id
from itdevice a
left join itdevval b
on (a.id = b.devid)
where b.* is null;
Time: 2218.486 ms (00:02.218)
explain (costs)
select a.id
from itdevice a
left join itdevval b
on (a.id = b.devid)
where b.* is null;
+---------------------------------------------------------------------------+
| QUERY PLAN |
+---------------------------------------------------------------------------+
| Hash Right Join (cost=27.50..108277.25 rows=30000 width=4) |
| Hash Cond: (b.devid = a.id) |
| Filter: (b.* IS NULL) |
| -> Seq Scan on itdevval b (cost=0.00..92433.00 rows=6000000 width=44) |
| -> Hash (cost=15.00..15.00 rows=1000 width=4) |
| -> Seq Scan on itdevice a (cost=0.00..15.00 rows=1000 width=4) |
+---------------------------------------------------------------------------+
递回的方式
with recursive t1 as (
select min(devid) devid
from itdevval
where devid is not null
union all
select (select min(v.devid)
from itdevval v
where v.devid > t1.devid
and v.devid is not null)
from t1
where t1.devid is not null
), t2 as (
select devid
from t1
where devid is not null
-- t2 是有 itdevval 有资料的 devid
)
select id
from itdevice
where id not in (
select devid
from t2);
Time: 11.715 ms
原理与第一个例子相同.速度比一般方法快多了.
>>: Day27_渗透 Burp Suite-Intruder Positions
前言 这个环节的准备很特别,大部分时间都想着与考官的应答,这也是问题最开放、最及时所以也最难准备的环...
Webpack 一开始只认识 JavaScript,当引入其他语言(如:css)撰写的档案时就会出现...
在我们花了不少时间终於完整的完成ATM付款并成功架设Heroku网站後取回PayToken,更新付款...
可能发生的费用 云地混合的DevOps环境 AWS CodeCommit AWS CodePipel...
目的 建立一个对外的窗口(介面),负责提供特定功能,而功能背後如何运作?与哪些物件有所关联?通通交给...