讲解完网路爬虫的实际应用后,接下来将它跟 Line chatbot 进行整合吧!
首先,先新增一个 Python 档,在此处命名为 cook.py,而这个 Python 档中主要有两个功能:缩短网址,以及以关键字搜寻食谱。
## cook.py
# import 所需套件
from __future__ import with_statement
import contextlib
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
try:
from urllib.request import urlopen
except:
from urllib2 import urlopen
import sys
from random import random
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import time
# 缩短网址功能
def make_tiny(url, timeout=10):
    """Return a TinyURL short link for ``url``.

    Sends ``url`` to TinyURL's ``api-create.php`` endpoint and returns the
    response body decoded as UTF-8.

    Args:
        url: The address to shorten.
        timeout: Seconds to wait on the network before giving up. Without
            a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        The text sent back by the endpoint (expected to be the short URL).

    Raises:
        Whatever ``urlopen`` raises on a network failure or timeout.
    """
    request_url = ('http://tinyurl.com/api-create.php?' +
                   urlencode({'url': url}))
    # closing() guarantees the response is released even if read() fails.
    with contextlib.closing(urlopen(request_url, timeout=timeout)) as response:
        return response.read().decode('utf-8')
# 关键字搜寻食谱功能
class Cook_keyword:
    """Search Cookpad (tw) by keyword and collect the first recipe cards.

    Usage: ``Cook_keyword(keyword).scrape()``.
    """

    # Seconds to wait for each Cookpad request so that a stalled connection
    # cannot block the caller forever.
    REQUEST_TIMEOUT = 10
    # Thumbnail used when a recipe image cannot be resolved.
    DEFAULT_IMAGE_URL = 'https://i.imgur.com/bUTHY8X.jpg'
    # Link used when a recipe link cannot be resolved or shortened.
    DEFAULT_RECIPE_URL = 'https://cookpad.com'

    def __init__(self, keyword):
        """Store the search keyword used by :meth:`scrape`."""
        self.keyword = keyword

    def scrape(self):
        """Scrape up to five recipes matching ``self.keyword``.

        Returns:
            A list with one ``[title, info, url, img_url]`` entry per
            recipe card found:

            * ``title`` - card title with ``/`` replaced by ``-``, cut to
              13 characters.
            * ``info`` - card description cut to 40 characters, or the
              string ``'None'`` when it cannot be read.
            * ``url`` - shortened recipe link, or ``https://cookpad.com``
              when the link cannot be read or shortened.
            * ``img_url`` - shortened image link, or a placeholder image
              when it cannot be resolved.

        Raises:
            Errors from the initial search request, and ``AttributeError``
            when a card has no title link, are not caught here.
        """
        # Send a browser-like User-Agent header.
        headers = {'User-Agent': UserAgent().safari}
        search_response = requests.get(
            "https://cookpad.com/tw/%E6%90%9C%E5%B0%8B/" + self.keyword +
            "?event=search.history",
            headers=headers, timeout=self.REQUEST_TIMEOUT)
        search_page = BeautifulSoup(search_response.content, "html.parser")

        # Keep only the first five recipe cards.
        cards = search_page.find_all(
            'li', {'class': 'block-link card border-cookpad-gray-400 border-t-0 border-l-0 border-r-0 border-b flex m-0 rounded-none overflow-hidden ranked-list__item xs:border-b-none xs:mb-sm xs:rounded'}, limit=5)

        result = []
        for card in cards:
            # Recipe title.
            title = card.find("a", {"class": "block-link__main"}).getText()
            title = title.replace('/', '-')[:13]

            # Recipe description (best effort).
            try:
                info = card.find(
                    "div", {"class": "clamp-2 break-words"}).getText()[:40]
            except Exception:
                info = 'None'

            # Recipe link (best effort). The un-shortened address is kept
            # so the recipe page can be fetched without going through the
            # URL shortener.
            recipe_url = None
            try:
                recipe_url = 'https://cookpad.com' + card.find("a")["href"]
                url = make_tiny(recipe_url)
            except Exception:
                url = self.DEFAULT_RECIPE_URL

            # Recipe image (best effort), read from the recipe page itself.
            try:
                recipe_response = requests.get(
                    recipe_url or url,
                    headers=headers, timeout=self.REQUEST_TIMEOUT)
                recipe_page = BeautifulSoup(
                    recipe_response.content, "html.parser")
                image_box = recipe_page.find('div', {'class': 'tofu_image'})
                image_src = image_box.select_one("img").get('data-original')
                # Only shorten a real address; a missing attribute falls
                # back to the placeholder instead of shortening ``None``.
                if image_src:
                    img_url = make_tiny(image_src)
                else:
                    img_url = self.DEFAULT_IMAGE_URL
            except Exception:
                img_url = self.DEFAULT_IMAGE_URL

            result.append([title, info, url, img_url])
        return result
完成网路爬虫搜寻食谱的功能后,接下来就将它套用进 Line Chatbot 的 Carousel template 吧!(因为介面关系,此处仅示范显示 3 个食谱的 Carousel,要增加 Carousel column 就以此类推,一个 Carousel template 最多可以有 10 个 column)
from .cook import Cook_keyword

# Number of recipes shown in the carousel.
MAX_COLUMNS = 3

# NOTE(review): `content` is presumably the list of
# [title, info, url, img_url] rows returned by Cook_keyword.scrape(); it is
# defined outside this snippet - confirm against the caller.
# Slicing (instead of indexing content[0..2]) avoids an IndexError when the
# search returns fewer than MAX_COLUMNS recipes.
columns = [
    CarouselColumn(
        thumbnail_image_url=recipe[3],
        title=recipe[0],
        text=str(recipe[1]),
        actions=[
            MessageTemplateAction(
                label='详细资料',
                text=recipe[1]
            ),
            URITemplateAction(
                label='前往食谱',
                uri=recipe[2]
            ),
            MessageTemplateAction(
                label='选择食谱',
                text='选择食谱:' + recipe[0]
            ),
        ],
    )
    for recipe in content[:MAX_COLUMNS]
]

# Only add the carousel when there is at least one recipe to show.
# NOTE(review): presumably an empty carousel is rejected by the LINE API -
# confirm, and consider replying with a "no results" text message instead.
if columns:
    message.append(
        TemplateSendMessage(
            alt_text='Carousel template',
            template=CarouselTemplate(columns=columns)
        )
    )
line_bot_api.reply_message(event.reply_token, message)
以上程序的呈现如下图所示,只要输入「搜寻 布丁」,就会自动从 cookpad 上爬取相关的前五笔食谱,并将其中前 3 笔制作成 Carousel template 的形式。
<<: 网页框架比一比-30天学会HTML+CSS,制作精美网站
>>: [Day29] CI /CD with GitLab CI
动机 去年0前端基础用JavaScript作为主题参加了自我挑战,近期从原本的Java纯後端实习生默...
接着进行AWS RDS的实作, 以及与地端的基本差异. 在SSMS上, 资料库按右键建立 [New ...
Synology 虽然提供很方便的 QuickConnect 可让用户端应用程序透过网际网路连线至 ...
哈罗!我是 Harry,这次想和大家分享的是前端工程师的「工作日常」,我们可能会接到什麽需求,又该...
GitHub Repo https://github.com/b2etw/Spring-Kotlin...