这篇会针对韩文翻译机器人的功能,整合 Azure 的OCR、翻译和文字转换语音的工具,分别针对韩文的文字和含有韩文的图片,进行翻译并提供发音音档。处理的流程分别如下:
上传config.json
到 Azure Web App,详情可看Chatbot integration- 看图学英文的说明。
config.json
{
"line": {
"line_secret": "your line secret",
"line_token": "your line token",
},
"azure": {
"cv_key": "your subscription key of computer vision",
"cv_end": "your endpoint of computer vision",
"blob_connect": "your connect string",
"blob_container": "your blob container name",
"trans_key": "your subscription key of translator",
"speech_key": "your subscription key of speech"
}
}
Python 套件清单 requirements.txt:
Flask==1.0.2
line-bot-sdk
azure-cognitiveservices-vision-computervision
azure-cognitiveservices-speech
azure-storage-blob
Pillow
langdetect
requests
import os
import json
import time
import requests
from flask import Flask, request, abort
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.storage.blob import BlobServiceClient
from azure.cognitiveservices.speech import (
SpeechConfig,
SpeechSynthesizer,
)
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from msrest.authentication import CognitiveServicesCredentials
from linebot import LineBotApi, WebhookHandler
from linebot.exceptions import InvalidSignatureError
from linebot.models import (
MessageEvent,
TextMessage,
TextSendMessage,
FlexSendMessage,
ImageMessage,
)
from PIL import Image
from langdetect import detect
app = Flask(__name__)

# Load deployment secrets from the mounted config file.
# FIX: the original used json.load(open(...)) and never closed the handle.
with open("/home/config.json", "r") as cfg_file:
    CONFIG = json.load(cfg_file)

# Computer Vision credentials (for OCR)
SUBSCRIPTION_KEY = CONFIG["azure"]["cv_key"]
ENDPOINT = CONFIG["azure"]["cv_end"]
CV_CLIENT = ComputerVisionClient(
    ENDPOINT, CognitiveServicesCredentials(SUBSCRIPTION_KEY)
)
# Connect to the blob service (stores uploaded images and wav files)
CONNECT_STR = CONFIG["azure"]["blob_connect"]
CONTAINER = CONFIG["azure"]["blob_container"]
BLOB_SERVICE = BlobServiceClient.from_connection_string(CONNECT_STR)
# Translator key
TRANS_KEY = CONFIG["azure"]["trans_key"]
# Speech service config: synthesize with a Korean voice
SPEECH_KEY = CONFIG["azure"]["speech_key"]
SPEECH_CONFIG = SpeechConfig(subscription=SPEECH_KEY, region="eastus2")
SPEECH_CONFIG.speech_synthesis_language = "ko-KR"
# LINE messaging credentials
LINE_SECRET = CONFIG["line"]["line_secret"]
LINE_TOKEN = CONFIG["line"]["line_token"]
LINE_BOT = LineBotApi(LINE_TOKEN)
HANDLER = WebhookHandler(LINE_SECRET)
@app.route("/")
def hello():
"hello world"
return "Hello World!!!!!"
# Upload a local file to Azure Blob Storage
def upload_blob(container, path):
    """
    Upload the file at ``path`` to blob ``path`` in ``container``.

    Args:
        container: blob container name.
        path: local file path; also used as the blob name.

    Returns:
        str: URL of the uploaded blob.
    """
    blob_client = BLOB_SERVICE.get_blob_client(container=container, blob=path)
    # overwrite=True so re-processing the same message id doesn't fail
    with open(path, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)
    # FIX: removed the redundant data.close() — the with-block already
    # closes the handle (calling close() inside the block was a no-op risk).
    return blob_client.url
# Run OCR on an image URL with Azure Computer Vision (Read API)
def azure_ocr(url):
    """
    Extract text lines from the image at ``url`` via Azure OCR.

    Args:
        url: image URL reachable by the Computer Vision service.

    Returns:
        list[str]: one entry per recognized line; empty when the read
        operation did not succeed or found no text.
    """
    # Start the asynchronous Read operation; its id is embedded in the
    # Operation-Location response header.
    ocr_results = CV_CLIENT.read(url, raw=True)
    operation_location_remote = ocr_results.headers["Operation-Location"]
    operation_id = operation_location_remote.split("/")[-1]
    # Poll once per second until the service finishes.
    while True:
        get_handw_text_results = CV_CLIENT.get_read_result(operation_id)
        if get_handw_text_results.status not in ["notStarted", "running"]:
            break
        time.sleep(1)
    text = []
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                text.append(line.text)
    # BUG FIX: the original had the condition inverted — it returned the
    # list only when it was empty and returned [] whenever OCR actually
    # found text, discarding every successful result.
    return text
# Translate a string to Traditional Chinese
def azure_translation(string, message_id):
    """
    Translate ``string`` to Traditional Chinese with the Azure
    Translator REST API; for Korean input, also build a speech button.

    Args:
        string: source text to translate.
        message_id: LINE message id, used to name the wav file.

    Returns:
        tuple[str, dict | str]: (display text, Flex button dict) when the
        detected language is Korean, otherwise ("", "").
    """
    trans_url = "https://api.cognitive.microsofttranslator.com/translate"
    # BUG FIX: this endpoint and the response schema parsed below
    # (translations / detectedLanguage) belong to Translator v3, which
    # requires api-version=3.0; "2.0" is rejected by the v3 endpoint.
    params = {"api-version": "3.0", "to": ["zh-Hant"]}
    headers = {
        "Ocp-Apim-Subscription-Key": TRANS_KEY,
        "Content-type": "application/json",
        "Ocp-Apim-Subscription-Region": "eastus2",
    }
    body = [{"text": string}]
    req = requests.post(trans_url, params=params, headers=headers, json=body)
    response = req.json()
    output = ""
    speech_button = ""
    ans = []
    for i in response:
        ans.append(i["translations"][0]["text"])
    language = response[0]["detectedLanguage"]["language"]
    # Only Korean input gets a pronunciation clip via Azure Speech
    if language == "ko":
        # BUG FIX: the original did " ".join(string) on a str, which
        # inserts a space between every single character of the source
        # text; show the original text unmodified instead.
        output = string + "\n" + " ".join(ans)
        speech_button = azure_speech(string, message_id)
    return output, speech_button
# Convert a string to a wav file and publish it to Azure blob
def azure_speech(string, message_id):
    """
    Synthesize ``string`` to a wav file, upload it to blob storage and
    wrap the URL in a Flex-message button.

    Args:
        string: text to speak (Korean voice, per SPEECH_CONFIG).
        message_id: LINE message id, used as the wav file name.

    Returns:
        dict: Flex "button" component whose action opens the audio URL.
    """
    file_name = "{}.wav".format(message_id)
    audio_config = AudioOutputConfig(filename=file_name)
    synthesizer = SpeechSynthesizer(
        speech_config=SPEECH_CONFIG, audio_config=audio_config
    )
    # BUG FIX: speak_text_async returns a future; the original never
    # waited on it, so the wav could be uploaded (and deleted) before
    # synthesis finished writing it. .get() blocks until completion.
    synthesizer.speak_text_async(string).get()
    # Upload to Azure blob and get the public URL
    link = upload_blob(CONTAINER, file_name)
    # Wrap the URL in a Flex button for the final reply
    output = {
        "type": "button",
        "flex": 2,
        "style": "primary",
        "color": "#1E90FF",
        "action": {"type": "uri", "label": "Voice", "uri": link},
        "height": "sm",
    }
    # Local copy is no longer needed once it lives in blob storage
    os.remove(file_name)
    return output
# Webhook endpoint the LINE platform POSTs events to
@app.route("/callback", methods=["POST"])
def callback():
    """
    LINE bot webhook callback
    """
    # Grab the raw body plus the signature header used to authenticate it
    body = request.get_data(as_text=True)
    signature = request.headers["X-Line-Signature"]
    print(signature)
    print(body)
    # Dispatch to the registered HANDLER; reject requests whose
    # signature does not match our channel secret.
    try:
        HANDLER.handle(body, signature)
    except InvalidSignatureError:
        print(
            "Invalid signature. Please check your channel access token/channel secret."
        )
        abort(400)
    return "OK"
# Handler for incoming text messages
@HANDLER.add(MessageEvent, message=TextMessage)
def handle_message(event):
    """
    Reply to a text message: Korean text is answered with a Flex bubble
    holding the Chinese translation and a pronunciation button; any
    other language is echoed back unchanged.
    """
    # FIX: removed the redundant f_h.close() — the with-block closes it.
    with open("templates/detect_result.json", "r") as f_h:
        bubble = json.load(f_h)
    # langdetect decides whether the message is Korean
    if detect(event.message.text) == "ko":
        output, speech_button = azure_translation(event.message.text, event.message.id)
        # The header slot holds an image; drop it for text-only replies
        bubble.pop("header")
        # Translation result goes into the first body content
        bubble["body"]["contents"][0]["text"] = output
        # Append the voice-link button
        bubble["body"]["contents"].append(speech_button)
        # Shrink the body to fit text + button only
        bubble["body"]["height"] = "{}px".format(150)
        message = FlexSendMessage(alt_text="Report", contents=bubble)
    else:
        message = TextSendMessage(text=event.message.text)
    LINE_BOT.reply_message(event.reply_token, message)
# Handler for incoming image messages
@HANDLER.add(MessageEvent, message=ImageMessage)
def handle_content_message(event):
    """
    Reply to an image message: download it, OCR it, translate the text
    and answer with a Flex bubble (image + translation + voice button).
    """
    print(event.message)
    print(event.source.user_id)
    print(event.message.id)
    # FIX: removed redundant close() calls inside with-blocks throughout.
    with open("templates/detect_result.json", "r") as f_h:
        bubble = json.load(f_h)
    filename = "{}.jpg".format(event.message.id)
    # Fetch the image bytes from the LINE content API
    message_content = LINE_BOT.get_message_content(event.message.id)
    with open(filename, "wb") as f_h:
        for chunk in message_content.iter_content():
            f_h.write(chunk)
    # FIX: read the dimensions inside a context manager so the PIL
    # handle is released (the original never closed the image).
    with Image.open(filename) as img:
        width, height = img.size
    link = upload_blob(CONTAINER, filename)
    text = azure_ocr(link)
    output, speech_button = azure_translation(" ".join(text), event.message.id)
    # Assemble image, translation and voice button into the flex message
    bubble["body"]["contents"].append(speech_button)
    bubble["body"]["height"] = "{}px".format(150)
    bubble["body"]["contents"][0]["text"] = output
    bubble["header"]["contents"][0]["url"] = link
    # Keep the original aspect ratio so the header image isn't distorted
    bubble["header"]["contents"][0]["aspectRatio"] = "{}:{}".format(width, height)
    # FIX: delete the local jpg once uploaded (the original leaked one
    # file per received image; azure_speech already removes its wav).
    os.remove(filename)
    LINE_BOT.reply_message(
        event.reply_token, [FlexSendMessage(alt_text="Report", contents=bubble)]
    )
最後应该会得到如下图的效果,这个图片是从一本年代久远的韩文讲义找到的内容。有趣的是,OCR 有把咖啡杯上的 coffee 辨识出来,转换成语音时,也用韩文发音 coffee ,跟커피发音相同,听起来比较像 "kopee"。发音也与真人无异,效果可以参照此影片:https://youtu.be/AC154CVsLZ4
接下来,我们试试人脸辨识,明天见。
>>: D-30-安装 vscode ? dotnet sdk
这篇文章是从零开始到 MVP 系列的第二篇,想要看第一篇请往上点。 新创公司甘苦谈 每一间新创公司都...
函数重载,即使用相同的函数名,但函数的参数和数据类型不同,让程序根据需要,自动选择使用那个函数。 下...
之前不管是介绍到主选单或是 Social Links,可以发现它本身都有预设的 Icon 图示可以对...
经过昨天会写了一个简单的 Hello Extension 後, 发现应该要回报进度和共同学习交叉的分...
Technically, Go is not an object-oriented programm...