这篇会针对韩文翻译机器人的功能,整合 Azure 的OCR、翻译和文字转换语音的工具,分别针对韩文的文字和含有韩文的图片,进行翻译并提供发音音档。处理的流程分别如下:
上传config.json
到 Azure Web App,详情可看Chatbot integration- 看图学英文的说明。
config.json
{
"line": {
"line_secret": "your line secret",
"line_token": "your line token",
},
"azure": {
"cv_key": "your subscription key of computer vision",
"cv_end": "your endpoint of computer vision",
"blob_connect": "your connect string",
"blob_container": "your blob container name",
"trans_key": "your subscription key of translator",
"speech_key": "your subscription key of speech"
}
}
Python 套件清单 requirements.txt:
Flask==1.0.2
line-bot-sdk
azure-cognitiveservices-vision-computervision
azure-cognitiveservices-speech
azure-storage-blob
Pillow
langdetect
requests
import os
import json
import time
import requests
from flask import Flask, request, abort
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.storage.blob import BlobServiceClient
from azure.cognitiveservices.speech import (
SpeechConfig,
SpeechSynthesizer,
)
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from msrest.authentication import CognitiveServicesCredentials
from linebot import LineBotApi, WebhookHandler
from linebot.exceptions import InvalidSignatureError
from linebot.models import (
MessageEvent,
TextMessage,
TextSendMessage,
FlexSendMessage,
ImageMessage,
)
from PIL import Image
from langdetect import detect
app = Flask(__name__)

# Load deployment secrets from the mounted config file.
# FIX: the original used json.load(open(...)) and never closed the handle.
with open("/home/config.json", "r") as cfg_file:
    CONFIG = json.load(cfg_file)

# Computer Vision credentials (for OCR)
SUBSCRIPTION_KEY = CONFIG["azure"]["cv_key"]
ENDPOINT = CONFIG["azure"]["cv_end"]
CV_CLIENT = ComputerVisionClient(
    ENDPOINT, CognitiveServicesCredentials(SUBSCRIPTION_KEY)
)
# Connect to the blob service (stores uploaded images and wav files)
CONNECT_STR = CONFIG["azure"]["blob_connect"]
CONTAINER = CONFIG["azure"]["blob_container"]
BLOB_SERVICE = BlobServiceClient.from_connection_string(CONNECT_STR)
# Translator key
TRANS_KEY = CONFIG["azure"]["trans_key"]
# Speech service config: synthesize with a Korean voice
SPEECH_KEY = CONFIG["azure"]["speech_key"]
SPEECH_CONFIG = SpeechConfig(subscription=SPEECH_KEY, region="eastus2")
SPEECH_CONFIG.speech_synthesis_language = "ko-KR"
# LINE messaging credentials
LINE_SECRET = CONFIG["line"]["line_secret"]
LINE_TOKEN = CONFIG["line"]["line_token"]
LINE_BOT = LineBotApi(LINE_TOKEN)
HANDLER = WebhookHandler(LINE_SECRET)
@app.route("/")
def hello():
"hello world"
return "Hello World!!!!!"
# Upload a local file to Azure Blob Storage
def upload_blob(container, path):
    """
    Upload the file at ``path`` to blob ``path`` in ``container``.

    Args:
        container: blob container name.
        path: local file path; also used as the blob name.

    Returns:
        str: URL of the uploaded blob.
    """
    blob_client = BLOB_SERVICE.get_blob_client(container=container, blob=path)
    # overwrite=True so re-processing the same message id doesn't fail
    with open(path, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)
    # FIX: removed the redundant data.close() — the with-block already
    # closes the handle (calling close() inside the block was a no-op risk).
    return blob_client.url
# Run OCR on an image URL with Azure Computer Vision (Read API)
def azure_ocr(url):
    """
    Extract text lines from the image at ``url`` via Azure OCR.

    Args:
        url: image URL reachable by the Computer Vision service.

    Returns:
        list[str]: one entry per recognized line; empty when the read
        operation did not succeed or found no text.
    """
    # Start the asynchronous Read operation; its id is embedded in the
    # Operation-Location response header.
    ocr_results = CV_CLIENT.read(url, raw=True)
    operation_location_remote = ocr_results.headers["Operation-Location"]
    operation_id = operation_location_remote.split("/")[-1]
    # Poll once per second until the service finishes.
    while True:
        get_handw_text_results = CV_CLIENT.get_read_result(operation_id)
        if get_handw_text_results.status not in ["notStarted", "running"]:
            break
        time.sleep(1)
    text = []
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                text.append(line.text)
    # BUG FIX: the original had the condition inverted — it returned the
    # list only when it was empty and returned [] whenever OCR actually
    # found text, discarding every successful result.
    return text
# Translate a string to Traditional Chinese
def azure_translation(string, message_id):
    """
    Translate ``string`` to Traditional Chinese with the Azure
    Translator REST API; for Korean input, also build a speech button.

    Args:
        string: source text to translate.
        message_id: LINE message id, used to name the wav file.

    Returns:
        tuple[str, dict | str]: (display text, Flex button dict) when the
        detected language is Korean, otherwise ("", "").
    """
    trans_url = "https://api.cognitive.microsofttranslator.com/translate"
    # BUG FIX: this endpoint and the response schema parsed below
    # (translations / detectedLanguage) belong to Translator v3, which
    # requires api-version=3.0; "2.0" is rejected by the v3 endpoint.
    params = {"api-version": "3.0", "to": ["zh-Hant"]}
    headers = {
        "Ocp-Apim-Subscription-Key": TRANS_KEY,
        "Content-type": "application/json",
        "Ocp-Apim-Subscription-Region": "eastus2",
    }
    body = [{"text": string}]
    req = requests.post(trans_url, params=params, headers=headers, json=body)
    response = req.json()
    output = ""
    speech_button = ""
    ans = []
    for i in response:
        ans.append(i["translations"][0]["text"])
    language = response[0]["detectedLanguage"]["language"]
    # Only Korean input gets a pronunciation clip via Azure Speech
    if language == "ko":
        # BUG FIX: the original did " ".join(string) on a str, which
        # inserts a space between every single character of the source
        # text; show the original text unmodified instead.
        output = string + "\n" + " ".join(ans)
        speech_button = azure_speech(string, message_id)
    return output, speech_button
# Convert a string to a wav file and publish it to Azure blob
def azure_speech(string, message_id):
    """
    Synthesize ``string`` to a wav file, upload it to blob storage and
    wrap the URL in a Flex-message button.

    Args:
        string: text to speak (Korean voice, per SPEECH_CONFIG).
        message_id: LINE message id, used as the wav file name.

    Returns:
        dict: Flex "button" component whose action opens the audio URL.
    """
    file_name = "{}.wav".format(message_id)
    audio_config = AudioOutputConfig(filename=file_name)
    synthesizer = SpeechSynthesizer(
        speech_config=SPEECH_CONFIG, audio_config=audio_config
    )
    # BUG FIX: speak_text_async returns a future; the original never
    # waited on it, so the wav could be uploaded (and deleted) before
    # synthesis finished writing it. .get() blocks until completion.
    synthesizer.speak_text_async(string).get()
    # Upload to Azure blob and get the public URL
    link = upload_blob(CONTAINER, file_name)
    # Wrap the URL in a Flex button for the final reply
    output = {
        "type": "button",
        "flex": 2,
        "style": "primary",
        "color": "#1E90FF",
        "action": {"type": "uri", "label": "Voice", "uri": link},
        "height": "sm",
    }
    # Local copy is no longer needed once it lives in blob storage
    os.remove(file_name)
    return output
# Webhook endpoint the LINE platform POSTs events to
@app.route("/callback", methods=["POST"])
def callback():
    """
    LINE bot webhook callback
    """
    # Grab the raw body plus the signature header used to authenticate it
    body = request.get_data(as_text=True)
    signature = request.headers["X-Line-Signature"]
    print(signature)
    print(body)
    # Dispatch to the registered HANDLER; reject requests whose
    # signature does not match our channel secret.
    try:
        HANDLER.handle(body, signature)
    except InvalidSignatureError:
        print(
            "Invalid signature. Please check your channel access token/channel secret."
        )
        abort(400)
    return "OK"
# Handler for incoming text messages
@HANDLER.add(MessageEvent, message=TextMessage)
def handle_message(event):
    """
    Reply to a text message: Korean text is answered with a Flex bubble
    holding the Chinese translation and a pronunciation button; any
    other language is echoed back unchanged.
    """
    # FIX: removed the redundant f_h.close() — the with-block closes it.
    with open("templates/detect_result.json", "r") as f_h:
        bubble = json.load(f_h)
    # langdetect decides whether the message is Korean
    if detect(event.message.text) == "ko":
        output, speech_button = azure_translation(event.message.text, event.message.id)
        # The header slot holds an image; drop it for text-only replies
        bubble.pop("header")
        # Translation result goes into the first body content
        bubble["body"]["contents"][0]["text"] = output
        # Append the voice-link button
        bubble["body"]["contents"].append(speech_button)
        # Shrink the body to fit text + button only
        bubble["body"]["height"] = "{}px".format(150)
        message = FlexSendMessage(alt_text="Report", contents=bubble)
    else:
        message = TextSendMessage(text=event.message.text)
    LINE_BOT.reply_message(event.reply_token, message)
# Handler for incoming image messages
@HANDLER.add(MessageEvent, message=ImageMessage)
def handle_content_message(event):
    """
    Reply to an image message: download it, OCR it, translate the text
    and answer with a Flex bubble (image + translation + voice button).
    """
    print(event.message)
    print(event.source.user_id)
    print(event.message.id)
    # FIX: removed redundant close() calls inside with-blocks throughout.
    with open("templates/detect_result.json", "r") as f_h:
        bubble = json.load(f_h)
    filename = "{}.jpg".format(event.message.id)
    # Fetch the image bytes from the LINE content API
    message_content = LINE_BOT.get_message_content(event.message.id)
    with open(filename, "wb") as f_h:
        for chunk in message_content.iter_content():
            f_h.write(chunk)
    # FIX: read the dimensions inside a context manager so the PIL
    # handle is released (the original never closed the image).
    with Image.open(filename) as img:
        width, height = img.size
    link = upload_blob(CONTAINER, filename)
    text = azure_ocr(link)
    output, speech_button = azure_translation(" ".join(text), event.message.id)
    # Assemble image, translation and voice button into the flex message
    bubble["body"]["contents"].append(speech_button)
    bubble["body"]["height"] = "{}px".format(150)
    bubble["body"]["contents"][0]["text"] = output
    bubble["header"]["contents"][0]["url"] = link
    # Keep the original aspect ratio so the header image isn't distorted
    bubble["header"]["contents"][0]["aspectRatio"] = "{}:{}".format(width, height)
    # FIX: delete the local jpg once uploaded (the original leaked one
    # file per received image; azure_speech already removes its wav).
    os.remove(filename)
    LINE_BOT.reply_message(
        event.reply_token, [FlexSendMessage(alt_text="Report", contents=bubble)]
    )
最後应该会得到如下图的效果,这个图片是从一本年代久远的韩文讲义找到的内容。有趣的是,OCR 有把咖啡杯上的 coffee 辨识出来,转换成语音时,也用韩文发音 coffee ,跟커피发音相同,听起来比较像 "kopee"。发音也与真人无异,效果可以参照此影片:https://youtu.be/AC154CVsLZ4
接下来,我们试试人脸辨识,明天见。
>>: D-30-安装 vscode ? dotnet sdk
这篇文章是从零开始到 MVP 系列的第二篇,想要看第一篇请往上点。 新创公司甘苦谈 每一间新创公司都...
函数重载,即使用相同的函数名,但函数的参数和数据类型不同,让程序根据需要,自动选择使用那个函数。 下...
之前不管是介绍到主选单或是 Social Links,可以发现它本身都有预设的 Icon 图示可以对...
经过昨天会写了一个简单的 Hello Extension 後, 发现应该要回报进度和共同学习交叉的分...
Technically, Go is not an object-oriented programm...