การเรียนรู้และจดจำเสียงใน Python

การเรียนรู้และจดจำเสียงคืออะไร?
การเรียนรู้และจดจำเสียงคือการระบุคำพูดและแปลงเป็นข้อความ ช่วยให้คอมพิวเตอร์เข้าใจภาษาของมนุษย์

การเรียนรู้และจดจำเสียงเริ่มต้นด้วยการใช้ไมโครโฟนแปลงพลังงานเสียงที่ผู้พูดเปล่งออกมาให้เป็นพลังงานไฟฟ้า จากนั้นจะแปลงสัญญาณไฟฟ้านี้จากแบบ analog เป็นแบบ digital และสุดท้ายแปลงเป็นข้อความ

เป็นการแบ่งข้อมูลเสียงออกเป็นเสียงต่างๆ และวิเคราะห์เสียงโดยใช้อัลกอริทึมเพื่อค้นหาคำที่น่าจะตรงกับเสียงนั้นมากที่สุด ทั้งหมดนี้ทำได้โดยใช้ Natural Language Processing และ Neural Networks

การแปลงคำพูดเป็นข้อความในภาษา Python

import speech_recognition as sr
import pyttsx3

เริ่มสร้างโปรแกรมที่รับเสียงเป็น input และแปลงเป็นข้อความ โดย import โมดูลที่จำเป็นตามโค้ดด้านบน จากนั้นสร้าง recognizer และฟังก์ชันสำหรับอ่านข้อความออกเสียง

# Initialize the recognizer (the module is imported as `sr`)
r = sr.Recognizer()

def SpeakText(command):
    """Queue `command` on a new pyttsx3 engine and run it with `runAndWait()`."""
    tts = pyttsx3.init()
    tts.say(command)
    tts.runAndWait()

สร้างฟังก์ชันที่รับเสียงเป็น input เข้ามาและแปลงเป็นข้อความ

# Use the microphone as the source for input.
try:
    with sr.Microphone() as source2:
        # Sample the surroundings briefly (0.2 s) so the recognizer can
        # adjust its energy threshold to the surrounding noise level.
        r.adjust_for_ambient_noise(source2, duration=0.2)

        # Listen for the user's input.
        audio2 = r.listen(source2)

    # Use Google to recognize the recorded audio.
    MyText = r.recognize_google(audio2)
    MyText = MyText.lower()

    print("Did you say " + MyText)
    SpeakText(MyText)
except sr.RequestError as e:
    # The recognition API was unreachable or unresponsive.
    print("Could not request results; {0}".format(e))
except sr.UnknownValueError:
    # The speech was unintelligible.
    print("Could not understand the audio")

ขั้นตอนนี้จะใช้ไมโครโฟนเพื่อรับ input เสียงจากผู้ใช้แบบ Realtime และแสดงผลออกมาเป็นข้อความ

การนำมาประยุกต์ใช้ในเกมทายคำศัพท์ (Python Demo):

เริ่มต้นที่การ import packages ที่จำเป็นต้องใช้

import random
import time
import speech_recognition as sr

สร้างฟังก์ชันเพื่อรับ input ที่ผู้ใช้กำลังพูดจากไมโครโฟน

def recognize_speech_from_mic(recognizer, microphone):
    """Transcribe speech recorded from `microphone`.

    Returns a dictionary with three keys:
    "success": a boolean indicating whether or not the API request was
               successful
    "error":   `None` if no error occurred, otherwise a string containing
               an error message if the API could not be reached or
               speech was unrecognizable
    "transcription": `None` if speech could not be transcribed,
               otherwise a string containing the transcribed text

    Raises `TypeError` if `recognizer` is not an `sr.Recognizer` or
    `microphone` is not an `sr.Microphone`.
    """
    # check that recognizer and microphone arguments are appropriate type
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")

    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

ถัดมาเราจะปรับความไวของ recognizer ให้เข้ากับเสียงรบกวนรอบข้าง แล้วบันทึกเสียงจากไมโครโฟน จากนั้นเรียก API เพื่อแปลงเสียงเป็นข้อความ พร้อมทั้งตรวจสอบว่าการเรียก API ทำงานผิดปกติหรือไม่ และเสียงที่ได้นั้นชัดเจนพอที่จะแปลงเป็นข้อความได้หรือไม่

# adjust the recognizer sensitivit to ambient noise and record audio form the microphone
with microphone as source:
   recognizer.adjust_for_ambien_noise(source)
   audio = recognizer.listen(source)

# set up the response object
response ={
   "success": True,
   "error": None,
   "transciption": None
}

#try recognizer the speech in the recording
#if a RequestError or UnknowValueError exception is caught
# update the response object accordingly
try:
   response["tanscription"] = recognizer.recognizer_google(audio)
except sr.RequestError:
   # API was unreachable or unresponsive
   response["success"] = False
   response["error"] = "API unabailable"
except se.UnknowValueError:
   # speech was unintelligible
   response["error"] = "Unable to recognize speech"
return response

ต่อมาทำการสร้างรายการที่มีคำศัพท์ต่างๆ ที่ผู้เล่นจะต้องทาย

if __name__ == "__main__":
    # set the list of words, max number of guesses, and prompt limit
    WORDS = ["apple", "banana", "grape", "orange", "mango", "lemon"]
    # NOTE(review): the accompanying text describes three guesses; confirm
    # whether 2 is the intended value.
    NUM_GUESSES = 2
    PROMPT_LIMIT = 5

    # create recognizer and mic instances
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    # get a random word from the list
    word = random.choice(WORDS)

    # format the instructions string
    instructions = (
        "I'm thinking of one of these words:\n"
        "{words}\n"
        "You have {n} tries to guess which one.\n"
    ).format(words=', '.join(WORDS), n=NUM_GUESSES)

    # show instructions and wait 3 seconds before starting the game
    print(instructions)
    time.sleep(3)

ถัดมาทำการสร้างลูปที่ให้ผู้เล่นทายคำผ่านไมโครโฟน โดยในแต่ละรอบจะรับ input เสียง ตรวจสอบว่าเกิดข้อผิดพลาดหรือไม่ และพิมพ์ข้อความที่ผู้เล่นพูดออกมา

for i in range(NUM_GUesses):
# get the guess from the user
# if a transcription is returned, break out of the loop and continue
# if no transcription return and API request failed, break loop and continue
# if API request success but notranscription was returned, 
#   re-promt the user to say their guess again. Do this up
#   to PROMPT_LIMIT times
for j in range(PROMPT_LIMIT):
   print('Guess {}. Speak!'.format(i+1))
   guess = recognize_speech_from_mic(recognizer,microphone)
   if guess["transcription"]:
      break
   if not gusess["success"]:
      break
   print("I didn't catch that. What did you say?\n")

# if there was an error, stop the game
if guss["error"]:
   print("ERROR: {}".format(guess["error"]))
   break

#show the user the transcription
print("You said: {}".format(guess["transcription"]))

output ที่ได้คือข้อความต่างๆ ที่โปรแกรมแสดงออกมาตามผลการทายของผู้เล่น

if guess_is_correct:
   print("Correct! You win!".format(word))
   break
elif user_has_more_attempis:
   print("Incorrect. Try again.\n")
else:
   print("Sorry,you lose!\nI was thinking of '{}'.".format(word))
break

จากผลลัพธ์ จะเห็นว่าคำที่เลือกคือ 'apple' ผู้เล่นทายได้ทั้งหมด 3 ครั้งและทายผิดในครั้งที่ 3 โปรแกรมจึงเฉลยออกมาว่าคำที่ถูกคือคำอะไร

สรุป
การเรียนรู้และจดจำเสียงใน Python จะทำงานร่วมกับอัลกอริทึมที่ทำการสร้างแบบจำลองทางภาษาและเสียง การสร้างแบบจำลองอะคูสติกใช้เพื่อจดจำหน่วยเสียง (phoneme)/สัทศาสตร์ในคำพูดของเราเพื่อให้ได้ส่วนที่สำคัญของคำพูด เช่น คำและประโยค โดยจะแบ่งข้อมูลเสียงออกเป็นเสียงต่างๆ และวิเคราะห์เสียงโดยใช้อัลกอริทึมเพื่อค้นหาคำที่น่าจะใกล้เคียงกับเสียงนั้นมากที่สุด วิธีการนี้มีประโยชน์อย่างมากในการนำไปใช้ในด้านอื่นๆ นอกเหนือจากตัวอย่างด้านเกมที่ยกมา เช่น เป็นตัวช่วยพิมพ์ตามคำพูดในตอนที่มือเราไม่ว่าง เป็นต้น นี่ก็เป็นอีกหนึ่งวิธีหรือแนวคิดที่น่าสนใจ และนำมาประยุกต์ใช้ได้อย่างหลากหลายและมีประสิทธิภาพ

References
www.simplilearn.com

Top comments (0)

Some comments may only be visible to logged-in visitors. Sign in to view all comments.