-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtranscribe.py
43 lines (38 loc) · 1.38 KB
/
transcribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#uses AWS Transcribe to convert audio stored in Amazon S3 into text
#to do: add voice input and analyze output
#change the job name for each run (AWS rejects a job name that is already in use)
from __future__ import print_function
import time
import boto3
import urllib
import json
#set up AWS transcribe
# No access keys are passed here on purpose: secrets must never live in source
# control. boto3 resolves credentials from its default chain instead (the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables, the shared
# ~/.aws/credentials file, or an attached IAM role).
transcribe = boto3.client('transcribe', region_name='us-east-2')
# Name under which the transcription job is submitted and later looked up.
job_name = "Transcribe30"
# URL of the mp3 file in S3 that will be transcribed.
job_uri = "https://s3.us-east-2.amazonaws.com/sound-joelmussell/Merry+Christmas-SoundBible.com-1120316507.mp3"
#start transcription
# Submit the job for the audio at job_uri under job_name. This call only
# submits the request; the job status is polled further down in the script.
media_source = {'MediaFileUri': job_uri}
transcribe.start_transcription_job(
    TranscriptionJobName=job_name,
    LanguageCode='en-US',
    MediaFormat='mp3',
    Media=media_source
)
#wait for AWS to respond
# Poll the job every 5 seconds until it reports COMPLETED or FAILED.
# `status` keeps the last response received from the service.
while True:
    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    job_state = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_state in ('COMPLETED', 'FAILED'):
        break
    print("Not ready yet...")
    time.sleep(5)
#record transcribe output
# Re-read the job so this section relies only on `transcribe` and `job_name`.
text = transcribe.get_transcription_job(TranscriptionJobName=job_name)
job = text['TranscriptionJob']
# The polling loop also stops on FAILED; a job in that state is not expected to
# carry a transcript, so stop with a clear message instead of a bare KeyError.
if job['TranscriptionJobStatus'] != 'COMPLETED':
    raise RuntimeError(
        "Transcription job %s did not complete: %s"
        % (job_name, job.get('FailureReason', job['TranscriptionJobStatus']))
    )
#open json results file and extract information
# urlopen lives in different modules on Python 2 and 3; import whichever exists
# so the script runs on both (the file already opts into print_function).
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2
response = urlopen(job['Transcript']['TranscriptFileUri'])
try:
    jsonText = response.read().decode('utf-8')
finally:
    # Always release the HTTP connection, even if the read fails.
    response.close()
# Parse the document with the json module rather than slicing the raw text:
# slicing stops at the first escaped quote inside the transcript and leaves
# JSON escape sequences undecoded.
# NOTE(review): assumes the standard Amazon Transcribe result layout
# (results -> transcripts -> [0] -> transcript) - confirm against the service docs.
transcript = json.loads(jsonText)['results']['transcripts'][0]['transcript']
print(transcript)