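In this video, you will learn how to deploy an LLM-based application into production by using Amazon Bedrock and Amazon Transcribe to summarize audio files: Amazon Transcribe performs the automatic speech recognition (ASR), and an Amazon Titan text model generates the summary.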
import boto3
import json
import uuid
import time
from jinja2 import Template
# AWS service clients. Bedrock is called in us-east-1, while S3 and
# Transcribe are both called in ap-southeast-2.
bedrock_runtime = boto3.client(service_name='bedrock-runtime', region_name='us-east-1')
transcribe_client = boto3.client(service_name='transcribe', region_name='ap-southeast-2')
s3_client = boto3.client(service_name='s3', region_name='ap-southeast-2')

# Bucket that holds the audio file; replace the placeholder before running.
bucket_name = '<replace your bucket name here>'

# Audio object to transcribe ('angry.mp3' is the alternative sample file).
file_name = 'happy.mp3'

# A random UUID suffix gives every run its own transcription job name.
job_name = f'transcription-job-{uuid.uuid4()}'
# Start an Amazon Transcribe job for the audio object in S3.
#
# The call as published was truncated: it had no job name, no closing
# parenthesis, and the speaker-label options were not wrapped in `Settings`.
# OutputBucketName is set to the same bucket because the script later reads
# the result back from that bucket under the key "<job_name>.json".
response = transcribe_client.start_transcription_job(
    TranscriptionJobName=job_name,
    Media={'MediaFileUri': f's3://{bucket_name}/{file_name}'},
    MediaFormat='mp3',  # matches the .mp3 sample files used by this script
    LanguageCode='en-US',  # NOTE(review): assumes English audio - confirm
    OutputBucketName=bucket_name,
    Settings={
        # Label each transcript item with the speaker who said it.
        'ShowSpeakerLabels': True,
        'MaxSpeakerLabels': 2,
    },
)
# Seconds to wait between status checks so the loop does not hammer the API.
POLL_INTERVAL_SECONDS = 5


def _format_transcript(items):
    """Turn Transcribe result items into speaker-labelled text.

    A new line prefixed with "<speaker_label>: " is started whenever the
    speaker changes. Punctuation items are attached directly to the preceding
    word by stripping the trailing whitespace before appending them.

    Args:
        items: The list found at ``results.items`` in the transcript JSON.
            Each item must have ``type`` and ``alternatives[0].content``;
            ``speaker_label`` is optional.

    Returns:
        The formatted transcript as a single string.
    """
    output_text = ""
    current_speaker = None

    for item in items:
        speaker_label = item.get('speaker_label')
        content = item['alternatives'][0]['content']

        # Start the line with the speaker label:
        if speaker_label is not None and speaker_label != current_speaker:
            current_speaker = speaker_label
            output_text += f"\n{current_speaker}: "

        # Add the speech content:
        if item['type'] == 'punctuation':
            output_text = output_text.rstrip()

        output_text += f"{content} "

    return output_text


# Poll until the job reaches a terminal state.
while True:
    status = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
    job_status = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_status in ['COMPLETED', 'FAILED']:
        break
    time.sleep(POLL_INTERVAL_SECONDS)

# Fail loudly here instead of crashing later on a missing transcript file.
if job_status != 'COMPLETED':
    failure_reason = status['TranscriptionJob'].get('FailureReason', 'unknown reason')
    raise RuntimeError(f"Transcription job {job_name} failed: {failure_reason}")

# Load the transcript from S3.
transcript_key = f"{job_name}.json"
transcript_obj = s3_client.get_object(Bucket=bucket_name, Key=transcript_key)
transcript_text = transcript_obj['Body'].read().decode('utf-8')
transcript_json = json.loads(transcript_text)

items = transcript_json['results']['items']
output_text = _format_transcript(items)

# Save the transcript to a text file
with open(f'{job_name}.txt', 'w', encoding='utf-8') as f:
    f.write(output_text)

# Read it back; `transcript` is the value fed into the prompt template.
with open(f'{job_name}.txt', "r", encoding='utf-8') as file:
    transcript = file.read()
# Jinja2 prompt template for the summarization request.
#
# The instructions refer to a <data> section, so the template must actually
# contain one: {{transcript}} is the placeholder that the render step fills
# with the conversation text. The example output is written as valid JSON
# (braces included, no trailing comma) so the model is shown a well-formed
# structure to imitate.
template_string = """ I need to summarize a conversation. The transcript of the
conversation is between the <data> XML like tags.

<data>
{{transcript}}
</data>

The summary must contain a one word sentiment analysis, and
a list of issues, problems or causes of friction
during the conversation. The output must be provided in
JSON format shown in the following example.

Example output:
{
    "sentiment": <sentiment>,
    "issues": [
        {
            "topic": <topic>,
            "summary": <issue_summary>
        }
    ]
}

Write the JSON output and nothing more.

Here is the JSON output: """
# Values substituted into the prompt template; the key must match the
# placeholder name used in the template. The dict literal as published was
# never closed.
data = {
    'transcript': transcript,
}

# Render the final prompt text that is sent to the model.
template = Template(template_string)
prompt = template.render(data)
# Request for the Titan text model. `body` must be a JSON string, so the
# payload dict is serialized with json.dumps. As published, the json.dumps
# call and the surrounding dict literals were never closed.
kwargs = {
    "modelId": "amazon.titan-text-express-v1",
    "contentType": "application/json",
    "accept": "*/*",
    "body": json.dumps(
        {
            "inputText": prompt,
            "textGenerationConfig": {
                "maxTokenCount": 512,
                "temperature": 0,
                "topP": 0.9,
            },
        }
    ),
}

# Invoke the model and decode the streamed JSON response body.
response = bedrock_runtime.invoke_model(**kwargs)
response_body = json.loads(response.get('body').read())

# The generated summary is the text of the first result.
generation = response_body['results'][0]['outputText']
1 comment:
Hello Sir!
I watched your YouTube video on this topic and found your blog.
I want to know how I can perform real-time transcription of the audio input from the mic by using Amazon Transcribe, without having to use an S3 bucket?
Thank you
Post a Comment