Translate Streaming STT Transcripts with LLM Gateway | AssemblyAI

In this guide, you’ll learn how to implement real-time translation of final transcripts using AssemblyAI’s Streaming API and LLM Gateway.

Quickstart

Python

JavaScript

1 import pyaudio
2 import websocket
3 import json
4 import threading
5 import time
6 import requests
7 from urllib.parse import urlencode
8 
9 YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key
10 
# --- Streaming endpoint ---------------------------------------------------
# Query parameters appended to the WebSocket URL when the session is opened.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# --- Microphone capture ---------------------------------------------------
# Capture at the same rate that is declared in the connection parameters.
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 800 frames at 16 kHz = 50 ms of audio per read
CHANNELS = 1
FORMAT = pyaudio.paInt16

# --- Shared state ---------------------------------------------------------
# Filled in by run() and on_open(); read by the WebSocket callbacks.
audio = stream = ws_app = audio_thread = None
# Set to ask the audio sender loop to stop.
stop_event = threading.Event()
28 
def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The finalized transcript text to translate.

    Returns:
        The content of the first completion choice, or an empty string when
        ``text`` is empty or only whitespace (no request is made).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            gateway answers with an HTTP error status.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return ""

    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "prompt": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}",
        "max_tokens": 1000
    }

    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        # Without a timeout a stalled request would block the caller forever.
        timeout=30,
    )
    # Report HTTP failures (bad key, rate limit, ...) as such, rather than
    # as a KeyError on the missing "choices" field below.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]
47 
def on_open(ws):
    """Start sending microphone audio once the WebSocket is open.

    Spawns a daemon thread that reads fixed-size chunks from the global
    input stream and forwards each one to the server as a binary frame.

    Args:
        ws: The connected WebSocketApp used to send the audio frames.
    """
    global audio_thread

    def stream_audio():
        # Pump audio until a stop is requested or a read/send fails.
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                return

    print("WebSocket connection opened.")
    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
64 
def on_message(ws, message):
    """Handle one JSON message received from the streaming session.

    "Begin" and "Termination" messages are logged. For "Turn" messages,
    partial transcripts are drawn in place on the current line; when the
    turn has ended, the line is cleared and the translated transcript is
    printed instead.

    Args:
        ws: The WebSocketApp that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            # Treat a missing or null transcript as empty text.
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Erase the partial transcript that was drawn on this line.
                print(f"\r{' ' * 80}\r", end="")
                # An empty final transcript has nothing to translate, so
                # don't spend an LLM request on it.
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        # Callback boundary: report the problem and keep the session alive.
        print(f"Error handling message: {e}")
83 
def on_error(ws, error):
    """Report a WebSocket error and ask the audio sender loop to stop.

    Args:
        ws: The WebSocketApp that reported the error (unused).
        error: The exception or error description to display.
    """
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()
87 
def on_close(ws, close_status_code, close_msg):
    """Release the microphone resources after the WebSocket has closed.

    Safe to call more than once: the global handles are cleared before they
    are released, so a repeated call finds nothing left to close.

    Args:
        ws: The WebSocketApp that was closed (unused).
        close_status_code: The close status code, shown in the log line.
        close_msg: The close reason sent with the close frame (unused).
    """
    global stream, audio
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Give the sender loop a moment to see stop_event and leave
    # stream.read() before the stream is closed underneath it.
    sender = audio_thread
    if sender is not None and sender is not threading.current_thread():
        sender.join(timeout=1.0)

    # Detach the globals first so a second call is a no-op.
    mic, stream = stream, None
    pa, audio = audio, None

    if mic is not None:
        try:
            if mic.is_active():
                mic.stop_stream()
            mic.close()
        except OSError as e:
            # Still fall through so the PyAudio instance gets terminated.
            print(f"Error closing audio stream: {e}")
    if pa is not None:
        pa.terminate()
98 
def run():
    """Stream microphone audio for transcription and print the translations.

    Opens a microphone input stream, runs the WebSocket connection on a
    background thread, and blocks until that thread ends or the user
    presses Ctrl+C. On Ctrl+C the session is asked to terminate before the
    socket is closed.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # The input stream could not be opened: release the PyAudio
        # instance before propagating instead of leaking it.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run the socket loop in the background so the main thread stays free
    # to receive Ctrl+C.
    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except (websocket.WebSocketException, OSError) as e:
                # The socket dropped between the check and the send; carry
                # on with the local shutdown.
                print(f"Could not send Terminate: {e}")
            else:
                # Wait up to 2 s for the session to wind down, returning
                # early if the connection thread exits sooner.
                ws_thread.join(timeout=2.0)
        ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()

Step-by-Step Instructions

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install Dependencies

Python

JavaScript

$ pip install websocket-client pyaudio requests

Import Packages & Set API Key

Python

JavaScript

1 import pyaudio
2 import websocket
3 import json
4 import threading
5 import time
6 import requests
7 from urllib.parse import urlencode
8 
9 YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key

Audio Configuration & Global Variables

Set the streaming connection parameters, the audio capture settings, and the global variables shared by the WebSocket handlers.

Python

JavaScript

# --- Streaming endpoint ---------------------------------------------------
# Query parameters appended to the WebSocket URL when the session is opened.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# --- Microphone capture ---------------------------------------------------
# Capture at the same rate that is declared in the connection parameters.
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 800 frames at 16 kHz = 50 ms of audio per read
CHANNELS = 1
FORMAT = pyaudio.paInt16

# --- Shared state ---------------------------------------------------------
# Filled in by run() and on_open(); read by the WebSocket callbacks.
audio = stream = ws_app = audio_thread = None
# Set to ask the audio sender loop to stop.
stop_event = threading.Event()

Define Translate Text Function

Define a function called translate_text (Python) or translateText (JavaScript) that uses LLM Gateway to translate each final English transcript into another language. This example translates the text into Spanish. To target a different language, replace “Spanish” in the prompt with your language of choice.

Python

JavaScript

def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The finalized transcript text to translate.

    Returns:
        The content of the first completion choice, or an empty string when
        ``text`` is empty or only whitespace (no request is made).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            gateway answers with an HTTP error status.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return ""

    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "prompt": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}",
        "max_tokens": 1000
    }

    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        # Without a timeout a stalled request would block the caller forever.
        timeout=30,
    )
    # Report HTTP failures (bad key, rate limit, ...) as such, rather than
    # as a KeyError on the missing "choices" field below.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]

Websocket Event Handlers

Open Websocket

Python

JavaScript

def on_open(ws):
    """Start sending microphone audio once the WebSocket is open.

    Spawns a daemon thread that reads fixed-size chunks from the global
    input stream and forwards each one to the server as a binary frame.

    Args:
        ws: The connected WebSocketApp used to send the audio frames.
    """
    global audio_thread

    def stream_audio():
        # Pump audio until a stop is requested or a read/send fails.
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                return

    print("WebSocket connection opened.")
    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()

Handle Websocket Messages

In this function, use the previously defined translate_text / translateText to translate all final transcripts.

Python

JavaScript

def on_message(ws, message):
    """Handle one JSON message received from the streaming session.

    "Begin" and "Termination" messages are logged. For "Turn" messages,
    partial transcripts are drawn in place on the current line; when the
    turn has ended, the line is cleared and the translated transcript is
    printed instead.

    Args:
        ws: The WebSocketApp that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            # Treat a missing or null transcript as empty text.
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Erase the partial transcript that was drawn on this line.
                print(f"\r{' ' * 80}\r", end="")
                # An empty final transcript has nothing to translate, so
                # don't spend an LLM request on it.
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        # Callback boundary: report the problem and keep the session alive.
        print(f"Error handling message: {e}")

Close Websocket

Python

JavaScript

def on_close(ws, close_status_code, close_msg):
    """Release the microphone resources after the WebSocket has closed.

    Safe to call more than once: the global handles are cleared before they
    are released, so a repeated call finds nothing left to close.

    Args:
        ws: The WebSocketApp that was closed (unused).
        close_status_code: The close status code, shown in the log line.
        close_msg: The close reason sent with the close frame (unused).
    """
    global stream, audio
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Give the sender loop a moment to see stop_event and leave
    # stream.read() before the stream is closed underneath it.
    sender = audio_thread
    if sender is not None and sender is not threading.current_thread():
        sender.join(timeout=1.0)

    # Detach the globals first so a second call is a no-op.
    mic, stream = stream, None
    pa, audio = audio, None

    if mic is not None:
        try:
            if mic.is_active():
                mic.stop_stream()
            mic.close()
        except OSError as e:
            # Still fall through so the PyAudio instance gets terminated.
            print(f"Error closing audio stream: {e}")
    if pa is not None:
        pa.terminate()

Websocket Error Handling

Python

JavaScript

def on_error(ws, error):
    """Report a WebSocket error and ask the audio sender loop to stop.

    Args:
        ws: The WebSocketApp that reported the error (unused).
        error: The exception or error description to display.
    """
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()

Begin Streaming STT Transcription

Python

JavaScript

def run():
    """Stream microphone audio for transcription and print the translations.

    Opens a microphone input stream, runs the WebSocket connection on a
    background thread, and blocks until that thread ends or the user
    presses Ctrl+C. On Ctrl+C the session is asked to terminate before the
    socket is closed.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # The input stream could not be opened: release the PyAudio
        # instance before propagating instead of leaking it.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run the socket loop in the background so the main thread stays free
    # to receive Ctrl+C.
    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except (websocket.WebSocketException, OSError) as e:
                # The socket dropped between the check and the send; carry
                # on with the local shutdown.
                print(f"Could not send Terminate: {e}")
            else:
                # Wait up to 2 s for the session to wind down, returning
                # early if the connection thread exits sooner.
                ws_thread.join(timeout=2.0)
        ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()