| 1 | import pyaudio |
| 2 | import websocket |
| 3 | import json |
| 4 | import threading |
| 5 | import time |
| 6 | import wave |
| 7 | from urllib.parse import urlencode |
| 8 | from datetime import datetime |
| 9 | |
# --- Configuration ---
YOUR_API_KEY = "<YOUR_API_KEY>"  # Placeholder: put your real API key here before running

# LLM Gateway Configuration
# NOTE(review): "{{turn}}" is sent literally (this is not an f-string); presumably
# the service substitutes the finished turn's transcript -- confirm against the API docs.
PROMPT = "Provide a brief summary of the transcript.\n\nTranscript: {{turn}}"
LLM_GATEWAY_CONFIG = dict(
    model="claude-sonnet-4-20250514",
    messages=[dict(role="user", content=PROMPT)],
    max_tokens=4000,
)

# Query-string parameters appended to the streaming endpoint URL.
CONNECTION_PARAMS = dict(
    sample_rate=16000,
    format_turns=True,  # Ask for formatted final transcripts
    speech_model="u3-rt-pro",  # Speech model identifier requested from the service
    llm_gateway=json.dumps(LLM_GATEWAY_CONFIG),  # Gateway config travels as a JSON string
)
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)
| 31 | |
# Audio Configuration
FORMAT = pyaudio.paInt16
CHANNELS = 1
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 50 ms of audio per read (0.05 s * 16000 Hz)

# Shared runtime state: the PyAudio instance, the microphone stream, the
# WebSocket app and the capture thread are all created later by the code below.
audio = stream = ws_app = audio_thread = None
stop_event = threading.Event()  # Set to ask the capture thread to stop

# WAV recording state
recorded_frames = []  # Raw audio chunks kept for the WAV file
recording_lock = threading.Lock()  # Guards access to recorded_frames across threads
| 48 | |
def save_wav_file():
    """Save the recorded audio to a timestamped WAV file in the working directory.

    The captured chunks are copied out of ``recorded_frames`` while holding
    ``recording_lock`` and written afterwards, so the capture thread is never
    blocked on disk I/O and the emptiness check and the reported duration
    refer to exactly the data that was written.

    Prints the output filename and duration on success, a notice when nothing
    was recorded, or an error message when the file cannot be written.
    """
    sample_width = 2  # 16-bit samples = 2 bytes each

    # Snapshot under the lock; everything below works on this private copy.
    with recording_lock:
        pcm_data = b"".join(recorded_frames)

    if not pcm_data:
        print("No audio data recorded.")
        return

    # Generate filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"recorded_audio_{timestamp}.wav"

    try:
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(SAMPLE_RATE)
            wf.writeframes(pcm_data)
    except (OSError, wave.Error) as e:
        print(f"Error saving WAV file: {e}")
        return

    # Derive the duration from the bytes actually written rather than assuming
    # every captured chunk held exactly FRAMES_PER_BUFFER frames.
    duration = len(pcm_data) / (sample_width * CHANNELS) / SAMPLE_RATE
    print(f"Audio saved to: {filename}")
    print(f"Duration: {duration:.2f} seconds")
| 74 | |
| 75 | # --- WebSocket Event Handlers --- |
| 76 | |
def on_open(ws):
    """Handle the WebSocket 'open' event by starting microphone capture.

    Announces the connection, then launches a daemon thread that repeatedly
    reads a chunk from the microphone stream, keeps a copy for the WAV
    recording and forwards the chunk to the server as a binary frame. The
    thread is stored in the module-level ``audio_thread``.
    """
    global audio_thread

    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT_BASE_URL}")

    def stream_audio():
        # Runs on the capture thread until stop_event is set or an error occurs.
        print("Starting audio streaming...")
        while True:
            if stop_event.is_set():
                break
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)

                # Keep a copy of the chunk for the WAV recording.
                with recording_lock:
                    recorded_frames.append(chunk)

                # Forward the raw audio to the server as a binary message.
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                # A failed read or send most likely means the stream or the
                # socket is closed, so give up instead of retrying.
                break
        print("Audio streaming stopped.")

    # Daemon thread: it must not keep the process alive after the main thread exits.
    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
| 108 | |
def on_message(ws, message):
    """Handle one JSON message received from the server.

    Dispatches on the message's ``type`` field:

    * ``Begin`` -- prints the session id.
    * ``Turn`` -- prints the transcript, but only when ``end_of_turn`` is true.
    * ``LLMGatewayResponse`` -- prints the content of the first choice; a
      missing, null or empty ``data`` / ``choices`` / ``message`` / ``content``
      yields an empty response instead of an error.
    * ``Termination`` -- prints the number of audio seconds processed.

    Other message types are ignored. Malformed JSON and any other failure are
    reported on stdout and never raised, so a bad message cannot break the
    WebSocket callback loop.

    Args:
        ws: The WebSocket app that delivered the message (unused).
        message: The raw JSON payload as ``str`` or ``bytes``.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session started: {data.get('id')}")
        elif msg_type == "Turn":
            if data.get("end_of_turn", False):
                transcript = data.get("transcript", "")
                print(f"\nTranscript:\n{transcript}\n")
        elif msg_type == "LLMGatewayResponse":
            # Fall back to a single empty choice so that an absent, null or
            # empty "choices" list cannot raise IndexError on [0].
            llm_data = data.get("data") or {}
            choices = llm_data.get("choices") or [{}]
            first_message = choices[0].get("message") or {}
            llm_content = first_message.get("content") or ""
            print(f"LLM Response:\n{llm_content}\n")
        elif msg_type == "Termination":
            audio_duration = data.get("audio_duration_seconds", 0)
            print(f"Session terminated: {audio_duration} seconds of audio processed")
    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        # Last-resort guard: e.g. a payload that is valid JSON but not an object.
        print(f"Error handling message: {e}")
| 137 | |
def on_error(ws, error):
    """Report a WebSocket error and ask the audio capture thread to stop."""
    report = f"\nWebSocket Error: {error}"
    print(report)
    # Setting the event makes the capture loop exit on its next iteration.
    stop_event.set()
| 143 | |
| 144 | |
def on_close(ws, close_status_code, close_msg):
    """Handle the WebSocket 'close' event: stop capture, save audio, free devices.

    The capture thread is signalled and joined *before* anything else happens,
    so that the WAV file contains every captured chunk and the microphone
    stream is not closed while that thread may still be reading from it.
    Afterwards the stream and the PyAudio instance are released and the
    module-level ``stream`` / ``audio`` are reset to ``None``.

    Args:
        ws: The WebSocket app whose connection closed (unused).
        close_status_code: Close status code reported for the connection.
        close_msg: Close reason reported for the connection.
    """
    global stream, audio

    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")

    # Stop the capture thread first and give it a moment to finish its
    # current read/send. Never join the current thread (that would raise).
    stop_event.set()
    worker = audio_thread
    if worker and worker.is_alive() and worker is not threading.current_thread():
        worker.join(timeout=1.0)

    # Save recorded audio to WAV file now that capture has (normally) stopped.
    save_wav_file()

    # Release audio resources.
    if stream:
        try:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
        except OSError as e:
            # NOTE(review): expected when the stream was already closed by
            # another cleanup path -- confirm against PyAudio's behaviour.
            print(f"Error closing audio stream: {e}")
        stream = None
    if audio:
        audio.terminate()
        audio = None
| 167 | |
| 168 | # --- Main Execution --- |
def _release_audio_resources():
    """Close the microphone stream and terminate PyAudio if they are still open."""
    global audio, stream

    if stream:
        try:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
        except OSError as e:
            # NOTE(review): expected when the stream was already closed by
            # another cleanup path -- confirm against PyAudio's behaviour.
            print(f"Error closing audio stream: {e}")
        stream = None
    if audio:
        audio.terminate()
        audio = None


def run():
    """Open the microphone, stream audio over the WebSocket and clean up on exit.

    The WebSocket app runs on a daemon thread while the main thread polls it,
    which keeps the main thread free to receive Ctrl+C. On Ctrl+C a
    ``Terminate`` message is sent and the function waits up to five seconds
    for the connection to shut down before closing it explicitly. Audio
    resources are always released before returning.
    """
    global audio, stream, ws_app

    # Initialize PyAudio
    audio = pyaudio.PyAudio()

    # Open microphone stream
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception as e:
        print(f"Error opening microphone stream: {e}")
        _release_audio_resources()
        return  # Nothing to stream without a microphone

    print("Microphone stream opened successfully.")
    print("Speak into your microphone. Press Ctrl+C to stop.")
    print("Audio will be saved to a WAV file when the session ends.")

    # Create WebSocketApp
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run the WebSocketApp on its own thread so the main thread can catch
    # KeyboardInterrupt.
    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        # Keep main thread alive until the connection ends or we are interrupted
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()  # Signal audio thread to stop

        # Send termination message to the server
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                terminate_message = json.dumps({"type": "Terminate"})
                print(f"Sending termination message: {terminate_message}")
                ws_app.send(terminate_message)
                # Wait for the connection to wind down instead of sleeping a
                # fixed five seconds: the join returns as soon as the
                # WebSocket thread exits and only falls back to the full
                # timeout when the connection stays open.
                ws_thread.join(timeout=5.0)
            except Exception as e:
                print(f"Error sending termination message: {e}")

        # Close the WebSocket connection (will trigger on_close)
        if ws_app:
            ws_app.close()

        # Wait for WebSocket thread to finish
        ws_thread.join(timeout=2.0)
    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)
    finally:
        # Fallback cleanup for the case where on_close did not run (or did not
        # finish): make sure capture is signalled to stop, then free devices.
        stop_event.set()
        _release_audio_resources()
        print("Cleanup complete. Exiting.")
| 250 | |
| 251 | |
# Script entry point: start the streaming session only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run()