minimal voice recognition

This commit is contained in:
2025-06-17 08:48:55 -03:00
commit 36fe9f603e
79 changed files with 7662 additions and 0 deletions

View File

@ -0,0 +1,3 @@
from .audio_recorder import AudioToTextRecorder
from .audio_recorder_client import AudioToTextRecorderClient
from .audio_input import AudioInput
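The package exports three entry points: the standalone recorder, the client for a running `stt-server`, and the raw audio capture helper. A minimal local-use sketch (assuming a working microphone; model weights download on first use):

```python
from RealtimeSTT import AudioToTextRecorder

if __name__ == '__main__':
    recorder = AudioToTextRecorder(model="tiny", language="es")
    print("Speak now...")
    print(recorder.text())  # blocks until a full sentence is transcribed
```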

View File

@ -0,0 +1,220 @@
from colorama import init, Fore, Style
from scipy.signal import butter, filtfilt, resample_poly
import pyaudio
import logging
DESIRED_RATE = 16000
CHUNK_SIZE = 1024
AUDIO_FORMAT = pyaudio.paInt16
CHANNELS = 1
class AudioInput:
def __init__(
self,
input_device_index: int = None,
debug_mode: bool = False,
target_samplerate: int = DESIRED_RATE,
chunk_size: int = CHUNK_SIZE,
audio_format: int = AUDIO_FORMAT,
channels: int = CHANNELS,
resample_to_target: bool = True,
):
self.input_device_index = input_device_index
self.debug_mode = debug_mode
self.audio_interface = None
self.stream = None
self.device_sample_rate = None
self.target_samplerate = target_samplerate
self.chunk_size = chunk_size
self.audio_format = audio_format
self.channels = channels
self.resample_to_target = resample_to_target
def get_supported_sample_rates(self, device_index):
"""Test which standard sample rates are supported by the specified device."""
standard_rates = [8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000]
supported_rates = []
device_info = self.audio_interface.get_device_info_by_index(device_index)
max_channels = device_info.get('maxInputChannels')  # input device, so query input channels
for rate in standard_rates:
try:
if self.audio_interface.is_format_supported(
rate,
input_device=device_index,
input_channels=max_channels,
input_format=self.audio_format,
):
supported_rates.append(rate)
except ValueError:
# PyAudio raises ValueError for unsupported configurations
continue
return supported_rates
def _get_best_sample_rate(self, actual_device_index, desired_rate):
"""Determines the best available sample rate for the device."""
try:
device_info = self.audio_interface.get_device_info_by_index(actual_device_index)
supported_rates = self.get_supported_sample_rates(actual_device_index)
if desired_rate in supported_rates:
return desired_rate
if supported_rates:
# Fall back to the highest supported rate; audio is resampled later
return max(supported_rates)
return int(device_info.get('defaultSampleRate', 44100))
except Exception as e:
logging.warning(f"Error determining sample rate: {e}")
return 44100  # Safe fallback
def list_devices(self):
"""List all available audio input devices with supported sample rates."""
try:
init() # Initialize colorama
self.audio_interface = pyaudio.PyAudio()
device_count = self.audio_interface.get_device_count()
print(f"Available audio input devices:")
#print(f"{Fore.LIGHTBLUE_EX}Available audio input devices:{Style.RESET_ALL}")
for i in range(device_count):
device_info = self.audio_interface.get_device_info_by_index(i)
device_name = device_info.get('name')
max_input_channels = device_info.get('maxInputChannels', 0)
if max_input_channels > 0: # Only consider devices with input capabilities
supported_rates = self.get_supported_sample_rates(i)
print(f"{Fore.LIGHTGREEN_EX}Device {Style.RESET_ALL}{i}{Fore.LIGHTGREEN_EX}: {device_name}{Style.RESET_ALL}")
# Format each rate in cyan
if supported_rates:
rates_formatted = ", ".join([f"{Fore.CYAN}{rate}{Style.RESET_ALL}" for rate in supported_rates])
print(f" {Fore.YELLOW}Supported sample rates: {rates_formatted}{Style.RESET_ALL}")
else:
print(f" {Fore.YELLOW}Supported sample rates: None{Style.RESET_ALL}")
except Exception as e:
print(f"Error listing devices: {e}")
finally:
if self.audio_interface:
self.audio_interface.terminate()
def setup(self):
"""Initialize audio interface and open stream"""
try:
self.audio_interface = pyaudio.PyAudio()
if self.debug_mode:
print(f"Input device index: {self.input_device_index}")
actual_device_index = (self.input_device_index if self.input_device_index is not None
else self.audio_interface.get_default_input_device_info()['index'])
if self.debug_mode:
print(f"Actual selected device index: {actual_device_index}")
self.input_device_index = actual_device_index
self.device_sample_rate = self._get_best_sample_rate(actual_device_index, self.target_samplerate)
if self.debug_mode:
print(f"Setting up audio on device {self.input_device_index} with sample rate {self.device_sample_rate}")
try:
self.stream = self.audio_interface.open(
format=self.audio_format,
channels=self.channels,
rate=self.device_sample_rate,
input=True,
frames_per_buffer=self.chunk_size,
input_device_index=self.input_device_index,
)
if self.debug_mode:
print(f"Audio recording initialized successfully at {self.device_sample_rate} Hz")
return True
except Exception as e:
print(f"Failed to initialize audio stream at {self.device_sample_rate} Hz: {e}")
return False
except Exception as e:
print(f"Error initializing audio recording: {e}")
if self.audio_interface:
self.audio_interface.terminate()
return False
def lowpass_filter(self, signal, cutoff_freq, sample_rate):
"""
Apply a low-pass Butterworth filter to prevent aliasing in the signal.
Args:
signal (np.ndarray): Input audio signal to filter
cutoff_freq (float): Cutoff frequency in Hz
sample_rate (float): Sampling rate of the input signal in Hz
Returns:
np.ndarray: Filtered audio signal
Notes:
- Uses a 5th order Butterworth filter
- Applies zero-phase filtering using filtfilt
"""
# Calculate the Nyquist frequency (half the sample rate)
nyquist_rate = sample_rate / 2.0
# Normalize cutoff frequency to Nyquist rate (required by butter())
normal_cutoff = cutoff_freq / nyquist_rate
# Design the Butterworth filter
b, a = butter(5, normal_cutoff, btype='low', analog=False)
# Apply zero-phase filtering (forward and backward)
filtered_signal = filtfilt(b, a, signal)
return filtered_signal
def resample_audio(self, pcm_data, target_sample_rate, original_sample_rate):
"""
Filter and resample audio data to a target sample rate.
Args:
pcm_data (np.ndarray): Input audio data
target_sample_rate (int): Desired output sample rate in Hz
original_sample_rate (int): Original sample rate of input in Hz
Returns:
np.ndarray: Resampled audio data
Notes:
- Applies anti-aliasing filter before resampling
- Uses polyphase filtering for high-quality resampling
"""
if target_sample_rate < original_sample_rate:
# Downsampling with low-pass filter
pcm_filtered = self.lowpass_filter(pcm_data, target_sample_rate / 2, original_sample_rate)
resampled = resample_poly(pcm_filtered, target_sample_rate, original_sample_rate)
else:
# Upsampling without low-pass filter
resampled = resample_poly(pcm_data, target_sample_rate, original_sample_rate)
return resampled
def read_chunk(self):
"""Read a chunk of audio data"""
return self.stream.read(self.chunk_size, exception_on_overflow=False)
def cleanup(self):
"""Clean up audio resources"""
try:
if self.stream:
self.stream.stop_stream()
self.stream.close()
self.stream = None
if self.audio_interface:
self.audio_interface.terminate()
self.audio_interface = None
except Exception as e:
print(f"Error cleaning up audio resources: {e}")

File diff suppressed because it is too large

View File

@ -0,0 +1,881 @@
log_outgoing_chunks = False
debug_mode = False
from typing import Iterable, List, Optional, Union
from urllib.parse import urlparse
from datetime import datetime
from websocket import WebSocketApp
from websocket import ABNF
import numpy as np
import subprocess
import threading
import platform
import logging
import struct
import base64
import wave
import json
import time
import sys
import os
# Import the AudioInput class
from .audio_input import AudioInput
DEFAULT_CONTROL_URL = "ws://127.0.0.1:8011"
DEFAULT_DATA_URL = "ws://127.0.0.1:8012"
INIT_MODEL_TRANSCRIPTION = "tiny"
INIT_MODEL_TRANSCRIPTION_REALTIME = "tiny"
INIT_REALTIME_PROCESSING_PAUSE = 0.2
INIT_REALTIME_INITIAL_PAUSE = 0.2
INIT_SILERO_SENSITIVITY = 0.4
INIT_WEBRTC_SENSITIVITY = 3
INIT_POST_SPEECH_SILENCE_DURATION = 0.6
INIT_MIN_LENGTH_OF_RECORDING = 0.5
INIT_MIN_GAP_BETWEEN_RECORDINGS = 0
INIT_WAKE_WORDS_SENSITIVITY = 0.6
INIT_PRE_RECORDING_BUFFER_DURATION = 1.0
INIT_WAKE_WORD_ACTIVATION_DELAY = 0.0
INIT_WAKE_WORD_TIMEOUT = 5.0
INIT_WAKE_WORD_BUFFER_DURATION = 0.1
ALLOWED_LATENCY_LIMIT = 100
BUFFER_SIZE = 512
SAMPLE_RATE = 16000
INIT_HANDLE_BUFFER_OVERFLOW = False
if platform.system() != 'Darwin':
INIT_HANDLE_BUFFER_OVERFLOW = True
# Define ANSI color codes for terminal output
class bcolors:
HEADER = '\033[95m' # Magenta
OKBLUE = '\033[94m' # Blue
OKCYAN = '\033[96m' # Cyan
OKGREEN = '\033[92m' # Green
WARNING = '\033[93m' # Yellow
FAIL = '\033[91m' # Red
ENDC = '\033[0m' # Reset to default
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
def format_timestamp_ns(timestamp_ns: int) -> str:
# Split into whole seconds and the nanosecond remainder
seconds = timestamp_ns // 1_000_000_000
remainder_ns = timestamp_ns % 1_000_000_000
# Convert seconds part into a datetime object (local time)
dt = datetime.fromtimestamp(seconds)
# Format the main time as HH:MM:SS
time_str = dt.strftime("%H:%M:%S")
# Convert the nanosecond remainder to milliseconds (three digits)
milliseconds = remainder_ns // 1_000_000
formatted_timestamp = f"{time_str}.{milliseconds:03d}"
return formatted_timestamp
class AudioToTextRecorderClient:
"""
A class responsible for capturing audio from the microphone, detecting
voice activity, and then transcribing the captured audio using the
`faster_whisper` model.
"""
def __init__(self,
model: str = INIT_MODEL_TRANSCRIPTION,
download_root: str = None,
language: str = "",
compute_type: str = "default",
input_device_index: int = None,
gpu_device_index: Union[int, List[int]] = 0,
device: str = "cuda",
on_recording_start=None,
on_recording_stop=None,
on_transcription_start=None,
ensure_sentence_starting_uppercase=True,
ensure_sentence_ends_with_period=True,
use_microphone=True,
spinner=True,
level=logging.WARNING,
batch_size: int = 16,
# Realtime transcription parameters
enable_realtime_transcription=False,
use_main_model_for_realtime=False,
realtime_model_type=INIT_MODEL_TRANSCRIPTION_REALTIME,
realtime_processing_pause=INIT_REALTIME_PROCESSING_PAUSE,
init_realtime_after_seconds=INIT_REALTIME_INITIAL_PAUSE,
on_realtime_transcription_update=None,
on_realtime_transcription_stabilized=None,
realtime_batch_size: int = 16,
# Voice activation parameters
silero_sensitivity: float = INIT_SILERO_SENSITIVITY,
silero_use_onnx: bool = False,
silero_deactivity_detection: bool = False,
webrtc_sensitivity: int = INIT_WEBRTC_SENSITIVITY,
post_speech_silence_duration: float = (
INIT_POST_SPEECH_SILENCE_DURATION
),
min_length_of_recording: float = (
INIT_MIN_LENGTH_OF_RECORDING
),
min_gap_between_recordings: float = (
INIT_MIN_GAP_BETWEEN_RECORDINGS
),
pre_recording_buffer_duration: float = (
INIT_PRE_RECORDING_BUFFER_DURATION
),
on_vad_start=None,
on_vad_stop=None,
on_vad_detect_start=None,
on_vad_detect_stop=None,
on_turn_detection_start=None,
on_turn_detection_stop=None,
# Wake word parameters
wakeword_backend: str = "pvporcupine",
openwakeword_model_paths: str = None,
openwakeword_inference_framework: str = "onnx",
wake_words: str = "",
wake_words_sensitivity: float = INIT_WAKE_WORDS_SENSITIVITY,
wake_word_activation_delay: float = (
INIT_WAKE_WORD_ACTIVATION_DELAY
),
wake_word_timeout: float = INIT_WAKE_WORD_TIMEOUT,
wake_word_buffer_duration: float = INIT_WAKE_WORD_BUFFER_DURATION,
on_wakeword_detected=None,
on_wakeword_timeout=None,
on_wakeword_detection_start=None,
on_wakeword_detection_end=None,
on_recorded_chunk=None,
debug_mode=False,
handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW,
beam_size: int = 5,
beam_size_realtime: int = 3,
buffer_size: int = BUFFER_SIZE,
sample_rate: int = SAMPLE_RATE,
initial_prompt: Optional[Union[str, Iterable[int]]] = None,
initial_prompt_realtime: Optional[Union[str, Iterable[int]]] = None,
suppress_tokens: Optional[List[int]] = [-1],
print_transcription_time: bool = False,
early_transcription_on_silence: int = 0,
allowed_latency_limit: int = ALLOWED_LATENCY_LIMIT,
no_log_file: bool = False,
use_extended_logging: bool = False,
# Server urls
control_url: str = DEFAULT_CONTROL_URL,
data_url: str = DEFAULT_DATA_URL,
autostart_server: bool = True,
output_wav_file: str = None,
faster_whisper_vad_filter: bool = False,
):
# Set instance variables from constructor parameters
self.model = model
self.language = language
self.compute_type = compute_type
self.input_device_index = input_device_index
self.gpu_device_index = gpu_device_index
self.device = device
self.on_recording_start = on_recording_start
self.on_recording_stop = on_recording_stop
self.on_transcription_start = on_transcription_start
self.ensure_sentence_starting_uppercase = ensure_sentence_starting_uppercase
self.ensure_sentence_ends_with_period = ensure_sentence_ends_with_period
self.use_microphone = use_microphone
self.spinner = spinner
self.level = level
self.batch_size = batch_size
self.init_realtime_after_seconds = init_realtime_after_seconds
self.realtime_batch_size = realtime_batch_size
# Real-time transcription parameters
self.enable_realtime_transcription = enable_realtime_transcription
self.use_main_model_for_realtime = use_main_model_for_realtime
self.download_root = download_root
self.realtime_model_type = realtime_model_type
self.realtime_processing_pause = realtime_processing_pause
self.on_realtime_transcription_update = on_realtime_transcription_update
self.on_realtime_transcription_stabilized = on_realtime_transcription_stabilized
# Voice activation parameters
self.silero_sensitivity = silero_sensitivity
self.silero_use_onnx = silero_use_onnx
self.silero_deactivity_detection = silero_deactivity_detection
self.webrtc_sensitivity = webrtc_sensitivity
self.post_speech_silence_duration = post_speech_silence_duration
self.min_length_of_recording = min_length_of_recording
self.min_gap_between_recordings = min_gap_between_recordings
self.pre_recording_buffer_duration = pre_recording_buffer_duration
self.on_vad_start = on_vad_start
self.on_vad_stop = on_vad_stop
self.on_vad_detect_start = on_vad_detect_start
self.on_vad_detect_stop = on_vad_detect_stop
self.on_turn_detection_start = on_turn_detection_start
self.on_turn_detection_stop = on_turn_detection_stop
# Wake word parameters
self.wakeword_backend = wakeword_backend
self.openwakeword_model_paths = openwakeword_model_paths
self.openwakeword_inference_framework = openwakeword_inference_framework
self.wake_words = wake_words
self.wake_words_sensitivity = wake_words_sensitivity
self.wake_word_activation_delay = wake_word_activation_delay
self.wake_word_timeout = wake_word_timeout
self.wake_word_buffer_duration = wake_word_buffer_duration
self.on_wakeword_detected = on_wakeword_detected
self.on_wakeword_timeout = on_wakeword_timeout
self.on_wakeword_detection_start = on_wakeword_detection_start
self.on_wakeword_detection_end = on_wakeword_detection_end
self.on_recorded_chunk = on_recorded_chunk
self.debug_mode = debug_mode
self.handle_buffer_overflow = handle_buffer_overflow
self.beam_size = beam_size
self.beam_size_realtime = beam_size_realtime
self.buffer_size = buffer_size
self.sample_rate = sample_rate
self.initial_prompt = initial_prompt
self.initial_prompt_realtime = initial_prompt_realtime
self.suppress_tokens = suppress_tokens
self.print_transcription_time = print_transcription_time
self.early_transcription_on_silence = early_transcription_on_silence
self.allowed_latency_limit = allowed_latency_limit
self.no_log_file = no_log_file
self.use_extended_logging = use_extended_logging
self.faster_whisper_vad_filter = faster_whisper_vad_filter
# Server URLs
self.control_url = control_url
self.data_url = data_url
self.autostart_server = autostart_server
self.output_wav_file = output_wav_file
# Instance variables
self.muted = False
self.recording_thread = None
self.is_running = True
self.connection_established = threading.Event()
self.recording_start = threading.Event()
self.final_text_ready = threading.Event()
self.realtime_text = ""
self.final_text = ""
self._recording = False
self.server_already_running = False
self.wav_file = None
self.request_counter = 0
self.pending_requests = {} # Map from request_id to threading.Event and value
if self.debug_mode:
print("Checking STT server")
if not self.connect():
print("Failed to connect to the server.", file=sys.stderr)
else:
if self.debug_mode:
print("STT server is running and connected.")
if self.use_microphone:
self.start_recording()
if self.server_already_running:
if not self.connection_established.wait(timeout=10):
print("Server connection not established within 10 seconds.")
else:
self.set_parameter("language", self.language)
print(f"Language set to {self.language}")
self.set_parameter("wake_word_activation_delay", self.wake_word_activation_delay)
print(f"Wake word activation delay set to {self.wake_word_activation_delay}")
def text(self, on_transcription_finished=None):
self.realtime_text = ""
self.submitted_realtime_text = ""
self.final_text = ""
self.final_text_ready.clear()
self.recording_start.set()
try:
total_wait_time = 0
wait_interval = 0.02 # Poll in small intervals (20 ms)
max_wait_time = 60 # Timeout after 60 seconds
while total_wait_time < max_wait_time and self.is_running and self._recording:
if self.final_text_ready.wait(timeout=wait_interval):
break # Break if transcription is ready
if not self.is_running or not self._recording:
break
total_wait_time += wait_interval
# Timed out waiting for the final transcription
if total_wait_time >= max_wait_time:
if self.debug_mode:
print("Timeout while waiting for text from the server.")
self.recording_start.clear()
if on_transcription_finished:
threading.Thread(target=on_transcription_finished, args=("",)).start()
return ""
self.recording_start.clear()
if not self.is_running or not self._recording:
return ""
if on_transcription_finished:
threading.Thread(target=on_transcription_finished, args=(self.final_text,)).start()
return self.final_text
except KeyboardInterrupt:
if self.debug_mode:
print("KeyboardInterrupt in text(), exiting...")
raise KeyboardInterrupt
except Exception as e:
print(f"Error in AudioToTextRecorderClient.text(): {e}")
return ""
def feed_audio(self, chunk, audio_meta_data, original_sample_rate=16000):
# Start with the base metadata
metadata = {"sampleRate": original_sample_rate}
# Merge additional metadata if provided
if audio_meta_data:
server_sent_to_stt_ns = time.time_ns()
audio_meta_data["server_sent_to_stt"] = server_sent_to_stt_ns
metadata["server_sent_to_stt_formatted"] = format_timestamp_ns(server_sent_to_stt_ns)
metadata.update(audio_meta_data)
# Convert metadata to JSON and prepare the message
metadata_json = json.dumps(metadata)
metadata_length = len(metadata_json)
message = struct.pack('<I', metadata_length) + metadata_json.encode('utf-8') + chunk
# Send the message if the connection is running
if self.is_running:
self.data_ws.send(message, opcode=ABNF.OPCODE_BINARY)
def set_microphone(self, microphone_on=True):
"""
Set the microphone on or off.
"""
self.muted = not microphone_on
def abort(self):
self.call_method("abort")
def wakeup(self):
self.call_method("wakeup")
def clear_audio_queue(self):
self.call_method("clear_audio_queue")
def perform_final_transcription(self):
self.call_method("perform_final_transcription")
def stop(self):
self.call_method("stop")
def connect(self):
if not self.ensure_server_running():
print("Cannot start STT server. Exiting.")
return False
try:
# Connect to control WebSocket
self.control_ws = WebSocketApp(self.control_url,
on_message=self.on_control_message,
on_error=self.on_error,
on_close=self.on_close,
on_open=self.on_control_open)
self.control_ws_thread = threading.Thread(target=self.control_ws.run_forever)
self.control_ws_thread.daemon = False
self.control_ws_thread.start()
# Connect to data WebSocket
self.data_ws = WebSocketApp(self.data_url,
on_message=self.on_data_message,
on_error=self.on_error,
on_close=self.on_close,
on_open=self.on_data_open)
self.data_ws_thread = threading.Thread(target=self.data_ws.run_forever)
self.data_ws_thread.daemon = False
self.data_ws_thread.start()
# Wait for the connections to be established
if not self.connection_established.wait(timeout=10):
print("Timeout while connecting to the server.")
return False
if self.debug_mode:
print("WebSocket connections established successfully.")
return True
except Exception as e:
print(f"Error while connecting to the server: {e}")
return False
def start_server(self):
args = ['stt-server']
# Map constructor parameters to server arguments
if self.model:
args += ['--model', self.model]
if self.realtime_model_type:
args += ['--realtime_model_type', self.realtime_model_type]
if self.download_root:
args += ['--root', self.download_root]
if self.batch_size is not None:
args += ['--batch', str(self.batch_size)]
if self.realtime_batch_size is not None:
args += ['--realtime_batch_size', str(self.realtime_batch_size)]
if self.init_realtime_after_seconds is not None:
args += ['--init_realtime_after_seconds', str(self.init_realtime_after_seconds)]
if self.initial_prompt_realtime:
sanitized_prompt = self.initial_prompt_realtime.replace("\n", "\\n")
args += ['--initial_prompt_realtime', sanitized_prompt]
# if self.compute_type:
# args += ['--compute_type', self.compute_type]
# if self.input_device_index is not None:
# args += ['--input_device_index', str(self.input_device_index)]
# if self.gpu_device_index is not None:
# args += ['--gpu_device_index', str(self.gpu_device_index)]
# if self.device:
# args += ['--device', self.device]
# if self.spinner:
# args.append('--spinner') # flag, no need for True/False
# if self.enable_realtime_transcription:
# args.append('--enable_realtime_transcription') # flag, no need for True/False
# if self.handle_buffer_overflow:
# args.append('--handle_buffer_overflow') # flag, no need for True/False
# if self.suppress_tokens:
# args += ['--suppress_tokens', str(self.suppress_tokens)]
# if self.print_transcription_time:
# args.append('--print_transcription_time') # flag, no need for True/False
# if self.allowed_latency_limit is not None:
# args += ['--allowed_latency_limit', str(self.allowed_latency_limit)]
# if self.no_log_file:
# args.append('--no_log_file') # flag, no need for True
if self.debug_mode:
args.append('--debug') # flag, no need for True/False
if self.language:
args += ['--language', self.language]
if self.silero_sensitivity is not None:
args += ['--silero_sensitivity', str(self.silero_sensitivity)]
if self.silero_use_onnx:
args.append('--silero_use_onnx') # flag, no need for True/False
if self.webrtc_sensitivity is not None:
args += ['--webrtc_sensitivity', str(self.webrtc_sensitivity)]
if self.min_length_of_recording is not None:
args += ['--min_length_of_recording', str(self.min_length_of_recording)]
if self.min_gap_between_recordings is not None:
args += ['--min_gap_between_recordings', str(self.min_gap_between_recordings)]
if self.realtime_processing_pause is not None:
args += ['--realtime_processing_pause', str(self.realtime_processing_pause)]
if self.early_transcription_on_silence is not None:
args += ['--early_transcription_on_silence', str(self.early_transcription_on_silence)]
if self.silero_deactivity_detection:
args.append('--silero_deactivity_detection') # flag, no need for True/False
if self.beam_size is not None:
args += ['--beam_size', str(self.beam_size)]
if self.beam_size_realtime is not None:
args += ['--beam_size_realtime', str(self.beam_size_realtime)]
if self.wake_words is not None:
args += ['--wake_words', str(self.wake_words)]
if self.wake_words_sensitivity is not None:
args += ['--wake_words_sensitivity', str(self.wake_words_sensitivity)]
if self.wake_word_timeout is not None:
args += ['--wake_word_timeout', str(self.wake_word_timeout)]
if self.wake_word_activation_delay is not None:
args += ['--wake_word_activation_delay', str(self.wake_word_activation_delay)]
if self.wakeword_backend is not None:
args += ['--wakeword_backend', str(self.wakeword_backend)]
if self.openwakeword_model_paths:
args += ['--openwakeword_model_paths', str(self.openwakeword_model_paths)]
if self.openwakeword_inference_framework is not None:
args += ['--openwakeword_inference_framework', str(self.openwakeword_inference_framework)]
if self.wake_word_buffer_duration is not None:
args += ['--wake_word_buffer_duration', str(self.wake_word_buffer_duration)]
if self.use_main_model_for_realtime:
args.append('--use_main_model_for_realtime') # flag, no need for True/False
if self.use_extended_logging:
args.append('--use_extended_logging') # flag, no need for True/False
if self.control_url:
parsed_control_url = urlparse(self.control_url)
if parsed_control_url.port:
args += ['--control_port', str(parsed_control_url.port)]
if self.data_url:
parsed_data_url = urlparse(self.data_url)
if parsed_data_url.port:
args += ['--data_port', str(parsed_data_url.port)]
if self.initial_prompt:
sanitized_prompt = self.initial_prompt.replace("\n", "\\n")
args += ['--initial_prompt', sanitized_prompt]
# Start the subprocess with the mapped arguments
if os.name == 'nt': # Windows
cmd = 'start /min cmd /c ' + subprocess.list2cmdline(args)
if self.debug_mode:
print(f"Opening server with cli command: {cmd}")
subprocess.Popen(cmd, shell=True)
else: # Unix-like systems
subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True)
print("STT server start command issued. Please wait a moment for it to initialize.", file=sys.stderr)
def is_server_running(self):
try:
# Attempt a proper WebSocket handshake to the control URL.
from websocket import create_connection
ws = create_connection(self.control_url, timeout=3)
ws.close()
return True
except Exception as e:
if self.debug_mode:
print(f"Server connectivity check failed: {e}")
return False
def ensure_server_running(self):
if not self.is_server_running():
if self.debug_mode:
print("STT server is not running.", file=sys.stderr)
if self.autostart_server:
self.start_server()
if self.debug_mode:
print("Waiting for STT server to start...", file=sys.stderr)
for _ in range(20): # Wait up to 20 seconds
if self.is_server_running():
if self.debug_mode:
print("STT server started successfully.", file=sys.stderr)
time.sleep(2) # Give the server a moment to fully initialize
return True
time.sleep(1)
print("Failed to start STT server.", file=sys.stderr)
return False
else:
print("STT server is required. Please start it manually.", file=sys.stderr)
return False
else:
self.server_already_running = True
return True
def list_devices(self):
"""List all available audio input devices."""
audio = AudioInput(debug_mode=self.debug_mode)
audio.list_devices()
def start_recording(self):
self.recording_thread = threading.Thread(target=self.record_and_send_audio)
self.recording_thread.daemon = False
self.recording_thread.start()
def setup_audio(self):
"""Initialize audio input"""
self.audio_input = AudioInput(
input_device_index=self.input_device_index,
debug_mode=self.debug_mode
)
return self.audio_input.setup()
def record_and_send_audio(self):
"""Record and stream audio data"""
self._recording = True
try:
if not self.setup_audio():
raise Exception("Failed to set up audio recording.")
# Initialize WAV file writer if output_wav_file is provided
if self.output_wav_file and not self.wav_file:
self.wav_file = wave.open(self.output_wav_file, 'wb')
self.wav_file.setnchannels(1)
self.wav_file.setsampwidth(2)
self.wav_file.setframerate(self.audio_input.device_sample_rate) # WAV header must match the capture rate
if self.debug_mode:
print("Recording and sending audio...")
while self.is_running:
if self.muted:
time.sleep(0.01)
continue
try:
audio_data = self.audio_input.read_chunk()
if self.wav_file:
self.wav_file.writeframes(audio_data)
if self.on_recorded_chunk:
self.on_recorded_chunk(audio_data)
if self.muted:
continue
if self.recording_start.is_set():
metadata = {"sampleRate": self.audio_input.device_sample_rate}
metadata_json = json.dumps(metadata)
metadata_length = len(metadata_json)
message = struct.pack('<I', metadata_length) + metadata_json.encode('utf-8') + audio_data
if self.is_running:
if log_outgoing_chunks:
print(".", flush=True, end='')
self.data_ws.send(message, opcode=ABNF.OPCODE_BINARY)
except KeyboardInterrupt:
if self.debug_mode:
print("KeyboardInterrupt in record_and_send_audio, exiting...")
break
except Exception as e:
print(f"Error sending audio data: {e}")
break
except Exception as e:
print(f"Error in record_and_send_audio: {e}", file=sys.stderr)
finally:
self.cleanup_audio()
self.final_text_ready.set() # Unblock any pending text() call
self.is_running = False
self._recording = False
def cleanup_audio(self):
"""Clean up audio resources"""
if hasattr(self, 'audio_input'):
self.audio_input.cleanup()
def on_control_message(self, ws, message):
try:
data = json.loads(message)
# Handle server response with status
if 'status' in data:
if data['status'] == 'success':
if 'parameter' in data and 'value' in data:
request_id = data.get('request_id')
if request_id is not None and request_id in self.pending_requests:
if self.debug_mode:
print(f"Parameter {data['parameter']} = {data['value']}")
self.pending_requests[request_id]['value'] = data['value']
self.pending_requests[request_id]['event'].set()
elif data['status'] == 'error':
print(f"Server Error: {data.get('message', '')}")
else:
print(f"Unknown control message format: {data}")
except json.JSONDecodeError:
print(f"Received non-JSON control message: {message}")
except Exception as e:
print(f"Error processing control message: {e}")
# Handle real-time transcription and full sentence updates
def on_data_message(self, ws, message):
try:
data = json.loads(message)
# Handle real-time transcription updates
if data.get('type') == 'realtime':
if data['text'] != self.realtime_text:
self.realtime_text = data['text']
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
# print(f"Realtime text [{timestamp}]: {bcolors.OKCYAN}{self.realtime_text}{bcolors.ENDC}")
if self.on_realtime_transcription_update:
# Call the callback in a new thread to avoid blocking
threading.Thread(
target=self.on_realtime_transcription_update,
args=(self.realtime_text,)
).start()
# Handle full sentences
elif data.get('type') == 'fullSentence':
self.final_text = data['text']
self.final_text_ready.set()
elif data.get('type') == 'recording_start':
if self.on_recording_start:
self.on_recording_start()
elif data.get('type') == 'recording_stop':
if self.on_recording_stop:
self.on_recording_stop()
elif data.get('type') == 'transcription_start':
audio_bytes_base64 = data.get('audio_bytes_base64')
decoded_bytes = base64.b64decode(audio_bytes_base64)
# Reconstruct the np.int16 array from the decoded bytes
audio_array = np.frombuffer(decoded_bytes, dtype=np.int16)
# If the original data was normalized, convert to np.float32 and normalize
INT16_MAX_ABS_VALUE = 32768.0
normalized_audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
if self.on_transcription_start:
self.on_transcription_start(normalized_audio)
elif data.get('type') == 'vad_detect_start':
if self.on_vad_detect_start:
self.on_vad_detect_start()
elif data.get('type') == 'vad_detect_stop':
if self.on_vad_detect_stop:
self.on_vad_detect_stop()
elif data.get('type') == 'vad_start':
if self.on_vad_start:
self.on_vad_start()
elif data.get('type') == 'vad_stop':
if self.on_vad_stop:
self.on_vad_stop()
elif data.get('type') == 'start_turn_detection':
if self.on_turn_detection_start:
self.on_turn_detection_start()
elif data.get('type') == 'stop_turn_detection':
if self.on_turn_detection_stop:
self.on_turn_detection_stop()
elif data.get('type') == 'wakeword_detected':
if self.on_wakeword_detected:
self.on_wakeword_detected()
elif data.get('type') == 'wakeword_detection_start':
if self.on_wakeword_detection_start:
self.on_wakeword_detection_start()
elif data.get('type') == 'wakeword_detection_end':
if self.on_wakeword_detection_end:
self.on_wakeword_detection_end()
elif data.get('type') == 'recorded_chunk':
pass
else:
print(f"Unknown data message format: {data}")
except json.JSONDecodeError:
print(f"Received non-JSON data message: {message}")
except Exception as e:
print(f"Error processing data message: {e}")
def on_error(self, ws, error):
print(f"WebSocket error: {error}")
def on_close(self, ws, close_status_code, close_msg):
if self.debug_mode:
if ws == self.data_ws:
print(f"Data WebSocket connection closed: {close_status_code} - {close_msg}")
elif ws == self.control_ws:
print(f"Control WebSocket connection closed: {close_status_code} - {close_msg}")
self.is_running = False
def on_control_open(self, ws):
if self.debug_mode:
print("Control WebSocket connection opened.")
self.connection_established.set()
def on_data_open(self, ws):
if self.debug_mode:
print("Data WebSocket connection opened.")
def set_parameter(self, parameter, value):
command = {
"command": "set_parameter",
"parameter": parameter,
"value": value
}
self.control_ws.send(json.dumps(command))
def get_parameter(self, parameter):
# Generate a unique request_id
request_id = self.request_counter
self.request_counter += 1
# Prepare the command with the request_id
command = {
"command": "get_parameter",
"parameter": parameter,
"request_id": request_id
}
# Create an event to wait for the response
event = threading.Event()
self.pending_requests[request_id] = {'event': event, 'value': None}
# Send the command to the server
self.control_ws.send(json.dumps(command))
# Wait for the response or timeout after 5 seconds
if event.wait(timeout=5):
value = self.pending_requests[request_id]['value']
# Clean up the pending request
del self.pending_requests[request_id]
return value
else:
print(f"Timeout waiting for get_parameter {parameter}")
# Clean up the pending request
del self.pending_requests[request_id]
return None
def call_method(self, method, args=None, kwargs=None):
command = {
"command": "call_method",
"method": method,
"args": args or [],
"kwargs": kwargs or {}
}
self.control_ws.send(json.dumps(command))
def shutdown(self):
"""Shutdown all resources"""
self.is_running = False
if self.control_ws:
self.control_ws.close()
if self.data_ws:
self.data_ws.close()
# Join threads
if self.control_ws_thread:
self.control_ws_thread.join()
if self.data_ws_thread:
self.data_ws_thread.join()
if self.recording_thread:
self.recording_thread.join()
# Clean up audio
self.cleanup_audio()
def __enter__(self):
"""
Method to set up the context manager protocol.
This enables the instance to be used in a `with` statement, ensuring
proper resource management. When the `with` block is entered, this
method is automatically called.
Returns:
self: The current instance of the class.
"""
return self
def __exit__(self, exc_type, exc_value, traceback):
"""
Method to define behavior when the context manager protocol exits.
This is called when exiting the `with` block and ensures that any
necessary cleanup or resource release processes are executed, such as
shutting down the system properly.
Args:
exc_type (Exception or None): The type of the exception that
caused the context to be exited, if any.
exc_value (Exception or None): The exception instance that caused
the context to be exited, if any.
traceback (Traceback or None): The traceback corresponding to the
exception, if any.
"""
self.shutdown()
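A minimal usage sketch for the client (assumes an `stt-server` is reachable or `autostart_server` can launch one; the callback is illustrative):

```python
from RealtimeSTT import AudioToTextRecorderClient

def on_update(text):
    print(f"\rpartial: {text}", end="", flush=True)

with AudioToTextRecorderClient(
    model="tiny",
    language="es",
    enable_realtime_transcription=True,
    on_realtime_transcription_update=on_update,
) as client:
    try:
        while True:
            print("\nfinal:", client.text())
    except KeyboardInterrupt:
        pass  # __exit__ calls shutdown()
```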

View File

@ -0,0 +1,245 @@
import sys
import multiprocessing as mp
import queue
import threading
import time
import logging
# Configure logging. Adjust level and formatting as needed.
# logging.basicConfig(level=logging.DEBUG,
# format='[%(asctime)s] %(levelname)s:%(name)s: %(message)s')
logger = logging.getLogger(__name__)
try:
# Force "spawn" on Linux/macOS; elsewhere only set it if none was chosen yet.
if sys.platform.startswith('linux') or sys.platform == 'darwin': # For Linux or macOS
mp.set_start_method("spawn")
elif mp.get_start_method(allow_none=True) is None:
mp.set_start_method("spawn")
except RuntimeError as e:
logger.debug("Start method has already been set. Details: %s", e)
class ParentPipe:
"""
A thread-safe wrapper around the 'parent end' of a multiprocessing pipe.
All actual pipe operations happen in a dedicated worker thread, so it's safe
for multiple threads to call send(), recv(), or poll() on the same ParentPipe
without interfering.
"""
def __init__(self, parent_synthesize_pipe):
self.name = "ParentPipe"
self._pipe = parent_synthesize_pipe # The raw pipe.
self._closed = False # A flag to mark if close() has been called.
# The request queue for sending operations to the worker.
self._request_queue = queue.Queue()
# This event signals the worker thread to stop.
self._stop_event = threading.Event()
# Worker thread that executes actual .send(), .recv(), .poll() calls.
self._worker_thread = threading.Thread(
target=self._pipe_worker,
name=f"{self.name}_Worker",
daemon=True
)
self._worker_thread.start()
def _pipe_worker(self):
while not self._stop_event.is_set():
try:
request = self._request_queue.get(timeout=0.1)
except queue.Empty:
continue
if request["type"] == "CLOSE":
# Exit worker loop on CLOSE request.
break
try:
if request["type"] == "SEND":
data = request["data"]
logger.debug("[%s] Worker: sending => %s", self.name, data)
self._pipe.send(data)
request["result_queue"].put(None)
elif request["type"] == "RECV":
logger.debug("[%s] Worker: receiving...", self.name)
data = self._pipe.recv()
request["result_queue"].put(data)
elif request["type"] == "POLL":
timeout = request.get("timeout", 0.0)
logger.debug("[%s] Worker: poll() with timeout: %s", self.name, timeout)
result = self._pipe.poll(timeout)
request["result_queue"].put(result)
except (EOFError, BrokenPipeError, OSError) as e:
# When the other end has closed or an error occurs,
# log and notify the waiting thread.
logger.debug("[%s] Worker: pipe closed or error occurred (%s). Shutting down.", self.name, e)
request["result_queue"].put(None)
break
except Exception as e:
logger.exception("[%s] Worker: unexpected error.", self.name)
request["result_queue"].put(e)
break
logger.debug("[%s] Worker: stopping.", self.name)
try:
self._pipe.close()
except Exception as e:
logger.debug("[%s] Worker: error during pipe close: %s", self.name, e)
def send(self, data):
"""
Synchronously asks the worker thread to perform .send().
"""
if self._closed:
logger.debug("[%s] send() called but pipe is already closed", self.name)
return
logger.debug("[%s] send() requested with: %s", self.name, data)
result_queue = queue.Queue()
request = {
"type": "SEND",
"data": data,
"result_queue": result_queue
}
self._request_queue.put(request)
result_queue.get() # Wait until sending completes.
logger.debug("[%s] send() completed", self.name)
def recv(self):
"""
Synchronously asks the worker to perform .recv() and returns the data.
"""
if self._closed:
logger.debug("[%s] recv() called but pipe is already closed", self.name)
return None
logger.debug("[%s] recv() requested", self.name)
result_queue = queue.Queue()
request = {
"type": "RECV",
"result_queue": result_queue
}
self._request_queue.put(request)
data = result_queue.get()
# Log a preview for huge byte blobs.
if isinstance(data, tuple) and len(data) == 2 and isinstance(data[1], bytes):
data_preview = (data[0], f"<{len(data[1])} bytes>")
else:
data_preview = data
logger.debug("[%s] recv() returning => %s", self.name, data_preview)
return data
def poll(self, timeout=0.0):
"""
Synchronously checks whether data is available.
Returns True if data is ready, or False otherwise.
"""
if self._closed:
return False
logger.debug("[%s] poll() requested with timeout: %s", self.name, timeout)
result_queue = queue.Queue()
request = {
"type": "POLL",
"timeout": timeout,
"result_queue": result_queue
}
self._request_queue.put(request)
try:
# Use a slightly longer timeout to give the worker a chance.
result = result_queue.get(timeout=timeout + 0.1)
except queue.Empty:
result = False
logger.debug("[%s] poll() returning => %s", self.name, result)
return result
def close(self):
"""
Closes the pipe and stops the worker thread. The _closed flag makes
sure no further operations are attempted.
"""
if self._closed:
return
logger.debug("[%s] close() called", self.name)
self._closed = True
stop_request = {"type": "CLOSE", "result_queue": queue.Queue()}
self._request_queue.put(stop_request)
self._stop_event.set()
self._worker_thread.join()
logger.debug("[%s] closed", self.name)
def SafePipe(debug=False):
"""
Returns a pair: (thread-safe parent pipe, raw child pipe).
"""
parent_synthesize_pipe, child_synthesize_pipe = mp.Pipe()
parent_pipe = ParentPipe(parent_synthesize_pipe)
return parent_pipe, child_synthesize_pipe
def child_process_code(child_end):
"""
Example child process code that receives messages, logs them,
sends acknowledgements, and then closes.
"""
for i in range(3):
msg = child_end.recv()
logger.debug("[Child] got: %s", msg)
child_end.send(f"ACK: {msg}")
child_end.close()
if __name__ == "__main__":
parent_pipe, child_pipe = SafePipe()
# Create child process with the child_process_code function.
p = mp.Process(target=child_process_code, args=(child_pipe,))
p.start()
# Event to signal sender threads to stop if needed.
stop_polling_event = threading.Event()
def sender_thread(n):
try:
parent_pipe.send(f"hello_from_thread_{n}")
except Exception as e:
logger.debug("[sender_thread_%s] send exception: %s", n, e)
return
# Use a poll loop with error handling.
for _ in range(10):
try:
if parent_pipe.poll(0.1):
reply = parent_pipe.recv()
logger.debug("[sender_thread_%s] got: %s", n, reply)
break
else:
logger.debug("[sender_thread_%s] no data yet...", n)
except (OSError, EOFError, BrokenPipeError) as e:
logger.debug("[sender_thread_%s] poll/recv exception: %s. Exiting thread.", n, e)
break
# Allow exit if a shutdown is signaled.
if stop_polling_event.is_set():
logger.debug("[sender_thread_%s] stop event set. Exiting thread.", n)
break
threads = []
for i in range(3):
t = threading.Thread(target=sender_thread, args=(i,))
t.start()
threads.append(t)
for t in threads:
t.join()
# Signal shutdown to any polling threads, then close the pipe.
stop_polling_event.set()
parent_pipe.close()
p.join()

View File

@ -0,0 +1,23 @@
from fastapi import FastAPI, WebSocket
from RealtimeSTT.audio_recorder import AudioToTextRecorder
import numpy as np
app = FastAPI()
recorder = AudioToTextRecorder(
model="tiny",
device="cuda",
compute_type="float16",
use_microphone=False,
)
@app.websocket("/ws/transcribe")
async def websocket_endpoint(websocket: WebSocket):
await websocket.accept()
while True:
data = await websocket.receive_bytes()
# Convert the bytes to a numpy array (adjust to match your audio format)
audio = np.frombuffer(data, dtype=np.float32)
recorder.feed_audio(audio)
text = recorder.text()
await websocket.send_text(text)
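Note that `recorder.text()` blocks, so a production endpoint would offload it (e.g. via `asyncio.to_thread`). For clients that talk to `stt-server` directly instead of embedding the recorder, each binary message on the data WebSocket is framed as a 4-byte little-endian metadata length, the JSON metadata (at minimum `sampleRate`), then the raw int16 PCM chunk, mirroring `feed_audio` above. A minimal sketch, assuming a server on the default data port (the input filename is hypothetical):

```python
import json
import struct

from websocket import create_connection  # websocket-client package

def frame_chunk(pcm_bytes: bytes, sample_rate: int = 16000) -> bytes:
    """Prefix a PCM chunk with its length-prefixed JSON metadata."""
    metadata = json.dumps({"sampleRate": sample_rate}).encode("utf-8")
    return struct.pack("<I", len(metadata)) + metadata + pcm_bytes

ws = create_connection("ws://127.0.0.1:8012")  # DEFAULT_DATA_URL
with open("speech_16k_mono.raw", "rb") as f:  # hypothetical raw int16 capture
    while chunk := f.read(2048):
        ws.send_binary(frame_chunk(chunk))
print(ws.recv())  # e.g. {"type": "realtime", "text": "..."}
ws.close()
```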

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,55 @@
import subprocess
import sys
import importlib
def check_and_install_packages(packages):
"""
Checks if the specified packages are installed, and if not, prompts the user
to install them.
Parameters:
- packages: A list of dictionaries, each containing:
- 'module_name': The module or package name to import.
- 'attribute': (Optional) The attribute or class to check within the module.
- 'install_name': The name used in the pip install command.
- 'version': (Optional) Version constraint for the package.
"""
for package in packages:
module_name = package['module_name']
attribute = package.get('attribute')
install_name = package.get('install_name', module_name)
version = package.get('version', '')
try:
# Attempt to import the module
module = importlib.import_module(module_name)
# If an attribute is specified, check if it exists
if attribute:
getattr(module, attribute)
except (ImportError, AttributeError):
user_input = input(
f"This program requires '{module_name}'"
f"{'' if not attribute else ' with attribute ' + attribute}, which is not installed or missing.\n"
f"Do you want to install '{install_name}' now? (y/n): "
)
if user_input.strip().lower() == 'y':
try:
# Build the pip install command
install_command = [sys.executable, "-m", "pip", "install"]
if version:
install_command.append(f"{install_name}{version}")
else:
install_command.append(install_name)
subprocess.check_call(install_command)
# Try to import again after installation
module = importlib.import_module(module_name)
if attribute:
getattr(module, attribute)
print(f"Successfully installed '{install_name}'.")
except Exception as e:
print(f"An error occurred while installing '{install_name}': {e}")
sys.exit(1)
else:
print(f"The program requires '{install_name}' to run. Exiting...")
sys.exit(1)
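A short usage sketch; the optional 'version' entry is appended verbatim to the pip requirement (the constraint shown is illustrative):

```python
check_and_install_packages([
    {'module_name': 'numpy'},
    {'module_name': 'websockets', 'version': '>=10.0'},
    {'module_name': 'scipy.signal', 'attribute': 'resample', 'install_name': 'scipy'},
])
```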

View File

@ -0,0 +1,913 @@
"""
Speech-to-Text (STT) Server with Real-Time Transcription and WebSocket Interface
This server provides real-time speech-to-text (STT) transcription using the RealtimeSTT library. It allows clients to connect via WebSocket to send audio data and receive real-time transcription updates. The server supports configurable audio recording parameters, voice activity detection (VAD), and wake word detection. It is designed to handle continuous transcription as well as post-recording processing, enabling real-time feedback with the option to improve final transcription quality after the complete sentence is recognized.
### Features:
- Real-time transcription using pre-configured or user-defined STT models.
- WebSocket-based communication for control and data handling.
- Flexible recording and transcription options, including configurable pauses for sentence detection.
- Supports Silero and WebRTC VAD for robust voice activity detection.
### Starting the Server:
You can start the server using the command-line interface (CLI) command `stt-server`, passing the desired configuration options.
```bash
stt-server [OPTIONS]
```
### Available Parameters:
- `-m, --model`: Model path or size; default 'large-v2'.
- `-r, --rt-model, --realtime_model_type`: Real-time model size; default 'tiny'.
- `-l, --lang, --language`: Language code for transcription; default 'es'.
- `-i, --input-device, --input_device_index`: Audio input device index; default 1.
- `-c, --control, --control_port`: WebSocket control port; default 8011.
- `-d, --data, --data_port`: WebSocket data port; default 8012.
- `-w, --wake_words`: Wake word(s) to trigger listening; default "".
- `-D, --debug`: Enable debug logging.
- `-W, --write`: Save audio to WAV file.
- `-s, --silence_timing`: Enable dynamic silence duration for sentence detection; default True.
- `-b, --batch, --batch_size`: Batch size for inference; default 16.
- `--root, --download_root`: Specifies the root path where the Whisper models are downloaded to.
- `--silero_sensitivity`: Silero VAD sensitivity (0-1); default 0.05.
- `--silero_use_onnx`: Use Silero ONNX model; default False.
- `--webrtc_sensitivity`: WebRTC VAD sensitivity (0-3); default 3.
- `--min_length_of_recording`: Minimum recording duration in seconds; default 1.1.
- `--min_gap_between_recordings`: Min time between recordings in seconds; default 0.
- `--enable_realtime_transcription`: Enable real-time transcription; default True.
- `--realtime_processing_pause`: Pause between audio chunk processing; default 0.02.
- `--silero_deactivity_detection`: Use Silero for end-of-speech detection; default True.
- `--early_transcription_on_silence`: Start transcription after silence in seconds; default 0.2.
- `--beam_size`: Beam size for main model; default 5.
- `--beam_size_realtime`: Beam size for real-time model; default 3.
- `--init_realtime_after_seconds`: Initial waiting time for realtime transcription; default 0.2.
- `--realtime_batch_size`: Batch size for the real-time transcription model; default 16.
- `--initial_prompt`: Initial main transcription guidance prompt.
- `--initial_prompt_realtime`: Initial realtime transcription guidance prompt.
- `--end_of_sentence_detection_pause`: Silence duration for sentence end detection; default 0.5.
- `--unknown_sentence_detection_pause`: Pause duration for incomplete sentence detection; default 0.5.
- `--mid_sentence_detection_pause`: Pause for mid-sentence break; default 0.5.
- `--wake_words_sensitivity`: Wake word detection sensitivity (0-1); default 0.5.
- `--wake_word_timeout`: Wake word timeout in seconds; default 5.0.
- `--wake_word_activation_delay`: Delay before wake word activation; default 20.
- `--wakeword_backend`: Backend for wake word detection; default 'none'.
- `--openwakeword_model_paths`: Paths to OpenWakeWord models.
- `--openwakeword_inference_framework`: OpenWakeWord inference framework; default 'tensorflow'.
- `--wake_word_buffer_duration`: Wake word buffer duration in seconds; default 1.0.
- `--use_main_model_for_realtime`: Use main model for real-time transcription.
- `--use_extended_logging`: Enable extensive log messages.
- `--logchunks`: Log incoming audio chunks.
- `--compute_type`: Type of computation to use.
- `--input_device_index`: Index of the audio input device.
- `--gpu_device_index`: Index of the GPU device.
- `--device`: Device to use for computation.
- `--handle_buffer_overflow`: Handle buffer overflow during transcription.
- `--suppress_tokens`: Suppress tokens during transcription.
- `--allowed_latency_limit`: Allowed latency limit for real-time transcription.
- `--faster_whisper_vad_filter`: Enable VAD filter for Faster Whisper; default False.
### WebSocket Interface:
The server supports two WebSocket connections:
1. **Control WebSocket**: Used to send and receive commands, such as setting parameters or calling recorder methods.
2. **Data WebSocket**: Used to send audio data for transcription and receive real-time transcription updates.
The server will broadcast real-time transcription updates to all connected clients on the data WebSocket.
"""
from .install_packages import check_and_install_packages
from difflib import SequenceMatcher
from collections import deque
from datetime import datetime
import logging
import asyncio
import pyaudio
import base64
import sys
debug_logging = False
extended_logging = False
send_recorded_chunk = False
log_incoming_chunks = False
silence_timing = False
writechunks = False
wav_file = None
hard_break_even_on_background_noise = 3.0
hard_break_even_on_background_noise_min_texts = 3
hard_break_even_on_background_noise_min_similarity = 0.99
hard_break_even_on_background_noise_min_chars = 15
text_time_deque = deque()
loglevel = logging.WARNING
FORMAT = pyaudio.paInt16
CHANNELS = 1
if sys.platform == 'win32':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
check_and_install_packages([
{
'module_name': 'RealtimeSTT', # Import module
'attribute': 'AudioToTextRecorder', # Specific class to check
'install_name': 'RealtimeSTT', # Package name for pip install
},
{
'module_name': 'websockets', # Import module
'install_name': 'websockets', # Package name for pip install
},
{
'module_name': 'numpy', # Import module
'install_name': 'numpy', # Package name for pip install
},
{
'module_name': 'scipy.signal', # Submodule of scipy
'attribute': 'resample', # Specific function to check
'install_name': 'scipy', # Package name for pip install
}
])
# Define ANSI color codes for terminal output
class bcolors:
HEADER = '\033[95m' # Magenta
OKBLUE = '\033[94m' # Blue
OKCYAN = '\033[96m' # Cyan
OKGREEN = '\033[92m' # Green
WARNING = '\033[93m' # Yellow
FAIL = '\033[91m' # Red
ENDC = '\033[0m' # Reset to default
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
print(f"{bcolors.BOLD}{bcolors.OKCYAN}Starting server, please wait...{bcolors.ENDC}")
# Initialize colorama
from colorama import init, Fore, Style
init()
from RealtimeSTT import AudioToTextRecorder
from scipy.signal import resample
import numpy as np
import websockets
import threading
import logging
import wave
import json
import time
global_args = None
recorder = None
recorder_config = {}
recorder_ready = threading.Event()
recorder_thread = None
stop_recorder = False
prev_text = ""
# Define allowed methods and parameters for security
allowed_methods = [
'set_microphone',
'abort',
'stop',
'clear_audio_queue',
'wakeup',
'shutdown',
'text',
]
allowed_parameters = [
'language',
'silero_sensitivity',
'wake_word_activation_delay',
'post_speech_silence_duration',
'listen_start',
'recording_stop_time',
'last_transcription_bytes',
'last_transcription_bytes_b64',
'speech_end_silence_start',
'is_recording',
'use_wake_words',
]
# Queues and connections for control and data
control_connections = set()
data_connections = set()
control_queue = asyncio.Queue()
audio_queue = asyncio.Queue()
def preprocess_text(text):
# Remove leading whitespaces
text = text.lstrip()
# Remove starting ellipses if present
if text.startswith("..."):
text = text[3:]
if text.endswith("...'."):
text = text[:-1]
if text.endswith("...'"):
text = text[:-1]
# Remove any leading whitespaces again after ellipses removal
text = text.lstrip()
# Uppercase the first letter
if text:
text = text[0].upper() + text[1:]
return text
def debug_print(message):
if debug_logging:
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
thread_name = threading.current_thread().name
print(f"{Fore.CYAN}[DEBUG][{timestamp}][{thread_name}] {message}{Style.RESET_ALL}", file=sys.stderr)
def format_timestamp_ns(timestamp_ns: int) -> str:
# Split into whole seconds and the nanosecond remainder
seconds = timestamp_ns // 1_000_000_000
remainder_ns = timestamp_ns % 1_000_000_000
# Convert seconds part into a datetime object (local time)
dt = datetime.fromtimestamp(seconds)
# Format the main time as HH:MM:SS
time_str = dt.strftime("%H:%M:%S")
# Convert the nanosecond remainder to milliseconds (three digits)
milliseconds = remainder_ns // 1_000_000
formatted_timestamp = f"{time_str}.{milliseconds:03d}"
return formatted_timestamp
def text_detected(text, loop):
global prev_text
text = preprocess_text(text)
# if silence_timing:
# def ends_with_ellipsis(text: str):
# if text.endswith("..."):
# return True
# if len(text) > 1 and text[:-1].endswith("..."):
# return True
# return False
# def sentence_end(text: str):
# sentence_end_marks = ['.', '!', '?', '。']
# if text and text[-1] in sentence_end_marks:
# return True
# return False
# if ends_with_ellipsis(text):
# recorder.post_speech_silence_duration = global_args.mid_sentence_detection_pause
# elif sentence_end(text) and sentence_end(prev_text) and not ends_with_ellipsis(prev_text):
# recorder.post_speech_silence_duration = global_args.end_of_sentence_detection_pause
# else:
# recorder.post_speech_silence_duration = global_args.unknown_sentence_detection_pause
# # Append the new text with its timestamp
# current_time = time.time()
# text_time_deque.append((current_time, text))
# # Remove texts older than hard_break_even_on_background_noise seconds
# while text_time_deque and text_time_deque[0][0] < current_time - hard_break_even_on_background_noise:
# text_time_deque.popleft()
# # Check if at least hard_break_even_on_background_noise_min_texts texts have arrived within the last hard_break_even_on_background_noise seconds
# if len(text_time_deque) >= hard_break_even_on_background_noise_min_texts:
# texts = [t[1] for t in text_time_deque]
# first_text = texts[0]
# last_text = texts[-1]
# # Compute the similarity ratio between the first and last texts
# similarity = SequenceMatcher(None, first_text, last_text).ratio()
# if similarity > hard_break_even_on_background_noise_min_similarity and len(first_text) > hard_break_even_on_background_noise_min_chars:
# recorder.stop()
# recorder.clear_audio_queue()
# prev_text = ""
prev_text = text
# Put the message in the audio queue to be sent to clients
message = json.dumps({
'type': 'realtime',
'text': text
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
# Get current timestamp in HH:MM:SS.nnn format
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
if extended_logging:
print(f" [{timestamp}] Realtime text: {bcolors.OKCYAN}{text}{bcolors.ENDC}\n", flush=True, end="")
else:
print(f"\r[{timestamp}] {bcolors.OKCYAN}{text}{bcolors.ENDC}", flush=True, end='')
def on_recording_start(loop):
message = json.dumps({
'type': 'recording_start'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_recording_stop(loop):
message = json.dumps({
'type': 'recording_stop'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_vad_detect_start(loop):
message = json.dumps({
'type': 'vad_detect_start'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_vad_detect_stop(loop):
message = json.dumps({
'type': 'vad_detect_stop'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_wakeword_detected(loop):
message = json.dumps({
'type': 'wakeword_detected'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_wakeword_detection_start(loop):
message = json.dumps({
'type': 'wakeword_detection_start'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_wakeword_detection_end(loop):
message = json.dumps({
'type': 'wakeword_detection_end'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_transcription_start(_audio_bytes, loop):
bytes_b64 = base64.b64encode(_audio_bytes.tobytes()).decode('utf-8')
message = json.dumps({
'type': 'transcription_start',
'audio_bytes_base64': bytes_b64
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_turn_detection_start(loop):
print("&&& stt_server on_turn_detection_start")
message = json.dumps({
'type': 'start_turn_detection'
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
def on_turn_detection_stop(loop):
# print("&&& stt_server on_turn_detection_stop")
# message = json.dumps({
# 'type': 'stop_turn_detection'
# })
# asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
pass
# def on_realtime_transcription_update(text, loop):
# # Send real-time transcription updates to the client
# text = preprocess_text(text)
# message = json.dumps({
# 'type': 'realtime_update',
# 'text': text
# })
# asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
# def on_recorded_chunk(chunk, loop):
# if send_recorded_chunk:
# bytes_b64 = base64.b64encode(chunk.tobytes()).decode('utf-8')
# message = json.dumps({
# 'type': 'recorded_chunk',
# 'bytes': bytes_b64
# })
# asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
# Define and parse the server's command-line arguments
def parse_arguments():
global debug_logging, extended_logging, loglevel, writechunks, log_incoming_chunks, dynamic_silence_timing
import argparse
parser = argparse.ArgumentParser(description='Start the Speech-to-Text (STT) server with various configuration options.')
parser.add_argument('-m', '--model', type=str, default='large-v2',
help='Path to the STT model or model size. Options include: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, or any huggingface CTranslate2 STT model such as deepdml/faster-whisper-large-v3-turbo-ct2. Default is large-v2.')
    parser.add_argument('-r', '--rt-model', '--realtime_model_type', type=str, default='tiny',
                        help='Model size for real-time transcription. Options are the same as for --model. This is used only if real-time transcription is enabled (enable_realtime_transcription). Default is tiny.')
    parser.add_argument('-l', '--lang', '--language', type=str, default='es',
                        help='Language code for the STT model to transcribe in a specific language. Leave this empty for auto-detection based on input audio. Default is es. List of supported language codes: https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L11-L110')
parser.add_argument('-i', '--input-device', '--input-device-index', type=int, default=1,
help='Index of the audio input device to use. Use this option to specify a particular microphone or audio input device based on your system. Default is 1.')
parser.add_argument('-c', '--control', '--control_port', type=int, default=8011,
help='The port number used for the control WebSocket connection. Control connections are used to send and receive commands to the server. Default is port 8011.')
parser.add_argument('-d', '--data', '--data_port', type=int, default=8012,
help='The port number used for the data WebSocket connection. Data connections are used to send audio data and receive transcription updates in real time. Default is port 8012.')
    parser.add_argument('-w', '--wake_words', type=str, default="",
                        help='Specify the wake word(s) that will trigger the server to start listening. For example, setting this to "Jarvis" will make the system start transcribing when it detects the wake word "Jarvis". Default is "" (no wake word).')
parser.add_argument('-D', '--debug', action='store_true', help='Enable debug logging for detailed server operations')
parser.add_argument('--debug_websockets', action='store_true', help='Enable debug logging for detailed server websocket operations')
parser.add_argument('-W', '--write', metavar='FILE', help='Save received audio to a WAV file')
parser.add_argument('-b', '--batch', '--batch_size', type=int, default=16, help='Batch size for inference. This parameter controls the number of audio chunks processed in parallel during transcription. Default is 16.')
    parser.add_argument('--root', '--download_root', type=str, default=None, help='Specifies the root path where the Whisper models are downloaded to. Default is None.')
    parser.add_argument('-s', '--silence_timing', action='store_true', default=True,
                        help='Enable dynamic adjustment of silence duration for sentence detection. Adjusts post-speech silence duration based on detected sentence structure and punctuation. Default is True.')
parser.add_argument('--init_realtime_after_seconds', type=float, default=0.2,
help='The initial waiting time in seconds before real-time transcription starts. This delay helps prevent false positives at the beginning of a session. Default is 0.2 seconds.')
parser.add_argument('--realtime_batch_size', type=int, default=16,
help='Batch size for the real-time transcription model. This parameter controls the number of audio chunks processed in parallel during real-time transcription. Default is 16.')
parser.add_argument('--initial_prompt_realtime', type=str, default="", help='Initial prompt that guides the real-time transcription model to produce transcriptions in a particular style or format.')
parser.add_argument('--silero_sensitivity', type=float, default=0.05,
help='Sensitivity level for Silero Voice Activity Detection (VAD), with a range from 0 to 1. Lower values make the model less sensitive, useful for noisy environments. Default is 0.05.')
parser.add_argument('--silero_use_onnx', action='store_true', default=False,
help='Enable ONNX version of Silero model for faster performance with lower resource usage. Default is False.')
parser.add_argument('--webrtc_sensitivity', type=int, default=3,
help='Sensitivity level for WebRTC Voice Activity Detection (VAD), with a range from 0 to 3. Higher values make the model less sensitive, useful for cleaner environments. Default is 3.')
parser.add_argument('--min_length_of_recording', type=float, default=1.1,
help='Minimum duration of valid recordings in seconds. This prevents very short recordings from being processed, which could be caused by noise or accidental sounds. Default is 1.1 seconds.')
    parser.add_argument('--min_gap_between_recordings', type=float, default=0,
                        help="Minimum time (in seconds) between consecutive recordings. Setting this helps avoid overlapping recordings when there's a brief silence between them. Default is 0 seconds.")
parser.add_argument('--enable_realtime_transcription', action='store_true', default=True,
help='Enable continuous real-time transcription of audio as it is received. When enabled, transcriptions are sent in near real-time. Default is True.')
parser.add_argument('--realtime_processing_pause', type=float, default=0.02,
help='Time interval (in seconds) between processing audio chunks for real-time transcription. Lower values increase responsiveness but may put more load on the CPU. Default is 0.02 seconds.')
parser.add_argument('--silero_deactivity_detection', action='store_true', default=True,
help='Use the Silero model for end-of-speech detection. This option can provide more robust silence detection in noisy environments, though it consumes more GPU resources. Default is True.')
parser.add_argument('--early_transcription_on_silence', type=float, default=0.2,
help='Start transcription after the specified seconds of silence. This is useful when you want to trigger transcription mid-speech when there is a brief pause. Should be lower than post_speech_silence_duration. Set to 0 to disable. Default is 0.2 seconds.')
parser.add_argument('--beam_size', type=int, default=5,
help='Beam size for the main transcription model. Larger values may improve transcription accuracy but increase the processing time. Default is 5.')
parser.add_argument('--beam_size_realtime', type=int, default=3,
help='Beam size for the real-time transcription model. A smaller beam size allows for faster real-time processing but may reduce accuracy. Default is 3.')
parser.add_argument('--initial_prompt', type=str,
default="Incomplete thoughts should end with '...'. Examples of complete thoughts: 'The sky is blue.' 'She walked home.' Examples of incomplete thoughts: 'When the sky...' 'Because he...'",
help='Initial prompt that guides the transcription model to produce transcriptions in a particular style or format. The default provides instructions for handling sentence completions and ellipsis usage.')
    parser.add_argument('--end_of_sentence_detection_pause', type=float, default=5.0,
                        help='The duration of silence (in seconds) that the model should interpret as the end of a sentence. This helps the system detect when to finalize the transcription of a sentence. Default is 5.0 seconds.')
    parser.add_argument('--unknown_sentence_detection_pause', type=float, default=5.0,
                        help='The duration of pause (in seconds) that the model should interpret as an incomplete or unknown sentence. This is useful for identifying when a sentence is trailing off or unfinished. Default is 5.0 seconds.')
    parser.add_argument('--mid_sentence_detection_pause', type=float, default=5.0,
                        help='The duration of pause (in seconds) that the model should interpret as a mid-sentence break. Longer pauses can indicate a pause in speech but not necessarily the end of a sentence. Default is 5.0 seconds.')
parser.add_argument('--wake_words_sensitivity', type=float, default=0.5,
help='Sensitivity level for wake word detection, with a range from 0 (most sensitive) to 1 (least sensitive). Adjust this value based on your environment to ensure reliable wake word detection. Default is 0.5.')
parser.add_argument('--wake_word_timeout', type=float, default=5.0,
help='Maximum time in seconds that the system will wait for a wake word before timing out. After this timeout, the system stops listening for wake words until reactivated. Default is 5.0 seconds.')
parser.add_argument('--wake_word_activation_delay', type=float, default=0,
help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0 seconds.')
    parser.add_argument('--wakeword_backend', type=str, default='none',
                        help='The backend used for wake word detection. You can specify different backends such as "pvporcupine" or custom implementations depending on your setup. Default is "none" (wake word detection disabled).')
parser.add_argument('--openwakeword_model_paths', type=str, nargs='*',
help='A list of file paths to OpenWakeWord models. This is useful if you are using OpenWakeWord for wake word detection and need to specify custom models.')
parser.add_argument('--openwakeword_inference_framework', type=str, default='tensorflow',
help='The inference framework to use for OpenWakeWord models. Supported frameworks could include "tensorflow", "pytorch", etc. Default is "tensorflow".')
parser.add_argument('--wake_word_buffer_duration', type=float, default=1.0,
help='Duration of the buffer in seconds for wake word detection. This sets how long the system will store the audio before and after detecting the wake word. Default is 1.0 seconds.')
parser.add_argument('--use_main_model_for_realtime', action='store_true',
help='Enable this option if you want to use the main model for real-time transcription, instead of the smaller, faster real-time model. Using the main model may provide better accuracy but at the cost of higher processing time.')
    parser.add_argument('--use_extended_logging', action='store_true',
                        help='Write extensive log messages for the recording worker that processes the audio chunks.')
parser.add_argument('--compute_type', type=str, default='default',
help='Type of computation to use. See https://opennmt.net/CTranslate2/quantization.html')
    parser.add_argument('--gpu_device_index', type=int, default=0,
                        help='Index of the GPU device to use. Default is 0.')
parser.add_argument('--device', type=str, default='cuda',
help='Device for model to use. Can either be "cuda" or "cpu". Default is cuda.')
parser.add_argument('--handle_buffer_overflow', action='store_true',
help='Handle buffer overflow during transcription. Default is False.')
parser.add_argument('--suppress_tokens', type=int, default=[-1], nargs='*', help='Suppress tokens during transcription. Default is [-1].')
    parser.add_argument('--allowed_latency_limit', type=int, default=100,
                        help='Maximum number of unprocessed chunks allowed in the queue before chunks are discarded. Default is 100.')
parser.add_argument('--faster_whisper_vad_filter', action='store_true',
help='Enable VAD filter for Faster Whisper. Default is False.')
    parser.add_argument('--logchunks', action='store_true', help='Enable logging of incoming audio chunks (prints a period for each chunk received)')
# Parse arguments
args = parser.parse_args()
debug_logging = args.debug
extended_logging = args.use_extended_logging
writechunks = args.write
log_incoming_chunks = args.logchunks
dynamic_silence_timing = args.silence_timing
ws_logger = logging.getLogger('websockets')
    if args.debug_websockets:
        # If websocket debugging is requested, let the websockets logger be verbose
        ws_logger.setLevel(logging.DEBUG)
        # Keep its records from also propagating to the root logger, which would duplicate messages
        ws_logger.propagate = False
    else:
        # Otherwise silence the websockets logger below WARNING
        ws_logger.setLevel(logging.WARNING)
        ws_logger.propagate = True  # Let WARNING/ERROR records reach the root logger's handler
# Replace escaped newlines with actual newlines in initial_prompt
if args.initial_prompt:
args.initial_prompt = args.initial_prompt.replace("\\n", "\n")
if args.initial_prompt_realtime:
args.initial_prompt_realtime = args.initial_prompt_realtime.replace("\\n", "\n")
return args
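
# Illustrative launch command (assuming this file is saved as stt_server.py; the
# flag values are examples, and the defaults are defined above):
#   python stt_server.py -m large-v2 -l es -i 1 -c 8011 -d 8012 --debug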
def _recorder_thread(loop):
global recorder, stop_recorder
print(f"{bcolors.OKGREEN}Initializing RealtimeSTT server with parameters:{bcolors.ENDC}")
for key, value in recorder_config.items():
print(f" {bcolors.OKBLUE}{key}{bcolors.ENDC}: {value}")
recorder = AudioToTextRecorder(**recorder_config)
print(f"{bcolors.OKGREEN}{bcolors.BOLD}RealtimeSTT initialized{bcolors.ENDC}")
recorder_ready.set()
def process_text(full_sentence):
global prev_text
prev_text = ""
full_sentence = preprocess_text(full_sentence)
message = json.dumps({
            'type': 'fullSentence',  # <- the final, accurate transcription message
'text': full_sentence
})
asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
if extended_logging:
print(f" [{timestamp}] Full text: {bcolors.BOLD}Sentence:{bcolors.ENDC} {bcolors.OKGREEN}{full_sentence}{bcolors.ENDC}\n", flush=True, end="")
else:
print(f"\r[{timestamp}] {bcolors.BOLD}Sentence:{bcolors.ENDC} {bcolors.OKGREEN}{full_sentence}{bcolors.ENDC}\n")
try:
while not stop_recorder:
            recorder.text(process_text)  # <- invokes the main (large) model once the utterance is complete
except KeyboardInterrupt:
print(f"{bcolors.WARNING}Exiting application due to keyboard interrupt{bcolors.ENDC}")
def decode_and_resample(
audio_data,
original_sample_rate,
target_sample_rate):
    # Nothing to do if the sample rates already match
    if original_sample_rate == target_sample_rate:
        return audio_data

    # Decode 16-bit PCM data to a numpy array
    audio_np = np.frombuffer(audio_data, dtype=np.int16)
# Calculate the number of samples after resampling
num_original_samples = len(audio_np)
num_target_samples = int(num_original_samples * target_sample_rate /
original_sample_rate)
# Resample the audio
resampled_audio = resample(audio_np, num_target_samples)
return resampled_audio.astype(np.int16).tobytes()
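
# A polyphase resampler (like the resample_poly used by AudioInput elsewhere in this
# package) generally introduces fewer aliasing artifacts than the FFT-based resample()
# above. Minimal sketch, kept as a comment so it adds no new module-level dependencies;
# it assumes scipy and math are importable:
#
#   import math
#   from scipy.signal import resample_poly
#
#   def decode_and_resample_poly(audio_data, original_sample_rate, target_sample_rate):
#       if original_sample_rate == target_sample_rate:
#           return audio_data
#       audio_np = np.frombuffer(audio_data, dtype=np.int16)
#       gcd = math.gcd(original_sample_rate, target_sample_rate)
#       up, down = target_sample_rate // gcd, original_sample_rate // gcd
#       return resample_poly(audio_np, up, down).astype(np.int16).tobytes()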
async def control_handler(websocket):
debug_print(f"New control connection from {websocket.remote_address}")
print(f"{bcolors.OKGREEN}Control client connected{bcolors.ENDC}")
global recorder
control_connections.add(websocket)
try:
async for message in websocket:
debug_print(f"Received control message: {message[:200]}...")
if not recorder_ready.is_set():
print(f"{bcolors.WARNING}Recorder not ready{bcolors.ENDC}")
continue
if isinstance(message, str):
# Handle text message (command)
try:
command_data = json.loads(message)
command = command_data.get("command")
if command == "set_parameter":
parameter = command_data.get("parameter")
value = command_data.get("value")
if parameter in allowed_parameters and hasattr(recorder, parameter):
setattr(recorder, parameter, value)
# Format the value for output
if isinstance(value, float):
value_formatted = f"{value:.2f}"
else:
value_formatted = value
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
if extended_logging:
print(f" [{timestamp}] {bcolors.OKGREEN}Set recorder.{parameter} to: {bcolors.OKBLUE}{value_formatted}{bcolors.ENDC}")
# Optionally send a response back to the client
await websocket.send(json.dumps({"status": "success", "message": f"Parameter {parameter} set to {value}"}))
else:
if not parameter in allowed_parameters:
print(f"{bcolors.WARNING}Parameter {parameter} is not allowed (set_parameter){bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Parameter {parameter} is not allowed (set_parameter)"}))
else:
print(f"{bcolors.WARNING}Parameter {parameter} does not exist (set_parameter){bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Parameter {parameter} does not exist (set_parameter)"}))
elif command == "get_parameter":
parameter = command_data.get("parameter")
request_id = command_data.get("request_id") # Get the request_id from the command data
if parameter in allowed_parameters and hasattr(recorder, parameter):
value = getattr(recorder, parameter)
if isinstance(value, float):
value_formatted = f"{value:.2f}"
else:
value_formatted = f"{value}"
                            value_truncated = value_formatted[:39] + "..." if len(value_formatted) > 40 else value_formatted
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
if extended_logging:
print(f" [{timestamp}] {bcolors.OKGREEN}Get recorder.{parameter}: {bcolors.OKBLUE}{value_truncated}{bcolors.ENDC}")
response = {"status": "success", "parameter": parameter, "value": value}
if request_id is not None:
response["request_id"] = request_id
await websocket.send(json.dumps(response))
else:
if not parameter in allowed_parameters:
print(f"{bcolors.WARNING}Parameter {parameter} is not allowed (get_parameter){bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Parameter {parameter} is not allowed (get_parameter)"}))
else:
print(f"{bcolors.WARNING}Parameter {parameter} does not exist (get_parameter){bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Parameter {parameter} does not exist (get_parameter)"}))
elif command == "call_method":
method_name = command_data.get("method")
if method_name in allowed_methods:
method = getattr(recorder, method_name, None)
if method and callable(method):
args = command_data.get("args", [])
kwargs = command_data.get("kwargs", {})
method(*args, **kwargs)
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
print(f" [{timestamp}] {bcolors.OKGREEN}Called method recorder.{bcolors.OKBLUE}{method_name}{bcolors.ENDC}")
await websocket.send(json.dumps({"status": "success", "message": f"Method {method_name} called"}))
else:
print(f"{bcolors.WARNING}Recorder does not have method {method_name}{bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Recorder does not have method {method_name}"}))
else:
print(f"{bcolors.WARNING}Method {method_name} is not allowed{bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Method {method_name} is not allowed"}))
else:
print(f"{bcolors.WARNING}Unknown command: {command}{bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": f"Unknown command {command}"}))
except json.JSONDecodeError:
print(f"{bcolors.WARNING}Received invalid JSON command{bcolors.ENDC}")
await websocket.send(json.dumps({"status": "error", "message": "Invalid JSON command"}))
else:
print(f"{bcolors.WARNING}Received unknown message type on control connection{bcolors.ENDC}")
except websockets.exceptions.ConnectionClosed as e:
print(f"{bcolors.WARNING}Control client disconnected: {e}{bcolors.ENDC}")
finally:
control_connections.remove(websocket)
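
# Illustrative control messages a client might send over the control WebSocket.
# The shapes follow the handler above; the parameter and method names are examples
# and must appear in allowed_parameters / allowed_methods (defined elsewhere):
#   {"command": "set_parameter", "parameter": "post_speech_silence_duration", "value": 0.7}
#   {"command": "get_parameter", "parameter": "post_speech_silence_duration", "request_id": 1}
#   {"command": "call_method", "method": "stop", "args": [], "kwargs": {}}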
async def data_handler(websocket):
global writechunks, wav_file
print(f"{bcolors.OKGREEN}Data client connected{bcolors.ENDC}")
data_connections.add(websocket)
try:
while True:
message = await websocket.recv()
if isinstance(message, bytes):
if extended_logging:
debug_print(f"Received audio chunk (size: {len(message)} bytes)")
elif log_incoming_chunks:
print(".", end='', flush=True)
# Handle binary message (audio data)
metadata_length = int.from_bytes(message[:4], byteorder='little')
metadata_json = message[4:4+metadata_length].decode('utf-8')
metadata = json.loads(metadata_json)
sample_rate = metadata['sampleRate']
if 'server_sent_to_stt' in metadata:
stt_received_ns = time.time_ns()
metadata["stt_received"] = stt_received_ns
metadata["stt_received_formatted"] = format_timestamp_ns(stt_received_ns)
print(f"Server received audio chunk of length {len(message)} bytes, metadata: {metadata}")
if extended_logging:
debug_print(f"Processing audio chunk with sample rate {sample_rate}")
chunk = message[4+metadata_length:]
if writechunks:
if not wav_file:
wav_file = wave.open(writechunks, 'wb')
wav_file.setnchannels(CHANNELS)
wav_file.setsampwidth(pyaudio.get_sample_size(FORMAT))
wav_file.setframerate(sample_rate)
wav_file.writeframes(chunk)
if sample_rate != 16000:
resampled_chunk = decode_and_resample(chunk, sample_rate, 16000)
if extended_logging:
debug_print(f"Resampled chunk size: {len(resampled_chunk)} bytes")
recorder.feed_audio(resampled_chunk)
else:
recorder.feed_audio(chunk)
else:
print(f"{bcolors.WARNING}Received non-binary message on data connection{bcolors.ENDC}")
except websockets.exceptions.ConnectionClosed as e:
print(f"{bcolors.WARNING}Data client disconnected: {e}{bcolors.ENDC}")
finally:
data_connections.remove(websocket)
# recorder.clear_audio_queue() # Ensure audio queue is cleared if client disconnects
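
# Each binary frame on the data socket is expected to be laid out as:
#   [4-byte little-endian metadata length][metadata JSON][raw 16-bit mono PCM]
# Minimal client-side packing sketch (illustrative only, not used by the server;
# pack_audio_frame is a hypothetical helper name):
#
#   import struct
#
#   def pack_audio_frame(pcm_bytes, sample_rate):
#       metadata = json.dumps({'sampleRate': sample_rate}).encode('utf-8')
#       return struct.pack('<I', len(metadata)) + metadata + pcm_bytes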
async def broadcast_audio_messages():
while True:
message = await audio_queue.get()
for conn in list(data_connections):
try:
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
if extended_logging:
print(f" [{timestamp}] Sending message: {bcolors.OKBLUE}{message}{bcolors.ENDC}\n", flush=True, end="")
await conn.send(message)
except websockets.exceptions.ConnectionClosed:
data_connections.remove(conn)
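
# Every message placed on audio_queue is a JSON string. The 'type' values emitted
# above are: 'realtime', 'fullSentence', 'recording_start', 'recording_stop',
# 'vad_detect_start', 'vad_detect_stop', 'wakeword_detected',
# 'wakeword_detection_start', 'wakeword_detection_end',
# 'transcription_start' (which also carries 'audio_bytes_base64') and
# 'start_turn_detection'.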
# Helper function to create event loop bound closures for callbacks
def make_callback(loop, callback):
def inner_callback(*args, **kwargs):
callback(*args, **kwargs, loop=loop)
return inner_callback
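
# Example: make_callback(loop, on_recording_start) returns a closure that, when the
# recorder thread fires the callback, calls on_recording_start(loop=loop) so the
# message is scheduled onto the server's event loop rather than the worker thread.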
async def main_async():
global stop_recorder, recorder_config, global_args
args = parse_arguments()
global_args = args
    # Grab the running event loop here and pass it to the recorder thread
    loop = asyncio.get_running_loop()
recorder_config = {
'model': args.model,
'download_root': args.root,
'realtime_model_type': args.rt_model,
'language': args.lang,
'batch_size': args.batch,
'init_realtime_after_seconds': args.init_realtime_after_seconds,
'realtime_batch_size': args.realtime_batch_size,
'initial_prompt_realtime': args.initial_prompt_realtime,
'input_device_index': args.input_device,
'silero_sensitivity': args.silero_sensitivity,
'silero_use_onnx': args.silero_use_onnx,
'webrtc_sensitivity': args.webrtc_sensitivity,
'post_speech_silence_duration': args.unknown_sentence_detection_pause,
'min_length_of_recording': args.min_length_of_recording,
'min_gap_between_recordings': args.min_gap_between_recordings,
'enable_realtime_transcription': args.enable_realtime_transcription,
'realtime_processing_pause': args.realtime_processing_pause,
'silero_deactivity_detection': args.silero_deactivity_detection,
'early_transcription_on_silence': args.early_transcription_on_silence,
'beam_size': args.beam_size,
'beam_size_realtime': args.beam_size_realtime,
'initial_prompt': args.initial_prompt,
'wake_words': args.wake_words,
'wake_words_sensitivity': args.wake_words_sensitivity,
'wake_word_timeout': args.wake_word_timeout,
'wake_word_activation_delay': args.wake_word_activation_delay,
'wakeword_backend': args.wakeword_backend,
'openwakeword_model_paths': args.openwakeword_model_paths,
'openwakeword_inference_framework': args.openwakeword_inference_framework,
'wake_word_buffer_duration': args.wake_word_buffer_duration,
'use_main_model_for_realtime': args.use_main_model_for_realtime,
'spinner': False,
'use_microphone': False,
'on_realtime_transcription_update': make_callback(loop, text_detected),
'on_recording_start': make_callback(loop, on_recording_start),
'on_recording_stop': make_callback(loop, on_recording_stop),
'on_vad_detect_start': make_callback(loop, on_vad_detect_start),
'on_vad_detect_stop': make_callback(loop, on_vad_detect_stop),
'on_wakeword_detected': make_callback(loop, on_wakeword_detected),
'on_wakeword_detection_start': make_callback(loop, on_wakeword_detection_start),
'on_wakeword_detection_end': make_callback(loop, on_wakeword_detection_end),
'on_transcription_start': make_callback(loop, on_transcription_start),
'on_turn_detection_start': make_callback(loop, on_turn_detection_start),
'on_turn_detection_stop': make_callback(loop, on_turn_detection_stop),
# 'on_recorded_chunk': make_callback(loop, on_recorded_chunk),
'no_log_file': True, # Disable logging to file
'use_extended_logging': args.use_extended_logging,
'level': loglevel,
'compute_type': args.compute_type,
'gpu_device_index': args.gpu_device_index,
'device': args.device,
'handle_buffer_overflow': args.handle_buffer_overflow,
'suppress_tokens': args.suppress_tokens,
'allowed_latency_limit': args.allowed_latency_limit,
'faster_whisper_vad_filter': args.faster_whisper_vad_filter,
}
try:
# Attempt to start control and data servers
control_server = await websockets.serve(control_handler, "localhost", args.control)
data_server = await websockets.serve(data_handler, "localhost", args.data)
print(f"{bcolors.OKGREEN}Control server started on {bcolors.OKBLUE}ws://localhost:{args.control}{bcolors.ENDC}")
print(f"{bcolors.OKGREEN}Data server started on {bcolors.OKBLUE}ws://localhost:{args.data}{bcolors.ENDC}")
# Start the broadcast and recorder threads
broadcast_task = asyncio.create_task(broadcast_audio_messages())
recorder_thread = threading.Thread(target=_recorder_thread, args=(loop,))
recorder_thread.start()
recorder_ready.wait()
print(f"{bcolors.OKGREEN}Server started. Press Ctrl+C to stop the server.{bcolors.ENDC}")
# Run server tasks
await asyncio.gather(control_server.wait_closed(), data_server.wait_closed(), broadcast_task)
    except OSError as e:
        print(f"{bcolors.FAIL}Error: Could not start server on the specified ports ({e}). It's possible another instance of the server is already running, or the ports are in use by another application.{bcolors.ENDC}")
except KeyboardInterrupt:
print(f"{bcolors.WARNING}Server interrupted by user, shutting down...{bcolors.ENDC}")
finally:
# Shutdown procedures for recorder and server threads
await shutdown_procedure()
print(f"{bcolors.OKGREEN}Server shutdown complete.{bcolors.ENDC}")
async def shutdown_procedure():
global stop_recorder, recorder_thread
if recorder:
stop_recorder = True
recorder.abort()
# recorder.stop()
recorder.shutdown()
print(f"{bcolors.OKGREEN}Recorder shut down{bcolors.ENDC}")
if recorder_thread:
recorder_thread.join()
print(f"{bcolors.OKGREEN}Recorder thread finished{bcolors.ENDC}")
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
for task in tasks:
task.cancel()
await asyncio.gather(*tasks, return_exceptions=True)
print(f"{bcolors.OKGREEN}All tasks cancelled, closing event loop now.{bcolors.ENDC}")
def main():
try:
asyncio.run(main_async())
except KeyboardInterrupt:
# Capture any final KeyboardInterrupt to prevent it from showing up in logs
print(f"{bcolors.WARNING}Server interrupted by user.{bcolors.ENDC}")
exit(0)
if __name__ == '__main__':
main()