minimo reconocimiento de voz

2025-06-17 08:48:55 -03:00
commit 36fe9f603e
79 changed files with 7662 additions and 0 deletions
--- a/minimal_server/RealtimeSTT/init.py
+++ b/minimal_server/RealtimeSTT/init.py
@ -0,0 +1,3 @@
+from .audio_recorder import AudioToTextRecorder
+from .audio_recorder_client import AudioToTextRecorderClient
+from .audio_input import AudioInput
--- a/minimal_server/RealtimeSTT/pycache/init.cpython-310.pyc
+++ b/minimal_server/RealtimeSTT/pycache/init.cpython-310.pyc
--- a/minimal_server/RealtimeSTT/pycache/init.cpython-311.pyc
+++ b/minimal_server/RealtimeSTT/pycache/init.cpython-311.pyc
--- a/minimal_server/RealtimeSTT/pycache/init.cpython-312.pyc
+++ b/minimal_server/RealtimeSTT/pycache/init.cpython-312.pyc
--- a/minimal_server/RealtimeSTT/pycache/init.cpython-313.pyc
+++ b/minimal_server/RealtimeSTT/pycache/init.cpython-313.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_input.cpython-310.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_input.cpython-310.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_input.cpython-311.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_input.cpython-311.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_input.cpython-313.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_input.cpython-313.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-310.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-310.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-311.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-311.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-312.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-312.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-313.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder.cpython-313.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder_client.cpython-310.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder_client.cpython-310.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder_client.cpython-311.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder_client.cpython-311.pyc
--- a/minimal_server/RealtimeSTT/pycache/audio_recorder_client.cpython-313.pyc
+++ b/minimal_server/RealtimeSTT/pycache/audio_recorder_client.cpython-313.pyc
--- a/minimal_server/RealtimeSTT/pycache/safepipe.cpython-310.pyc
+++ b/minimal_server/RealtimeSTT/pycache/safepipe.cpython-310.pyc
--- a/minimal_server/RealtimeSTT/pycache/safepipe.cpython-311.pyc
+++ b/minimal_server/RealtimeSTT/pycache/safepipe.cpython-311.pyc
--- a/minimal_server/RealtimeSTT/pycache/safepipe.cpython-313.pyc
+++ b/minimal_server/RealtimeSTT/pycache/safepipe.cpython-313.pyc
--- a/minimal_server/RealtimeSTT/pycache/server.cpython-311.pyc
+++ b/minimal_server/RealtimeSTT/pycache/server.cpython-311.pyc
--- a/minimal_server/RealtimeSTT/audio_input.py
+++ b/minimal_server/RealtimeSTT/audio_input.py
@ -0,0 +1,220 @@
+from colorama import init, Fore, Style
+from scipy.signal import butter, filtfilt, resample_poly
+import pyaudio
+import logging
+
+# Defaults for the AudioInput constructor arguments below.
+DESIRED_RATE = 16000  # default for target_samplerate
+CHUNK_SIZE = 1024  # default for chunk_size (passed as frames_per_buffer when the stream is opened)
+AUDIO_FORMAT = pyaudio.paInt16  # default for audio_format
+CHANNELS = 1  # default for channels
+
+class AudioInput:
+    def __init__(
+            self,
+            input_device_index: int = None,
+            debug_mode: bool = False,
+            target_samplerate: int = DESIRED_RATE,
+            chunk_size: int = CHUNK_SIZE,
+            audio_format: int = AUDIO_FORMAT,
+            channels: int = CHANNELS,
+            resample_to_target: bool = True,
+        ):
+        """Record the capture configuration; no audio device is opened here.
+
+        Args:
+            input_device_index: Device to capture from; None means the default
+                input device is looked up in setup().
+            debug_mode: When true, setup() prints progress messages.
+            target_samplerate: Rate requested from the device in setup().
+            chunk_size: Frames per buffer / frames returned by read_chunk().
+            audio_format: Sample format handed to PyAudio.
+            channels: Channel count used when opening the stream.
+            resample_to_target: Stored as-is; not read by the methods of this
+                class.
+        """
+        # Runtime handles, filled in by setup() / list_devices().
+        self.audio_interface = None
+        self.stream = None
+        self.device_sample_rate = None
+
+        # Caller-supplied configuration.
+        self.input_device_index = input_device_index
+        self.debug_mode = debug_mode
+        self.target_samplerate = target_samplerate
+        self.chunk_size = chunk_size
+        self.audio_format = audio_format
+        self.channels = channels
+        self.resample_to_target = resample_to_target
+
+    def get_supported_sample_rates(self, device_index):
+        """Return the standard sample rates the given input device accepts.
+
+        Each candidate rate is probed through ``is_format_supported`` using the
+        device's ``maxInputChannels`` and this instance's ``audio_format``.
+        ``self.audio_interface`` must already hold a PyAudio instance.
+
+        Args:
+            device_index: Index of the device to probe.
+
+        Returns:
+            list: The accepted rates, in ascending (probing) order; empty when
+            none of the candidates is accepted.
+        """
+        standard_rates = [8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000]
+        supported_rates = []
+
+        device_info = self.audio_interface.get_device_info_by_index(device_index)
+        max_channels = device_info.get('maxInputChannels')
+
+        for rate in standard_rates:
+            try:
+                is_supported = self.audio_interface.is_format_supported(
+                    rate,
+                    input_device=device_index,
+                    input_channels=max_channels,
+                    input_format=self.audio_format,
+                )
+            except Exception:
+                # A rejected combination is reported by raising, so a failure
+                # here just means "not supported". Catching Exception (not a
+                # bare except) lets KeyboardInterrupt/SystemExit propagate.
+                continue
+            if is_supported:
+                supported_rates.append(rate)
+        return supported_rates
+
+    def _get_best_sample_rate(self, actual_device_index, desired_rate):
+        """Choose the sample rate to open the device with.
+
+        Preference order:
+            1. ``desired_rate`` when the device supports it;
+            2. otherwise the highest supported standard rate;
+            3. otherwise (no standard rate is supported) the device's
+               ``defaultSampleRate``, or 44100 when the device reports none.
+
+        Args:
+            actual_device_index: Index of the device that will be opened.
+            desired_rate: Preferred sample rate in Hz.
+
+        Returns:
+            int: The chosen rate. 44100 is returned, after logging a warning,
+            if querying the device raises.
+        """
+        try:
+            device_info = self.audio_interface.get_device_info_by_index(actual_device_index)
+            supported_rates = self.get_supported_sample_rates(actual_device_index)
+
+            if desired_rate in supported_rates:
+                return desired_rate
+
+            # Guard the max(): on an empty list it raises ValueError, which
+            # previously made the device-default fallback below unreachable.
+            if supported_rates:
+                return max(supported_rates)
+
+            return int(device_info.get('defaultSampleRate', 44100))
+
+        except Exception as e:
+            logging.warning(f"Error determining sample rate: {e}")
+            return 44100  # Safe fallback
+
+    def list_devices(self):
+        """Print every input-capable audio device with its supported sample rates.
+
+        A temporary PyAudio instance is created for the listing and terminated
+        afterwards. ``self.audio_interface`` points at it while listing
+        (``get_supported_sample_rates`` probes through that attribute) and is
+        restored to its previous value on exit, so the object is never left
+        holding a terminated interface and an interface that was already in
+        use is not terminated. Errors are printed, not raised.
+        """
+        previous_interface = self.audio_interface
+        listing_interface = None
+        try:
+            init()  # Initialize colorama
+            listing_interface = pyaudio.PyAudio()
+            self.audio_interface = listing_interface
+            device_count = listing_interface.get_device_count()
+
+            print("Available audio input devices:")
+            for i in range(device_count):
+                device_info = listing_interface.get_device_info_by_index(i)
+                device_name = device_info.get('name')
+                max_input_channels = device_info.get('maxInputChannels', 0)
+
+                if max_input_channels <= 0:
+                    continue  # Only consider devices with input capabilities
+
+                supported_rates = self.get_supported_sample_rates(i)
+                print(f"{Fore.LIGHTGREEN_EX}Device {Style.RESET_ALL}{i}{Fore.LIGHTGREEN_EX}: {device_name}{Style.RESET_ALL}")
+
+                # Format each rate in cyan
+                if supported_rates:
+                    rates_formatted = ", ".join(f"{Fore.CYAN}{rate}{Style.RESET_ALL}" for rate in supported_rates)
+                    print(f"  {Fore.YELLOW}Supported sample rates: {rates_formatted}{Style.RESET_ALL}")
+                else:
+                    print(f"  {Fore.YELLOW}Supported sample rates: None{Style.RESET_ALL}")
+
+        except Exception as e:
+            print(f"Error listing devices: {e}")
+        finally:
+            # Only the instance created here is torn down.
+            if listing_interface is not None:
+                listing_interface.terminate()
+            self.audio_interface = previous_interface
+
+    def setup(self):
+        """Create the PyAudio interface and open the input stream.
+
+        The device is ``input_device_index`` or, when that is None, the default
+        input device (the resolved index is stored back on the instance). The
+        stream is opened at the rate ``_get_best_sample_rate`` picks for
+        ``target_samplerate``.
+
+        Returns:
+            bool: True when the stream was opened. False on any failure; in
+            that case the PyAudio interface is terminated and its reference
+            cleared, so a failed setup does not leak it.
+        """
+        try:
+            self.audio_interface = pyaudio.PyAudio()
+
+            if self.debug_mode:
+                print(f"Input device index: {self.input_device_index}")
+            actual_device_index = (self.input_device_index if self.input_device_index is not None
+                                else self.audio_interface.get_default_input_device_info()['index'])
+
+            if self.debug_mode:
+                print(f"Actual selected device index: {actual_device_index}")
+            self.input_device_index = actual_device_index
+            self.device_sample_rate = self._get_best_sample_rate(actual_device_index, self.target_samplerate)
+
+            if self.debug_mode:
+                print(f"Setting up audio on device {self.input_device_index} with sample rate {self.device_sample_rate}")
+
+            try:
+                self.stream = self.audio_interface.open(
+                    format=self.audio_format,
+                    channels=self.channels,
+                    rate=self.device_sample_rate,
+                    input=True,
+                    frames_per_buffer=self.chunk_size,
+                    input_device_index=self.input_device_index,
+                )
+            except Exception as e:
+                print(f"Failed to initialize audio stream at {self.device_sample_rate} Hz: {e}")
+                # Previously this path returned without releasing the interface.
+                self._terminate_interface()
+                return False
+            else:
+                if self.debug_mode:
+                    print(f"Audio recording initialized successfully at {self.device_sample_rate} Hz")
+                return True
+
+        except Exception as e:
+            print(f"Error initializing audio recording: {e}")
+            self._terminate_interface()
+            return False
+
+    def _terminate_interface(self):
+        """Terminate the PyAudio interface, if any, and drop the reference to it."""
+        # Clear the attribute first so a failing terminate() cannot leave a
+        # half-dead interface behind for later calls.
+        interface, self.audio_interface = self.audio_interface, None
+        if interface:
+            interface.terminate()
+
+    def lowpass_filter(self, signal, cutoff_freq, sample_rate):
+        """
+        Low-pass a signal with a zero-phase Butterworth filter (anti-aliasing).
+
+        Args:
+            signal (np.ndarray): Audio samples to filter
+            cutoff_freq (float): Cutoff frequency in Hz
+            sample_rate (float): Sampling rate of ``signal`` in Hz
+
+        Returns:
+            np.ndarray: The filtered samples
+
+        Notes:
+            - The filter is a 5th order digital Butterworth design
+            - filtfilt runs it forward and backward, which cancels the phase shift
+        """
+        # butter() expects the cutoff as a fraction of the Nyquist frequency,
+        # i.e. of half the sample rate.
+        nyquist_rate = sample_rate / 2.0
+        numerator, denominator = butter(
+            5,
+            cutoff_freq / nyquist_rate,
+            btype='low',
+            analog=False,
+        )
+        return filtfilt(numerator, denominator, signal)
+
+    def resample_audio(self, pcm_data, target_sample_rate, original_sample_rate):
+        """
+        Convert audio from one sample rate to another.
+
+        Args:
+            pcm_data (np.ndarray): Audio samples at ``original_sample_rate``
+            target_sample_rate (int): Output sample rate in Hz
+            original_sample_rate (int): Sample rate of ``pcm_data`` in Hz
+
+        Returns:
+            np.ndarray: The samples at ``target_sample_rate``
+
+        Notes:
+            - When downsampling, the data is first low-passed at half the
+              target rate (see lowpass_filter)
+            - The rate change itself is done by polyphase resampling
+        """
+        source = pcm_data
+        if target_sample_rate < original_sample_rate:
+            # Only downsampling needs the extra anti-aliasing pass.
+            source = self.lowpass_filter(pcm_data, target_sample_rate / 2, original_sample_rate)
+        return resample_poly(source, target_sample_rate, original_sample_rate)
+
+    def read_chunk(self):
+        """Return ``chunk_size`` frames read from the open stream.
+
+        The read is made with ``exception_on_overflow=False``.
+        """
+        frames_wanted = self.chunk_size
+        return self.stream.read(frames_wanted, exception_on_overflow=False)
+
+    def cleanup(self):
+        """Release the stream and the PyAudio interface (best effort).
+
+        Both references are cleared up front and each resource is released
+        independently, so an error while stopping the stream no longer skips
+        closing it or terminating the interface. Errors are printed, never
+        raised. Calling this with nothing open is a no-op.
+        """
+        stream, self.stream = self.stream, None
+        interface, self.audio_interface = self.audio_interface, None
+
+        if stream:
+            try:
+                try:
+                    stream.stop_stream()
+                finally:
+                    # Close even when stopping failed.
+                    stream.close()
+            except Exception as e:
+                print(f"Error cleaning up audio resources: {e}")
+
+        if interface:
+            try:
+                interface.terminate()
+            except Exception as e:
+                print(f"Error cleaning up audio resources: {e}")
--- a/minimal_server/RealtimeSTT/audio_recorder.py
+++ b/minimal_server/RealtimeSTT/audio_recorder.py
--- a/minimal_server/RealtimeSTT/audio_recorder_client.py
+++ b/minimal_server/RealtimeSTT/audio_recorder_client.py
@ -0,0 +1,881 @@
+# Module-level switches. Nothing in the visible part of this module reads them.
+# NOTE(review): `debug_mode` here is distinct from the `debug_mode` constructor
+# argument of AudioToTextRecorderClient - confirm whether both are still needed.
+log_outgoing_chunks = False
+debug_mode = False
+
+from typing import Iterable, List, Optional, Union
+from urllib.parse import urlparse
+from datetime import datetime
+from websocket import WebSocketApp
+from websocket import ABNF
+import numpy as np
+import subprocess
+import threading
+import platform
+import logging
+import struct
+import base64
+import wave
+import json
+import time
+import sys
+import os
+
+# Import the AudioInput class
+from .audio_input import AudioInput
+
+# Default WebSocket endpoints (constructor arguments control_url / data_url).
+DEFAULT_CONTROL_URL = "ws://127.0.0.1:8011"
+DEFAULT_DATA_URL = "ws://127.0.0.1:8012"
+
+# Defaults for the AudioToTextRecorderClient constructor arguments. Each INIT_*
+# name maps to the argument of the same (lower-cased) name unless noted.
+INIT_MODEL_TRANSCRIPTION = "tiny"  # default for `model`
+INIT_MODEL_TRANSCRIPTION_REALTIME = "tiny"  # default for `realtime_model_type`
+INIT_REALTIME_PROCESSING_PAUSE = 0.2
+INIT_REALTIME_INITIAL_PAUSE = 0.2  # default for `init_realtime_after_seconds`
+INIT_SILERO_SENSITIVITY = 0.4
+INIT_WEBRTC_SENSITIVITY = 3
+INIT_POST_SPEECH_SILENCE_DURATION = 0.6
+INIT_MIN_LENGTH_OF_RECORDING = 0.5
+INIT_MIN_GAP_BETWEEN_RECORDINGS = 0
+INIT_WAKE_WORDS_SENSITIVITY = 0.6
+INIT_PRE_RECORDING_BUFFER_DURATION = 1.0
+INIT_WAKE_WORD_ACTIVATION_DELAY = 0.0
+INIT_WAKE_WORD_TIMEOUT = 5.0
+INIT_WAKE_WORD_BUFFER_DURATION = 0.1
+ALLOWED_LATENCY_LIMIT = 100  # default for `allowed_latency_limit`
+
+BUFFER_SIZE = 512  # default for `buffer_size`
+SAMPLE_RATE = 16000  # default for `sample_rate`
+
+# Default for `handle_buffer_overflow`: enabled everywhere except on macOS
+# (where platform.system() reports 'Darwin').
+INIT_HANDLE_BUFFER_OVERFLOW = platform.system() != 'Darwin'
+
+# Define ANSI color codes for terminal output
+class bcolors:
+    """ANSI escape sequences for colouring and styling terminal output.
+
+    Wrap text as ``f"{bcolors.FAIL}text{bcolors.ENDC}"``; ENDC resets the
+    terminal to its default style.
+    """
+    HEADER = '\033[95m'   # Magenta
+    OKBLUE = '\033[94m'   # Blue
+    OKCYAN = '\033[96m'   # Cyan
+    OKGREEN = '\033[92m'  # Green
+    WARNING = '\033[93m'  # Yellow
+    FAIL = '\033[91m'     # Red
+    ENDC = '\033[0m'      # Reset to default
+    BOLD = '\033[1m'      # Bold
+    UNDERLINE = '\033[4m' # Underline
+
+def format_timestamp_ns(timestamp_ns: int) -> str:
+    """Render a nanosecond Unix timestamp as local time ``HH:MM:SS.mmm``.
+
+    Args:
+        timestamp_ns: Nanoseconds since the epoch (e.g. from time.time_ns()).
+
+    Returns:
+        The local wall-clock time with a 3-digit millisecond suffix; the
+        sub-millisecond part is truncated, not rounded.
+    """
+    whole_seconds, leftover_ns = divmod(timestamp_ns, 1_000_000_000)
+    clock = datetime.fromtimestamp(whole_seconds).strftime("%H:%M:%S")
+    return f"{clock}.{leftover_ns // 1_000_000:03d}"
+
+class AudioToTextRecorderClient:
+    """
+    A class responsible for capturing audio from the microphone, detecting
+    voice activity, and then transcribing the captured audio using the
+    `faster_whisper` model.
+    """
+
+    def __init__(self,
+                 model: str = INIT_MODEL_TRANSCRIPTION,
+                 download_root: str = None, 
+                 language: str = "",
+                 compute_type: str = "default",
+                 input_device_index: int = None,
+                 gpu_device_index: Union[int, List[int]] = 0,
+                 device: str = "cuda",
+                 on_recording_start=None,
+                 on_recording_stop=None,
+                 on_transcription_start=None,
+                 ensure_sentence_starting_uppercase=True,
+                 ensure_sentence_ends_with_period=True,
+                 use_microphone=True,
+                 spinner=True,
+                 level=logging.WARNING,
+                 batch_size: int = 16,
+
+                 # Realtime transcription parameters
+                 enable_realtime_transcription=False,
+                 use_main_model_for_realtime=False,
+                 realtime_model_type=INIT_MODEL_TRANSCRIPTION_REALTIME,
+                 realtime_processing_pause=INIT_REALTIME_PROCESSING_PAUSE,
+                 init_realtime_after_seconds=INIT_REALTIME_INITIAL_PAUSE,
+                 on_realtime_transcription_update=None,
+                 on_realtime_transcription_stabilized=None,
+                 realtime_batch_size: int = 16,
+
+                 # Voice activation parameters
+                 silero_sensitivity: float = INIT_SILERO_SENSITIVITY,
+                 silero_use_onnx: bool = False,
+                 silero_deactivity_detection: bool = False,
+                 webrtc_sensitivity: int = INIT_WEBRTC_SENSITIVITY,
+                 post_speech_silence_duration: float = (
+                     INIT_POST_SPEECH_SILENCE_DURATION
+                 ),
+                 min_length_of_recording: float = (
+                     INIT_MIN_LENGTH_OF_RECORDING
+                 ),
+                 min_gap_between_recordings: float = (
+                     INIT_MIN_GAP_BETWEEN_RECORDINGS
+                 ),
+                 pre_recording_buffer_duration: float = (
+                     INIT_PRE_RECORDING_BUFFER_DURATION
+                 ),
+                 on_vad_start=None,
+                 on_vad_stop=None,
+                 on_vad_detect_start=None,
+                 on_vad_detect_stop=None,
+                 on_turn_detection_start=None,
+                 on_turn_detection_stop=None,
+
+                 # Wake word parameters
+                 wakeword_backend: str = "pvporcupine",
+                 openwakeword_model_paths: str = None,
+                 openwakeword_inference_framework: str = "onnx",
+                 wake_words: str = "",
+                 wake_words_sensitivity: float = INIT_WAKE_WORDS_SENSITIVITY,
+                 wake_word_activation_delay: float = (
+                    INIT_WAKE_WORD_ACTIVATION_DELAY
+                 ),
+                 wake_word_timeout: float = INIT_WAKE_WORD_TIMEOUT,
+                 wake_word_buffer_duration: float = INIT_WAKE_WORD_BUFFER_DURATION,
+                 on_wakeword_detected=None,
+                 on_wakeword_timeout=None,
+                 on_wakeword_detection_start=None,
+                 on_wakeword_detection_end=None,
+                 on_recorded_chunk=None,
+                 debug_mode=False,
+                 handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW,
+                 beam_size: int = 5,
+                 beam_size_realtime: int = 3,
+                 buffer_size: int = BUFFER_SIZE,
+                 sample_rate: int = SAMPLE_RATE,
+                 initial_prompt: Optional[Union[str, Iterable[int]]] = None,
+                 initial_prompt_realtime: Optional[Union[str, Iterable[int]]] = None,
+                 suppress_tokens: Optional[List[int]] = [-1],
+                 print_transcription_time: bool = False,
+                 early_transcription_on_silence: int = 0,
+                 allowed_latency_limit: int = ALLOWED_LATENCY_LIMIT,
+                 no_log_file: bool = False,
+                 use_extended_logging: bool = False,
+
+                 # Server urls
+                 control_url: str = DEFAULT_CONTROL_URL,
+                 data_url: str = DEFAULT_DATA_URL,
+                 autostart_server: bool = True,
+                 output_wav_file: str = None,
+                 faster_whisper_vad_filter: bool = False,
+                 ):
+        """Store the configuration, connect to the STT server and start recording.
+
+        Every argument is stored on the instance under its own name
+        (``suppress_tokens`` as a copy). The constructor then:
+
+        1. calls ``connect()`` and reports a failure on stderr;
+        2. calls ``start_recording()`` when ``use_microphone`` is true;
+        3. when ``server_already_running`` has been set during the connection
+           step (presumably by ``ensure_server_running()``, which is not
+           visible here - TODO confirm), waits up to 10 seconds for the
+           connection and pushes ``language`` and
+           ``wake_word_activation_delay`` through ``set_parameter``.
+
+        Note that construction therefore performs network I/O and starts
+        threads.
+        """
+
+        # Set instance variables from constructor parameters
+        self.model = model
+        self.language = language
+        self.compute_type = compute_type
+        self.input_device_index = input_device_index
+        self.gpu_device_index = gpu_device_index
+        self.device = device
+        self.on_recording_start = on_recording_start
+        self.on_recording_stop = on_recording_stop
+        self.on_transcription_start = on_transcription_start
+        self.ensure_sentence_starting_uppercase = ensure_sentence_starting_uppercase
+        self.ensure_sentence_ends_with_period = ensure_sentence_ends_with_period
+        self.use_microphone = use_microphone
+        self.spinner = spinner
+        self.level = level
+        self.batch_size = batch_size
+        self.init_realtime_after_seconds = init_realtime_after_seconds
+        self.realtime_batch_size = realtime_batch_size
+
+        # Real-time transcription parameters
+        self.enable_realtime_transcription = enable_realtime_transcription
+        self.use_main_model_for_realtime = use_main_model_for_realtime
+        self.download_root = download_root
+        self.realtime_model_type = realtime_model_type
+        self.realtime_processing_pause = realtime_processing_pause
+        self.on_realtime_transcription_update = on_realtime_transcription_update
+        self.on_realtime_transcription_stabilized = on_realtime_transcription_stabilized
+
+        # Voice activation parameters
+        self.silero_sensitivity = silero_sensitivity
+        self.silero_use_onnx = silero_use_onnx
+        self.silero_deactivity_detection = silero_deactivity_detection
+        self.webrtc_sensitivity = webrtc_sensitivity
+        self.post_speech_silence_duration = post_speech_silence_duration
+        self.min_length_of_recording = min_length_of_recording
+        self.min_gap_between_recordings = min_gap_between_recordings
+        self.pre_recording_buffer_duration = pre_recording_buffer_duration
+
+        self.on_vad_start = on_vad_start
+        self.on_vad_stop = on_vad_stop
+        self.on_vad_detect_start = on_vad_detect_start
+        self.on_vad_detect_stop = on_vad_detect_stop
+        self.on_turn_detection_start = on_turn_detection_start
+        self.on_turn_detection_stop = on_turn_detection_stop
+
+        # Wake word parameters
+        self.wakeword_backend = wakeword_backend
+        self.openwakeword_model_paths = openwakeword_model_paths
+        self.openwakeword_inference_framework = openwakeword_inference_framework
+        self.wake_words = wake_words
+        self.wake_words_sensitivity = wake_words_sensitivity
+        self.wake_word_activation_delay = wake_word_activation_delay
+        self.wake_word_timeout = wake_word_timeout
+        self.wake_word_buffer_duration = wake_word_buffer_duration
+        self.on_wakeword_detected = on_wakeword_detected
+        self.on_wakeword_timeout = on_wakeword_timeout
+        self.on_wakeword_detection_start = on_wakeword_detection_start
+        self.on_wakeword_detection_end = on_wakeword_detection_end
+        self.on_recorded_chunk = on_recorded_chunk
+        self.debug_mode = debug_mode
+        self.handle_buffer_overflow = handle_buffer_overflow
+        self.beam_size = beam_size
+        self.beam_size_realtime = beam_size_realtime
+        self.buffer_size = buffer_size
+        self.sample_rate = sample_rate
+        self.initial_prompt = initial_prompt
+        self.initial_prompt_realtime = initial_prompt_realtime
+        # Copy the list: the default `[-1]` is a single object shared by every
+        # call, so storing it directly would let one instance's mutation leak
+        # into all others (and into the default itself).
+        self.suppress_tokens = (
+            list(suppress_tokens) if suppress_tokens is not None else None
+        )
+        self.print_transcription_time = print_transcription_time
+        self.early_transcription_on_silence = early_transcription_on_silence
+        self.allowed_latency_limit = allowed_latency_limit
+        self.no_log_file = no_log_file
+        self.use_extended_logging = use_extended_logging
+        self.faster_whisper_vad_filter = faster_whisper_vad_filter
+
+        # Server URLs
+        self.control_url = control_url
+        self.data_url = data_url
+        self.autostart_server = autostart_server
+        self.output_wav_file = output_wav_file
+
+        # Instance variables
+        self.muted = False
+        self.recording_thread = None
+        self.is_running = True
+        self.connection_established = threading.Event()
+        self.recording_start = threading.Event()
+        self.final_text_ready = threading.Event()
+        self.realtime_text = ""
+        self.final_text = ""
+        self._recording = False
+        self.server_already_running = False
+        self.wav_file = None
+
+        self.request_counter = 0
+        self.pending_requests = {}  # Map from request_id to threading.Event and value
+
+        if self.debug_mode:
+            print("Checking STT server")
+        if not self.connect():
+            print("Failed to connect to the server.", file=sys.stderr)
+        else:
+            if self.debug_mode:
+                print("STT server is running and connected.")
+
+        if self.use_microphone:
+            self.start_recording()
+
+
+        if self.server_already_running:
+            if not self.connection_established.wait(timeout=10):
+                print("Server connection not established within 10 seconds.")
+            else:
+                self.set_parameter("language", self.language)
+                print(f"Language set to {self.language}")
+                self.set_parameter("wake_word_activation_delay", self.wake_word_activation_delay)
+                print(f"Wake word activation delay set to {self.wake_word_activation_delay}")
+
+    def text(self, on_transcription_finished=None):
+        """Block until the final transcription arrives and return it.
+
+        Resets the text buffers, sets ``recording_start`` and then polls
+        ``final_text_ready`` while the client is running and recording.
+
+        Args:
+            on_transcription_finished: Optional callable. It is invoked with
+                the resulting text on a new thread, both on success and on
+                timeout (with ``""``); it is not invoked when the client
+                stopped running/recording or when an error occurred.
+
+        Returns:
+            str: The final text, or ``""`` on timeout (about 60 seconds of
+            accumulated polling), when the client is not running/recording,
+            or when an unexpected error occurs.
+
+        Raises:
+            KeyboardInterrupt: Re-raised unchanged (original traceback kept).
+        """
+        self.realtime_text = ""
+        self.submitted_realtime_text = ""
+        self.final_text = ""
+        self.final_text_ready.clear()
+
+        self.recording_start.set()
+
+        try:
+            wait_interval = 0.02  # Seconds per poll of final_text_ready (20 ms)
+            max_wait_time = 60  # Give up after this many seconds of polling
+            total_wait_time = 0
+
+            while self.is_running and self._recording:
+                if self.final_text_ready.wait(timeout=wait_interval):
+                    break  # Transcription is ready
+
+                if not self.is_running or not self._recording:
+                    break
+
+                total_wait_time += wait_interval
+
+                if total_wait_time >= max_wait_time:
+                    if self.debug_mode:
+                        print("Timeout while waiting for text from the server.")
+                    self.recording_start.clear()
+                    if on_transcription_finished:
+                        threading.Thread(target=on_transcription_finished, args=("",)).start()
+                    return ""
+
+            self.recording_start.clear()
+
+            if not self.is_running or not self._recording:
+                return ""
+
+            if on_transcription_finished:
+                threading.Thread(target=on_transcription_finished, args=(self.final_text,)).start()
+
+            return self.final_text
+
+        except KeyboardInterrupt:
+            if self.debug_mode:
+                print("KeyboardInterrupt in text(), exiting...")
+            # Bare raise keeps the original exception object and traceback.
+            raise
+
+        except Exception as e:
+            print(f"Error in AudioToTextRecorderClient.text(): {e}")
+            return ""
+
+    def feed_audio(self, chunk, audio_meta_data, original_sample_rate=16000):
+        """Frame one audio chunk with its metadata and send it on the data socket.
+
+        Message layout: a 4-byte little-endian unsigned length, the metadata as
+        UTF-8 encoded JSON of that length, then the raw ``chunk`` bytes.
+
+        Args:
+            chunk: Raw audio bytes to send.
+            audio_meta_data: Optional dict merged into the metadata. When it is
+                non-empty, ``server_sent_to_stt`` (time.time_ns()) and
+                ``server_sent_to_stt_formatted`` are added to the outgoing
+                metadata. The caller's dict itself is left untouched.
+            original_sample_rate: Sent as ``sampleRate``.
+
+        The message is only sent while ``is_running`` is true.
+        """
+        # Start with the base metadata
+        metadata = {"sampleRate": original_sample_rate}
+
+        # Merge additional metadata if provided
+        if audio_meta_data:
+            server_sent_to_stt_ns = time.time_ns()
+            metadata["server_sent_to_stt_formatted"] = format_timestamp_ns(server_sent_to_stt_ns)
+            metadata.update(audio_meta_data)
+            # Set on the outgoing copy rather than on the caller's dict.
+            metadata["server_sent_to_stt"] = server_sent_to_stt_ns
+
+        # Length prefix is taken from the encoded bytes, so it always matches
+        # what is actually put on the wire.
+        metadata_bytes = json.dumps(metadata).encode('utf-8')
+        message = struct.pack('<I', len(metadata_bytes)) + metadata_bytes + chunk
+
+        # Send the message if the connection is running
+        if self.is_running:
+            self.data_ws.send(message, opcode=ABNF.OPCODE_BINARY)
+
+    def set_microphone(self, microphone_on=True):
+        """
+        Switch the microphone on or off by storing the inverse in ``self.muted``.
+        """
+        self.muted = False if microphone_on else True
+
+    def abort(self):
+        """Forward an "abort" request through ``call_method``."""
+        self.call_method("abort")
+
+    def wakeup(self):
+        """Forward a "wakeup" request through ``call_method``."""
+        self.call_method("wakeup")
+
+    def clear_audio_queue(self):
+        """Forward a "clear_audio_queue" request through ``call_method``."""
+        self.call_method("clear_audio_queue")
+
+    def perform_final_transcription(self):
+        """Forward a "perform_final_transcription" request through ``call_method``."""
+        self.call_method("perform_final_transcription")
+
+    def stop(self):
+        """Forward a "stop" request through ``call_method``.
+
+        NOTE(review): nothing here closes the WebSockets or clears
+        ``is_running`` - confirm where local shutdown happens.
+        """
+        self.call_method("stop")
+
+    def connect(self):
+        """Open the control and data WebSockets, each on its own thread.
+
+        Returns:
+            bool: True once ``connection_established`` is set within 10
+            seconds. False if the server could not be ensured to be running,
+            the wait timed out, or an exception was raised during setup.
+        """
+        if not self.ensure_server_running():
+            print("Cannot start STT server. Exiting.")
+            return False
+
+        try:
+            # Control channel. The socket attribute is assigned before its
+            # thread starts.
+            self.control_ws = WebSocketApp(
+                self.control_url,
+                on_message=self.on_control_message,
+                on_error=self.on_error,
+                on_close=self.on_close,
+                on_open=self.on_control_open,
+            )
+            self.control_ws_thread = threading.Thread(
+                target=self.control_ws.run_forever,
+                daemon=False,
+            )
+            self.control_ws_thread.start()
+
+            # Data channel.
+            self.data_ws = WebSocketApp(
+                self.data_url,
+                on_message=self.on_data_message,
+                on_error=self.on_error,
+                on_close=self.on_close,
+                on_open=self.on_data_open,
+            )
+            self.data_ws_thread = threading.Thread(
+                target=self.data_ws.run_forever,
+                daemon=False,
+            )
+            self.data_ws_thread.start()
+
+            # Block until the connection is reported as established.
+            connected = self.connection_established.wait(timeout=10)
+            if connected:
+                if self.debug_mode:
+                    print("WebSocket connections established successfully.")
+                return True
+
+            print("Timeout while connecting to the server.")
+            return False
+        except Exception as exc:
+            print(f"Error while connecting to the server: {exc}")
+            return False
+
+    def start_server(self):
+        """
+        Launch the `stt-server` command as a separate background process.
+
+        The command line is assembled from this instance's configuration:
+        value options are added when set, boolean options are added as bare
+        flags when truthy, and the control/data ports are taken from the
+        configured URLs when those URLs carry an explicit port.
+
+        The process is started without waiting for it to become ready;
+        callers that need a reachable server should poll
+        `is_server_running()` afterwards.
+        """
+        args = ['stt-server']
+
+        # Map constructor parameters to server arguments
+        if self.model:
+            args += ['--model', self.model]
+        if self.realtime_model_type:
+            args += ['--realtime_model_type', self.realtime_model_type]
+        if self.download_root:
+            args += ['--root', self.download_root]
+        if self.batch_size is not None:
+            args += ['--batch', str(self.batch_size)]
+        if self.realtime_batch_size is not None:
+            args += ['--realtime_batch_size', str(self.realtime_batch_size)]
+        if self.init_realtime_after_seconds is not None:
+            args += ['--init_realtime_after_seconds', str(self.init_realtime_after_seconds)]
+        if self.initial_prompt_realtime:
+            # Escape newlines so the prompt stays a single command-line token
+            sanitized_prompt = self.initial_prompt_realtime.replace("\n", "\\n")
+            args += ['--initial_prompt_realtime', sanitized_prompt]
+
+        # Not forwarded to the server command line here: compute_type,
+        # input_device_index, gpu_device_index, device, spinner,
+        # enable_realtime_transcription, handle_buffer_overflow,
+        # suppress_tokens, print_transcription_time, allowed_latency_limit,
+        # no_log_file.
+        if self.debug_mode:
+            args.append('--debug')  # flag, no need for True/False
+
+        if self.language:
+            args += ['--language', self.language]
+        if self.silero_sensitivity is not None:
+            args += ['--silero_sensitivity', str(self.silero_sensitivity)]
+        if self.silero_use_onnx:
+            args.append('--silero_use_onnx')  # flag, no need for True/False
+        if self.webrtc_sensitivity is not None:
+            args += ['--webrtc_sensitivity', str(self.webrtc_sensitivity)]
+        if self.min_length_of_recording is not None:
+            args += ['--min_length_of_recording', str(self.min_length_of_recording)]
+        if self.min_gap_between_recordings is not None:
+            args += ['--min_gap_between_recordings', str(self.min_gap_between_recordings)]
+        if self.realtime_processing_pause is not None:
+            args += ['--realtime_processing_pause', str(self.realtime_processing_pause)]
+        if self.early_transcription_on_silence is not None:
+            args += ['--early_transcription_on_silence', str(self.early_transcription_on_silence)]
+        if self.silero_deactivity_detection:
+            args.append('--silero_deactivity_detection')  # flag, no need for True/False
+        if self.beam_size is not None:
+            args += ['--beam_size', str(self.beam_size)]
+        if self.beam_size_realtime is not None:
+            args += ['--beam_size_realtime', str(self.beam_size_realtime)]
+        if self.wake_words is not None:
+            args += ['--wake_words', str(self.wake_words)]
+        if self.wake_words_sensitivity is not None:
+            args += ['--wake_words_sensitivity', str(self.wake_words_sensitivity)]
+        if self.wake_word_timeout is not None:
+            args += ['--wake_word_timeout', str(self.wake_word_timeout)]
+        if self.wake_word_activation_delay is not None:
+            args += ['--wake_word_activation_delay', str(self.wake_word_activation_delay)]
+        if self.wakeword_backend is not None:
+            args += ['--wakeword_backend', str(self.wakeword_backend)]
+        if self.openwakeword_model_paths:
+            args += ['--openwakeword_model_paths', str(self.openwakeword_model_paths)]
+        if self.openwakeword_inference_framework is not None:
+            args += ['--openwakeword_inference_framework', str(self.openwakeword_inference_framework)]
+        if self.wake_word_buffer_duration is not None:
+            args += ['--wake_word_buffer_duration', str(self.wake_word_buffer_duration)]
+        if self.use_main_model_for_realtime:
+            args.append('--use_main_model_for_realtime')  # flag, no need for True/False
+        if self.use_extended_logging:
+            args.append('--use_extended_logging')  # flag, no need for True/False
+
+        # Only the port of each URL is passed on; URLs without an explicit
+        # port add nothing to the command line.
+        if self.control_url:
+            parsed_control_url = urlparse(self.control_url)
+            if parsed_control_url.port:
+                args += ['--control_port', str(parsed_control_url.port)]
+        if self.data_url:
+            parsed_data_url = urlparse(self.data_url)
+            if parsed_data_url.port:
+                args += ['--data_port', str(parsed_data_url.port)]
+        if self.initial_prompt:
+            sanitized_prompt = self.initial_prompt.replace("\n", "\\n")
+            args += ['--initial_prompt', sanitized_prompt]
+
+        # Start the subprocess with the mapped arguments
+        if os.name == 'nt':  # Windows
+            # NOTE(review): this command goes through the shell. list2cmdline
+            # quotes arguments for the target program, presumably not for
+            # cmd.exe metacharacters - confirm prompts cannot contain them.
+            cmd = 'start /min cmd /c ' + subprocess.list2cmdline(args)
+            # Use the instance setting, like every other debug check in this
+            # class, rather than an unrelated bare `debug_mode` name.
+            if self.debug_mode:
+                print(f"Opening server with cli command: {cmd}")
+            subprocess.Popen(cmd, shell=True)
+        else:  # Unix-like systems
+            # Detach into its own session and discard the server's output.
+            subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True)
+        print("STT server start command issued. Please wait a moment for it to initialize.", file=sys.stderr)
+
+    def is_server_running(self):
+        """
+        Probe the control URL with a WebSocket handshake.
+
+        Returns:
+            bool: True when a connection could be opened (it is closed again
+            right away), False when anything at all goes wrong. The failure
+            is printed only in debug mode.
+        """
+        try:
+            # Imported here, inside the guarded region, so that any failure
+            # of the import is reported the same way as a failed connection.
+            from websocket import create_connection
+            probe = create_connection(self.control_url, timeout=3)
+            probe.close()
+        except Exception as e:
+            if self.debug_mode:
+                print(f"Server connectivity check failed: {e}")
+            return False
+        else:
+            return True
+
+    def ensure_server_running(self):
+        """
+        Make sure an STT server is reachable, starting one if allowed.
+
+        If the server already answers, `server_already_running` is set and
+        True is returned. Otherwise, when `autostart_server` is enabled, the
+        server is launched and probed up to 20 times, pausing one second
+        after each failed probe; after the first successful probe there is a
+        further two-second pause before returning True.
+
+        Returns:
+            bool: True if the server is reachable, False if it could not be
+            started or autostart is disabled.
+        """
+        if self.is_server_running():
+            self.server_already_running = True
+            return True
+
+        if self.debug_mode:
+            print("STT server is not running.", file=sys.stderr)
+
+        if not self.autostart_server:
+            print("STT server is required. Please start it manually.", file=sys.stderr)
+            return False
+
+        self.start_server()
+        if self.debug_mode:
+            print("Waiting for STT server to start...", file=sys.stderr)
+
+        probes_left = 20
+        while probes_left > 0:
+            probes_left -= 1
+            if not self.is_server_running():
+                time.sleep(1)
+                continue
+            if self.debug_mode:
+                print("STT server started successfully.", file=sys.stderr)
+            time.sleep(2)  # Give the server a moment to fully initialize
+            return True
+
+        print("Failed to start STT server.", file=sys.stderr)
+        return False
+    
+    def list_devices(self):
+        """Create a temporary AudioInput and call its `list_devices()`."""
+        AudioInput(debug_mode=self.debug_mode).list_devices()
+
+    def start_recording(self):
+        """Run `record_and_send_audio` on a new non-daemon thread."""
+        worker = threading.Thread(target=self.record_and_send_audio, daemon=False)
+        self.recording_thread = worker
+        worker.start()
+
+    def setup_audio(self):
+        """
+        Create the AudioInput for the configured device and set it up.
+
+        The new object is stored in `self.audio_input` before `setup()` is
+        called; the result of `setup()` is returned unchanged.
+        """
+        device = AudioInput(
+            input_device_index=self.input_device_index,
+            debug_mode=self.debug_mode
+        )
+        self.audio_input = device
+        return device.setup()
+
+    def record_and_send_audio(self):
+        """
+        Read audio chunks and stream them to the data WebSocket.
+
+        Runs until `is_running` becomes false or an error occurs. Each chunk
+        is written to the WAV file (if one is configured) and handed to
+        `on_recorded_chunk` (if set); it is sent to the server only while
+        `recording_start` is set and the client is not muted.
+
+        Wire format of each binary message:
+            4-byte little-endian length of the metadata,
+            UTF-8 JSON metadata ({"sampleRate": ...}),
+            raw audio bytes.
+
+        On exit the audio input is cleaned up, `final_text_ready` is set so
+        that a waiting `text()` call is released, and the running/recording
+        flags are cleared.
+        """
+        self._recording = True
+
+        try:
+            if not self.setup_audio():
+                raise Exception("Failed to set up audio recording.")
+
+            # Initialize WAV file writer if output_wav_file is provided
+            if self.output_wav_file and not self.wav_file:
+                self.wav_file = wave.open(self.output_wav_file, 'wb')
+                self.wav_file.setnchannels(1)
+                self.wav_file.setsampwidth(2)
+                # Record at the sample rate reported by the audio input
+                self.wav_file.setframerate(self.audio_input.device_sample_rate)
+
+            if self.debug_mode:
+                print("Recording and sending audio...")
+
+            while self.is_running:
+                if self.muted:
+                    time.sleep(0.01)
+                    continue
+
+                try:
+                    audio_data = self.audio_input.read_chunk()
+
+                    if self.wav_file:
+                        self.wav_file.writeframes(audio_data)
+
+                    if self.on_recorded_chunk:
+                        self.on_recorded_chunk(audio_data)
+
+                    # Mute may have been switched on while the chunk was read
+                    if self.muted:
+                        continue
+
+                    if self.recording_start.is_set():
+                        metadata = {"sampleRate": self.audio_input.device_sample_rate}
+                        metadata_bytes = json.dumps(metadata).encode('utf-8')
+                        # The length prefix must count the encoded bytes that
+                        # are actually sent, not the characters of the string.
+                        message = struct.pack('<I', len(metadata_bytes)) + metadata_bytes + audio_data
+
+                        if self.is_running:
+                            if log_outgoing_chunks:
+                                print(".", flush=True, end='')
+                            self.data_ws.send(message, opcode=ABNF.OPCODE_BINARY)
+                except KeyboardInterrupt:
+                    if self.debug_mode:
+                        print("KeyboardInterrupt in record_and_send_audio, exiting...")
+                    break
+                except Exception as e:
+                    print(f"Error sending audio data: {e}")
+                    break
+
+        except Exception as e:
+            print(f"Error in record_and_send_audio: {e}", file=sys.stderr)
+        finally:
+            self.cleanup_audio()
+            self.final_text_ready.set() # fake final text to stop the text() method
+            self.is_running = False
+            self._recording = False
+
+    def cleanup_audio(self):
+        """Call `cleanup()` on the audio input, if one was ever created."""
+        if not hasattr(self, 'audio_input'):
+            return
+        self.audio_input.cleanup()
+
+    def on_control_message(self, ws, message):
+        """
+        Handle one JSON message received on the control WebSocket.
+
+        A 'success' status carrying 'parameter', 'value' and a known
+        'request_id' stores the value on the matching pending request and
+        sets its event. An 'error' status is printed. Messages without a
+        'status' key, non-JSON payloads and processing errors are printed
+        too; nothing is raised to the caller.
+        """
+        try:
+            data = json.loads(message)
+
+            if 'status' not in data:
+                print(f"Unknown control message format: {data}")
+                return
+
+            status = data['status']
+            if status == 'error':
+                print(f"Server Error: {data.get('message', '')}")
+                return
+            if status != 'success':
+                return
+
+            # Only parameter replies carry something to deliver.
+            if not ('parameter' in data and 'value' in data):
+                return
+
+            request_id = data.get('request_id')
+            if request_id is None or request_id not in self.pending_requests:
+                return
+
+            if self.debug_mode:
+                print(f"Parameter {data['parameter']} = {data['value']}")
+            waiting = self.pending_requests[request_id]
+            waiting['value'] = data['value']
+            waiting['event'].set()
+        except json.JSONDecodeError:
+            print(f"Received non-JSON control message: {message}")
+        except Exception as e:
+            print(f"Error processing control message: {e}")
+
+    # Handle real-time transcription and full sentence updates
+    def on_data_message(self, ws, message):
+        """
+        Handle one JSON message received on the data WebSocket.
+
+        Dispatch is on the message's 'type':
+          - 'realtime': stores changed text in `realtime_text` and, if set,
+            runs `on_realtime_transcription_update` on a new thread.
+          - 'fullSentence': stores the text in `final_text` and sets
+            `final_text_ready`.
+          - 'transcription_start': passes the base64 int16 audio, converted
+            to float32 and divided by 32768, to `on_transcription_start`.
+          - event types listed in the table below: call the matching
+            zero-argument callback when it is set.
+          - 'recorded_chunk': ignored.
+
+        Unknown types, non-JSON payloads and processing errors are printed;
+        nothing is raised to the caller.
+        """
+        # Message type -> name of the zero-argument callback attribute.
+        event_callbacks = {
+            'recording_start': 'on_recording_start',
+            'recording_stop': 'on_recording_stop',
+            'vad_detect_start': 'on_vad_detect_start',
+            'vad_detect_stop': 'on_vad_detect_stop',
+            'vad_start': 'on_vad_start',
+            'vad_stop': 'on_vad_stop',
+            'start_turn_detection': 'on_turn_detection_start',
+            'stop_turn_detection': 'on_turn_detection_stop',
+            'wakeword_detected': 'on_wakeword_detected',
+            'wakeword_detection_start': 'on_wakeword_detection_start',
+            'wakeword_detection_end': 'on_wakeword_detection_end',
+        }
+
+        try:
+            data = json.loads(message)
+            message_type = data.get('type')
+            # Guard the lookup so an unhashable 'type' is reported as an
+            # unknown message rather than as a processing error.
+            callback_name = (
+                event_callbacks.get(message_type)
+                if isinstance(message_type, str) else None
+            )
+
+            # Handle real-time transcription updates
+            if message_type == 'realtime':
+                if data['text'] != self.realtime_text:
+                    self.realtime_text = data['text']
+
+                    if self.on_realtime_transcription_update:
+                        # Call the callback in a new thread to avoid blocking
+                        threading.Thread(
+                            target=self.on_realtime_transcription_update,
+                            args=(self.realtime_text,)
+                        ).start()
+
+            # Handle full sentences
+            elif message_type == 'fullSentence':
+                self.final_text = data['text']
+                self.final_text_ready.set()
+
+            elif message_type == 'transcription_start':
+                # Decode the audio only when somebody will consume it.
+                if self.on_transcription_start:
+                    decoded_bytes = base64.b64decode(data.get('audio_bytes_base64'))
+
+                    # Reconstruct the np.int16 array from the decoded bytes
+                    audio_array = np.frombuffer(decoded_bytes, dtype=np.int16)
+
+                    # Scale the int16 samples to float32
+                    INT16_MAX_ABS_VALUE = 32768.0
+                    normalized_audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
+
+                    self.on_transcription_start(normalized_audio)
+
+            elif callback_name is not None:
+                callback = getattr(self, callback_name)
+                if callback:
+                    callback()
+
+            elif message_type == 'recorded_chunk':
+                pass
+
+            else:
+                print(f"Unknown data message format: {data}")
+
+        except json.JSONDecodeError:
+            print(f"Received non-JSON data message: {message}")
+        except Exception as e:
+            print(f"Error processing data message: {e}")
+
+    def on_error(self, ws, error):
+        """Print an error reported by one of the WebSockets."""
+        report = f"WebSocket error: {error}"
+        print(report)
+
+    def on_close(self, ws, close_status_code, close_msg):
+        """
+        Mark the client as stopped when either WebSocket closes.
+
+        In debug mode the closed connection (data or control) is printed
+        together with the close status code and message.
+        """
+        verbose = self.debug_mode
+        if verbose and ws == self.data_ws:
+            print(f"Data WebSocket connection closed: {close_status_code} - {close_msg}")
+        elif verbose and ws == self.control_ws:
+            print(f"Control WebSocket connection closed: {close_status_code} - {close_msg}")
+
+        self.is_running = False
+
+    def on_control_open(self, ws):
+        """Signal `connection_established` once the control socket opens."""
+        verbose = self.debug_mode
+        if verbose:
+            print("Control WebSocket connection opened.")
+        self.connection_established.set()
+
+    def on_data_open(self, ws):
+        """Report, in debug mode only, that the data socket has opened."""
+        if not self.debug_mode:
+            return
+        print("Data WebSocket connection opened.")
+
+    def set_parameter(self, parameter, value):
+        """
+        Send a `set_parameter` command over the control WebSocket.
+
+        Args:
+            parameter: Name of the server-side parameter.
+            value: New value; it must be JSON-serializable.
+        """
+        payload = json.dumps({
+            "command": "set_parameter",
+            "parameter": parameter,
+            "value": value
+        })
+        self.control_ws.send(payload)
+
+    def get_parameter(self, parameter):
+        """
+        Request a parameter value from the server and wait for the reply.
+
+        A `get_parameter` command tagged with a fresh request id is sent on
+        the control WebSocket; `on_control_message` delivers the answer by
+        filling the pending-request entry and setting its event.
+
+        Args:
+            parameter: Name of the server-side parameter.
+
+        Returns:
+            The value reported by the server, or None if no reply arrived
+            within 5 seconds.
+
+        Raises:
+            Whatever `control_ws.send` raises; the pending-request entry is
+            removed in that case as well.
+        """
+        # Generate a unique request_id
+        request_id = self.request_counter
+        self.request_counter += 1
+
+        # Prepare the command with the request_id
+        command = {
+            "command": "get_parameter",
+            "parameter": parameter,
+            "request_id": request_id
+        }
+
+        # Register the slot the control-message handler will fill in
+        pending = {'event': threading.Event(), 'value': None}
+        self.pending_requests[request_id] = pending
+
+        try:
+            # Send the command to the server
+            self.control_ws.send(json.dumps(command))
+
+            # Wait for the response or timeout after 5 seconds
+            if pending['event'].wait(timeout=5):
+                return pending['value']
+
+            print(f"Timeout waiting for get_parameter {parameter}")
+            return None
+        finally:
+            # Always drop the entry - on success, on timeout and when send()
+            # fails - so abandoned requests cannot pile up.
+            self.pending_requests.pop(request_id, None)
+
+    def call_method(self, method, args=None, kwargs=None):
+        """
+        Send a `call_method` command over the control WebSocket.
+
+        Args:
+            method: Name of the method to invoke on the server side.
+            args: Positional arguments; an empty list is sent when falsy.
+            kwargs: Keyword arguments; an empty dict is sent when falsy.
+        """
+        positional = args if args else []
+        named = kwargs if kwargs else {}
+        payload = json.dumps({
+            "command": "call_method",
+            "method": method,
+            "args": positional,
+            "kwargs": named
+        })
+        self.control_ws.send(payload)
+
+    def shutdown(self):
+        """
+        Stop the client and release its resources.
+
+        Order: clear `is_running`, close the control and data WebSockets,
+        join the control, data and recording threads, then clean up the
+        audio input. Sockets and threads that are unset are skipped.
+        """
+        self.is_running = False
+
+        # Attributes are looked up one at a time, in order, so each socket is
+        # closed before the next attribute is even read.
+        for socket_attr in ('control_ws', 'data_ws'):
+            connection = getattr(self, socket_attr)
+            if connection:
+                connection.close()
+
+        # Join threads
+        for thread_attr in ('control_ws_thread', 'data_ws_thread', 'recording_thread'):
+            worker = getattr(self, thread_attr)
+            if worker:
+                worker.join()
+
+        # Clean up audio
+        self.cleanup_audio()
+
+    def __enter__(self):
+        """
+        Enter a `with` block.
+
+        No setup is performed here; the instance itself is handed to the
+        `as` target so it can be used inside the block.
+
+        Returns:
+            self: This instance.
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """
+        Leave a `with` block by shutting the client down.
+
+        `shutdown()` is called whether or not the block raised. Nothing is
+        returned, so an exception raised inside the block is not suppressed.
+
+        Args:
+            exc_type (type or None): Class of the exception that ended the
+              block, if any.
+            exc_value (Exception or None): The exception instance, if any.
+            traceback (Traceback or None): Traceback of that exception, if
+              any.
+        """
+        self.shutdown()
--- a/minimal_server/RealtimeSTT/safepipe.py
+++ b/minimal_server/RealtimeSTT/safepipe.py
@ -0,0 +1,245 @@
+import sys
+import multiprocessing as mp
+import queue
+import threading
+import time
+import logging
+
+# Logging is left to the application; to see this module's debug output,
+# configure it there, for example:
+# logging.basicConfig(level=logging.DEBUG,
+#                     format='[%(asctime)s] %(levelname)s:%(name)s: %(message)s')
+logger = logging.getLogger(__name__)
+
+try:
+    # On Linux and macOS "spawn" is always requested (and the RuntimeError
+    # raised when a method was already chosen is handled below); elsewhere it
+    # is requested only when no start method has been set yet.
+    if (sys.platform.startswith('linux') or sys.platform == 'darwin'
+            or mp.get_start_method(allow_none=True) is None):
+        mp.set_start_method("spawn")
+except RuntimeError as e:
+    logger.debug("Start method has already been set. Details: %s", e)
+
+
+class ParentPipe:
+    """
+    A thread-safe wrapper around the 'parent end' of a multiprocessing pipe.
+    All actual pipe operations happen in a dedicated worker thread, so it's safe
+    for multiple threads to call send(), recv(), or poll() on the same ParentPipe
+    without interfering.
+
+    Once the worker has stopped - because close() was called or because the
+    pipe failed - the wrapper counts as closed: send() returns immediately,
+    recv() returns None and poll() returns False instead of waiting for a
+    worker that will never answer.
+    """
+    def __init__(self, parent_synthesize_pipe):
+        """
+        Wrap `parent_synthesize_pipe` and start the worker thread.
+
+        Args:
+            parent_synthesize_pipe: The raw connection object whose send(),
+                recv(), poll() and close() are driven by the worker.
+        """
+        self.name = "ParentPipe"
+        self._pipe = parent_synthesize_pipe  # The raw pipe.
+        self._closed = False  # Set by close() and when the worker stops.
+
+        # The request queue for sending operations to the worker.
+        self._request_queue = queue.Queue()
+
+        # This event signals the worker thread to stop.
+        self._stop_event = threading.Event()
+
+        # Worker thread that executes actual .send(), .recv(), .poll() calls.
+        self._worker_thread = threading.Thread(
+            target=self._pipe_worker,
+            name=f"{self.name}_Worker",
+            daemon=True
+        )
+        self._worker_thread.start()
+
+    def _pipe_worker(self):
+        """
+        Serve queued requests one at a time until told to stop.
+
+        The loop ends on a CLOSE request, when the stop event is set, or
+        when a pipe operation fails. The raw pipe is closed on the way out.
+        """
+        while not self._stop_event.is_set():
+            try:
+                # Short timeout so the stop event is re-checked regularly.
+                request = self._request_queue.get(timeout=0.1)
+            except queue.Empty:
+                continue
+
+            if request["type"] == "CLOSE":
+                # Exit worker loop on CLOSE request.
+                break
+
+            try:
+                if request["type"] == "SEND":
+                    data = request["data"]
+                    logger.debug("[%s] Worker: sending => %s", self.name, data)
+                    self._pipe.send(data)
+                    request["result_queue"].put(None)
+
+                elif request["type"] == "RECV":
+                    logger.debug("[%s] Worker: receiving...", self.name)
+                    data = self._pipe.recv()
+                    request["result_queue"].put(data)
+
+                elif request["type"] == "POLL":
+                    timeout = request.get("timeout", 0.0)
+                    logger.debug("[%s] Worker: poll() with timeout: %s", self.name, timeout)
+                    result = self._pipe.poll(timeout)
+                    request["result_queue"].put(result)
+
+            except (EOFError, BrokenPipeError, OSError) as e:
+                # When the other end has closed or an error occurs,
+                # log and notify the waiting thread.
+                logger.debug("[%s] Worker: pipe closed or error occurred (%s). Shutting down.", self.name, e)
+                # Flag the wrapper as closed before releasing the caller so
+                # its next call short-circuits instead of queueing.
+                self._closed = True
+                request["result_queue"].put(None)
+                break
+
+            except Exception as e:
+                logger.exception("[%s] Worker: unexpected error.", self.name)
+                self._closed = True
+                request["result_queue"].put(e)
+                break
+
+        logger.debug("[%s] Worker: stopping.", self.name)
+        # Nobody services the request queue from here on.
+        self._closed = True
+        try:
+            self._pipe.close()
+        except Exception as e:
+            logger.debug("[%s] Worker: error during pipe close: %s", self.name, e)
+
+    def _wait_for_result(self, result_queue, default=None):
+        """
+        Wait for the worker's answer without hanging on a dead worker.
+
+        Returns the queued result, or `default` when the worker thread has
+        stopped without answering this request.
+        """
+        while True:
+            try:
+                return result_queue.get(timeout=0.1)
+            except queue.Empty:
+                if self._worker_thread.is_alive():
+                    continue
+                # The answer may have been queued just before the worker
+                # exited; take it if it is there.
+                try:
+                    return result_queue.get_nowait()
+                except queue.Empty:
+                    return default
+
+    def send(self, data):
+        """
+        Synchronously asks the worker thread to perform .send().
+        Returns without sending when the pipe is already closed.
+        """
+        if self._closed:
+            logger.debug("[%s] send() called but pipe is already closed", self.name)
+            return
+        logger.debug("[%s] send() requested with: %s", self.name, data)
+        result_queue = queue.Queue()
+        request = {
+            "type": "SEND",
+            "data": data,
+            "result_queue": result_queue
+        }
+        self._request_queue.put(request)
+        self._wait_for_result(result_queue)  # Wait until sending completes.
+        logger.debug("[%s] send() completed", self.name)
+
+    def recv(self):
+        """
+        Synchronously asks the worker to perform .recv() and returns the data.
+        Returns None when the pipe is closed or fails while receiving.
+        """
+        if self._closed:
+            logger.debug("[%s] recv() called but pipe is already closed", self.name)
+            return None
+        logger.debug("[%s] recv() requested", self.name)
+        result_queue = queue.Queue()
+        request = {
+            "type": "RECV",
+            "result_queue": result_queue
+        }
+        self._request_queue.put(request)
+        data = self._wait_for_result(result_queue)
+
+        # Log a preview for huge byte blobs.
+        if isinstance(data, tuple) and len(data) == 2 and isinstance(data[1], bytes):
+            data_preview = (data[0], f"<{len(data[1])} bytes>")
+        else:
+            data_preview = data
+        logger.debug("[%s] recv() returning => %s", self.name, data_preview)
+        return data
+
+    def poll(self, timeout=0.0):
+        """
+        Synchronously checks whether data is available.
+        Returns True if data is ready, or False otherwise.
+
+        A `timeout` of None is handed to the pipe unchanged and this call
+        waits until the worker answers or stops.
+        """
+        if self._closed:
+            return False
+        logger.debug("[%s] poll() requested with timeout: %s", self.name, timeout)
+        result_queue = queue.Queue()
+        request = {
+            "type": "POLL",
+            "timeout": timeout,
+            "result_queue": result_queue
+        }
+        self._request_queue.put(request)
+        if timeout is None:
+            result = self._wait_for_result(result_queue, default=False)
+        else:
+            try:
+                # Use a slightly longer timeout to give the worker a chance.
+                result = result_queue.get(timeout=timeout + 0.1)
+            except queue.Empty:
+                result = False
+        # A failed poll is answered with None (or an exception object);
+        # report anything but a real True as "no data".
+        result = result is True
+        logger.debug("[%s] poll() returning => %s", self.name, result)
+        return result
+
+    def close(self):
+        """
+        Closes the pipe and stops the worker thread. The _closed flag makes
+        sure no further operations are attempted.
+        """
+        if self._closed:
+            return
+        logger.debug("[%s] close() called", self.name)
+        self._closed = True
+        stop_request = {"type": "CLOSE", "result_queue": queue.Queue()}
+        self._request_queue.put(stop_request)
+        self._stop_event.set()
+        self._worker_thread.join()
+        logger.debug("[%s] closed", self.name)
+
+
+def SafePipe(debug=False):
+    """
+    Create a pipe and wrap its parent end for use from several threads.
+
+    Args:
+        debug: Accepted but not used by this function.
+
+    Returns:
+        tuple: (ParentPipe wrapping the parent end, raw child end).
+    """
+    raw_parent_end, raw_child_end = mp.Pipe()
+    return ParentPipe(raw_parent_end), raw_child_end
+
+
+def child_process_code(child_end):
+    """
+    Demo child process: acknowledge three messages, then close the pipe.
+
+    Each received message is logged and answered with "ACK: <message>".
+    """
+    remaining = 3
+    while remaining:
+        remaining -= 1
+        incoming = child_end.recv()
+        logger.debug("[Child] got: %s", incoming)
+        child_end.send(f"ACK: {incoming}")
+    child_end.close()
+
+
+if __name__ == "__main__":
+    # Demo: three threads share one ParentPipe to talk to a child process.
+    parent_pipe, child_pipe = SafePipe()
+
+    # The child answers three messages and then closes its end.
+    child = mp.Process(target=child_process_code, args=(child_pipe,))
+    child.start()
+
+    # Lets sender threads leave their polling loop early on shutdown.
+    stop_polling_event = threading.Event()
+
+    def sender_thread(n):
+        """Send one greeting, then poll up to ten times for a reply."""
+        try:
+            parent_pipe.send(f"hello_from_thread_{n}")
+        except Exception as e:
+            logger.debug("[sender_thread_%s] send exception: %s", n, e)
+            return
+
+        attempts = 0
+        while attempts < 10:
+            attempts += 1
+            try:
+                if not parent_pipe.poll(0.1):
+                    logger.debug("[sender_thread_%s] no data yet...", n)
+                else:
+                    reply = parent_pipe.recv()
+                    logger.debug("[sender_thread_%s] got: %s", n, reply)
+                    break
+            except (OSError, EOFError, BrokenPipeError) as e:
+                logger.debug("[sender_thread_%s] poll/recv exception: %s. Exiting thread.", n, e)
+                break
+
+            # Allow exit if a shutdown is signaled.
+            if stop_polling_event.is_set():
+                logger.debug("[sender_thread_%s] stop event set. Exiting thread.", n)
+                break
+
+    senders = []
+    for index in range(3):
+        sender = threading.Thread(target=sender_thread, args=(index,))
+        sender.start()
+        senders.append(sender)
+
+    for sender in senders:
+        sender.join()
+
+    # Signal shutdown to any polling threads, then close the pipe.
+    stop_polling_event.set()
+    parent_pipe.close()
+    child.join()
--- a/minimal_server/RealtimeSTT/server.py
+++ b/minimal_server/RealtimeSTT/server.py
@ -0,0 +1,23 @@
+from fastapi import FastAPI, WebSocket
+from RealtimeSTT.audio_recorder import AudioToTextRecorder
+import numpy as np
+
+# Application object on which the WebSocket route below is registered.
+app = FastAPI()
+
+# One recorder, created when this module is imported and shared by every
+# connection handled by the endpoint below.
+# NOTE(review): use_microphone=False presumably means audio is supplied only
+# through feed_audio() - confirm against AudioToTextRecorder.
+recorder = AudioToTextRecorder(
+    model="tiny",
+    device="cuda",
+    compute_type="float16",
+    use_microphone=False,
+)
+
+@app.websocket("/ws/transcribe")
+async def websocket_endpoint(websocket: WebSocket):
+    """
+    Transcribe audio received over a WebSocket.
+
+    Each binary message is interpreted as float32 samples, fed to the shared
+    recorder, and answered with the text returned by `recorder.text()`. The
+    handler returns quietly when the client disconnects.
+
+    NOTE(review): a transcription is awaited after every message, so no
+    further audio is fed while waiting - confirm each message carries a
+    complete utterance.
+    """
+    # Imported locally so this handler brings its own dependencies.
+    import asyncio
+    from fastapi import WebSocketDisconnect
+
+    await websocket.accept()
+    try:
+        while True:
+            data = await websocket.receive_bytes()
+            # Convert the bytes to a numpy array (adjust to your format)
+            audio = np.frombuffer(data, dtype=np.float32)
+            recorder.feed_audio(audio)
+            # recorder.text() is a plain synchronous call; run it in a worker
+            # thread so the event loop keeps serving other connections while
+            # the transcription is pending.
+            text = await asyncio.to_thread(recorder.text)
+            await websocket.send_text(text)
+    except WebSocketDisconnect:
+        # The client went away: a normal end of the session, not an error.
+        return
--- a/minimal_server/RealtimeSTT/warmup_audio.wav
+++ b/minimal_server/RealtimeSTT/warmup_audio.wav