diff --git a/pyproject.toml b/pyproject.toml index fbd0760..2ec0a18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "rich>=13.0.0", "rumps>=0.4.0", "realtimestt>=0.3.104", - "pyautogui>=0.9.54", + "pynput>=1.7.6", ] [project.optional-dependencies] @@ -29,6 +29,9 @@ screenshot-all = [ "pyscreenshot>=3.1" ] # All screenshot libraries for maximum compatibility +# Keyboard automation +keyboard = ["pynput>=1.7.6"] # Cross-platform keyboard automation + [project.scripts] ocr-screenshot = "tooling.cli:cli_main" tooling = "tooling.cli:cli_main" diff --git a/realtimesst.log b/realtimesst.log index b04a81b..a8b4b77 100644 --- a/realtimesst.log +++ b/realtimesst.log @@ -1406,3 +1406,386 @@ ValueError: unsupported device mps 2025-07-23 12:15:05.513 - RealTimeSTT: realtimestt - INFO - Setting listen time 2025-07-23 12:15:05.513 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' 2025-07-23 12:15:05.515 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:37:04.498 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT +2025-07-23 12:37:04.505 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512 +2025-07-23 12:37:04.511 - RealTimeSTT: realtimestt - INFO - Initializing faster_whisper realtime transcription model tiny, default device: cpu, compute type: default, device index: 0, download root: None +2025-07-23 12:37:05.119 - RealTimeSTT: realtimestt - DEBUG - Faster_whisper realtime speech to text transcription model initialized successfully +2025-07-23 12:37:05.119 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3 +2025-07-23 12:37:05.119 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully +2025-07-23 12:37:05.708 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully +2025-07-23 12:37:05.708 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker +2025-07-23 12:37:05.708 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start +2025-07-23 12:37:10.104 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready +2025-07-23 12:37:10.105 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully +2025-07-23 12:37:10.105 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:37:10.105 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:37:10.106 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:37:10.361 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:37:10.361 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:37:10.361 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:37:10.362 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:37:10.374 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 7168 +2025-07-23 12:37:10.758 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: This is +2025-07-23 12:37:10.758 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 13312 +2025-07-23 12:37:11.082 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: This is a pair. +2025-07-23 12:37:11.082 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 18432 +2025-07-23 12:37:11.452 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: This is a better +2025-07-23 12:37:11.962 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:37:11.962 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 32256 +2025-07-23 12:37:11.962 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:37:11.978 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:37:11.979 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:37:11.986 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:12.094 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:12.203 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:12.305 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:12.405 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:12.506 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:12.523 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:37:12.539 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.56 seconds +2025-07-23 12:37:13.113 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:37:13.113 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:37:13.114 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:37:13.817 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:37:13.817 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:37:13.817 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:37:13.817 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:37:13.832 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:37:14.192 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: and I wrote this. +2025-07-23 12:37:14.192 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:37:14.524 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: and I wrote myself. +2025-07-23 12:37:14.903 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 34304 +2025-07-23 12:37:15.216 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: and I wrote myself. +2025-07-23 12:37:15.734 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:37:15.734 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 47104 +2025-07-23 12:37:15.734 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:37:15.735 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:37:15.735 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:37:15.739 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:15.843 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:15.946 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:16.047 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:16.149 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:16.250 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:16.276 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:37:16.282 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.55 seconds +2025-07-23 12:37:16.958 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:37:16.958 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:37:16.959 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:37:17.144 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:37:17.144 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:37:17.144 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:37:17.144 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:37:17.150 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:37:17.479 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Why are you not typing it? +2025-07-23 12:37:17.480 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:37:17.866 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Why are you not typing in the code? +2025-07-23 12:37:17.866 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 28160 +2025-07-23 12:37:18.271 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Why are you not typing in the correct word? +2025-07-23 12:37:18.555 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:37:18.555 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 38912 +2025-07-23 12:37:18.555 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:37:18.650 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:37:18.651 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:37:18.659 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:18.760 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:18.862 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:18.963 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:19.064 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:19.165 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:37:19.218 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:37:19.222 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.57 seconds +2025-07-23 12:37:20.491 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:37:20.492 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:37:20.492 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:37:25.723 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread +2025-07-23 12:37:25.723 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0 +2025-07-23 12:37:25.723 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'inactive' +2025-07-23 12:37:25.729 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process +2025-07-23 12:37:26.385 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process +2025-07-23 12:37:26.386 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread +2025-07-23 12:40:01.289 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT +2025-07-23 12:40:01.301 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512 +2025-07-23 12:40:01.310 - RealTimeSTT: realtimestt - INFO - Initializing faster_whisper realtime transcription model tiny, default device: cpu, compute type: default, device index: 0, download root: None +2025-07-23 12:40:02.041 - RealTimeSTT: realtimestt - DEBUG - Faster_whisper realtime speech to text transcription model initialized successfully +2025-07-23 12:40:02.041 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3 +2025-07-23 12:40:02.041 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully +2025-07-23 12:40:02.547 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully +2025-07-23 12:40:02.547 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker +2025-07-23 12:40:02.547 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start +2025-07-23 12:40:06.631 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready +2025-07-23 12:40:06.631 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully +2025-07-23 12:40:06.632 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:40:06.632 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:40:06.632 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:40:06.735 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:40:06.735 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:40:06.735 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:40:06.735 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:40:06.744 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 8192 +2025-07-23 12:40:07.118 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Okay, I'll touch you. +2025-07-23 12:40:07.118 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 14336 +2025-07-23 12:40:07.479 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Okay, I'll close your eyes. +2025-07-23 12:40:07.479 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 19456 +2025-07-23 12:40:07.888 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Okay, I'll touch your eye on the duck. +2025-07-23 12:40:07.888 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 26624 +2025-07-23 12:40:08.252 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: I'll close your eye on the drawing. +2025-07-23 12:40:08.252 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 31744 +2025-07-23 12:40:08.705 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to do it differently? +2025-07-23 12:40:08.705 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 38912 +2025-07-23 12:40:09.130 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know the link on the people +2025-07-23 12:40:09.130 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 46080 +2025-07-23 12:40:09.546 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know the very link on the peoplehip myth. +2025-07-23 12:40:09.550 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 53248 +2025-07-23 12:40:10.009 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know the very link on the people clip method is +2025-07-23 12:40:10.009 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 60416 +2025-07-23 12:40:10.493 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know the very link on the people clip method is the most +2025-07-23 12:40:10.493 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 67584 +2025-07-23 12:40:10.978 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know directly on the people clip method is the most particular. +2025-07-23 12:40:10.979 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 75776 +2025-07-23 12:40:11.452 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know the very link on the people clip method is the most particular in recommend. +2025-07-23 12:40:11.452 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 82944 +2025-07-23 12:40:11.983 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: or do you want to know directly on the people clip method is the most particular in Recommendants. +2025-07-23 12:40:12.490 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:40:12.491 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 99840 +2025-07-23 12:40:12.491 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:40:12.491 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:40:12.492 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:40:12.498 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:12.599 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:12.709 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:12.814 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:12.915 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:13.016 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:13.117 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:13.218 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:13.267 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:40:13.284 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.79 seconds +2025-07-23 12:40:13.434 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:40:13.434 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:40:13.434 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:40:15.694 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:40:15.694 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:40:15.694 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:40:15.694 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:40:15.709 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:40:16.037 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Alright. +2025-07-23 12:40:16.037 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:40:16.378 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Are you serious? +2025-07-23 12:40:16.378 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 27136 +2025-07-23 12:40:16.750 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Are you serious enough? +2025-07-23 12:40:17.292 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:40:17.293 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 41984 +2025-07-23 12:40:17.293 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:40:17.361 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:40:17.362 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:40:17.363 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:17.464 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:17.565 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:17.666 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:17.767 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:40:17.858 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:40:17.876 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.51 seconds +2025-07-23 12:40:18.005 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:40:18.005 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:40:18.006 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:40:22.543 - RealTimeSTT: realtimestt - DEBUG - Receive from stdout pipe +2025-07-23 12:40:22.544 - RealTimeSTT: realtimestt - INFO - None +2025-07-23 12:40:22.558 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0 +2025-07-23 12:40:22.558 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'inactive' +2025-07-23 12:40:22.572 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:40:22.572 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:40:22.573 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:44:00.476 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT +2025-07-23 12:44:00.502 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512 +2025-07-23 12:44:00.510 - RealTimeSTT: realtimestt - INFO - Initializing faster_whisper realtime transcription model tiny, default device: cpu, compute type: default, device index: 0, download root: None +2025-07-23 12:44:01.152 - RealTimeSTT: realtimestt - DEBUG - Faster_whisper realtime speech to text transcription model initialized successfully +2025-07-23 12:44:01.152 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3 +2025-07-23 12:44:01.152 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully +2025-07-23 12:44:01.759 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully +2025-07-23 12:44:01.759 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker +2025-07-23 12:44:01.759 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start +2025-07-23 12:44:05.724 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready +2025-07-23 12:44:05.725 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully +2025-07-23 12:44:05.725 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:44:05.725 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:44:05.726 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:44:07.848 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:44:07.848 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:44:07.848 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:44:07.849 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:44:07.851 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:44:08.226 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Thank you. +2025-07-23 12:44:08.226 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:44:08.660 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Did you say so? +2025-07-23 12:44:08.661 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 29184 +2025-07-23 12:44:09.016 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Did you say something? +2025-07-23 12:44:09.017 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 35328 +2025-07-23 12:44:09.380 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: and just say something, do something +2025-07-23 12:44:09.380 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 40448 +2025-07-23 12:44:09.852 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Did you say something? Did you say something? +2025-07-23 12:44:09.900 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 49664 +2025-07-23 12:44:10.335 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Would you say something to something or whatever? +2025-07-23 12:44:10.335 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 55808 +2025-07-23 12:44:10.748 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Would you say something to something or whatever it is? +2025-07-23 12:44:10.748 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 62976 +2025-07-23 12:44:11.156 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Would you say something to something or whatever, or you want it? +2025-07-23 12:44:11.156 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 69120 +2025-07-23 12:44:11.635 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Would you say something to something or whatever, or you want to do it? +2025-07-23 12:44:11.817 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:44:11.818 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 79872 +2025-07-23 12:44:11.818 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:44:11.861 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:44:11.862 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:44:11.874 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:11.978 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:12.079 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:12.180 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:12.281 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:12.382 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:12.483 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:12.528 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:44:12.580 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.72 seconds +2025-07-23 12:44:12.581 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:44:12.581 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:44:12.581 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:44:16.551 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:44:16.551 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:44:16.551 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:44:16.551 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:44:16.557 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:44:16.913 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Okay. +2025-07-23 12:44:16.914 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:44:17.267 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: You cannot even. +2025-07-23 12:44:17.900 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:44:17.901 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 37888 +2025-07-23 12:44:17.901 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:44:17.991 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:44:17.991 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:44:17.993 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:18.113 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:18.218 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:18.319 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:18.421 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:44:18.512 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:44:18.564 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.57 seconds +2025-07-23 12:44:18.564 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:44:18.564 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:44:18.564 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:44:29.130 - RealTimeSTT: realtimestt - DEBUG - Receive from stdout pipe +2025-07-23 12:44:29.131 - RealTimeSTT: realtimestt - INFO - None +2025-07-23 12:44:29.136 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0 +2025-07-23 12:44:29.136 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'inactive' +2025-07-23 12:45:00.174 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT +2025-07-23 12:45:00.186 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512 +2025-07-23 12:45:00.191 - RealTimeSTT: realtimestt - INFO - Initializing faster_whisper realtime transcription model tiny, default device: cpu, compute type: default, device index: 0, download root: None +2025-07-23 12:45:00.865 - RealTimeSTT: realtimestt - DEBUG - Faster_whisper realtime speech to text transcription model initialized successfully +2025-07-23 12:45:00.865 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3 +2025-07-23 12:45:00.865 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully +2025-07-23 12:45:01.014 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully +2025-07-23 12:45:01.014 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker +2025-07-23 12:45:01.014 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start +2025-07-23 12:45:05.413 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready +2025-07-23 12:45:05.413 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully +2025-07-23 12:45:05.414 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:45:05.414 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:45:05.414 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:45:06.924 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:45:06.924 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:45:06.924 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:45:06.924 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:45:06.943 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:45:07.314 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: I'm sorry. +2025-07-23 12:45:07.314 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 23040 +2025-07-23 12:45:07.640 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: you +2025-07-23 12:45:07.640 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 28160 +2025-07-23 12:45:07.974 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: you +2025-07-23 12:45:08.073 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 35328 +2025-07-23 12:45:08.412 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: you +2025-07-23 12:45:08.462 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 41472 +2025-07-23 12:45:08.819 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: you +2025-07-23 12:45:08.820 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 46592 +2025-07-23 12:45:09.153 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you? +2025-07-23 12:45:09.153 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 51712 +2025-07-23 12:45:09.519 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you find... +2025-07-23 12:45:09.519 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 57856 +2025-07-23 12:45:09.891 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally talk? +2025-07-23 12:45:10.058 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 67072 +2025-07-23 12:45:10.460 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally try? +2025-07-23 12:45:10.506 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 74240 +2025-07-23 12:45:10.864 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally try? +2025-07-23 12:45:10.864 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 79360 +2025-07-23 12:45:11.272 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally try? +2025-07-23 12:45:11.272 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 85504 +2025-07-23 12:45:11.657 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally try? +2025-07-23 12:45:11.657 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 91648 +2025-07-23 12:45:12.108 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally try? Are you evil? +2025-07-23 12:45:12.108 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 99840 +2025-07-23 12:45:12.547 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you finally try it? Are you even working? +2025-07-23 12:45:12.876 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:45:12.876 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 111616 +2025-07-23 12:45:12.876 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:45:12.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:45:12.946 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:45:12.947 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:13.048 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:13.149 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:13.251 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:13.352 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:13.453 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:13.546 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:45:13.574 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.63 seconds +2025-07-23 12:45:13.602 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:45:13.602 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:45:13.602 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:45:14.030 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:45:14.030 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:45:14.030 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:45:14.031 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:45:14.040 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:45:14.389 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: say. +2025-07-23 12:45:14.389 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:45:14.780 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: say something. +2025-07-23 12:45:14.780 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 28160 +2025-07-23 12:45:15.140 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: say something. +2025-07-23 12:45:15.436 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:45:15.437 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 38912 +2025-07-23 12:45:15.437 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:45:15.466 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:45:15.466 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:45:15.474 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:15.580 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:15.686 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:15.787 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:15.888 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:15.973 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:45:15.987 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.52 seconds +2025-07-23 12:45:15.992 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:45:15.992 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:45:15.992 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start +2025-07-23 12:45:19.274 - RealTimeSTT: realtimestt - INFO - voice activity detected +2025-07-23 12:45:19.274 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-23 12:45:19.274 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'recording' +2025-07-23 12:45:19.275 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording stop +2025-07-23 12:45:19.280 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 16896 +2025-07-23 12:45:19.653 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Okay. +2025-07-23 12:45:19.653 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 22016 +2025-07-23 12:45:19.975 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you talk? +2025-07-23 12:45:19.975 - RealTimeSTT: realtimestt - DEBUG - Current realtime buffer size: 27136 +2025-07-23 12:45:20.340 - RealTimeSTT: realtimestt - DEBUG - Realtime text detected: Can you type some? +2025-07-23 12:45:20.939 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-23 12:45:20.939 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 43008 +2025-07-23 12:45:20.939 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-23 12:45:21.044 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-23 12:45:21.044 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-23 12:45:21.067 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:21.168 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:21.273 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:21.375 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:21.476 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:21.577 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-23 12:45:21.634 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-23 12:45:21.650 - RealTimeSTT: realtimestt - DEBUG - Model base completed transcription in 0.61 seconds +2025-07-23 12:45:21.659 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-23 12:45:21.659 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' +2025-07-23 12:45:21.659 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start diff --git a/src/tooling/stt_cli.py b/src/tooling/stt_cli.py index 58b5b64..a3e4b2b 100644 --- a/src/tooling/stt_cli.py +++ b/src/tooling/stt_cli.py @@ -29,10 +29,10 @@ except ImportError: RUMPS_AVAILABLE = False try: - import pyautogui - PYAUTOGUI_AVAILABLE = True + from pynput.keyboard import Controller as KeyboardController + PYNPUT_AVAILABLE = True except ImportError: - PYAUTOGUI_AVAILABLE = False + PYNPUT_AVAILABLE = False # Create STT app that can be imported as a subcommand stt_app = typer.Typer( @@ -344,7 +344,14 @@ if RUMPS_AVAILABLE: if text.strip(): # Show notification with transcription rumps.notification("Transcription", "Speech detected:", text[:100] + ("..." if len(text) > 100 else ""), sound=False) - pyautogui.typewrite(text + " ") + + # Type the transcribed text using pynput + if PYNPUT_AVAILABLE: + keyboard = KeyboardController() + keyboard.type(text + " ") + else: + print(f"pynput not available. Transcription: {text}") + # Save to file if specified if self.output_file: timestamp = datetime.datetime.now().strftime("%H:%M:%S") diff --git a/uv.lock b/uv.lock index 9f6ec60..5d4e5f8 100644 --- a/uv.lock +++ b/uv.lock @@ -1320,6 +1320,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" }, ] +[[package]] +name = "evdev" +version = "1.9.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/fe/a17c106a1f4061ce83f04d14bcedcfb2c38c7793ea56bfb906a6fadae8cb/evdev-1.9.2.tar.gz", hash = "sha256:5d3278892ce1f92a74d6bf888cc8525d9f68af85dbe336c95d1c87fb8f423069", size = 33301, upload-time = "2025-05-01T19:53:47.69Z" } + [[package]] name = "events" version = "0.5" @@ -4896,6 +4902,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/94/e4181a1f6286f545507528c78016e00065ea913276888db2262507693ce5/PyMySQL-1.1.1-py3-none-any.whl", hash = "sha256:4de15da4c61dc132f4fb9ab763063e693d521a80fd0e87943b9a453dd4c19d6c", size = 44972, upload-time = "2024-05-21T11:03:41.216Z" }, ] +[[package]] +name = "pynput" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "evdev", marker = "'linux' in sys_platform" }, + { name = "pyobjc-framework-applicationservices", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" }, + { name = "python-xlib", marker = "'linux' in sys_platform" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/c3/dccf44c68225046df5324db0cc7d563a560635355b3e5f1d249468268a6f/pynput-1.8.1.tar.gz", hash = "sha256:70d7c8373ee98911004a7c938742242840a5628c004573d84ba849d4601df81e", size = 82289, upload-time = "2025-03-17T17:12:01.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/4f/ac3fa906ae8a375a536b12794128c5efacade9eaa917a35dfd27ce0c7400/pynput-1.8.1-py2.py3-none-any.whl", hash = "sha256:42dfcf27404459ca16ca889c8fb8ffe42a9fe54f722fd1a3e130728e59e768d2", size = 91693, upload-time = "2025-03-17T17:12:00.094Z" }, +] + [[package]] name = "pyobjc-core" version = "11.1" @@ -4910,6 +4932,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/0f/f5ab2b0e57430a3bec9a62b6153c0e79c05a30d77b564efdb9f9446eeac5/pyobjc_core-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:f2455683e807f8541f0d83fbba0f5d9a46128ab0d5cc83ea208f0bec759b7f96", size = 708807, upload-time = "2025-06-14T20:44:54.851Z" }, ] +[[package]] +name = "pyobjc-framework-applicationservices" +version = "11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-coretext", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/3f/b33ce0cecc3a42f6c289dcbf9ff698b0d9e85f5796db2e9cb5dadccffbb9/pyobjc_framework_applicationservices-11.1.tar.gz", hash = "sha256:03fcd8c0c600db98fa8b85eb7b3bc31491701720c795e3f762b54e865138bbaf", size = 224842, upload-time = "2025-06-14T20:56:40.648Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/2d/9fde6de0b2a95fbb3d77ba11b3cc4f289dd208f38cb3a28389add87c0f44/pyobjc_framework_applicationservices-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cf45d15eddae36dec2330a9992fc852476b61c8f529874b9ec2805c768a75482", size = 30991, upload-time = "2025-06-14T20:45:18.169Z" }, + { url = "https://files.pythonhosted.org/packages/38/ec/46a5c710e2d7edf55105223c34fed5a7b7cc7aba7d00a3a7b0405d6a2d1a/pyobjc_framework_applicationservices-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f4a85ccd78bab84f7f05ac65ff9be117839dfc09d48c39edd65c617ed73eb01c", size = 31056, upload-time = "2025-06-14T20:45:18.925Z" }, + { url = "https://files.pythonhosted.org/packages/c4/06/c2a309e6f37bfa73a2a581d3301321b2033e25b249e2a01e417a3c34e799/pyobjc_framework_applicationservices-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:385a89f4d0838c97a331e247519d9e9745aa3f7427169d18570e3c664076a63c", size = 31072, upload-time = "2025-06-14T20:45:19.707Z" }, + { url = "https://files.pythonhosted.org/packages/b4/5f/357bf498c27f1b4d48385860d8374b2569adc1522aabe32befd77089c070/pyobjc_framework_applicationservices-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f480fab20f3005e559c9d06c9a3874a1f1c60dde52c6d28a53ab59b45e79d55f", size = 31335, upload-time = "2025-06-14T20:45:20.462Z" }, + { url = "https://files.pythonhosted.org/packages/ab/b6/797fdd81399fe8251196f29a621ba3f3f04d5c579d95fd304489f5558202/pyobjc_framework_applicationservices-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:e8dee91c6a14fd042f98819dc0ac4a182e0e816282565534032f0e544bfab143", size = 31196, upload-time = "2025-06-14T20:45:21.555Z" }, + { url = "https://files.pythonhosted.org/packages/68/45/47eba8d7cdf16d778240ed13fb405e8d712464170ed29d0463363a695194/pyobjc_framework_applicationservices-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:a0ce40a57a9b993793b6f72c4fd93f80618ef54a69d76a1da97b8360a2f3ffc5", size = 31446, upload-time = "2025-06-14T20:45:22.313Z" }, +] + [[package]] name = "pyobjc-framework-cocoa" version = "11.1" @@ -4927,6 +4969,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/7c/54afe9ffee547c41e1161691e72067a37ed27466ac71c089bfdcd07ca70d/pyobjc_framework_cocoa-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:1b5de4e1757bb65689d6dc1f8d8717de9ec8587eb0c4831c134f13aba29f9b71", size = 396742, upload-time = "2025-06-14T20:46:57.64Z" }, ] +[[package]] +name = "pyobjc-framework-coretext" +version = "11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/65/e9/d3231c4f87d07b8525401fd6ad3c56607c9e512c5490f0a7a6abb13acab6/pyobjc_framework_coretext-11.1.tar.gz", hash = "sha256:a29bbd5d85c77f46a8ee81d381b847244c88a3a5a96ac22f509027ceceaffaf6", size = 274702, upload-time = "2025-06-14T20:57:16.059Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/59/d6cc5470157cfd328b2d1ee2c1b6f846a5205307fce17291b57236d9f46e/pyobjc_framework_coretext-11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4f4d2d2a6331fa64465247358d7aafce98e4fb654b99301a490627a073d021e", size = 30072, upload-time = "2025-06-14T20:48:34.248Z" }, + { url = "https://files.pythonhosted.org/packages/32/67/9cc5189c366e67dc3e5b5976fac73cc6405841095f795d3fa0d5fc43d76a/pyobjc_framework_coretext-11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1597bf7234270ee1b9963bf112e9061050d5fb8e1384b3f50c11bde2fe2b1570", size = 30175, upload-time = "2025-06-14T20:48:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d1/6ec2ef4f8133177203a742d5db4db90bbb3ae100aec8d17f667208da84c9/pyobjc_framework_coretext-11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:37e051e8f12a0f47a81b8efc8c902156eb5bc3d8123c43e5bd4cebd24c222228", size = 30180, upload-time = "2025-06-14T20:48:35.766Z" }, + { url = "https://files.pythonhosted.org/packages/0a/84/d4a95e49f6af59503ba257fbed0471b6932f0afe8b3725c018dd3ba40150/pyobjc_framework_coretext-11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:56a3a02202e0d50be3c43e781c00f9f1859ab9b73a8342ff56260b908e911e37", size = 30768, upload-time = "2025-06-14T20:48:36.869Z" }, + { url = "https://files.pythonhosted.org/packages/64/4c/16e1504e06a5cb23eec6276835ddddb087637beba66cf84b5c587eba99be/pyobjc_framework_coretext-11.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:15650ba99692d00953e91e53118c11636056a22c90d472020f7ba31500577bf5", size = 30155, upload-time = "2025-06-14T20:48:37.948Z" }, + { url = "https://files.pythonhosted.org/packages/ad/a4/cbfa9c874b2770fb1ba5c38c42b0e12a8b5aa177a5a86d0ad49b935aa626/pyobjc_framework_coretext-11.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:fb27f66a56660c31bb956191d64b85b95bac99cfb833f6e99622ca0ac4b3ba12", size = 30768, upload-time = "2025-06-14T20:48:38.734Z" }, +] + [[package]] name = "pyobjc-framework-quartz" version = "11.1" @@ -5258,6 +5319,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/32/b4fb8585d1be0f68bde7e110dffbcf354915f77ad8c778563f0ad9655c02/python_socketio-5.13.0-py3-none-any.whl", hash = "sha256:51f68d6499f2df8524668c24bcec13ba1414117cfb3a90115c559b601ab10caf", size = 77800, upload-time = "2025-04-12T15:46:58.412Z" }, ] +[[package]] +name = "python-xlib" +version = "0.33" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/f5/8c0653e5bb54e0cbdfe27bf32d41f27bc4e12faa8742778c17f2a71be2c0/python-xlib-0.33.tar.gz", hash = "sha256:55af7906a2c75ce6cb280a584776080602444f75815a7aff4d287bb2d7018b32", size = 269068, upload-time = "2022-12-25T18:53:00.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/b8/ff33610932e0ee81ae7f1269c890f697d56ff74b9f5b2ee5d9b7fa2c5355/python_xlib-0.33-py2.py3-none-any.whl", hash = "sha256:c3534038d42e0df2f1392a1b30a15a4ff5fdc2b86cfa94f072bf11b10a164398", size = 182185, upload-time = "2022-12-25T18:52:58.662Z" }, +] + [[package]] name = "python3-xlib" version = "0.15" @@ -6257,7 +6330,7 @@ source = { editable = "." } dependencies = [ { name = "pillow", version = "11.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, { name = "pillow", version = "11.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "pyautogui" }, + { name = "pynput" }, { name = "pyperclip" }, { name = "python-doctr" }, { name = "realtimestt" }, @@ -6267,6 +6340,9 @@ dependencies = [ ] [package.optional-dependencies] +keyboard = [ + { name = "pynput" }, +] screenshot-all = [ { name = "mss" }, { name = "pyautogui" }, @@ -6293,9 +6369,10 @@ requires-dist = [ { name = "mss", marker = "extra == 'screenshot-all'", specifier = ">=7.0.0" }, { name = "mss", marker = "extra == 'screenshot-fast'", specifier = ">=7.0.0" }, { name = "pillow", specifier = ">=11.1.0" }, - { name = "pyautogui", specifier = ">=0.9.54" }, { name = "pyautogui", marker = "extra == 'screenshot-all'", specifier = ">=0.9.54" }, { name = "pyautogui", marker = "extra == 'screenshot-full'", specifier = ">=0.9.54" }, + { name = "pynput", specifier = ">=1.7.6" }, + { name = "pynput", marker = "extra == 'keyboard'", specifier = ">=1.7.6" }, { name = "pyperclip", specifier = ">=1.9.0" }, { name = "pyscreenshot", marker = "extra == 'screenshot-all'", specifier = ">=3.1" }, { name = "pyscreenshot", marker = "extra == 'screenshot-multi'", specifier = ">=3.1" }, @@ -6305,7 +6382,7 @@ requires-dist = [ { name = "rumps", specifier = ">=0.4.0" }, { name = "typer", specifier = ">=0.12.0" }, ] -provides-extras = ["screenshot-fast", "screenshot-full", "screenshot-multi", "screenshot-all"] +provides-extras = ["screenshot-fast", "screenshot-full", "screenshot-multi", "screenshot-all", "keyboard"] [package.metadata.requires-dev] dev = [{ name = "open-webui", specifier = ">=0.6.5" }]