"use strict"; /* Some of this code, together with the default options found in index.ts, were taken (or took inspiration) from https://github.com/snakers4/silero-vad */ Object.defineProperty(exports, "__esModule", { value: true }); exports.FrameProcessor = exports.validateOptions = exports.defaultV5FrameProcessorOptions = exports.defaultLegacyFrameProcessorOptions = void 0; const logging_1 = require("./logging"); const messages_1 = require("./messages"); const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536]; exports.defaultLegacyFrameProcessorOptions = { positiveSpeechThreshold: 0.5, negativeSpeechThreshold: 0.5 - 0.15, preSpeechPadFrames: 1, redemptionFrames: 8, frameSamples: 1536, minSpeechFrames: 3, submitUserSpeechOnPause: false, }; exports.defaultV5FrameProcessorOptions = { positiveSpeechThreshold: 0.5, negativeSpeechThreshold: 0.5 - 0.15, preSpeechPadFrames: 3, redemptionFrames: 24, frameSamples: 512, minSpeechFrames: 9, submitUserSpeechOnPause: false, }; function validateOptions(options) { if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) { logging_1.log.warn("You are using an unusual frame size"); } if (options.positiveSpeechThreshold < 0 || options.positiveSpeechThreshold > 1) { logging_1.log.error("positiveSpeechThreshold should be a number between 0 and 1"); } if (options.negativeSpeechThreshold < 0 || options.negativeSpeechThreshold > options.positiveSpeechThreshold) { logging_1.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold"); } if (options.preSpeechPadFrames < 0) { logging_1.log.error("preSpeechPadFrames should be positive"); } if (options.redemptionFrames < 0) { logging_1.log.error("redemptionFrames should be positive"); } } exports.validateOptions = validateOptions; const concatArrays = (arrays) => { const sizes = arrays.reduce((out, next) => { out.push(out.at(-1) + next.length); return out; }, [0]); const outArray = new Float32Array(sizes.at(-1)); arrays.forEach((arr, index) => { const place = sizes[index]; outArray.set(arr, place); }); return outArray; }; class FrameProcessor { constructor(modelProcessFunc, modelResetFunc, options) { this.modelProcessFunc = modelProcessFunc; this.modelResetFunc = modelResetFunc; this.options = options; this.speaking = false; this.redemptionCounter = 0; this.speechFrameCount = 0; this.active = false; this.speechRealStartFired = false; this.reset = () => { this.speaking = false; this.speechRealStartFired = false; this.audioBuffer = []; this.modelResetFunc(); this.redemptionCounter = 0; this.speechFrameCount = 0; }; this.pause = (handleEvent) => { this.active = false; if (this.options.submitUserSpeechOnPause) { this.endSegment(handleEvent); } else { this.reset(); } }; this.resume = () => { this.active = true; }; this.endSegment = (handleEvent) => { const audioBuffer = this.audioBuffer; this.audioBuffer = []; const speaking = this.speaking; this.reset(); if (speaking) { const speechFrameCount = audioBuffer.reduce((acc, item) => { return item.isSpeech ? (acc + 1) : acc; }, 0); if (speechFrameCount >= this.options.minSpeechFrames) { const audio = concatArrays(audioBuffer.map((item) => item.frame)); handleEvent({ msg: messages_1.Message.SpeechEnd, audio }); } else { handleEvent({ msg: messages_1.Message.VADMisfire }); } } return {}; }; this.process = async (frame, handleEvent) => { if (!this.active) { return; } const probs = await this.modelProcessFunc(frame); const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold; handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame }); this.audioBuffer.push({ frame, isSpeech, }); if (isSpeech) { this.speechFrameCount++; this.redemptionCounter = 0; } if (isSpeech && !this.speaking) { this.speaking = true; handleEvent({ msg: messages_1.Message.SpeechStart }); } if (this.speaking && this.speechFrameCount === this.options.minSpeechFrames && !this.speechRealStartFired) { this.speechRealStartFired = true; handleEvent({ msg: messages_1.Message.SpeechRealStart }); } if (probs.isSpeech < this.options.negativeSpeechThreshold && this.speaking && ++this.redemptionCounter >= this.options.redemptionFrames) { this.redemptionCounter = 0; this.speechFrameCount = 0; this.speaking = false; this.speechRealStartFired = false; const audioBuffer = this.audioBuffer; this.audioBuffer = []; const speechFrameCount = audioBuffer.reduce((acc, item) => { return item.isSpeech ? (acc + 1) : acc; }, 0); if (speechFrameCount >= this.options.minSpeechFrames) { const audio = concatArrays(audioBuffer.map((item) => item.frame)); handleEvent({ msg: messages_1.Message.SpeechEnd, audio }); } else { handleEvent({ msg: messages_1.Message.VADMisfire }); } } if (!this.speaking) { while (this.audioBuffer.length > this.options.preSpeechPadFrames) { this.audioBuffer.shift(); } this.speechFrameCount = 0; } }; this.audioBuffer = []; this.reset(); } } exports.FrameProcessor = FrameProcessor; //# sourceMappingURL=frame-processor.js.map