/*
* ATTENTION: The "eval" devtool has been used (maybe by default in mode: "development").
* This devtool is neither made for production nor for readable output files.
* It uses "eval()" calls to create a separate source file in the browser devtools.
* If you are trying to read the output file, select a different devtool (https://webpack.js.org/configuration/devtool/)
* or disable the default devtool with "devtool: false".
* If you are looking for production-ready output files, see mode: "production" (https://webpack.js.org/configuration/mode/).
*/
(function webpackUniversalModuleDefinition(root, factory) {
if(typeof exports === 'object' && typeof module === 'object')
module.exports = factory(require("onnxruntime-web"));
else if(typeof define === 'function' && define.amd)
define(["onnxruntime-web"], factory);
else if(typeof exports === 'object')
exports["vad"] = factory(require("onnxruntime-web"));
else
root["vad"] = factory(root["ort"]);
})(self, (__WEBPACK_EXTERNAL_MODULE_onnxruntime_web__) => {
return /******/ (() => { // webpackBootstrap
/******/ "use strict";
/******/ var __webpack_modules__ = ({
/***/ "./dist/asset-path.js":
/*!****************************!*\
!*** ./dist/asset-path.js ***!
\****************************/
/***/ ((__unused_webpack_module, exports) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.baseAssetPath = void 0;\n// nextjs@14 bundler may attempt to execute this during SSR and crash\nconst isWeb = typeof window !== \"undefined\" && typeof window.document !== \"undefined\";\nconst currentScript = isWeb\n ? window.document.currentScript\n : null;\nlet basePath = \"/\";\nif (currentScript) {\n basePath = currentScript.src\n .replace(/#.*$/, \"\")\n .replace(/\\?.*$/, \"\")\n .replace(/\\/[^\\/]+$/, \"/\");\n}\nexports.baseAssetPath = basePath;\n//# sourceMappingURL=asset-path.js.map\n\n//# sourceURL=webpack://vad/./dist/asset-path.js?");
/***/ }),
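/*
 * Illustrative note (not part of the bundle): how asset-path.js resolves
 * baseAssetPath. Given a script tag such as
 *   <script src="https://example.com/libs/vad/bundle.min.js?v=2#frag"></script>
 * the hash, then the query string, then the filename are stripped in turn:
 *   "https://example.com/libs/vad/bundle.min.js?v=2#frag"
 *     -> "https://example.com/libs/vad/bundle.min.js"
 *     -> baseAssetPath === "https://example.com/libs/vad/"
 * When there is no DOM (e.g. during SSR), it falls back to "/".
 */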
/***/ "./dist/default-model-fetcher.js":
/*!***************************************!*\
!*** ./dist/default-model-fetcher.js ***!
\***************************************/
/***/ ((__unused_webpack_module, exports) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.defaultModelFetcher = void 0;\nconst defaultModelFetcher = (path) => {\n return fetch(path).then((model) => model.arrayBuffer());\n};\nexports.defaultModelFetcher = defaultModelFetcher;\n//# sourceMappingURL=default-model-fetcher.js.map\n\n//# sourceURL=webpack://vad/./dist/default-model-fetcher.js?");
/***/ }),
/***/ "./dist/frame-processor.js":
/*!*********************************!*\
!*** ./dist/frame-processor.js ***!
\*********************************/
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
eval("\n/*\nSome of this code, together with the default options found in index.ts,\nwere taken (or took inspiration) from https://github.com/snakers4/silero-vad\n*/\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.FrameProcessor = exports.validateOptions = exports.defaultV5FrameProcessorOptions = exports.defaultLegacyFrameProcessorOptions = void 0;\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/logging.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/messages.js\");\nconst RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];\nexports.defaultLegacyFrameProcessorOptions = {\n positiveSpeechThreshold: 0.5,\n negativeSpeechThreshold: 0.5 - 0.15,\n preSpeechPadFrames: 1,\n redemptionFrames: 8,\n frameSamples: 1536,\n minSpeechFrames: 3,\n submitUserSpeechOnPause: false,\n};\nexports.defaultV5FrameProcessorOptions = {\n positiveSpeechThreshold: 0.5,\n negativeSpeechThreshold: 0.5 - 0.15,\n preSpeechPadFrames: 3,\n redemptionFrames: 24,\n frameSamples: 512,\n minSpeechFrames: 9,\n submitUserSpeechOnPause: false,\n};\nfunction validateOptions(options) {\n if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {\n logging_1.log.warn(\"You are using an unusual frame size\");\n }\n if (options.positiveSpeechThreshold < 0 ||\n options.positiveSpeechThreshold > 1) {\n logging_1.log.error(\"positiveSpeechThreshold should be a number between 0 and 1\");\n }\n if (options.negativeSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > options.positiveSpeechThreshold) {\n logging_1.log.error(\"negativeSpeechThreshold should be between 0 and positiveSpeechThreshold\");\n }\n if (options.preSpeechPadFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n if (options.redemptionFrames < 0) {\n logging_1.log.error(\"redemptionFrames should be positive\");\n }\n}\nexports.validateOptions = validateOptions;\nconst concatArrays = (arrays) => {\n const sizes = arrays.reduce((out, next) => {\n out.push(out.at(-1) + next.length);\n return out;\n }, [0]);\n const outArray = new Float32Array(sizes.at(-1));\n arrays.forEach((arr, index) => {\n const place = sizes[index];\n outArray.set(arr, place);\n });\n return outArray;\n};\nclass FrameProcessor {\n constructor(modelProcessFunc, modelResetFunc, options) {\n this.modelProcessFunc = modelProcessFunc;\n this.modelResetFunc = modelResetFunc;\n this.options = options;\n this.speaking = false;\n this.redemptionCounter = 0;\n this.speechFrameCount = 0;\n this.active = false;\n this.speechRealStartFired = false;\n this.reset = () => {\n this.speaking = false;\n this.speechRealStartFired = false;\n this.audioBuffer = [];\n this.modelResetFunc();\n this.redemptionCounter = 0;\n this.speechFrameCount = 0;\n };\n this.pause = (handleEvent) => {\n this.active = false;\n if (this.options.submitUserSpeechOnPause) {\n this.endSegment(handleEvent);\n }\n else {\n this.reset();\n }\n };\n this.resume = () => {\n this.active = true;\n };\n this.endSegment = (handleEvent) => {\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speaking = this.speaking;\n this.reset();\n if (speaking) {\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return item.isSpeech ? 
(acc + 1) : acc;\n }, 0);\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n handleEvent({ msg: messages_1.Message.SpeechEnd, audio });\n }\n else {\n handleEvent({ msg: messages_1.Message.VADMisfire });\n }\n }\n return {};\n };\n this.process = async (frame, handleEvent) => {\n if (!this.active) {\n return;\n }\n const probs = await this.modelProcessFunc(frame);\n const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold;\n handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame });\n this.audioBuffer.push({\n frame,\n isSpeech,\n });\n if (isSpeech) {\n this.speechFrameCount++;\n this.redemptionCounter = 0;\n }\n if (isSpeech && !this.speaking) {\n this.speaking = true;\n handleEvent({ msg: messages_1.Message.SpeechStart });\n }\n if (this.speaking &&\n this.speechFrameCount === this.options.minSpeechFrames &&\n !this.speechRealStartFired) {\n this.speechRealStartFired = true;\n handleEvent({ msg: messages_1.Message.SpeechRealStart });\n }\n if (probs.isSpeech < this.options.negativeSpeechThreshold &&\n this.speaking &&\n ++this.redemptionCounter >= this.options.redemptionFrames) {\n this.redemptionCounter = 0;\n this.speechFrameCount = 0;\n this.speaking = false;\n this.speechRealStartFired = false;\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return item.isSpeech ? (acc + 1) : acc;\n }, 0);\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n handleEvent({ msg: messages_1.Message.SpeechEnd, audio });\n }\n else {\n handleEvent({ msg: messages_1.Message.VADMisfire });\n }\n }\n if (!this.speaking) {\n while (this.audioBuffer.length > this.options.preSpeechPadFrames) {\n this.audioBuffer.shift();\n }\n this.speechFrameCount = 0;\n }\n };\n this.audioBuffer = [];\n this.reset();\n }\n}\nexports.FrameProcessor = FrameProcessor;\n//# sourceMappingURL=frame-processor.js.map\n\n//# sourceURL=webpack://vad/./dist/frame-processor.js?");
/***/ }),
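/*
 * Worked example (illustrative): with defaultV5FrameProcessorOptions above,
 * one frame is 512 samples at 16 kHz, i.e. 32 ms. So:
 *   - speech probability must stay below negativeSpeechThreshold
 *     (0.5 - 0.15 = 0.35) for redemptionFrames * 32 ms = 24 * 32 = 768 ms
 *     before a segment is considered over;
 *   - a segment needs at least minSpeechFrames (9) speech-positive frames,
 *     i.e. ~288 ms, to emit SpeechEnd; shorter bursts emit VADMisfire;
 *   - preSpeechPadFrames (3) prepends ~96 ms of audio captured just before
 *     the frame that triggered SpeechStart.
 */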
/***/ "./dist/index.js":
/*!***********************!*\
!*** ./dist/index.js ***!
\***********************/
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.getDefaultRealTimeVADOptions = exports.MicVAD = exports.DEFAULT_MODEL = exports.AudioNodeVAD = exports.utils = exports.NonRealTimeVAD = exports.Message = exports.FrameProcessor = exports.defaultModelFetcher = exports.baseAssetPath = void 0;\nvar asset_path_1 = __webpack_require__(/*! ./asset-path */ \"./dist/asset-path.js\");\nObject.defineProperty(exports, \"baseAssetPath\", ({ enumerable: true, get: function () { return asset_path_1.baseAssetPath; } }));\nvar default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nObject.defineProperty(exports, \"defaultModelFetcher\", ({ enumerable: true, get: function () { return default_model_fetcher_1.defaultModelFetcher; } }));\nvar frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/frame-processor.js\");\nObject.defineProperty(exports, \"FrameProcessor\", ({ enumerable: true, get: function () { return frame_processor_1.FrameProcessor; } }));\nvar messages_1 = __webpack_require__(/*! ./messages */ \"./dist/messages.js\");\nObject.defineProperty(exports, \"Message\", ({ enumerable: true, get: function () { return messages_1.Message; } }));\nvar non_real_time_vad_1 = __webpack_require__(/*! ./non-real-time-vad */ \"./dist/non-real-time-vad.js\");\nObject.defineProperty(exports, \"NonRealTimeVAD\", ({ enumerable: true, get: function () { return non_real_time_vad_1.NonRealTimeVAD; } }));\nconst utils_1 = __webpack_require__(/*! ./utils */ \"./dist/utils.js\");\nexports.utils = {\n audioFileToArray: utils_1.audioFileToArray,\n minFramesForTargetMS: utils_1.minFramesForTargetMS,\n arrayBufferToBase64: utils_1.arrayBufferToBase64,\n encodeWAV: utils_1.encodeWAV,\n};\nvar real_time_vad_1 = __webpack_require__(/*! ./real-time-vad */ \"./dist/real-time-vad.js\");\nObject.defineProperty(exports, \"AudioNodeVAD\", ({ enumerable: true, get: function () { return real_time_vad_1.AudioNodeVAD; } }));\nObject.defineProperty(exports, \"DEFAULT_MODEL\", ({ enumerable: true, get: function () { return real_time_vad_1.DEFAULT_MODEL; } }));\nObject.defineProperty(exports, \"MicVAD\", ({ enumerable: true, get: function () { return real_time_vad_1.MicVAD; } }));\nObject.defineProperty(exports, \"getDefaultRealTimeVADOptions\", ({ enumerable: true, get: function () { return real_time_vad_1.getDefaultRealTimeVADOptions; } }));\n//# sourceMappingURL=index.js.map\n\n//# sourceURL=webpack://vad/./dist/index.js?");
/***/ }),
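/*
 * Usage sketch (assumes the UMD build is loaded as the global `vad`):
 *
 *   const myvad = await vad.MicVAD.new({
 *     onSpeechStart: () => console.log("speech started"),
 *     onSpeechEnd: (audio) => {
 *       // `audio` is a Float32Array of 16 kHz samples for the segment
 *       console.log("segment length:", audio.length / 16000, "s");
 *     },
 *   });
 *   myvad.start();
 */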
/***/ "./dist/logging.js":
/*!*************************!*\
!*** ./dist/logging.js ***!
\*************************/
/***/ ((__unused_webpack_module, exports) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.log = exports.LOG_PREFIX = void 0;\nexports.LOG_PREFIX = \"[VAD]\";\nconst levels = [\"error\", \"debug\", \"warn\"];\nfunction getLog(level) {\n return (...args) => {\n console[level](exports.LOG_PREFIX, ...args);\n };\n}\nconst _log = levels.reduce((acc, level) => {\n acc[level] = getLog(level);\n return acc;\n}, {});\nexports.log = _log;\n//# sourceMappingURL=logging.js.map\n\n//# sourceURL=webpack://vad/./dist/logging.js?");
/***/ }),
/***/ "./dist/messages.js":
/*!**************************!*\
!*** ./dist/messages.js ***!
\**************************/
/***/ ((__unused_webpack_module, exports) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.Message = void 0;\nvar Message;\n(function (Message) {\n Message[\"AudioFrame\"] = \"AUDIO_FRAME\";\n Message[\"SpeechStart\"] = \"SPEECH_START\";\n Message[\"VADMisfire\"] = \"VAD_MISFIRE\";\n Message[\"SpeechEnd\"] = \"SPEECH_END\";\n Message[\"SpeechStop\"] = \"SPEECH_STOP\";\n Message[\"SpeechRealStart\"] = \"SPEECH_REAL_START\";\n Message[\"FrameProcessed\"] = \"FRAME_PROCESSED\";\n})(Message || (exports.Message = Message = {}));\n//# sourceMappingURL=messages.js.map\n\n//# sourceURL=webpack://vad/./dist/messages.js?");
/***/ }),
/***/ "./dist/models/common.js":
/*!*******************************!*\
!*** ./dist/models/common.js ***!
\*******************************/
/***/ ((__unused_webpack_module, exports) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\n//# sourceMappingURL=common.js.map\n\n//# sourceURL=webpack://vad/./dist/models/common.js?");
/***/ }),
/***/ "./dist/models/index.js":
/*!******************************!*\
!*** ./dist/models/index.js ***!
\******************************/
/***/ (function(__unused_webpack_module, exports, __webpack_require__) {
eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __exportStar = (this && this.__exportStar) || function(m, exports) {\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.SileroV5 = exports.SileroLegacy = void 0;\n__exportStar(__webpack_require__(/*! ./common */ \"./dist/models/common.js\"), exports);\nvar legacy_1 = __webpack_require__(/*! ./legacy */ \"./dist/models/legacy.js\");\nObject.defineProperty(exports, \"SileroLegacy\", ({ enumerable: true, get: function () { return legacy_1.SileroLegacy; } }));\nvar v5_1 = __webpack_require__(/*! ./v5 */ \"./dist/models/v5.js\");\nObject.defineProperty(exports, \"SileroV5\", ({ enumerable: true, get: function () { return v5_1.SileroV5; } }));\n//# sourceMappingURL=index.js.map\n\n//# sourceURL=webpack://vad/./dist/models/index.js?");
/***/ }),
/***/ "./dist/models/legacy.js":
/*!*******************************!*\
!*** ./dist/models/legacy.js ***!
\*******************************/
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
eval("\nvar _a;\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.SileroLegacy = void 0;\nconst logging_1 = __webpack_require__(/*! ../logging */ \"./dist/logging.js\");\nclass SileroLegacy {\n constructor(ortInstance, _session, _h, _c, _sr) {\n this.ortInstance = ortInstance;\n this._session = _session;\n this._h = _h;\n this._c = _c;\n this._sr = _sr;\n this.reset_state = () => {\n const zeroes = Array(2 * 64).fill(0);\n this._h = new this.ortInstance.Tensor(\"float32\", zeroes, [2, 1, 64]);\n this._c = new this.ortInstance.Tensor(\"float32\", zeroes, [2, 1, 64]);\n };\n this.process = async (audioFrame) => {\n const t = new this.ortInstance.Tensor(\"float32\", audioFrame, [\n 1,\n audioFrame.length,\n ]);\n const inputs = {\n input: t,\n h: this._h,\n c: this._c,\n sr: this._sr,\n };\n const out = await this._session.run(inputs);\n this._h = out[\"hn\"];\n this._c = out[\"cn\"];\n const [isSpeech] = out[\"output\"]?.data;\n const notSpeech = 1 - isSpeech;\n return { notSpeech, isSpeech };\n };\n }\n}\nexports.SileroLegacy = SileroLegacy;\n_a = SileroLegacy;\nSileroLegacy.new = async (ortInstance, modelFetcher) => {\n logging_1.log.debug(\"initializing vad\");\n const modelArrayBuffer = await modelFetcher();\n const _session = await ortInstance.InferenceSession.create(modelArrayBuffer);\n // @ts-ignore\n const _sr = new ortInstance.Tensor(\"int64\", [16000n]);\n const zeroes = Array(2 * 64).fill(0);\n const _h = new ortInstance.Tensor(\"float32\", zeroes, [2, 1, 64]);\n const _c = new ortInstance.Tensor(\"float32\", zeroes, [2, 1, 64]);\n logging_1.log.debug(\"vad is initialized\");\n const model = new _a(ortInstance, _session, _h, _c, _sr);\n return model;\n};\n//# sourceMappingURL=legacy.js.map\n\n//# sourceURL=webpack://vad/./dist/models/legacy.js?");
/***/ }),
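/*
 * Model I/O sketch (as run by the code above): the legacy Silero graph is
 * stateful. Each call amounts to
 *   out = session.run({
 *     input: float32 tensor [1, N]     (the audio frame),
 *     h:     float32 tensor [2, 1, 64] (hidden state),
 *     c:     float32 tensor [2, 1, 64] (cell state),
 *     sr:    int64 scalar 16000n,
 *   })
 * returning { output, hn, cn }: hn/cn are fed back in on the next frame,
 * and output.data[0] is the speech probability in [0, 1].
 */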
/***/ "./dist/models/v5.js":
/*!***************************!*\
!*** ./dist/models/v5.js ***!
\***************************/
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
eval("\nvar _a;\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.SileroV5 = void 0;\nconst logging_1 = __webpack_require__(/*! ../logging */ \"./dist/logging.js\");\nfunction getNewState(ortInstance) {\n const zeroes = Array(2 * 128).fill(0);\n return new ortInstance.Tensor(\"float32\", zeroes, [2, 1, 128]);\n}\nclass SileroV5 {\n constructor(_session, _state, _sr, ortInstance) {\n this._session = _session;\n this._state = _state;\n this._sr = _sr;\n this.ortInstance = ortInstance;\n this.reset_state = () => {\n this._state = getNewState(this.ortInstance);\n };\n this.process = async (audioFrame) => {\n const t = new this.ortInstance.Tensor(\"float32\", audioFrame, [\n 1,\n audioFrame.length,\n ]);\n const inputs = {\n input: t,\n state: this._state,\n sr: this._sr,\n };\n const out = await this._session.run(inputs);\n // @ts-ignore\n this._state = out[\"stateN\"];\n // @ts-ignore\n const [isSpeech] = out[\"output\"]?.data;\n const notSpeech = 1 - isSpeech;\n return { notSpeech, isSpeech };\n };\n }\n}\nexports.SileroV5 = SileroV5;\n_a = SileroV5;\nSileroV5.new = async (ortInstance, modelFetcher) => {\n logging_1.log.debug(\"Loading VAD...\");\n const modelArrayBuffer = await modelFetcher();\n const _session = await ortInstance.InferenceSession.create(modelArrayBuffer);\n // @ts-ignore\n const _sr = new ortInstance.Tensor(\"int64\", [16000n]);\n const _state = getNewState(ortInstance);\n logging_1.log.debug(\"...finished loading VAD\");\n return new _a(_session, _state, _sr, ortInstance);\n};\n//# sourceMappingURL=v5.js.map\n\n//# sourceURL=webpack://vad/./dist/models/v5.js?");
/***/ }),
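/*
 * Note: the v5 graph differs from legacy only in its recurrent state: a
 * single `state` tensor of shape [2, 1, 128] is passed in and `stateN` is
 * read back, replacing the legacy h/c pair of shape [2, 1, 64].
 */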
/***/ "./dist/non-real-time-vad.js":
/*!***********************************!*\
!*** ./dist/non-real-time-vad.js ***!
\***********************************/
/***/ (function(__unused_webpack_module, exports, __webpack_require__) {
eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n});\nvar __importStar = (this && this.__importStar) || function (mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.NonRealTimeVAD = exports.defaultNonRealTimeVADOptions = void 0;\nconst ortInstance = __importStar(__webpack_require__(/*! onnxruntime-web */ \"onnxruntime-web\"));\nconst asset_path_1 = __webpack_require__(/*! ./asset-path */ \"./dist/asset-path.js\");\nconst default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/frame-processor.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/messages.js\");\nconst models_1 = __webpack_require__(/*! ./models */ \"./dist/models/index.js\");\nconst resampler_1 = __webpack_require__(/*! 
./resampler */ \"./dist/resampler.js\");\nexports.defaultNonRealTimeVADOptions = {\n ...frame_processor_1.defaultLegacyFrameProcessorOptions,\n ortConfig: undefined,\n modelURL: asset_path_1.baseAssetPath + \"silero_vad_legacy.onnx\",\n modelFetcher: default_model_fetcher_1.defaultModelFetcher,\n};\nclass NonRealTimeVAD {\n static async new(options = {}) {\n const fullOptions = {\n ...exports.defaultNonRealTimeVADOptions,\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n if (fullOptions.ortConfig !== undefined) {\n fullOptions.ortConfig(ortInstance);\n }\n const modelFetcher = () => fullOptions.modelFetcher(fullOptions.modelURL);\n const model = await models_1.SileroLegacy.new(ortInstance, modelFetcher);\n const frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: fullOptions.frameSamples,\n positiveSpeechThreshold: fullOptions.positiveSpeechThreshold,\n negativeSpeechThreshold: fullOptions.negativeSpeechThreshold,\n redemptionFrames: fullOptions.redemptionFrames,\n preSpeechPadFrames: fullOptions.preSpeechPadFrames,\n minSpeechFrames: fullOptions.minSpeechFrames,\n submitUserSpeechOnPause: fullOptions.submitUserSpeechOnPause,\n });\n frameProcessor.resume();\n const vad = new this(modelFetcher, ortInstance, fullOptions, frameProcessor);\n return vad;\n }\n constructor(modelFetcher, ort, options, frameProcessor) {\n this.modelFetcher = modelFetcher;\n this.ort = ort;\n this.options = options;\n this.frameProcessor = frameProcessor;\n }\n async *run(inputAudio, sampleRate) {\n const resamplerOptions = {\n nativeSampleRate: sampleRate,\n targetSampleRate: 16000,\n targetFrameSize: this.options.frameSamples,\n };\n const resampler = new resampler_1.Resampler(resamplerOptions);\n let start = 0;\n let end = 0;\n let frameIndex = 0;\n let messageContainer = [];\n for await (const frame of resampler.stream(inputAudio)) {\n await this.frameProcessor.process(frame, (event) => {\n messageContainer.push(event);\n });\n for (const event of messageContainer) {\n switch (event.msg) {\n case messages_1.Message.SpeechStart:\n start = (frameIndex * this.options.frameSamples) / 16;\n break;\n case messages_1.Message.SpeechEnd:\n end = ((frameIndex + 1) * this.options.frameSamples) / 16;\n yield { audio: event.audio, start, end };\n break;\n default:\n break;\n }\n }\n frameIndex++;\n }\n const { msg, audio } = this.frameProcessor.endSegment((event) => {\n messageContainer.push(event);\n });\n for (const event of messageContainer) {\n switch (event.msg) {\n case messages_1.Message.SpeechEnd:\n yield {\n audio: event.audio,\n start,\n end: (frameIndex * this.options.frameSamples) / 16,\n };\n }\n }\n }\n}\nexports.NonRealTimeVAD = NonRealTimeVAD;\n//# sourceMappingURL=non-real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/non-real-time-vad.js?");
/***/ }),
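/*
 * Usage sketch: `start` and `end` are in milliseconds, since the code above
 * divides 16 kHz sample counts by 16.
 *
 *   const myvad = await vad.NonRealTimeVAD.new();
 *   // `samples` is a Float32Array at its native rate (here 44100 Hz)
 *   for await (const { audio, start, end } of myvad.run(samples, 44100)) {
 *     console.log(`speech segment from ${start} ms to ${end} ms`);
 *   }
 */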
/***/ "./dist/real-time-vad.js":
/*!*******************************!*\
!*** ./dist/real-time-vad.js ***!
\*******************************/
/***/ (function(__unused_webpack_module, exports, __webpack_require__) {
eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n});\nvar __importStar = (this && this.__importStar) || function (mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.AudioNodeVAD = exports.MicVAD = exports.getDefaultRealTimeVADOptions = exports.ort = exports.DEFAULT_MODEL = void 0;\nconst ortInstance = __importStar(__webpack_require__(/*! onnxruntime-web */ \"onnxruntime-web\"));\nconst default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/frame-processor.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/logging.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/messages.js\");\nconst models_1 = __webpack_require__(/*! ./models */ \"./dist/models/index.js\");\nconst resampler_1 = __webpack_require__(/*! ./resampler */ \"./dist/resampler.js\");\nexports.DEFAULT_MODEL = \"legacy\";\nexports.ort = ortInstance;\nconst workletFile = \"vad.worklet.bundle.min.js\";\nconst sileroV5File = \"silero_vad_v5.onnx\";\nconst sileroLegacyFile = \"silero_vad_legacy.onnx\";\nconst getDefaultRealTimeVADOptions = (model) => {\n const frameProcessorOptions = model === \"v5\"\n ? frame_processor_1.defaultV5FrameProcessorOptions\n : frame_processor_1.defaultLegacyFrameProcessorOptions;\n return {\n ...frameProcessorOptions,\n onFrameProcessed: (probabilities, frame) => { },\n onVADMisfire: () => {\n logging_1.log.debug(\"VAD misfire\");\n },\n onSpeechStart: () => {\n logging_1.log.debug(\"Detected speech start\");\n },\n onSpeechEnd: () => {\n logging_1.log.debug(\"Detected speech end\");\n },\n onSpeechRealStart: () => {\n logging_1.log.debug(\"Detected real speech start\");\n },\n baseAssetPath: \"https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@latest/dist/\",\n onnxWASMBasePath: \"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/\",\n stream: undefined,\n ortConfig: undefined,\n model: model,\n workletOptions: {},\n };\n};\nexports.getDefaultRealTimeVADOptions = getDefaultRealTimeVADOptions;\nclass MicVAD {\n static async new(options = {}) {\n const fullOptions = {\n ...(0, exports.getDefaultRealTimeVADOptions)(options.model ?? 
exports.DEFAULT_MODEL),\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n let stream;\n if (fullOptions.stream === undefined)\n stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n ...fullOptions.additionalAudioConstraints,\n channelCount: 1,\n echoCancellation: true,\n autoGainControl: true,\n noiseSuppression: true,\n },\n });\n else\n stream = fullOptions.stream;\n const audioContext = new AudioContext();\n const sourceNode = new MediaStreamAudioSourceNode(audioContext, {\n mediaStream: stream,\n });\n const audioNodeVAD = await AudioNodeVAD.new(audioContext, fullOptions);\n audioNodeVAD.receive(sourceNode);\n return new MicVAD(fullOptions, audioContext, stream, audioNodeVAD, sourceNode);\n }\n constructor(options, audioContext, stream, audioNodeVAD, sourceNode, listening = false) {\n this.options = options;\n this.audioContext = audioContext;\n this.stream = stream;\n this.audioNodeVAD = audioNodeVAD;\n this.sourceNode = sourceNode;\n this.listening = listening;\n this.pause = () => {\n this.audioNodeVAD.pause();\n this.listening = false;\n };\n this.start = () => {\n this.audioNodeVAD.start();\n this.listening = true;\n };\n this.destroy = () => {\n if (this.listening) {\n this.pause();\n }\n if (this.options.stream === undefined) {\n this.stream.getTracks().forEach((track) => track.stop());\n }\n this.sourceNode.disconnect();\n this.audioNodeVAD.destroy();\n this.audioContext.close();\n };\n this.setOptions = (options) => {\n this.audioNodeVAD.setFrameProcessorOptions(options);\n };\n }\n}\nexports.MicVAD = MicVAD;\nclass AudioNodeVAD {\n static async new(ctx, options = {}) {\n const fullOptions = {\n ...(0, exports.getDefaultRealTimeVADOptions)(options.model ?? exports.DEFAULT_MODEL),\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n exports.ort.env.wasm.wasmPaths = fullOptions.onnxWASMBasePath;\n if (fullOptions.ortConfig !== undefined) {\n fullOptions.ortConfig(exports.ort);\n }\n const modelFile = fullOptions.model === \"v5\" ? sileroV5File : sileroLegacyFile;\n const modelURL = fullOptions.baseAssetPath + modelFile;\n const modelFactory = fullOptions.model === \"v5\" ? 
models_1.SileroV5.new : models_1.SileroLegacy.new;\n let model;\n try {\n model = await modelFactory(exports.ort, () => (0, default_model_fetcher_1.defaultModelFetcher)(modelURL));\n }\n catch (e) {\n console.error(`Encountered an error while loading model file ${modelURL}`);\n throw e;\n }\n const frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: fullOptions.frameSamples,\n positiveSpeechThreshold: fullOptions.positiveSpeechThreshold,\n negativeSpeechThreshold: fullOptions.negativeSpeechThreshold,\n redemptionFrames: fullOptions.redemptionFrames,\n preSpeechPadFrames: fullOptions.preSpeechPadFrames,\n minSpeechFrames: fullOptions.minSpeechFrames,\n submitUserSpeechOnPause: fullOptions.submitUserSpeechOnPause,\n });\n const audioNodeVAD = new AudioNodeVAD(ctx, fullOptions, frameProcessor);\n await audioNodeVAD.setupAudioNode();\n return audioNodeVAD;\n }\n constructor(ctx, options, frameProcessor) {\n this.ctx = ctx;\n this.options = options;\n this.bufferIndex = 0;\n this.pause = () => {\n this.frameProcessor.pause(this.handleFrameProcessorEvent);\n };\n this.start = () => {\n this.frameProcessor.resume();\n };\n this.receive = (node) => {\n node.connect(this.audioNode);\n };\n this.processFrame = async (frame) => {\n await this.frameProcessor.process(frame, this.handleFrameProcessorEvent);\n };\n this.handleFrameProcessorEvent = (ev) => {\n switch (ev.msg) {\n case messages_1.Message.FrameProcessed:\n this.options.onFrameProcessed(ev.probs, ev.frame);\n break;\n case messages_1.Message.SpeechStart:\n this.options.onSpeechStart();\n break;\n case messages_1.Message.SpeechRealStart:\n this.options.onSpeechRealStart();\n break;\n case messages_1.Message.VADMisfire:\n this.options.onVADMisfire();\n break;\n case messages_1.Message.SpeechEnd:\n this.options.onSpeechEnd(ev.audio);\n break;\n }\n };\n this.destroy = () => {\n if (this.audioNode instanceof AudioWorkletNode) {\n this.audioNode.port.postMessage({\n message: messages_1.Message.SpeechStop,\n });\n }\n this.audioNode.disconnect();\n this.gainNode?.disconnect();\n };\n this.setFrameProcessorOptions = (options) => {\n this.frameProcessor.options = {\n ...this.frameProcessor.options,\n ...options,\n };\n };\n this.frameProcessor = frameProcessor;\n }\n async setupAudioNode() {\n const hasAudioWorklet = \"audioWorklet\" in this.ctx && typeof AudioWorkletNode === \"function\";\n if (hasAudioWorklet) {\n try {\n const workletURL = this.options.baseAssetPath + workletFile;\n await this.ctx.audioWorklet.addModule(workletURL);\n const workletOptions = this.options.workletOptions ?? {};\n workletOptions.processorOptions = {\n ...(workletOptions.processorOptions ?? 
{}),\n frameSamples: this.options.frameSamples,\n };\n this.audioNode = new AudioWorkletNode(this.ctx, \"vad-helper-worklet\", workletOptions);\n this.audioNode.port.onmessage = async (ev) => {\n switch (ev.data?.message) {\n case messages_1.Message.AudioFrame:\n let buffer = ev.data.data;\n if (!(buffer instanceof ArrayBuffer)) {\n buffer = new ArrayBuffer(ev.data.data.byteLength);\n new Uint8Array(buffer).set(new Uint8Array(ev.data.data));\n }\n const frame = new Float32Array(buffer);\n await this.processFrame(frame);\n break;\n }\n };\n return;\n }\n catch (e) {\n console.log(\"AudioWorklet setup failed, falling back to ScriptProcessor\", e);\n }\n }\n // Initialize resampler for ScriptProcessor\n this.resampler = new resampler_1.Resampler({\n nativeSampleRate: this.ctx.sampleRate,\n targetSampleRate: 16000,\n targetFrameSize: this.options.frameSamples ?? 480,\n });\n // Fallback to ScriptProcessor\n const bufferSize = 4096; // Increased for more stable processing\n this.audioNode = this.ctx.createScriptProcessor(bufferSize, 1, 1);\n // Create a gain node with zero gain to handle the audio chain\n this.gainNode = this.ctx.createGain();\n this.gainNode.gain.value = 0;\n let processingAudio = false;\n this.audioNode.onaudioprocess = async (e) => {\n if (processingAudio)\n return;\n processingAudio = true;\n try {\n const input = e.inputBuffer.getChannelData(0);\n const output = e.outputBuffer.getChannelData(0);\n output.fill(0);\n // Process through resampler\n if (this.resampler) {\n const frames = this.resampler.process(input);\n for (const frame of frames) {\n await this.processFrame(frame);\n }\n }\n }\n catch (error) {\n console.error(\"Error processing audio:\", error);\n }\n finally {\n processingAudio = false;\n }\n };\n // Connect the audio chain\n this.audioNode.connect(this.gainNode);\n this.gainNode.connect(this.ctx.destination);\n }\n}\nexports.AudioNodeVAD = AudioNodeVAD;\n//# sourceMappingURL=real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/real-time-vad.js?");
/***/ }),
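/*
 * Usage sketch: AudioNodeVAD can be wired into an existing Web Audio graph
 * directly (MicVAD is essentially this plus getUserMedia):
 *
 *   const ctx = new AudioContext();
 *   const source = ctx.createMediaStreamSource(stream); // your MediaStream
 *   const nodeVad = await vad.AudioNodeVAD.new(ctx, {
 *     model: "v5",
 *     onSpeechEnd: (audio) => console.log("got", audio.length, "samples"),
 *   });
 *   nodeVad.receive(source);
 *   nodeVad.start();
 *
 * Internally it prefers an AudioWorklet and falls back to a 4096-sample
 * ScriptProcessorNode routed through a zero-gain node so the graph keeps
 * pulling audio without producing audible output.
 */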
/***/ "./dist/resampler.js":
/*!***************************!*\
!*** ./dist/resampler.js ***!
\***************************/
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.Resampler = void 0;\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/logging.js\");\nclass Resampler {\n constructor(options) {\n this.options = options;\n this.process = (audioFrame) => {\n const outputFrames = [];\n for (const sample of audioFrame) {\n this.inputBuffer.push(sample);\n while (this.hasEnoughDataForFrame()) {\n const outputFrame = this.generateOutputFrame();\n outputFrames.push(outputFrame);\n }\n }\n return outputFrames;\n };\n if (options.nativeSampleRate < 16000) {\n logging_1.log.error(\"nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate\");\n }\n this.inputBuffer = [];\n }\n async *stream(audioInput) {\n for (const sample of audioInput) {\n this.inputBuffer.push(sample);\n while (this.hasEnoughDataForFrame()) {\n const outputFrame = this.generateOutputFrame();\n yield outputFrame;\n }\n }\n }\n hasEnoughDataForFrame() {\n return ((this.inputBuffer.length * this.options.targetSampleRate) /\n this.options.nativeSampleRate >=\n this.options.targetFrameSize);\n }\n generateOutputFrame() {\n const outputFrame = new Float32Array(this.options.targetFrameSize);\n let outputIndex = 0;\n let inputIndex = 0;\n while (outputIndex < this.options.targetFrameSize) {\n let sum = 0;\n let num = 0;\n while (inputIndex <\n Math.min(this.inputBuffer.length, ((outputIndex + 1) * this.options.nativeSampleRate) /\n this.options.targetSampleRate)) {\n const value = this.inputBuffer[inputIndex];\n if (value !== undefined) {\n sum += value;\n num++;\n }\n inputIndex++;\n }\n outputFrame[outputIndex] = sum / num;\n outputIndex++;\n }\n this.inputBuffer = this.inputBuffer.slice(inputIndex);\n return outputFrame;\n }\n}\nexports.Resampler = Resampler;\n//# sourceMappingURL=resampler.js.map\n\n//# sourceURL=webpack://vad/./dist/resampler.js?");
/***/ }),
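/*
 * Worked example (illustrative): downsampling 44100 Hz to 16000 Hz with
 * targetFrameSize 512. hasEnoughDataForFrame() waits until
 *   buffered * 16000 / 44100 >= 512, i.e. ~1412 buffered input samples,
 * and generateOutputFrame() emits each output sample as the mean of the
 * ~2.76 input samples that map onto it. This is box-filter decimation with
 * no anti-aliasing low-pass filter, which is adequate for VAD but not for
 * high-fidelity resampling.
 */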
/***/ "./dist/utils.js":
/*!***********************!*\
!*** ./dist/utils.js ***!
\***********************/
/***/ ((__unused_webpack_module, exports) => {
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.audioFileToArray = exports.encodeWAV = exports.arrayBufferToBase64 = exports.minFramesForTargetMS = void 0;\nfunction minFramesForTargetMS(targetDuration, frameSamples, sr = 16000) {\n return Math.ceil((targetDuration * sr) / 1000 / frameSamples);\n}\nexports.minFramesForTargetMS = minFramesForTargetMS;\nfunction arrayBufferToBase64(buffer) {\n const bytes = new Uint8Array(buffer);\n const len = bytes.byteLength;\n const binary = new Array(len);\n for (var i = 0; i < len; i++) {\n const byte = bytes[i];\n if (byte === undefined) {\n break;\n }\n binary[i] = String.fromCharCode(byte);\n }\n return btoa(binary.join(\"\"));\n}\nexports.arrayBufferToBase64 = arrayBufferToBase64;\n/*\nThis rest of this was mostly copied from https://github.com/linto-ai/WebVoiceSDK\n*/\nfunction encodeWAV(samples, format = 3, sampleRate = 16000, numChannels = 1, bitDepth = 32) {\n var bytesPerSample = bitDepth / 8;\n var blockAlign = numChannels * bytesPerSample;\n var buffer = new ArrayBuffer(44 + samples.length * bytesPerSample);\n var view = new DataView(buffer);\n /* RIFF identifier */\n writeString(view, 0, \"RIFF\");\n /* RIFF chunk length */\n view.setUint32(4, 36 + samples.length * bytesPerSample, true);\n /* RIFF type */\n writeString(view, 8, \"WAVE\");\n /* format chunk identifier */\n writeString(view, 12, \"fmt \");\n /* format chunk length */\n view.setUint32(16, 16, true);\n /* sample format (raw) */\n view.setUint16(20, format, true);\n /* channel count */\n view.setUint16(22, numChannels, true);\n /* sample rate */\n view.setUint32(24, sampleRate, true);\n /* byte rate (sample rate * block align) */\n view.setUint32(28, sampleRate * blockAlign, true);\n /* block align (channel count * bytes per sample) */\n view.setUint16(32, blockAlign, true);\n /* bits per sample */\n view.setUint16(34, bitDepth, true);\n /* data chunk identifier */\n writeString(view, 36, \"data\");\n /* data chunk length */\n view.setUint32(40, samples.length * bytesPerSample, true);\n if (format === 1) {\n // Raw PCM\n floatTo16BitPCM(view, 44, samples);\n }\n else {\n writeFloat32(view, 44, samples);\n }\n return buffer;\n}\nexports.encodeWAV = encodeWAV;\nfunction interleave(inputL, inputR) {\n var length = inputL.length + inputR.length;\n var result = new Float32Array(length);\n var index = 0;\n var inputIndex = 0;\n while (index < length) {\n result[index++] = inputL[inputIndex];\n result[index++] = inputR[inputIndex];\n inputIndex++;\n }\n return result;\n}\nfunction writeFloat32(output, offset, input) {\n for (var i = 0; i < input.length; i++, offset += 4) {\n output.setFloat32(offset, input[i], true);\n }\n}\nfunction floatTo16BitPCM(output, offset, input) {\n for (var i = 0; i < input.length; i++, offset += 2) {\n var s = Math.max(-1, Math.min(1, input[i]));\n output.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7fff, true);\n }\n}\nfunction writeString(view, offset, string) {\n for (var i = 0; i < string.length; i++) {\n view.setUint8(offset + i, string.charCodeAt(i));\n }\n}\nasync function audioFileToArray(audioFileData) {\n const ctx = new OfflineAudioContext(1, 1, 44100);\n const reader = new FileReader();\n let audioBuffer = null;\n await new Promise((res) => {\n reader.addEventListener(\"loadend\", (ev) => {\n const audioData = reader.result;\n ctx.decodeAudioData(audioData, (buffer) => {\n audioBuffer = buffer;\n ctx\n .startRendering()\n .then((renderedBuffer) => {\n console.log(\"Rendering completed successfully\");\n res();\n })\n .catch((err) => {\n console.error(`Rendering failed: ${err}`);\n });\n }, (e) => {\n console.log(`Error with decoding audio data: ${e}`);\n });\n });\n reader.readAsArrayBuffer(audioFileData);\n });\n if (audioBuffer === null) {\n throw Error(\"some shit\");\n }\n let _audioBuffer = audioBuffer;\n let out = new Float32Array(_audioBuffer.length);\n for (let i = 0; i < _audioBuffer.length; i++) {\n for (let j = 0; j < _audioBuffer.numberOfChannels; j++) {\n // @ts-ignore\n out[i] += _audioBuffer.getChannelData(j)[i];\n }\n }\n return { audio: out, sampleRate: _audioBuffer.sampleRate };\n}\nexports.audioFileToArray = audioFileToArray;\n//# sourceMappingURL=utils.js.map\n\n//# sourceURL=webpack://vad/./dist/utils.js?");
/***/ }),
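/*
 * Usage sketch: turning a SpeechEnd Float32Array into a playable WAV. With
 * the defaults above (format 3 = IEEE float, 16000 Hz, mono, 32-bit):
 *
 *   const wavBuffer = vad.utils.encodeWAV(audio);             // ArrayBuffer
 *   const base64 = vad.utils.arrayBufferToBase64(wavBuffer);
 *   const url = `data:audio/wav;base64,${base64}`;            // <audio src=...>
 */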
/***/ "onnxruntime-web":
/*!******************************************************************************************************************!*\
!*** external {"commonjs":"onnxruntime-web","commonjs2":"onnxruntime-web","amd":"onnxruntime-web","root":"ort"} ***!
\******************************************************************************************************************/
/***/ ((module) => {
module.exports = __WEBPACK_EXTERNAL_MODULE_onnxruntime_web__;
/***/ })
/******/ });
/************************************************************************/
/******/ // The module cache
/******/ var __webpack_module_cache__ = {};
/******/
/******/ // The require function
/******/ function __webpack_require__(moduleId) {
/******/ // Check if module is in cache
/******/ var cachedModule = __webpack_module_cache__[moduleId];
/******/ if (cachedModule !== undefined) {
/******/ return cachedModule.exports;
/******/ }
/******/ // Create a new module (and put it into the cache)
/******/ var module = __webpack_module_cache__[moduleId] = {
/******/ // no module.id needed
/******/ // no module.loaded needed
/******/ exports: {}
/******/ };
/******/
/******/ // Execute the module function
/******/ __webpack_modules__[moduleId].call(module.exports, module, module.exports, __webpack_require__);
/******/
/******/ // Return the exports of the module
/******/ return module.exports;
/******/ }
/******/
/************************************************************************/
/******/
/******/ // startup
/******/ // Load entry module and return exports
/******/ // This entry module can't be inlined because the eval devtool is used.
/******/ var __webpack_exports__ = __webpack_require__("./dist/index.js");
/******/
/******/ return __webpack_exports__;
/******/ })()
;
});
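/*
 * Loading sketch (file names are illustrative; the option defaults above
 * point at jsDelivr): the UMD wrapper expects onnxruntime-web to be loaded
 * first, as the global `ort` for <script> usage or as the "onnxruntime-web"
 * package under CommonJS/AMD:
 *
 *   <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
 *   <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@latest/dist/bundle.min.js"></script>
 *   <script>
 *     vad.MicVAD.new().then((v) => v.start());
 *   </script>
 */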