voice_recognition/whispervad/node_modules/@ricky0123/vad-web/dist/models/v5.js

"use strict";
var _a;
Object.defineProperty(exports, "__esModule", { value: true });
exports.SileroV5 = void 0;
const logging_1 = require("../logging");
// Build a fresh zero-filled recurrent state tensor for the model: shape [2, 1, 128].
function getNewState(ortInstance) {
    const zeroes = Array(2 * 128).fill(0);
    return new ortInstance.Tensor("float32", zeroes, [2, 1, 128]);
}
class SileroV5 {
    constructor(_session, _state, _sr, ortInstance) {
        this._session = _session;
        this._state = _state;
        this._sr = _sr;
        this.ortInstance = ortInstance;
        // Clear the recurrent state, e.g. between utterances or audio streams.
        this.reset_state = () => {
            this._state = getNewState(this.ortInstance);
        };
        // Run a single audio frame through the model and return speech / non-speech probabilities.
        this.process = async (audioFrame) => {
            const t = new this.ortInstance.Tensor("float32", audioFrame, [
                1,
                audioFrame.length,
            ]);
            const inputs = {
                input: t,
                state: this._state,
                sr: this._sr,
            };
            const out = await this._session.run(inputs);
            // Carry the updated recurrent state ("stateN") forward into the next call.
            // @ts-ignore
            this._state = out["stateN"];
            // The first element of the "output" tensor is the speech probability.
            // @ts-ignore
            const [isSpeech] = out["output"]?.data;
            const notSpeech = 1 - isSpeech;
            return { notSpeech, isSpeech };
        };
    }
}
exports.SileroV5 = SileroV5;
_a = SileroV5;
// Fetch the ONNX model, create an inference session, and return a ready-to-use instance.
SileroV5.new = async (ortInstance, modelFetcher) => {
    logging_1.log.debug("Loading VAD...");
    const modelArrayBuffer = await modelFetcher();
    const _session = await ortInstance.InferenceSession.create(modelArrayBuffer);
    // The sample rate is fed to the model as an int64 scalar tensor (16 kHz).
    // @ts-ignore
    const _sr = new ortInstance.Tensor("int64", [16000n]);
    const _state = getNewState(ortInstance);
    logging_1.log.debug("...finished loading VAD");
    return new _a(_session, _state, _sr, ortInstance);
};
//# sourceMappingURL=v5.js.map
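
// --- Usage sketch (illustration only, not part of the shipped file) ---
// A minimal sketch of driving SileroV5 from another CommonJS module, assuming
// onnxruntime-web is installed and that the Silero v5 ONNX model is served from
// a hypothetical URL; the model path and the 512-sample frame size are
// assumptions for illustration, not values defined by this module.
const ort = require("onnxruntime-web");
const { SileroV5 } = require("./v5"); // assumed relative path to this file

async function runVadOnce() {
    // modelFetcher must resolve to the raw model bytes as an ArrayBuffer.
    const modelFetcher = async () =>
        (await fetch("/silero_vad_v5.onnx")).arrayBuffer(); // assumed model location
    const vad = await SileroV5.new(ort, modelFetcher);
    const frame = new Float32Array(512); // one frame of 16 kHz audio (assumed frame size)
    const { isSpeech, notSpeech } = await vad.process(frame);
    console.log("speech probability:", isSpeech, "non-speech:", notSpeech);
}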