import { OnnxModelOptions } from './onnx-model.js';
import { OnnxValue, OnnxValueDataLocation } from './onnx-value.js';
import type { Tensor } from './tensor.js';
import { TryGetGlobalType } from './type-helper.js';
export declare namespace InferenceSession {
    type OnnxValueMapType = {
        readonly [name: string]: OnnxValue;
    };
    type NullableOnnxValueMapType = {
        readonly [name: string]: OnnxValue | null;
    };
    /**
     * Feeds (model inputs) are represented as an object that uses input names as keys and OnnxValue as corresponding
     * values.
     */
    type FeedsType = OnnxValueMapType;
    /**
     * Fetches (model outputs) can be one of the following:
     *
     * - Omitted. Use the model's output names definition.
     * - An array of strings indicating the output names.
     * - An object that uses output names as keys and OnnxValue or null as corresponding values.
     *
     * @remark
     * Unlike the input argument, OnnxValue is optional in the output. If an OnnxValue is present it will be
     * used as a pre-allocated value by the inference engine; if omitted, the inference engine will allocate the
     * buffer internally.
     */
    type FetchesType = readonly string[] | NullableOnnxValueMapType;
    /**
     * An inferencing return type is an object that uses output names as keys and OnnxValue as corresponding values.
     */
    type ReturnType = OnnxValueMapType;
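    /*
     * Illustrative sketch (not part of the declarations above): the object shapes that
     * FeedsType, FetchesType and ReturnType describe. The names 'input' / 'output' and the
     * tensor dimensions are placeholders.
     *
     * ```js
     * // feeds: input names -> OnnxValue
     * const data = new Float32Array(1 * 3 * 224 * 224);
     * const feeds = { input: new Tensor('float32', data, [1, 3, 224, 224]) };
     *
     * // fetches: either a list of output names ...
     * const fetchesAsNames = ['output'];
     * // ... or a map from output names to a pre-allocated OnnxValue or null
     * const fetchesAsMap = { output: null };
     *
     * // return value: output names -> OnnxValue
     * // const results = await session.run(feeds, fetchesAsMap);
     * ```
     */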
    /**
     * A set of configurations for session behavior.
     */
    interface SessionOptions extends OnnxModelOptions {
        /**
         * An array of execution provider options.
         *
         * An execution provider option can be a string indicating the name of the execution provider,
         * or an object of the corresponding type.
         *
         * (An illustrative usage sketch follows this interface declaration.)
         */
        executionProviders?: readonly ExecutionProviderConfig[];
        /**
         * The number of intra-op threads.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native).
         */
        intraOpNumThreads?: number;
        /**
         * The number of inter-op threads.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native).
         */
        interOpNumThreads?: number;
        /**
         * The free dimension overrides.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        freeDimensionOverrides?: {
            readonly [dimensionName: string]: number;
        };
        /**
         * The optimization level.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        graphOptimizationLevel?: 'disabled' | 'basic' | 'extended' | 'all';
        /**
         * Whether to enable the CPU memory arena.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        enableCpuMemArena?: boolean;
        /**
         * Whether to enable the memory pattern optimization.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        enableMemPattern?: boolean;
        /**
         * Execution mode.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        executionMode?: 'sequential' | 'parallel';
        /**
         * Optimized model file path.
         *
         * If this setting is specified, the optimized model will be dumped. In the browser, a blob will be created
         * with a pop-up window.
         */
        optimizedModelFilePath?: string;
        /**
         * Whether to enable profiling.
         *
         * This setting is a placeholder for future use.
         */
        enableProfiling?: boolean;
        /**
         * File prefix for profiling.
         *
         * This setting is a placeholder for future use.
         */
        profileFilePrefix?: string;
        /**
         * Log ID.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        logId?: string;
        /**
         * Log severity level. See
         * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        logSeverityLevel?: 0 | 1 | 2 | 3 | 4;
        /**
         * Log verbosity level.
         *
         * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later.
         */
        logVerbosityLevel?: number;
        /**
         * Specify a string as the preferred data location for all outputs, or an object that uses output names as
         * keys and a preferred data location as corresponding values.
         *
         * This setting is available only in ONNXRuntime Web for WebGL and WebGPU EP.
         */
        preferredOutputLocation?: OnnxValueDataLocation | {
            readonly [outputName: string]: OnnxValueDataLocation;
        };
        /**
         * Whether to enable graph capture.
         * This setting is available only in ONNXRuntime Web for WebGPU EP.
         */
        enableGraphCapture?: boolean;
        /**
         * Store configurations for a session. See
         * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/
         * onnxruntime_session_options_config_keys.h
         *
         * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later.
         *
         * @example
         * ```js
         * extra: {
         *   session: {
         *     set_denormal_as_zero: "1",
         *     disable_prepacking: "1"
         *   },
         *   optimization: {
         *     enable_gelu_approximation: "1"
         *   }
         * }
         * ```
         */
        extra?: Record<string, unknown>;
    }
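    /*
     * Illustrative usage sketch for SessionOptions (referenced from the `executionProviders`
     * documentation above). Which execution providers are actually available depends on the
     * backend and platform in use; 'batch_size' is a placeholder dimension name.
     *
     * ```js
     * const sessionOptions = {
     *   executionProviders: ['webgpu', 'wasm'],      // try WebGPU first, fall back to WASM
     *   graphOptimizationLevel: 'all',
     *   executionMode: 'sequential',
     *   freeDimensionOverrides: { batch_size: 1 },
     *   logSeverityLevel: 2,
     *   preferredOutputLocation: 'cpu',
     * };
     * ```
     */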
    interface ExecutionProviderOptionMap {
        coreml: CoreMLExecutionProviderOption;
        cpu: CpuExecutionProviderOption;
        cuda: CudaExecutionProviderOption;
        dml: DmlExecutionProviderOption;
        nnapi: NnapiExecutionProviderOption;
        tensorrt: TensorRtExecutionProviderOption;
        wasm: WebAssemblyExecutionProviderOption;
        webgl: WebGLExecutionProviderOption;
        webgpu: WebGpuExecutionProviderOption;
        webnn: WebNNExecutionProviderOption;
        qnn: QnnExecutionProviderOption;
        xnnpack: XnnpackExecutionProviderOption;
    }
    type ExecutionProviderName = keyof ExecutionProviderOptionMap;
    type ExecutionProviderConfig = ExecutionProviderOptionMap[ExecutionProviderName] | ExecutionProviderOption | ExecutionProviderName | string;
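    /*
     * Illustrative sketch: the forms accepted by ExecutionProviderConfig. A plain string names
     * the execution provider; an object form carries provider-specific options.
     *
     * ```js
     * const byName = 'wasm';
     * const byTypedOption = { name: 'webgpu', preferredLayout: 'NHWC' };
     * const byGenericOption = { name: 'cuda', deviceId: 0 };
     * ```
     */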
    interface ExecutionProviderOption {
        readonly name: string;
    }
    interface CpuExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'cpu';
        useArena?: boolean;
    }
    interface CudaExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'cuda';
        deviceId?: number;
    }
    interface DmlExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'dml';
        deviceId?: number;
    }
    interface TensorRtExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'tensorrt';
        deviceId?: number;
    }
    interface WebAssemblyExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'wasm';
    }
    interface WebGLExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'webgl';
    }
    interface XnnpackExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'xnnpack';
    }
    interface WebGpuExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'webgpu';
        preferredLayout?: 'NCHW' | 'NHWC';
    }
    interface WebNNExecutionProviderName extends ExecutionProviderOption {
        readonly name: 'webnn';
    }
    /**
     * Represents a set of options for creating a WebNN MLContext.
     *
     * @see https://www.w3.org/TR/webnn/#dictdef-mlcontextoptions
     */
    interface WebNNContextOptions {
        deviceType?: 'cpu' | 'gpu' | 'npu';
        numThreads?: number;
        powerPreference?: 'default' | 'low-power' | 'high-performance';
    }
    /**
     * Represents a set of options for the WebNN execution provider without an MLContext.
     */
    interface WebNNOptionsWithoutMLContext extends WebNNExecutionProviderName, WebNNContextOptions {
        context?: never;
    }
    /**
     * Represents a set of options for the WebNN execution provider with an MLContext.
     *
     * When an MLContext is provided, the deviceType is also required so that the WebNN EP can determine the preferred
     * channel layout.
     *
     * @see https://www.w3.org/TR/webnn/#dom-ml-createcontext
     */
    interface WebNNOptionsWithMLContext extends WebNNExecutionProviderName, Omit<WebNNContextOptions, 'deviceType'>, Required<Pick<WebNNContextOptions, 'deviceType'>> {
        context: TryGetGlobalType<'MLContext'>;
    }
    /**
     * Represents a set of options for the WebNN execution provider with an MLContext that was created from a
     * GPUDevice.
     *
     * @see https://www.w3.org/TR/webnn/#dom-ml-createcontext-gpudevice
     */
    interface WebNNOptionsWebGpu extends WebNNExecutionProviderName {
        context: TryGetGlobalType<'MLContext'>;
        gpuDevice: TryGetGlobalType<'GPUDevice'>;
    }
    /**
     * Options for the WebNN execution provider.
     */
    type WebNNExecutionProviderOption = WebNNOptionsWithoutMLContext | WebNNOptionsWithMLContext | WebNNOptionsWebGpu;
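    /*
     * Illustrative sketch (assumes an environment where the WebNN API, `navigator.ml`, is
     * available): the two main ways to configure the WebNN execution provider.
     *
     * ```js
     * // Without an MLContext: the EP creates one from the context options.
     * const webnnByOptions = { name: 'webnn', deviceType: 'gpu', powerPreference: 'high-performance' };
     *
     * // With a pre-created MLContext: deviceType must also be specified.
     * // const context = await navigator.ml.createContext({ deviceType: 'gpu' });
     * // const webnnByContext = { name: 'webnn', context, deviceType: 'gpu' };
     * ```
     */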
    interface QnnExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'qnn';
        /**
         * Specify the QNN backend type. E.g., 'cpu' or 'htp'.
         * Mutually exclusive with `backendPath`.
         *
         * @default 'htp'
         */
        backendType?: string;
        /**
         * Specify a path to the QNN backend library.
         * Mutually exclusive with `backendType`.
         */
        backendPath?: string;
        /**
         * Specify whether to enable HTP FP16 precision.
         *
         * @default true
         */
        enableFp16Precision?: boolean;
    }
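    /*
     * Illustrative sketch: a QNN execution provider entry selecting the HTP backend with FP16
     * precision enabled (matching the defaults documented above).
     *
     * ```js
     * const qnn = { name: 'qnn', backendType: 'htp', enableFp16Precision: true };
     * ```
     */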
    interface CoreMLExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'coreml';
        /**
         * The bit flags for the CoreML execution provider.
         *
         * ```
         * COREML_FLAG_USE_CPU_ONLY = 0x001
         * COREML_FLAG_ENABLE_ON_SUBGRAPH = 0x002
         * COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004
         * COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008
         * COREML_FLAG_CREATE_MLPROGRAM = 0x010
         * COREML_FLAG_USE_CPU_AND_GPU = 0x020
         * ```
         *
         * See include/onnxruntime/core/providers/coreml/coreml_provider_factory.h for more details.
         *
         * This flag is available only in ONNXRuntime (Node.js binding).
         */
        coreMlFlags?: number;
        /**
         * Specify whether to use CPU only in CoreML EP.
         *
         * This setting is available only in ONNXRuntime (react-native).
         */
        useCPUOnly?: boolean;
        /**
         * Specify whether to use CPU and GPU in CoreML EP.
         *
         * This setting is available only in ONNXRuntime (react-native).
         */
        useCPUAndGPU?: boolean;
        /**
         * Specify whether to enable CoreML EP on subgraph.
         *
         * This setting is available only in ONNXRuntime (react-native).
         */
        enableOnSubgraph?: boolean;
        /**
         * Specify whether to only enable CoreML EP for Apple devices with ANE (Apple Neural Engine).
         *
         * This setting is available only in ONNXRuntime (react-native).
         */
        onlyEnableDeviceWithANE?: boolean;
    }
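    /*
     * Illustrative sketch: configuring the CoreML EP. The Node.js binding takes the bit flags
     * listed above combined with bitwise OR, while react-native uses the boolean options.
     *
     * ```js
     * // Node.js binding: require static input shapes and create an ML Program.
     * const coremlNode = { name: 'coreml', coreMlFlags: 0x008 | 0x010 };
     *
     * // react-native:
     * const coremlRN = { name: 'coreml', useCPUOnly: false, enableOnSubgraph: true };
     * ```
     */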
    interface NnapiExecutionProviderOption extends ExecutionProviderOption {
        readonly name: 'nnapi';
        useFP16?: boolean;
        useNCHW?: boolean;
        cpuDisabled?: boolean;
        cpuOnly?: boolean;
    }
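    /*
     * Illustrative sketch: an NNAPI execution provider entry (Android) using the options
     * declared above.
     *
     * ```js
     * const nnapi = { name: 'nnapi', useFP16: true, useNCHW: false };
     * ```
     */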
    /**
     * A set of configurations for inference run behavior.
     */
    interface RunOptions {
        /**
         * Log severity level. See
         * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        logSeverityLevel?: 0 | 1 | 2 | 3 | 4;
        /**
         * Log verbosity level.
         *
         * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later.
         */
        logVerbosityLevel?: number;
        /**
         * If true, terminate all incomplete OrtRun calls as soon as possible.
         *
         * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later.
         */
        terminate?: boolean;
        /**
         * A tag for the Run() calls that use this run options object.
         *
         * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend.
         */
        tag?: string;
        /**
         * Set a single run configuration entry. See
         * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/
         * onnxruntime_run_options_config_keys.h
         *
         * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later.
         *
         * @example
         *
         * ```js
         * extra: {
         *   memory: {
         *     enable_memory_arena_shrinkage: "1",
         *   }
         * }
         * ```
         */
        extra?: Record<string, unknown>;
    }
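    /*
     * Illustrative sketch: a RunOptions object; the `extra` entry reuses the documented
     * memory-arena-shrinkage key from the example above.
     *
     * ```js
     * const runOptions = {
     *   tag: 'warmup-run',
     *   logSeverityLevel: 3,
     *   extra: { memory: { enable_memory_arena_shrinkage: '1' } },
     * };
     * ```
     */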
    /**
     * The common part of the value metadata type for both tensor and non-tensor values.
     */
    interface ValueMetadataBase {
        /**
         * The name of the specified input or output.
         */
        readonly name: string;
    }
    /**
     * Represents the metadata of a non-tensor value.
     */
    interface NonTensorValueMetadata extends ValueMetadataBase {
        /**
         * Get a value indicating whether the value is a tensor.
         */
        readonly isTensor: false;
    }
    /**
     * Represents the metadata of a tensor value.
     */
    interface TensorValueMetadata extends ValueMetadataBase {
        /**
         * Get a value indicating whether the value is a tensor.
         */
        readonly isTensor: true;
        /**
         * Get the data type of the tensor.
         */
        readonly type: Tensor.Type;
        /**
         * Get the shape of the tensor.
         *
         * If the shape is not defined, the value will be an empty array. Otherwise, it will be an array representing
         * the shape of the tensor. Each element in the array can be a number or a string. If the element is a number,
         * it represents the corresponding dimension size. If the element is a string, it represents a symbolic
         * dimension.
         */
        readonly shape: ReadonlyArray<number | string>;
    }
    /**
     * Represents the metadata of a value.
     */
    type ValueMetadata = NonTensorValueMetadata | TensorValueMetadata;
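    /*
     * Illustrative sketch: `isTensor` discriminates ValueMetadata, so checking it narrows the
     * type to TensorValueMetadata and exposes `type` and `shape`.
     *
     * ```js
     * for (const meta of session.inputMetadata) {
     *   if (meta.isTensor) {
     *     // shape entries are numbers or symbolic dimension names (strings)
     *     console.log(meta.name, meta.type, meta.shape);
     *   }
     * }
     * ```
     */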
}
/**
 * Represents a runtime instance of an ONNX model.
 */
export interface InferenceSession {
    /**
     * Execute the model asynchronously with the given feeds and options.
     *
     * @param feeds - Representation of the model input. See type description of `InferenceSession.FeedsType` for
     * detail. (An illustrative usage sketch follows this interface declaration.)
     * @param options - Optional. A set of options that controls the behavior of model inference.
     * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding
     * values.
     */
    run(feeds: InferenceSession.FeedsType, options?: InferenceSession.RunOptions): Promise<InferenceSession.ReturnType>;
    /**
     * Execute the model asynchronously with the given feeds, fetches and options.
     *
     * @param feeds - Representation of the model input. See type description of `InferenceSession.FeedsType` for
     * detail.
     * @param fetches - Representation of the model output. See type description of `InferenceSession.FetchesType` for
     * detail.
     * @param options - Optional. A set of options that controls the behavior of model inference.
     * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding
     * values.
     */
    run(feeds: InferenceSession.FeedsType, fetches: InferenceSession.FetchesType, options?: InferenceSession.RunOptions): Promise<InferenceSession.ReturnType>;
    /**
     * Release the inference session and the underlying resources.
     */
    release(): Promise<void>;
    /**
     * Start profiling.
     */
    startProfiling(): void;
    /**
     * End profiling.
     */
    endProfiling(): void;
    /**
     * Get input names of the loaded model.
     */
    readonly inputNames: readonly string[];
    /**
     * Get output names of the loaded model.
     */
    readonly outputNames: readonly string[];
    /**
     * Get input metadata of the loaded model.
     */
    readonly inputMetadata: readonly InferenceSession.ValueMetadata[];
    /**
     * Get output metadata of the loaded model.
     */
    readonly outputMetadata: readonly InferenceSession.ValueMetadata[];
}
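/*
 * Illustrative usage sketch for the `run` overloads above (referenced from the first overload's
 * documentation). The names 'input' / 'output' and `inputTensor` are placeholders.
 *
 * ```js
 * // 1. Feeds only: all model outputs are computed and returned.
 * const results = await session.run({ input: inputTensor });
 *
 * // 2. Feeds plus fetches: request specific outputs, optionally pre-allocated.
 * const selected = await session.run({ input: inputTensor }, ['output']);
 * console.log(selected.output.dims, selected.output.data);
 * ```
 */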
export interface InferenceSessionFactory {
    /**
     * Create a new inference session and load the model asynchronously from an ONNX model file.
     *
     * @param uri - The URI or file path of the model to load.
     * @param options - Specify configuration for creating a new inference session.
     * @returns A promise that resolves to an InferenceSession object.
     */
    create(uri: string, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;
    /**
     * Create a new inference session and load the model asynchronously from an array buffer.
     *
     * @param buffer - An ArrayBuffer representation of an ONNX model.
     * @param options - Specify configuration for creating a new inference session.
     * @returns A promise that resolves to an InferenceSession object.
     */
    create(buffer: ArrayBufferLike, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;
    /**
     * Create a new inference session and load the model asynchronously from a segment of an array buffer.
     *
     * @param buffer - An ArrayBuffer representation of an ONNX model.
     * @param byteOffset - The beginning of the specified portion of the array buffer.
     * @param byteLength - The length in bytes of the specified portion of the array buffer.
     * @param options - Specify configuration for creating a new inference session.
     * @returns A promise that resolves to an InferenceSession object.
     */
    create(buffer: ArrayBufferLike, byteOffset: number, byteLength?: number, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;
    /**
     * Create a new inference session and load the model asynchronously from a Uint8Array.
     *
     * @param buffer - A Uint8Array representation of an ONNX model.
     * @param options - Specify configuration for creating a new inference session.
     * @returns A promise that resolves to an InferenceSession object.
     */
    create(buffer: Uint8Array, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;
}
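/*
 * Illustrative sketch ('model.onnx' is a placeholder path/URL): the `create` overloads above
 * accept a URI or file path, an ArrayBuffer (optionally with byte offset/length), or a Uint8Array.
 *
 * ```js
 * const fromUri = await InferenceSession.create('model.onnx', { executionProviders: ['wasm'] });
 *
 * const buffer = await (await fetch('model.onnx')).arrayBuffer();
 * const fromBuffer = await InferenceSession.create(buffer);
 * const fromSegment = await InferenceSession.create(buffer, 0, buffer.byteLength);
 * const fromBytes = await InferenceSession.create(new Uint8Array(buffer));
 * ```
 */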
export declare const InferenceSession: InferenceSessionFactory;
//# sourceMappingURL=inference-session.d.ts.map