class InferenceEngine::AsyncInferRequestThreadSafeDefault
Overview
Base class with a default implementation of an asynchronous multi-staged inference request. To customize the pipeline stages, a derived class should change the content of the AsyncInferRequestThreadSafeDefault::_pipeline member container. It consists of pairs of a task and the executor that will run the task. The class is recommended for plugins as a base class for their asynchronous inference request implementations. More…
#include <ie_infer_async_request_thread_safe_default.hpp>
class AsyncInferRequestThreadSafeDefault: public InferenceEngine::IInferRequestInternal
{
public:
// typedefs
typedef std::shared_ptr<AsyncInferRequestThreadSafeDefault> Ptr;
// structs
struct DisableCallbackGuard;
struct ImmediateStreamsExecutor;
// construction
AsyncInferRequestThreadSafeDefault(
const IInferRequestInternal::Ptr& request,
const ITaskExecutor::Ptr& taskExecutor,
const ITaskExecutor::Ptr& callbackExecutor
);
// methods
virtual StatusCode Wait(int64_t millis_timeout);
virtual void StartAsync();
virtual void Infer();
virtual std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const;
virtual void SetBlob(const std::string& name, const Blob::Ptr& data);
virtual void SetBlob(
const std::string& name,
const Blob::Ptr& data,
const PreProcessInfo& info
);
virtual void SetBlobs(
const std::string& name,
const std::vector<Blob::Ptr>& blobs
);
virtual BatchedBlob::Ptr GetBlobs(const std::string& name);
virtual Blob::Ptr GetBlob(const std::string& name);
virtual const PreProcessInfo& GetPreProcess(const std::string& name) const;
virtual void SetBatch(int batch);
virtual void SetCallback(Callback callback);
virtual std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState();
void ThrowIfCanceled() const;
virtual void Cancel();
virtual void setModelInputsOutputs(
const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs
);
};
Inherited Members
public:
// typedefs
typedef std::shared_ptr<IInferRequestInternal> Ptr;
typedef std::function<void(std::exception_ptr)> Callback;
// methods
virtual void Infer();
virtual void InferImpl();
virtual void Cancel();
virtual std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const;
virtual void SetBlob(const std::string& name, const Blob::Ptr& data);
virtual void SetBlobs(
const std::string& name,
const std::vector<Blob::Ptr>& blobs
);
virtual void SetBlobsImpl(
const std::string& name,
const BatchedBlob::Ptr& batched_blob
);
virtual Blob::Ptr GetBlob(const std::string& name);
virtual BatchedBlob::Ptr GetBlobs(const std::string& name);
virtual void SetBlob(
const std::string& name,
const Blob::Ptr& data,
const PreProcessInfo& info
);
virtual const PreProcessInfo& GetPreProcess(const std::string& name) const;
virtual void SetBatch(int batch);
virtual std::vector<std::shared_ptr<IVariableStateInternal>> QueryState();
virtual void StartAsync();
virtual void StartAsyncImpl();
virtual StatusCode Wait(int64_t millis_timeout);
virtual void SetCallback(Callback callback);
void checkBlob(
const Blob::Ptr& blob,
const std::string& name,
bool isInput,
const SizeVector& refDims = {}
) const;
virtual void checkBlobs();
void setPointerToExecutableNetworkInternal(const std::shared_ptr<IExecutableNetworkInternal>& exeNetwork);
std::shared_ptr<IExecutableNetworkInternal> getPointerToExecutableNetworkInternal() const;
void setPointerToSo(const std::shared_ptr<void>& so);
std::shared_ptr<void> getPointerToSo() const;
void* GetUserData();
void SetUserData(void* userData);
const std::vector<std::shared_ptr<const ov::Node>>& GetInputs() const;
const std::vector<std::shared_ptr<const ov::Node>>& GetOutputs() const;
virtual void setModelInputsOutputs(
const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs
);
Detailed Documentation
Base class with a default implementation of an asynchronous multi-staged inference request. To customize the pipeline stages, a derived class should change the content of the AsyncInferRequestThreadSafeDefault::_pipeline member container. It consists of pairs of a task and the executor that will run the task. The class is recommended for plugins as a base class for their asynchronous inference request implementations.
To synchronize a derived context with the pipeline stages, a derived class should call the AsyncInferRequestThreadSafeDefault::StopAndWait() function in its destructor.
Here is an example of an asynchronous inference request implementation for some accelerator device. It uses five different executors to run the stages of a synchronous inference request.
// Inherits from AsyncInferRequestThreadSafeDefault
class AcceleratorAsyncInferRequest : public AsyncInferRequestThreadSafeDefault {
    // Store the pointer to the synchronous request and five executors
    AcceleratorAsyncInferRequest(const AcceleratorSyncRequest::Ptr& syncRequest,
                                 const ITaskExecutor::Ptr& preprocessExecutor,
                                 const ITaskExecutor::Ptr& writeToDeviceExecutor,
                                 const ITaskExecutor::Ptr& runOnDeviceExecutor,
                                 const ITaskExecutor::Ptr& readFromDeviceExecutor,
                                 const ITaskExecutor::Ptr& postProcessExecutor) :
        AsyncInferRequestThreadSafeDefault(syncRequest, nullptr, nullptr),
        _accSyncRequest{syncRequest},
        _preprocessExecutor{preprocessExecutor},
        _writeToDeviceExecutor{writeToDeviceExecutor},
        _runOnDeviceExecutor{runOnDeviceExecutor},
        _readFromDeviceExecutor{readFromDeviceExecutor},
        _postProcessExecutor{postProcessExecutor}
    {
        // The five pipeline stages of the synchronous infer request are run by different executors
        _pipeline = {
            { _preprocessExecutor , [this] {
                _accSyncRequest->preprocess();
            }},
            { _writeToDeviceExecutor , [this] {
                _accSyncRequest->write_to_device();
            }},
            { _runOnDeviceExecutor , [this] {
                _accSyncRequest->run_on_device();
            }},
            { _readFromDeviceExecutor , [this] {
                _accSyncRequest->read_from_device();
            }},
            { _postProcessExecutor , [this] {
                _accSyncRequest->post_process();
            }},
        };
    }
    // As all stages use the _accSyncRequest member, we should wait for all stage tasks
    // to finish before the destructor destroys this member.
    ~AcceleratorAsyncInferRequest() {
        StopAndWait();
    }
    AcceleratorSyncRequest::Ptr _accSyncRequest;
    ITaskExecutor::Ptr _preprocessExecutor, _writeToDeviceExecutor, _runOnDeviceExecutor,
                       _readFromDeviceExecutor, _postProcessExecutor;
};
Typedefs
typedef std::shared_ptr<AsyncInferRequestThreadSafeDefault> Ptr
A shared pointer to AsyncInferRequestThreadSafeDefault.
Construction
AsyncInferRequestThreadSafeDefault(
const IInferRequestInternal::Ptr& request,
const ITaskExecutor::Ptr& taskExecutor,
const ITaskExecutor::Ptr& callbackExecutor
)
Wraps an IInferRequestInternal::Ptr implementation and constructs AsyncInferRequestThreadSafeDefault::_pipeline, where taskExecutor is used to run IInferRequestInternal::Infer asynchronously.
Parameters:
request - The synchronous request
taskExecutor - The task executor
callbackExecutor - The callback executor
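For reference, a minimal construction sketch (the MySyncInferRequest type and the executor variables are hypothetical): a plugin that is satisfied with the default single-stage pipeline simply wraps its synchronous request and supplies the two executors.
// A minimal sketch, assuming the plugin already has a synchronous request
// implementation (MySyncInferRequest, hypothetical) and two ITaskExecutor instances.
auto syncRequest = std::make_shared<MySyncInferRequest>(/* ... */);
auto asyncRequest = std::make_shared<AsyncInferRequestThreadSafeDefault>(
    syncRequest,        // request: the synchronous implementation to wrap
    taskExecutor,       // taskExecutor: runs the default pipeline stage (Infer)
    callbackExecutor);  // callbackExecutor: runs the user callback on completion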
Methods
virtual StatusCode Wait(int64_t millis_timeout)
Waits for completion of all pipeline stages. If the pipeline raises an exception, it is rethrown here.
Parameters:
millis_timeout - A timeout in milliseconds to wait for the result
Returns:
A status code
virtual void StartAsync()
Start inference of specified input(s) in asynchronous mode.
The method returns immediately; inference also starts immediately.
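A minimal usage sketch for StartAsync together with Wait (assuming request points to an object derived from AsyncInferRequestThreadSafeDefault with its input blobs already set; the "output" blob name is hypothetical):
request->StartAsync();                          // returns immediately, the pipeline starts running
// ... do other work on the calling thread ...
auto status = request->Wait(1000);              // block for at most 1000 ms
if (status == InferenceEngine::StatusCode::OK) {
    auto output = request->GetBlob("output");   // "output" is a hypothetical blob name
}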
virtual void Infer()
Infers specified input(s) in synchronous mode.
Blocks all methods of InferRequest while the request is ongoing (running or waiting in the queue).
virtual std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const
Queries performance measures per layer to identify the most time-consuming layers. Note: not all plugins may provide meaningful data.
Returns:
A map of layer names to profiling information for each layer.
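For illustration, a small sketch of reading the counters after a completed inference (assuming the plugin fills InferenceEngineProfileInfo):
// Print the real execution time of every layer that was actually executed.
for (const auto& kv : request->GetPerformanceCounts()) {
    const auto& info = kv.second;
    if (info.status == InferenceEngine::InferenceEngineProfileInfo::EXECUTED) {
        std::cout << kv.first << ": " << info.realTime_uSec << " us" << std::endl;
    }
}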
virtual void SetBlob(const std::string& name, const Blob::Ptr& data)
Set input/output data to infer.
Memory allocation does not happen.
Parameters:
name - Name of the input or output blob
data - The blob to set as input/output data
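A minimal sketch (the "input" name and shape are hypothetical): the caller allocates the blob itself and passes it to the request, which does not allocate memory.
InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
                                 {1, 3, 224, 224},
                                 InferenceEngine::Layout::NCHW);
auto blob = InferenceEngine::make_shared_blob<float>(desc);
blob->allocate();
request->SetBlob("input", blob);  // the request keeps a reference to this memory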
virtual void SetBlob(
const std::string& name,
const Blob::Ptr& data,
const PreProcessInfo& info
)
Sets pre-process for input data.
Parameters:
name - Name of the input blob
data - The blob to set as input data
info - Preprocess info for the blob
virtual void SetBlobs(
const std::string& name,
const std::vector<Blob::Ptr>& blobs
)
Set a batch of input data to infer. The default implementation performs basic validation and checks that none of the tensors are remote. Plugin-specific implementations may override this behavior to handle the remote tensors case. If a plugin expects only memory blobs (not remote blobs), consider overriding only SetBlobsImpl and reusing the existing basic implementation.
Parameters:
name - Name of the input blob
blobs - The vector of blobs to set as a batched input
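A minimal sketch (the blob name and variables are hypothetical): several single-image blobs are combined into one batched input.
std::vector<InferenceEngine::Blob::Ptr> images = {blob0, blob1, blob2};  // hypothetical pre-allocated blobs
request->SetBlobs("input", images);  // default implementation checks that none of them are remote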
virtual BatchedBlob::Ptr GetBlobs(const std::string& name)
Get input/output data to infer.
Memory allocation does not happen.
Parameters:
name - Name of the input blob
Returns:
A reference to the input batched blob.
virtual Blob::Ptr GetBlob(const std::string& name)
Get input/output data to infer.
Memory allocation does not happen.
Parameters:
name - Name of the input or output blob
Returns:
A reference to the input or output blob.
virtual const PreProcessInfo& GetPreProcess(const std::string& name) const
Gets pre-process for input data.
Parameters:
name - Name of the input blob
Returns:
A constant reference to the PreProcessInfo structure for the blob.
virtual void SetBatch(int batch)
Sets a new batch size when dynamic batching is enabled in the executable network that created this request.
Parameters:
batch - The new batch size
virtual void SetCallback(Callback callback)
Set a callback function that will be called on success or failure of the asynchronous request.
Parameters:
callback - The callback function to set
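A minimal sketch of a completion callback: the std::exception_ptr argument is null on success and carries the pipeline exception on failure.
request->SetCallback([](std::exception_ptr ex) {
    if (ex) {
        try {
            std::rethrow_exception(ex);
        } catch (const std::exception& e) {
            std::cerr << "Inference failed: " << e.what() << std::endl;
        }
    } else {
        // success: read output blobs, notify the application, etc.
    }
});
request->StartAsync();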
virtual std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState()
Queries memory states.
Returns:
The memory states of the request
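A small sketch (assuming the plugin exposes variable states): enumerate and reset the request's memory states, for example before processing a new independent sequence.
for (auto& state : request->QueryState()) {
    std::cout << "state: " << state->GetName() << std::endl;
    state->Reset();  // revert the state to its initial value
}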
virtual void Cancel()
Cancel the current inference request execution.
virtual void setModelInputsOutputs(
const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs
)
Sets inputs/outputs from ov::Model.