namespace ngraph::runtime::reference

Overview

namespace reference {

// namespaces

namespace ngraph::runtime::reference::adaptive_pool;
namespace ngraph::runtime::reference::detail;
namespace ngraph::runtime::reference::details;
namespace ngraph::runtime::reference::fake_quantize_details;
namespace ngraph::runtime::reference::fft_common;
namespace ngraph::runtime::reference::internal;
namespace ngraph::runtime::reference::kernel;
namespace ngraph::runtime::reference::nms_common;

// typedefs

typedef ngraph::op::v4::Interpolate::NearestMode Nearest_mode;
typedef ngraph::op::v4::Interpolate::CoordinateTransformMode Transform_mode;
typedef ngraph::op::v4::Interpolate::InterpolateMode InterpolateMode;
typedef op::v3::ROIAlign::PoolingMode ROIPoolingMode;
typedef op::v9::ROIAlign::AlignedMode AlignedMode;
typedef std::function<void(const std::shared_ptr<ngraph::Function>&function, const HostTensorVector&inputs, HostTensorVector&outputs)> custom_evaluate_function;

// enums

enum
{
    idxLocation,
    idxConfidence,
    idxPriors,
    idxArmConfidence,
    idxArmLocation,
    numInputs,
};

enum CellType;
enum FFTKind;
enum PSROIPoolingMode;

// structs

struct CellArgs;
struct convert_types;
template <>
struct widen<double>;
template <typename T>
struct widen;
template <>
struct widen<float>;

// classes

class GetNearestPixel;
class GetOriginalCoordinate;
template <typename T>
class InterpolateEval;
class InterpolateEvalHelper;
template <typename Element>
class Span;
template <typename dataType>
class referenceDetectionOutput;

// global variables

const uint32_t crush_resistance_const_lower_value = 0x9E3779B9;
const uint32_t crush_resistance_const_upper_value = 0xBB67AE85;
const uint64_t statistic_maximizing_multiplier_n = 0xD2511F53;
const uint64_t statistic_maximizing_multiplier_counter = 0xCD9E8D57;
const size_t rounds_number = 10;
const uint64_t skip_const = 256;

// global functions

template <
    typename T,
    typename std::enable_if<std::is_unsigned<T>::value, bool>::type = true
    >
void abs(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void acos(const T \* arg, T \* out, size_t count);

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void acosh(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void adaptive_avg_pool(
    const T \* arg,
    T \* out,
    const Shape& arg_shape,
    const Shape& out_shape
    );

template <typename T, typename IT>
void adaptive_max_pool_1d(
    const T \* arg,
    T \* out,
    IT \* indices,
    size_t h_in,
    size_t h_out
    );

template <typename T, typename IT>
void adaptive_max_pool_2d(
    const T \* arg,
    T \* out,
    IT \* indices,
    size_t h_in,
    size_t h_out,
    size_t w_in,
    size_t w_out
    );

template <typename T, typename IT>
void adaptive_max_pool_3d(
    const T \* arg,
    T \* out,
    IT \* indices,
    size_t d_in,
    size_t d_out,
    size_t h_in,
    size_t h_out,
    size_t w_in,
    size_t w_out
    );

template <typename T, typename IT>
void adaptive_max_pool(
    const T \* arg,
    T \* out,
    IT \* selected_indices,
    const Shape& arg_shape,
    const Shape& out_shape
    );

template <typename T>
void add(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void add(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void logical_and(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void logical_and(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void asin(const T \* arg, T \* out, size_t count);

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void asinh(
    const T \* arg,
    T \* out,
    size_t count
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void atan(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename X, typename Y, typename Z>
void atan2(
    const X \* py,
    const Y \* px,
    Z \* pout,
    size_t count
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void atanh(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T, typename U, typename Functor>
void autobroadcast_binop(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    Functor elementwise_functor
    );

template <typename T, typename U, typename Functor>
void autobroadcast_select(
    const U \* arg0,
    const T \* arg1,
    const T \* arg2,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& arg2_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    Functor elementwise_functor
    );

template <typename T>
void avg_pool_backprop(
    const T \* delta,
    T \* out,
    const Shape& delta_shape,
    const Shape& out_shape,
    const Shape& window_shape,
    const Strides& window_movement_strides,
    const Shape& padding_below,
    const Shape& padding_above,
    bool include_padding_in_avg_computation
    );

template <typename T>
void avg_pool(
    const T \* arg,
    T \* out,
    const Shape& arg_shape,
    const Shape& out_shape,
    const Shape& window_shape,
    const Strides& window_movement_strides,
    const Shape& padding_below,
    const Shape& padding_above,
    bool include_padding_in_avg_computation
    );

template <typename T>
static T norm(T val, T mean, T var, T eps);

template <typename T>
void batch_norm_inference(
    float eps,
    const T \* in,
    const T \* gamma,
    const T \* beta,
    const T \* mean,
    const T \* variance,
    T \* out,
    const Shape& in_shape
    );

template <typename T_IN, typename T_F>
void binary_convolution(
    const T_IN \* in,
    const T_F \* f,
    T_IN \* out,
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const float pad_value
    );

void broadcast(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const Shape& out_shape,
    const AxisSet& broadcast_axes,
    size_t elem_size
    );

template <typename T, typename B, typename P>
void bucketize(
    const T \* data,
    const B \* buckets,
    P \* out,
    const Shape& data_shape,
    const Shape& buckets_shape,
    bool with_right_bound
    );

template <typename T>
void ceiling(const T \* arg, T \* out, size_t count);

template <typename T>
void clamp(
    const T \* arg,
    T \* out,
    T min,
    T max,
    size_t count
    );

void concat(
    const std::vector<const char \*>& args,
    char \* out,
    const std::vector<Shape>& in_shapes,
    const Shape& out_shape,
    int64_t concatenation_axis,
    size_t elem_size
    );

template <typename T>
void constant(const T \* arg0, T \* out, size_t count);

template <typename TI, typename TO>
std::enable_if<!std::is_same<TO, char>::value>::type convert(
    const TI \* arg,
    TO \* out,
    size_t count
    );

template <>
void convert<uint8_t, float16>(const uint8_t \* arg, float16 \* out, size_t count);

template <>
void convert<float16, float>(const float16 \* arg, float \* out, size_t count);

template <>
void convert<float, int8_t>(const float \* arg, int8_t \* out, size_t count);

template <>
void convert<float16, int8_t>(const float16 \* arg, int8_t \* out, size_t count);

template <typename TI, typename TO>
std::enable_if<std::is_same<TO, char>::value>::type convert(
    const TI \* arg,
    TO \* out,
    size_t count
    );

template <typename T>
std::tuple<T, T, T> yuv_pixel_to_rgb(
    float y_val,
    float u_val,
    float v_val
    );

template <typename T>
void color_convert_nv12(
    const T \* arg_y,
    const T \* arg_uv,
    T \* out_ptr,
    size_t batch_size,
    size_t image_h,
    size_t image_w,
    size_t stride_y,
    size_t stride_uv,
    ov::op::util::ConvertColorNV12Base::ColorConversion color_format
    );

template <typename T>
void color_convert_i420(
    const T \* arg_y,
    const T \* arg_u,
    const T \* arg_v,
    T \* out_ptr,
    size_t batch_size,
    size_t image_h,
    size_t image_w,
    size_t stride_y,
    size_t stride_uv,
    ov::op::util::ConvertColorI420Base::ColorConversion color_format
    );

template <ov::element::Type_t ET>
bool color_convert_nv12(
    const std::shared_ptr<Node>& op,
    const ov::HostTensorVector& outputs,
    const ov::HostTensorVector& inputs,
    ov::op::util::ConvertColorNV12Base::ColorConversion type
    );

template <ov::element::Type_t ET>
bool color_convert_i420(
    const std::shared_ptr<Node>& op,
    const ov::HostTensorVector& outputs,
    const ov::HostTensorVector& inputs,
    ov::op::util::ConvertColorI420Base::ColorConversion type
    );

template <typename T>
void convolution(
    const T \* in,
    const T \* f,
    T \* out,
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <typename T>
void convolution_backprop_in(
    const T \* delta_in,
    const T \* filter,
    T \* delta_out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& in_dilation,
    const Strides& filter_dilation,
    const CoordinateDiff& forward_in_pad_bellow,
    const CoordinateDiff& forward_in_pad_above,
    const Strides& stride,
    const CoordinateDiff& output_padding
    );

template <typename T>
void copy(const T \* arg, T \* out, size_t count);

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void cos(
    const T \* arg,
    T \* out,
    size_t count
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void cosh(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void ctc_greedy_decoder(
    const T \* data,
    const T \* sequence_masks,
    T \* out,
    const Shape& data_shape,
    const Shape& sequence_masks_shape,
    const Shape& out_shape,
    const bool ctc_merge_repeated
    );

template <typename TF, typename TI, typename TCI, typename TSL>
void ctc_greedy_decoder_seq_len(
    const TF \* data,
    const TI \* sequence_length,
    const TI \* blank_index,
    TCI \* out1,
    TSL \* out2,
    const Shape& data_shape,
    const Shape& out_shape,
    const bool ctc_merge_repeated
    );

template <typename T, typename U>
void CTCLoss(
    const T \* logits,
    const Shape& logitsShape,
    const U \* logitsLength,
    const U \* labels,
    const U \* labelsLength,
    const U \* blankIndexP,
    const bool preprocessCollapseRepeated,
    const bool ctcMergeRepeated,
    const bool unique,
    T \* output
    );

template <typename T, typename P>
void cumsum(
    const T \* arg,
    const P \* axis_tensor,
    T \* out,
    const Shape& tensor_shape,
    const bool exclusive,
    const bool reverse
    );

template <typename T>
void deformable_convolution(
    const T \* in,
    const T \* offsets,
    const T \* filters,
    const T \* mask,
    T \* out,
    const Shape& in_shape,
    const Shape& o_shape,
    const Shape& f_shape,
    const Shape& m_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const int64_t groups,
    const int64_t deformable_groups,
    const bool bilinear_interpolation_pad
    );

template <typename T>
void deformable_convolution(
    const T \* in,
    const T \* offsets,
    const T \* filters,
    T \* out,
    const Shape& in_shape,
    const Shape& o_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const int64_t groups,
    const int64_t deformable_groups,
    const bool bilinear_interpolation_pad = false
    );

template <typename T>
void deformable_psroi_pooling(
    const T \* data_input,
    const Shape& data_input_shape,
    const T \* rois_input,
    const Shape& rois_input_shape,
    const T \* offsets_input,
    const Shape& offsets_input_shape,
    T \* output,
    const Shape& output_shape,
    const std::string& mode_str,
    const float spatial_scale,
    const int64_t spatial_bins_x,
    const int64_t spatial_bins_y,
    const float trans_std,
    const int64_t part_size
    );

void depth_to_space(
    const char \*const in,
    const Shape& in_shape,
    char \*const out,
    const Shape& out_shape,
    const size_t block_size,
    const op::DepthToSpace::DepthToSpaceMode mode,
    const size_t elem_size
    );

template <typename T>
std::enable_if<std::is_integral<T>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count,
    bool pythondiv
    );

template <typename T>
std::enable_if<std::is_integral<T>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    bool pythondiv
    );

template <typename T>
std::enable_if<std::is_floating_point<T>::value||std::is_same<T, bfloat16>::value||std::is_same<T, float16>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count,
    bool pythondiv
    );

template <typename T>
std::enable_if<std::is_floating_point<T>::value||std::is_same<T, bfloat16>::value||std::is_same<T, float16>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    bool pythondiv
    );

void einsum(
    const HostTensorVector& outputs,
    const HostTensorVector& inputs,
    const std::string& equation
    );

template <typename T>
void elu(
    const T \* arg,
    T \* out,
    size_t count,
    double alpha
    );

template <typename T, typename U>
void embeddingBagOffsetsSum(
    const T \* emb_table,
    const U \* indices,
    const U \* offsets,
    const U \* default_index,
    const T \* weights,
    T \* out,
    const size_t indices_count,
    const Shape& outShape
    );

template <typename T, typename U>
void embeddingBagPackedSum(
    const T \* emb_table,
    const U \* indices,
    const T \* weights,
    T \* out,
    const Shape& indicesShape,
    const Shape& outShape
    );

template <typename T, typename U>
void embeddingSegmentsSum(
    const T \* embTable,
    const U \* indices,
    const U \* segmentIds,
    const U \* defaultIndex,
    const T \* weights,
    T \* out,
    const Shape& embTableShape,
    const Shape& indicesShape,
    const Shape& outShape
    );

template <typename T>
void equal(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void equal(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void erf(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void exp(const T \* arg, T \* out, size_t count);

void experimental_detectron_detection_output(
    const float \* input_rois,
    const float \* input_deltas,
    const float \* input_scores,
    const float \* input_im_info,
    size_t roi_count,
    const op::v6::ExperimentalDetectronDetectionOutput::Attributes& attrs,
    float \* output_boxes,
    float \* output_scores,
    int32_t \* output_classes
    );

void experimental_detectron_detection_output_postprocessing(
    void \* pboxes,
    void \* pclasses,
    void \* pscores,
    const ngraph::element::Type output_type,
    const std::vector<float>& output_boxes,
    const std::vector<int32_t>& output_classes,
    const std::vector<float>& output_scores,
    const Shape& output_boxes_shape,
    const Shape& output_classes_shape,
    const Shape& output_scores_shape
    );

template <typename T>
void experimental_detectron_prior_grid_generator(
    const T \* priors,
    const Shape& priors_shape,
    const Shape& feature_map_shape,
    const Shape& im_data_shape,
    T \* output_rois,
    int64_t grid_h,
    int64_t grid_w,
    float stride_h,
    float stride_w
    );

void experimental_detectron_proposals_single_image(
    const float \* im_info,
    const float \* anchors,
    const float \* deltas,
    const float \* scores,
    const op::v6::ExperimentalDetectronGenerateProposalsSingleImage::Attributes& attrs,
    const Shape& im_info_shape,
    const Shape& anchors_shape,
    const Shape& deltas_shape,
    const Shape& scores_shape,
    float \* output_rois,
    float \* output_scores
    );

void experimental_detectron_proposals_single_image_postprocessing(
    void \* prois,
    void \* pscores,
    const ngraph::element::Type output_type,
    const std::vector<float>& output_rois,
    const std::vector<float>& output_scores,
    const Shape& output_rois_shape,
    const Shape& output_scores_shape
    );

void experimental_detectron_roi_feature_extractor(
    const std::vector<std::vector<float>>& inputs,
    const std::vector<Shape>& input_shapes,
    const op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes& attrs,
    float \* output_rois_features,
    float \* output_rois
    );

void experimental_detectron_roi_feature_extractor_postprocessing(
    void \* prois_features,
    void \* prois,
    const ngraph::element::Type output_type,
    const std::vector<float>& output_roi_features,
    const std::vector<float>& output_rois,
    const Shape& output_roi_features_shape,
    const Shape& output_rois_shape
    );

template <typename T>
void experimental_detectron_topk_rois(
    const T \* input_rois,
    const T \* input_probs,
    const Shape& input_rois_shape,
    const Shape& input_probs_shape,
    size_t max_rois,
    T \* output_rois
    );

template <typename T>
void extract_image_patches(
    const std::shared_ptr<op::ExtractImagePatches> extImgPatches,
    const T \* input,
    T \* out,
    const Shape& inShape,
    const Shape& outShape
    );

template <typename T>
void eye(
    T \* data,
    const Shape& out_shape,
    const int64_t diagonal_index
    );

template <typename T>
void fake_quantize(
    const T \*const arg,
    const T \*const in_low,
    const T \*const in_high,
    const T \*const out_low,
    const T \*const out_high,
    T \*const out,
    const Shape& arg_shape,
    const Shape& in_low_shape,
    const Shape& in_high_shape,
    const Shape& out_low_shape,
    const Shape& out_high_shape,
    size_t levels,
    const op::AutoBroadcastSpec& broadcast
    );

void fft(
    const float \* input_data,
    const Shape& input_data_shape,
    const int64_t \* axes_data,
    const Shape& axes_data_shape,
    float \* fft_result,
    const Shape& output_shape,
    FFTKind fft_kind
    );

void fft_postprocessing(
    const HostTensorVector& outputs,
    const ngraph::element::Type output_type,
    const std::vector<float>& fft_result
    );

std::vector<int64_t> canonicalize_axes(
    const int64_t \* axes_data,
    const Shape& axes_data_shape,
    int64_t complex_data_rank
    );

template <typename T>
void floor(const T \* arg, T \* out, size_t count);

template <typename T>
void floor_mod(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void function(
    const std::shared_ptr<Function>& function,
    const HostTensorVector& inputs,
    HostTensorVector& outputs
    );

template <typename T, typename U>
void gather(
    const T \*const data,
    const U \*const indices,
    T \* out,
    const Shape& data_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    size_t axis,
    size_t batch_dims = 0
    );

template <typename T, typename U>
void gather_elements(
    const T \* data,
    const U \* indices,
    T \* out,
    const Shape& data_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    int64_t axis
    );

template <typename T, typename U>
void gather_nd(
    const T \*const params,
    const U \*const indices,
    T \*const out,
    const Shape& params_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    const int batch_dims = 0
    );

void gather_tree(
    const char \* step_ids,
    const char \* parent_ids,
    const char \* max_seq_len,
    const char \* end_token,
    char \* out,
    const Shape& step_ids_shape,
    const Shape& parent_ids_shape,
    const Shape& max_seq_len_shape,
    const Shape& end_token_shape,
    const element::Type& type
    );

template <typename T>
void gelu(
    const T \* arg,
    T \* out,
    op::GeluApproximationMode mode,
    size_t count
    );

void generate_proposals(
    const std::vector<float>& im_info,
    const std::vector<float>& anchors,
    const std::vector<float>& deltas,
    const std::vector<float>& scores,
    const op::v9::GenerateProposals::Attributes& attrs,
    const Shape& im_info_shape,
    const Shape& anchors_shape,
    const Shape& deltas_shape,
    const Shape& scores_shape,
    std::vector<float>& output_rois,
    std::vector<float>& output_scores,
    std::vector<int64_t>& num_rois
    );

void generate_proposals_postprocessing(
    void \* prois,
    void \* pscores,
    void \* proi_num,
    const ngraph::element::Type& output_type,
    const ngraph::element::Type& roi_num_type,
    const std::vector<float>& output_rois,
    const std::vector<float>& output_scores,
    const std::vector<int64_t>& num_rois,
    const Shape& output_rois_shape,
    const Shape& output_scores_shape
    );

template <typename T>
void greater(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void greater(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void greater_eq(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void greater_eq(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename DATA_ET, typename GRID_ET>
void grid_sample(
    DATA_ET \* output,
    const DATA_ET \* data,
    const GRID_ET \* grid,
    const Shape& data_shape,
    const Shape& grid_shape,
    const bool align_corners,
    const ov::op::v9::GridSample::InterpolationMode interpolation_mode,
    const ov::op::v9::GridSample::PaddingMode padding_mode
    );

template <typename T>
void grn(
    const T \* data,
    T \* out,
    float bias,
    const Shape& data_shape
    );

void validate_group_convolution_parameters(
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <
    typename INPUT,
    typename FILTER,
    typename OUTPUT,
    typename ACCU = typename widen<OUTPUT>::type
    >
void group_convolution(
    const INPUT \* in,
    const FILTER \* f,
    OUTPUT \* out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

void infer_backward_conv_output_shape(
    const Shape& in_spatial_shape,
    const Shape& f_spatial_shape,
    Shape& out_spatial_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

void validate_convolution_backprop_data_parameters(
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

void validate_group_convolution_backprop_data_parameters(
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <typename T>
void group_convolution_backprop_data(
    const T \* in,
    const T \* f,
    T \* out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const CoordinateDiff& output_padding
    );

template <
    typename OUTPUT,
    typename FILTER,
    typename INPUT,
    typename ACCUMULATION = typename widen<INPUT>::type
    >
NGRAPH_DEPRECATED(
    "group_convolution_backprop_data function without output_paddings is deprecated, "
    "use the one with output_padding."
    )
void group_convolution_backprop_data(
    const INPUT \* in,
    const FILTER \* f,
    OUTPUT \* out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <typename T>
void gru_cell(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    T \* dst_data,
    const std::string& activation_f,
    const std::string& activation_g,
    float clip,
    bool linear_before_reset
    );

template <typename T>
void hard_sigmoid(
    const T \* arg,
    const T alpha,
    const T beta,
    T \* out,
    size_t count
    );

template <typename T>
void hsigmoid(const T \* arg, T \* out, size_t count);

template <typename T>
void hswish(const T \* arg, T \* out, size_t count);

void if_reference(
    const std::vector<std::shared_ptr<Function>>& body,
    const std::vector<op::util::MultiSubgraphOutputDescriptionVector>& out_descs,
    const std::vector<op::util::MultiSubgraphInputDescriptionVector>& input_descs,
    const HostTensorVector& out,
    const HostTensorVector& args
    );

static void pad_input_data(
    const uint8_t \* data_ptr,
    uint8_t \* padded_data_ptr,
    size_t type_size,
    const ov::Shape& input_shape,
    const ov::Shape& padded_input_shape,
    const std::vector<size_t>& pads_begin
    );

static PartialShape get_padded_input_shape(
    const PartialShape& input_shape,
    const op::v0::Interpolate::Attributes& attrs
    );

static std::vector<float> get_scales(
    const PartialShape& input_data_partial_shape,
    const Shape& out_shape,
    const op::v0::Interpolate::Attributes& attrs
    );

static op::v4::Interpolate::InterpolateAttrs transform_v0_to_v4(
    const PartialShape& input_partial_shape,
    const op::v0::Interpolate::Attributes& attrs_v0
    );

template <typename T>
void interpolate(
    const T \* input_data,
    const Shape& input_data_shape,
    const std::vector<float>& scales,
    const std::vector<int64_t>& axes,
    T \* out,
    const Shape& out_shape,
    const op::v4::Interpolate::InterpolateAttrs& attrs
    );

template <typename T>
void interpolate(
    T \* input_data,
    const PartialShape& input_data_shape,
    T \* out,
    const Shape& out_shape,
    const op::v0::Interpolate::Attributes& attrs
    );

void irdft(
    const std::vector<float>& input_data,
    const Shape& input_data_shape,
    const std::vector<int64_t>& axes_data,
    float \* irdft_result,
    const Shape& fft_output_shape,
    const Shape& irdft_output_shape,
    const int64_t last_signal_size
    );

template <typename T>
void less(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void less(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void less_eq(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void less_eq(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void log(const T \* arg, T \* out, size_t count);

template <typename T>
void log_softmax(
    const T \* arg,
    T \* out,
    const Shape& shape,
    const AxisSet& axes
    );

static void reduce_logical_and(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

static void reduce_logical_or(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

void loop(
    const std::shared_ptr<Function>& body,
    const op::util::OutputDescriptionVector& out_descs,
    const op::util::InputDescriptionVector& input_descs,
    const opset5::Loop::SpecialBodyPorts& special_ports,
    const HostTensorVector& out,
    const HostTensorVector& args
    );

static size_t point_to_flat_idx(
    const Shape& shape,
    const std::vector<size_t>& point
    );

static std::vector<size_t> slice_indices(
    const Shape& full_shape,
    const std::vector<size_t>& begin,
    const Shape& slice_shape
    );

template <typename T>
static T sum_region_across_axes(
    const T \* arg,
    const std::vector<size_t>& indices
    );

template <typename T>
void lrn(
    const T \* arg,
    const AxisSet& axes,
    T \* out,
    const Shape& arg_shape,
    double dalpha,
    double dbeta,
    double dbias,
    size_t size
    );

template <typename T>
void lstm_cell(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* C,
    const Shape& C_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    T \* out_Ht,
    T \* out_Ct,
    const std::string& activation_f,
    const std::string& activation_g,
    const std::string& activation_h,
    float clip
    );

template <typename T>
void lstm_cell_v1(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* C,
    const Shape& C_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    const T \* P,
    const Shape& P_shape,
    T \* out_Ht,
    T \* out_Ct,
    const std::string& activation_f,
    const std::string& activation_g,
    const std::string& activation_h,
    float clip,
    const ov::op::LSTMWeightsFormat weight_format,
    bool input_forget
    );

template <typename T>
void matmul(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& out_shape,
    bool transpose_arg0,
    bool transpose_arg1
    );

void matrix_nms(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    const op::v8::MatrixNms::Attributes& attrs,
    float \* selected_outputs,
    const Shape& selected_outputs_shape,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    int64_t \* valid_outputs
    );

template <typename T>
void max(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

template <typename T>
void max_pool(
    const T \* arg,
    T \* out,
    const Shape& arg_shape,
    const Shape& out_shape,
    const Shape& window_shape,
    const Strides& window_movement_strides,
    const Shape& padding_below,
    const Shape& padding_above
    );

template <typename Values_t, typename Indices_t>
void max_pool(
    const Values_t \* data,
    Values_t \* values,
    Indices_t \* indices,
    const Shape& data_shape,
    const Shape& out_shape,
    const Shape& kernel,
    const Strides& strides,
    const Strides& dilations,
    const Shape& pads_begin,
    const Shape& pads_end,
    const int64_t axis = 0
    );

template <typename T>
void maximum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void maximum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void mean(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

template <typename T>
void min(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

template <typename T>
void minimum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void minimum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void mish(const T \* arg, T \* out, size_t count);

template <typename T>
void mod(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg_shape0,
    const Shape& arg_shape1,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void multiclass_nms(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    const int64_t \* roisnum_data,
    const Shape& roisnum_data_shape,
    const op::util::MulticlassNmsBase::Attributes& attrs,
    float \* selected_outputs,
    const Shape& selected_outputs_shape,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    int64_t \* valid_outputs
    );

template <typename T>
void multiply(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void multiply(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void mvn(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const bool normalize_variance,
    const AxisSet& reduction_axes,
    const double eps
    );

template <typename T>
void mvn_6(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    AxisSet reduction_axes,
    bool normalize_variance,
    double eps,
    op::MVNEpsMode eps_mode
    );

template <typename T>
void negate(const T \* arg, T \* out, size_t count);

void non_max_suppression5(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    int64_t max_output_boxes_per_class,
    float iou_threshold,
    float score_threshold,
    float soft_nms_sigma,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    float \* selected_scores,
    const Shape& selected_scores_shape,
    int64_t \* valid_outputs,
    const bool sort_result_descending
    );

void nms5_postprocessing(
    const HostTensorVector& outputs,
    const ngraph::element::Type output_type,
    const std::vector<int64_t>& selected_indices,
    const std::vector<float>& selected_scores,
    int64_t valid_outputs,
    const ngraph::element::Type selected_scores_type
    );

void non_max_suppression(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    int64_t max_output_boxes_per_class,
    float iou_threshold,
    float score_threshold,
    float soft_nms_sigma,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    float \* selected_scores,
    const Shape& selected_scores_shape,
    int64_t \* valid_outputs,
    const bool sort_result_descending
    );

void nms_postprocessing(
    const HostTensorVector& outputs,
    const ngraph::element::Type output_type,
    const std::vector<int64_t>& selected_indices,
    const std::vector<float>& selected_scores,
    int64_t valid_outputs,
    const ngraph::element::Type selected_scores_type
    );

template <typename T>
size_t non_zero_get_count(
    const T \* arg,
    const Shape& arg_shape
    );

template <typename T, typename U>
void non_zero(
    const T \* arg,
    U \* out,
    const Shape& arg_shape
    );

template <typename T>
void normalize_l2(
    const T \* data,
    T \* out,
    const Shape& data_shape,
    const AxisSet& reduction_axes,
    float eps,
    op::EpsMode eps_mode
    );

template <typename T>
void logical_not(const T \* arg, T \* out, size_t count);

template <typename T, typename U>
void not_equal(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename INPUT_TYPE>
void one_hot(
    const INPUT_TYPE \* indices,
    const Shape& indices_shape,
    char \* out,
    const size_t out_elem_size,
    const size_t depth,
    const int64_t one_hot_axis,
    const char \* on_value,
    const char \* off_value
    );

template <typename T>
void logical_or(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void logical_or(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void pad(
    const char \* data,
    const char \* pad_value,
    char \* out,
    const size_t elem_size,
    const Shape& data_shape,
    const Shape& out_shape,
    const CoordinateDiff& padding_below,
    const CoordinateDiff& padding_above,
    const op::PadMode pad_mode
    );

template <typename T>
void power(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void power(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void prelu(
    const T \* arg,
    const T \* slope,
    T \* out,
    const Shape& arg_shape,
    const Shape& slope_shape
    );

static float clip_great(float x, float threshold);
static float clip_less(float x, float threshold);

template <typename T>
void prior_box(
    const T \* data,
    const T \* img,
    float \* dst_data,
    const Shape& out_shape,
    const op::v8::PriorBox::Attributes& attrs
    );

template <typename T>
void prior_box_clustered(
    const T \* data,
    const T \* img,
    float \* dst_data,
    const Shape& out_shape,
    const op::PriorBoxClusteredAttrs& attrs
    );

template <typename T>
void product(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

template <typename T>
void proposal_v0(
    const T \* class_probs,
    const T \* bbox_deltas,
    const T \* image_shape,
    T \* output,
    const Shape& class_probs_shape,
    const Shape& bbox_deltas_shape,
    const Shape& image_shape_shape,
    const Shape& output_shape,
    const op::ProposalAttrs& attrs
    );

template <typename T>
void proposal_v4(
    const T \* class_probs,
    const T \* bbox_deltas,
    const T \* image_shape,
    T \* output,
    T \* out_probs,
    const Shape& class_probs_shape,
    const Shape& bbox_deltas_shape,
    const Shape& image_shape_shape,
    const Shape& output_shape,
    const Shape& out_probs_shape,
    const op::ProposalAttrs& attrs
    );

template <typename T>
void psroi_pooling(
    const T \* input,
    const Shape& input_shape,
    const T \* rois,
    const Shape& rois_shape,
    T \* output,
    const Shape& output_shape,
    const std::string& mode_str,
    float spatial_scale,
    int spatial_bins_x,
    int spatial_bins_y
    );

template <typename REAL, typename QUANT>
void quantize(
    const REAL \* input,
    const REAL \* scale,
    const QUANT \* zero_point,
    QUANT \* output,
    const Shape& input_shape,
    const Shape& scale_zero_point_shape,
    const AxisSet& axes,
    op::Quantize::RoundMode round_mode
    );

std::pair<uint64_t, uint64_t> random_uniform(
    const uint64_t \* out_shape,
    const char \* min_val,
    const char \* max_val,
    char \* out,
    const Shape& out_shape_shape,
    const ngraph::element::Type& elem_type,
    uint64_t seed,
    uint64_t seed2,
    std::pair<uint64_t, uint64_t> prev_state
    );

template <typename T>
std::enable_if<std::is_floating_point<T>::value||std::is_same<T, bfloat16>::value||std::is_same<T, float16>::value>::type range(
    const T \* start,
    const T \* step,
    const size_t& num_elem,
    T \* out
    );

template <typename T>
std::enable_if<std::is_integral<T>::value>::type range(
    const T \* start,
    const T \* step,
    const size_t& num_elem,
    T \* out
    );

void rdft(
    const std::vector<float>& input_data,
    const Shape& input_data_shape,
    const std::vector<int64_t>& axes_data,
    const Shape& output_fft_shape,
    float \* rdft_result
    );

template <typename T>
void reduce_l1(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

template <typename T>
void reduce_l2(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

static int entry_index(
    int width,
    int height,
    int coords,
    int classes,
    int outputs,
    int batch,
    int location,
    int entry
    );

template <typename T>
static T sigmoid(float x);

template <typename T>
static void softmax_generic(
    const T \* src_data,
    T \* dst_data,
    int batches,
    int channels,
    int height,
    int width
    );

template <typename T>
void region_yolo(
    const T \* input,
    T \* output,
    const Shape& input_shape,
    const int coords,
    const int classes,
    const int regions,
    const bool do_softmax,
    const std::vector<int64_t>& mask
    );

template <typename T>
void relu(const T \* arg, T \* out, size_t count);

void reorg_yolo(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    int64_t stride,
    const size_t elem_size
    );

void reshape(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const AxisVector& in_axis_order,
    const Shape& out_shape,
    size_t elem_size
    );

template <typename T>
void result(const T \* arg, T \* out, size_t count);

void reverse(
    const char \* arg,
    char \* out,
    const Shape& arg_shape,
    const Shape& out_shape,
    const AxisSet& reversed_axes,
    size_t elem_size
    );

template <typename T, typename U>
void reverse_sequence(
    const T \* arg,
    T \* out,
    const Shape& arg_shape,
    size_t batch_axis,
    size_t sequence_axis,
    const U \* sequence_lengths
    );

template <typename T>
void rnn_cell(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    T \* dst_data,
    const std::string& activation_f,
    float clip
    );

template <typename T>
void roi_align(
    const T \* feature_maps,
    const T \* rois,
    const int64_t \* batch_indices,
    T \* out,
    const Shape& feature_maps_shape,
    const Shape& rois_shape,
    const Shape& batch_indices_shape,
    const Shape& out_shape,
    const int pooled_height,
    const int pooled_width,
    const int sampling_ratio,
    const float spatial_scale,
    const ROIPoolingMode& pooling_mode,
    const AlignedMode& aligned_mode = AlignedMode::ASYMMETRIC
    );

template <typename T>
void roi_pooling(
    const T \* feature_maps,
    const T \* rois,
    T \* output,
    const Shape& feature_maps_shape,
    const Shape& rois_shape,
    const Shape& output_shape,
    const float spatial_scale,
    const std::string& pooling_method
    );

size_t shift_pos(
    size_t pos_in_spanned_data,
    size_t dim_shift,
    size_t spanned_shape_size,
    size_t dim_size
    );

void roll(
    const char \* arg,
    const int64_t \* shift,
    const int64_t \* axes,
    char \* out,
    const Shape& arg_shape,
    const Shape& shift_shape,
    const Shape& axes_shape,
    size_t elem_size
    );

template <typename T>
T round_to_nearest_even(const T arg);

template <typename T>
void round(
    const T \* arg,
    T \* out,
    size_t count,
    const op::v5::Round::RoundMode mode
    );

template <typename DataType, typename IndicesType>
void scatter_elem_update(
    const DataType \* input_data,
    const IndicesType \* indices,
    const DataType \* updates,
    const int64_t& axis,
    DataType \* out_buf,
    const Shape& data_shape,
    const Shape& indices_shape
    );

template <typename dataType, typename indicesType>
void scatterNdUpdate(
    const dataType \*const inputData,
    const indicesType \*const indices,
    const dataType \*const updates,
    dataType \*const outBuf,
    const Shape& dataShape,
    const Shape& indicesShape,
    const Shape& updatesShape
    );

static const CoordinateTransformBasic get_target_shape(
    const Shape& data_shape,
    const Coordinate& start_corner,
    const Coordinate& end_corner
    );

static void scatter_update(
    const char \* input_data,
    const int64_t \* indices,
    const char \* updates,
    const int64_t axis,
    char \* out_buf,
    const size_t elem_size,
    const Shape& data_shape,
    const Shape& indices_shape,
    const Shape& updates_shape
    );

template <typename T>
void select(
    const char \* arg0,
    const T \* arg1,
    const T \* arg2,
    T \* out,
    size_t arg0_count,
    size_t arg1_count,
    size_t arg2_count,
    size_t out_count
    );

template <typename T>
void select(
    const char \* arg0,
    const T \* arg1,
    const T \* arg2,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& arg2_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void selu(
    const T \* arg,
    const T \* alpha,
    const T \* lambda,
    T \* out,
    size_t size_arg,
    size_t size_alpha,
    size_t size_lambda
    );

template <typename T, typename U>
void cell_pass(
    CellType type,
    const std::vector<const char \*>& inputs,
    const std::vector<Shape>& shapes,
    const std::vector<char \*>& outputs,
    const CellArgs& args,
    bool is_reverse
    );

template <typename T, typename U>
void lstm_sequence(
    const char \* X,
    const Shape& X_shape,
    const char \* H,
    const Shape& H_shape,
    const char \* C,
    const Shape& C_shape,
    const char \* seq_lengths,
    const Shape& seq_lengths_shape,
    const char \* W,
    const Shape& W_shape,
    const char \* R,
    const Shape& R_shape,
    const char \* B,
    const Shape& B_shape,
    char \* Y,
    char \* Ho,
    char \* Co,
    const std::string& activation_f,
    const std::string& activation_g,
    const std::string& activation_h,
    float clip,
    op::RecurrentSequenceDirection direction
    );

template <typename T, typename U>
void lstm_sequence_v1(
    const char \* X,
    const Shape& X_shape,
    const char \* H,
    const Shape& H_shape,
    const char \* C,
    const Shape& C_shape,
    const char \* seq_lengths,
    const Shape& seq_lengths_shape,
    const char \* W,
    const Shape& W_shape,
    const char \* R,
    const Shape& R_shape,
    const char \* B,
    const Shape& B_shape,
    const char \* P,
    const Shape& P_shape,
    char \* Y,
    char \* Ho,
    char \* Co,
    const std::string& activation_f,
    const std::string& activation_g,
    const std::string& activation_h,
    float clip,
    const ov::op::LSTMWeightsFormat weight_format,
    bool input_forget,
    op::RecurrentSequenceDirection direction
    );

template <typename T, typename U>
void gru_sequence(
    const char \* X,
    const Shape& X_shape,
    const char \* H,
    const Shape& H_shape,
    const char \* seq_lengths,
    const Shape& seq_lengths_shape,
    const char \* W,
    const Shape& W_shape,
    const char \* R,
    const Shape& R_shape,
    const char \* B,
    const Shape& B_shape,
    char \* Y,
    char \* Ho,
    const std::string& activation_f,
    const std::string& activation_g,
    const float clip,
    const op::RecurrentSequenceDirection direction,
    const bool linear_before_reset
    );

template <typename T, typename U>
void rnn_sequence(
    const char \* X,
    const Shape& X_shape,
    const char \* H,
    const Shape& H_shape,
    const char \* seq_lengths,
    const Shape& seq_lengths_shape,
    const char \* W,
    const Shape& W_shape,
    const char \* R,
    const Shape& R_shape,
    const char \* B,
    const Shape& B_shape,
    char \* Y,
    char \* Ho,
    const std::string& activation_f,
    float clip,
    const op::RecurrentSequenceDirection direction
    );

template <typename T>
void shape_of(const Shape& arg_shape, T \* out);

void shuffle_channels(
    const char \* arg,
    char \* out,
    const Shape& data_shape,
    size_t elem_size,
    const int64_t axis,
    const int64_t group
    );

template <
    typename T,
    typename std::enable_if<std::is_integral<T>::value, bool>::type = true
    >
void sigmoid(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void sign(const T \* arg, T \* out, size_t count);

template <typename T>
void sin(const T \* arg, T \* out, size_t count);

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void sinh(
    const T \* arg,
    T \* out,
    size_t count
    );

void slice(
    const char \* data,
    const Shape& data_shape,
    char \* out,
    const Shape& out_shape,
    size_t elem_size,
    const std::vector<int64_t>& starts,
    const std::vector<int64_t>& steps,
    const std::vector<int64_t>& axes
    );

void slice(
    const char \* arg,
    char \* out,
    const Shape& arg_shape,
    const Coordinate& lower_bounds,
    const Coordinate& upper_bounds,
    const Strides& strides,
    const Shape& out_shape,
    size_t elem_size
    );

template <typename T>
void softmax(
    const T \* arg,
    T \* out,
    const Shape& shape,
    const AxisSet& axes
    );

template <typename T>
void softplus(const T \* arg, T \* out, size_t count);

template <typename T>
void softsign(const T \* arg, T \* out, size_t count);

void space_to_depth(
    const char \*const in,
    const Shape& in_shape,
    char \*const out,
    const Shape& out_shape,
    const size_t block_size,
    const op::SpaceToDepth::SpaceToDepthMode mode,
    const size_t elem_size
    );

void split(
    const char \* data,
    const Shape& data_shape,
    size_t elem_size,
    int64_t axis,
    size_t num_splits,
    char \*\* out_data
    );

template <typename T>
std::enable_if<!std::is_integral<T>::value>::type sqrt(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
std::enable_if<std::is_integral<T>::value>::type sqrt(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void squared_difference(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void strided_slice(
    const char \* arg,
    char \* out,
    const Shape& arg_shape,
    const SlicePlan& sp,
    size_t elem_type
    );

template <typename T>
void subtract(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void subtract(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void sum(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

template <typename T>
void swish(
    const T \* arg,
    const T \* beta,
    T \* out,
    size_t count
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void tan(
    const T \* arg,
    T \* out,
    size_t count
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void tanh(
    const T \* arg,
    T \* out,
    size_t count
    );

void tensor_iterator(
    uint64_t num_iterations,
    const std::shared_ptr<Function>& body,
    const op::util::OutputDescriptionVector& out_descs,
    const op::util::InputDescriptionVector& input_descs,
    const HostTensorVector& out,
    const HostTensorVector& args,
    const custom_evaluate_function& evaluate = nullptr
    );

void tile(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const Shape& out_shape,
    const size_t elem_size,
    const std::vector<int64_t>& repeats
    );

template <typename T, typename U>
bool compare_max(
    const std::tuple<T, U>& a,
    const std::tuple<T, U>& b
    );

template <typename T, typename U>
bool compare_min(
    const std::tuple<T, U>& a,
    const std::tuple<T, U>& b
    );

template <typename T, typename U>
bool sort_indices_ascending(
    const std::tuple<T, U>& a,
    const std::tuple<T, U>& b
    );

template <typename T, typename U>
void topk(
    const T \* arg,
    U \* out_indices,
    T \* out_values,
    const Shape& in_shape,
    const Shape& out_shape,
    size_t axis,
    size_t k,
    bool compute_max,
    op::v1::TopK::SortType sort = op::v1::TopK::SortType::NONE
    );

void transpose(
    const char \* data,
    char \* out,
    const Shape& data_shape,
    size_t element_size,
    const int64_t \* axes_order,
    Shape out_shape
    );

template <
    typename Iterator,
    typename value = typename details::from_iterator<Iterator>::stored_value,
    details::Required<details::IsRandomAccessIt<Iterator>::value> = true
    >
constexpr auto span(
    Iterator first,
    Iterator second
    );

template <
    typename Container,
    typename = details::void_t<decltype(std::declval<Container>().data()), decltype(std::declval<Container>().size())>
    >
constexpr auto span(const Container& c);

template <
    typename Container,
    typename = details::void_t<decltype(std::declval<Container>().data()), decltype(std::declval<Container>().size())>
    >
constexpr auto span(Container& c);

template <typename Element>
constexpr auto span(
    const Element \* data,
    std::size_t size
    );

template <typename Element>
constexpr auto span(
    Element \* data,
    std::size_t size
    );

template <typename T>
void logical_xor(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void logical_xor(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

} // namespace reference

Detailed Documentation

Global Functions

template <typename T, typename U, typename Functor>
void autobroadcast_binop(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    Functor elementwise_functor
    )

Helper function to implement autobroadcasting elementwise binop references.

Parameters:

T

Element type of the input tensors.

U

Element type of the output tensor.

Functor

Type of the functor for the elementwise operation. Must support operator()(T,T), and operator()(T,T) must return a value of type U.

arg0

Pointer to the buffer for left operand input tensor.

arg1

Pointer to the buffer for right operand input tensor.

out

Pointer to the buffer for output tensor. This must be pre-allocated by the caller, and must be large enough to hold a tensor of the correct shape.

broadcast_spec

Specification of the auto-broadcasting scheme.

elementwise_functor

Functor implementing the elementwise operation to be applied across the input tensors. Must accept two arguments of type T, and return a value of type U.

template <typename T, typename U, typename Functor>
void autobroadcast_select(
    const U \* arg0,
    const T \* arg1,
    const T \* arg2,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& arg2_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    Functor elementwise_functor
    )

Helper function to implement autobroadcasting elementwise ternaryop references.

Parameters:

U

Element type of the selector tensor.

T

Element type of the input tensors.

Functor

Type of the functor for the elementwise operation. Must support operator()(U,T,T), and operator()(U,T,T) must return a value of type T.

arg0

Pointer to the buffer for selector tensor.

arg1

Pointer to the buffer for left operand input tensor.

arg2

Pointer to the buffer for right operand input tensor.

out

Pointer to the buffer for output tensor. This must be pre-allocated by the caller, and must be large enough to hold a tensor of the correct shape.

broadcast_spec

Specification of the auto-broadcasting scheme.

elementwise_functor

Functor implementing the elementwise operation to be applied across the input tensors. Must accept an argument of type U and two of type T, and return a value of type T.

template <typename T, typename U>
void gather_nd(
    const T \*const params,
    const U \*const indices,
    T \*const out,
    const Shape& params_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    const int batch_dims = 0
    )

The implementation finds the maximum length of a slice of the input params which may be copied to out index by index.

    +-------+--------------+-------+
    | batch | indices[:-1] | slice |
    | shape |    shape     | shape |
    +-------+--------------+-------+

template <typename T>
void matmul(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& out_shape,
    bool transpose_arg0,
    bool transpose_arg1
    )

Reference kernel for matmul computation.

Parameters:

T

Type of input and output tensors.

arg0

Pointer to the buffer for left operand input tensor.

arg1

Pointer to the buffer for right operand input tensor.

out

Pointer to the buffer for output tensor. This must be pre-allocated by the caller, and must be large enough to hold a tensor of the correct shape.

arg0_shape

Shape of arg0.

arg1_shape

Shape of arg1.

out_shape

Shape of out.

transpose_arg0

Flag indicating whether arg0 is to be transposed.

transpose_arg1

Flag indicating whether arg1 is to be transposed.

template <typename T>
size_t non_zero_get_count(
    const T \* arg,
    const Shape& arg_shape
    )

Return number of non-zero entries in the input argument.

Parameters:

arg

Input tensor

arg_shape

Input tensor shape

Returns:

Number of non-zero entries in arg

template <typename T, typename U>
void non_zero(
    const T \* arg,
    U \* out,
    const Shape& arg_shape
    )

Return indices of non-zero entries in input argument.

Parameters:

arg

Input tensor

arg_shape

Input tensor shape

out

Output containing indices of non-zero entries in arg