OpenCV  4.1.0
Open Source Computer Vision
intrin_neon.hpp File Reference
#include <algorithm>
#include "opencv2/core/utility.hpp"

Namespaces

namespace  cv
 Namespace in which all OpenCV classes and functions, including the universal intrinsics defined in this file, are placed.
 

Macros

#define CV_SIMD128   1
 
#define CV_SIMD128_64F   0
 
#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix)   inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }
 
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin)
 
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin)
 
#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin)
 
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift)
 
#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix)
 
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin)
 
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix)
 
#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix)
 
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix)
 
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix)
 
#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix)
 
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec)
 
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr)
 
#define OPENCV_HAL_IMPL_NEON_POPCOUNT(_Tpvec, cast)
 
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix)
 
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix)
 
#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix)
 
#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix)
 
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix)
 

Macro Definition Documentation

#define CV_SIMD128   1
#define CV_SIMD128_64F   0
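These two macros advertise the capabilities of this backend: CV_SIMD128 states that 128-bit universal intrinsics are available, and CV_SIMD128_64F being 0 means this configuration provides no double-precision (v_float64x2) lanes. A minimal usage sketch of the usual guard, assuming the umbrella header opencv2/core/hal/intrin.hpp and a hypothetical scale2x helper name:

#include <opencv2/core/hal/intrin.hpp>   // pulls in intrin_neon.hpp on ARM targets
using namespace cv;

void scale2x(const float* src, float* dst, int n)
{
    int i = 0;
#if CV_SIMD128                            // vector path only if 128-bit intrinsics exist
    for (; i <= n - v_float32x4::nlanes; i += v_float32x4::nlanes)
    {
        v_float32x4 v = v_load(src + i);
        v_store(dst + i, v + v);          // double each lane
    }
#endif
    for (; i < n; ++i)                    // scalar tail (and fallback)
        dst[i] = 2.0f * src[i];
}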
#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix)    inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }
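This macro emits v_abs() overloads that take a signed integer vector and return the corresponding unsigned vector type. A short sketch of the generated interface, assuming the usual opencv2/core/hal/intrin.hpp include:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_uint16x8 abs_demo(const v_int16x8& a)
{
    return v_abs(a);   // e.g. lanes holding -7 become 7; note the unsigned result type
}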
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin)
Value:
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(intrin(a.val, b.val)); \
}
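Each invocation of this macro wraps one two-operand NEON intrinsic in a uniform function. As an illustration, an instantiation along the lines of OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8) yields a v_min overload usable like this (clamp_high is a hypothetical helper name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_uint8x16 clamp_high(const v_uint8x16& v)
{
    return v_min(v, v_setall_u8((uchar)200));   // per-lane minimum against a constant
}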
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin)
Value:
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec2(cast(intrin(a.val, b.val))); \
}
#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin)
Value:
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(intrin(a.val, b.val)); \
} \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
{ \
a.val = intrin(a.val, b.val); \
return a; \
}
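One invocation produces both the plain binary operator and its compound-assignment counterpart for a given vector type. A brief sketch of how the generated operators read in user code (axpy is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_float32x4 axpy(const v_float32x4& a, const v_float32x4& x, const v_float32x4& y)
{
    v_float32x4 r = a * x;   // operator* from this macro family
    r += y;                  // operator+= generated alongside it
    return r;
}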
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift)
Value:
inline bool v_check_all(const v_##_Tpvec& a) \
{ \
_Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
} \
inline bool v_check_any(const v_##_Tpvec& a) \
{ \
_Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
}
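The generated v_check_all()/v_check_any() test the most significant bit of every lane, which is how comparison results are encoded. A minimal sketch that feeds a comparison mask, reinterpreted to the unsigned type the macro is shown instantiated for, into the check (all_positive is a hypothetical helper name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

bool all_positive(const int* p)   // p is assumed to point to at least 4 ints
{
    v_int32x4 v = v_load(p);
    v_uint32x4 mask = v_reinterpret_as_u32(v > v_setzero_s32());
    return v_check_all(mask);     // true only if every lane compared true
}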
#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix)
Value:
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
} \
inline _Tpwvec v_expand_low(const _Tpvec& a) \
{ \
return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \
} \
inline _Tpwvec v_expand_high(const _Tpvec& a) \
{ \
return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
}
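These wrappers widen 128-bit vectors to the next element size, either from a register (v_expand, v_expand_low/v_expand_high) or straight from memory (v_load_expand). A small sketch, with widen_demo and load_expand_demo as hypothetical names:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

void widen_demo(const uchar* p, v_uint16x8& lo, v_uint16x8& hi)
{
    v_uint8x16 v = v_load(p);   // 16 bytes
    v_expand(v, lo, hi);        // lower/upper 8 lanes widened to 16 bits
}

v_uint16x8 load_expand_demo(const uchar* p)
{
    return v_load_expand(p);    // reads only 8 bytes and widens in one step
}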
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix)
Value:
template <int s> \
inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \
}
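v_extract<s>() concatenates two registers and returns a window starting at lane s, which is handy for sliding-window filters. A sketch (sliding_window is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_int32x4 sliding_window(const v_int32x4& a, const v_int32x4& b)
{
    return v_extract<1>(a, b);   // yields {a1, a2, a3, b0}
}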
#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin)
Value:
inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
{ \
return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \
} \
inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
{ \
a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \
return a; \
}
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix)
Value:
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \
inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \
inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \
inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \
inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \
inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \
inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \
inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \
inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \
inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); }
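Besides the v_setzero_*/v_setall_* constructors, this macro emits the full set of v_reinterpret_as_* conversions for one vector type. A short sketch showing a bit-level reinterpretation (bits_of_one is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_int32x4 bits_of_one()
{
    v_float32x4 ones = v_setall_f32(1.0f);   // broadcast constructor from this macro
    return v_reinterpret_as_s32(ones);       // every lane reads 0x3f800000 (bit view, no conversion)
}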
#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix)
Value:
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \
inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); }
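The comparison operators return full-width per-lane masks (all bits set where the predicate holds), so they combine naturally with the reinterpret and reduction helpers. A sketch that counts matching lanes; count_above is a hypothetical name and p is assumed to cover at least 4 floats:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

int count_above(const float* p, float thr)
{
    v_float32x4 v = v_load(p);
    v_float32x4 mask = v > v_setall_f32(thr);          // all-ones lanes where v[i] > thr
    return -v_reduce_sum(v_reinterpret_as_s32(mask));  // each true lane is -1 as s32
}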
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix)
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix)
#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix)
Value:
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ vst1_##suffix(ptr, vget_high_##suffix(a.val)); }
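Note that in this backend the aligned, unaligned and no-cache store variants all map to the same vld1q/vst1q instructions. A minimal array kernel built on the generated loads and stores (add_arrays is a hypothetical name; n is assumed to be a multiple of the lane count for brevity):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

void add_arrays(const float* a, const float* b, float* dst, int n)
{
    for (int i = 0; i < n; i += v_float32x4::nlanes)
    {
        v_float32x4 va = v_load(a + i);
        v_float32x4 vb = v_load(b + i);
        v_store(dst + i, va + vb);
    }
}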
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix)
Value:
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \
inline _Tpvec operator ~ (const _Tpvec& a) \
{ \
return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \
}
#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec)
Value:
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
{ \
_Tpwvec c, d; \
v_mul_expand(a, b, c, d); \
return v_pack(c, d); \
} \
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
{ a = a * b; return a; }
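For the narrow element types, multiplication is performed by widening (v_mul_expand) and packing back with saturation, so overflow clips instead of wrapping. A one-line sketch (scale3 is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_uint8x16 scale3(const v_uint8x16& v)
{
    return v * v_setall_u8((uchar)3);   // products above 255 saturate to 255
}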
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr)
Value:
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
hreg a1 = mov(a.val), b1 = mov(b.val); \
return _Tpvec(vcombine_##suffix(a1, b1)); \
} \
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
hreg a1 = mov(a.val); \
vst1_##suffix(ptr, a1); \
} \
template<int n> inline \
_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
hreg a1 = rshr(a.val, n); \
hreg b1 = rshr(b.val, n); \
return _Tpvec(vcombine_##suffix(a1, b1)); \
} \
template<int n> inline \
void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
hreg a1 = rshr(a.val, n); \
vst1_##suffix(ptr, a1); \
}
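The pack family narrows wide vectors with saturation, optionally after a rounding right shift, and has store variants that write only the narrowed half. A sketch with hypothetical names narrow_demo and rshr_pack_demo:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_uint8x16 narrow_demo(const v_uint16x8& a, const v_uint16x8& b)
{
    return v_pack(a, b);            // 16 lanes of 16 bits -> 16 saturated bytes
}

void rshr_pack_demo(uchar* dst, const v_uint16x8& a)
{
    v_rshr_pack_store<4>(dst, a);   // rounding shift right by 4, then pack and store 8 bytes
}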
#define OPENCV_HAL_IMPL_NEON_POPCOUNT(_Tpvec, cast)
Value:
inline v_uint32x4 v_popcount(const _Tpvec& a) \
{ \
uint8x16_t t = vcntq_u8(cast(a.val)); \
uint16x8_t t0 = vpaddlq_u8(t); /* 16 -> 8 */ \
uint32x4_t t1 = vpaddlq_u16(t0); /* 8 -> 4 */ \
return v_uint32x4(t1); \
}
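As the comments in the expansion indicate, the per-byte counts are pairwise-accumulated down to four 32-bit partial sums; reducing those gives the total. A sketch (bit_count is a hypothetical name; p is assumed to cover 16 bytes):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

unsigned bit_count(const uchar* p)
{
    v_uint8x16 v = v_load(p);
    v_uint32x4 partial = v_popcount(v);   // four 32-bit partial counts
    return v_reduce_sum(partial);         // total number of set bits
}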
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix)
Value:
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
_Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \
}
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix)
Value:
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
_Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
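Both reduce macros fold a vector into a single scalar through pairwise operations; the _4 form serves 4-lane types and the _8 form 8-lane types. A sketch of the generated horizontal operations (horizontal_demo is a hypothetical name; p is assumed to cover 4 floats):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

float horizontal_demo(const float* p)
{
    v_float32x4 v = v_load(p);
    float s = v_reduce_sum(v);   // sum of the 4 lanes
    float m = v_reduce_max(v);   // maximum of the 4 lanes
    return s + m;
}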
#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix)
Value:
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
{ return a; } \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
{ CV_UNUSED(b); return a; }
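v_rotate_left/v_rotate_right shift whole lanes within one register (filling with zeros) or across a register pair, with the n == 0 case specialized to a plain copy. A sketch (rotate_demo is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

void rotate_demo(const v_int32x4& a, const v_int32x4& b,
                 v_int32x4& zero_filled, v_int32x4& from_pair)
{
    zero_filled = v_rotate_right<1>(a);     // {a1, a2, a3, 0}
    from_pair   = v_rotate_right<1>(a, b);  // {a1, a2, a3, b0}
}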
#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix)
Value:
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \
}
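v_select() is a per-lane blend driven by a comparison mask. A short sketch that clamps negative lanes to zero (relu_lanes is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

v_float32x4 relu_lanes(const v_float32x4& x)
{
    v_float32x4 zero = v_setzero_f32();
    return v_select(x > zero, x, zero);   // keep x where positive, 0 elsewhere
}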
#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix)
Value:
inline _Tpvec operator << (const _Tpvec& a, int n) \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \
inline _Tpvec operator >> (const _Tpvec& a, int n) \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \
template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
{ return _Tpvec(vshlq_n_##suffix(a.val, n)); } \
template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \
template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); }
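Run-time shift amounts go through operator<< and operator>>, while compile-time constants can use the v_shl/v_shr/v_rshr templates, which map to the immediate-form NEON instructions. A sketch (shift_demo is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

void shift_demo(const v_uint16x8& v, int n, v_uint16x8& a, v_uint16x8& b)
{
    a = v << n;        // run-time shift amount
    b = v_rshr<2>(v);  // constant shift with rounding
}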
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix)
Value:
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
v_##_Tpvec& b0, v_##_Tpvec& b1, \
v_##_Tpvec& b2, v_##_Tpvec& b3) \
{ \
/* m00 m01 m02 m03 */ \
/* m10 m11 m12 m13 */ \
/* m20 m21 m22 m23 */ \
/* m30 m31 m32 m33 */ \
_Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \
_Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \
/* m00 m10 m02 m12 */ \
/* m01 m11 m03 m13 */ \
/* m20 m30 m22 m32 */ \
/* m21 m31 m23 m33 */ \
b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \
b1.val = vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \
b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \
b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \
}
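The generated v_transpose4x4() turns four row registers into four column registers, as the lane comments in the expansion trace out. A sketch for the float instantiation (transpose_block is a hypothetical name):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

void transpose_block(const v_float32x4& r0, const v_float32x4& r1,
                     const v_float32x4& r2, const v_float32x4& r3,
                     v_float32x4& c0, v_float32x4& c1,
                     v_float32x4& c2, v_float32x4& c3)
{
    v_transpose4x4(r0, r1, r2, r3, c0, c1, c2, c3);   // rows in, columns out
}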
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix)
Value:
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
_Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \
b0.val = p.val[0]; \
b1.val = p.val[1]; \
} \
inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
} \
inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
} \
inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
}
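These helpers interleave lanes (v_zip), concatenate register halves (v_combine_low/v_combine_high) and do both splits at once (v_recombine). A closing sketch with hypothetical names interleave_demo and low_halves:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

void interleave_demo(const v_uint8x16& a, const v_uint8x16& b,
                     v_uint8x16& lo, v_uint8x16& hi)
{
    v_zip(a, b, lo, hi);          // {a0,b0,a1,b1,...} spread over two outputs
}

v_uint8x16 low_halves(const v_uint8x16& a, const v_uint8x16& b)
{
    return v_combine_low(a, b);   // low 8 bytes of a followed by low 8 bytes of b
}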