OpenCV  4.1.0
Open Source Computer Vision
Namespaces | Macros
intrin_sse.hpp File Reference
#include <algorithm>
#include "opencv2/core/utility.hpp"

Namespaces

namespace  cv
 Top-level namespace of the OpenCV library.
 

Macros

#define CV_SIMD128   1
 
#define CV_SIMD128_64F   1
 
#define CV_SIMD128_FP16   0
 
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast)
 
#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth)
 
#define OPENCV_HAL_IMPL_SSE_BIN_FUNC(_Tpvec, func, intrin)
 
#define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin)
 
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask)
 
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin)
 
#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin)
 
#define OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast)
 
#define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit)
 
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(_Tpvec, _Tp, suffix)
 
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp)
 
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1)
 
#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const)
 
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec)
 
#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec)
 
#define OPENCV_HAL_IMPL_SSE_POPCOUNT(_Tpvec)
 
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func)
 
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract)
 
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit)
 
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(_Tpvec, scalartype, suffix)
 
#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin)
 
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_SSE_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai)
 
#define OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to)
 
#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to)
 
#define OPENCV_HAL_PACKS(a)   _mm_packs_epi16(a, a)
 

Macro Definition Documentation

#define CV_SIMD128   1
#define CV_SIMD128_64F   1
#define CV_SIMD128_FP16   0
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast)
Value:
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth)
Value:
inline _Tpuvec v_abs(const _Tpsvec& x) \
{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); }
#define OPENCV_HAL_IMPL_SSE_BIN_FUNC(_Tpvec, func, intrin)
Value:
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(intrin(a.val, b.val)); \
}
#define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin)
Value:
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(intrin(a.val, b.val)); \
} \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
{ \
a.val = intrin(a.val, b.val); \
return a; \
}
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask)
Value:
inline int v_signmask(const _Tpvec& a) \
{ \
return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
} \
inline bool v_check_all(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
inline bool v_check_any(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin)
Value:
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
b0.val = intrin(a.val); \
b1.val = __CV_CAT(intrin, _high)(a.val); \
} \
inline _Tpwvec v_expand_low(const _Tpvec& a) \
{ return _Tpwvec(intrin(a.val)); } \
inline _Tpwvec v_expand_high(const _Tpvec& a) \
{ return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
__m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
return _Tpwvec(intrin(a)); \
}
#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin)
Value:
inline _Tpvec v_load_expand_q(const _Tp* ptr) \
{ \
__m128i a = _mm_cvtsi32_si128(*(const int*)ptr); \
return _Tpvec(intrin(a)); \
}
#define OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(_Tpvec, suffix)
Value:
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm_cmpneq_##suffix(a.val, b.val)); } \
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm_cmplt_##suffix(a.val, b.val)); } \
inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm_cmpgt_##suffix(a.val, b.val)); } \
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm_cmple_##suffix(a.val, b.val)); } \
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm_cmpge_##suffix(a.val, b.val)); }
#define OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(_Tpvec, suffix)
Value:
inline _Tpvec v_reinterpret_as_##suffix(const v_float32x4& a) \
{ return _Tpvec(_mm_castps_si128(a.val)); } \
inline _Tpvec v_reinterpret_as_##suffix(const v_float64x2& a) \
{ return _Tpvec(_mm_castpd_si128(a.val)); }
#define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast)
Value:
inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \
inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
{ return _Tpvec(cast(a.val)); }
#define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit)
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(_Tpvec, _Tp, suffix)
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp)
Value:
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ \
return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
_mm_loadl_epi64((const __m128i*)ptr1))); \
} \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ _mm_storeu_si128((__m128i*)ptr, a.val); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ _mm_store_si128((__m128i*)ptr, a.val); } \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
{ _mm_stream_si128((__m128i*)ptr, a.val); } \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
{ \
if( mode == hal::STORE_UNALIGNED ) \
_mm_storeu_si128((__m128i*)ptr, a.val); \
else if( mode == hal::STORE_ALIGNED_NOCACHE ) \
_mm_stream_si128((__m128i*)ptr, a.val); \
else \
_mm_store_si128((__m128i*)ptr, a.val); \
} \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ _mm_storel_epi64((__m128i*)ptr, a.val); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a.val, a.val)); }
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1)
#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const)
Value:
OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
OPENCV_HAL_IMPL_SSE_BIN_OP(^, _Tpvec, _mm_xor_##suffix) \
inline _Tpvec operator ~ (const _Tpvec& a) \
{ \
return _Tpvec(_mm_xor_##suffix(a.val, not_const)); \
}
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec)
Value:
inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
{ \
_Tpreg absmask = _mm_castsi128_##suffix(absmask_vec); \
return _Tpvec(_mm_and_##suffix(_mm_sub_##suffix(a.val, b.val), absmask)); \
} \
inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
{ \
_Tpvec res = v_fma(a, a, b*b); \
return _Tpvec(_mm_sqrt_##suffix(res.val)); \
} \
inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
{ \
return v_fma(a, a, b*b); \
} \
inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
{ \
return v_fma(a, b, c); \
}
#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec)
Value:
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
{ \
_Tpwvec c, d; \
v_mul_expand(a, b, c, d); \
return v_pack(c, d); \
} \
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
{ a = a * b; return a; }
#define OPENCV_HAL_IMPL_SSE_POPCOUNT(_Tpvec)
Value:
inline v_uint32x4 v_popcount(const _Tpvec& a) \
{ \
__m128i m1 = _mm_set1_epi32(0x55555555); \
__m128i m2 = _mm_set1_epi32(0x33333333); \
__m128i m4 = _mm_set1_epi32(0x0f0f0f0f); \
__m128i p = a.val; \
p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); \
p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); \
p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); \
p = _mm_adds_epi8(p, _mm_srli_si128(p, 1)); \
p = _mm_adds_epi8(p, _mm_srli_si128(p, 2)); \
return v_uint32x4(_mm_and_si128(p, _mm_set1_epi32(0x000000ff))); \
}
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func)
Value:
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
scalartype CV_DECL_ALIGNED(16) buf[4]; \
v_store_aligned(buf, a); \
scalartype s0 = scalar_func(buf[0], buf[1]); \
scalartype s1 = scalar_func(buf[2], buf[3]); \
return scalar_func(s0, s1); \
}
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract)
Value:
inline scalartype v_reduce_sum(const _Tpvec& a) \
{ \
regtype val = a.val; \
val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 8))); \
val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 4))); \
return (scalartype)_mm_cvt##extract(val); \
}
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit)
Value:
inline scalartype v_reduce_##func(const v_##_Tpvec& a) \
{ \
__m128i val = a.val; \
val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
return (scalartype)_mm_cvtsi128_si32(val); \
} \
inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \
{ \
__m128i val = a.val; \
__m128i smask = _mm_set1_epi16(sbit); \
val = _mm_xor_si128(val, smask); \
val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \
}
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(_Tpvec, scalartype, suffix)
Value:
inline scalartype v_reduce_sum(const v_##_Tpvec& a) \
{ \
__m128i val = a.val; \
val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 8)); \
val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 4)); \
val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 2)); \
return (scalartype)_mm_cvtsi128_si32(val); \
} \
inline unsigned scalartype v_reduce_sum(const v_u##_Tpvec& a) \
{ \
__m128i val = a.val; \
val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 8)); \
val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 4)); \
val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 2)); \
return (unsigned scalartype)_mm_cvtsi128_si32(val); \
}
#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin)
Value:
template<> inline \
to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \
{ return sse_cast_intrin(a); }
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix)
Value:
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(_mm_xor_##suffix(b.val, _mm_and_##suffix(_mm_xor_##suffix(b.val, a.val), mask.val))); \
}
#define OPENCV_HAL_IMPL_SSE_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai)
#define OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to)
Value:
inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
const _Tpvec& a2, const _Tpvec& a3, \
_Tpvec& b0, _Tpvec& b1, \
_Tpvec& b2, _Tpvec& b3) \
{ \
__m128i t0 = cast_from(_mm_unpacklo_##suffix(a0.val, a1.val)); \
__m128i t1 = cast_from(_mm_unpacklo_##suffix(a2.val, a3.val)); \
__m128i t2 = cast_from(_mm_unpackhi_##suffix(a0.val, a1.val)); \
__m128i t3 = cast_from(_mm_unpackhi_##suffix(a2.val, a3.val)); \
\
b0.val = cast_to(_mm_unpacklo_epi64(t0, t1)); \
b1.val = cast_to(_mm_unpackhi_epi64(t0, t1)); \
b2.val = cast_to(_mm_unpacklo_epi64(t2, t3)); \
b3.val = cast_to(_mm_unpackhi_epi64(t2, t3)); \
}
#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to)
Value:
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
{ \
b0.val = _mm_unpacklo_##suffix(a0.val, a1.val); \
b1.val = _mm_unpackhi_##suffix(a0.val, a1.val); \
} \
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
{ \
__m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
return _Tpvec(cast_to(_mm_unpacklo_epi64(a1, b1))); \
} \
inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
{ \
__m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
return _Tpvec(cast_to(_mm_unpackhi_epi64(a1, b1))); \
} \
inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
{ \
__m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
c.val = cast_to(_mm_unpacklo_epi64(a1, b1)); \
d.val = cast_to(_mm_unpackhi_epi64(a1, b1)); \
}
#define OPENCV_HAL_PACKS(a)   _mm_packs_epi16(a, a)