OpenCV  4.1.0
Open Source Computer Vision
Namespaces | Macros
intrin_avx.hpp File Reference

Namespaces

namespace  cv
 "black box" representation of the file storage associated with a file on disk.
 

Macros

#define CV_SIMD256   1
 
#define CV_SIMD256_64F   1
 
#define CV_SIMD256_FP16   0
 
#define OPENCV_HAL_AVX_SPLAT2_PS(a, im)   v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im)))
 
#define OPENCV_HAL_IMPL_AVX_ABS(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin)
 
#define OPENCV_HAL_IMPL_AVX_BIN_OP(bin_op, _Tpvec, intrin)
 
#define OPENCV_HAL_IMPL_AVX_BLEND(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, _Tpvecf, suffix, cast)
 
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, and_op, allmask)
 
#define OPENCV_HAL_IMPL_AVX_CHECK_FLT(_Tpvec, allmask)
 
#define OPENCV_HAL_IMPL_AVX_CMP_FLT(bin_op, imm8, _Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(_Tpvec)
 
#define OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_CMP_OP_INT(_Tpuvec, _Tpsvec, suffix, sbit)
 
#define OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpvec)
 
#define OPENCV_HAL_IMPL_AVX_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin)
 
#define OPENCV_HAL_IMPL_AVX_EXPAND_Q(_Tpvec, _Tp, intrin)
 
#define OPENCV_HAL_IMPL_AVX_EXTRACT(_Tpvec)
 
#define OPENCV_HAL_IMPL_AVX_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s)
 
#define OPENCV_HAL_IMPL_AVX_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast)
 
#define OPENCV_HAL_IMPL_AVX_LOADSTORE(_Tpvec, _Tp)
 
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg)
 
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1)
 
#define OPENCV_HAL_IMPL_AVX_LOGIC_OP(_Tpvec, suffix, not_const)
 
#define OPENCV_HAL_IMPL_AVX_MULADD(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_POPCOUNT(_Tpvec)
 
#define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin)
 
#define OPENCV_HAL_IMPL_AVX_REDUCE_8(_Tpvec, sctype, func, intrin)
 
#define OPENCV_HAL_IMPL_AVX_REDUCE_FLT(func, intrin)
 
#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec)
 
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast)
 
#define OPENCV_HAL_IMPL_AVX_SELECT(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai)
 
#define OPENCV_HAL_IMPL_AVX_SHUFFLE(_Tpvec, intrin)
 
#define OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to)
 
#define OPENCV_HAL_IMPL_AVX_UNPACK(_Tpvec, suffix)
 
#define OPENCV_HAL_IMPL_AVX_ZIP(_Tpvec)
 

Macro Definition Documentation

#define CV_SIMD256   1
#define CV_SIMD256_64F   1
#define CV_SIMD256_FP16   0
#define OPENCV_HAL_AVX_SPLAT2_PS (   a,
  im 
)    v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im)))
#define OPENCV_HAL_IMPL_AVX_ABS (   _Tpvec,
  suffix 
)
Value:
inline v_u##_Tpvec v_abs(const v_##_Tpvec& x) \
{ return v_u##_Tpvec(_mm256_abs_##suffix(x.val)); }
#define OPENCV_HAL_IMPL_AVX_BIN_FUNC (   func,
  _Tpvec,
  intrin 
)
Value:
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(intrin(a.val, b.val)); }
#define OPENCV_HAL_IMPL_AVX_BIN_OP (   bin_op,
  _Tpvec,
  intrin 
)
Value:
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(intrin(a.val, b.val)); } \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
{ a.val = intrin(a.val, b.val); return a; }
#define OPENCV_HAL_IMPL_AVX_BLEND (   _Tpvec,
  suffix 
)
Value:
template<int m> \
inline _Tpvec v256_blend(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm256_blend_##suffix(a.val, b.val, m)); }
#define OPENCV_HAL_IMPL_AVX_CAST (   _Tpvec,
  _Tpvecf,
  suffix,
  cast 
)
Value:
inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \
{ return _Tpvec(cast(a.val)); }
#define OPENCV_HAL_IMPL_AVX_CHECK (   _Tpvec,
  and_op,
  allmask 
)
Value:
inline bool v_check_all(const _Tpvec& a) \
{ \
int mask = v_signmask(v_reinterpret_as_s8(a)); \
return and_op(mask, allmask) == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
int mask = v_signmask(v_reinterpret_as_s8(a)); \
return and_op(mask, allmask) != 0; \
}
#define OPENCV_HAL_IMPL_AVX_CHECK_FLT (   _Tpvec,
  allmask 
)
Value:
inline bool v_check_all(const _Tpvec& a) \
{ \
int mask = v_signmask(a); \
return mask == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
int mask = v_signmask(a); \
return mask != 0; \
}
#define OPENCV_HAL_IMPL_AVX_CMP_FLT (   bin_op,
  imm8,
  _Tpvec,
  suffix 
)
Value:
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm256_cmp_##suffix(a.val, b.val, imm8)); }
#define OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT (   _Tpvec)
Value:
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm256_cmpeq_epi64(a.val, b.val)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); }
#define OPENCV_HAL_IMPL_AVX_CMP_OP_FLT (   _Tpvec,
  suffix 
)
Value:
OPENCV_HAL_IMPL_AVX_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, suffix) \
OPENCV_HAL_IMPL_AVX_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, suffix) \
OPENCV_HAL_IMPL_AVX_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, suffix) \
OPENCV_HAL_IMPL_AVX_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, suffix) \
OPENCV_HAL_IMPL_AVX_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, suffix) \
OPENCV_HAL_IMPL_AVX_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, suffix)
#define OPENCV_HAL_IMPL_AVX_CMP_OP_INT (   _Tpuvec,
  _Tpsvec,
  suffix,
  sbit 
)
Value:
inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \
{ return _Tpuvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \
inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \
{ \
__m256i smask = _mm256_set1_##suffix(sbit); \
return _Tpuvec(_mm256_cmpgt_##suffix( \
_mm256_xor_si256(a.val, smask), \
_mm256_xor_si256(b.val, smask))); \
} \
inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \
{ return _Tpsvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \
inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \
{ return _Tpsvec(_mm256_cmpgt_##suffix(a.val, b.val)); } \
OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpuvec) \
OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpsvec)
#define OPENCV_HAL_IMPL_AVX_CMP_OP_OV (   _Tpvec)
Value:
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); } \
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
{ return b > a; } \
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a < b); } \
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
{ return b >= a; }
#define OPENCV_HAL_IMPL_AVX_EXPAND (   _Tpvec,
  _Tpwvec,
  _Tp,
  intrin 
)
Value:
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
b0.val = intrin(_v256_extract_low(a.val)); \
b1.val = intrin(_v256_extract_high(a.val)); \
} \
inline _Tpwvec v_expand_low(const _Tpvec& a) \
{ return _Tpwvec(intrin(_v256_extract_low(a.val))); } \
inline _Tpwvec v_expand_high(const _Tpvec& a) \
{ return _Tpwvec(intrin(_v256_extract_high(a.val))); } \
inline _Tpwvec v256_load_expand(const _Tp* ptr) \
{ \
__m128i a = _mm_loadu_si128((const __m128i*)ptr); \
return _Tpwvec(intrin(a)); \
}
#define OPENCV_HAL_IMPL_AVX_EXPAND_Q (   _Tpvec,
  _Tp,
  intrin 
)
Value:
inline _Tpvec v256_load_expand_q(const _Tp* ptr) \
{ \
__m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
return _Tpvec(intrin(a)); \
}
#define OPENCV_HAL_IMPL_AVX_EXTRACT (   _Tpvec)
Value:
template<int s> \
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
{ return v_rotate_right<s>(a, b); }
#define OPENCV_HAL_IMPL_AVX_INIT (   _Tpvec,
  _Tp,
  suffix,
  ssuffix,
  ctype_s 
)
Value:
inline _Tpvec v256_setzero_##suffix() \
{ return _Tpvec(_mm256_setzero_si256()); } \
inline _Tpvec v256_setall_##suffix(_Tp v) \
{ return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); } \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float32x8, suffix, _mm256_castps_si256) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float64x4, suffix, _mm256_castpd_si256)
#define OPENCV_HAL_IMPL_AVX_INIT_FLT (   _Tpvec,
  _Tp,
  suffix,
  zsuffix,
  cast 
)
Value:
inline _Tpvec v256_setzero_##suffix() \
{ return _Tpvec(_mm256_setzero_##zsuffix()); } \
inline _Tpvec v256_setall_##suffix(_Tp v) \
{ return _Tpvec(_mm256_set1_##zsuffix(v)); } \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, cast) \
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, cast)
#define OPENCV_HAL_IMPL_AVX_LOADSTORE (   _Tpvec,
  _Tp 
)
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT (   _Tpvec,
  _Tp,
  suffix,
  halfreg 
)
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE (   _Tpvec0,
  _Tp0,
  suffix0,
  _Tpvec1,
  _Tp1,
  suffix1 
)
#define OPENCV_HAL_IMPL_AVX_LOGIC_OP (   _Tpvec,
  suffix,
  not_const 
)
Value:
OPENCV_HAL_IMPL_AVX_BIN_OP(&, _Tpvec, _mm256_and_##suffix) \
OPENCV_HAL_IMPL_AVX_BIN_OP(|, _Tpvec, _mm256_or_##suffix) \
OPENCV_HAL_IMPL_AVX_BIN_OP(^, _Tpvec, _mm256_xor_##suffix) \
inline _Tpvec operator ~ (const _Tpvec& a) \
{ return _Tpvec(_mm256_xor_##suffix(a.val, not_const)); }
#define OPENCV_HAL_IMPL_AVX_MULADD (   _Tpvec,
  suffix 
)
Value:
inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
{ return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \
inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
{ return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \
inline _Tpvec v_sqrt(const _Tpvec& x) \
{ return _Tpvec(_mm256_sqrt_##suffix(x.val)); } \
inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
{ return v_fma(a, a, b * b); } \
inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
{ return v_sqrt(v_fma(a, a, b*b)); }
#define OPENCV_HAL_IMPL_AVX_POPCOUNT (   _Tpvec)
Value:
inline v_uint32x8 v_popcount(const _Tpvec& a) \
{ \
const v_uint32x8 m1 = v256_setall_u32(0x55555555); \
const v_uint32x8 m2 = v256_setall_u32(0x33333333); \
const v_uint32x8 m4 = v256_setall_u32(0x0f0f0f0f); \
v_uint32x8 p = v_reinterpret_as_u32(a); \
p = ((p >> 1) & m1) + (p & m1); \
p = ((p >> 2) & m2) + (p & m2); \
p = ((p >> 4) & m4) + (p & m4); \
p.val = _mm256_sad_epu8(p.val, _mm256_setzero_si256()); \
return p; \
}
#define OPENCV_HAL_IMPL_AVX_REDUCE_16 (   _Tpvec,
  sctype,
  func,
  intrin 
)
Value:
inline sctype v_reduce_##func(const _Tpvec& a) \
{ \
__m128i v0 = _v256_extract_low(a.val); \
__m128i v1 = _v256_extract_high(a.val); \
v0 = intrin(v0, v1); \
v0 = intrin(v0, _mm_srli_si128(v0, 8)); \
v0 = intrin(v0, _mm_srli_si128(v0, 4)); \
v0 = intrin(v0, _mm_srli_si128(v0, 2)); \
return (sctype) _mm_cvtsi128_si32(v0); \
}
#define OPENCV_HAL_IMPL_AVX_REDUCE_8 (   _Tpvec,
  sctype,
  func,
  intrin 
)
Value:
inline sctype v_reduce_##func(const _Tpvec& a) \
{ \
__m128i v0 = _v256_extract_low(a.val); \
__m128i v1 = _v256_extract_high(a.val); \
v0 = intrin(v0, v1); \
v0 = intrin(v0, _mm_srli_si128(v0, 8)); \
v0 = intrin(v0, _mm_srli_si128(v0, 4)); \
return (sctype) _mm_cvtsi128_si32(v0); \
}
#define OPENCV_HAL_IMPL_AVX_REDUCE_FLT (   func,
  intrin 
)
Value:
inline float v_reduce_##func(const v_float32x8& a) \
{ \
__m128 v0 = _v256_extract_low(a.val); \
__m128 v1 = _v256_extract_high(a.val); \
v0 = intrin(v0, v1); \
v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 3, 2))); \
v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 0, 3))); \
return _mm_cvtss_f32(v0); \
}
#define OPENCV_HAL_IMPL_AVX_ROTATE (   _Tpvec)
Value:
OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, _Tpvec, OPENCV_HAL_NOP) \
OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, _Tpvec, OPENCV_HAL_NOP)
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST (   intrin,
  _Tpvec,
  cast 
)
Value:
template<int imm> \
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
{ \
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a), \
v_reinterpret_as_u8(b)); \
return _Tpvec(cast(ret.val)); \
} \
template<int imm> \
inline _Tpvec intrin(const _Tpvec& a) \
{ \
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a)); \
return _Tpvec(cast(ret.val)); \
}
#define OPENCV_HAL_IMPL_AVX_SELECT (   _Tpvec,
  suffix 
)
Value:
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm256_blendv_##suffix(b.val, a.val, mask.val)); }
#define OPENCV_HAL_IMPL_AVX_SHIFT_OP (   _Tpuvec,
  _Tpsvec,
  suffix,
  srai 
)
Value:
inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
{ return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \
inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
{ return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \
inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
{ return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \
inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
{ return _Tpsvec(srai(a.val, imm)); } \
template<int imm> \
inline _Tpuvec v_shl(const _Tpuvec& a) \
{ return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \
template<int imm> \
inline _Tpsvec v_shl(const _Tpsvec& a) \
{ return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \
template<int imm> \
inline _Tpuvec v_shr(const _Tpuvec& a) \
{ return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \
template<int imm> \
inline _Tpsvec v_shr(const _Tpsvec& a) \
{ return _Tpsvec(srai(a.val, imm)); }
#define OPENCV_HAL_IMPL_AVX_SHUFFLE (   _Tpvec,
  intrin 
)
Value:
template<int m> \
inline _Tpvec v256_shuffle(const _Tpvec& a) \
{ return _Tpvec(_mm256_##intrin(a.val, m)); }
#define OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4 (   _Tpvec,
  suffix,
  cast_from,
  cast_to 
)
Value:
inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
const _Tpvec& a2, const _Tpvec& a3, \
_Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \
{ \
__m256i t0 = cast_from(_mm256_unpacklo_##suffix(a0.val, a1.val)); \
__m256i t1 = cast_from(_mm256_unpacklo_##suffix(a2.val, a3.val)); \
__m256i t2 = cast_from(_mm256_unpackhi_##suffix(a0.val, a1.val)); \
__m256i t3 = cast_from(_mm256_unpackhi_##suffix(a2.val, a3.val)); \
b0.val = cast_to(_mm256_unpacklo_epi64(t0, t1)); \
b1.val = cast_to(_mm256_unpackhi_epi64(t0, t1)); \
b2.val = cast_to(_mm256_unpacklo_epi64(t2, t3)); \
b3.val = cast_to(_mm256_unpackhi_epi64(t2, t3)); \
}
#define OPENCV_HAL_IMPL_AVX_UNPACK (   _Tpvec,
  suffix 
)
Value:
inline _Tpvec v256_unpacklo(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm256_unpacklo_##suffix(a.val, b.val)); } \
inline _Tpvec v256_unpackhi(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(_mm256_unpackhi_##suffix(a.val, b.val)); }
#define OPENCV_HAL_IMPL_AVX_ZIP (   _Tpvec)
Value:
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
{ return v256_permute2x128<0x20>(a, b); } \
inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
{ return v256_permute2x128<0x31>(a, b); } \
inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \
_Tpvec& c, _Tpvec& d) \
{ \
_Tpvec a1b0 = v256_alignr_128(a, b); \
c = v256_combine_diagonal(a, a1b0); \
d = v256_combine_diagonal(a1b0, b); \
} \
inline void v_zip(const _Tpvec& a, const _Tpvec& b, \
_Tpvec& ab0, _Tpvec& ab1) \
{ \
_Tpvec ab0ab2, ab1ab3; \
v256_zip(a, b, ab0ab2, ab1ab3); \
v_recombine(ab0ab2, ab1ab3, ab0, ab1); \
}