// processor: x86 with SSE3 // Execute the hadd_ps instruction using the intrinsic // _mm_hadd_ps #include <stdio.h> #include <intrin.h> #pragma intrinsic ( _mm_hadd_ps ) int main( ) { __m128 u, v, w; __declspec(align(16)) float a[4] = { 0.1, 0.2, 0.3, 0.4 }; __declspec(align(16)) float b[4] = { 0.0001, 0.002, 0.003, 0.004 }; printf_s("Loading floating-point values\n" "%5.3f %5.3f %5.3f %5.3f into XMM register.\n ", a[0], a[1], a[2], a[3] ); u = _mm_load_ps(a); printf_s("Loading floating-point values\n" "%5.3f %5.3f %5.3f %5.3f into XMM register.\n", b[0], b[1], b[2], b[3] ); v = _mm_load_ps(b); printf_s("Calling _mm_hadd_ps to modify these values.\n"); w = _mm_hadd_ps ( u , v); printf_s("Result: %5.3f %5.3f %5.3f %5.3f\n", w.m128_f32[0], w.m128_f32[1], w.m128_f32[2], w.m128_f32[3] ); } |