Prefer memcpy to pointer casts and manual pointer arithmetic.
diff --git a/intel/filter_sse2_intrinsics.c b/intel/filter_sse2_intrinsics.c
index ef1fc7d..a5f5ecf 100644
--- a/intel/filter_sse2_intrinsics.c
+++ b/intel/filter_sse2_intrinsics.c
@@ -29,39 +29,25 @@
*/
static __m128i load4(const void* p) {
- return _mm_cvtsi32_si128(*(const int*)p);
+ int tmp; /* filled via memcpy so p is never dereferenced as an int */
+ memcpy(&tmp, p, sizeof(tmp)); /* copy sizeof(int) bytes from p */
+ return _mm_cvtsi32_si128(tmp); /* convert tmp to __m128i */
}
static void store4(void* p, __m128i v) {
- *(int*)p = _mm_cvtsi128_si32(v);
+ int tmp = _mm_cvtsi128_si32(v); /* extract an int from v */
+ memcpy(p, &tmp, sizeof(int)); /* write sizeof(int) bytes; no int* cast */
}
static __m128i load3(const void* p) {
- /* We'll load 2 bytes, then 1 byte,
- * then mask them together, and finally load into SSE.
- */
- const png_uint_16* p01 = (png_const_uint_16p)p;
- const png_byte* p2 = (const png_byte*)(p01+1);
-
- png_uint_32 v012 = (png_uint_32)(*p01)
- | (png_uint_32)(*p2) << 16;
- return load4(&v012);
+ png_uint_32 tmp = 0; /* zeroed so bytes memcpy does not write stay 0 */
+ memcpy(&tmp, p, 3); /* read exactly 3 bytes from p */
+ return _mm_cvtsi32_si128(tmp); /* convert tmp to __m128i */
}
static void store3(void* p, __m128i v) {
- /* We'll pull from SSE as a 32-bit int, then write
- * its bottom two bytes, then its third byte.
- */
- png_uint_32 v012;
- png_uint_16* p01;
- png_byte* p2;
-
- store4(&v012, v);
-
- p01 = (png_uint_16p)p;
- p2 = (png_byte*)(p01+1);
- *p01 = (png_uint_16)v012;
- *p2 = (png_byte)(v012 >> 16);
+ int tmp = _mm_cvtsi128_si32(v); /* extract an int from v */
+ memcpy(p, &tmp, 3); /* write only the first 3 bytes of tmp to p */
}
void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,