Merge tag 'v1.6.52' into branch 'libpng18' into develop
diff --git a/AUTHORS.md b/AUTHORS.md index 4c4d09d..a205024 100644 --- a/AUTHORS.md +++ b/AUTHORS.md
@@ -58,6 +58,8 @@ - ZhangLixia (张利霞) * Samsung Group - Filip Wasil + * SpacemiT Hangzhou Technology, Co. + - Liang Junzhao The build projects, the build scripts, the test scripts, and other files in the "projects", "scripts" and "tests" directories, have
diff --git a/CHANGES b/CHANGES index 871f5e7..527dc76 100644 --- a/CHANGES +++ b/CHANGES
@@ -6304,6 +6304,17 @@ Added GitHub Actions workflows for automated testing. Performed various refactorings and cleanups. +Version 1.6.52 [December 3, 2025] + Fixed CVE-2025-66293 (high severity): + Out-of-bounds read in `png_image_read_composite`. + (Reported by flyfish101 <flyfish101@users.noreply.github.com>.) + Fixed the Paeth filter handling in the RISC-V RVV implementation. + (Reported by Filip Wasil; fixed by Liang Junzhao.) + Improved the performance of the RISC-V RVV implementation. + (Contributed by Liang Junzhao.) + Added allocation failure fuzzing to oss-fuzz. + (Contributed by Philippe Antoine.) + Version 2.0.0 [TODO] Send comments/corrections/commendations to png-mng-implement at lists.sf.net.
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e6e08c..7b3e52e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -14,7 +14,7 @@ # # SPDX-License-Identifier: libpng-2.0 -cmake_minimum_required(VERSION 3.14...4.0) +cmake_minimum_required(VERSION 3.14...4.2) set(PNGLIB_MAJOR 2) set(PNGLIB_MINOR 0)
diff --git a/configure.ac b/configure.ac index fafa24a..90182d1 100644 --- a/configure.ac +++ b/configure.ac
@@ -174,6 +174,19 @@ [AC_CHECK_LIB([z], [${ZPREFIX}zlibVersion], , [AC_MSG_ERROR([zlib not installed])])]) +AC_MSG_CHECKING([zlib version]) +AC_COMPILE_IFELSE( + [AC_LANG_SOURCE([ +#include <zlib.h> +#if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1280 +#error zlib version 1.2.8 or later is required +#endif +int main(void) { return 0; } +])], + [AC_MSG_RESULT([acceptable])], + [AC_MSG_RESULT([too old or ZLIB_VERNUM not defined]) + AC_MSG_ERROR([zlib version 1.2.8 or later is required])]) + # The following is for pngvalid, to ensure it catches FP errors even on # platforms that don't enable FP exceptions, the function appears in the math # library (typically), it's not an error if it is not found.
diff --git a/contrib/tools/pngfix.c b/contrib/tools/pngfix.c index a0c623a..9c5eebd 100644 --- a/contrib/tools/pngfix.c +++ b/contrib/tools/pngfix.c
@@ -52,23 +52,10 @@ # undef const #endif -/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility - * with older builds. - */ -#if ZLIB_VERNUM < 0x1260 -# define PNGZ_MSG_CAST(s) constcast(char*,s) -# define PNGZ_INPUT_CAST(b) constcast(png_byte *,b) -#else -# define PNGZ_MSG_CAST(s) (s) -# define PNGZ_INPUT_CAST(b) (b) -#endif - #ifndef PNG_MAXIMUM_INFLATE_WINDOW # error pngfix requires libpng with PNG_MAXIMUM_INFLATE_WINDOW supported #endif -#if ZLIB_VERNUM >= 0x1240 - /* Copied from pngpriv.h */ #ifdef __cplusplus # define voidcast(type, value) static_cast<type>(value) @@ -2653,7 +2640,7 @@ case ZLIB_OK: /* Truncated stream; unrecoverable, gets converted to ZLIB_FATAL */ - zlib.z.msg = PNGZ_MSG_CAST("[truncated]"); + zlib.z.msg = "[truncated]"; zlib_message(&zlib, 0/*expected*/); /* FALLTHROUGH */ @@ -2692,8 +2679,7 @@ /* Output the error that wasn't output before: */ if (zlib.z.msg == NULL) - zlib.z.msg = PNGZ_MSG_CAST( - "invalid distance too far back"); + zlib.z.msg = "invalid distance too far back"; zlib_message(&zlib, 0/*stream error*/); zlib_end(&zlib); return 0; @@ -4009,17 +3995,6 @@ return global_end(&global); } -#else /* ZLIB_VERNUM < 0x1240 */ -int -main(void) -{ - fprintf(stderr, - "pngfix needs libpng with a zlib >=1.2.4 (not 0x%x)\n", - ZLIB_VERNUM); - return 77; -} -#endif /* ZLIB_VERNUM */ - #else /* No read support */ int
diff --git a/manuals/libpng-manual.txt b/manuals/libpng-manual.txt index b746783..1946f8c 100644 --- a/manuals/libpng-manual.txt +++ b/manuals/libpng-manual.txt
@@ -9,7 +9,7 @@ Based on: - libpng version 1.6.36, December 2018, through 1.6.51 - November 2025 + libpng version 1.6.36, December 2018, through 1.6.52 - December 2025 Updated and distributed by Cosmin Truta Copyright (c) 2018-2025 Cosmin Truta
diff --git a/manuals/libpng.3 b/manuals/libpng.3 index 33e68e7..c406dcc 100644 --- a/manuals/libpng.3 +++ b/manuals/libpng.3
@@ -1,4 +1,4 @@ -.TH LIBPNG 3 "November 21, 2025" +.TH LIBPNG 3 "December 3, 2025" .SH NAME libpng \- Portable Network Graphics (PNG) Reference Library 1.8.0.git @@ -516,7 +516,7 @@ Based on: - libpng version 1.6.36, December 2018, through 1.6.51 - November 2025 + libpng version 1.6.36, December 2018, through 1.6.52 - December 2025 Updated and distributed by Cosmin Truta Copyright (c) 2018-2025 Cosmin Truta
diff --git a/manuals/png.5 b/manuals/png.5 index a22a33b..fbb88ab 100644 --- a/manuals/png.5 +++ b/manuals/png.5
@@ -1,4 +1,4 @@ -.TH PNG 5 "November 21, 2025" +.TH PNG 5 "December 3, 2025" .SH NAME png \- Portable Network Graphics (PNG) format
diff --git a/png.c b/png.c index fe3e6e1..24b2201 100644 --- a/png.c +++ b/png.c
@@ -982,48 +982,48 @@ { default: case Z_OK: - png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return code"); + png_ptr->zstream.msg = "unexpected zlib return code"; break; case Z_STREAM_END: /* Normal exit */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected end of LZ stream"); + png_ptr->zstream.msg = "unexpected end of LZ stream"; break; case Z_NEED_DICT: /* This means the deflate stream did not have a dictionary; this * indicates a bogus PNG. */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("missing LZ dictionary"); + png_ptr->zstream.msg = "missing LZ dictionary"; break; case Z_ERRNO: /* gz APIs only: should not happen */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("zlib IO error"); + png_ptr->zstream.msg = "zlib IO error"; break; case Z_STREAM_ERROR: /* internal libpng error */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("bad parameters to zlib"); + png_ptr->zstream.msg = "bad parameters to zlib"; break; case Z_DATA_ERROR: - png_ptr->zstream.msg = PNGZ_MSG_CAST("damaged LZ stream"); + png_ptr->zstream.msg = "damaged LZ stream"; break; case Z_MEM_ERROR: - png_ptr->zstream.msg = PNGZ_MSG_CAST("insufficient memory"); + png_ptr->zstream.msg = "insufficient memory"; break; case Z_BUF_ERROR: /* End of input or output; not a problem if the caller is doing * incremental read or write. */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("truncated"); + png_ptr->zstream.msg = "truncated"; break; case Z_VERSION_ERROR: - png_ptr->zstream.msg = PNGZ_MSG_CAST("unsupported zlib version"); + png_ptr->zstream.msg = "unsupported zlib version"; break; case PNG_UNEXPECTED_ZLIB_RETURN: @@ -1032,7 +1032,7 @@ * and change pngpriv.h. Note that this message is "... return", * whereas the default/Z_OK one is "... return code". */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return"); + png_ptr->zstream.msg = "unexpected zlib return"; break; } }
diff --git a/png.h b/png.h index 6776260..35be19c 100644 --- a/png.h +++ b/png.h
@@ -14,7 +14,7 @@ * libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger * libpng versions 0.97, January 1998, through 1.6.35, July 2018: * Glenn Randers-Pehrson - * libpng versions 1.6.36, December 2018, through 1.6.51, November 2025: + * libpng versions 1.6.36, December 2018, through 1.6.52, December 2025: * Cosmin Truta * See also "Contributing Authors", below. */
diff --git a/pngpread.c b/pngpread.c index f79ba4c..340c636 100644 --- a/pngpread.c +++ b/pngpread.c
@@ -724,7 +724,7 @@ * change the current behavior (see comments in inflate.c * for why this doesn't happen at present with zlib 1.2.5). */ - ret = PNG_INFLATE(png_ptr, Z_SYNC_FLUSH); + ret = png_zlib_inflate(png_ptr, Z_SYNC_FLUSH); /* Check for any failure before proceeding. */ if (ret != Z_OK && ret != Z_STREAM_END)
diff --git a/pngpriv.h b/pngpriv.h index 6c407bc..ac6f716 100644 --- a/pngpriv.h +++ b/pngpriv.h
@@ -1377,14 +1377,9 @@ (png_struct *png_ptr), PNG_EMPTY); -#if ZLIB_VERNUM >= 0x1240 PNG_INTERNAL_FUNCTION(int, png_zlib_inflate, (png_struct *png_ptr, int flush), PNG_EMPTY); -# define PNG_INFLATE(pp, flush) png_zlib_inflate(pp, flush) -#else /* Zlib < 1.2.4 */ -# define PNG_INFLATE(pp, flush) inflate(&(pp)->zstream, flush) -#endif /* Zlib < 1.2.4 */ #ifdef PNG_READ_TRANSFORMS_SUPPORTED /* Optional call to update the users info structure */
diff --git a/pngread.c b/pngread.c index d79e5dc..eb2cccc 100644 --- a/pngread.c +++ b/pngread.c
@@ -3295,6 +3295,7 @@ ptrdiff_t step_row = display->row_bytes; unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? 3 : 1; + int optimize_alpha = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0; int pass; for (pass = 0; pass < passes; ++pass) @@ -3351,20 +3352,44 @@ if (alpha < 255) /* else just use component */ { - /* This is PNG_OPTIMIZED_ALPHA, the component value - * is a linear 8-bit value. Combine this with the - * current outrow[c] value which is sRGB encoded. - * Arithmetic here is 16-bits to preserve the output - * values correctly. - */ - component *= 257*255; /* =65535 */ - component += (255-alpha)*png_sRGB_table[outrow[c]]; + if (optimize_alpha != 0) + { + /* This is PNG_OPTIMIZED_ALPHA, the component value + * is a linear 8-bit value. Combine this with the + * current outrow[c] value which is sRGB encoded. + * Arithmetic here is 16-bits to preserve the output + * values correctly. + */ + component *= 257*255; /* =65535 */ + component += (255-alpha)*png_sRGB_table[outrow[c]]; - /* So 'component' is scaled by 255*65535 and is - * therefore appropriate for the sRGB to linear - * conversion table. - */ - component = PNG_sRGB_FROM_LINEAR(component); + /* Clamp to the valid range to defend against + * unforeseen cases where the data might be sRGB + * instead of linear premultiplied. + * (Belt-and-suspenders for GitHub Issue #764.) + */ + if (component > 255*65535) + component = 255*65535; + + /* So 'component' is scaled by 255*65535 and is + * therefore appropriate for the sRGB-to-linear + * conversion table. + */ + component = PNG_sRGB_FROM_LINEAR(component); + } + else + { + /* Compositing was already done on the palette + * entries. The data is sRGB premultiplied on black. + * Composite with the background in sRGB space. + * This is not gamma-correct, but matches what was + * done to the palette. 
+ */ + png_uint_32 background = outrow[c]; + component += ((255-alpha) * background + 127) / 255; + if (component > 255) + component = 255; + } } outrow[c] = (png_byte)component;
diff --git a/pngrtran.c b/pngrtran.c index 7636a28..0d8c596 100644 --- a/pngrtran.c +++ b/pngrtran.c
@@ -1825,6 +1825,7 @@ * transformations elsewhere. */ png_ptr->transformations &= ~(PNG_COMPOSE | PNG_GAMMA); + png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA; } /* color_type == PNG_COLOR_TYPE_PALETTE */ /* if (png_ptr->background_gamma_type!=PNG_BACKGROUND_GAMMA_UNKNOWN) */
diff --git a/pngrutil.c b/pngrutil.c index c05fa1c..85ddb98 100644 --- a/pngrutil.c +++ b/pngrutil.c
@@ -439,14 +439,9 @@ * follow that because, for systems with with limited capabilities, we * would otherwise reject the application's attempts to use a smaller window * size (zlib doesn't have an interface to say "this or lower"!). - * - * inflateReset2 was added to zlib 1.2.4; before this the window could not be - * reset, therefore it is necessary to always allocate the maximum window - * size with earlier zlibs just in case later compressed chunks need it. */ { int ret; /* zlib return code */ -#if ZLIB_VERNUM >= 0x1240 int window_bits = 0; if (((png_ptr->options >> PNG_MAXIMUM_INFLATE_WINDOW) & 3) == @@ -460,7 +455,6 @@ { png_ptr->zstream_start = 1; } -#endif /* ZLIB_VERNUM >= 0x1240 */ /* Set this for safety, just in case the previous owner left pointers to * memory allocations. @@ -472,20 +466,12 @@ if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0) { -#if ZLIB_VERNUM >= 0x1240 ret = inflateReset2(&png_ptr->zstream, window_bits); -#else - ret = inflateReset(&png_ptr->zstream); -#endif } else { -#if ZLIB_VERNUM >= 0x1240 ret = inflateInit2(&png_ptr->zstream, window_bits); -#else - ret = inflateInit(&png_ptr->zstream); -#endif if (ret == Z_OK) png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED; @@ -511,7 +497,6 @@ #endif } -#if ZLIB_VERNUM >= 0x1240 /* Handle the start of the inflate stream if we called inflateInit2(strm,0); * in this case some zlib versions skip validation of the CINFO field and, in * certain circumstances, libpng may end up displaying an invalid image, in @@ -534,7 +519,6 @@ return inflate(&png_ptr->zstream, flush); } -#endif /* Zlib >= 1.2.4 */ #ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED #if defined(PNG_READ_zTXt_SUPPORTED) || defined (PNG_READ_iTXt_SUPPORTED) @@ -569,7 +553,7 @@ * a performance advantage, because it reduces the amount of data accessed * at each step and that may give the OS more time to page it in. 
*/ - png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input); + png_ptr->zstream.next_in = input; /* avail_in and avail_out are set below from 'size' */ png_ptr->zstream.avail_in = 0; png_ptr->zstream.avail_out = 0; @@ -630,7 +614,7 @@ * the previous chunk of input data. Tell zlib if we have reached the * end of the output buffer. */ - ret = PNG_INFLATE(png_ptr, avail_out > 0 ? Z_NO_FLUSH : + ret = png_zlib_inflate(png_ptr, avail_out > 0 ? Z_NO_FLUSH : (finish ? Z_FINISH : Z_SYNC_FLUSH)); } while (ret == Z_OK); @@ -662,7 +646,7 @@ * pointer, which is not owned by the caller, but this is safe; it's only * used on errors! */ - png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed"); + png_ptr->zstream.msg = "zstream unclaimed"; return Z_STREAM_ERROR; } } @@ -871,7 +855,7 @@ * the available output is produced; this allows reading of truncated * streams. */ - ret = PNG_INFLATE(png_ptr, *chunk_bytes > 0 ? + ret = png_zlib_inflate(png_ptr, *chunk_bytes > 0 ? Z_NO_FLUSH : (finish ? Z_FINISH : Z_SYNC_FLUSH)); } while (ret == Z_OK && (*out_size > 0 || png_ptr->zstream.avail_out > 0)); @@ -886,7 +870,7 @@ else { - png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed"); + png_ptr->zstream.msg = "zstream unclaimed"; return Z_STREAM_ERROR; } } @@ -4469,7 +4453,7 @@ * * TODO: deal more elegantly with truncated IDAT lists. */ - ret = PNG_INFLATE(png_ptr, Z_NO_FLUSH); + ret = png_zlib_inflate(png_ptr, Z_NO_FLUSH); /* Take the unconsumed output back. */ if (output != NULL)
diff --git a/pngstruct.h b/pngstruct.h index 9d01246..082177e 100644 --- a/pngstruct.h +++ b/pngstruct.h
@@ -30,17 +30,6 @@ # undef const #endif -/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility - * with older builds. - */ -#if ZLIB_VERNUM < 0x1260 -# define PNGZ_MSG_CAST(s) png_constcast(char*,s) -# define PNGZ_INPUT_CAST(b) png_constcast(png_byte *,b) -#else -# define PNGZ_MSG_CAST(s) (s) -# define PNGZ_INPUT_CAST(b) (b) -#endif - /* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib * can handle at once. This type need be no larger than 16 bits (so maximum of * 65535), this define allows us to discover how big it is, but limited by the @@ -247,9 +236,7 @@ /* pixel depth used for the row buffers */ png_byte transformed_pixel_depth; /* pixel depth after read/write transforms */ -#if ZLIB_VERNUM >= 0x1240 png_byte zstream_start; /* at start of an input zlib stream */ -#endif /* Zlib >= 1.2.4 */ #if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED) png_uint_16 filler; /* filler bytes for pixel expansion */ #endif
diff --git a/pngwutil.c b/pngwutil.c index a8e72f2..dfb708a 100644 --- a/pngwutil.c +++ b/pngwutil.c
@@ -330,7 +330,7 @@ /* Attempt sane error recovery */ if (png_ptr->zowner == png_IDAT) /* don't steal from IDAT */ { - png_ptr->zstream.msg = PNGZ_MSG_CAST("in use by IDAT"); + png_ptr->zstream.msg = "in use by IDAT"; return Z_STREAM_ERROR; } @@ -534,7 +534,7 @@ png_uint_32 output_len; /* zlib updates these for us: */ - png_ptr->zstream.next_in = PNGZ_INPUT_CAST(comp->input); + png_ptr->zstream.next_in = comp->input; png_ptr->zstream.avail_in = 0; /* Set below */ png_ptr->zstream.next_out = comp->output; png_ptr->zstream.avail_out = (sizeof comp->output); @@ -618,7 +618,7 @@ */ if (output_len + prefix_len >= PNG_UINT_31_MAX) { - png_ptr->zstream.msg = PNGZ_MSG_CAST("compressed data too long"); + png_ptr->zstream.msg = "compressed data too long"; ret = Z_MEM_ERROR; } @@ -985,7 +985,7 @@ * terminates the operation. The _out values are maintained across calls to * this function, but the input must be reset each time. */ - png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input); + png_ptr->zstream.next_in = input; png_ptr->zstream.avail_in = 0; /* set below */ for (;;) {
diff --git a/riscv/filter_rvv_intrinsics.c b/riscv/filter_rvv_intrinsics.c index 1ab4e10..d91f80f 100644 --- a/riscv/filter_rvv_intrinsics.c +++ b/riscv/filter_rvv_intrinsics.c
@@ -3,7 +3,8 @@ * Copyright (c) 2023 Google LLC * Written by Manfred SCHLAEGL, 2022 * Dragoș Tiselice <dtiselice@google.com>, May 2023. - * Filip Wasil <f.wasil@samsung.com>, March 2025. + * Filip Wasil <f.wasil@samsung.com>, March 2025. + * Liang Junzhao <junzhao.liang@spacemit.com>, November 2025. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -140,11 +141,8 @@ /* x = *row */ x = __riscv_vle8_v_u8m1(row, vl); - /* tmp = a + b */ - vuint16m2_t tmp = __riscv_vwaddu_vv_u16m2(a, b, vl); - - /* a = tmp/2 */ - a = __riscv_vnsrl_wx_u8m1(tmp, 1, vl); + /* a = (a + b) / 2, round to zero with vxrm = 2 (RDN) */ + a = __riscv_vaaddu_vv_u8m1(a, b, 2, vl); /* a += x */ a = __riscv_vadd_vv_u8m1(a, x, vl); @@ -265,27 +263,22 @@ /* x = *row */ vuint8m1_t x = __riscv_vle8_v_u8m1(row, vl); - /* Calculate p = b - c and pc = a - c using widening subtraction */ - vuint16m2_t p_wide = __riscv_vwsubu_vv_u16m2(b, c, vl); - vuint16m2_t pc_wide = __riscv_vwsubu_vv_u16m2(a, c, vl); - - /* Convert to signed for easier manipulation */ - size_t vl16 = __riscv_vsetvl_e16m2(bpp); - vint16m2_t p = __riscv_vreinterpret_v_u16m2_i16m2(p_wide); - vint16m2_t pc = __riscv_vreinterpret_v_u16m2_i16m2(pc_wide); + /* p = b - c and pc = a - c */ + vuint16m2_t p = __riscv_vwsubu_vv_u16m2(b, c, vl); + vuint16m2_t pc = __riscv_vwsubu_vv_u16m2(a, c, vl); /* pa = |p| */ - vbool8_t p_neg_mask = __riscv_vmslt_vx_i16m2_b8(p, 0, vl16); - vint16m2_t pa = __riscv_vrsub_vx_i16m2_m(p_neg_mask, p, 0, vl16); + vuint16m2_t tmp = __riscv_vrsub_vx_u16m2(p, 0, vl); + vuint16m2_t pa = __riscv_vminu_vv_u16m2(p, tmp, vl); /* pb = |pc| */ - vbool8_t pc_neg_mask = __riscv_vmslt_vx_i16m2_b8(pc, 0, vl16); - vint16m2_t pb = __riscv_vrsub_vx_i16m2_m(pc_neg_mask, pc, 0, vl16); + tmp = __riscv_vrsub_vx_u16m2(pc, 0, vl); + vuint16m2_t pb = __riscv_vminu_vv_u16m2(pc, tmp, vl); /* pc = |p + pc| */ - vint16m2_t p_plus_pc = __riscv_vadd_vv_i16m2(p, pc, vl16); - vbool8_t 
p_plus_pc_neg_mask = __riscv_vmslt_vx_i16m2_b8(p_plus_pc, 0, vl16); - pc = __riscv_vrsub_vx_i16m2_m(p_plus_pc_neg_mask, p_plus_pc, 0, vl16); + pc = __riscv_vadd_vv_u16m2(p, pc, vl); + tmp = __riscv_vrsub_vx_u16m2(pc, 0, vl); + pc = __riscv_vminu_vv_u16m2(pc, tmp, vl); /* * The key insight is that we want the minimum of pa, pb, pc. @@ -294,31 +287,17 @@ * - Else use c */ - /* Find which predictor to use based on minimum absolute difference */ - vbool8_t pa_le_pb = __riscv_vmsle_vv_i16m2_b8(pa, pb, vl16); - vbool8_t pa_le_pc = __riscv_vmsle_vv_i16m2_b8(pa, pc, vl16); - vbool8_t pb_le_pc = __riscv_vmsle_vv_i16m2_b8(pb, pc, vl16); + /* if (pb < pa) { pa = pb; a = b; } */ + vbool8_t m1 = __riscv_vmsltu_vv_u16m2_b8(pb, pa, vl); + pa = __riscv_vmerge_vvm_u16m2(pa, pb, m1, vl); + a = __riscv_vmerge_vvm_u8m1(a, b, m1, vl); - /* use_a = pa <= pb && pa <= pc */ - vbool8_t use_a = __riscv_vmand_mm_b8(pa_le_pb, pa_le_pc, vl16); - - /* use_b = !use_a && pb <= pc */ - vbool8_t not_use_a = __riscv_vmnot_m_b8(use_a, vl16); - vbool8_t use_b = __riscv_vmand_mm_b8(not_use_a, pb_le_pc, vl16); - - /* Switch back to e8m1 for final operations */ - vl = __riscv_vsetvl_e8m1(bpp); - - /* Start with a, then conditionally replace with b or c */ - vuint8m1_t result = a; - result = __riscv_vmerge_vvm_u8m1(result, b, use_b, vl); - - /* use_c = !use_a && !use_b */ - vbool8_t use_c = __riscv_vmnand_mm_b8(__riscv_vmor_mm_b8(use_a, use_b, vl), __riscv_vmor_mm_b8(use_a, use_b, vl), vl); - result = __riscv_vmerge_vvm_u8m1(result, c, use_c, vl); + /* if (pc < pa) a = c; */ + vbool8_t m2 = __riscv_vmsltu_vv_u16m2_b8(pc, pa, vl); + a = __riscv_vmerge_vvm_u8m1(a, c, m2, vl); /* a = result + x */ - a = __riscv_vadd_vv_u8m1(result, x, vl); + a = __riscv_vadd_vv_u8m1(a, x, vl); /* *row = a */ __riscv_vse8_v_u8m1(row, a, vl);