Merge tag 'v1.6.52' into branch 'libpng18' into develop

diff --git a/AUTHORS.md b/AUTHORS.md
index 4c4d09d..a205024 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md

@@ -58,6 +58,8 @@
     - ZhangLixia (张利霞)
  * Samsung Group
     - Filip Wasil
+ * SpacemiT Hangzhou Technology, Co.
+    - Liang Junzhao
 
 The build projects, the build scripts, the test scripts, and other
 files in the "projects", "scripts" and "tests" directories, have

diff --git a/CHANGES b/CHANGES
index 871f5e7..527dc76 100644
--- a/CHANGES
+++ b/CHANGES

@@ -6304,6 +6304,17 @@
   Added GitHub Actions workflows for automated testing.
   Performed various refactorings and cleanups.
 
+Version 1.6.52 [December 3, 2025]
+  Fixed CVE-2025-66293 (high severity):
+    Out-of-bounds read in `png_image_read_composite`.
+    (Reported by flyfish101 <flyfish101@users.noreply.github.com>.)
+  Fixed the Paeth filter handling in the RISC-V RVV implementation.
+    (Reported by Filip Wasil; fixed by Liang Junzhao.)
+  Improved the performance of the RISC-V RVV implementation.
+    (Contributed by Liang Junzhao.)
+  Added allocation failure fuzzing to oss-fuzz.
+    (Contributed by Philippe Antoine.)
+
 Version 2.0.0 [TODO]
 
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1e6e08c..7b3e52e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt

@@ -14,7 +14,7 @@
 #
 # SPDX-License-Identifier: libpng-2.0
 
-cmake_minimum_required(VERSION 3.14...4.0)
+cmake_minimum_required(VERSION 3.14...4.2)
 
 set(PNGLIB_MAJOR 2)
 set(PNGLIB_MINOR 0)

diff --git a/configure.ac b/configure.ac
index fafa24a..90182d1 100644
--- a/configure.ac
+++ b/configure.ac

@@ -174,6 +174,19 @@
   [AC_CHECK_LIB([z], [${ZPREFIX}zlibVersion], ,
      [AC_MSG_ERROR([zlib not installed])])])
 
+AC_MSG_CHECKING([zlib version])
+AC_COMPILE_IFELSE(
+  [AC_LANG_SOURCE([
+#include <zlib.h>
+#if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1280
+#error zlib version 1.2.8 or later is required
+#endif
+int main(void) { return 0; }
+])],
+  [AC_MSG_RESULT([acceptable])],
+  [AC_MSG_RESULT([too old or ZLIB_VERNUM not defined])
+   AC_MSG_ERROR([zlib version 1.2.8 or later is required])])
+
 # The following is for pngvalid, to ensure it catches FP errors even on
 # platforms that don't enable FP exceptions, the function appears in the math
 # library (typically), it's not an error if it is not found.

diff --git a/contrib/tools/pngfix.c b/contrib/tools/pngfix.c
index a0c623a..9c5eebd 100644
--- a/contrib/tools/pngfix.c
+++ b/contrib/tools/pngfix.c

@@ -52,23 +52,10 @@
 #  undef const
 #endif
 
-/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility
- * with older builds.
- */
-#if ZLIB_VERNUM < 0x1260
-#  define PNGZ_MSG_CAST(s) constcast(char*,s)
-#  define PNGZ_INPUT_CAST(b) constcast(png_byte *,b)
-#else
-#  define PNGZ_MSG_CAST(s) (s)
-#  define PNGZ_INPUT_CAST(b) (b)
-#endif
-
 #ifndef PNG_MAXIMUM_INFLATE_WINDOW
 #  error pngfix requires libpng with PNG_MAXIMUM_INFLATE_WINDOW supported
 #endif
 
-#if ZLIB_VERNUM >= 0x1240
-
 /* Copied from pngpriv.h */
 #ifdef __cplusplus
 #  define voidcast(type, value) static_cast<type>(value)
@@ -2653,7 +2640,7 @@
 
          case ZLIB_OK:
             /* Truncated stream; unrecoverable, gets converted to ZLIB_FATAL */
-            zlib.z.msg = PNGZ_MSG_CAST("[truncated]");
+            zlib.z.msg = "[truncated]";
             zlib_message(&zlib, 0/*expected*/);
             /* FALLTHROUGH */
 
@@ -2692,8 +2679,7 @@
 
                      /* Output the error that wasn't output before: */
                      if (zlib.z.msg == NULL)
-                        zlib.z.msg = PNGZ_MSG_CAST(
-                           "invalid distance too far back");
+                        zlib.z.msg = "invalid distance too far back";
                      zlib_message(&zlib, 0/*stream error*/);
                      zlib_end(&zlib);
                      return 0;
@@ -4009,17 +3995,6 @@
    return global_end(&global);
 }
 
-#else /* ZLIB_VERNUM < 0x1240 */
-int
-main(void)
-{
-   fprintf(stderr,
-      "pngfix needs libpng with a zlib >=1.2.4 (not 0x%x)\n",
-      ZLIB_VERNUM);
-   return 77;
-}
-#endif /* ZLIB_VERNUM */
-
 #else /* No read support */
 
 int

diff --git a/manuals/libpng-manual.txt b/manuals/libpng-manual.txt
index b746783..1946f8c 100644
--- a/manuals/libpng-manual.txt
+++ b/manuals/libpng-manual.txt

@@ -9,7 +9,7 @@
 
  Based on:
 
- libpng version 1.6.36, December 2018, through 1.6.51 - November 2025
+ libpng version 1.6.36, December 2018, through 1.6.52 - December 2025
  Updated and distributed by Cosmin Truta
  Copyright (c) 2018-2025 Cosmin Truta
 

diff --git a/manuals/libpng.3 b/manuals/libpng.3
index 33e68e7..c406dcc 100644
--- a/manuals/libpng.3
+++ b/manuals/libpng.3

@@ -1,4 +1,4 @@
-.TH LIBPNG 3 "November 21, 2025"
+.TH LIBPNG 3 "December 3, 2025"
 .SH NAME
 libpng \- Portable Network Graphics (PNG) Reference Library 1.8.0.git
 
@@ -516,7 +516,7 @@
 
  Based on:
 
- libpng version 1.6.36, December 2018, through 1.6.51 - November 2025
+ libpng version 1.6.36, December 2018, through 1.6.52 - December 2025
  Updated and distributed by Cosmin Truta
  Copyright (c) 2018-2025 Cosmin Truta
 

diff --git a/manuals/png.5 b/manuals/png.5
index a22a33b..fbb88ab 100644
--- a/manuals/png.5
+++ b/manuals/png.5

@@ -1,4 +1,4 @@
-.TH PNG 5 "November 21, 2025"
+.TH PNG 5 "December 3, 2025"
 .SH NAME
 png \- Portable Network Graphics (PNG) format
 

diff --git a/png.c b/png.c
index fe3e6e1..24b2201 100644
--- a/png.c
+++ b/png.c

@@ -982,48 +982,48 @@
    {
       default:
       case Z_OK:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return code");
+         png_ptr->zstream.msg = "unexpected zlib return code";
          break;
 
       case Z_STREAM_END:
          /* Normal exit */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected end of LZ stream");
+         png_ptr->zstream.msg = "unexpected end of LZ stream";
          break;
 
       case Z_NEED_DICT:
          /* This means the deflate stream did not have a dictionary; this
           * indicates a bogus PNG.
           */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("missing LZ dictionary");
+         png_ptr->zstream.msg = "missing LZ dictionary";
          break;
 
       case Z_ERRNO:
          /* gz APIs only: should not happen */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("zlib IO error");
+         png_ptr->zstream.msg = "zlib IO error";
          break;
 
       case Z_STREAM_ERROR:
          /* internal libpng error */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("bad parameters to zlib");
+         png_ptr->zstream.msg = "bad parameters to zlib";
          break;
 
       case Z_DATA_ERROR:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("damaged LZ stream");
+         png_ptr->zstream.msg = "damaged LZ stream";
          break;
 
       case Z_MEM_ERROR:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("insufficient memory");
+         png_ptr->zstream.msg = "insufficient memory";
          break;
 
       case Z_BUF_ERROR:
          /* End of input or output; not a problem if the caller is doing
           * incremental read or write.
           */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("truncated");
+         png_ptr->zstream.msg = "truncated";
          break;
 
       case Z_VERSION_ERROR:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unsupported zlib version");
+         png_ptr->zstream.msg = "unsupported zlib version";
          break;
 
       case PNG_UNEXPECTED_ZLIB_RETURN:
@@ -1032,7 +1032,7 @@
           * and change pngpriv.h.  Note that this message is "... return",
           * whereas the default/Z_OK one is "... return code".
           */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return");
+         png_ptr->zstream.msg = "unexpected zlib return";
          break;
    }
 }

diff --git a/png.h b/png.h
index 6776260..35be19c 100644
--- a/png.h
+++ b/png.h

@@ -14,7 +14,7 @@
  *   libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger
  *   libpng versions 0.97, January 1998, through 1.6.35, July 2018:
  *     Glenn Randers-Pehrson
- *   libpng versions 1.6.36, December 2018, through 1.6.51, November 2025:
+ *   libpng versions 1.6.36, December 2018, through 1.6.52, December 2025:
  *     Cosmin Truta
  *   See also "Contributing Authors", below.
  */

diff --git a/pngpread.c b/pngpread.c
index f79ba4c..340c636 100644
--- a/pngpread.c
+++ b/pngpread.c

@@ -724,7 +724,7 @@
        * change the current behavior (see comments in inflate.c
        * for why this doesn't happen at present with zlib 1.2.5).
        */
-      ret = PNG_INFLATE(png_ptr, Z_SYNC_FLUSH);
+      ret = png_zlib_inflate(png_ptr, Z_SYNC_FLUSH);
 
       /* Check for any failure before proceeding. */
       if (ret != Z_OK && ret != Z_STREAM_END)

diff --git a/pngpriv.h b/pngpriv.h
index 6c407bc..ac6f716 100644
--- a/pngpriv.h
+++ b/pngpriv.h

@@ -1377,14 +1377,9 @@
    (png_struct *png_ptr),
    PNG_EMPTY);
 
-#if ZLIB_VERNUM >= 0x1240
 PNG_INTERNAL_FUNCTION(int, png_zlib_inflate,
    (png_struct *png_ptr, int flush),
    PNG_EMPTY);
-#  define PNG_INFLATE(pp, flush) png_zlib_inflate(pp, flush)
-#else /* Zlib < 1.2.4 */
-#  define PNG_INFLATE(pp, flush) inflate(&(pp)->zstream, flush)
-#endif /* Zlib < 1.2.4 */
 
 #ifdef PNG_READ_TRANSFORMS_SUPPORTED
 /* Optional call to update the users info structure */

diff --git a/pngread.c b/pngread.c
index d79e5dc..eb2cccc 100644
--- a/pngread.c
+++ b/pngread.c

@@ -3295,6 +3295,7 @@
       ptrdiff_t step_row = display->row_bytes;
       unsigned int channels =
           (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? 3 : 1;
+      int optimize_alpha = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0;
       int pass;
 
       for (pass = 0; pass < passes; ++pass)
@@ -3351,20 +3352,44 @@
 
                      if (alpha < 255) /* else just use component */
                      {
-                        /* This is PNG_OPTIMIZED_ALPHA, the component value
-                         * is a linear 8-bit value.  Combine this with the
-                         * current outrow[c] value which is sRGB encoded.
-                         * Arithmetic here is 16-bits to preserve the output
-                         * values correctly.
-                         */
-                        component *= 257*255; /* =65535 */
-                        component += (255-alpha)*png_sRGB_table[outrow[c]];
+                        if (optimize_alpha != 0)
+                        {
+                           /* This is PNG_OPTIMIZED_ALPHA, the component value
+                            * is a linear 8-bit value.  Combine this with the
+                            * current outrow[c] value which is sRGB encoded.
+                            * Arithmetic here is 16-bits to preserve the output
+                            * values correctly.
+                            */
+                           component *= 257*255; /* =65535 */
+                           component += (255-alpha)*png_sRGB_table[outrow[c]];
 
-                        /* So 'component' is scaled by 255*65535 and is
-                         * therefore appropriate for the sRGB to linear
-                         * conversion table.
-                         */
-                        component = PNG_sRGB_FROM_LINEAR(component);
+                           /* Clamp to the valid range to defend against
+                            * unforeseen cases where the data might be sRGB
+                            * instead of linear premultiplied.
+                            * (Belt-and-suspenders for GitHub Issue #764.)
+                            */
+                           if (component > 255*65535)
+                              component = 255*65535;
+
+                           /* So 'component' is scaled by 255*65535 and is
+                            * therefore appropriate for the linear-to-sRGB
+                            * conversion.
+                            */
+                           component = PNG_sRGB_FROM_LINEAR(component);
+                        }
+                        else
+                        {
+                           /* Compositing was already done on the palette
+                            * entries.  The data is sRGB premultiplied on black.
+                            * Composite with the background in sRGB space.
+                            * This is not gamma-correct, but matches what was
+                            * done to the palette.
+                            */
+                           png_uint_32 background = outrow[c];
+                           component += ((255-alpha) * background + 127) / 255;
+                           if (component > 255)
+                              component = 255;
+                        }
                      }
 
                      outrow[c] = (png_byte)component;

diff --git a/pngrtran.c b/pngrtran.c
index 7636a28..0d8c596 100644
--- a/pngrtran.c
+++ b/pngrtran.c

@@ -1825,6 +1825,7 @@
              * transformations elsewhere.
              */
             png_ptr->transformations &= ~(PNG_COMPOSE | PNG_GAMMA);
+            png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
          } /* color_type == PNG_COLOR_TYPE_PALETTE */
 
          /* if (png_ptr->background_gamma_type!=PNG_BACKGROUND_GAMMA_UNKNOWN) */

diff --git a/pngrutil.c b/pngrutil.c
index c05fa1c..85ddb98 100644
--- a/pngrutil.c
+++ b/pngrutil.c

@@ -439,14 +439,9 @@
     * follow that because, for systems with with limited capabilities, we
     * would otherwise reject the application's attempts to use a smaller window
     * size (zlib doesn't have an interface to say "this or lower"!).
-    *
-    * inflateReset2 was added to zlib 1.2.4; before this the window could not be
-    * reset, therefore it is necessary to always allocate the maximum window
-    * size with earlier zlibs just in case later compressed chunks need it.
     */
    {
       int ret; /* zlib return code */
-#if ZLIB_VERNUM >= 0x1240
       int window_bits = 0;
 
       if (((png_ptr->options >> PNG_MAXIMUM_INFLATE_WINDOW) & 3) ==
@@ -460,7 +455,6 @@
       {
          png_ptr->zstream_start = 1;
       }
-#endif /* ZLIB_VERNUM >= 0x1240 */
 
       /* Set this for safety, just in case the previous owner left pointers to
        * memory allocations.
@@ -472,20 +466,12 @@
 
       if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
       {
-#if ZLIB_VERNUM >= 0x1240
          ret = inflateReset2(&png_ptr->zstream, window_bits);
-#else
-         ret = inflateReset(&png_ptr->zstream);
-#endif
       }
 
       else
       {
-#if ZLIB_VERNUM >= 0x1240
          ret = inflateInit2(&png_ptr->zstream, window_bits);
-#else
-         ret = inflateInit(&png_ptr->zstream);
-#endif
 
          if (ret == Z_OK)
             png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED;
@@ -511,7 +497,6 @@
 #endif
 }
 
-#if ZLIB_VERNUM >= 0x1240
 /* Handle the start of the inflate stream if we called inflateInit2(strm,0);
  * in this case some zlib versions skip validation of the CINFO field and, in
  * certain circumstances, libpng may end up displaying an invalid image, in
@@ -534,7 +519,6 @@
 
    return inflate(&png_ptr->zstream, flush);
 }
-#endif /* Zlib >= 1.2.4 */
 
 #ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED
 #if defined(PNG_READ_zTXt_SUPPORTED) || defined (PNG_READ_iTXt_SUPPORTED)
@@ -569,7 +553,7 @@
        * a performance advantage, because it reduces the amount of data accessed
        * at each step and that may give the OS more time to page it in.
        */
-      png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input);
+      png_ptr->zstream.next_in = input;
       /* avail_in and avail_out are set below from 'size' */
       png_ptr->zstream.avail_in = 0;
       png_ptr->zstream.avail_out = 0;
@@ -630,7 +614,7 @@
           * the previous chunk of input data.  Tell zlib if we have reached the
           * end of the output buffer.
           */
-         ret = PNG_INFLATE(png_ptr, avail_out > 0 ? Z_NO_FLUSH :
+         ret = png_zlib_inflate(png_ptr, avail_out > 0 ? Z_NO_FLUSH :
              (finish ? Z_FINISH : Z_SYNC_FLUSH));
       } while (ret == Z_OK);
 
@@ -662,7 +646,7 @@
        * pointer, which is not owned by the caller, but this is safe; it's only
        * used on errors!
        */
-      png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed");
+      png_ptr->zstream.msg = "zstream unclaimed";
       return Z_STREAM_ERROR;
    }
 }
@@ -871,7 +855,7 @@
           * the available output is produced; this allows reading of truncated
           * streams.
           */
-         ret = PNG_INFLATE(png_ptr, *chunk_bytes > 0 ?
+         ret = png_zlib_inflate(png_ptr, *chunk_bytes > 0 ?
              Z_NO_FLUSH : (finish ? Z_FINISH : Z_SYNC_FLUSH));
       }
       while (ret == Z_OK && (*out_size > 0 || png_ptr->zstream.avail_out > 0));
@@ -886,7 +870,7 @@
 
    else
    {
-      png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed");
+      png_ptr->zstream.msg = "zstream unclaimed";
       return Z_STREAM_ERROR;
    }
 }
@@ -4469,7 +4453,7 @@
        *
        * TODO: deal more elegantly with truncated IDAT lists.
        */
-      ret = PNG_INFLATE(png_ptr, Z_NO_FLUSH);
+      ret = png_zlib_inflate(png_ptr, Z_NO_FLUSH);
 
       /* Take the unconsumed output back. */
       if (output != NULL)

diff --git a/pngstruct.h b/pngstruct.h
index 9d01246..082177e 100644
--- a/pngstruct.h
+++ b/pngstruct.h

@@ -30,17 +30,6 @@
 #  undef const
 #endif
 
-/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility
- * with older builds.
- */
-#if ZLIB_VERNUM < 0x1260
-#  define PNGZ_MSG_CAST(s) png_constcast(char*,s)
-#  define PNGZ_INPUT_CAST(b) png_constcast(png_byte *,b)
-#else
-#  define PNGZ_MSG_CAST(s) (s)
-#  define PNGZ_INPUT_CAST(b) (b)
-#endif
-
 /* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib
  * can handle at once.  This type need be no larger than 16 bits (so maximum of
  * 65535), this define allows us to discover how big it is, but limited by the
@@ -247,9 +236,7 @@
                               /* pixel depth used for the row buffers */
    png_byte transformed_pixel_depth;
                               /* pixel depth after read/write transforms */
-#if ZLIB_VERNUM >= 0x1240
    png_byte zstream_start;    /* at start of an input zlib stream */
-#endif /* Zlib >= 1.2.4 */
 #if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
    png_uint_16 filler;           /* filler bytes for pixel expansion */
 #endif

diff --git a/pngwutil.c b/pngwutil.c
index a8e72f2..dfb708a 100644
--- a/pngwutil.c
+++ b/pngwutil.c

@@ -330,7 +330,7 @@
          /* Attempt sane error recovery */
          if (png_ptr->zowner == png_IDAT) /* don't steal from IDAT */
          {
-            png_ptr->zstream.msg = PNGZ_MSG_CAST("in use by IDAT");
+            png_ptr->zstream.msg = "in use by IDAT";
             return Z_STREAM_ERROR;
          }
 
@@ -534,7 +534,7 @@
       png_uint_32 output_len;
 
       /* zlib updates these for us: */
-      png_ptr->zstream.next_in = PNGZ_INPUT_CAST(comp->input);
+      png_ptr->zstream.next_in = comp->input;
       png_ptr->zstream.avail_in = 0; /* Set below */
       png_ptr->zstream.next_out = comp->output;
       png_ptr->zstream.avail_out = (sizeof comp->output);
@@ -618,7 +618,7 @@
        */
       if (output_len + prefix_len >= PNG_UINT_31_MAX)
       {
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("compressed data too long");
+         png_ptr->zstream.msg = "compressed data too long";
          ret = Z_MEM_ERROR;
       }
 
@@ -985,7 +985,7 @@
     * terminates the operation.  The _out values are maintained across calls to
     * this function, but the input must be reset each time.
     */
-   png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input);
+   png_ptr->zstream.next_in = input;
    png_ptr->zstream.avail_in = 0; /* set below */
    for (;;)
    {

diff --git a/riscv/filter_rvv_intrinsics.c b/riscv/filter_rvv_intrinsics.c
index 1ab4e10..d91f80f 100644
--- a/riscv/filter_rvv_intrinsics.c
+++ b/riscv/filter_rvv_intrinsics.c

@@ -3,7 +3,8 @@
  * Copyright (c) 2023 Google LLC
  * Written by Manfred SCHLAEGL, 2022
  *            Dragoș Tiselice <dtiselice@google.com>, May 2023.
- *            Filip Wasil     <f.wasil@samsung.com>, March 2025.
+ *            Filip Wasil <f.wasil@samsung.com>, March 2025.
+ *            Liang Junzhao <junzhao.liang@spacemit.com>, November 2025.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -140,11 +141,8 @@
       /* x = *row */
       x = __riscv_vle8_v_u8m1(row, vl);
 
-      /* tmp = a + b */
-      vuint16m2_t tmp = __riscv_vwaddu_vv_u16m2(a, b, vl);
-
-      /* a = tmp/2 */
-      a = __riscv_vnsrl_wx_u8m1(tmp, 1, vl);
+      /* a = (a + b) / 2, rounded down (vxrm = 2, i.e. rdn/truncate) */
+      a = __riscv_vaaddu_vv_u8m1(a, b, 2, vl);
 
       /* a += x */
       a = __riscv_vadd_vv_u8m1(a, x, vl);
@@ -265,27 +263,22 @@
       /* x = *row */
       vuint8m1_t x = __riscv_vle8_v_u8m1(row, vl);
 
-      /* Calculate p = b - c and pc = a - c using widening subtraction */
-      vuint16m2_t p_wide = __riscv_vwsubu_vv_u16m2(b, c, vl);
-      vuint16m2_t pc_wide = __riscv_vwsubu_vv_u16m2(a, c, vl);
-
-      /* Convert to signed for easier manipulation */
-      size_t vl16 = __riscv_vsetvl_e16m2(bpp);
-      vint16m2_t p = __riscv_vreinterpret_v_u16m2_i16m2(p_wide);
-      vint16m2_t pc = __riscv_vreinterpret_v_u16m2_i16m2(pc_wide);
+      /* p = b - c and pc = a - c */
+      vuint16m2_t p = __riscv_vwsubu_vv_u16m2(b, c, vl);
+      vuint16m2_t pc = __riscv_vwsubu_vv_u16m2(a, c, vl);
 
       /* pa = |p| */
-      vbool8_t p_neg_mask = __riscv_vmslt_vx_i16m2_b8(p, 0, vl16);
-      vint16m2_t pa = __riscv_vrsub_vx_i16m2_m(p_neg_mask, p, 0, vl16);
+      vuint16m2_t tmp = __riscv_vrsub_vx_u16m2(p, 0, vl);
+      vuint16m2_t pa = __riscv_vminu_vv_u16m2(p, tmp, vl);
 
       /* pb = |pc| */
-      vbool8_t pc_neg_mask = __riscv_vmslt_vx_i16m2_b8(pc, 0, vl16);
-      vint16m2_t pb = __riscv_vrsub_vx_i16m2_m(pc_neg_mask, pc, 0, vl16);
+      tmp = __riscv_vrsub_vx_u16m2(pc, 0, vl);
+      vuint16m2_t pb = __riscv_vminu_vv_u16m2(pc, tmp, vl);
 
       /* pc = |p + pc| */
-      vint16m2_t p_plus_pc = __riscv_vadd_vv_i16m2(p, pc, vl16);
-      vbool8_t p_plus_pc_neg_mask = __riscv_vmslt_vx_i16m2_b8(p_plus_pc, 0, vl16);
-      pc = __riscv_vrsub_vx_i16m2_m(p_plus_pc_neg_mask, p_plus_pc, 0, vl16);
+      pc = __riscv_vadd_vv_u16m2(p, pc, vl);
+      tmp = __riscv_vrsub_vx_u16m2(pc, 0, vl);
+      pc = __riscv_vminu_vv_u16m2(pc, tmp, vl);
 
       /*
        * The key insight is that we want the minimum of pa, pb, pc.
@@ -294,31 +287,17 @@
        * - Else use c
        */
 
-      /* Find which predictor to use based on minimum absolute difference */
-      vbool8_t pa_le_pb = __riscv_vmsle_vv_i16m2_b8(pa, pb, vl16);
-      vbool8_t pa_le_pc = __riscv_vmsle_vv_i16m2_b8(pa, pc, vl16);
-      vbool8_t pb_le_pc = __riscv_vmsle_vv_i16m2_b8(pb, pc, vl16);
+      /* if (pb < pa) { pa = pb; a = b; } */
+      vbool8_t m1 = __riscv_vmsltu_vv_u16m2_b8(pb, pa, vl);
+      pa = __riscv_vmerge_vvm_u16m2(pa, pb, m1, vl);
+      a = __riscv_vmerge_vvm_u8m1(a, b, m1, vl);
 
-      /* use_a = pa <= pb && pa <= pc */
-      vbool8_t use_a = __riscv_vmand_mm_b8(pa_le_pb, pa_le_pc, vl16);
-
-      /* use_b = !use_a && pb <= pc */
-      vbool8_t not_use_a = __riscv_vmnot_m_b8(use_a, vl16);
-      vbool8_t use_b = __riscv_vmand_mm_b8(not_use_a, pb_le_pc, vl16);
-
-      /* Switch back to e8m1 for final operations */
-      vl = __riscv_vsetvl_e8m1(bpp);
-
-      /* Start with a, then conditionally replace with b or c */
-      vuint8m1_t result = a;
-      result = __riscv_vmerge_vvm_u8m1(result, b, use_b, vl);
-
-      /* use_c = !use_a && !use_b */
-      vbool8_t use_c = __riscv_vmnand_mm_b8(__riscv_vmor_mm_b8(use_a, use_b, vl), __riscv_vmor_mm_b8(use_a, use_b, vl), vl);
-      result = __riscv_vmerge_vvm_u8m1(result, c, use_c, vl);
+      /* if (pc < pa) a = c; */
+      vbool8_t m2 = __riscv_vmsltu_vv_u16m2_b8(pc, pa, vl);
+      a = __riscv_vmerge_vvm_u8m1(a, c, m2, vl);
 
       /* a = result + x */
-      a = __riscv_vadd_vv_u8m1(result, x, vl);
+      a = __riscv_vadd_vv_u8m1(a, x, vl);
 
       /* *row = a */
       __riscv_vse8_v_u8m1(row, a, vl);