Optimize png16 with loongson mmi for 64-bit os
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c69756..9a57e83 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -227,19 +227,44 @@
CACHE STRING "Enable MIPS_MSA optimizations: on|off; on is default")
set_property(CACHE PNG_MIPS_MSA
PROPERTY STRINGS ${PNG_MIPS_MSA_POSSIBLE_VALUES})
- list(FIND PNG_MIPS_MSA_POSSIBLE_VALUES ${PNG_MIPS_MSA} index)
- if(index EQUAL -1)
+ list(FIND PNG_MIPS_MSA_POSSIBLE_VALUES ${PNG_MIPS_MSA} index_msa)
+ if(index_msa EQUAL -1)
message(FATAL_ERROR "PNG_MIPS_MSA must be one of [${PNG_MIPS_MSA_POSSIBLE_VALUES}]")
- elseif(NOT PNG_MIPS_MSA STREQUAL "off")
+ endif()
+
+  # Loongson MMI switch (on|off); the value is validated right below.
+  set(PNG_MIPS_MMI_POSSIBLE_VALUES on off)
+  set(PNG_MIPS_MMI "on"
+      CACHE STRING "Enable MIPS_MMI optimizations: on|off; on is default")
+  set_property(CACHE PNG_MIPS_MMI PROPERTY STRINGS ${PNG_MIPS_MMI_POSSIBLE_VALUES})
+  list(FIND PNG_MIPS_MMI_POSSIBLE_VALUES "${PNG_MIPS_MMI}" index_mmi)
+  if(index_mmi EQUAL -1)
+    message(FATAL_ERROR "PNG_MIPS_MMI must be one of [${PNG_MIPS_MMI_POSSIBLE_VALUES}]")
+  endif()
+
+ if(PNG_MIPS_MSA STREQUAL "on" AND PNG_MIPS_MMI STREQUAL "on")
+ set(libpng_mips_sources
+ mips/mips_init.c
+ mips/filter_msa_intrinsics.c
+ mips/filter_mmi_inline_assembly.c)
+ add_definitions(-DPNG_MIPS_MSA_OPT=2)
+ add_definitions(-DPNG_MIPS_MMI_OPT=1)
+ elseif(PNG_MIPS_MSA STREQUAL "on")
set(libpng_mips_sources
mips/mips_init.c
mips/filter_msa_intrinsics.c)
- if(PNG_MIPS_MSA STREQUAL "on")
- add_definitions(-DPNG_MIPS_MSA_OPT=2)
- endif()
- else()
+ add_definitions(-DPNG_MIPS_MSA_OPT=2)
+ add_definitions(-DPNG_MIPS_MMI_OPT=0)
+ elseif(PNG_MIPS_MMI STREQUAL "on")
+ set(libpng_mips_sources
+ mips/mips_init.c
+ mips/filter_mmi_inline_assembly.c)
add_definitions(-DPNG_MIPS_MSA_OPT=0)
- endif()
+ add_definitions(-DPNG_MIPS_MMI_OPT=1)
+ else()
+ add_definitions(-DPNG_MIPS_MSA_OPT=0)
+ add_definitions(-DPNG_MIPS_MMI_OPT=0)
+ endif()
endif()
else(PNG_HARDWARE_OPTIMIZATIONS)
diff --git a/Makefile.am b/Makefile.am
index 370bdbf..43ad6e2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -117,6 +117,13 @@
mips/filter_msa_intrinsics.c
endif
+if PNG_MIPS_MMI
+if !PNG_MIPS_MSA
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c
+endif
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/filter_mmi_inline_assembly.c
+endif
+
if PNG_INTEL_SSE
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += intel/intel_init.c\
intel/filter_sse2_intrinsics.c
diff --git a/configure.ac b/configure.ac
index 938c106..c485a6f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -328,6 +328,9 @@
enable_mips_msa=no
AC_DEFINE([PNG_MIPS_MSA_OPT], [0],
[Disable MIPS_MSA optimizations])
+ enable_mips_mmi=no
+ AC_DEFINE([PNG_MIPS_MMI_OPT], [0],
+ [Disable MIPS_MMI optimizations])
enable_powerpc_vsx=no
AC_DEFINE([PNG_POWERPC_VSX_OPT], [0],
[Disable POWERPC VSX optimizations])
@@ -347,7 +350,10 @@
[Enable ARM_NEON optimizations])
;;
mipsel*|mips64el*)
+ enable_mips_mmi=yes
enable_mips_msa=yes
+ AC_DEFINE([PNG_MIPS_MMI_OPT], [1],
+ [Enable MIPS_MMI optimizations])
AC_DEFINE([PNG_MIPS_MSA_OPT], [2],
[Enable MIPS_MSA optimizations])
;;
@@ -461,6 +467,51 @@
mipsel*|mips64el*) : ;;
esac])
+# MIPS
+# ===
+#
+# MIPS MMI (SIMD) support.
+
+AC_ARG_ENABLE([mips-mmi],
+ AS_HELP_STRING([[[--enable-mips-mmi]]],
+ [Enable MIPS MMI optimizations: =no/off, check, api, yes/on:]
+ [no/off: disable the optimizations; check: use internal checking code]
+ [(deprecated and poorly supported); api: disable by default, enable by]
+ [a call to png_set_option; yes/on: turn on unconditionally.]
+ [If not specified: determined by the compiler.]),
+ [case "$enableval" in
+ no|off)
+ # disable the default enabling on __mips_loongson_mmi systems:
+ AC_DEFINE([PNG_MIPS_MMI_OPT], [0],
+ [Disable MIPS MMI optimizations])
+ # Prevent inclusion of the assembler files below:
+ enable_mips_mmi=no;;
+ check)
+ AC_DEFINE([PNG_MIPS_MMI_CHECK_SUPPORTED], [],
+ [Check for MIPS MMI support at run-time]);;
+ api)
+ AC_DEFINE([PNG_MIPS_MMI_API_SUPPORTED], [],
+ [Turn on MIPS MMI optimizations at run-time]);;
+ yes|on)
+ AC_DEFINE([PNG_MIPS_MMI_OPT], [1],
+ [Enable MIPS MMI optimizations])
+ AC_MSG_WARN([--enable-mips-mmi: please specify 'check' or 'api', if]
+ [you want the optimizations unconditionally pass '-mloongson-mmi -march=loongson3a']
+ [to the compiler.]);;
+ *)
+ AC_MSG_ERROR([--enable-mips-mmi=${enable_mips_mmi}: invalid value])
+ esac])
+
+# Add MIPS specific files to all builds where the host_cpu is mips ('mips*') or
+# where MIPS optimizations were explicitly requested (this allows a fallback if a
+# future host CPU does not match 'mips*')
+
+AM_CONDITIONAL([PNG_MIPS_MMI],
+ [test "$enable_mips_mmi" != 'no' &&
+ case "$host_cpu" in
+ mipsel*|mips64el*) :;;
+ esac])
+
# INTEL
# =====
#
diff --git a/contrib/mips-mmi/linux.c b/contrib/mips-mmi/linux.c
new file mode 100644
index 0000000..5bb79a6
--- /dev/null
+++ b/contrib/mips-mmi/linux.c
@@ -0,0 +1,140 @@
+/* contrib/mips-mmi/linux.c
+ *
+ * Written by guxiwei 2023
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+/*
+ * parse_r var, r - Helper assembler macro for parsing register names.
+ *
+ * This converts the register name in $n form provided in \r to the
+ * corresponding register number, which is assigned to the variable \var. It is
+ * needed to allow explicit encoding of instructions in inline assembly where
+ * registers are chosen by the compiler in $n form, allowing us to avoid using
+ * fixed register numbers.
+ *
+ * It also allows newer instructions (not implemented by the assembler) to be
+ * transparently implemented using assembler macros, instead of needing separate
+ * cases depending on toolchain support.
+ *
+ * Simple usage example:
+ * __asm__ __volatile__("parse_r __rt, %0\n\t"
+ * ".insn\n\t"
+ * "# di %0\n\t"
+ * ".word (0x41606000 | (__rt << 16))"
+ * : "=r" (status));
+ */
+
+/* Match an individual register number and assign to \var */
+#define _IFC_REG(n) \
+ ".ifc \\r, $" #n "\n\t" \
+ "\\var = " #n "\n\t" \
+ ".endif\n\t"
+
+__asm__(".macro parse_r var r\n\t"
+ "\\var = -1\n\t"
+ _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3)
+ _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7)
+ _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11)
+ _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
+ _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
+ _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
+ _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
+ _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
+ ".iflt \\var\n\t"
+ ".error \"Unable to parse register name \\r\"\n\t"
+ ".endif\n\t"
+ ".endm");
+
+#define HWCAP_LOONGSON_CPUCFG (1 << 14)
+
+static int cpucfg_available(void)
+{
+ return getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG;
+}
+
+/* Return 1 if str starts with pfx; if so and ptr is set, *ptr = the rest. */
+static int strstart(const char *str, const char *pfx, const char **ptr)
+{
+    for (; *pfx != '\0' && *pfx == *str; pfx++, str++)
+        continue;
+    if (*pfx == '\0' && ptr != NULL)
+        *ptr = str;
+
+    return *pfx == '\0';
+}
+
+/* Most toolchains have no CPUCFG support yet */
+static uint32_t read_cpucfg(uint32_t reg)
+{
+ uint32_t __res;
+
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+#define LOONGSON_CFG1 0x1
+
+#define LOONGSON_CFG1_MMI (1 << 4)
+
+/* Report whether the CPUCFG feature word advertises MMI.
+ *
+ * Returns 1 when LOONGSON_CFG1_MMI is set in CFG1, otherwise 0.
+ */
+static int cpu_flags_cpucfg(void)
+{
+    const uint32_t cfg1 = read_cpucfg(LOONGSON_CFG1);
+
+    return (cfg1 & LOONGSON_CFG1_MMI) != 0;
+}
+
+/* Fallback detection: scan /proc/cpuinfo; returns 1 if MMI is reported. */
+static int cpu_flags_cpuinfo(void)
+{
+    FILE *f = fopen("/proc/cpuinfo", "r");
+    char buf[200];
+    int flags = 0;
+
+    if (!f)
+        return flags;
+
+    while (!flags && fgets(buf, sizeof(buf), f)) {
+        /* Legacy kernels may not list MMI under "ASEs implemented", so a
+         * Loongson-3 model name is accepted too.  A non-matching "cpu model"
+         * line must not end the scan, or the ASEs line is never examined.
+         */
+        if (strstart(buf, "cpu model", NULL))
+            flags = strstr(buf, "Loongson-3 ") != NULL;
+        else if (strstart(buf, "ASEs implemented", NULL)) {
+            flags = strstr(buf, " loongson-mmi") != NULL;
+            break;
+        }
+    }
+    fclose(f);
+    return flags;
+}
+
+/* Return non-zero when the CPU supports Loongson MMI. */
+static int png_have_mmi(void)
+{
+    if (cpucfg_available())
+        return cpu_flags_cpucfg();    /* ask the CPU directly */
+
+    return cpu_flags_cpuinfo();       /* fall back to /proc/cpuinfo */
+}
diff --git a/mips/filter_mmi_inline_assembly.c b/mips/filter_mmi_inline_assembly.c
new file mode 100644
index 0000000..06cb1cc
--- /dev/null
+++ b/mips/filter_mmi_inline_assembly.c
@@ -0,0 +1,524 @@
+/* filter_mmi_inline_assembly.c - MMI optimized filter functions
+ *
+ * Written by zhanglixia and guxiwei
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_MIPS_MMI_IMPLEMENTATION == 2 /* Inline Assembly */
+
+/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this:
+ * prev: c b
+ * row: a d
+ * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ * whichever of a, b, or c is closest to p=a+b-c.
+ */
+
+/* Up filter: row[i] += prev_row[i], processed eight bytes per iteration. */
+void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   int istop = row_info->rowbytes;
+   double rp, pp;
+   __asm__ volatile (
+      "1: \n\t"
+      "ldc1 %[rp], 0x00(%[row]) \n\t"
+      "ldc1 %[pp], 0x00(%[prev_row]) \n\t"
+      "paddb %[rp], %[rp], %[pp] \n\t"
+      "sdc1 %[rp], 0x00(%[row]) \n\t"
+      "daddiu %[row], %[row], 0x08 \n\t"
+      "daddiu %[prev_row], %[prev_row], 0x08 \n\t"
+      "daddiu %[istop], %[istop], -0x08 \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [row]"+r"(row),
+        [prev_row]"+r"(prev_row), [istop]"+r"(istop)
+      : /* none: the asm advances row, prev_row and istop, hence "+r" */
+      : "memory"
+   );
+}
+
+/* Sub filter, 3 bytes/pixel: adds the pixel to the left; 12 bytes per loop. */
+void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   int istop = row_info->rowbytes;
+   double rp, pp, dest;
+   double eight, sixteen, twenty_four, forty_eight;
+   double tmp0;
+   double ftmp[2];
+   __asm__ volatile (
+      "li %[tmp0], 0x08 \n\t"
+      "dmtc1 %[tmp0], %[eight] \n\t"
+      "li %[tmp0], 0x10 \n\t"
+      "dmtc1 %[tmp0], %[sixteen] \n\t"
+      "li %[tmp0], 0x18 \n\t"
+      "dmtc1 %[tmp0], %[twenty_four] \n\t"
+      "li %[tmp0], 0x30 \n\t"
+      "dmtc1 %[tmp0], %[forty_eight] \n\t"
+      "xor %[dest], %[dest], %[dest] \n\t"
+
+      "1: \n\t"
+      "gsldrc1 %[rp], 0x00(%[row]) \n\t"
+      "gsldlc1 %[rp], 0x07(%[row]) \n\t"
+      "gsldrc1 %[pp], 0x08(%[row]) \n\t"
+      "gsldlc1 %[pp], 0x0f(%[row]) \n\t"
+
+      "paddb %[ftmp0], %[dest], %[rp] \n\t"
+      "swc1 %[ftmp0], 0x00(%[row]) \n\t"
+
+      "dsrl %[ftmp1], %[rp], %[twenty_four] \n\t"
+      "paddb %[dest], %[ftmp1], %[ftmp0] \n\t"
+      "gsswrc1 %[dest], 0x03(%[row]) \n\t"
+      "gsswlc1 %[dest], 0x06(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
+      "dsll %[ftmp1], %[pp], %[sixteen] \n\t"
+      "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+      "paddb %[dest], %[dest], %[ftmp0] \n\t"
+      "gsswrc1 %[dest], 0x06(%[row]) \n\t"
+      "gsswlc1 %[dest], 0x09(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[pp], %[eight] \n\t"
+      "paddb %[dest], %[dest], %[ftmp0] \n\t"
+      "gsswrc1 %[dest], 0x09(%[row]) \n\t"
+      "daddiu %[row], %[row], 0x0c \n\t"
+      "daddiu %[istop], %[istop], -0x0c \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [dest]"=&f"(dest),
+        [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]),
+        [ftmp1]"=&f"(ftmp[1]), [eight]"=&f"(eight),
+        [sixteen]"=&f"(sixteen), [twenty_four]"=&f"(twenty_four),
+        [forty_eight]"=&f"(forty_eight), [row]"+r"(row), [istop]"+r"(istop)
+      : /* none: the asm advances row and istop, hence "+r" above */
+      : "memory"
+   );
+
+   PNG_UNUSED(prev)
+}
+
+void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* The Sub filter predicts each pixel as the previous pixel, a.  The first
+    * pixel is kept as-is; each pass adds row[0..3] into row[4..7] in place.
+    * NOTE(review): the last pass touches 4 bytes past the row - confirm that
+    * the row buffer padding covers it. */
+   int istop = row_info->rowbytes;
+   double rp, pp;
+
+   __asm__ volatile (
+      "1: \n\t"
+      "lwc1 %[pp], 0x00(%[row]) \n\t"
+      "lwc1 %[rp], 0x04(%[row]) \n\t"
+      "paddb %[rp], %[rp], %[pp] \n\t"
+      "swc1 %[rp], 0x04(%[row]) \n\t"
+
+      "daddiu %[row], %[row], 0x04 \n\t"
+      "daddiu %[istop], %[istop], -0x04 \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [row]"+r"(row), [istop]"+r"(istop)
+      : /* none: the asm advances row and istop, hence "+r" above */
+      : "memory"
+   );
+
+   PNG_UNUSED(prev)
+}
+
+/* Avg filter, 3 bytes/pixel: adds the average of left and up to each byte. */
+void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   int istop = row_info->rowbytes;
+   double rp, pp, rp1, pp1;
+   double tmp0, ftmp[3];
+   double one, dest;
+   double eight, sixteen, twenty_four, forty_eight;
+   __asm__ volatile (
+      "li %[tmp0], 0x08 \n\t"
+      "dmtc1 %[tmp0], %[eight] \n\t"
+      "li %[tmp0], 0x10 \n\t"
+      "dmtc1 %[tmp0], %[sixteen] \n\t"
+      "li %[tmp0], 0x18 \n\t"
+      "dmtc1 %[tmp0], %[twenty_four] \n\t"
+      "li %[tmp0], 0x30 \n\t"
+      "dmtc1 %[tmp0], %[forty_eight] \n\t"
+      "xor %[dest], %[dest], %[dest] \n\t"
+
+      "li %[tmp0], 0x01 \n\t"
+      "ins %[tmp0], %[tmp0], 8, 8 \n\t"
+      "dmtc1 %[tmp0], %[one] \n\t"
+      "pshufh %[one], %[one], %[dest] \n\t"
+
+      "1: \n\t"
+      "gsldrc1 %[rp], 0x00(%[row]) \n\t"
+      "gsldlc1 %[rp], 0x07(%[row]) \n\t"
+      "gsldrc1 %[pp], 0x00(%[prev]) \n\t"
+      "gsldlc1 %[pp], 0x07(%[prev]) \n\t"
+      "gsldrc1 %[rp1], 0x08(%[row]) \n\t"
+      "gsldlc1 %[rp1], 0x0f(%[row]) \n\t"
+      "gsldrc1 %[pp1], 0x08(%[prev]) \n\t"
+      "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t"
+
+      "xor %[ftmp0], %[pp], %[dest] \n\t"
+      "pavgb %[ftmp1], %[pp], %[dest] \n\t"
+      "and %[ftmp0], %[ftmp0], %[one] \n\t"
+      "psubb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
+      "paddb %[dest], %[rp], %[ftmp1] \n\t"
+      "swc1 %[dest], 0x00(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t"
+      "dsrl %[ftmp1], %[pp], %[twenty_four] \n\t"
+
+      "xor %[ftmp2], %[ftmp1], %[dest] \n\t"
+      "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
+      "and %[ftmp2], %[ftmp2], %[one] \n\t"
+      "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+      "paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
+      "gsswrc1 %[dest], 0x03(%[row]) \n\t"
+      "gsswlc1 %[dest], 0x06(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
+      "dsll %[ftmp1], %[rp1], %[sixteen] \n\t"
+      "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+      "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t"
+      "dsll %[ftmp1], %[pp1], %[sixteen] \n\t"
+      "or %[ftmp1], %[ftmp2], %[ftmp1] \n\t"
+
+      "xor %[ftmp2], %[ftmp1], %[dest] \n\t"
+      "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
+      "and %[ftmp2], %[ftmp2], %[one] \n\t"
+      "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+      "paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
+      "gsswrc1 %[dest], 0x06(%[row]) \n\t"
+      "gsswlc1 %[dest], 0x09(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp1], %[eight] \n\t"
+      "dsrl %[ftmp1], %[pp1], %[eight] \n\t"
+
+      "xor %[ftmp2], %[ftmp1], %[dest] \n\t"
+      "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
+      "and %[ftmp2], %[ftmp2], %[one] \n\t"
+      "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+      "paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
+      "gsswrc1 %[dest], 0x09(%[row]) \n\t"
+      "daddiu %[row], %[row], 0x0c \n\t"
+      "daddiu %[prev], %[prev], 0x0c \n\t"
+      "daddiu %[istop], %[istop], -0x0c \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1),
+        [pp1]"=&f"(pp1), [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]),
+        [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [one]"=&f"(one),
+        [dest]"=&f"(dest), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen),
+        [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight),
+        [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+      : /* none: the asm advances row, prev and istop, hence "+r" above */
+      : "memory"
+   );
+}
+
+/* Avg filter, 4 bytes/pixel: adds the average of left and up to each byte. */
+void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   int istop = row_info->rowbytes;
+   double rp, pp, dest;
+   double tmp, ftmp[2];
+
+   __asm__ volatile (
+      "xor %[dest], %[dest], %[dest] \n\t"
+      "li %[tmp], 0x01 \n\t"
+      "ins %[tmp], %[tmp], 8, 8 \n\t"
+      "dmtc1 %[tmp], %[ftmp1] \n\t"
+      "pshufh %[ftmp1], %[ftmp1], %[dest] \n\t"
+
+      "1: \n\t"
+      "lwc1 %[rp], 0x00(%[row]) \n\t"
+      "lwc1 %[pp], 0x00(%[prev]) \n\t"
+      "xor %[ftmp0], %[pp], %[dest] \n\t"
+      "pavgb %[pp], %[pp], %[dest] \n\t"
+      "and %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+      "psubb %[pp], %[pp], %[ftmp0] \n\t"
+      "paddb %[dest], %[rp], %[pp] \n\t"
+      "swc1 %[dest], 0x00(%[row]) \n\t"
+      "daddiu %[row], %[row], 0x04 \n\t"
+      "daddiu %[prev], %[prev], 0x04 \n\t"
+      "daddiu %[istop], %[istop], -0x04 \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [ftmp0]"=&f"(ftmp[0]),
+        [ftmp1]"=&f"(ftmp[1]), [dest]"=&f"(dest), [tmp]"=&r"(tmp),
+        [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+      : /* none: the asm advances row, prev and istop, hence "+r" above */
+      : "memory"
+   );
+}
+
+void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+    * and two pixels from the previous row, b and c:
+    *   prev: c b
+    *   row:  a d
+    * The Paeth function predicts d to be whichever of a, b, or c is nearest to
+    * p=a+b-c.
+    *
+    * The first pixel has no left context, and so uses an Up filter, p = b.
+    * This works naturally with our main loop's p = a+b-c if we force a and c
+    * to zero, which is what the xor instructions before the loop do.
+    * row, prev and istop are advanced by the asm, hence the "+r" operands.
+    */
+   int istop = row_info->rowbytes;
+   double rp, pp, rp1, pp1, zero;
+   double a, b, c, d, pa, pb, pc;
+   double tmp0;
+   double ftmp[3];
+   double eight, sixteen, twenty_four, forty_eight;
+
+   __asm__ volatile (
+      "xor %[a], %[a], %[a] \n\t"
+      "xor %[c], %[c], %[c] \n\t"
+      "xor %[zero], %[zero], %[zero] \n\t"
+      "li %[tmp0], 0x08 \n\t"
+      "dmtc1 %[tmp0], %[eight] \n\t"
+      "li %[tmp0], 0x10 \n\t"
+      "dmtc1 %[tmp0], %[sixteen] \n\t"
+      "li %[tmp0], 0x18 \n\t"
+      "dmtc1 %[tmp0], %[twenty_four] \n\t"
+      "li %[tmp0], 0x30 \n\t"
+      "dmtc1 %[tmp0], %[forty_eight] \n\t"
+
+      "1: \n\t"
+      "gsldrc1 %[rp], 0x00(%[row]) \n\t"
+      "gsldlc1 %[rp], 0x07(%[row]) \n\t"
+      "gsldrc1 %[pp], 0x00(%[prev]) \n\t"
+      "gsldlc1 %[pp], 0x07(%[prev]) \n\t"
+      "gsldrc1 %[rp1], 0x08(%[row]) \n\t"
+      "gsldlc1 %[rp1], 0x0f(%[row]) \n\t"
+      "gsldrc1 %[pp1], 0x08(%[prev]) \n\t"
+      "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t"
+
+      "punpcklbh %[b], %[pp], %[zero] \n\t"
+      "punpcklbh %[d], %[rp], %[zero] \n\t"
+      "packushb %[ftmp0], %[c], %[c] \n\t"
+      "packushb %[ftmp1], %[a], %[a] \n\t"
+      "pasubub %[pa], %[pp], %[ftmp0] \n\t"
+      "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+      "psubh %[ftmp0], %[b], %[c] \n\t"
+      "psubh %[ftmp1], %[a], %[c] \n\t"
+      "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+      "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+      "xor %[pc], %[pc], %[ftmp0] \n\t"
+      "psubh %[pc], %[pc], %[ftmp0] \n\t"
+      "punpcklbh %[pa], %[pa], %[zero] \n\t"
+      "punpcklbh %[pb], %[pb], %[zero] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+      "and %[ftmp1], %[b], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "pminsh %[pa], %[pa], %[pb] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+      "and %[ftmp1], %[c], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "paddb %[a], %[a], %[d] \n\t"
+      "packushb %[d], %[a], %[a] \n\t"
+      "punpcklbh %[c], %[pp], %[zero] \n\t"
+      "swc1 %[d], 0x00(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t"
+      "dsrl %[ftmp2], %[pp], %[twenty_four] \n\t"
+
+      "punpcklbh %[b], %[ftmp2], %[zero] \n\t"
+      "punpcklbh %[d], %[ftmp0], %[zero] \n\t"
+      "packushb %[ftmp0], %[c], %[c] \n\t"
+      "packushb %[ftmp1], %[a], %[a] \n\t"
+      "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
+      "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+      "psubh %[ftmp0], %[b], %[c] \n\t"
+      "psubh %[ftmp1], %[a], %[c] \n\t"
+      "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+      "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+      "xor %[pc], %[pc], %[ftmp0] \n\t"
+      "psubh %[pc], %[pc], %[ftmp0] \n\t"
+      "punpcklbh %[pa], %[pa], %[zero] \n\t"
+      "punpcklbh %[pb], %[pb], %[zero] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+      "and %[ftmp1], %[b], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "pminsh %[pa], %[pa], %[pb] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+      "and %[ftmp1], %[c], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "paddb %[a], %[a], %[d] \n\t"
+      "packushb %[d], %[a], %[a] \n\t"
+      "punpcklbh %[c], %[ftmp2], %[zero] \n\t"
+      "gsswrc1 %[d], 0x03(%[row]) \n\t"
+      "gsswlc1 %[d], 0x06(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
+      "dsll %[ftmp1], %[rp1], %[sixteen] \n\t"
+      "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+      "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t"
+      "dsll %[ftmp1], %[pp1], %[sixteen] \n\t"
+      "or %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
+
+      "punpcklbh %[b], %[ftmp2], %[zero] \n\t"
+      "punpcklbh %[d], %[ftmp0], %[zero] \n\t"
+      "packushb %[ftmp0], %[c], %[c] \n\t"
+      "packushb %[ftmp1], %[a], %[a] \n\t"
+      "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
+      "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+      "psubh %[ftmp0], %[b], %[c] \n\t"
+      "psubh %[ftmp1], %[a], %[c] \n\t"
+      "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+      "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+      "xor %[pc], %[pc], %[ftmp0] \n\t"
+      "psubh %[pc], %[pc], %[ftmp0] \n\t"
+      "punpcklbh %[pa], %[pa], %[zero] \n\t"
+      "punpcklbh %[pb], %[pb], %[zero] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+      "and %[ftmp1], %[b], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "pminsh %[pa], %[pa], %[pb] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+      "and %[ftmp1], %[c], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "paddb %[a], %[a], %[d] \n\t"
+      "packushb %[d], %[a], %[a] \n\t"
+      "punpcklbh %[c], %[ftmp2], %[zero] \n\t"
+      "gsswrc1 %[d], 0x06(%[row]) \n\t"
+      "gsswlc1 %[d], 0x09(%[row]) \n\t"
+
+      "dsrl %[ftmp0], %[rp1], %[eight] \n\t"
+      "dsrl %[ftmp2], %[pp1], %[eight] \n\t"
+
+      "punpcklbh %[b], %[ftmp2], %[zero] \n\t"
+      "punpcklbh %[d], %[ftmp0], %[zero] \n\t"
+      "packushb %[ftmp0], %[c], %[c] \n\t"
+      "packushb %[ftmp1], %[a], %[a] \n\t"
+      "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
+      "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+      "psubh %[ftmp0], %[b], %[c] \n\t"
+      "psubh %[ftmp1], %[a], %[c] \n\t"
+      "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+      "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+      "xor %[pc], %[pc], %[ftmp0] \n\t"
+      "psubh %[pc], %[pc], %[ftmp0] \n\t"
+      "punpcklbh %[pa], %[pa], %[zero] \n\t"
+      "punpcklbh %[pb], %[pb], %[zero] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+      "and %[ftmp1], %[b], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "pminsh %[pa], %[pa], %[pb] \n\t"
+      "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+      "and %[ftmp1], %[c], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "paddb %[a], %[a], %[d] \n\t"
+      "packushb %[d], %[a], %[a] \n\t"
+      "punpcklbh %[c], %[ftmp2], %[zero] \n\t"
+      "gsswrc1 %[d], 0x09(%[row]) \n\t"
+
+      "daddiu %[row], %[row], 0x0c \n\t"
+      "daddiu %[prev], %[prev], 0x0c \n\t"
+      "daddiu %[istop], %[istop], -0x0c \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), [pp1]"=&f"(pp1),
+        [zero]"=&f"(zero), [a]"=&f"(a),[b]"=&f"(b), [c]"=&f"(c),
+        [d]"=&f"(d), [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc),
+        [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+        [ftmp2]"=&f"(ftmp[2]), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen),
+        [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight),
+        [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+      : /* no input-only operands */
+      : "memory"
+   );
+}
+
+void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev)
+{
+   /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+    * and two pixels from the previous row, b and c:
+    *   prev: c b
+    *   row:  a d
+    * The Paeth function predicts d to be whichever of a, b, or c is nearest to
+    * p=a+b-c.
+    *
+    * The first pixel has no left context, and so uses an Up filter, p = b.
+    * This works naturally with our main loop's p = a+b-c if we force a and c
+    * to zero, which is what the xor instructions before the loop do.
+    * row, prev and istop are advanced by the asm, hence the "+r" operands.
+    */
+   int istop = row_info->rowbytes;
+   double rp, pp, zero;
+   double a, b, c, d, pa, pb, pc;
+   double ftmp[2];
+
+   __asm__ volatile (
+      "xor %[a], %[a], %[a] \n\t"
+      "xor %[c], %[c], %[c] \n\t"
+      "xor %[zero], %[zero], %[zero] \n\t"
+
+      "1: \n\t"
+      "lwc1 %[rp], 0x00(%[row]) \n\t"
+      "lwc1 %[pp], 0x00(%[prev]) \n\t"
+      "punpcklbh %[b], %[pp], %[zero] \n\t"
+      "punpcklbh %[d], %[rp], %[zero] \n\t"
+
+      "packushb %[ftmp0], %[c], %[c] \n\t"
+      "packushb %[ftmp1], %[a], %[a] \n\t"
+      "pasubub %[pa], %[pp], %[ftmp0] \n\t"
+      "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+      "psubh %[ftmp0], %[b], %[c] \n\t"
+      "psubh %[ftmp1], %[a], %[c] \n\t"
+      "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+      "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+      "xor %[pc], %[pc], %[ftmp0] \n\t"
+      "psubh %[pc], %[pc], %[ftmp0] \n\t"
+
+      "punpcklbh %[pa], %[pa], %[zero] \n\t"
+      "punpcklbh %[pb], %[pb], %[zero] \n\t"
+
+      "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+      "and %[ftmp1], %[b], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "pminsh %[pa], %[pa], %[pb] \n\t"
+
+      "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+      "and %[ftmp1], %[c], %[ftmp0] \n\t"
+      "pandn %[a], %[ftmp0], %[a] \n\t"
+      "or %[a], %[a], %[ftmp1] \n\t"
+      "paddb %[a], %[a], %[d] \n\t"
+      "packushb %[d], %[a], %[a] \n\t"
+      "swc1 %[d], 0x00(%[row]) \n\t"
+      "punpcklbh %[c], %[pp], %[zero] \n\t"
+      "daddiu %[row], %[row], 0x04 \n\t"
+      "daddiu %[prev], %[prev], 0x04 \n\t"
+      "daddiu %[istop], %[istop], -0x04 \n\t"
+      "bgtz %[istop], 1b \n\t"
+      : [rp]"=&f"(rp), [pp]"=&f"(pp), [zero]"=&f"(zero),
+        [a]"=&f"(a), [b]"=&f"(b), [c]"=&f"(c), [d]"=&f"(d),
+        [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc),
+        [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+        [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+      : /* no input-only operands */
+      : "memory"
+   );
+}
+
+#endif /* PNG_MIPS_MMI_IMPLEMENTATION == 2 */
+#endif /* READ */
diff --git a/mips/mips_init.c b/mips/mips_init.c
index 8dd283d..20a9fa8 100644
--- a/mips/mips_init.c
+++ b/mips/mips_init.c
@@ -4,6 +4,7 @@
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2016 Glenn Randers-Pehrson
* Written by Mandar Sahastrabuddhe, 2016.
+ * Updated by guxiwei, 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -20,8 +21,9 @@
#ifdef PNG_READ_SUPPORTED
-#if PNG_MIPS_MSA_OPT > 0
-#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */
+#if PNG_MIPS_MSA_OPT > 0 || PNG_MIPS_MMI_IMPLEMENTATION > 0
+
+#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do MIPS MSA run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the MIPS
* MSA instructions there is no processor-specific way of detecting the
@@ -51,13 +53,83 @@
#endif /* PNG_MIPS_MSA_FILE */
#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
+#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED /* Do MIPS MMI run-time checks */
+#ifndef PNG_MIPS_MMI_FILE
+#  ifdef __linux__
+#     define PNG_MIPS_MMI_FILE "contrib/mips-mmi/linux.c"
+#  endif
+#endif
+
+#ifdef PNG_MIPS_MMI_FILE
+
+#include <signal.h> /* for sig_atomic_t */
+static int png_have_mmi(void); /* defined by the file included below */
+#include PNG_MIPS_MMI_FILE
+
+#else  /* PNG_MIPS_MMI_FILE */
+#  error "PNG_MIPS_MMI_FILE undefined: no support for run-time MIPS MMI checks"
+#endif /* PNG_MIPS_MMI_FILE */
+#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED*/
+
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif
+/* MIPS supports two optimizations: MMI and MSA. The appropriate
+ * optimization is chosen at runtime
+ */
void
-png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
+png_init_filter_functions_mips(png_structp pp, unsigned int bpp)
{
+#if PNG_MIPS_MMI_IMPLEMENTATION > 0
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+ switch ((pp->options >> PNG_MIPS_MMI) & 3)
+ {
+ case PNG_OPTION_UNSET:
+#endif /* PNG_MIPS_MMI_API_SUPPORTED */
+#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED
+ {
+ static volatile sig_atomic_t no_mmi = -1; /* not checked */
+
+ if (no_mmi < 0)
+ no_mmi = !png_have_mmi();
+
+ if (no_mmi)
+ goto MIPS_MSA_INIT;
+ }
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+ break;
+#endif
+#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */
+
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+ default: /* OFF or INVALID */
+ goto MIPS_MSA_INIT;
+
+ case PNG_OPTION_ON:
+ /* Option turned on */
+ break;
+ }
+#endif
+ pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi;
+ if (bpp == 3)
+ {
+ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+ png_read_filter_row_paeth3_mmi;
+ }
+ else if (bpp == 4)
+ {
+ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+ png_read_filter_row_paeth4_mmi;
+ }
+#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */
+
+MIPS_MSA_INIT:
+#if PNG_MIPS_MSA_OPT > 0
/* The switch statement is compiled in for MIPS_MSA_API, the call to
* png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined
* the check is only performed if the API has not set the MSA option on
@@ -125,6 +197,8 @@
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa;
}
-}
#endif /* PNG_MIPS_MSA_OPT > 0 */
+ return;
+}
+#endif /* PNG_MIPS_MSA_OPT > 0 || PNG_MIPS_MMI_IMPLEMENTATION > 0 */
#endif /* READ */
diff --git a/png.h b/png.h
index eaee5a3..457c932 100644
--- a/png.h
+++ b/png.h
@@ -3208,7 +3208,11 @@
#ifdef PNG_POWERPC_VSX_API_SUPPORTED
# define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions supported */
#endif
-#define PNG_OPTION_NEXT 12 /* Next option - numbers must be even */
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+# define PNG_MIPS_MMI 12 /* HARDWARE: MIPS MMI SIMD instructions supported */
+#endif
+
+#define PNG_OPTION_NEXT 14 /* Next option - numbers must be even */
/* Return values: NOTE: there are four values and 'off' is *not* zero */
#define PNG_OPTION_UNSET 0 /* Unset - defaults to off */
diff --git a/pngpriv.h b/pngpriv.h
index cdbc6c3..fdf0f35 100644
--- a/pngpriv.h
+++ b/pngpriv.h
@@ -197,6 +197,16 @@
# endif
#endif
+#ifndef PNG_MIPS_MMI_OPT
+   /* Always leave PNG_MIPS_MMI_OPT defined so later '#if' tests are valid. */
+#  if defined(PNG_MIPS_MMI) && defined(__mips_loongson_mmi) && \
+      (_MIPS_SIM == _ABI64) && defined(PNG_ALIGNED_MEMORY_SUPPORTED)
+#     define PNG_MIPS_MMI_OPT 1
+#  else
+#     define PNG_MIPS_MMI_OPT 0
+#  endif
+#endif
+
#ifndef PNG_POWERPC_VSX_OPT
# if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
# define PNG_POWERPC_VSX_OPT 2
@@ -248,7 +258,7 @@
#endif
#if PNG_MIPS_MSA_OPT > 0
-# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_msa
+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips
# ifndef PNG_MIPS_MSA_IMPLEMENTATION
# if defined(__mips_msa)
# if defined(__clang__)
@@ -269,6 +279,22 @@
# define PNG_MIPS_MSA_IMPLEMENTATION 0
#endif /* PNG_MIPS_MSA_OPT > 0 */
+#if PNG_MIPS_MMI_OPT > 0
+#  ifndef PNG_MIPS_MMI_IMPLEMENTATION
+#     if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64)
+#        define PNG_MIPS_MMI_IMPLEMENTATION 2 /* inline assembly */
+#     else /* !defined __mips_loongson_mmi || _MIPS_SIM != _ABI64 */
+#        define PNG_MIPS_MMI_IMPLEMENTATION 0
+#     endif /* __mips_loongson_mmi && _MIPS_SIM == _ABI64 */
+#  endif /* !PNG_MIPS_MMI_IMPLEMENTATION */
+
+#  if PNG_MIPS_MMI_IMPLEMENTATION > 0
+#     define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips
+#  endif
+#else /* MMI disabled: the implementation selector must still be defined */
+#  define PNG_MIPS_MMI_IMPLEMENTATION 0
+#endif /* PNG_MIPS_MMI_OPT > 0 */
+
#if PNG_POWERPC_VSX_OPT > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx
# define PNG_POWERPC_VSX_IMPLEMENTATION 1
@@ -1329,6 +1355,23 @@
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
+#if PNG_MIPS_MMI_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_mmi,(png_row_infop row_info,
+ png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_mmi,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_mmi,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_mmi,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_mmi,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_mmi,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_mmi,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
#if PNG_POWERPC_VSX_OPT > 0
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info,
png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
@@ -2118,10 +2161,15 @@
#endif
#if PNG_MIPS_MSA_OPT > 0
-PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_msa,
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
+# if PNG_MIPS_MMI_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips,
+ (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+# endif
+
# if PNG_INTEL_SSE_IMPLEMENTATION > 0
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
diff --git a/scripts/pnglibconf.dfa b/scripts/pnglibconf.dfa
index e15a23c..f4d14a2 100644
--- a/scripts/pnglibconf.dfa
+++ b/scripts/pnglibconf.dfa
@@ -256,6 +256,55 @@
option POWERPC_VSX_CHECK disabled,
sets POWERPC_VSX_OPT 1
+# These options are specific to the MIPS MSA hardware optimizations.
+#
+# MIPS_MSA_OPT: unset: check at compile time (__mips_msa must be defined by
+# the compiler, typically as a result of specifying
+# "-mmsa -mfp64" compiler flags)
+# 0: disable (even if the CPU supports MSA.)
+# 1: check at run time (via MIPS_MSA_{API,CHECK})
+# 2: switch on unconditionally (inadvisable - instead pass
+# -mmsa -mfp64 to compiler options)
+# When building libpng avoid using any setting other than '0'; '1' is
+# set automatically when either 'API' or 'CHECK' are configured in,
+# '2' should not be necessary as "-mmsa -mfp64" will achieve the same
+# effect as well as applying MSA optimizations to the rest of the
+# libpng code.
+# NOTE: any setting other than '0' requires ALIGNED_MEMORY
+# MIPS_MSA_API: (PNG_MIPS_MSA == 1) allow the optimization to be switched on
+# with png_set_option.
+# MIPS_MSA_CHECK: (PNG_MIPS_MSA == 1) compile a run-time check to see if MSA
+# extensions are supported.
+setting MIPS_MSA_OPT
+option MIPS_MSA_API disabled requires ALIGNED_MEMORY enables SET_OPTION,
+ sets MIPS_MSA_OPT 1
+option MIPS_MSA_CHECK disabled requires ALIGNED_MEMORY,
+ sets MIPS_MSA_OPT 1
+
+# These options are specific to the MIPS MMI hardware optimizations.
+#
+# MIPS_MMI_OPT: unset: check at compile time (__mips_loongson_mmi must be defined by
+# the compiler, typically as a result of specifying
+# "-mloongson-mmi -march=loongson3a" compiler flags)
+# 0: disable (even if the CPU supports MMI.)
+# 1: check at run time (via MIPS_MMI_{API,CHECK})
+# 2: switch on unconditionally (inadvisable - instead pass
+# -mloongson-mmi -march=loongson3a to compiler options)
+# When building libpng avoid using any setting other than '0'; '1' is
+# set automatically when either 'API' or 'CHECK' are configured in,
+# '2' should not be necessary as "-mloongson-mmi -march=loongson3a" will achieve the same
+# effect as well as applying MMI optimizations to the rest of the
+# libpng code.
+# MIPS_MMI_API: (PNG_MIPS_MMI == 1) allow the optimization to be switched on
+# with png_set_option.
+# MIPS_MMI_CHECK: (PNG_MIPS_MMI == 1) compile a run-time check to see if MMI
+# extensions are supported.
+setting MIPS_MMI_OPT
+option MIPS_MMI_API disabled requires ALIGNED_MEMORY enables SET_OPTION,
+ sets MIPS_MMI_OPT 1
+option MIPS_MMI_CHECK disabled requires ALIGNED_MEMORY,
+ sets MIPS_MMI_OPT 1
+
# These settings configure the default compression level (0-9) and 'strategy';
# strategy is as defined by the implementors of zlib. It describes the input