Snap for 6001391 from 6444bd7c5c74b582973d8a340bddeae05fcee55e to qt-aml-tzdata-release
Change-Id: I7666caebe23d0888e2e1e4bb5fb33a6333e6d5d5
diff --git a/cpp/Android.bp b/cpp/Android.bp
index 35b09cd..9d9041c 100644
--- a/cpp/Android.bp
+++ b/cpp/Android.bp
@@ -65,7 +65,6 @@
shared_libs: [
"libdl",
"liblog",
- "libz",
],
}
@@ -73,8 +72,15 @@
name: "libRScpp",
defaults: ["libRScpp-defaults"],
+ header_libs: [
+ "libarect_headers",
+ "libbase_headers",
+ "libgui_headers",
+ "libnativebase_headers",
+ "libnativewindow_headers",
+ ],
+
shared_libs: [
- "libgui",
"libutils",
],
diff --git a/cpu_ref/Android.bp b/cpu_ref/Android.bp
index e69c208..c409989 100644
--- a/cpu_ref/Android.bp
+++ b/cpu_ref/Android.bp
@@ -79,6 +79,9 @@
x86_64: {
cflags: ["-DARCH_X86_HAVE_SSSE3"],
srcs: ["rsCpuIntrinsics_x86.cpp"],
+ avx2: {
+ cflags: ["-DARCH_X86_HAVE_AVX2", "-mavx2", "-mfma"],
+ },
},
},
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index ff42d79..8a3dd1a 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+#if defined(ARCH_X86_HAVE_AVX2)
+#include <stdint.h>
+#include <x86intrin.h>
+#include <xmmintrin.h>
+#endif
#include "rsCpuIntrinsic.h"
#include "rsCpuIntrinsicInlines.h"
@@ -78,10 +83,20 @@
+ x * (3.f * (p1 - p2) + p3 - p0)));
}
+
+#if defined(ARCH_X86_HAVE_AVX2)
+// AVX2/FMA build: same polynomial as the #else fallback, with two sub-terms fused via intrinsics.
+static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
+    const float quad = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(4.f), _mm_set1_ps(p2), _mm_set1_ps(p3))); // 4*p2 - p3
+    const float cube = _mm_cvtss_f32(_mm_fmadd_ss(_mm_set1_ps(3.f), _mm_set1_ps(p1 - p2), _mm_set1_ps(p3 - p0))); // 3*(p1-p2) + (p3-p0)
+    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + quad + x * cube));
+}
+#else
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
+ x * (3.f * (p1 - p2) + p3 - p0)));
}
+#endif
static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
float xf, float yf, int width) {
@@ -317,7 +332,14 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -363,7 +385,11 @@
#endif
while(x1 < x2) {
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -384,7 +410,13 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -430,7 +462,12 @@
#endif
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -451,7 +488,13 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -497,7 +540,13 @@
#endif
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -518,7 +567,12 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -537,7 +591,13 @@
uint32_t x2 = xend;
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -558,7 +618,13 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -577,7 +643,13 @@
uint32_t x2 = xend;
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;
@@ -598,7 +670,13 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float yf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(info->current.y + 0.5f),_mm_set1_ps(cp->scaleY), _mm_set1_ps(0.5f)));
+#else
float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
+#endif
+
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -617,7 +695,13 @@
uint32_t x2 = xend;
while(x1 < x2) {
+
+#if defined(ARCH_X86_HAVE_AVX2)
+ float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f) , _mm_set1_ps(cp->scaleX) , _mm_set1_ps(0.5f)));
+#else
float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
+#endif
+
*out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
out++;
x1++;