Add Toolkit class definition and test files.

Adds the Java/Kt class definition of the RenderScript Toolkit.
Adds the JNI file that calls the C++ code.
Add test files. For each Intrinsic, we do a 3-way comparison:
- Toolkit
- RenderScript Intrinsic
- Reference code written in Kotlin

Bug: 178476084
Test: This CL does not have build files and has not been independently tested. That will come with the next CL.
Change-Id: I0c8f85465a2dd42b42a98f6c7847b55fe13b5994
diff --git a/toolkit/ColorMatrix.cpp b/toolkit/ColorMatrix.cpp
index a9393e0..dd426cf 100644
--- a/toolkit/ColorMatrix.cpp
+++ b/toolkit/ColorMatrix.cpp
@@ -476,6 +476,7 @@
 static uint8_t * addVMOV_32(uint8_t *buf, uint32_t dest_q, uint32_t imm) {
     //vmov.32 Q#1, #imm
     assert(imm == 0);
+    (void) imm; // Avoid unused parameter warnings for non-debug builds
     uint32_t op = 0xf2800050 | encodeSIMDRegs(dest_q << 1, 0, 0);
     ((uint32_t *)buf)[0] = op;
     return buf + 4;
diff --git a/toolkit/JniEntryPoints.cpp b/toolkit/JniEntryPoints.cpp
new file mode 100644
index 0000000..3bf5911
--- /dev/null
+++ b/toolkit/JniEntryPoints.cpp
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <android/bitmap.h>
+#include <assert.h>
+#include <jni.h>
+
+#include "RenderScriptToolkit.h"
+#include "Utils.h"
+
+#define LOG_TAG "renderscript.toolkit.JniEntryPoints"
+
+using namespace android::renderscript;
+
+/**
+ * I compared using env->GetPrimitiveArrayCritical vs. env->GetByteArrayElements to get access
+ * to the underlying data. On Pixel 4, it's actually faster to not use critical. The code is left
+ * here if you want to experiment. Note that USE_CRITICAL could block the garbage collector.
+ */
+// #define USE_CRITICAL
+
+// RAII holder for the elements of a Java byte[]; released with mode 0 (copy back and free).
+class ByteArrayGuard {
+    JNIEnv* env;
+    jbyteArray array;
+    jbyte* data;
+   public:
+    ByteArrayGuard(JNIEnv* env, jbyteArray array) : env{env}, array{array} {
+#ifdef USE_CRITICAL
+        data = reinterpret_cast<jbyte*>(env->GetPrimitiveArrayCritical(array, nullptr));
+#else
+        data = env->GetByteArrayElements(array, nullptr);
+#endif
+    }
+    ByteArrayGuard(ByteArrayGuard&&) = delete;  // also suppresses the implicit copy operations
+    ~ByteArrayGuard() {
+#ifdef USE_CRITICAL
+        env->ReleasePrimitiveArrayCritical(array, data, 0);
+#else
+        env->ReleaseByteArrayElements(array, data, 0);
+#endif
+    }
+    uint8_t* get() { return reinterpret_cast<uint8_t*>(data); }
+};
+
+// RAII holder for the elements of a Java int[]; released with mode 0 (copy back and free).
+class IntArrayGuard {
+    JNIEnv* env;
+    jintArray array;
+    jint* data;
+   public:
+    IntArrayGuard(JNIEnv* env, jintArray array) : env{env}, array{array} {
+#ifdef USE_CRITICAL
+        data = reinterpret_cast<jint*>(env->GetPrimitiveArrayCritical(array, nullptr));
+#else
+        data = env->GetIntArrayElements(array, nullptr);
+#endif
+    }
+    IntArrayGuard(IntArrayGuard&&) = delete;  // also suppresses the implicit copy operations
+    ~IntArrayGuard() {
+#ifdef USE_CRITICAL
+        env->ReleasePrimitiveArrayCritical(array, data, 0);
+#else
+        env->ReleaseIntArrayElements(array, data, 0);
+#endif
+    }
+    int* get() { return reinterpret_cast<int*>(data); }
+};
+
+// RAII holder for the elements of a Java float[]; released with mode 0 (copy back and free).
+class FloatArrayGuard {
+    JNIEnv* env;
+    jfloatArray array;
+    jfloat* data;
+   public:
+    FloatArrayGuard(JNIEnv* env, jfloatArray array) : env{env}, array{array} {
+#ifdef USE_CRITICAL
+        data = reinterpret_cast<jfloat*>(env->GetPrimitiveArrayCritical(array, nullptr));
+#else
+        data = env->GetFloatArrayElements(array, nullptr);
+#endif
+    }
+    FloatArrayGuard(FloatArrayGuard&&) = delete;  // also suppresses the implicit copy operations
+    ~FloatArrayGuard() {
+#ifdef USE_CRITICAL
+        env->ReleasePrimitiveArrayCritical(array, data, 0);
+#else
+        env->ReleaseFloatArrayElements(array, data, 0);
+#endif
+    }
+    float* get() { return reinterpret_cast<float*>(data); }
+};
+
+// RAII lock on a Bitmap's pixels. On any failure it logs, stays invalid and never unlocks.
+class BitmapGuard {
+    JNIEnv* env;
+    jobject bitmap;
+    AndroidBitmapInfo info{};  // zeroed so width()/height() are defined if getInfo fails
+    int bytesPerPixel{0};
+    void* bytes;
+    bool valid{false};
+   public:
+    BitmapGuard(JNIEnv* env, jobject jBitmap) : env{env}, bitmap{jBitmap}, bytes{nullptr} {
+        if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS) {
+            ALOGE("AndroidBitmap_getInfo failed");
+            return;
+        }
+        if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888 &&
+            info.format != ANDROID_BITMAP_FORMAT_A_8) {
+            ALOGE("AndroidBitmap in the wrong format");
+            return;
+        }
+        // Guard the division against a zero width, and reject padded rows: their stride can
+        // still truncate to 1 or 4 under integer division, so the remainder is checked too.
+        bytesPerPixel = info.width == 0 ? 0 : info.stride / info.width;
+        if ((bytesPerPixel != 1 && bytesPerPixel != 4) || info.stride % info.width != 0) {
+            ALOGE("Expected a vector size of 1 or 4. Got %d. Extra padding per line not currently "
+                  "supported", bytesPerPixel);
+            return;
+        }
+        if (AndroidBitmap_lockPixels(env, bitmap, &bytes) != ANDROID_BITMAP_RESULT_SUCCESS) {
+            ALOGE("AndroidBitmap_lockPixels failed");
+            return;
+        }
+        valid = true;
+    }
+    BitmapGuard(BitmapGuard&&) = delete;  // also suppresses the implicit copy operations
+    ~BitmapGuard() {
+        if (valid) {
+            AndroidBitmap_unlockPixels(env, bitmap);
+        }
+    }
+    uint8_t* get() const {
+        assert(valid);
+        return reinterpret_cast<uint8_t*>(bytes);
+    }
+    int width() const { return info.width; }
+    int height() const { return info.height; }
+    int vectorSize() const { return bytesPerPixel; }
+};
+
+/**
+ * Copies a Kotlin Range2d (int fields startX, startY, endX, endY) into a C++ Restriction.
+ */
+class RestrictionParameter {
+   private:
+    bool isNull;
+    Restriction restriction;
+
+   public:
+    RestrictionParameter(JNIEnv* env, jobject jRestriction) : isNull{jRestriction == nullptr} {
+        if (isNull) {
+            return;
+        }
+        /* TODO Measure how long FindClass and related functions take. Consider passing the
+         * four values instead. This would also require setting the default when Range2D is null.
+         */
+        jclass restrictionClass = env->FindClass("android/renderscript/toolkit/Range2d");
+        if (restrictionClass == nullptr) {
+            ALOGE("RenderScriptToolkit. Internal error. Could not find the Kotlin Range2d class.");
+            isNull = true;  // a failed lookup is treated like a missing restriction
+            return;
+        }
+        jfieldID startXId = env->GetFieldID(restrictionClass, "startX", "I");
+        jfieldID startYId = env->GetFieldID(restrictionClass, "startY", "I");
+        jfieldID endXId = env->GetFieldID(restrictionClass, "endX", "I");
+        jfieldID endYId = env->GetFieldID(restrictionClass, "endY", "I");
+        restriction.startX = env->GetIntField(jRestriction, startXId);
+        restriction.startY = env->GetIntField(jRestriction, startYId);
+        restriction.endX = env->GetIntField(jRestriction, endXId);
+        restriction.endY = env->GetIntField(jRestriction, endYId);
+    }
+    Restriction* get() { return isNull ? nullptr : &restriction; }  // nullptr: no restriction
+};
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_android_renderscript_toolkit_Toolkit_createNative(JNIEnv* /*env*/, jobject /*thiz*/) {
+    return reinterpret_cast<jlong>(new RenderScriptToolkit());  // deleted by destroyNative
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_destroyNative(
+        JNIEnv* /*env*/, jobject /*thiz*/, jlong native_handle) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    delete toolkit;  // native_handle is dangling after this call
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlend(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jint jmode, jbyteArray source_array,
+        jbyteArray dest_array, jint size_x, jint size_y, jobject restriction) {
+    auto toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    auto mode = static_cast<RenderScriptToolkit::BlendingMode>(jmode);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard source{env, source_array};
+    ByteArrayGuard dest{env, dest_array};
+    // dest is read and written; restrict.get() is nullptr when no Range2d was supplied.
+    toolkit->blend(mode, source.get(), dest.get(), size_x, size_y, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlendBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jint jmode, jobject source_bitmap,
+        jobject dest_bitmap, jobject restriction) {
+    auto toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    auto mode = static_cast<RenderScriptToolkit::BlendingMode>(jmode);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard source{env, source_bitmap};
+    BitmapGuard dest{env, dest_bitmap};
+    // Width and height are taken from the source bitmap only.
+    toolkit->blend(mode, source.get(), dest.get(), source.width(), source.height(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlur(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, jint vectorSize,
+        jint size_x, jint size_y, jint radius, jbyteArray output_array, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    // Argument order differs from the JNI signature: vectorSize follows size_x and size_y.
+    toolkit->blur(input.get(), output.get(), size_x, size_y, vectorSize, radius, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeBlurBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jobject output_bitmap, jint radius, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    BitmapGuard output{env, output_bitmap};
+    // Width, height and vector size are all taken from the input bitmap.
+    toolkit->blur(input.get(), output.get(), input.width(), input.height(), input.vectorSize(),
+                  radius, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeColorMatrix(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jint input_vector_size, jint size_x, jint size_y, jbyteArray output_array,
+        jint output_vector_size, jfloatArray jmatrix, jfloatArray add_vector, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    FloatArrayGuard matrix{env, jmatrix};
+    FloatArrayGuard add{env, add_vector};
+    // Argument order differs from the JNI signature: both vector sizes precede size_x/size_y.
+    toolkit->colorMatrix(input.get(), output.get(), input_vector_size, output_vector_size, size_x,
+                         size_y, matrix.get(), add.get(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeColorMatrixBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jobject output_bitmap, jfloatArray jmatrix, jfloatArray add_vector, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    BitmapGuard output{env, output_bitmap};
+    FloatArrayGuard matrix{env, jmatrix};
+    FloatArrayGuard add{env, add_vector};
+    // Each bitmap reports its own vector size; width and height come from the input.
+    toolkit->colorMatrix(input.get(), output.get(), input.vectorSize(), output.vectorSize(),
+                         input.width(), input.height(), matrix.get(), add.get(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeConvolve(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, jint vectorSize,
+        jint size_x, jint size_y, jbyteArray output_array, jfloatArray coefficients,
+        jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    FloatArrayGuard coeffs{env, coefficients};
+    // The coefficient count picks the kernel: 9 -> 3x3, 25 -> 5x5. Any other count is logged.
+    const jsize count = env->GetArrayLength(coefficients);
+    if (count == 9) {
+        toolkit->convolve3x3(input.get(), output.get(), vectorSize, size_x, size_y, coeffs.get(),
+                             restrict.get());
+    } else if (count == 25) {
+        toolkit->convolve5x5(input.get(), output.get(), vectorSize, size_x, size_y, coeffs.get(),
+                             restrict.get());
+    } else {
+        ALOGE("RenderScript Toolkit convolve. Expected 9 or 25 coefficients. Got %d.", count);
+    }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeConvolveBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jobject output_bitmap, jfloatArray coefficients, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    BitmapGuard output{env, output_bitmap};
+    FloatArrayGuard coeffs{env, coefficients};
+    // The coefficient count picks the kernel: 9 -> 3x3, 25 -> 5x5. Any other count is logged.
+    const jsize count = env->GetArrayLength(coefficients);
+    if (count == 9) {
+        toolkit->convolve3x3(input.get(), output.get(), input.vectorSize(), input.width(),
+                             input.height(), coeffs.get(), restrict.get());
+    } else if (count == 25) {
+        toolkit->convolve5x5(input.get(), output.get(), input.vectorSize(), input.width(),
+                             input.height(), coeffs.get(), restrict.get());
+    } else {
+        ALOGE("RenderScript Toolkit convolve. Expected 9 or 25 coefficients. Got %d.", count);
+    }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogram(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jint vector_size, jint size_x, jint size_y, jintArray output_array, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    IntArrayGuard output{env, output_array};
+    // Argument order differs from the JNI signature: vector_size follows size_x and size_y.
+    toolkit->histogram(input.get(), output.get(), size_x, size_y, vector_size, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogramBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jintArray output_array, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    IntArrayGuard output{env, output_array};
+    // Width, height and vector size are all taken from the input bitmap.
+    toolkit->histogram(input.get(), output.get(), input.width(), input.height(), input.vectorSize(),
+                       restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogramDot(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jint vector_size, jint size_x, jint size_y, jintArray output_array,
+        jfloatArray coefficients, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    IntArrayGuard output{env, output_array};
+    FloatArrayGuard coeffs{env, coefficients};
+    // Argument order differs from the JNI signature: vector_size follows size_x and size_y.
+    toolkit->histogramDot(input.get(), output.get(), size_x, size_y, vector_size, coeffs.get(),
+                          restrict.get());
+}
+
+extern "C" JNIEXPORT
+void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeHistogramDotBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jintArray output_array, jfloatArray coefficients, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    IntArrayGuard output{env, output_array};
+    FloatArrayGuard coeffs{env, coefficients};
+    // Width, height and vector size are all taken from the input bitmap.
+    toolkit->histogramDot(input.get(), output.get(), input.width(), input.height(),
+                          input.vectorSize(), coeffs.get(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLut(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jbyteArray output_array, jint size_x, jint size_y, jbyteArray red_table,
+        jbyteArray green_table, jbyteArray blue_table, jbyteArray alpha_table,
+        jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    ByteArrayGuard red{env, red_table};
+    ByteArrayGuard green{env, green_table};
+    ByteArrayGuard blue{env, blue_table};
+    ByteArrayGuard alpha{env, alpha_table};
+    // One table per channel, passed in red, green, blue, alpha order.
+    toolkit->lut(input.get(), output.get(), size_x, size_y, red.get(), green.get(), blue.get(),
+                 alpha.get(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLutBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jobject output_bitmap, jbyteArray red_table, jbyteArray green_table, jbyteArray blue_table,
+        jbyteArray alpha_table, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+
+    BitmapGuard input{env, input_bitmap};
+    BitmapGuard output{env, output_bitmap};
+    ByteArrayGuard red{env, red_table};
+    ByteArrayGuard green{env, green_table};
+    ByteArrayGuard blue{env, blue_table};
+    ByteArrayGuard alpha{env, alpha_table};
+    // Width and height come from the input bitmap; tables are passed in R, G, B, A order.
+    toolkit->lut(input.get(), output.get(), input.width(), input.height(), red.get(), green.get(),
+                 blue.get(), alpha.get(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLut3d(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jbyteArray output_array, jint size_x, jint size_y, jbyteArray cube_values, jint cubeSizeX,
+        jint cubeSizeY, jint cubeSizeZ, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    ByteArrayGuard cube{env, cube_values};
+    // The cube dimensions are forwarded unchanged in X, Y, Z order.
+    toolkit->lut3d(input.get(), output.get(), size_x, size_y, cube.get(), cubeSizeX, cubeSizeY,
+                   cubeSizeZ, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeLut3dBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jobject output_bitmap, jbyteArray cube_values, jint cubeSizeX, jint cubeSizeY,
+        jint cubeSizeZ, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    BitmapGuard output{env, output_bitmap};
+    ByteArrayGuard cube{env, cube_values};
+    // Width and height come from the input bitmap; cube dimensions are forwarded unchanged.
+    toolkit->lut3d(input.get(), output.get(), input.width(), input.height(), cube.get(), cubeSizeX,
+                   cubeSizeY, cubeSizeZ, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeResize(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jint vector_size, jint input_size_x, jint input_size_y, jbyteArray output_array,
+        jint output_size_x, jint output_size_y, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    // vector_size sits between the input and output dimensions in the toolkit call.
+    toolkit->resize(input.get(), output.get(), input_size_x, input_size_y, vector_size,
+                    output_size_x, output_size_y, restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeResizeBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jobject input_bitmap,
+        jobject output_bitmap, jobject restriction) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    RestrictionParameter restrict {env, restriction};
+    BitmapGuard input{env, input_bitmap};
+    BitmapGuard output{env, output_bitmap};
+    // Each bitmap supplies its own dimensions; the vector size comes from the input.
+    toolkit->resize(input.get(), output.get(), input.width(), input.height(), input.vectorSize(),
+                    output.width(), output.height(), restrict.get());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeYuvToRgb(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array,
+        jbyteArray output_array, jint size_x, jint size_y, jint format) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    ByteArrayGuard input{env, input_array};
+    ByteArrayGuard output{env, output_array};
+    // format is cast to RenderScriptToolkit::YuvFormat without a range check.
+    toolkit->yuvToRgb(input.get(), output.get(), size_x, size_y,
+                      static_cast<RenderScriptToolkit::YuvFormat>(format));
+}
+
+extern "C" JNIEXPORT void JNICALL Java_android_renderscript_toolkit_Toolkit_nativeYuvToRgbBitmap(
+        JNIEnv* env, jobject /*thiz*/, jlong native_handle, jbyteArray input_array, jint size_x,
+        jint size_y, jobject output_bitmap, jint format) {
+    RenderScriptToolkit* toolkit = reinterpret_cast<RenderScriptToolkit*>(native_handle);
+    BitmapGuard output{env, output_bitmap};
+    ByteArrayGuard input{env, input_array};
+    // size_x and size_y are passed through as given; the bitmap's own size is not consulted.
+    toolkit->yuvToRgb(input.get(), output.get(), size_x, size_y,
+                      static_cast<RenderScriptToolkit::YuvFormat>(format));
+}
diff --git a/toolkit/java/Toolkit.kt b/toolkit/java/Toolkit.kt
new file mode 100644
index 0000000..41dc432
--- /dev/null
+++ b/toolkit/java/Toolkit.kt
@@ -0,0 +1,1558 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.renderscript.toolkit
+
+import android.graphics.Bitmap
+import java.lang.IllegalArgumentException
+
+// This string is used for error messages.
+private const val externalName = "RenderScript Toolkit"
+
+/**
+ * A collection of high-performance graphic utility functions like blur and blend.
+ *
+ * This toolkit provides ten image manipulation functions: blend, blur, color matrix, convolve,
+ * histogram, histogramDot, lut, lut3d, resize, and YUV to RGB. These functions execute
+ * multithreaded on the CPU.
+ *
+ * Most of the functions have two variants: one that manipulates Bitmaps, the other ByteArrays.
+ * For ByteArrays, you need to specify the width and height of the data to be processed, as
+ * well as the number of bytes per pixel. For most use cases, this will be 4.
+ *
+ * You should instantiate the Toolkit once and reuse it throughout your application.
+ * On instantiation, the Toolkit creates a thread pool that's used for processing all the functions.
+ * You can limit the number of pool threads used by the Toolkit via the constructor. The pool
+ * threads are destroyed once the Toolkit is destroyed, after any pending work is done.
+ *
+ * This library is thread safe. You can call methods from different threads. The functions will
+ * execute sequentially.
+ *
+ * A native C++ version of this Toolkit is available.
+ *
+ * This toolkit can be used as a replacement for most RenderScript Intrinsic functions. Compared
+ * to RenderScript, it's simpler to use and more than twice as fast on the CPU. However RenderScript
+ * Intrinsics allow more flexibility for the type of allocation supported. In particular, this
+ * toolkit does not support allocations of floats.
+ */
+class Toolkit {
+    /**
+     * Blends a source buffer with the destination buffer.
+     *
+     * The result is placed in the destination buffer. The blending is done pairwise between
+     * corresponding RGBA values of the two buffers. The mode parameter specifies one of the
+     * fifteen supported blending operations; see [BlendingMode].
+     *
+     * A variant of this method is also available to blend Bitmaps.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY.
+     *
+     * The source and destination buffer must have the same dimensions. Both arrays should have
+     * a size greater than or equal to sizeX * sizeY * 4. The buffers have a row-major layout.
+     *
+     * @param mode The specific blending operation to do.
+     * @param sourceArray The RGBA input buffer.
+     * @param destArray The destination buffer. Used for input and output.
+     * @param sizeX The width of both buffers, as a number of RGBA values.
+     * @param sizeY The height of both buffers, as a number of RGBA values.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @throws IllegalArgumentException if either array is smaller than sizeX * sizeY * 4 bytes.
+     */
+    @JvmOverloads
+    fun blend(
+        mode: BlendingMode,
+        sourceArray: ByteArray,
+        destArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        restriction: Range2d? = null
+    ) {
+        val requiredSize = sizeX.toLong() * sizeY * 4 // Long math: the product can overflow Int
+        require(sourceArray.size >= requiredSize) {
+            "$externalName blend. sourceArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*4 > ${sourceArray.size}."
+        }
+        require(destArray.size >= requiredSize) {
+            "$externalName blend. destArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*4 > ${destArray.size}."
+        }
+        validateRestriction("blend", sizeX, sizeY, restriction)
+        nativeBlend(nativeHandle, mode.value, sourceArray, destArray, sizeX, sizeY, restriction)
+    }
+
+    /**
+     * Blends a source bitmap with the destination bitmap.
+     *
+     * Blends a source bitmap and a destination bitmap, placing the result in the destination
+     * bitmap. The blending is done pairwise between two corresponding RGBA values found in
+     * each bitmap. The mode parameter specifies one of fifteen supported blending operations.
+     * See [BlendingMode].
+     *
+     * A variant of this method is available to blend ByteArrays.
+     *
+     * The bitmaps should have identical width and height, and have a config of ARGB_8888.
+     * Bitmaps with a stride different than width * vectorSize are not currently supported.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each bitmap. If provided, the range must be wholly contained within the dimensions
+     * of the bitmap.
+     *
+     * @param mode The specific blending operation to do.
+     * @param sourceBitmap The RGBA input buffer.
+     * @param destBitmap The destination buffer. Used for input and output.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     */
+    @JvmOverloads
+    fun blend(
+        mode: BlendingMode,
+        sourceBitmap: Bitmap,
+        destBitmap: Bitmap,
+        restriction: Range2d? = null
+    ) {
+        validateBitmap("blend", sourceBitmap)
+        validateBitmap("blend", destBitmap)
+        // Dimensions are compared before configs, so a size mismatch is reported first.
+        val sameSize =
+            sourceBitmap.width == destBitmap.width && sourceBitmap.height == destBitmap.height
+        require(sameSize) {
+            "$externalName blend. Source and destination bitmaps should be the same size. " +
+                    "${sourceBitmap.width}x${sourceBitmap.height} and " +
+                    "${destBitmap.width}x${destBitmap.height} provided."
+        }
+        require(sourceBitmap.config == destBitmap.config) {
+            "$externalName blend. Source and destination bitmaps should have the same " +
+                    "config. ${sourceBitmap.config} and ${destBitmap.config} provided."
+        }
+        validateRestriction("blend", sourceBitmap.width, sourceBitmap.height, restriction)
+
+        nativeBlendBitmap(nativeHandle, mode.value, sourceBitmap, destBitmap, restriction)
+    }
+
+    /**
+     * Blurs an image.
+     *
+     * Performs a Gaussian blur of an image and returns the result in a ByteArray buffer. A variant
+     * of this method is available to blur Bitmaps.
+     *
+     * The radius determines which pixels are used to compute each blurred pixel. This Toolkit
+     * accepts values between 1 and 25. Larger values create a more blurred effect but also
+     * take longer to compute. When the radius extends past the edge, the edge pixel will
+     * be used as replacement for the pixel that's out of bounds.
+     *
+     * Each input pixel can either be represented by four bytes (RGBA format) or one byte
+     * for the less common blurring of alpha-channel-only images.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY. NOTE: The output buffer will still be full size, with the
+     * section that's not blurred all set to 0. This is to stay compatible with RenderScript.
+     *
+     * The source buffer must hold at least sizeX * sizeY * vectorSize bytes, in row-major layout.
+     *
+     * @param inputArray The buffer of the image to be blurred.
+     * @param vectorSize Either 1 or 4, the number of bytes in each cell, i.e. A vs. RGBA.
+     * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells.
+     * @param radius The radius of the pixels used to blur, a value from 1 to 25. Default is 5.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The blurred pixels, a new ByteArray of the same size as inputArray.
+     * @throws IllegalArgumentException if vectorSize, the array size or radius is invalid.
+     */
+    @JvmOverloads
+    fun blur(
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        radius: Int = 5,
+        restriction: Range2d? = null
+    ): ByteArray {
+        require(vectorSize == 1 || vectorSize == 4) {
+            "$externalName blur. The vectorSize should be 1 or 4. $vectorSize provided."
+        }
+        val requiredSize = sizeX.toLong() * sizeY * vectorSize // Long avoids Int overflow
+        require(inputArray.size >= requiredSize) {
+            "$externalName blur. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*$vectorSize > ${inputArray.size}."
+        }
+        require(radius in 1..25) {
+            "$externalName blur. The radius should be between 1 and 25. $radius provided."
+        }
+        validateRestriction("blur", sizeX, sizeY, restriction)
+        val outputArray = ByteArray(inputArray.size)
+        nativeBlur(
+            nativeHandle, inputArray, vectorSize, sizeX, sizeY, radius, outputArray, restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Blurs a Bitmap.
+     *
+     * Applies a Gaussian blur to the input Bitmap and returns the result as a Bitmap.
+     * A variant of this method is available to blur ByteArrays.
+     *
+     * The radius controls how many neighboring pixels contribute to each blurred pixel. Values
+     * from 1 to 25 are accepted; anything else throws an IllegalArgumentException. Larger
+     * values blur more but take longer to compute. Where the radius extends past an edge, the
+     * edge pixel stands in for the pixels that are out of bounds.
+     *
+     * Input Bitmaps of config ARGB_8888 and ALPHA_8 are supported. Bitmaps with a stride
+     * different than width * vectorSize are not currently supported. The returned Bitmap has
+     * the same config as the input.
+     *
+     * An optional restriction can limit the operation to a rectangular subset of the image. If
+     * provided, it must be wholly contained within the width and height of the Bitmap. NOTE:
+     * The output Bitmap is still full size, with the section that's not blurred all set to 0.
+     * This is to stay compatible with RenderScript.
+     *
+     * @param inputBitmap The image to be blurred.
+     * @param radius The radius of the pixels used to blur, a value from 1 to 25. Default is 5.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The blurred Bitmap.
+     */
+    @JvmOverloads
+    fun blur(inputBitmap: Bitmap, radius: Int = 5, restriction: Range2d? = null): Bitmap {
+        validateBitmap("blur", inputBitmap)
+        require(radius in 1..25) {
+            "$externalName blur. The radius should be between 1 and 25. $radius provided."
+        }
+        validateRestriction("blur", inputBitmap.width, inputBitmap.height, restriction)
+
+        return createCompatibleBitmap(inputBitmap).also { blurred ->
+            nativeBlurBitmap(nativeHandle, inputBitmap, blurred, radius, restriction)
+        }
+    }
+
+    /**
+     * The 4x4 identity matrix, for use with the [colorMatrix] method.
+     *
+     * Multiplying by this matrix leaves each pixel value unchanged, although the pixel value
+     * can still be modified by the add vector, or transformed to a different format.
+     *
+     * A new 16-entry FloatArray is created on every access.
+     */
+    val identityMatrix: FloatArray
+        get() {
+            // In the flattened 4x4 layout, the main diagonal sits at indices 0, 5, 10 and 15,
+            // i.e. the multiples of 5.
+            return FloatArray(16) { index -> if (index % 5 == 0) 1f else 0f }
+        }
+
+    /**
+     * Matrix that turns color pixels into grey scale.
+     *
+     * Pass this matrix to the [colorMatrix] method to convert an image from color to
+     * greyscale. Each of the first three rows repeats a single weight in its first three
+     * columns, and the last row is (0, 0, 0, 1). A new array is created on every access.
+     */
+    val greyScaleColorMatrix: FloatArray
+        get() {
+            val (r, g, b) = floatArrayOf(0.299f, 0.587f, 0.114f)
+            // Four rows of four entries each, flattened into a single array.
+            return floatArrayOf(r, r, r, 0f, g, g, g, 0f, b, b, b, 0f, 0f, 0f, 0f, 1f)
+        }
+
+    /**
+     * Matrix to convert RGB to YUV.
+     *
+     * Pass this matrix to the [colorMatrix] method to convert the first three bytes of each
+     * pixel from RGB to YUV. The last byte (the alpha channel) is left untouched.
+     *
+     * This is a simplistic conversion. Most YUV buffers have a more complicated format that
+     * this method does not support.
+     */
+    val rgbToYuvMatrix: FloatArray
+        get() {
+            val row0 = floatArrayOf(0.299f, -0.14713f, 0.615f, 0f)
+            val row1 = floatArrayOf(0.587f, -0.28886f, -0.51499f, 0f)
+            val row2 = floatArrayOf(0.114f, 0.436f, -0.10001f, 0f)
+            val row3 = floatArrayOf(0f, 0f, 0f, 1f)
+            return row0 + row1 + row2 + row3
+        }
+
+    /**
+     * Matrix to convert YUV to RGB.
+     *
+     * Pass this matrix to the [colorMatrix] method to convert the first three bytes of each
+     * pixel from YUV to RGB. The last byte (the alpha channel) is left untouched.
+     *
+     * This is a simplistic conversion. Most YUV buffers have a more complicated format, not
+     * supported by this method. Use the yuvToRgb method to convert these buffers.
+     */
+    val yuvToRgbMatrix: FloatArray
+        get() {
+            val row0 = floatArrayOf(1f, 1f, 1f, 0f)
+            val row1 = floatArrayOf(0f, -0.39465f, 2.03211f, 0f)
+            val row2 = floatArrayOf(1.13983f, -0.5806f, 0f, 0f)
+            val row3 = floatArrayOf(0f, 0f, 0f, 1f)
+            return row0 + row1 + row2 + row3
+        }
+
+    /**
+     * Transform an image using a color matrix.
+     *
+     * Converts a 2D array of vectors of unsigned bytes, multiplying each vector by a 4x4 matrix
+     * and adding an optional vector. If addVector is not specified, a vector of zeroes is
+     * added, i.e. a noop.
+     *
+     * Each input vector is composed of 1-4 unsigned bytes. If less than 4 bytes, it's extended
+     * to 4, padding with zeroes. The unsigned bytes are converted from 0-255 to 0.0-1.0 floats
+     * before the multiplication is done. The result is normalized from 0.0-1.0 back to a 0-255
+     * value and stored in the output. If the output vector size is less than four, the unused
+     * channels are discarded.
+     *
+     * Like the RenderScript Intrinsics, vectors of size 3 are padded to occupy 4 bytes, so
+     * inputArray must hold at least sizeX * sizeY * 4 bytes when inputVectorSize is 3.
+     *
+     * See [identityMatrix], [greyScaleColorMatrix], [rgbToYuvMatrix], and [yuvToRgbMatrix] for
+     * sample matrices. The YUV conversion may not work for all color spaces.
+     *
+     * @param inputArray The buffer of the image to be converted.
+     * @param inputVectorSize The number of bytes in each input cell, a value from 1 to 4.
+     * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells.
+     * @param outputVectorSize The number of bytes in each output cell, a value from 1 to 4.
+     * @param matrix The 4x4 matrix to multiply, in row major format.
+     * @param addVector A vector of four floats that's added to the result of the multiplication.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The converted buffer.
+     */
+    @JvmOverloads
+    fun colorMatrix(
+        inputArray: ByteArray,
+        inputVectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputVectorSize: Int,
+        matrix: FloatArray,
+        addVector: FloatArray = floatArrayOf(0f, 0f, 0f, 0f),
+        restriction: Range2d? = null
+    ): ByteArray {
+        require(inputVectorSize in 1..4) {
+            "$externalName colorMatrix. The inputVectorSize should be between 1 and 4. " +
+                    "$inputVectorSize provided."
+        }
+        require(outputVectorSize in 1..4) {
+            "$externalName colorMatrix. The outputVectorSize should be between 1 and 4. " +
+                    "$outputVectorSize provided."
+        }
+        // 3-byte cells are padded to 4; Long math keeps a huge sizeX * sizeY from wrapping.
+        require(inputArray.size >= sizeX.toLong() * sizeY * paddedSize(inputVectorSize)) {
+            "$externalName colorMatrix. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*${paddedSize(inputVectorSize)} > ${inputArray.size}."
+        }
+        require(matrix.size == 16) {
+            "$externalName colorMatrix. matrix should have 16 entries. ${matrix.size} provided."
+        }
+        require(addVector.size == 4) {
+            "$externalName colorMatrix. addVector should have 4 entries. " +
+                    "${addVector.size} provided."
+        }
+        validateRestriction("colorMatrix", sizeX, sizeY, restriction)
+
+        val outputArray = ByteArray(sizeX * sizeY * paddedSize(outputVectorSize))
+        nativeColorMatrix(
+            nativeHandle, inputArray, inputVectorSize, sizeX, sizeY, outputArray, outputVectorSize,
+            matrix, addVector, restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Transform a Bitmap using a color matrix.
+     *
+     * Converts a bitmap, multiplying each RGBA value by a 4x4 matrix and adding an optional
+     * vector. Each byte of the RGBA is converted from 0-255 to 0.0-1.0 floats before the
+     * multiplication is done. The resulting value is normalized from 0.0-1.0 to a 0-255 value
+     * and stored in the output. If addVector is not specified, a vector of zeroes is added,
+     * i.e. a noop.
+     *
+     * Bitmaps with a stride different than width * vectorSize are not currently supported.
+     *
+     * See [identityMatrix], [greyScaleColorMatrix], [rgbToYuvMatrix], and [yuvToRgbMatrix] for
+     * sample matrices. The YUV conversion may not work for all color spaces.
+     *
+     * @param inputBitmap The image to be converted.
+     * @param matrix The 4x4 matrix to multiply, in row major format. Must have 16 entries.
+     * @param addVector A vector of four floats that's added to the result of the multiplication.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The converted Bitmap.
+     * @throws IllegalArgumentException If matrix or addVector has the wrong number of entries.
+     */
+    @JvmOverloads
+    fun colorMatrix(
+        inputBitmap: Bitmap,
+        matrix: FloatArray,
+        addVector: FloatArray = floatArrayOf(0f, 0f, 0f, 0f),
+        restriction: Range2d? = null
+    ): Bitmap {
+        validateBitmap("colorMatrix", inputBitmap)
+        require(matrix.size == 16) {
+            "$externalName colorMatrix. matrix should have 16 entries. ${matrix.size} provided."
+        }
+        require(addVector.size == 4) {
+            "$externalName colorMatrix. addVector should have 4 entries. " +
+                    "${addVector.size} provided."
+        }
+        validateRestriction("colorMatrix", inputBitmap.width, inputBitmap.height, restriction)
+
+        val outputBitmap = createCompatibleBitmap(inputBitmap)
+        nativeColorMatrixBitmap(
+            nativeHandle,
+            inputBitmap,
+            outputBitmap,
+            matrix,
+            addVector,
+            restriction
+        )
+        return outputBitmap
+    }
+
+    /**
+     * Convolve a ByteArray.
+     *
+     * Applies a 3x3 or 5x5 convolution to the input array using the provided coefficients.
+     * A variant of this method is available to convolve Bitmaps.
+     *
+     * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed.
+     * The coefficients should be provided in row-major format.
+     *
+     * When the square extends past the edge, the edge values will be used as replacement for the
+     * values that are out of bounds.
+     *
+     * Each input cell can be represented by one to four bytes. Each byte is multiplied and
+     * accumulated independently of the other bytes of the cell.
+     *
+     * An optional range parameter can be set to restrict the convolve operation to a rectangular
+     * subset of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY. NOTE: The output buffer will still be full size, with the
+     * section that's not convolved all set to 0. This is to stay compatible with RenderScript.
+     *
+     * Like the RenderScript Intrinsics, cells of size 3 are padded to occupy 4 bytes. The source
+     * array must hold at least sizeX * sizeY * paddedSize(vectorSize) bytes in row-major layout.
+     * The output array has the same size as the input array.
+     *
+     * @param inputArray The buffer of the image to be convolved.
+     * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
+     * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells.
+     * @param coefficients A FloatArray of size 9 or 25, containing the multipliers.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The convolved array.
+     */
+    @JvmOverloads
+    fun convolve(
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        coefficients: FloatArray,
+        restriction: Range2d? = null
+    ): ByteArray {
+        require(vectorSize in 1..4) {
+            "$externalName convolve. The vectorSize should be between 1 and 4. " +
+                    "$vectorSize provided."
+        }
+        // 3-byte cells are padded to 4; Long math keeps a huge sizeX * sizeY from wrapping.
+        require(inputArray.size >= sizeX.toLong() * sizeY * paddedSize(vectorSize)) {
+            "$externalName convolve. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*${paddedSize(vectorSize)} > ${inputArray.size}."
+        }
+        require(coefficients.size == 9 || coefficients.size == 25) {
+            "$externalName convolve. Only 3x3 or 5x5 convolutions are supported. " +
+                    "${coefficients.size} coefficients provided."
+        }
+        validateRestriction("convolve", sizeX, sizeY, restriction)
+
+        val outputArray = ByteArray(inputArray.size)
+        nativeConvolve(
+            nativeHandle,
+            inputArray,
+            vectorSize,
+            sizeX,
+            sizeY,
+            outputArray,
+            coefficients,
+            restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Convolve a Bitmap.
+     *
+     * Applies a 3x3 or 5x5 convolution to the input Bitmap using the provided coefficients.
+     * A variant of this method is available to convolve ByteArrays. Bitmaps with a stride
+     * different than width * vectorSize are not currently supported.
+     *
+     * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed.
+     * The coefficients should be provided in row-major format.
+     *
+     * Each input cell can be represented by one to four bytes. Each byte is multiplied and
+     * accumulated independently of the other bytes of the cell.
+     *
+     * An optional restriction can limit the operation to a rectangular subset of the image,
+     * wholly contained within the Bitmap. NOTE: The output Bitmap is still full size, with the
+     * section that's not convolved all set to 0. This is to stay compatible with RenderScript.
+     *
+     * @param inputBitmap The image to be convolved.
+     * @param coefficients A FloatArray of size 9 or 25, containing the multipliers.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The convolved Bitmap.
+     */
+    @JvmOverloads
+    fun convolve(
+        inputBitmap: Bitmap,
+        coefficients: FloatArray,
+        restriction: Range2d? = null
+    ): Bitmap {
+        validateBitmap("convolve", inputBitmap)
+        val coefficientCount = coefficients.size
+        require(coefficientCount == 9 || coefficientCount == 25) {
+            "$externalName convolve. Only 3x3 or 5x5 convolutions are supported. " +
+                    "$coefficientCount coefficients provided."
+        }
+        validateRestriction("convolve", inputBitmap, restriction)
+
+        return createCompatibleBitmap(inputBitmap).also { convolved ->
+            nativeConvolveBitmap(nativeHandle, inputBitmap, convolved, coefficients, restriction)
+        }
+    }
+
+    /**
+     * Compute the histogram of an image.
+     *
+     * Tallies how many times each of the 256 possible values of a byte is found in the input.
+     * A variant of this method is available to do the histogram of a Bitmap.
+     *
+     * An input cell can be represented by one to four bytes. The tally is done independently
+     * for each of the bytes of the cell. Like the RenderScript Intrinsics, cells of size 3 are
+     * padded to occupy 4 bytes. Correspondingly, the returned IntArray has
+     * 256 * paddedSize(vectorSize) entries. The counts for value 0 are consecutive, followed by
+     * those for value 1, etc.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY.
+     *
+     * The source buffer must hold at least sizeX * sizeY * paddedSize(vectorSize) bytes. It has
+     * a row-major layout.
+     *
+     * @param inputArray The buffer of the image to be analyzed.
+     * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
+     * @param sizeX The width of the input buffer, as a number of 1 to 4 byte cells.
+     * @param sizeY The height of the input buffer, as a number of 1 to 4 byte cells.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The resulting array of counts.
+     */
+    @JvmOverloads
+    fun histogram(
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        restriction: Range2d? = null
+    ): IntArray {
+        require(vectorSize in 1..4) {
+            "$externalName histogram. The vectorSize should be between 1 and 4. " +
+                    "$vectorSize provided."
+        }
+        // 3-byte cells are padded to 4; Long math keeps a huge sizeX * sizeY from wrapping.
+        require(inputArray.size >= sizeX.toLong() * sizeY * paddedSize(vectorSize)) {
+            "$externalName histogram. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*${paddedSize(vectorSize)} > ${inputArray.size}."
+        }
+        validateRestriction("histogram", sizeX, sizeY, restriction)
+
+        val outputArray = IntArray(256 * paddedSize(vectorSize))
+        nativeHistogram(
+            nativeHandle,
+            inputArray,
+            vectorSize,
+            sizeX,
+            sizeY,
+            outputArray,
+            restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Compute the histogram of a Bitmap.
+     *
+     * Tallies how many times each of the 256 possible values of a byte is found in the bitmap.
+     * This method supports Bitmaps of config ARGB_8888 and ALPHA_8.
+     *
+     * For ARGB_8888, the tally is done independently for each of the four bytes.
+     * Correspondingly, the returned IntArray will have 4 * 256 entries. The counts for value 0
+     * are consecutive, followed by those for value 1, etc.
+     *
+     * For ALPHA_8, an IntArray of size 256 is returned.
+     *
+     * Bitmaps with a stride different than width * vectorSize are not currently supported.
+     *
+     * A variant of this method is available to do the histogram of a ByteArray.
+     *
+     * An optional restriction can limit the operation to a rectangular subset of the image,
+     * wholly contained within the Bitmap.
+     *
+     * @param inputBitmap The bitmap to be analyzed.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The resulting array of counts.
+     */
+    @JvmOverloads
+    fun histogram(
+        inputBitmap: Bitmap,
+        restriction: Range2d? = null
+    ): IntArray {
+        validateBitmap("histogram", inputBitmap)
+        validateRestriction("histogram", inputBitmap, restriction)
+
+        val bucketCount = 256 * vectorSize(inputBitmap)
+        return IntArray(bucketCount).also { counts ->
+            nativeHistogramBitmap(nativeHandle, inputBitmap, counts, restriction)
+        }
+    }
+
+    /**
+     * Compute the histogram of the dot product of an image.
+     *
+     * This method supports cells of 1 to 4 bytes in length. For each cell of the array,
+     * the dot product of its bytes with the provided coefficients is computed. The resulting
+     * floating point value is converted to an unsigned byte and tallied in the histogram.
+     *
+     * If coefficients is null, the coefficients used for RGBA luminosity calculation will be
+     * used, i.e. the values (0.299f, 0.587f, 0.114f, 0f).
+     *
+     * Each coefficient must be >= 0 and their sum must be 1.0 or less. There must be the same
+     * number of coefficients as vectorSize.
+     *
+     * A variant of this method is available to do the histogram of a Bitmap.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY.
+     *
+     * Like the RenderScript Intrinsics, cells of size 3 are padded to occupy 4 bytes. The source
+     * buffer must hold at least sizeX * sizeY * paddedSize(vectorSize) bytes. The returned
+     * array will have 256 ints.
+     *
+     * @param inputArray The buffer of the image to be analyzed.
+     * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
+     * @param sizeX The width of the input buffer, as a number of 1 to 4 byte cells.
+     * @param sizeY The height of the input buffer, as a number of 1 to 4 byte cells.
+     * @param coefficients The dot product multipliers. Size should equal vectorSize. Can be null.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The resulting vector of counts.
+     */
+    @JvmOverloads
+    fun histogramDot(
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        coefficients: FloatArray? = null,
+        restriction: Range2d? = null
+    ): IntArray {
+        require(vectorSize in 1..4) {
+            "$externalName histogramDot. The vectorSize should be between 1 and 4. " +
+                    "$vectorSize provided."
+        }
+        // 3-byte cells are padded to 4; Long math keeps a huge sizeX * sizeY from wrapping.
+        require(inputArray.size >= sizeX.toLong() * sizeY * paddedSize(vectorSize)) {
+            "$externalName histogramDot. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*${paddedSize(vectorSize)} > ${inputArray.size}."
+        }
+        validateHistogramDotCoefficients(coefficients, vectorSize)
+        validateRestriction("histogramDot", sizeX, sizeY, restriction)
+
+        val outputArray = IntArray(256)
+        val actualCoefficients = coefficients ?: floatArrayOf(0.299f, 0.587f, 0.114f, 0f)
+        nativeHistogramDot(
+            nativeHandle,
+            inputArray,
+            vectorSize,
+            sizeX,
+            sizeY,
+            outputArray,
+            actualCoefficients,
+            restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Compute the histogram of the dot product of a Bitmap.
+     *
+     * This method supports Bitmaps of config ARGB_8888 and ALPHA_8. For each pixel of the bitmap,
+     * the dot product of its bytes with the provided coefficients is computed. The resulting
+     * floating point value is converted to an unsigned byte and tallied in the histogram.
+     *
+     * If coefficients is null, the coefficients used for RGBA luminosity calculation are used,
+     * i.e. the values (0.299f, 0.587f, 0.114f, 0f).
+     *
+     * Each coefficient must be >= 0 and their sum must be 1.0 or less. For ARGB_8888, four
+     * values must be provided; for ALPHA_8, one.
+     *
+     * Bitmaps with a stride different than width * vectorSize are not currently supported.
+     *
+     * A variant of this method is available to do the histogram of a ByteArray.
+     *
+     * An optional restriction can limit the operation to a rectangular subset of the image,
+     * wholly contained within the Bitmap.
+     *
+     * The returned array will have 256 ints.
+     *
+     * @param inputBitmap The bitmap to be analyzed.
+     * @param coefficients The one or four values used for the dot product. Can be null.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The resulting vector of counts.
+     */
+    @JvmOverloads
+    fun histogramDot(
+        inputBitmap: Bitmap,
+        coefficients: FloatArray? = null,
+        restriction: Range2d? = null
+    ): IntArray {
+        validateBitmap("histogramDot", inputBitmap)
+        validateHistogramDotCoefficients(coefficients, vectorSize(inputBitmap))
+        validateRestriction("histogramDot", inputBitmap, restriction)
+
+        // Fall back to the RGBA luminosity weights when the caller supplies none.
+        val weights = coefficients ?: floatArrayOf(0.299f, 0.587f, 0.114f, 0f)
+        return IntArray(256).also { counts ->
+            nativeHistogramDotBitmap(
+                nativeHandle, inputBitmap, counts, weights, restriction
+            )
+        }
+    }
+
+    /**
+     * Transform an image using a look up table.
+     *
+     * Transforms an image by using a per-channel lookup table. Each channel of the input has an
+     * independent lookup table. The tables are 256 entries in size and can cover the full value
+     * range of a byte. The input array should be in RGBA format, where four consecutive bytes
+     * form a cell. A variant of this method is available to transform a Bitmap.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY. NOTE: The output array will still be full size, with the
+     * section that's not transformed all set to 0. This is to stay compatible with RenderScript.
+     *
+     * The source array must hold at least sizeX * sizeY * 4 bytes. The returned array has the
+     * same size as the input array. The arrays have a row-major layout.
+     *
+     * @param inputArray The buffer of the image to be transformed.
+     * @param sizeX The width of both buffers, as a number of 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 4 byte cells.
+     * @param table The four arrays of 256 values that are used to convert each channel.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The transformed image.
+     * @throws IllegalArgumentException If inputArray is too small for the given dimensions.
+     */
+    @JvmOverloads
+    fun lut(
+        inputArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        table: LookupTable,
+        restriction: Range2d? = null
+    ): ByteArray {
+        // Multiply as Long: an Int product could wrap around and let a too-small array through.
+        require(inputArray.size >= sizeX.toLong() * sizeY * 4) {
+            "$externalName lut. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*4 > ${inputArray.size}."
+        }
+        validateRestriction("lut", sizeX, sizeY, restriction)
+
+        val outputArray = ByteArray(inputArray.size)
+        nativeLut(
+            nativeHandle,
+            inputArray,
+            outputArray,
+            sizeX,
+            sizeY,
+            table.red,
+            table.green,
+            table.blue,
+            table.alpha,
+            restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Transform a Bitmap using a look up table.
+     *
+     * Transforms an image by using a per-channel lookup table. Each channel of the input has an
+     * independent lookup table. The tables are 256 entries in size and can cover the full value
+     * range of a byte.
+     *
+     * The input Bitmap should be in config ARGB_8888. Bitmaps with a stride different than
+     * width * vectorSize are not currently supported.
+     *
+     * A variant of this method is available to transform a ByteArray.
+     *
+     * An optional restriction can limit the operation to a rectangular subset of the image. If
+     * provided, it must be wholly contained within the Bitmap. NOTE: The output Bitmap is still
+     * full size, with the section that's not transformed all set to 0. This is to stay
+     * compatible with RenderScript.
+     *
+     * @param inputBitmap The image to be transformed.
+     * @param table The four arrays of 256 values that are used to convert each channel.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The transformed image.
+     */
+    @JvmOverloads
+    fun lut(
+        inputBitmap: Bitmap,
+        table: LookupTable,
+        restriction: Range2d? = null
+    ): Bitmap {
+        validateBitmap("lut", inputBitmap)
+        validateRestriction("lut", inputBitmap, restriction)
+
+        // NOTE(review): the doc asks for ARGB_8888, but nothing in this method enforces it;
+        // confirm that validateBitmap rejects other configs for this operation.
+        return createCompatibleBitmap(inputBitmap).also { transformed ->
+            nativeLutBitmap(
+                nativeHandle,
+                inputBitmap,
+                transformed,
+                table.red, table.green, table.blue, table.alpha,
+                restriction
+            )
+        }
+    }
+
+    /**
+     * Transform an image using a 3D look up table.
+     *
+     * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G,
+     * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest
+     * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry
+     * is returned in the output array.
+     *
+     * The input array should be in RGBA format, where four consecutive bytes form a cell.
+     * The fourth byte of each input cell is ignored. A variant of this method is also available
+     * to transform Bitmaps.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of each buffer. If provided, the range must be wholly contained within the dimensions
+     * described by sizeX and sizeY. NOTE: The output array will still be full size, with the
+     * section that's not transformed all set to 0. This is to stay compatible with RenderScript.
+     *
+     * The source array must hold at least sizeX * sizeY * 4 bytes. The returned array has the
+     * same size as the input array. The arrays have a row-major layout.
+     *
+     * @param inputArray The buffer of the image to be transformed.
+     * @param sizeX The width of both buffers, as a number of 4 byte cells.
+     * @param sizeY The height of both buffers, as a number of 4 byte cells.
+     * @param cube The translation cube. Each of its dimensions must be between 2 and 256.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The transformed image.
+     * @throws IllegalArgumentException If inputArray is too small or the cube dimensions are
+     * out of range.
+     */
+    @JvmOverloads
+    fun lut3d(
+        inputArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        cube: Rgba3dArray,
+        restriction: Range2d? = null
+    ): ByteArray {
+        // Multiply as Long: an Int product could wrap around and let a too-small array through.
+        require(inputArray.size >= sizeX.toLong() * sizeY * 4) {
+            "$externalName lut3d. inputArray is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*4 > ${inputArray.size}."
+        }
+        require(cube.sizeX in 2..256 && cube.sizeY in 2..256 && cube.sizeZ in 2..256) {
+            "$externalName lut3d. The dimensions of the cube should be between 2 and 256. " +
+                    "(${cube.sizeX}, ${cube.sizeY}, ${cube.sizeZ}) provided."
+        }
+        validateRestriction("lut3d", sizeX, sizeY, restriction)
+
+        val outputArray = ByteArray(inputArray.size)
+        nativeLut3d(
+            nativeHandle, inputArray, outputArray, sizeX, sizeY, cube.values, cube.sizeX,
+            cube.sizeY, cube.sizeZ, restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Transform a Bitmap using a 3D look up table.
+     *
+     * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G,
+     * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest
+     * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry
+     * is returned in the output Bitmap.
+     *
+     * The input bitmap should be in ARGB_8888 format. The A channel is preserved. Bitmaps with
+     * a stride different than width * vectorSize are not currently supported.
+     *
+     * A variant of this method is also available to transform ByteArrays.
+     *
+     * An optional restriction can limit the operation to a rectangular subset of the image. If
+     * provided, it must be wholly contained within the Bitmap. NOTE: The output Bitmap is still
+     * full size, with the section that's not transformed all set to 0. This is to stay
+     * compatible with RenderScript.
+     *
+     * @param inputBitmap The image to be transformed.
+     * @param cube The translation cube. Each of its dimensions must be between 2 and 256.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return The transformed image.
+     * @throws IllegalArgumentException If a dimension of the cube is outside of 2 to 256.
+     */
+    @JvmOverloads
+    fun lut3d(inputBitmap: Bitmap, cube: Rgba3dArray, restriction: Range2d? = null): Bitmap {
+        validateBitmap("lut3d", inputBitmap)
+        // Apply the same cube bounds as the ByteArray variant before handing off to native code.
+        require(cube.sizeX in 2..256 && cube.sizeY in 2..256 && cube.sizeZ in 2..256) {
+            "$externalName lut3d. The dimensions of the cube should be between 2 and 256. " +
+                    "(${cube.sizeX}, ${cube.sizeY}, ${cube.sizeZ}) provided."
+        }
+        validateRestriction("lut3d", inputBitmap, restriction)
+
+        val outputBitmap = createCompatibleBitmap(inputBitmap)
+        nativeLut3dBitmap(
+            nativeHandle, inputBitmap, outputBitmap, cube.values, cube.sizeX,
+            cube.sizeY, cube.sizeZ, restriction
+        )
+        return outputBitmap
+    }
+
+    /**
+     * Resize an image.
+     *
+     * Resizes an image using bicubic interpolation.
+     *
+     * This method supports elements of 1 to 4 bytes in length. Each byte of the element is
+     * interpolated independently from the others.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of the output buffer. The corresponding scaled range of the input will be used.  If provided,
+     * the range must be wholly contained within the dimensions described by outputSizeX and
+     * outputSizeY.
+     *
+     * The input and output arrays have a row-major layout. The input array should be
+     * large enough for inputSizeX * inputSizeY * vectorSize bytes.
+     *
+     * Like the RenderScript Intrinsics, vectorSize of size 3 are padded to occupy 4 bytes.
+     *
+     * @param inputArray The buffer of the image to be resized.
+     * @param vectorSize The number of bytes in each element of both buffers. A value from 1 to 4.
+     * @param inputSizeX The width of the input buffer, as a number of 1-4 byte elements.
+     * @param inputSizeY The height of the input buffer, as a number of 1-4 byte elements.
+     * @param outputSizeX The width of the output buffer, as a number of 1-4 byte elements.
+     * @param outputSizeY The height of the output buffer, as a number of 1-4 byte elements.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return An array that contains the rescaled image.
+     * @throws IllegalArgumentException If vectorSize is not in 1..4, if inputArray is too small,
+     * or if the restriction does not fit in the output dimensions.
+     */
+    @JvmOverloads
+    fun resize(
+        inputArray: ByteArray,
+        vectorSize: Int,
+        inputSizeX: Int,
+        inputSizeY: Int,
+        outputSizeX: Int,
+        outputSizeY: Int,
+        restriction: Range2d? = null
+    ): ByteArray {
+        require(vectorSize in 1..4) {
+            "$externalName resize. The vectorSize should be between 1 and 4. $vectorSize provided."
+        }
+        // NOTE(review): the output below is sized with paddedSize(vectorSize) while this check
+        // uses the unpadded vectorSize. Confirm whether a vectorSize of 3 needs 4 bytes per
+        // input element as well.
+        require(inputArray.size >= inputSizeX * inputSizeY * vectorSize) {
+            "$externalName resize. inputArray is too small for the given dimensions. " +
+                    "$inputSizeX*$inputSizeY*$vectorSize > ${inputArray.size}."
+        }
+        validateRestriction("resize", outputSizeX, outputSizeY, restriction)
+
+        // Elements of 3 bytes are padded to 4 bytes in the output buffer.
+        val outputArray = ByteArray(outputSizeX * outputSizeY * paddedSize(vectorSize))
+        nativeResize(
+            nativeHandle,
+            inputArray,
+            vectorSize,
+            inputSizeX,
+            inputSizeY,
+            outputArray,
+            outputSizeX,
+            outputSizeY,
+            restriction
+        )
+        return outputArray
+    }
+
+    /**
+     * Resize an image.
+     *
+     * Resizes an image using bicubic interpolation.
+     *
+     * This method supports input Bitmap of config ARGB_8888 and ALPHA_8. The returned Bitmap
+     * has the same config. Bitmaps with a stride different than width * vectorSize are not
+     * currently supported.
+     *
+     * An optional range parameter can be set to restrict the operation to a rectangular subset
+     * of the output buffer. The corresponding scaled range of the input will be used.  If provided,
+     * the range must be wholly contained within the dimensions described by outputSizeX and
+     * outputSizeY.
+     *
+     * @param inputBitmap  The Bitmap to be resized.
+     * @param outputSizeX The width of the output Bitmap, in pixels.
+     * @param outputSizeY The height of the output Bitmap, in pixels.
+     * @param restriction When not null, restricts the operation to a 2D range of pixels.
+     * @return A Bitmap that contains the rescaled image.
+     * @throws IllegalArgumentException If the bitmap config or stride is not supported, or if
+     * the restriction does not fit in the output dimensions.
+     */
+    @JvmOverloads
+    fun resize(
+        inputBitmap: Bitmap,
+        outputSizeX: Int,
+        outputSizeY: Int,
+        restriction: Range2d? = null
+    ): Bitmap {
+        validateBitmap("resize", inputBitmap)
+        validateRestriction("resize", outputSizeX, outputSizeY, restriction)
+
+        // The output must use the config of the input: ALPHA_8 input is accepted above, and a
+        // hard-coded ARGB_8888 output would not have the same element size as the input.
+        val outputBitmap = Bitmap.createBitmap(outputSizeX, outputSizeY, inputBitmap.config)
+        nativeResizeBitmap(nativeHandle, inputBitmap, outputBitmap, restriction)
+        return outputBitmap
+    }
+
+    /**
+     * Convert an image from YUV to RGB.
+     *
+     * Takes a buffer holding an image in one of the supported YUV formats and produces an RGBA
+     * buffer of sizeX * sizeY * 4 bytes. The alpha channel of the output will be set to 255.
+     *
+     * Note that for YV12 and a sizeX that's not a multiple of 32, the RenderScript Intrinsic may
+     * not have converted the image correctly. This Toolkit method should.
+     *
+     * @param inputArray The buffer of the image to be converted.
+     * @param sizeX The width in pixels of the image. Must be even.
+     * @param sizeY The height in pixels of the image. Must be even.
+     * @param format Either YV12 or NV21.
+     * @return The converted image as a byte array.
+     */
+    fun yuvToRgb(inputArray: ByteArray, sizeX: Int, sizeY: Int, format: YuvFormat): ByteArray {
+        val dimensionsAreEven = sizeX % 2 == 0 && sizeY % 2 == 0
+        require(dimensionsAreEven) {
+            "$externalName yuvToRgb. Non-even dimensions are not supported. " +
+                    "$sizeX and $sizeY were provided."
+        }
+
+        // Four bytes per output pixel.
+        return ByteArray(sizeX * sizeY * 4).also { rgba ->
+            nativeYuvToRgb(nativeHandle, inputArray, rgba, sizeX, sizeY, format.value)
+        }
+    }
+
+    /**
+     * Convert an image from YUV to an RGB Bitmap.
+     *
+     * Takes a buffer holding an image in one of the supported YUV formats and produces a new
+     * ARGB_8888 Bitmap of sizeX by sizeY pixels. The alpha channel will be set to 255.
+     *
+     * Note that for YV12 and a sizeX that's not a multiple of 32, the RenderScript Intrinsic may
+     * not have converted the image correctly. This Toolkit method should.
+     *
+     * @param inputArray The buffer of the image to be converted.
+     * @param sizeX The width in pixels of the image. Must be even.
+     * @param sizeY The height in pixels of the image. Must be even.
+     * @param format Either YV12 or NV21.
+     * @return The converted image.
+     */
+    fun yuvToRgbBitmap(inputArray: ByteArray, sizeX: Int, sizeY: Int, format: YuvFormat): Bitmap {
+        val dimensionsAreEven = sizeX % 2 == 0 && sizeY % 2 == 0
+        require(dimensionsAreEven) {
+            "$externalName yuvToRgbBitmap. Non-even dimensions are not supported. " +
+                    "$sizeX and $sizeY were provided."
+        }
+
+        return Bitmap.createBitmap(sizeX, sizeY, Bitmap.Config.ARGB_8888).also { rgbBitmap ->
+            nativeYuvToRgbBitmap(nativeHandle, inputArray, sizeX, sizeY, rgbBitmap, format.value)
+        }
+    }
+
+    companion object {
+        // Loads the native library that provides the `external` functions declared below.
+        // This runs once, when the class is first initialized.
+        init {
+            System.loadLibrary("renderscript-toolkit")
+        }
+    }
+
+    /**
+     * Opaque value returned by [createNative]. It is passed as the first argument of every
+     * native call and released through [destroyNative]. It is 0 until the init block runs.
+     */
+    private var nativeHandle: Long = 0
+
+    /** Native (JNI) function that creates the native side object and returns its handle. */
+    private external fun createNative(): Long
+
+    /** Native (JNI) function that releases the object identified by [nativeHandle]. */
+    private external fun destroyNative(nativeHandle: Long)
+
+    /**
+     * Native (JNI) implementation of blend for byte arrays.
+     * NOTE(review): the Kotlin caller is outside this view; presumably mode is a
+     * BlendingMode.value.
+     */
+    private external fun nativeBlend(
+        nativeHandle: Long,
+        mode: Int,
+        sourceArray: ByteArray,
+        destArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of blend for Bitmaps.
+     * NOTE(review): the Kotlin caller is outside this view; presumably mode is a
+     * BlendingMode.value.
+     */
+    private external fun nativeBlendBitmap(
+        nativeHandle: Long,
+        mode: Int,
+        sourceBitmap: Bitmap,
+        destBitmap: Bitmap,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of blur for byte arrays.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeBlur(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        radius: Int,
+        outputArray: ByteArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of blur for Bitmaps.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeBlurBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputBitmap: Bitmap,
+        radius: Int,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of colorMatrix for byte arrays. The input and output
+     * buffers each have their own vector size.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeColorMatrix(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        inputVectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputArray: ByteArray,
+        outputVectorSize: Int,
+        matrix: FloatArray,
+        addVector: FloatArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of colorMatrix for Bitmaps.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeColorMatrixBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputBitmap: Bitmap,
+        matrix: FloatArray,
+        addVector: FloatArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of convolve for byte arrays.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeConvolve(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputArray: ByteArray,
+        coefficients: FloatArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of convolve for Bitmaps.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeConvolveBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputBitmap: Bitmap,
+        coefficients: FloatArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of histogram for byte arrays. The result is written to an
+     * IntArray supplied by the caller.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeHistogram(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputArray: IntArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of histogram for Bitmaps.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeHistogramBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputArray: IntArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of histogramDot for byte arrays. Unlike the public API
+     * validated by validateHistogramDotCoefficients, coefficients is not nullable here.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeHistogramDot(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputArray: IntArray,
+        coefficients: FloatArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of histogramDot for Bitmaps.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeHistogramDotBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputArray: IntArray,
+        coefficients: FloatArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of lut for byte arrays. Takes one translation table per
+     * channel.
+     * NOTE(review): the Kotlin caller is outside this view; presumably the tables come from a
+     * LookupTable.
+     */
+    private external fun nativeLut(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        outputArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        red: ByteArray,
+        green: ByteArray,
+        blue: ByteArray,
+        alpha: ByteArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of lut for Bitmaps. Takes one translation table per channel.
+     * NOTE(review): the Kotlin caller is outside this view.
+     */
+    private external fun nativeLutBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputBitmap: Bitmap,
+        red: ByteArray,
+        green: ByteArray,
+        blue: ByteArray,
+        alpha: ByteArray,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of lut3d for byte arrays. The ByteArray flavor of lut3d
+     * calls it with a freshly allocated outputArray and with the values and dimensions of
+     * the Rgba3dArray cube.
+     */
+    private external fun nativeLut3d(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        outputArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        cube: ByteArray,
+        cubeSizeX: Int,
+        cubeSizeY: Int,
+        cubeSizeZ: Int,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of lut3d for Bitmaps. The Bitmap flavor of lut3d calls it
+     * with an output Bitmap created by createCompatibleBitmap and with the values and
+     * dimensions of the Rgba3dArray cube.
+     */
+    private external fun nativeLut3dBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputBitmap: Bitmap,
+        cube: ByteArray,
+        cubeSizeX: Int,
+        cubeSizeY: Int,
+        cubeSizeZ: Int,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of resize for byte arrays. The ByteArray flavor of resize
+     * calls it with an outputArray of outputSizeX * outputSizeY * paddedSize(vectorSize) bytes.
+     */
+    private external fun nativeResize(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        vectorSize: Int,
+        inputSizeX: Int,
+        inputSizeY: Int,
+        outputArray: ByteArray,
+        outputSizeX: Int,
+        outputSizeY: Int,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of resize for Bitmaps. No sizes are passed explicitly; only
+     * the two Bitmaps and the optional restriction are handed to native code.
+     */
+    private external fun nativeResizeBitmap(
+        nativeHandle: Long,
+        inputBitmap: Bitmap,
+        outputBitmap: Bitmap,
+        restriction: Range2d?
+    )
+
+    /**
+     * Native (JNI) implementation of yuvToRgb. Called by yuvToRgb with an outputArray of
+     * sizeX * sizeY * 4 bytes and with format set to YuvFormat.value.
+     */
+    private external fun nativeYuvToRgb(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        outputArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        format: Int
+    )
+
+    /**
+     * Native (JNI) implementation of yuvToRgbBitmap. Called by yuvToRgbBitmap with a new
+     * ARGB_8888 output Bitmap. The last parameter, named value here, receives YuvFormat.value,
+     * i.e. it plays the role of the format parameter of nativeYuvToRgb.
+     */
+    private external fun nativeYuvToRgbBitmap(
+        nativeHandle: Long,
+        inputArray: ByteArray,
+        sizeX: Int,
+        sizeY: Int,
+        outputBitmap: Bitmap,
+        value: Int
+    )
+
+    /**
+     * Releases the native object identified by [nativeHandle].
+     *
+     * The handle is reset to 0 before it is destroyed, so a second invocation is a no-op
+     * instead of passing an already destroyed handle to native code. This matters because
+     * the method is public and can therefore be invoked more than once on the same instance.
+     * NOTE(review): this method has the name and signature of a JVM finalizer; confirm that
+     * reliance on finalization is intended.
+     */
+    fun finalize() {
+        val handle = nativeHandle
+        if (handle == 0L) return
+        nativeHandle = 0
+        destroyNative(handle)
+    }
+
+    // Creates the native side object as soon as the instance is constructed. The handle is
+    // kept in nativeHandle for the lifetime of the instance.
+    init {
+        nativeHandle = createNative()
+    }
+}
+
+/**
+ * Determines how a source buffer is blended into a destination buffer.
+ * See [Toolkit.blend].
+ *
+ * blend only works on 4 byte RGBA data. In the descriptions below, ".a" represents
+ * the alpha channel.
+ *
+ * @property value The integer that identifies the mode.
+ */
+enum class BlendingMode(val value: Int) {
+    /**
+     * dest = 0
+     *
+     * The destination is cleared, i.e. each pixel is set to (0, 0, 0, 0)
+     */
+    CLEAR(0),
+
+    /**
+     * dest = src
+     *
+     * Sets each pixel of the destination to the corresponding one in the source.
+     */
+    SRC(1),
+
+    /**
+     * dest = dest
+     *
+     * Leaves the destination untouched. This is a no-op.
+     */
+    DST(2),
+
+    /**
+     * dest = src + dest * (1.0 - src.a)
+     */
+    SRC_OVER(3),
+
+    /**
+     * dest = dest + src * (1.0 - dest.a)
+     */
+    DST_OVER(4),
+
+    /**
+     * dest = src * dest.a
+     */
+    SRC_IN(5),
+
+    /**
+     * dest = dest * src.a
+     */
+    DST_IN(6),
+
+    /**
+     * dest = src * (1.0 - dest.a)
+     */
+    SRC_OUT(7),
+
+    /**
+     * dest = dest * (1.0 - src.a)
+     */
+    DST_OUT(8),
+
+    /**
+     * dest.rgb = src.rgb * dest.a + (1.0 - src.a) * dest.rgb, dest.a = dest.a
+     */
+    SRC_ATOP(9),
+
+    /**
+     * dest.rgb = dest.rgb * src.a + (1.0 - dest.a) * src.rgb, dest.a = src.a
+     */
+    DST_ATOP(10),
+
+    /**
+     * dest = {src.r ^ dest.r, src.g ^ dest.g, src.b ^ dest.b, src.a ^ dest.a}
+     *
+     * Note: this is NOT the Porter/Duff XOR mode; this is a bitwise xor.
+     */
+    XOR(11),
+
+    /**
+     * dest = src * dest
+     */
+    MULTIPLY(12),
+
+    /**
+     * dest = min(src + dest, 1.0)
+     */
+    ADD(13),
+
+    /**
+     * dest = max(dest - src, 0.0)
+     */
+    SUBTRACT(14)
+}
+
+/**
+ * The translation tables used by the lut method: one table of 256 entries for each of the
+ * red, green, blue, and alpha channels. The entry at index i of a table is the replacement
+ * for the channel value i.
+ *
+ * Every table starts out as the identity mapping, i.e. 1 is replaced by 1, 2 by 2, etc.
+ * Overwrite only the entries for which a different translation is desired.
+ */
+class LookupTable {
+    var red = ByteArray(256) { channelValue -> channelValue.toByte() }
+    var green = ByteArray(256) { channelValue -> channelValue.toByte() }
+    var blue = ByteArray(256) { channelValue -> channelValue.toByte() }
+    var alpha = ByteArray(256) { channelValue -> channelValue.toByte() }
+}
+
+/**
+ * The YUV formats supported by yuvToRgb.
+ *
+ * @property value The integer passed to the native code to identify the format.
+ * NOTE(review): the values presumably mirror the android.graphics.ImageFormat constants of
+ * the same names; confirm before relying on it.
+ */
+enum class YuvFormat(val value: Int) {
+    NV21(0x11),
+    YV12(0x32315659),
+}
+
+/**
+ * Define a range of data to process.
+ *
+ * This class is used to restrict a [Toolkit] operation to a rectangular subset of the input
+ * tensor.
+ *
+ * The start indices are inclusive and the end indices are exclusive. The operations that
+ * check their restriction with validateRestriction require startX < endX and startY < endY,
+ * so the all-zero range created by the no-argument constructor is rejected by them.
+ *
+ * @property startX The index of the first value to be included on the X axis.
+ * @property endX The index after the last value to be included on the X axis.
+ * @property startY The index of the first value to be included on the Y axis.
+ * @property endY The index after the last value to be included on the Y axis.
+ */
+data class Range2d(
+    val startX: Int,
+    val endX: Int,
+    val startY: Int,
+    val endY: Int
+) {
+    /** Creates a range with all four bounds set to 0. */
+    constructor() : this(0, 0, 0, 0)
+}
+
+/**
+ * A three dimensional array of 4 byte vectors, backed by a flat [ByteArray]. This is the type
+ * of the translation cube used by lut3d.
+ *
+ * The vector at (x, y, z) occupies the four bytes that start at index
+ * ((z * sizeY + y) * sizeX + x) * 4 of [values], i.e. x varies fastest and z slowest.
+ *
+ * @property values The backing buffer. Must hold at least sizeX * sizeY * sizeZ * 4 bytes.
+ * @property sizeX The number of vectors along the X axis.
+ * @property sizeY The number of vectors along the Y axis.
+ * @property sizeZ The number of vectors along the Z axis.
+ * @throws IllegalArgumentException If [values] is too small for the given dimensions.
+ */
+class Rgba3dArray(val values: ByteArray, val sizeX: Int, val sizeY: Int, val sizeZ: Int) {
+    init {
+        require(values.size >= sizeX * sizeY * sizeZ * 4) {
+            "Rgba3dArray. values is too small for the given dimensions. " +
+                    "$sizeX*$sizeY*$sizeZ*4 > ${values.size}."
+        }
+    }
+
+    /**
+     * Returns a copy of the 4 byte vector stored at (x, y, z).
+     *
+     * @throws IllegalArgumentException If a coordinate is outside of the array.
+     */
+    operator fun get(x: Int, y: Int, z: Int): ByteArray {
+        val index = indexOfVector(x, y, z)
+        return values.copyOfRange(index, index + 4)
+    }
+
+    /**
+     * Copies the 4 bytes of value into the vector stored at (x, y, z).
+     *
+     * @throws IllegalArgumentException If value does not have exactly 4 bytes or if a
+     * coordinate is outside of the array.
+     */
+    operator fun set(x: Int, y: Int, z: Int, value: ByteArray) {
+        require(value.size == 4) {
+            "Rgba3dArray. A vector should have 4 bytes. ${value.size} provided."
+        }
+        value.copyInto(values, indexOfVector(x, y, z))
+    }
+
+    /** Returns the index in [values] of the first byte of the vector at (x, y, z). */
+    private fun indexOfVector(x: Int, y: Int, z: Int): Int {
+        require(x in 0 until sizeX) { "Rgba3dArray. x should be in 0 until $sizeX. $x provided." }
+        require(y in 0 until sizeY) { "Rgba3dArray. y should be in 0 until $sizeY. $y provided." }
+        require(z in 0 until sizeZ) { "Rgba3dArray. z should be in 0 until $sizeZ. $z provided." }
+        return ((z * sizeY + y) * sizeX + x) * 4
+    }
+}
+
+/**
+ * Verifies that a Bitmap can be processed by the Toolkit.
+ *
+ * The config must be ARGB_8888, or also ALPHA_8 when alphaAllowed is true, and the rows must
+ * not be padded, i.e. rowBytes must equal width * vectorSize.
+ *
+ * @param function The name of the calling method, used in the error messages.
+ * @param inputBitmap The Bitmap to check.
+ * @param alphaAllowed Whether ALPHA_8 bitmaps are accepted in addition to ARGB_8888 ones.
+ * @throws IllegalArgumentException If one of the checks fails.
+ */
+private fun validateBitmap(
+    function: String,
+    inputBitmap: Bitmap,
+    alphaAllowed: Boolean = true
+) {
+    if (alphaAllowed) {
+        require(
+            inputBitmap.config == Bitmap.Config.ARGB_8888 ||
+                    inputBitmap.config == Bitmap.Config.ALPHA_8
+        ) {
+            "$externalName. $function supports only ARGB_8888 and ALPHA_8 bitmaps. " +
+                    "${inputBitmap.config} provided."
+        }
+    } else {
+        require(inputBitmap.config == Bitmap.Config.ARGB_8888) {
+            "$externalName. $function supports only ARGB_8888. " +
+                    "${inputBitmap.config} provided."
+        }
+    }
+    // The config was validated above, so vectorSize() cannot throw here.
+    require(inputBitmap.width * vectorSize(inputBitmap) == inputBitmap.rowBytes) {
+        "$externalName $function. Only bitmaps with rowSize equal to the width * vectorSize are " +
+                "currently supported. Provided were rowBytes=${inputBitmap.rowBytes}, " +
+                "width=${inputBitmap.width}, and vectorSize=${vectorSize(inputBitmap)}."
+    }
+}
+
+/** Creates a new Bitmap that has the same width, height, and config as inputBitmap. */
+private fun createCompatibleBitmap(inputBitmap: Bitmap) =
+    Bitmap.createBitmap(inputBitmap.width, inputBitmap.height, inputBitmap.config)
+
+/**
+ * Verifies the coefficients given to histogramDot.
+ *
+ * A null array is accepted as is. Otherwise there must be exactly vectorSize coefficients,
+ * none of them may be negative, and their sum may not exceed 1.
+ *
+ * @param coefficients The coefficients to check, or null.
+ * @param vectorSize The expected number of coefficients.
+ * @throws IllegalArgumentException If one of the checks fails.
+ */
+private fun validateHistogramDotCoefficients(
+    coefficients: FloatArray?,
+    vectorSize: Int
+) {
+    // Nothing to verify when no coefficients were supplied.
+    if (coefficients == null) return
+
+    require(coefficients.size == vectorSize) {
+        "$externalName histogramDot. The coefficients should be null or have $vectorSize values."
+    }
+
+    var sum = 0f
+    for (i in coefficients.indices) {
+        require(coefficients[i] >= 0.0f) {
+            "$externalName histogramDot. Coefficients should not be negative. " +
+                    "Coefficient $i was ${coefficients[i]}."
+        }
+        sum += coefficients[i]
+    }
+    require(sum <= 1.0f) {
+        "$externalName histogramDot. Coefficients should add to 1 or less. Their sum is $sum."
+    }
+}
+
+/**
+ * Verifies that restriction, when not null, fits within the dimensions of bitmap.
+ * Delegates to the overload that takes explicit sizes.
+ */
+private fun validateRestriction(tag: String, bitmap: Bitmap, restriction: Range2d? = null) {
+    validateRestriction(
+        tag = tag,
+        sizeX = bitmap.width,
+        sizeY = bitmap.height,
+        restriction = restriction
+    )
+}
+
+/**
+ * Verifies that restriction, when not null, describes a non-empty range that lies entirely
+ * within a buffer of sizeX by sizeY cells.
+ *
+ * @param tag The name of the calling method, used in the error messages.
+ * @param sizeX The width of the buffer the restriction applies to.
+ * @param sizeY The height of the buffer the restriction applies to.
+ * @param restriction The range to check. Nothing is checked when it is null.
+ * @throws IllegalArgumentException If the range is empty, starts before index 0, or extends
+ * past sizeX or sizeY.
+ */
+private fun validateRestriction(tag: String, sizeX: Int, sizeY: Int, restriction: Range2d? = null) {
+    if (restriction == null) return
+    // The upper bounds are checked below; without this check a negative start index would
+    // pass validation and be handed to the native code.
+    require(restriction.startX >= 0 && restriction.startY >= 0) {
+        "$externalName $tag. Restriction startX and startY should not be negative. " +
+                "${restriction.startX} and ${restriction.startY} were provided respectively."
+    }
+    require(restriction.startX < sizeX && restriction.endX <= sizeX) {
+        "$externalName $tag. sizeX should be greater than restriction.startX and greater " +
+                "or equal to restriction.endX. $sizeX, ${restriction.startX}, " +
+                "and ${restriction.endX} were provided respectively."
+    }
+    require(restriction.startY < sizeY && restriction.endY <= sizeY) {
+        "$externalName $tag. sizeY should be greater than restriction.startY and greater " +
+                "or equal to restriction.endY. $sizeY, ${restriction.startY}, " +
+                "and ${restriction.endY} were provided respectively."
+    }
+    require(restriction.startX < restriction.endX) {
+        "$externalName $tag. Restriction startX should be less than endX. " +
+                "${restriction.startX} and ${restriction.endX} were provided respectively."
+    }
+    require(restriction.startY < restriction.endY) {
+        "$externalName $tag. Restriction startY should be less than endY. " +
+                "${restriction.startY} and ${restriction.endY} were provided respectively."
+    }
+}
+
+/**
+ * Returns the number of bytes per pixel of bitmap: 4 for ARGB_8888 and 1 for ALPHA_8.
+ *
+ * @throws IllegalArgumentException For any other config.
+ */
+private fun vectorSize(bitmap: Bitmap): Int = when (bitmap.config) {
+    Bitmap.Config.ALPHA_8 -> 1
+    Bitmap.Config.ARGB_8888 -> 4
+    else -> throw IllegalArgumentException(
+        "$externalName. Only ARGB_8888 and ALPHA_8 Bitmap are supported."
+    )
+}
+
+/** Returns the number of bytes an element occupies: 3 byte vectors are padded to 4 bytes. */
+private fun paddedSize(vectorSize: Int) = when (vectorSize) {
+    3 -> 4
+    else -> vectorSize
+}
diff --git a/toolkit/test/AllTests.kt b/toolkit/test/AllTests.kt
new file mode 100644
index 0000000..5833795
--- /dev/null
+++ b/toolkit/test/AllTests.kt
@@ -0,0 +1,1244 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Rename to something better
+package com.example.testapp
+
+import android.content.Context
+import android.graphics.Bitmap
+import android.graphics.BitmapFactory
+import android.renderscript.RenderScript
+import android.renderscript.toolkit.BlendingMode
+import android.renderscript.toolkit.LookupTable
+import android.renderscript.toolkit.Range2d
+import android.renderscript.toolkit.Rgba3dArray
+import android.renderscript.toolkit.Toolkit
+import android.renderscript.toolkit.YuvFormat
+import kotlin.math.abs
+import kotlin.math.min
+
+/**
+ * The dimensions of a test buffer, with an optional restriction to apply to the operation.
+ *
+ * @property sizeX The width of the buffer.
+ * @property sizeY The height of the buffer.
+ * @property restriction The range to restrict the operation to, or null for the whole buffer.
+ */
+data class TestLayout(
+    val sizeX: Int,
+    val sizeY: Int,
+    val restriction: Range2d?
+)
+
+// List of dimensions (sizeX, sizeY), with an optional restriction, to try when generating
+// random data.
+val commonLayoutsToTry = listOf(
+    // Small layouts to start with
+    TestLayout(3, 4, null),
+    TestLayout(3, 4, Range2d(0, 1, 0, 3)),
+    TestLayout(3, 4, Range2d(2, 3, 1, 4)),
+    TestLayout(10, 14, null),
+    TestLayout(10, 14, Range2d(2, 3, 8, 14)),
+    // The size of most CTS intrinsic tests
+    TestLayout(160, 100, null),
+    TestLayout(125, 227, Range2d(50, 125, 100, 227)),
+    // A larger one
+    TestLayout(800, 600, null),
+    // Weirdly shaped ones
+    TestLayout(1, 1, null), // A single item
+    // TODO This size makes Intrinsic Blur fail.
+    TestLayout(16000, 1, null), // One large row
+    TestLayout(1, 16000, null), // One large column
+    // A very large test
+    TestLayout(1024, 2048, null),
+)
+
+
+class Tester(context: Context, private val validate: Boolean) {
+    // The RenderScript context used to run the Intrinsics the Toolkit is compared against.
+    private val renderscriptContext = RenderScript.create(context)
+    // The Toolkit instance under test.
+    private val toolkit = Toolkit()
+    // Two images decoded from the app resources, used by the Bitmap flavors of the tests.
+    private val testImage1 = BitmapFactory.decodeResource(context.resources, R.drawable.img800x450a)
+    private val testImage2 = BitmapFactory.decodeResource(context.resources, R.drawable.img800x450b)
+
+    // Fail early if the decoded images are not in the layout the tests rely on.
+    init {
+        validateTestImage(testImage1)
+        validateTestImage(testImage2)
+    }
+
+    /**
+     * Checks that a test image has the layout the tests expect: ARGB_8888 pixels, rows
+     * without padding, and a byte count that is exactly rowBytes * height.
+     */
+    private fun validateTestImage(bitmap: Bitmap) {
+        val isArgb8888 = bitmap.config == Bitmap.Config.ARGB_8888
+        require(isArgb8888)
+
+        val hasNoRowPadding = bitmap.rowBytes == bitmap.width * 4
+        require(hasNoRowPadding) {
+            "Can't handle bitmaps that have extra padding. " +
+                "${bitmap.rowBytes} != ${bitmap.width} * 4."
+        }
+
+        val hasExpectedByteCount = bitmap.byteCount == bitmap.rowBytes * bitmap.height
+        require(hasExpectedByteCount)
+    }
+
+    /** Destroys the RenderScript context created by this Tester. */
+    fun destroy() {
+        renderscriptContext.destroy()
+    }
+
+    /**
+     * Runs the test of every intrinsic in turn, printing progress as it goes.
+     *
+     * @param timer Receives the timing measurements made by the individual tests.
+     * @return One line per test, saying whether it succeeded or failed.
+     */
+    @ExperimentalUnsignedTypes
+    fun testAll(timer: TimingTracker): String {
+        val tests  = listOf(
+            Pair("blend", ::testBlend),
+            Pair("blur", ::testBlur),
+            Pair("colorMatrix", ::testColorMatrix),
+            Pair("convolve", ::testConvolve),
+            Pair("histogram", ::testHistogram),
+            Pair("lut", ::testLut),
+            Pair("lut3d", ::testLut3d),
+            Pair("resize", ::testResize),
+            Pair("yuvToRgb", ::testYuvToRgb),
+        )
+        val report = tests.map { (name, test) ->
+            println("Doing $name")
+            val outcome = if (test(timer)) "succeeded" else "FAILED! FAILED! FAILED! FAILED!"
+            val line = "$name " + outcome
+            println("      $line")
+            line
+        }
+
+        return report.joinToString("\n")
+    }
+
+    /**
+     * Tests blend with every BlendingMode: on the two test images, with and without a
+     * restriction, and on random data for each of the common layouts.
+     *
+     * For a given mode all three groups are run even if an earlier one failed. Iteration over
+     * the modes stops at the first mode that fails.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testBlend(timer: TimingTracker): Boolean {
+        return BlendingMode.values().all { mode ->
+            val wholeImageOk = testOneBitmapBlend(timer, testImage1, testImage2, mode, null)
+            val restrictedOk = testOneBitmapBlend(
+                timer, testImage1, testImage2, mode,
+                Range2d(6, 23, 2, 4)
+            )
+            val randomOk = commonLayoutsToTry.all { layout ->
+                testOneRandomBlend(timer, layout.sizeX, layout.sizeY, mode, layout.restriction)
+            }
+            wholeImageOk and restrictedOk and randomOk
+        }
+    }
+
+    /**
+     * Blends two random buffers with the Intrinsic, the Toolkit, and the reference code, and
+     * compares the three results.
+     *
+     * When validation is disabled only the Intrinsic and the Toolkit are run, for timing
+     * purposes, and true is returned.
+     *
+     * @return True if validation is disabled or validateSame accepts the three results.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomBlend(
+        timer: TimingTracker,
+        sizeX: Int,
+        sizeY: Int,
+        mode: BlendingMode,
+        restriction: Range2d?
+    ): Boolean {
+        // Fixed seeds are passed so that the same data is generated on every run.
+        val sourceArray = randomByteArray(0x50521f0, sizeX, sizeY, 4)
+        val destArray = randomByteArray(0x2932147, sizeX, sizeY, 4)
+        // Make clones because these will be modified by the blend.
+        val intrinsicDestArray = destArray.clone()
+        val referenceDestArray = destArray.clone()
+        val toolkitDestArray = destArray.clone()
+
+        timer.measure("IntrinsicBlend") {
+            intrinsicBlend(
+                renderscriptContext, mode, sourceArray, intrinsicDestArray, sizeX, sizeY,
+                restriction
+            )
+        }
+        timer.measure("ToolkitBlend") {
+            toolkit.blend(mode, sourceArray, toolkitDestArray, sizeX, sizeY, restriction)
+        }
+        // Without validation there is no need to run the reference implementation.
+        if (!validate) return true
+
+        timer.measure("ReferenceBlend") {
+            referenceBlend(mode, sourceArray, referenceDestArray, sizeX, sizeY, restriction)
+        }
+
+        // The trailing lambda logs the inputs and the three outputs.
+        // NOTE(review): presumably validateSame invokes it only on a mismatch; confirm.
+        return validateSame(
+            "Blend_$mode", intrinsicDestArray, referenceDestArray, toolkitDestArray
+        ) {
+            println("blend $mode ($sizeX, $sizeY) $restriction")
+            logArray("Blend_$mode src", sourceArray, 48)
+            logArray("Blend_$mode dst", destArray, 48)
+            logArray("Blend_$mode reference out", referenceDestArray, 48)
+            logArray("Blend_$mode intrinsic out", intrinsicDestArray, 48)
+            logArray("Blend_$mode toolkit   out", toolkitDestArray, 48)
+        }
+    }
+
+    /**
+     * Blends sourceBitmap into copies of destBitmap with the Intrinsic, the Toolkit, and the
+     * reference code, and compares the three results. destBitmap itself is not passed to any
+     * of the blend calls.
+     *
+     * When validation is disabled only the Intrinsic and the Toolkit are run, for timing
+     * purposes, and true is returned.
+     *
+     * @return True if validation is disabled or validateSame accepts the three results.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapBlend(
+        timer: TimingTracker,
+        sourceBitmap: Bitmap,
+        destBitmap: Bitmap,
+        mode: BlendingMode,
+        restriction: Range2d?
+    ): Boolean {
+        // Make clones because these will be modified by the blend.
+        val intrinsicDestBitmap = duplicateBitmap(destBitmap)
+        val toolkitDestBitmap = duplicateBitmap(destBitmap)
+        val referenceDestBitmap = duplicateBitmap(destBitmap)
+
+        timer.measure("IntrinsicBlend") {
+            intrinsicBlend(
+                renderscriptContext, mode, sourceBitmap, intrinsicDestBitmap, restriction
+            )
+        }
+        timer.measure("ToolkitBlend") {
+            toolkit.blend(mode, sourceBitmap, toolkitDestBitmap, restriction)
+        }
+        // Without validation there is no need to run the reference implementation.
+        if (!validate) return true
+
+        // The reference implementation works on byte arrays, so the bitmaps are converted.
+        val referenceDestArray = getBitmapBytes(referenceDestBitmap)
+        timer.measure("ReferenceBlend") {
+            referenceBlend(
+                mode, getBitmapBytes(sourceBitmap), referenceDestArray, sourceBitmap.width,
+                sourceBitmap.height, restriction
+            )
+        }
+
+        val intrinsicDestArray = getBitmapBytes(intrinsicDestBitmap)
+        val toolkitDestArray = getBitmapBytes(toolkitDestBitmap)
+        return validateSame(
+            "BlendBitmap_$mode", intrinsicDestArray, referenceDestArray, toolkitDestArray
+        ) {
+            println("BlendBitmap $mode $restriction")
+            //logArray("BlendBitmap_$mode src", sourceArray, 48)
+            //logArray("BlendBitmap_$mode dst", destArray, 48)
+            logArray("BlendBitmap_$mode reference out", referenceDestArray, 48)
+            logArray("BlendBitmap_$mode intrinsic out", intrinsicDestArray, 48)
+            logArray("BlendBitmap_$mode toolkit   out", toolkitDestArray, 48)
+        }
+    }
+
+    /**
+     * Tests blur with radii 1, 3, 8, and 25: on the first test image, with and without a
+     * restriction, and on random data of vector size 1 and 4 for each of the common layouts.
+     *
+     * For a given radius all three groups are run even if an earlier one failed. Iteration
+     * over the radii stops at the first radius that fails.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testBlur(timer: TimingTracker): Boolean {
+        return arrayOf(1, 3, 8, 25).all { radius ->
+            val wholeImageOk = testOneBitmapBlur(timer, testImage1, radius, null)
+            val restrictedOk = testOneBitmapBlur(timer, testImage1, radius, Range2d(6, 23, 2, 4))
+            val randomOk = commonLayoutsToTry.all { layout ->
+                arrayOf(1, 4).all { vectorSize ->
+                    testOneRandomBlur(
+                        timer, vectorSize, layout.sizeX, layout.sizeY, radius, layout.restriction
+                    )
+                }
+            }
+            wholeImageOk and restrictedOk and randomOk
+        }
+    }
+
+    /**
+     * Blurs a random buffer with the Intrinsic, the Toolkit, and the reference code, and
+     * compares the three results.
+     *
+     * When validation is disabled only the Intrinsic and the Toolkit are run, for timing
+     * purposes, and true is returned.
+     *
+     * @return True if validation is disabled or validateSame accepts the three results.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomBlur(
+        timer: TimingTracker,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        radius: Int,
+        restriction: Range2d?
+    ): Boolean {
+        // A fixed seed is passed so that the same data is generated on every run.
+        val inputArray = randomByteArray(0x50521f0, sizeX, sizeY, vectorSize)
+        val intrinsicOutArray = timer.measure("IntrinsicBlur") {
+            intrinsicBlur(
+                renderscriptContext, inputArray, vectorSize, sizeX, sizeY, radius, restriction
+            )
+        }
+        val toolkitOutArray = timer.measure("ToolkitBlur") {
+            toolkit.blur(inputArray, vectorSize, sizeX, sizeY, radius, restriction)
+        }
+        // Without validation there is no need to run the reference implementation.
+        if (!validate) return true
+
+        val referenceOutArray = timer.measure("ReferenceBlur") {
+            referenceBlur(inputArray, vectorSize, sizeX, sizeY, radius, restriction)
+        }
+        return validateSame("blur", intrinsicOutArray, referenceOutArray, toolkitOutArray) {
+            println("blur $vectorSize ($sizeX, $sizeY) radius = $radius $restriction")
+            logArray("blur input        ", inputArray)
+            logArray("blur reference out", referenceOutArray)
+            logArray("blur intrinsic out", intrinsicOutArray)
+            logArray("blur toolkit   out", toolkitOutArray)
+        }
+    }
+
+    /**
+     * Blurs a Bitmap with the Intrinsic, the Toolkit, and the reference code, and compares
+     * the three results.
+     *
+     * When validation is disabled only the Intrinsic and the Toolkit are run, for timing
+     * purposes, and true is returned.
+     *
+     * @return True if validation is disabled or validateSame accepts the three results.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapBlur(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        radius: Int,
+        restriction: Range2d?
+    ): Boolean {
+        val intrinsicOutArray = timer.measure("IntrinsicBlur") {
+            intrinsicBlur(renderscriptContext, bitmap, radius, restriction)
+        }
+
+        val toolkitOutBitmap = timer.measure("ToolkitBlur") {
+            toolkit.blur(bitmap, radius, restriction)
+        }
+        // Without validation there is no need to run the reference implementation.
+        if (!validate) return true
+
+        // The reference implementation works on byte arrays, so the bitmap is converted.
+        val referenceOutArray = timer.measure("ReferenceBlur") {
+            referenceBlur(
+                getBitmapBytes(bitmap),
+                vectorSizeOfBitmap(bitmap),
+                bitmap.width,
+                bitmap.height,
+                radius,
+                restriction
+            )
+        }
+
+        // The Toolkit returns a Bitmap; its bytes are extracted for the comparison.
+        val toolkitOutArray = getBitmapBytes(toolkitOutBitmap)
+        return validateSame("blur", intrinsicOutArray, referenceOutArray, toolkitOutArray) {
+            println("BlurBitmap ${bitmap.config} $radius $restriction")
+            logArray("blur reference out", referenceOutArray)
+            logArray("blur intrinsic out", intrinsicOutArray)
+            logArray("blur toolkit   out", toolkitOutArray)
+        }
+    }
+
+    /**
+     * Selects which matrix the color matrix tests feed to the three implementations.
+     *
+     * The tests iterate over every entry via `values()`.
+     */
+    enum class ColorMatrixConversionType {
+        // The tests use toolkit.rgbToYuvMatrix.
+        RGB_TO_YUV,
+        // The tests use toolkit.yuvToRgbMatrix.
+        YUV_TO_RGB,
+        // The tests use toolkit.greyScaleColorMatrix.
+        GREYSCALE,
+        // The tests use a 4 x 4 matrix produced by randomFloatArray with a fixed seed.
+        RANDOM
+    }
+
+    /**
+     * Runs the color matrix comparisons for every [ColorMatrixConversionType].
+     *
+     * For each conversion this tests the whole of `testImage1`, a restricted range of it, and
+     * random arrays for every layout in `commonLayoutsToTry` with input and output vector sizes
+     * from 1 to 4. Within one conversion all three groups are always run (`and` does not
+     * short-circuit); iteration stops at the first conversion that fails.
+     *
+     * @param timer records the duration of each implementation
+     * @return `true` if every comparison succeeded
+     */
+    @ExperimentalUnsignedTypes
+    private fun testColorMatrix(timer: TimingTracker): Boolean {
+        return ColorMatrixConversionType.values().all { conversion ->
+            val wholeBitmapOk = testOneBitmapColorMatrix(timer, testImage1, conversion, null)
+            val restrictedBitmapOk =
+                testOneBitmapColorMatrix(timer, testImage1, conversion, Range2d(6, 23, 2, 4))
+            val randomArraysOk = commonLayoutsToTry.all { (sizeX, sizeY, restriction) ->
+                (1..4).all { inputVectorSize ->
+                    (1..4).all { outputVectorSize ->
+                        testOneRandomColorMatrix(
+                            timer, inputVectorSize, sizeX, sizeY, outputVectorSize, conversion,
+                            restriction
+                        )
+                    }
+                }
+            }
+            wholeBitmapOk and restrictedBitmapOk and randomArraysOk
+        }
+    }
+
+    /**
+     * Applies a color matrix to a randomly generated array with the RenderScript Intrinsic and
+     * with the Toolkit, timing both, then optionally checks them against the Kotlin reference.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param inputVectorSize vector size of the input; the generated array uses its padded size
+     * @param sizeX first dimension of the generated input
+     * @param sizeY second dimension of the generated input
+     * @param outputVectorSize vector size requested for the output
+     * @param conversion selects the matrix to apply
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomColorMatrix(
+        timer: TimingTracker,
+        inputVectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        outputVectorSize: Int,
+        conversion: ColorMatrixConversionType,
+        restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(inputVectorSize))
+        val offsets = randomFloatArray(0x243238, 4, 1, 1, 0.3f)
+        val coefficients = when (conversion) {
+            ColorMatrixConversionType.RGB_TO_YUV -> toolkit.rgbToYuvMatrix
+            ColorMatrixConversionType.YUV_TO_RGB -> toolkit.yuvToRgbMatrix
+            ColorMatrixConversionType.GREYSCALE -> toolkit.greyScaleColorMatrix
+            ColorMatrixConversionType.RANDOM -> randomFloatArray(0x234348, 4, 4, 1)
+        }
+
+        val fromIntrinsic = timer.measure("IntrinsicColorMatrix") {
+            intrinsicColorMatrix(
+                renderscriptContext, conversion, source, inputVectorSize, sizeX, sizeY,
+                outputVectorSize, coefficients, offsets, restriction
+            )
+        }
+        val fromToolkit = timer.measure("ToolkitColorMatrix") {
+            toolkit.colorMatrix(
+                source, inputVectorSize, sizeX, sizeY, outputVectorSize, coefficients, offsets,
+                restriction
+            )
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceColorMatrix") {
+            referenceColorMatrix(
+                source,
+                inputVectorSize,
+                sizeX,
+                sizeY,
+                outputVectorSize,
+                coefficients,
+                offsets,
+                restriction
+            )
+        }
+
+        // Three-component outputs are flagged to validateSame through its extra boolean argument.
+        val threeComponentOutput = outputVectorSize == 3
+        return validateSame(
+            "colorMatrix",
+            fromIntrinsic,
+            fromReference,
+            fromToolkit,
+            threeComponentOutput
+        ) {
+            println("colorMatrix ($sizeX, $sizeY) $inputVectorSize->$outputVectorSize $restriction")
+            logArray("colorMatrix matrix   ", coefficients, 16)
+            logArray("colorMatrix addVector", offsets, 4)
+            logArray("colorMatrix in           ", source)
+            logArray("colorMatrix reference out", fromReference, 300)
+            logArray("colorMatrix intrinsic out", fromIntrinsic, 300)
+            logArray("colorMatrix toolkit   out", fromToolkit, 300)
+        }
+    }
+
+    /**
+     * Applies a color matrix to [bitmap] with the RenderScript Intrinsic and with the Toolkit,
+     * timing both, then optionally checks them against the Kotlin reference run on the bitmap
+     * bytes.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param conversion selects the matrix to apply
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapColorMatrix(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        conversion: ColorMatrixConversionType,
+        restriction: Range2d?
+    ): Boolean {
+        val offsets = randomFloatArray(0x243238, 4, 1, 1, 0.3f)
+        val coefficients = when (conversion) {
+            ColorMatrixConversionType.RGB_TO_YUV -> toolkit.rgbToYuvMatrix
+            ColorMatrixConversionType.YUV_TO_RGB -> toolkit.yuvToRgbMatrix
+            ColorMatrixConversionType.GREYSCALE -> toolkit.greyScaleColorMatrix
+            ColorMatrixConversionType.RANDOM -> randomFloatArray(0x234348, 4, 4, 1)
+        }
+
+        val fromIntrinsic = timer.measure("IntrinsicColorMatrix") {
+            intrinsicColorMatrix(
+                renderscriptContext,
+                conversion,
+                bitmap,
+                coefficients,
+                offsets,
+                restriction
+            )
+        }
+        val convertedBitmap = timer.measure("ToolkitColorMatrix") {
+            toolkit.colorMatrix(bitmap, coefficients, offsets, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceColorMatrix") {
+            // Input and output use the same vector size, taken from the bitmap itself.
+            val pixels = getBitmapBytes(bitmap)
+            val inputVectorSize = vectorSizeOfBitmap(bitmap)
+            val width = bitmap.width
+            val height = bitmap.height
+            val outputVectorSize = vectorSizeOfBitmap(bitmap)
+            referenceColorMatrix(
+                pixels, inputVectorSize, width, height, outputVectorSize, coefficients, offsets,
+                restriction
+            )
+        }
+        val fromToolkit = getBitmapBytes(convertedBitmap)
+
+        return validateSame("ColorMatrix", fromIntrinsic, fromReference, fromToolkit) {
+            println("colorMatrixBitmap $restriction")
+            logArray("colorMatrixBitmap matrix   ", coefficients, 16)
+            logArray("colorMatrixBitmap addVector", offsets, 4)
+            logArray("colorMatrixBitmap reference out", fromReference)
+            logArray("colorMatrixBitmap intrinsic out", fromIntrinsic)
+            logArray("colorMatrixBitmap toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Runs the convolution comparisons with one 3 x 3 and one 5 x 5 set of random coefficients.
+     *
+     * For each coefficient set this tests the whole of `testImage1`, a restricted range of it,
+     * and random arrays for every layout in `commonLayoutsToTry` with vector sizes from 1 to 4.
+     * Within one coefficient set all three groups are always run (`and` does not short-circuit).
+     *
+     * @param timer records the duration of each implementation
+     * @return `true` if every comparison succeeded
+     */
+    @ExperimentalUnsignedTypes
+    private fun testConvolve(timer: TimingTracker): Boolean {
+        val kernel3x3 = randomFloatArray(0x2937021, 3, 3, 1, 0.1f)
+        val kernel5x5 = randomFloatArray(0x2937021, 5, 5, 1, 0.05f)
+
+        return listOf(kernel3x3, kernel5x5).all { coefficients ->
+            val wholeBitmapOk = testOneBitmapConvolve(timer, testImage1, coefficients, null)
+            val restrictedBitmapOk =
+                testOneBitmapConvolve(timer, testImage1, coefficients, Range2d(6, 23, 2, 4))
+            val randomArraysOk = commonLayoutsToTry.all { (sizeX, sizeY, restriction) ->
+                (1..4).all { vectorSize ->
+                    testOneRandomConvolve(
+                        timer, vectorSize, sizeX, sizeY, coefficients, restriction
+                    )
+                }
+            }
+            wholeBitmapOk and restrictedBitmapOk and randomArraysOk
+        }
+    }
+
+    /**
+     * Convolves a randomly generated array with the RenderScript Intrinsic and with the Toolkit,
+     * timing both, then optionally checks them against the Kotlin reference implementation.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param vectorSize vector size of the input; the generated array uses its padded size
+     * @param sizeX first dimension of the generated input
+     * @param sizeY second dimension of the generated input
+     * @param coefficients convolution coefficients; 9 of them selects the "convolve3x3" label,
+     * any other count the "convolve5x5" label
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomConvolve(
+        timer: TimingTracker,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        coefficients: FloatArray,
+        restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(vectorSize))
+
+        val fromIntrinsic = timer.measure("IntrinsicConvolve") {
+            intrinsicConvolve(
+                renderscriptContext,
+                source,
+                vectorSize,
+                sizeX,
+                sizeY,
+                coefficients,
+                restriction
+            )
+        }
+        val fromToolkit = timer.measure("ToolkitConvolve") {
+            toolkit.convolve(source, vectorSize, sizeX, sizeY, coefficients, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceConvolve") {
+            referenceConvolve(source, vectorSize, sizeX, sizeY, coefficients, restriction)
+        }
+
+        val kernelLabel = when (coefficients.size) {
+            9 -> "convolve3x3"
+            else -> "convolve5x5"
+        }
+        val task = "$kernelLabel $vectorSize"
+        return validateSame(task, fromIntrinsic, fromReference, fromToolkit) {
+            println("Convolve $vectorSize ($sizeX, $sizeY) $restriction")
+            logArray("Convolve coefficients", coefficients, 25)
+            logArray("Convolve in           ", source)
+            logArray("Convolve reference out", fromReference)
+            logArray("Convolve intrinsic out", fromIntrinsic)
+            logArray("Convolve toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Convolves [bitmap] with the RenderScript Intrinsic and with the Toolkit, timing both, then
+     * optionally checks them against the Kotlin reference run on the bitmap bytes.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param coefficients convolution coefficients; 9 of them selects the "convolve3x3" label,
+     * any other count the "convolve5x5" label
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapConvolve(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        coefficients: FloatArray,
+        restriction: Range2d?
+    ): Boolean {
+        val fromIntrinsic = timer.measure("IntrinsicConvolve") {
+            intrinsicConvolve(renderscriptContext, bitmap, coefficients, restriction)
+        }
+        val convolvedBitmap = timer.measure("ToolkitConvolve") {
+            toolkit.convolve(bitmap, coefficients, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceConvolve") {
+            val pixels = getBitmapBytes(bitmap)
+            val channels = vectorSizeOfBitmap(bitmap)
+            referenceConvolve(
+                pixels, channels, bitmap.width, bitmap.height, coefficients, restriction
+            )
+        }
+
+        val task = when (coefficients.size) {
+            9 -> "convolve3x3"
+            else -> "convolve5x5"
+        }
+        val fromToolkit = getBitmapBytes(convolvedBitmap)
+        return validateSame(task, fromIntrinsic, fromReference, fromToolkit) {
+            println("ConvolveBitmap $restriction")
+            logArray("ConvolveBitmap coefficients", coefficients, 25)
+            // The input bitmap's bytes are not logged here.
+            logArray("ConvolveBitmap reference out", fromReference)
+            logArray("ConvolveBitmap intrinsic out", fromIntrinsic)
+            logArray("ConvolveBitmap toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Runs the histogram and histogram-dot comparisons.
+     *
+     * The bitmap tests on `testImage1` and the random-array group are all executed regardless of
+     * earlier failures (`and` does not short-circuit). Inside the random-array group, the
+     * three checks for one layout and vector size are chained with `&&`, so a failing check
+     * skips the ones after it, and iteration stops at the first failing combination.
+     *
+     * @param timer records the duration of each implementation
+     * @return `true` if every comparison succeeded
+     */
+    @ExperimentalUnsignedTypes
+    private fun testHistogram(timer: TimingTracker): Boolean {
+        val coefficients = floatArrayOf(0.1f, 0.3f, 0.5f, 0.05f)
+
+        val histogramOk = testOneBitmapHistogram(timer, testImage1, null)
+        val histogramRestrictedOk =
+            testOneBitmapHistogram(timer, testImage1, Range2d(6, 23, 2, 4))
+        val dotNoCoefficientsOk = testOneBitmapHistogramDot(timer, testImage1, null, null)
+        val dotOk = testOneBitmapHistogramDot(timer, testImage1, coefficients, null)
+        val dotRestrictedOk =
+            testOneBitmapHistogramDot(timer, testImage1, coefficients, Range2d(6, 23, 2, 4))
+
+        val randomArraysOk = commonLayoutsToTry.all { (sizeX, sizeY, restriction) ->
+            (1..4).all { vectorSize ->
+                testOneRandomHistogram(timer, vectorSize, sizeX, sizeY, restriction) &&
+                    testOneRandomHistogramDot(timer, vectorSize, sizeX, sizeY, null, restriction) &&
+                    testOneRandomHistogramDot(
+                        timer, vectorSize, sizeX, sizeY,
+                        // Only as many coefficients as there are vector components are passed.
+                        coefficients.sliceArray(0 until vectorSize),
+                        restriction
+                    )
+            }
+        }
+
+        return histogramOk and histogramRestrictedOk and dotNoCoefficientsOk and dotOk and
+            dotRestrictedOk and randomArraysOk
+    }
+
+    /**
+     * Computes the histogram of a randomly generated array with the RenderScript Intrinsic and
+     * with the Toolkit, timing both, then optionally checks them against the Kotlin reference.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param vectorSize vector size of the input; the generated array uses its padded size
+     * @param sizeX first dimension of the generated input
+     * @param sizeY second dimension of the generated input
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomHistogram(
+        timer: TimingTracker,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(vectorSize))
+
+        val fromIntrinsic = timer.measure("IntrinsicHistogram") {
+            intrinsicHistogram(renderscriptContext, source, vectorSize, sizeX, sizeY, restriction)
+        }
+        val fromToolkit = timer.measure("ToolkitHistogram") {
+            toolkit.histogram(source, vectorSize, sizeX, sizeY, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceHistogram") {
+            referenceHistogram(source, vectorSize, sizeX, sizeY, restriction)
+        }
+
+        return validateSame("histogram", fromIntrinsic, fromReference, fromToolkit, 0) {
+            println("histogram $vectorSize ($sizeX, $sizeY) $restriction")
+            logArray("histogram in           ", source, 200)
+            logArray("histogram reference out", fromReference, 200)
+            logArray("histogram intrinsic out", fromIntrinsic, 200)
+            logArray("histogram toolkit   out", fromToolkit, 200)
+        }
+    }
+
+    /**
+     * Computes the histogram of [bitmap] with the RenderScript Intrinsic and with the Toolkit,
+     * timing both, then optionally checks them against the Kotlin reference run on the bitmap
+     * bytes.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapHistogram(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        restriction: Range2d?
+    ): Boolean {
+        val fromIntrinsic = timer.measure("IntrinsicHistogram") {
+            intrinsicHistogram(renderscriptContext, bitmap, restriction)
+        }
+        val fromToolkit = timer.measure("ToolkitHistogram") {
+            toolkit.histogram(bitmap, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceHistogram") {
+            val pixels = getBitmapBytes(bitmap)
+            val channels = vectorSizeOfBitmap(bitmap)
+            referenceHistogram(pixels, channels, bitmap.width, bitmap.height, restriction)
+        }
+
+        return validateSame("histogram", fromIntrinsic, fromReference, fromToolkit, 0) {
+            println("HistogramBitmap $restriction")
+            logArray("HistogramBitmap reference out", fromReference)
+            logArray("HistogramBitmap intrinsic out", fromIntrinsic)
+            logArray("HistogramBitmap toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Computes the histogram-dot of a randomly generated array with the RenderScript Intrinsic
+     * and with the Toolkit, timing both, then optionally checks them against the Kotlin
+     * reference implementation.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param vectorSize vector size of the input; the generated array uses its padded size
+     * @param sizeX first dimension of the generated input
+     * @param sizeY second dimension of the generated input
+     * @param coefficients optional coefficients forwarded as-is, `null` included
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomHistogramDot(
+        timer: TimingTracker,
+        vectorSize: Int,
+        sizeX: Int,
+        sizeY: Int,
+        coefficients: FloatArray?, restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, sizeX, sizeY, paddedSize(vectorSize))
+
+        val fromIntrinsic = timer.measure("IntrinsicHistogramDot") {
+            intrinsicHistogramDot(
+                renderscriptContext,
+                source,
+                vectorSize,
+                sizeX,
+                sizeY,
+                coefficients,
+                restriction
+            )
+        }
+        val fromToolkit = timer.measure("ToolkitHistogramDot") {
+            toolkit.histogramDot(source, vectorSize, sizeX, sizeY, coefficients, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceHistogramDot") {
+            referenceHistogramDot(source, vectorSize, sizeX, sizeY, coefficients, restriction)
+        }
+
+        return validateSame("histogramDot", fromIntrinsic, fromReference, fromToolkit) {
+            println("histogramDot $vectorSize ($sizeX, $sizeY) $restriction")
+            logArray("histogramDot coefficients ", coefficients)
+            logArray("histogramDot in           ", source)
+            logArray("histogramDot reference out", fromReference, 256)
+            logArray("histogramDot intrinsic out", fromIntrinsic, 256)
+            logArray("histogramDot toolkit   out", fromToolkit, 256)
+        }
+    }
+
+    /**
+     * Computes the histogram-dot of [bitmap] with the RenderScript Intrinsic and with the
+     * Toolkit, timing both, then optionally checks them against the Kotlin reference run on the
+     * bitmap bytes.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param coefficients optional coefficients forwarded as-is, `null` included
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapHistogramDot(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        coefficients: FloatArray?,
+        restriction: Range2d?
+    ): Boolean {
+        val fromIntrinsic = timer.measure("IntrinsicHistogramDot") {
+            intrinsicHistogramDot(renderscriptContext, bitmap, coefficients, restriction)
+        }
+        val fromToolkit = timer.measure("ToolkitHistogramDot") {
+            toolkit.histogramDot(bitmap, coefficients, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceHistogramDot") {
+            val pixels = getBitmapBytes(bitmap)
+            val channels = vectorSizeOfBitmap(bitmap)
+            referenceHistogramDot(
+                pixels, channels, bitmap.width, bitmap.height, coefficients, restriction
+            )
+        }
+
+        return validateSame("HistogramDotBitmap", fromIntrinsic, fromReference, fromToolkit) {
+            println("HistogramDotBitmap $restriction")
+            logArray("HistogramDotBitmap coefficients ", coefficients)
+            // The input bitmap's bytes are not logged here.
+            logArray("HistogramDotBitmap reference out", fromReference, 256)
+            logArray("HistogramDotBitmap intrinsic out", fromIntrinsic, 256)
+            logArray("HistogramDotBitmap toolkit   out", fromToolkit, 256)
+        }
+    }
+
+    /**
+     * Runs the lookup table comparisons on the whole of `testImage1`, on a restricted range of
+     * it, and on random arrays for every layout in `commonLayoutsToTry`.
+     *
+     * All three groups are always run (`and` does not short-circuit).
+     *
+     * @param timer records the duration of each implementation
+     * @return `true` if every comparison succeeded
+     */
+    @ExperimentalUnsignedTypes
+    private fun testLut(timer: TimingTracker): Boolean {
+        val wholeBitmapOk = testOneBitmapLut(timer, testImage1, null)
+        val restrictedBitmapOk = testOneBitmapLut(timer, testImage1, Range2d(6, 23, 2, 4))
+        val randomArraysOk = commonLayoutsToTry.all { (sizeX, sizeY, restriction) ->
+            testOneRandomLut(timer, sizeX, sizeY, restriction)
+        }
+        return wholeBitmapOk and restrictedBitmapOk and randomArraysOk
+    }
+
+    /**
+     * Applies random per-channel lookup tables to a randomly generated array with the
+     * RenderScript Intrinsic and with the Toolkit, timing both, then optionally checks them
+     * against the Kotlin reference implementation.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param sizeX first dimension of the generated input
+     * @param sizeY second dimension of the generated input
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomLut(
+        timer: TimingTracker,
+        sizeX: Int,
+        sizeY: Int,
+        restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, sizeX, sizeY, 4)
+
+        // One 256-entry table per channel, each generated from its own fixed seed.
+        val redTable = randomByteArray(0x32425, 256, 1, 1)
+        val greenTable = randomByteArray(0x1F3225, 256, 1, 1)
+        val blueTable = randomByteArray(0x32D4F27, 256, 1, 1)
+        val alphaTable = randomByteArray(0x3A20001, 256, 1, 1)
+        val table = LookupTable().apply {
+            red = redTable
+            blue = blueTable
+            green = greenTable
+            alpha = alphaTable
+        }
+
+        val fromIntrinsic = timer.measure("IntrinsicLUT") {
+            intrinsicLut(
+                renderscriptContext,
+                source,
+                sizeX,
+                sizeY,
+                redTable,
+                greenTable,
+                blueTable,
+                alphaTable,
+                restriction
+            )
+        }
+        val fromToolkit = timer.measure("ToolkitLUT") {
+            toolkit.lut(source, sizeX, sizeY, table, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceLUT") {
+            referenceLut(source, sizeX, sizeY, table, restriction)
+        }
+
+        return validateSame("LUT", fromIntrinsic, fromReference, fromToolkit) {
+            println("lut ($sizeX, $sizeY) $restriction")
+            logArray("LUT red  ", redTable, 256)
+            logArray("LUT green", greenTable, 256)
+            logArray("LUT blue ", blueTable, 256)
+            logArray("LUT alpha", alphaTable, 256)
+            logArray("LUT in           ", source)
+            logArray("LUT reference out", fromReference)
+            logArray("LUT intrinsic out", fromIntrinsic)
+            logArray("LUT toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Applies random per-channel lookup tables to [bitmap] with the RenderScript Intrinsic and
+     * with the Toolkit, timing both, then optionally checks them against the Kotlin reference
+     * run on the bitmap bytes.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapLut(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        restriction: Range2d?
+    ): Boolean {
+        // One 256-entry table per channel, each generated from its own fixed seed.
+        val redTable = randomByteArray(0x32425, 256, 1, 1)
+        val greenTable = randomByteArray(0x1F3225, 256, 1, 1)
+        val blueTable = randomByteArray(0x32D4F27, 256, 1, 1)
+        val alphaTable = randomByteArray(0x3A20001, 256, 1, 1)
+        val table = LookupTable().apply {
+            red = redTable
+            blue = blueTable
+            green = greenTable
+            alpha = alphaTable
+        }
+
+        val fromIntrinsic = timer.measure("IntrinsicLUT") {
+            intrinsicLut(
+                renderscriptContext,
+                bitmap,
+                redTable,
+                greenTable,
+                blueTable,
+                alphaTable,
+                restriction
+            )
+        }
+        val mappedBitmap = timer.measure("ToolkitLUT") {
+            toolkit.lut(bitmap, table, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceLUT") {
+            val pixels = getBitmapBytes(bitmap)
+            referenceLut(pixels, bitmap.width, bitmap.height, table, restriction)
+        }
+        val fromToolkit = getBitmapBytes(mappedBitmap)
+
+        return validateSame("LutBitmap", fromIntrinsic, fromReference, fromToolkit) {
+            println("LutBitmap $restriction")
+            logArray("LutBitmap red  ", redTable, 256)
+            logArray("LutBitmap green", greenTable, 256)
+            logArray("LutBitmap blue ", blueTable, 256)
+            logArray("LutBitmap alpha", alphaTable, 256)
+            // The input bitmap's bytes are not logged here.
+            logArray("LutBitmap reference out", fromReference)
+            logArray("LutBitmap intrinsic out", fromIntrinsic)
+            logArray("LutBitmap toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Runs the 3D lookup table comparisons for several cube sizes.
+     *
+     * Each cube size is tested with an identity cube (allowed error 1) and a random cube
+     * (allowed error 3), on bitmaps and on random arrays for every layout in
+     * `commonLayoutsToTry`. The three bitmap tests and the random-array group are always run
+     * (`and` does not short-circuit); inside the random-array group the random cube is only
+     * tried when the identity cube passed (`&&`).
+     *
+     * @param timer records the duration of each implementation
+     * @return `true` if every comparison succeeded
+     */
+    @ExperimentalUnsignedTypes
+    private fun testLut3d(timer: TimingTracker): Boolean {
+        val cubeSizesToTry = listOf(
+            Dimension(2, 2, 2),
+            Dimension(32, 32, 16),
+            Dimension(256, 256, 256)
+        )
+        return cubeSizesToTry.all { cubeSize ->
+            val identity = identityCube(cubeSize)
+            val randomized = randomCube(0x23424, cubeSize)
+
+            val identityBitmapOk =
+                testOneBitmapLut3d(timer, testImage1, cubeSize, identity, 1, null)
+            val randomBitmapOk =
+                testOneBitmapLut3d(timer, testImage2, cubeSize, randomized, 3, null)
+            val randomRestrictedBitmapOk = testOneBitmapLut3d(
+                timer, testImage2, cubeSize, randomized, 3, Range2d(6, 23, 2, 4)
+            )
+            val randomArraysOk = commonLayoutsToTry.all { (sizeX, sizeY, restriction) ->
+                testOneRandomLut3d(timer, sizeX, sizeY, cubeSize, identity, 1, restriction) &&
+                    testOneRandomLut3d(timer, sizeX, sizeY, cubeSize, randomized, 3, restriction)
+            }
+
+            identityBitmapOk and randomBitmapOk and randomRestrictedBitmapOk and randomArraysOk
+        }
+    }
+
+    /**
+     * Applies a 3D lookup table to a randomly generated array with the RenderScript Intrinsic
+     * and with the Toolkit, timing both, then optionally checks them against the Kotlin
+     * reference implementation.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param sizeX first dimension of the generated input
+     * @param sizeY second dimension of the generated input
+     * @param cubeSize dimensions of the lookup cube
+     * @param cubeArray bytes of the lookup cube
+     * @param allowedIntError tolerance forwarded to [validateSame]
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomLut3d(
+        timer: TimingTracker,
+        sizeX: Int,
+        sizeY: Int,
+        cubeSize: Dimension,
+        cubeArray: ByteArray,
+        allowedIntError: Int, restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, sizeX, sizeY, 4)
+
+        val fromIntrinsic = timer.measure("IntrinsicLut3d") {
+            intrinsicLut3d(
+                renderscriptContext,
+                source,
+                sizeX,
+                sizeY,
+                cubeArray,
+                cubeSize,
+                restriction
+            )
+        }
+        val fromToolkit = timer.measure("ToolkitLut3d") {
+            // Wrapping the cube is part of the timed Toolkit work.
+            val wrappedCube =
+                Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ)
+            toolkit.lut3d(source, sizeX, sizeY, wrappedCube, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceLut3d") {
+            val wrappedCube =
+                Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ)
+            referenceLut3d(source, sizeX, sizeY, wrappedCube, restriction)
+        }
+
+        return validateSame(
+            "lut3d", fromIntrinsic, fromReference, fromToolkit, false, allowedIntError
+        ) {
+            println("lut3d ($sizeX, $sizeY) $restriction")
+            logArray("lut3d cube", cubeArray, 256)
+            logArray("lut3d in           ", source, 64)
+            logArray("lut3d reference out", fromReference, 64)
+            logArray("lut3d intrinsic out", fromIntrinsic, 64)
+            logArray("lut3d toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Applies a 3D lookup table to [bitmap] with the RenderScript Intrinsic and with the
+     * Toolkit, timing both, then optionally checks them against the Kotlin reference run on the
+     * bitmap bytes.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param cubeSize dimensions of the lookup cube
+     * @param cubeArray bytes of the lookup cube
+     * @param allowedIntError tolerance forwarded to [validateSame]
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapLut3d(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        cubeSize: Dimension,
+        cubeArray: ByteArray,
+        allowedIntError: Int, restriction: Range2d?
+    ): Boolean {
+        val fromIntrinsic = timer.measure("IntrinsicLut3d") {
+            intrinsicLut3d(renderscriptContext, bitmap, cubeArray, cubeSize, restriction)
+        }
+        val mappedBitmap = timer.measure("ToolkitLut3d") {
+            // Wrapping the cube is part of the timed Toolkit work.
+            val wrappedCube =
+                Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ)
+            toolkit.lut3d(bitmap, wrappedCube, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceLut3d") {
+            val wrappedCube =
+                Rgba3dArray(cubeArray, cubeSize.sizeX, cubeSize.sizeY, cubeSize.sizeZ)
+            val pixels = getBitmapBytes(bitmap)
+            referenceLut3d(pixels, bitmap.width, bitmap.height, wrappedCube, restriction)
+        }
+        val fromToolkit = getBitmapBytes(mappedBitmap)
+
+        return validateSame(
+            "Lut3dBitmap", fromIntrinsic, fromReference, fromToolkit, false, allowedIntError
+        ) {
+            println("Lut3dBitmap $restriction")
+            logArray("Lut3dBitmap cube", cubeArray, 256)
+            // The input bitmap's bytes are not logged here.
+            logArray("Lut3dBitmap reference out", fromReference, 64)
+            logArray("Lut3dBitmap intrinsic out", fromIntrinsic, 64)
+            logArray("Lut3dBitmap toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Runs the resize comparisons.
+     *
+     * A single 6x enlargement of a 25 x 30 random array is tested once. Then, for every scale
+     * factor pair, this tests the whole of `testImage1`, a restricted range of it, and random
+     * arrays for a few layouts with vector sizes from 1 to 4. Within one factor pair all three
+     * groups are always run (`and` does not short-circuit); iteration stops at the first factor
+     * pair that fails.
+     *
+     * @param timer records the duration of each implementation
+     * @return `true` if every comparison succeeded
+     */
+    @ExperimentalUnsignedTypes
+    private fun testResize(timer: TimingTracker): Boolean {
+        val factorsToTry = listOf(
+            Pair(1f, 1f),
+            Pair(0.5f, 1f),
+            Pair(2f, 2f),
+            Pair(0.5f, 2f),
+            Pair(2f, 0.5f),
+            // The RenderScript Intrinsic tests used the above factors. It's tempting to use
+            // less regular ones like Pair(6.37f, 0.17f) however this creates small offset
+            // errors between the result provided by the C++ code and the SIMD code. This is
+            // due to the SIMD code using a scaled integer to increment going from one pixel to the
+            // next, while the C++ code uses float operations.
+        )
+        val layoutsToTry = listOf(
+            TestLayout(37, 47, null),
+            TestLayout(60, 10, null),
+            TestLayout(6, 4, Range2d(1, 3, 0, 2)),
+            TestLayout(10, 14, Range2d(2, 3, 3, 7)),
+        )
+
+        // Do one resize that's greater than 4x, as that's used in the code, but don't do it
+        // for everything, as some images will get very large. This check does not depend on the
+        // factors below, so it runs exactly once here rather than once per factor pair.
+        val largeScaleOk = testOneRandomResize(timer, 1, 25, 30, 6f, 6f, null)
+
+        val factorsOk = factorsToTry.all { (scaleX, scaleY) ->
+            testOneBitmapResize(timer, testImage1, scaleX, scaleY, null) and
+                    testOneBitmapResize(timer, testImage1, scaleX, scaleY, Range2d(6, 23, 2, 4)) and
+                    layoutsToTry.all { (sizeX, sizeY, restriction) ->
+                        (1..4).all { vectorSize ->
+                            testOneRandomResize(
+                                timer,
+                                vectorSize,
+                                sizeX,
+                                sizeY,
+                                scaleX,
+                                scaleY,
+                                restriction
+                            )
+                        }
+                    }
+        }
+        return largeScaleOk and factorsOk
+    }
+
+    /**
+     * Resizes a randomly generated array with the RenderScript Intrinsic and with the Toolkit,
+     * timing both, then optionally checks them against the Kotlin reference implementation.
+     *
+     * The output dimensions are the input dimensions multiplied by the scale factors and
+     * truncated to an integer.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param vectorSize vector size of the input; the generated array uses its padded size
+     * @param inSizeX first dimension of the generated input
+     * @param inSizeY second dimension of the generated input
+     * @param scaleX factor applied to [inSizeX]
+     * @param scaleY factor applied to [inSizeY]
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomResize(
+        timer: TimingTracker,
+        vectorSize: Int,
+        inSizeX: Int,
+        inSizeY: Int,
+        scaleX: Float,
+        scaleY: Float,
+        restriction: Range2d?
+    ): Boolean {
+        val source = randomByteArray(0x50521f0, inSizeX, inSizeY, paddedSize(vectorSize))
+        val outSizeX = (inSizeX * scaleX).toInt()
+        val outSizeY = (inSizeY * scaleY).toInt()
+
+        val fromIntrinsic = timer.measure("IntrinsicResize") {
+            intrinsicResize(
+                renderscriptContext,
+                source,
+                vectorSize,
+                inSizeX,
+                inSizeY,
+                outSizeX,
+                outSizeY,
+                restriction
+            )
+        }
+        val fromToolkit = timer.measure("ToolkitResize") {
+            toolkit.resize(source, vectorSize, inSizeX, inSizeY, outSizeX, outSizeY, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceResize") {
+            referenceResize(source, vectorSize, inSizeX, inSizeY, outSizeX, outSizeY, restriction)
+        }
+
+        return validateSame("resize", fromIntrinsic, fromReference, fromToolkit) {
+            println("resize $vectorSize ($inSizeX, $inSizeY) by ($scaleX, $scaleY) to ($outSizeX, $outSizeY), $restriction")
+            logArray("resize in           ", source)
+            logArray("resize reference out", fromReference)
+            logArray("resize intrinsic out", fromIntrinsic)
+            logArray("resize toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Resizes [bitmap] with the RenderScript Intrinsic and with the Toolkit, timing both, then
+     * optionally checks them against the Kotlin reference run on the bitmap bytes.
+     *
+     * The output dimensions are the bitmap dimensions multiplied by the scale factors and
+     * truncated to an integer.
+     *
+     * @param timer records the duration of each implementation under its own label
+     * @param bitmap image handed to the Intrinsic and the Toolkit
+     * @param scaleX factor applied to the bitmap width
+     * @param scaleY factor applied to the bitmap height
+     * @param restriction optional range forwarded to every implementation
+     * @return `true` when [validate] is off; otherwise the verdict of [validateSame]
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneBitmapResize(
+        timer: TimingTracker,
+        bitmap: Bitmap,
+        scaleX: Float,
+        scaleY: Float,
+        restriction: Range2d?
+    ): Boolean {
+        val outSizeX = (bitmap.width * scaleX).toInt()
+        val outSizeY = (bitmap.height * scaleY).toInt()
+
+        val fromIntrinsic = timer.measure("IntrinsicResize") {
+            intrinsicResize(renderscriptContext, bitmap, outSizeX, outSizeY, restriction)
+        }
+        val resizedBitmap = timer.measure("ToolkitResize") {
+            toolkit.resize(bitmap, outSizeX, outSizeY, restriction)
+        }
+
+        // Without validation only the timings above matter; the reference run is skipped.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceResize") {
+            val pixels = getBitmapBytes(bitmap)
+            val channels = vectorSizeOfBitmap(bitmap)
+            referenceResize(
+                pixels, channels, bitmap.width, bitmap.height, outSizeX, outSizeY, restriction
+            )
+        }
+        val fromToolkit = getBitmapBytes(resizedBitmap)
+
+        return validateSame("ResizeBitmap", fromIntrinsic, fromReference, fromToolkit) {
+            println("ResizeBitmap by ($scaleX, $scaleY) to ($outSizeX, $outSizeY), $restriction")
+            // The input bitmap's bytes are not logged here.
+            logArray("ResizeBitmap reference out", fromReference)
+            logArray("ResizeBitmap intrinsic out", fromIntrinsic)
+            logArray("ResizeBitmap toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Runs the array and the bitmap YUV-to-RGB tests for every [YuvFormat] over a fixed set of
+     * image sizes.
+     *
+     * For a given size and format both variants are always run; the iteration stops at the first
+     * size/format combination for which either of them fails.
+     *
+     * @return true if every combination that was run succeeded.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testYuvToRgb(timer: TimingTracker): Boolean {
+        // Don't try sizeX with odd values. That's not allowed by definition of some
+        // of the video formats.
+        val layoutsToTry = listOf(
+            TestLayout(10, 14, null),
+            TestLayout(64, 40, null),
+            TestLayout(96, 94, null),
+        )
+        for ((sizeX, sizeY, _) in layoutsToTry) {
+            for (format in YuvFormat.values()) {
+                // Evaluate both before combining so that neither test is skipped.
+                val arrayPassed = testOneRandomYuvToRgb(timer, sizeX, sizeY, format)
+                val bitmapPassed = testOneRandomYuvToRgbBitmap(timer, sizeX, sizeY, format)
+                if (!arrayPassed || !bitmapPassed) {
+                    return false
+                }
+            }
+        }
+        return true
+    }
+
+    /**
+     * Converts one randomly filled YUV buffer to RGB with the RenderScript Intrinsic and with
+     * the Toolkit (array output), timing both, and, when `validate` is set, checks both outputs
+     * against the Kotlin reference implementation.
+     *
+     * @return true if the case is skipped, if validation is off, or if both outputs are close
+     * enough to the reference.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomYuvToRgb(
+        timer: TimingTracker,
+        sizeX: Int,
+        sizeY: Int,
+        format: YuvFormat
+    ): Boolean {
+        // The RenderScript Intrinsic does not handle this combination correctly.
+        val unsupportedByIntrinsic = format == YuvFormat.YV12 && sizeX % 32 != 0
+        if (unsupportedByIntrinsic) {
+            return true
+        }
+
+        val yuvInput = randomYuvArray(0x50521f0, sizeX, sizeY, format)
+        val fromIntrinsic = timer.measure("IntrinsicYuvToRgb") {
+            intrinsicYuvToRgb(renderscriptContext, yuvInput, sizeX, sizeY, format)
+        }
+        val fromToolkit = timer.measure("ToolkitYuvToRgb") {
+            toolkit.yuvToRgb(yuvInput, sizeX, sizeY, format)
+        }
+        // Nothing to compare when validation is turned off.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceYuvToRgb") {
+            referenceYuvToRgb(yuvInput, sizeX, sizeY, format)
+        }
+        return validateSame("yuvToRgb", fromIntrinsic, fromReference, fromToolkit) {
+            println("yuvToRgb ($sizeX, $sizeY) $format")
+            logArray("yuvToRgb in           ", yuvInput)
+            logArray("yuvToRgb reference out", fromReference)
+            logArray("yuvToRgb intrinsic out", fromIntrinsic)
+            logArray("yuvToRgb toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Converts one randomly filled YUV buffer to RGB with the RenderScript Intrinsic and with
+     * the Toolkit (bitmap output), timing both, and, when `validate` is set, checks both outputs
+     * against the Kotlin reference implementation.
+     *
+     * @return true if the case is skipped, if validation is off, or if both outputs are close
+     * enough to the reference.
+     */
+    @ExperimentalUnsignedTypes
+    private fun testOneRandomYuvToRgbBitmap(
+        timer: TimingTracker,
+        sizeX: Int,
+        sizeY: Int,
+        format: YuvFormat
+    ): Boolean {
+        // The RenderScript Intrinsic does not handle this combination correctly.
+        val unsupportedByIntrinsic = format == YuvFormat.YV12 && sizeX % 32 != 0
+        if (unsupportedByIntrinsic) {
+            return true
+        }
+
+        val yuvInput = randomYuvArray(0x50521f0, sizeX, sizeY, format)
+        val fromIntrinsic = timer.measure("IntrinsicYuvToRgb") {
+            intrinsicYuvToRgb(renderscriptContext, yuvInput, sizeX, sizeY, format)
+        }
+        val toolkitBitmap = timer.measure("ToolkitYuvToRgb") {
+            toolkit.yuvToRgbBitmap(yuvInput, sizeX, sizeY, format)
+        }
+        // Nothing to compare when validation is turned off.
+        if (!validate) {
+            return true
+        }
+
+        val fromReference = timer.measure("ReferenceYuvToRgb") {
+            referenceYuvToRgb(yuvInput, sizeX, sizeY, format)
+        }
+        val fromToolkit = getBitmapBytes(toolkitBitmap)
+
+        return validateSame("yuvToRgb", fromIntrinsic, fromReference, fromToolkit) {
+            println("yuvToRgb ($sizeX, $sizeY) $format")
+            logArray("yuvToRgb in           ", yuvInput)
+            logArray("yuvToRgb reference out", fromReference)
+            logArray("yuvToRgb intrinsic out", fromIntrinsic)
+            logArray("yuvToRgb toolkit   out", fromToolkit)
+        }
+    }
+
+    /**
+     * Checks that the byte arrays produced by the Intrinsic and by the Toolkit are each within
+     * a margin of error of the array produced by the reference code.
+     *
+     * Both comparisons are always performed, even if the first one fails. On failure a banner
+     * is printed and [errorLogging] is invoked.
+     *
+     * RenderScript Intrinsic test (rc/android/cts/rscpp/RSCppTest.java) used 3 for ints.
+     * For floats, rc/android/cts/rscpp/verify.rscript uses 0.0001f.
+     *
+     * @param task Label used in the printed messages.
+     * @param skipFourth If true, every fourth byte (index % 4 == 3) is left out of the comparison.
+     * @param allowedIntDelta Largest accepted absolute difference between two unsigned bytes.
+     * @param errorLogging Invoked only when at least one comparison failed.
+     * @return true if both arrays match the reference.
+     */
+    @ExperimentalUnsignedTypes
+    private fun validateSame(
+        task: String,
+        intrinsic: ByteArray,
+        reference: ByteArray,
+        toolkit: ByteArray,
+        skipFourth: Boolean = false,
+        allowedIntDelta: Int = 3,
+        errorLogging: () -> Unit
+    ): Boolean {
+        // Computed separately so that a failing Intrinsic does not hide a failing Toolkit.
+        val intrinsicMatches = validateAgainstReference(
+            task, reference, "Intrinsic", intrinsic, skipFourth, allowedIntDelta
+        )
+        val toolkitMatches = validateAgainstReference(
+            task, reference, "Toolkit", toolkit, skipFourth, allowedIntDelta
+        )
+        if (intrinsicMatches && toolkitMatches) {
+            return true
+        }
+        println("$task FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!")
+        errorLogging()
+        return false
+    }
+
+    /**
+     * Checks that the int arrays produced by the Intrinsic and by the Toolkit are each within
+     * [allowedIntDelta] of the array produced by the reference code.
+     *
+     * Both comparisons are always performed, even if the first one fails. On failure a banner
+     * is printed and [errorLogging] is invoked.
+     *
+     * @param task Label used in the printed messages.
+     * @param errorLogging Invoked only when at least one comparison failed.
+     * @return true if both arrays match the reference.
+     */
+    private fun validateSame(
+        task: String,
+        intrinsic: IntArray,
+        reference: IntArray,
+        toolkit: IntArray,
+        allowedIntDelta: Int = 3,
+        errorLogging: () -> Unit
+    ): Boolean {
+        // Computed separately so that a failing Intrinsic does not hide a failing Toolkit.
+        val intrinsicMatches = validateAgainstReference(
+            task, reference, "Intrinsic", intrinsic, allowedIntDelta
+        )
+        val toolkitMatches = validateAgainstReference(
+            task, reference, "Toolkit", toolkit, allowedIntDelta
+        )
+        if (intrinsicMatches && toolkitMatches) {
+            return true
+        }
+        println("$task FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!FAIL!")
+        errorLogging()
+        return false
+    }
+
+    /**
+     * Compares [in2] against the reference array [in1], byte by byte, as unsigned values.
+     *
+     * The first mismatch is reported in full. When there is any mismatch, a ruler and a map of
+     * the first 80 positions are also printed, with 'X' marking mismatched bytes and '.' marking
+     * the others.
+     *
+     * @param task Label used in the printed messages.
+     * @param name2 Name of the implementation that produced [in2], used in the messages.
+     * @param skipFourth If true, bytes at index % 4 == 3 are not compared.
+     * @param allowedIntDelta Largest accepted absolute difference between two bytes.
+     * @return true if the sizes are equal and every compared byte is within the allowed delta.
+     */
+    @ExperimentalUnsignedTypes
+    private fun validateAgainstReference(
+        task: String,
+        in1: ByteArray,
+        name2: String,
+        in2: ByteArray,
+        skipFourth: Boolean,
+        allowedIntDelta: Int
+    ): Boolean {
+        if (in1.size != in2.size) {
+            println("$task. Sizes don't match: Reference ${in1.size}, $name2 ${in2.size}")
+            return false
+        }
+        val maxDetails = 80
+        val mapLength = min(in1.size, maxDetails)
+        val mismatchMap = CharArray(mapLength) { '.' }
+        var mismatchSeen = false
+        for (index in in1.indices) {
+            val ignored = skipFourth && index % 4 == 3
+            if (ignored) continue
+
+            val expected = in1[index].toUByte()
+            val actual = in2[index].toUByte()
+            if (abs(expected.toInt() - actual.toInt()) <= allowedIntDelta) continue
+
+            // Only the first mismatch gets a full message; the others show up in the map.
+            if (!mismatchSeen) {
+                println(
+                    "$task. At $index, Reference is $expected, $name2 is $actual"
+                )
+            }
+            if (index < maxDetails) mismatchMap[index] = 'X'
+            mismatchSeen = true
+        }
+        if (mismatchSeen) {
+            // Ruler: one numbered, four-character wide cell per group of four bytes.
+            repeat(mapLength / 4) { group -> print("%-3d|".format(group)) }
+            println()
+            println(mismatchMap)
+        }
+        return !mismatchSeen
+    }
+
+    /**
+     * Compares [in2] against the reference array [in1], element by element.
+     *
+     * Stops at, and reports, the first element whose absolute difference from the reference
+     * exceeds [allowedIntDelta].
+     *
+     * @param task Label used in the printed messages.
+     * @param name2 Name of the implementation that produced [in2], used in the messages.
+     * @param allowedIntDelta Largest accepted absolute difference between two elements.
+     * @return true if the sizes are equal and every element is within the allowed delta.
+     */
+    private fun validateAgainstReference(
+        task: String,
+        in1: IntArray,
+        name2: String,
+        in2: IntArray,
+        allowedIntDelta: Int
+    ): Boolean {
+        if (in1.size != in2.size) {
+            println("$task. Sizes don't match: Reference ${in1.size}, $name2 ${in2.size}")
+            return false
+        }
+        for (i in in1.indices) {
+            // Subtract as Long: an Int subtraction can overflow for values that are far apart
+            // (e.g. Int.MAX_VALUE vs Int.MIN_VALUE) and make them look almost equal.
+            val delta = abs(in1[i].toLong() - in2[i].toLong())
+            if (delta > allowedIntDelta) {
+                println("$task. At $i, Reference is ${in1[i]}, $name2 is ${in2[i]}")
+                return false
+            }
+        }
+        return true
+    }
+}
diff --git a/toolkit/test/Android.bp b/toolkit/test/Android.bp
new file mode 100644
index 0000000..abeace1
--- /dev/null
+++ b/toolkit/test/Android.bp
@@ -0,0 +1,35 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+android_app {
+    name: "RenderScriptToolkitTest",
+//    srcs: ["src/**/*.kt"],
+    sdk_version: "current",
+    resource_dirs: ["res"],
+//    jni_libs: [ "librenderscripttoolkit"],
+//    certificate: "platform",
+//    //product_specific: true,
+//    //optimize: {
+//    //    proguard_flags_files: ["proguard.flags"],
+//    //},
+//    shared_libs: ["librenderscripttoolkit",
+//
+//    ]
+}
diff --git a/toolkit/test/AndroidManifest.xml b/toolkit/test/AndroidManifest.xml
new file mode 100644
index 0000000..f709790
--- /dev/null
+++ b/toolkit/test/AndroidManifest.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+        package="com.example.testapp">
+
+    <application
+            android:allowBackup="true"
+            android:label="Toolkit Test"
+            android:supportsRtl="true">
+        <activity android:name=".MainActivity">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+</manifest>
diff --git a/toolkit/test/BufferUtils.kt b/toolkit/test/BufferUtils.kt
new file mode 100644
index 0000000..f2197b0
--- /dev/null
+++ b/toolkit/test/BufferUtils.kt
@@ -0,0 +1,508 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.graphics.Canvas
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.toolkit.Range2d
+import android.renderscript.toolkit.Rgba3dArray
+import android.renderscript.toolkit.YuvFormat
+import java.nio.ByteBuffer
+import java.util.Random
+import kotlin.math.floor
+import kotlin.math.max
+import kotlin.math.min
+
+/**
+ * A mutable vector of 4 integers.
+ *
+ * All arithmetic operators work component by component and return a new vector; the operands
+ * are left untouched.
+ */
+class Int4(
+    var x: Int = 0,
+    var y: Int = 0,
+    var z: Int = 0,
+    var w: Int = 0
+) {
+    operator fun plus(other: Int4): Int4 = zipWith(other) { a, b -> a + b }
+    operator fun plus(n: Int): Int4 = mapEach { it + n }
+
+    operator fun minus(other: Int4): Int4 = zipWith(other) { a, b -> a - b }
+    operator fun minus(n: Int): Int4 = mapEach { it - n }
+
+    operator fun times(other: Int4): Int4 = zipWith(other) { a, b -> a * b }
+    operator fun times(n: Int): Int4 = mapEach { it * n }
+
+    /** Converts each component to a Float. */
+    fun toFloat4(): Float4 = Float4(x.toFloat(), y.toFloat(), z.toFloat(), w.toFloat())
+
+    /** Applies [op] to the matching components of this vector and [other]. */
+    private inline fun zipWith(other: Int4, op: (Int, Int) -> Int) =
+        Int4(op(x, other.x), op(y, other.y), op(z, other.z), op(w, other.w))
+
+    /** Applies [op] to each component of this vector. */
+    private inline fun mapEach(op: (Int) -> Int) = Int4(op(x), op(y), op(z), op(w))
+}
+
+/**
+ * Returns the vector made of the smaller of each pair of matching components of [a] and [b].
+ */
+fun min(a: Int4, b: Int4): Int4 {
+    val x = minOf(a.x, b.x)
+    val y = minOf(a.y, b.y)
+    val z = minOf(a.z, b.z)
+    val w = minOf(a.w, b.w)
+    return Int4(x, y, z, w)
+}
+
+/**
+ * A mutable vector of 4 floats.
+ *
+ * All arithmetic operators work component by component and return a new vector; the operands
+ * are left untouched.
+ */
+data class Float4(
+    var x: Float = 0f,
+    var y: Float = 0f,
+    var z: Float = 0f,
+    var w: Float = 0f
+) {
+    operator fun plus(other: Float4): Float4 = zipWith(other) { a, b -> a + b }
+    operator fun plus(f: Float): Float4 = mapEach { it + f }
+
+    operator fun minus(other: Float4): Float4 = zipWith(other) { a, b -> a - b }
+    operator fun minus(f: Float): Float4 = mapEach { it - f }
+
+    operator fun times(other: Float4): Float4 = zipWith(other) { a, b -> a * b }
+    operator fun times(f: Float): Float4 = mapEach { it * f }
+
+    operator fun div(other: Float4): Float4 = zipWith(other) { a, b -> a / b }
+    operator fun div(f: Float): Float4 = mapEach { it / f }
+
+    /** Rounds each component down to an integer. */
+    fun intFloor(): Int4 =
+        Int4(floor(x).toInt(), floor(y).toInt(), floor(z).toInt(), floor(w).toInt())
+
+    /** Applies [op] to the matching components of this vector and [other]. */
+    private inline fun zipWith(other: Float4, op: (Float, Float) -> Float) =
+        Float4(op(x, other.x), op(y, other.y), op(z, other.z), op(w, other.w))
+
+    /** Applies [op] to each component of this vector. */
+    private inline fun mapEach(op: (Float) -> Float) = Float4(op(x), op(y), op(z), op(w))
+}
+
+/**
+ * Converts a UByteArray of exactly 4 elements to a Float4 vector.
+ *
+ * @throws IllegalArgumentException if the array does not have 4 elements.
+ */
+@ExperimentalUnsignedTypes
+fun UByteArray.toFloat4(): Float4 {
+    require(size == 4)
+    val components = FloatArray(4) { index -> this[index].toFloat() }
+    return Float4(components[0], components[1], components[2], components[3])
+}
+
+/**
+ * Converts a ByteArray of exactly 4 elements to a Float4 vector, reading each byte as an
+ * unsigned value.
+ *
+ * @throws IllegalArgumentException if the array does not have 4 elements.
+ */
+@ExperimentalUnsignedTypes
+fun ByteArray.toFloat4(): Float4 {
+    require(size == 4)
+    val components = FloatArray(4) { index -> this[index].toUByte().toFloat() }
+    return Float4(components[0], components[1], components[2], components[3])
+}
+
+/**
+ * The sizes of a three-dimensional grid along the X, Y and Z axes.
+ */
+data class Dimension(
+    val sizeX: Int,
+    val sizeY: Int,
+    val sizeZ: Int
+)
+
+/**
+ * An RGBA value represented by 4 Int.
+ *
+ * Note that the arithmetical operations consider a 0..255 value the equivalent of 0f..1f.
+ * After adding or subtracting, each channel is clamped to 0..255. After multiplying, each
+ * channel is shifted right by 8 bits to bring it back towards the 0..255 range. This is useful
+ * for the Blend operation.
+ */
+@ExperimentalUnsignedTypes
+data class Rgba(
+    var r: Int = 0,
+    var g: Int = 0,
+    var b: Int = 0,
+    var a: Int = 0
+) {
+    operator fun plus(other: Rgba): Rgba =
+        zipWith(other) { mine, theirs -> (mine + theirs).clampToUByteRange() }
+
+    operator fun minus(other: Rgba): Rgba =
+        zipWith(other) { mine, theirs -> (mine - theirs).clampToUByteRange() }
+
+    operator fun times(other: Rgba): Rgba = zipWith(other) { mine, theirs -> (mine * theirs) shr 8 }
+    operator fun times(scalar: Int): Rgba = mapEach { (it * scalar) shr 8 }
+
+    infix fun xor(other: Rgba): Rgba = zipWith(other) { mine, theirs -> mine xor theirs }
+
+    infix fun shr(other: Int): Rgba = mapEach { it shr other }
+
+    /** Applies [op] to the matching channels of this value and [other]. */
+    private inline fun zipWith(other: Rgba, op: (Int, Int) -> Int) =
+        Rgba(op(r, other.r), op(g, other.g), op(b, other.b), op(a, other.a))
+
+    /** Applies [op] to each channel of this value. */
+    private inline fun mapEach(op: (Int) -> Int) = Rgba(op(r), op(g), op(b), op(a))
+}
+
+/**
+ * A 2D array of UByte vectors, stored in row-major format.
+ *
+ * Arrays of vectorSize == 3 are padded to 4.
+ */
+@ExperimentalUnsignedTypes
+class Vector2dArray(
+    val values: UByteArray,
+    val vectorSize: Int,
+    val sizeX: Int,
+    val sizeY: Int
+) {
+    /**
+     * If true, reading a cell that's out of bounds returns the value of the closest border
+     * cell instead of failing. E.g. [3, -3] would return the value for [3, 0], assuming that
+     * sizeX > 3. Writes are never clipped.
+     */
+    var clipReadToRange: Boolean = false
+
+    /**
+     * Returns a copy of the vector stored at ([x], [y]), including any padding component.
+     *
+     * @throws IllegalArgumentException if the cell is out of bounds and clipping is off.
+     */
+    operator fun get(x: Int, y: Int): UByteArray {
+        val column: Int
+        val row: Int
+        if (clipReadToRange) {
+            column = min(max(x, 0), sizeX - 1)
+            row = min(max(y, 0), sizeY - 1)
+        } else {
+            require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" }
+            column = x
+            row = y
+        }
+        val base = indexOfVector(column, row)
+        return UByteArray(paddedSize(vectorSize)) { component -> values[base + component] }
+    }
+
+    /**
+     * Copies [value] into the cell at ([x], [y]).
+     *
+     * @throws IllegalArgumentException if [value] does not have the padded vector size or if
+     * the cell is out of bounds.
+     */
+    operator fun set(x: Int, y: Int, value: UByteArray) {
+        require(value.size == paddedSize(vectorSize)) { "Not the expected vector size" }
+        require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" }
+        val base = indexOfVector(x, y)
+        value.forEachIndexed { component, byte -> values[base + component] = byte }
+    }
+
+    /** Index in [values] of the first component of the vector at ([x], [y]). */
+    private fun indexOfVector(x: Int, y: Int): Int {
+        val cellNumber = y * sizeX + x
+        return cellNumber * paddedSize(vectorSize)
+    }
+
+    /** Creates a zero-filled array with the same vector size and dimensions as this one. */
+    fun createSameSized() = Vector2dArray(UByteArray(values.size), vectorSize, sizeX, sizeY)
+
+    /** Invokes [work] with the coordinates of each cell, limited to [restriction] if not null. */
+    fun forEach(restriction: Range2d?, work: (Int, Int) -> (Unit)) {
+        forEachCell(sizeX, sizeY, restriction, work)
+    }
+}
+
+/**
+ * A 2D array of float vectors, stored in row-major format.
+ *
+ * Arrays of vectorSize == 3 are padded to 4.
+ */
+class FloatVector2dArray(
+    val values: FloatArray,
+    val vectorSize: Int,
+    val sizeX: Int,
+    val sizeY: Int
+) {
+    /**
+     * If true, reading a cell that's out of bounds returns the value of the closest border
+     * cell instead of failing. E.g. [3, -3] would return the value for [3, 0], assuming that
+     * sizeX > 3. Writes are never clipped.
+     */
+    var clipAccessToRange: Boolean = false
+
+    /**
+     * Returns a copy of the first [vectorSize] components of the vector stored at ([x], [y]).
+     *
+     * @throws IllegalArgumentException if the cell is out of bounds and clipping is off.
+     */
+    operator fun get(x: Int, y: Int): FloatArray {
+        val column: Int
+        val row: Int
+        if (clipAccessToRange) {
+            column = min(max(x, 0), sizeX - 1)
+            row = min(max(y, 0), sizeY - 1)
+        } else {
+            require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" }
+            column = x
+            row = y
+        }
+        val base = indexOfVector(column, row)
+        return FloatArray(vectorSize) { component -> values[base + component] }
+    }
+
+    /**
+     * Copies all the elements of [value] into [values], starting at the cell at ([x], [y]).
+     * The size of [value] is not checked.
+     *
+     * @throws IllegalArgumentException if the cell is out of bounds.
+     */
+    operator fun set(x: Int, y: Int, value: FloatArray) {
+        require(x in 0 until sizeX && y in 0 until sizeY) { "Out of bounds" }
+        val base = indexOfVector(x, y)
+        value.forEachIndexed { component, number -> values[base + component] = number }
+    }
+
+    /** Index in [values] of the first component of the vector at ([x], [y]). */
+    private fun indexOfVector(x: Int, y: Int): Int {
+        val cellNumber = y * sizeX + x
+        return cellNumber * paddedSize(vectorSize)
+    }
+
+    /** Creates a zero-filled array with the same vector size and dimensions as this one. */
+    fun createSameSized() = FloatVector2dArray(FloatArray(values.size), vectorSize, sizeX, sizeY)
+
+    /** Invokes [work] with the coordinates of each cell, limited to [restriction] if not null. */
+    fun forEach(restriction: Range2d?, work: (Int, Int) -> (Unit)) {
+        forEachCell(sizeX, sizeY, restriction, work)
+    }
+}
+
+/**
+ * A 2D view over a ByteArray of RGBA data: 4 bytes per cell, stored in row-major format.
+ */
+@ExperimentalUnsignedTypes
+class Rgba2dArray(
+    private val values: ByteArray,
+    val sizeX: Int,
+    val sizeY: Int
+) {
+    /** Reads the cell at ([x], [y]), with each channel converted to an Int in 0..255. */
+    operator fun get(x: Int, y: Int): Rgba {
+        val base = indexOfVector(x, y)
+        fun channel(offset: Int) = values[base + offset].toUByte().toInt()
+        return Rgba(channel(0), channel(1), channel(2), channel(3))
+    }
+
+    /**
+     * Writes [value] to the cell at ([x], [y]).
+     *
+     * @throws IllegalArgumentException if one of the channels is not in the 0..255 range.
+     */
+    operator fun set(x: Int, y: Int, value: Rgba) {
+        val channels = intArrayOf(value.r, value.g, value.b, value.a)
+        // Verify that r, g, b, a are in the 0..255 range
+        channels.forEach { require(it in 0..255) }
+        val base = indexOfVector(x, y)
+        channels.forEachIndexed { offset, channel ->
+            values[base + offset] = channel.toUByte().toByte()
+        }
+    }
+
+    /** Index in [values] of the first byte of the cell at ([x], [y]). */
+    private fun indexOfVector(x: Int, y: Int): Int {
+        val cellNumber = y * sizeX + x
+        return cellNumber * 4
+    }
+
+    /** Invokes [work] with the coordinates of each cell, limited to [restriction] if not null. */
+    fun forEachCell(restriction: Range2d?, work: (Int, Int) -> (Unit)) =
+        forEachCell(sizeX, sizeY, restriction, work)
+}
+
+/**
+ * Linearly interpolates between [start] and [end].
+ *
+ * A [fraction] of 0f yields [start]; the result moves towards [end] as the fraction grows.
+ */
+fun mix(start: Float, end: Float, fraction: Float): Float {
+    val span = end - start
+    return start + span * fraction
+}
+
+/**
+ * Linearly interpolates each component of [a] towards the matching component of [b], using the
+ * same [fraction] for all four components.
+ */
+fun mix(a: Float4, b: Float4, fraction: Float): Float4 {
+    val x = mix(a.x, b.x, fraction)
+    val y = mix(a.y, b.y, fraction)
+    val z = mix(a.z, b.z, fraction)
+    val w = mix(a.w, b.w, fraction)
+    return Float4(x, y, z, w)
+}
+
+/**
+ * Returns the number of elements a vector of [vectorSize] elements occupies in memory.
+ *
+ * For vectors of size 3, the original RenderScript has them occupy the same space as a size 4.
+ * While RenderScript had a method to avoid this padding, it did not apply to Intrinsics.
+ *
+ * To preserve compatibility, the Toolkit does the same.
+ */
+fun paddedSize(vectorSize: Int): Int = when (vectorSize) {
+    3 -> 4
+    else -> vectorSize
+}
+
+/**
+ * Create a ByteArray of the specified size filled with random data.
+ *
+ * The same [seed] always produces the same content. Every byte value, from -128 to 127, can be
+ * generated.
+ *
+ * @return An array of sizeX * sizeY * elementSize bytes.
+ */
+fun randomByteArray(seed: Long, sizeX: Int, sizeY: Int, elementSize: Int): ByteArray {
+    val bytes = ByteArray(sizeX * sizeY * elementSize)
+    // nextBytes covers the whole byte range. The previous nextInt(255) - 128 stopped at 126.
+    Random(seed).nextBytes(bytes)
+    return bytes
+}
+
+/**
+ * Create a FloatArray of sizeX * sizeY * elementSize elements filled with random data.
+ *
+ * Each element is a value drawn from Random.nextFloat() multiplied by [factor]. The same
+ * [seed] always produces the same content.
+ */
+fun randomFloatArray(
+    seed: Long,
+    sizeX: Int,
+    sizeY: Int,
+    elementSize: Int,
+    factor: Float = 1f
+): FloatArray {
+    val generator = Random(seed)
+    val result = FloatArray(sizeX * sizeY * elementSize)
+    for (index in result.indices) {
+        result[index] = generator.nextFloat() * factor
+    }
+    return result
+}
+
+/**
+ * Create a cube of the specified size filled with random data.
+ *
+ * The same [seed] always produces the same content. Every byte value, from -128 to 127, can be
+ * generated.
+ *
+ * @return An array with 4 bytes for each cell of the cube.
+ */
+fun randomCube(seed: Long, cubeSize: Dimension): ByteArray {
+    val cube = ByteArray(cubeSize.sizeX * cubeSize.sizeY * cubeSize.sizeZ * 4)
+    // nextBytes covers the whole byte range. The previous nextInt(255) - 128 stopped at 126.
+    Random(seed).nextBytes(cube)
+    return cube
+}
+
+/**
+ * Create the identity cube, i.e. one that if used in Lut3d, the output is the same as the input.
+ *
+ * The first three bytes of each cell are the cell's x, y and z positions scaled so that the
+ * last position along an axis maps to 255. The fourth byte is always 255.
+ */
+@ExperimentalUnsignedTypes
+fun identityCube(cubeSize: Dimension): ByteArray {
+    val (sizeX, sizeY, sizeZ) = cubeSize
+    val bytes = ByteArray(sizeX * sizeY * sizeZ * 4)
+    val cube = Rgba3dArray(bytes, sizeX, sizeY, sizeZ)
+
+    // Scales a position along an axis of the given size to the 0..255 range.
+    fun ramp(position: Int, size: Int) = (position * 255 / (size - 1)).toByte()
+
+    for (z in 0 until sizeZ) {
+        val blue = ramp(z, sizeZ)
+        for (y in 0 until sizeY) {
+            val green = ramp(y, sizeY)
+            for (x in 0 until sizeX) {
+                cube[x, y, z] = byteArrayOf(ramp(x, sizeX), green, blue, (255).toByte())
+            }
+        }
+    }
+    return bytes
+}
+
+/**
+ * Create a ByteArray filled with random data, sized for an image of [sizeX] by [sizeY] pixels
+ * in the given YUV [format].
+ *
+ * @throws IllegalArgumentException if one of the dimensions is odd or if the format is not
+ * YV12 or NV21.
+ */
+fun randomYuvArray(seed: Long, sizeX: Int, sizeY: Int, format: YuvFormat): ByteArray {
+    // YUV formats are not well defined for odd dimensions
+    require(sizeX % 2 == 0 && sizeY % 2 == 0)
+    val halfSizeX = sizeX / 2
+    val halfSizeY = sizeY / 2
+    val totalSize = when (format) {
+        YuvFormat.YV12 -> {
+            // Row strides are rounded up to a multiple of 16 for this format.
+            val strideX = roundUpTo16(sizeX)
+            val lumaSize = strideX * sizeY
+            lumaSize + roundUpTo16(strideX / 2) * halfSizeY * 2
+        }
+        YuvFormat.NV21 -> sizeX * sizeY + halfSizeX * halfSizeY * 2
+        else -> throw IllegalArgumentException("Unknown YUV format $format")
+    }
+
+    return randomByteArray(seed, totalSize, 1, 1)
+}
+
+/**
+ * Converts a float to a UByte: adds 0.5, truncates to an integer, then clamps the result to
+ * the 0..255 range.
+ */
+@ExperimentalUnsignedTypes
+fun Float.clampToUByte(): UByte {
+    val rounded = (this + 0.5f).toInt()
+    return rounded.coerceIn(0, 255).toUByte()
+}
+
+/**
+ * Converts each element of a FloatArray to a UByte, clamping, and returns the results as a
+ * new UByteArray of the same size.
+ */
+@ExperimentalUnsignedTypes
+fun FloatArray.clampToUByte(): UByteArray {
+    return UByteArray(size) { index -> this[index].clampToUByte() }
+}
+
+/**
+ * Limits an Int to the 0..255 range, i.e. to what can fit in a UByte. The result is still
+ * an Int.
+ */
+fun Int.clampToUByteRange(): Int = coerceIn(0, 255)
+
+/**
+ * Converts an Int to a UByte, after limiting it to the 0..255 range.
+ */
+@ExperimentalUnsignedTypes
+fun Int.clampToUByte(): UByte {
+    val clamped = clampToUByteRange()
+    return clamped.toUByte()
+}
+
+/**
+ * Converts a float (0f .. 1f) to a byte (0 .. 255). Results outside of that range are clamped.
+ */
+@ExperimentalUnsignedTypes
+fun unitFloatClampedToUByte(num: Float): UByte {
+    val scaled = num * 255f
+    return scaled.clampToUByte()
+}
+
+/**
+ * Convert a byte (0 .. 255) to a float (0f .. 1f) by multiplying it by 0.003921569f, which is
+ * approximately 1 / 255.
+ */
+@ExperimentalUnsignedTypes
+fun byteToUnitFloat(num: UByte): Float {
+    val oneOver255 = 0.003921569f
+    return num.toFloat() * oneOver255
+}
+
+/**
+ * Converts each unsigned byte to a float of the same value (0f .. 255f).
+ */
+@ExperimentalUnsignedTypes
+fun UByteArray.toFloatArray(): FloatArray {
+    return FloatArray(size) { index -> this[index].toFloat() }
+}
+
+/**
+ * Invokes [work] with the (x, y) coordinates of each cell to process, row by row.
+ *
+ * Without a [restriction], all the cells of the [sizeX] by [sizeY] array are visited. With a
+ * restriction, the visited cells are those from its start (included) to its end (excluded)
+ * on each axis; the restriction is not checked against the array dimensions.
+ */
+fun forEachCell(sizeX: Int, sizeY: Int, restriction: Range2d?, work: (Int, Int) -> (Unit)) {
+    val columns = (restriction?.startX ?: 0) until (restriction?.endX ?: sizeX)
+    val rows = (restriction?.startY ?: 0) until (restriction?.endY ?: sizeY)
+    for (y in rows) {
+        for (x in columns) {
+            work(x, y)
+        }
+    }
+}
+
+/** Multiplies the elements of this array by the elements of [other] at the same index. */
+operator fun FloatArray.times(other: FloatArray): FloatArray {
+    return FloatArray(size) { index -> this[index] * other[index] }
+}
+
+/** Multiplies each element of this array by [other]. */
+operator fun FloatArray.times(other: Float): FloatArray {
+    return FloatArray(size) { index -> this[index] * other }
+}
+
+/** Adds the elements of [other] to the elements of this array at the same index. */
+operator fun FloatArray.plus(other: FloatArray): FloatArray {
+    return FloatArray(size) { index -> this[index] + other[index] }
+}
+
+/** Subtracts the elements of [other] from the elements of this array at the same index. */
+operator fun FloatArray.minus(other: FloatArray): FloatArray {
+    return FloatArray(size) { index -> this[index] - other[index] }
+}
+
+/**
+ * Returns the RenderScript Element for a vector of [vectorSize] unsigned 8 bit values.
+ *
+ * @throws IllegalArgumentException if [vectorSize] is not between 1 and 4.
+ */
+fun renderScriptVectorElementForU8(rs: RenderScript?, vectorSize: Int): Element {
+    return when (vectorSize) {
+        1 -> Element.U8(rs)
+        2 -> Element.U8_2(rs)
+        3 -> Element.U8_3(rs)
+        4 -> Element.U8_4(rs)
+        else -> throw java.lang.IllegalArgumentException("RenderScriptToolkit tests. Only vectors of size 1-4 are supported. $vectorSize provided.")
+    }
+}
+
+/**
+ * Returns the RenderScript Element for a vector of [vectorSize] signed 32 bit integers.
+ *
+ * @throws IllegalArgumentException if [vectorSize] is not between 1 and 4.
+ */
+fun renderScriptVectorElementForI32(rs: RenderScript?, vectorSize: Int): Element {
+    return when (vectorSize) {
+        1 -> Element.I32(rs)
+        2 -> Element.I32_2(rs)
+        3 -> Element.I32_3(rs)
+        4 -> Element.I32_4(rs)
+        else -> throw java.lang.IllegalArgumentException("RenderScriptToolkit tests. Only vectors of size 1-4 are supported. $vectorSize provided.")
+    }
+}
+
+/* When we'll handle floats
+fun renderScriptVectorElementForF32(rs: RenderScript?, vectorSize: Int): Element {
+    when (vectorSize) {
+        1 -> return Element.F32(rs)
+        2 -> return Element.F32_2(rs)
+        3 -> return Element.F32_3(rs)
+        4 -> return Element.F32_4(rs)
+    }
+    throw java.lang.IllegalArgumentException("RenderScriptToolkit tests. Only vectors of size 1-4 are supported. $vectorSize provided.")
+}*/
+
+/**
+ * Returns the RenderScript Element that matches the configuration of [bitmap].
+ *
+ * @throws IllegalArgumentException if the bitmap is neither ALPHA_8 nor ARGB_8888.
+ */
+fun renderScriptElementForBitmap(context: RenderScript, bitmap: Bitmap): Element =
+    when (val config = bitmap.config) {
+        Bitmap.Config.ALPHA_8 -> Element.A_8(context)
+        Bitmap.Config.ARGB_8888 -> Element.RGBA_8888(context)
+        else -> throw IllegalArgumentException("RenderScript Toolkit can't support bitmaps with config $config.")
+    }
+
+/**
+ * Copies the pixels of [bitmap] into a newly allocated ByteArray of bitmap.byteCount bytes.
+ */
+fun getBitmapBytes(bitmap: Bitmap): ByteArray {
+    val pixels: ByteBuffer = ByteBuffer.allocate(bitmap.byteCount)
+        .also { bitmap.copyPixelsToBuffer(it) }
+    return pixels.array()
+}
+
+/**
+ * Returns the number of bytes per pixel of [bitmap]: 1 for ALPHA_8, 4 for ARGB_8888.
+ *
+ * @throws IllegalArgumentException if the bitmap has any other configuration.
+ */
+fun vectorSizeOfBitmap(bitmap: Bitmap): Int =
+    when (val config = bitmap.config) {
+        Bitmap.Config.ALPHA_8 -> 1
+        Bitmap.Config.ARGB_8888 -> 4
+        else -> throw IllegalArgumentException("RenderScript Toolkit can't support bitmaps with config $config.")
+    }
+
+/**
+ * Returns a new bitmap with the same dimensions and configuration as [original], onto which
+ * the original has been drawn.
+ */
+fun duplicateBitmap(original: Bitmap): Bitmap =
+    Bitmap.createBitmap(original.width, original.height, original.config).also { copy ->
+        Canvas(copy).drawBitmap(original, 0f, 0f, null)
+    }
+
+/**
+ * Prints the [prefix], the size of the [array] and its first elements as unsigned values,
+ * followed by a blank line.
+ *
+ * @param number Maximum number of elements printed. joinToString marks any truncation.
+ */
+@ExperimentalUnsignedTypes
+fun logArray(prefix: String, array: ByteArray, number: Int = 20) {
+    val values = array.joinToString(limit = number) { it.toUByte().toString() }
+    // The previous message ended with a stray '}' left over from the string template.
+    println("$prefix[${array.size}] $values\n")
+}
+
+/**
+ * Prints the [prefix], the size of the [array] and its first elements, followed by a blank
+ * line.
+ *
+ * @param number Maximum number of elements printed. joinToString marks any truncation.
+ */
+fun logArray(prefix: String, array: IntArray, number: Int = 20) {
+    val values = array.joinToString(limit = number)
+    // The previous message ended with a stray '}' left over from the string template.
+    println("$prefix[${array.size}] $values\n")
+}
+
+/**
+ * Prints the [prefix], the size of the [array] and its first elements formatted with two
+ * decimals, followed by a blank line. A null array is printed as "(null)".
+ *
+ * @param number Maximum number of elements printed. joinToString marks any truncation.
+ */
+fun logArray(prefix: String, array: FloatArray?, number: Int = 20) {
+    val values = array?.joinToString(limit = number) { "%.2f".format(it) } ?: "(null)"
+    // The previous message ended with a stray '}' left over from the string template.
+    println("$prefix[${array?.size}] $values\n")
+}
+
+/**
+ * Rounds [value] up to the closest multiple of 16.
+ *
+ * @throws IllegalArgumentException if [value] is negative.
+ */
+fun roundUpTo16(value: Int): Int {
+    require(value >= 0)
+    // Dropping the four low bits of value + 15 yields the next multiple of 16.
+    val bumped = value + 15
+    return (bumped shr 4) shl 4
+}
diff --git a/toolkit/test/IntrinsicBlend.kt b/toolkit/test/IntrinsicBlend.kt
new file mode 100644
index 0000000..873cb15
--- /dev/null
+++ b/toolkit/test/IntrinsicBlend.kt
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicBlend
+import android.renderscript.Type
+import android.renderscript.toolkit.BlendingMode
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a Blend operation using the RenderScript Intrinsics.
+ *
+ * Both arrays are copied into U8_4 allocations of [sizeX] by [sizeY] cells. The blended result
+ * is copied back into [destArray]. All the RenderScript objects created here are destroyed
+ * before returning.
+ *
+ * @param restriction If not null, the blend is launched only on that range.
+ */
+fun intrinsicBlend(
+    context: RenderScript,
+    mode: BlendingMode,
+    sourceArray: ByteArray,
+    destArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    restriction: Range2d?
+) {
+    val blendScript = ScriptIntrinsicBlend.create(context, Element.U8_4(context))
+    val imageType = Type.Builder(context, Element.U8_4(context))
+        .setX(sizeX)
+        .setY(sizeY)
+        .create()
+    val source = Allocation.createTyped(context, imageType)
+    val dest = Allocation.createTyped(context, imageType)
+    source.copyFrom(sourceArray)
+    dest.copyFrom(destArray)
+
+    callBlendForEach(blendScript, source, dest, mode, restriction)
+    // The destination allocation holds the result: bring it back to the caller's array.
+    dest.copyTo(destArray)
+
+    source.destroy()
+    dest.destroy()
+    imageType.destroy()
+    blendScript.destroy()
+}
+
+/**
+ * Does a Blend operation on bitmaps using the RenderScript Intrinsics.
+ *
+ * The blended result is copied back into [destBitmap]. All the RenderScript objects created
+ * here are destroyed before returning.
+ *
+ * @param restriction If not null, the blend is launched only on that range.
+ */
+fun intrinsicBlend(
+    context: RenderScript,
+    mode: BlendingMode,
+    sourceBitmap: Bitmap,
+    destBitmap: Bitmap,
+    restriction: Range2d?
+) {
+    val blendScript = ScriptIntrinsicBlend.create(context, Element.U8_4(context))
+    val source = Allocation.createFromBitmap(context, sourceBitmap)
+    val dest = Allocation.createFromBitmap(context, destBitmap)
+    source.copyFrom(sourceBitmap)
+    dest.copyFrom(destBitmap)
+
+    callBlendForEach(blendScript, source, dest, mode, restriction)
+    // The destination allocation holds the result: bring it back to the caller's bitmap.
+    dest.copyTo(destBitmap)
+
+    source.destroy()
+    dest.destroy()
+    blendScript.destroy()
+}
+
+/**
+ * Invokes the forEach method of [scriptBlend] that corresponds to [mode].
+ *
+ * When [restriction] is not null, launch options covering that range are created and the
+ * overload that takes them is called. Otherwise the overload without options is called.
+ */
+private fun callBlendForEach(
+    scriptBlend: ScriptIntrinsicBlend,
+    sourceAllocation: Allocation,
+    destAllocation: Allocation,
+    mode: BlendingMode,
+    restriction: Range2d?
+) {
+    val src = sourceAllocation
+    val dst = destAllocation
+    // Null means that the whole allocation is processed.
+    val options = restriction?.let { range ->
+        Script.LaunchOptions().apply {
+            setX(range.startX, range.endX)
+            setY(range.startY, range.endY)
+        }
+    }
+    when (mode) {
+        BlendingMode.CLEAR ->
+            if (options != null) scriptBlend.forEachClear(src, dst, options)
+            else scriptBlend.forEachClear(src, dst)
+        BlendingMode.SRC ->
+            if (options != null) scriptBlend.forEachSrc(src, dst, options)
+            else scriptBlend.forEachSrc(src, dst)
+        BlendingMode.DST ->
+            if (options != null) scriptBlend.forEachDst(src, dst, options)
+            else scriptBlend.forEachDst(src, dst)
+        BlendingMode.SRC_OVER ->
+            if (options != null) scriptBlend.forEachSrcOver(src, dst, options)
+            else scriptBlend.forEachSrcOver(src, dst)
+        BlendingMode.DST_OVER ->
+            if (options != null) scriptBlend.forEachDstOver(src, dst, options)
+            else scriptBlend.forEachDstOver(src, dst)
+        BlendingMode.SRC_IN ->
+            if (options != null) scriptBlend.forEachSrcIn(src, dst, options)
+            else scriptBlend.forEachSrcIn(src, dst)
+        BlendingMode.DST_IN ->
+            if (options != null) scriptBlend.forEachDstIn(src, dst, options)
+            else scriptBlend.forEachDstIn(src, dst)
+        BlendingMode.SRC_OUT ->
+            if (options != null) scriptBlend.forEachSrcOut(src, dst, options)
+            else scriptBlend.forEachSrcOut(src, dst)
+        BlendingMode.DST_OUT ->
+            if (options != null) scriptBlend.forEachDstOut(src, dst, options)
+            else scriptBlend.forEachDstOut(src, dst)
+        BlendingMode.SRC_ATOP ->
+            if (options != null) scriptBlend.forEachSrcAtop(src, dst, options)
+            else scriptBlend.forEachSrcAtop(src, dst)
+        BlendingMode.DST_ATOP ->
+            if (options != null) scriptBlend.forEachDstAtop(src, dst, options)
+            else scriptBlend.forEachDstAtop(src, dst)
+        BlendingMode.XOR ->
+            if (options != null) scriptBlend.forEachXor(src, dst, options)
+            else scriptBlend.forEachXor(src, dst)
+        BlendingMode.MULTIPLY ->
+            if (options != null) scriptBlend.forEachMultiply(src, dst, options)
+            else scriptBlend.forEachMultiply(src, dst)
+        BlendingMode.ADD ->
+            if (options != null) scriptBlend.forEachAdd(src, dst, options)
+            else scriptBlend.forEachAdd(src, dst)
+        BlendingMode.SUBTRACT ->
+            if (options != null) scriptBlend.forEachSubtract(src, dst, options)
+            else scriptBlend.forEachSubtract(src, dst)
+    }
+}
diff --git a/toolkit/test/IntrinsicBlur.kt b/toolkit/test/IntrinsicBlur.kt
new file mode 100644
index 0000000..be09094
--- /dev/null
+++ b/toolkit/test/IntrinsicBlur.kt
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicBlur
+import android.renderscript.Type
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a Blur operation using the RenderScript Intrinsics.
+ *
+ * The allocations are [sizeX] by [sizeY] cells of the U8 vector element for [vectorSize]. When
+ * a [restriction] is given, it is passed to the script as launch options and the output
+ * allocation starts out as all zeros. Returns the content of the output allocation.
+ */
+fun intrinsicBlur(
+    context: RenderScript,
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    radius: Int,
+    restriction: Range2d?
+): ByteArray {
+    val blurElement = if (vectorSize == 4) Element.RGBA_8888(context) else Element.U8(context)
+    val blur = ScriptIntrinsicBlur.create(context, blurElement)
+    val pixelType = Type.Builder(context, renderScriptVectorElementForU8(context, vectorSize))
+        .setX(sizeX)
+        .setY(sizeY)
+        .create()
+    val source = Allocation.createTyped(context, pixelType).also { it.copyFrom(inputArray) }
+    val destination = Allocation.createTyped(context, pixelType)
+
+    val result = ByteArray(sizeX * sizeY * vectorSize)
+    blur.setRadius(radius.toFloat())
+    blur.setInput(source)
+
+    if (restriction == null) {
+        blur.forEach(destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        blur.forEach(destination, window)
+    }
+    destination.copyTo(result)
+
+    // Release everything that was created above.
+    source.destroy()
+    destination.destroy()
+    pixelType.destroy()
+    blur.destroy()
+    return result
+}
+
+/** Blurs [bitmap] with the RenderScript Intrinsics and returns the bytes of the output. */
+fun intrinsicBlur(
+    context: RenderScript,
+    bitmap: Bitmap,
+    radius: Int,
+    restriction: Range2d?
+): ByteArray {
+    val blur = ScriptIntrinsicBlur.create(context, renderScriptElementForBitmap(context, bitmap))
+    val source = Allocation.createFromBitmap(context, bitmap).also { it.copyFrom(bitmap) }
+    val destination = Allocation.createTyped(context, source.type)
+    val result = ByteArray(bitmap.byteCount)
+
+    blur.setRadius(radius.toFloat())
+    blur.setInput(source)
+
+    if (restriction == null) {
+        blur.forEach(destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        blur.forEach(destination, window)
+    }
+    destination.copyTo(result)
+
+    source.destroy()
+    destination.destroy()
+    blur.destroy()
+    return result
+}
diff --git a/toolkit/test/IntrinsicColorMatrix.kt b/toolkit/test/IntrinsicColorMatrix.kt
new file mode 100644
index 0000000..c0ccc67
--- /dev/null
+++ b/toolkit/test/IntrinsicColorMatrix.kt
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Matrix4f
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicColorMatrix
+import android.renderscript.Type
+import android.renderscript.Float4
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a ColorMatrix operation using the RenderScript Intrinsics.
+ *
+ * The input and output allocations are both [sizeX] by [sizeY], each built from the U8 vector
+ * element for its own vector size. The add vector is set after the matrix has been selected.
+ *
+ * @param context The RenderScript context used to create the script, types and allocations.
+ * @param conversion Selects one of the predefined matrices, or RANDOM to use [matrix].
+ * @param inputArray The bytes copied into the input allocation.
+ * @param inputVectorSize The vector size of the input cells.
+ * @param sizeX The X dimension of the input and output allocations.
+ * @param sizeY The Y dimension of the input and output allocations.
+ * @param outputVectorSize The vector size of the output cells.
+ * @param matrix The 16 coefficients; only read when [conversion] is RANDOM.
+ * @param addVector The four values set as the add vector of the script.
+ * @param restriction When not null, the X and Y ranges given to the script as launch options.
+ *   In that case, the output allocation starts out as all zeros.
+ * @return The content of the output allocation.
+ */
+fun intrinsicColorMatrix(
+    context: RenderScript,
+    conversion: Tester.ColorMatrixConversionType,
+    inputArray: ByteArray,
+    inputVectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    outputVectorSize: Int,
+    matrix: FloatArray,
+    addVector: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    val colorMatrix = ScriptIntrinsicColorMatrix.create(context)
+    val inputElement = renderScriptVectorElementForU8(context, inputVectorSize)
+    val inputType = Type.Builder(context, inputElement).setX(sizeX).setY(sizeY).create()
+    val source = Allocation.createTyped(context, inputType)
+    val outputElement = renderScriptVectorElementForU8(context, outputVectorSize)
+    val outputType = Type.Builder(context, outputElement).setX(sizeX).setY(sizeY).create()
+    val destination = Allocation.createTyped(context, outputType)
+
+    source.copyFrom(inputArray)
+    val result = ByteArray(sizeX * sizeY * paddedSize(outputVectorSize))
+
+    when (conversion) {
+        Tester.ColorMatrixConversionType.RGB_TO_YUV -> colorMatrix.setRGBtoYUV()
+        Tester.ColorMatrixConversionType.YUV_TO_RGB -> colorMatrix.setYUVtoRGB()
+        Tester.ColorMatrixConversionType.GREYSCALE -> colorMatrix.setGreyscale()
+        Tester.ColorMatrixConversionType.RANDOM -> {
+            // RS is column major: consecutive values of the array share the same x.
+            val coefficients = Matrix4f()
+            for (x in 0 until 4) {
+                for (y in 0 until 4) {
+                    coefficients.set(x, y, matrix[4 * x + y])
+                }
+            }
+            colorMatrix.setColorMatrix(coefficients)
+        }
+    }
+    val offsets = Float4(addVector[0], addVector[1], addVector[2], addVector[3])
+    colorMatrix.setAdd(offsets)
+
+    if (restriction == null) {
+        colorMatrix.forEach(source, destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        colorMatrix.forEach(source, destination, window)
+    }
+    destination.copyTo(result)
+
+    // Release everything that was created above.
+    source.destroy()
+    destination.destroy()
+    inputType.destroy()
+    outputType.destroy()
+    colorMatrix.destroy()
+    return result
+}
+
+/**
+ * Does a ColorMatrix operation on [bitmap] using the RenderScript Intrinsics.
+ *
+ * Returns the bytes of the output allocation, which has the same type as the input.
+ */
+fun intrinsicColorMatrix(
+    context: RenderScript,
+    conversion: Tester.ColorMatrixConversionType,
+    bitmap: Bitmap,
+    matrix: FloatArray,
+    addVector: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    val colorMatrix = ScriptIntrinsicColorMatrix.create(context)
+    val source = Allocation.createFromBitmap(context, bitmap).also { it.copyFrom(bitmap) }
+    val destination = Allocation.createTyped(context, source.type)
+    val result = ByteArray(bitmap.byteCount)
+
+    when (conversion) {
+        Tester.ColorMatrixConversionType.RGB_TO_YUV -> colorMatrix.setRGBtoYUV()
+        Tester.ColorMatrixConversionType.YUV_TO_RGB -> colorMatrix.setYUVtoRGB()
+        Tester.ColorMatrixConversionType.GREYSCALE -> colorMatrix.setGreyscale()
+        Tester.ColorMatrixConversionType.RANDOM -> {
+            // RS is column major: consecutive values of the array share the same x.
+            val coefficients = Matrix4f()
+            for (x in 0 until 4) {
+                for (y in 0 until 4) {
+                    coefficients.set(x, y, matrix[4 * x + y])
+                }
+            }
+            colorMatrix.setColorMatrix(coefficients)
+        }
+    }
+    val offsets = Float4(addVector[0], addVector[1], addVector[2], addVector[3])
+    colorMatrix.setAdd(offsets)
+
+    if (restriction == null) {
+        colorMatrix.forEach(source, destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        colorMatrix.forEach(source, destination, window)
+    }
+    destination.copyTo(result)
+
+    source.destroy()
+    destination.destroy()
+    colorMatrix.destroy()
+    return result
+}
diff --git a/toolkit/test/IntrinsicConvolve.kt b/toolkit/test/IntrinsicConvolve.kt
new file mode 100644
index 0000000..0c9e4f0
--- /dev/null
+++ b/toolkit/test/IntrinsicConvolve.kt
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicConvolve3x3
+import android.renderscript.ScriptIntrinsicConvolve5x5
+import android.renderscript.Type
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a Convolve operation using the RenderScript Intrinsics.
+ *
+ * The kernel is selected by the number of [coefficients]: 9 for the 3x3 convolution and 25 for
+ * the 5x5 one. Any other count makes [invokeConvolveKernel] throw an IllegalArgumentException.
+ *
+ * @param vectorSize The vector size of the U8 cells of [inputArray].
+ * @param restriction When not null, the X and Y ranges given to the script as launch options.
+ *   In that case, the output allocation starts out as all zeros.
+ * @return The content of the output allocation.
+ */
+fun intrinsicConvolve(
+    context: RenderScript,
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    coefficients: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    val cellElement = renderScriptVectorElementForU8(context, vectorSize)
+    val cellType = Type.Builder(context, cellElement).setX(sizeX).setY(sizeY).create()
+    val source = Allocation.createTyped(context, cellType)
+    val destination = Allocation.createTyped(context, cellType)
+    source.copyFrom(inputArray)
+    val result = ByteArray(sizeX * sizeY * paddedSize(vectorSize))
+    if (restriction != null) {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+    }
+    invokeConvolveKernel(coefficients, context, cellElement, source, restriction, destination)
+    destination.copyTo(result)
+
+    source.destroy()
+    destination.destroy()
+    cellType.destroy()
+    return result
+}
+
+/**
+ * Does a Convolve operation on [bitmap] using the RenderScript Intrinsics.
+ *
+ * [coefficients] must hold 9 (3x3) or 25 (5x5) values. Returns the bytes of the output.
+ */
+fun intrinsicConvolve(
+    context: RenderScript,
+    bitmap: Bitmap,
+    coefficients: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    val bitmapElement = renderScriptElementForBitmap(context, bitmap)
+    val source = Allocation.createFromBitmap(context, bitmap)
+    val destination = Allocation.createTyped(context, source.type)
+    val result = ByteArray(bitmap.byteCount)
+    source.copyFrom(bitmap)
+
+    if (restriction != null) {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+    }
+    invokeConvolveKernel(coefficients, context, bitmapElement, source, restriction, destination)
+    destination.copyTo(result)
+
+    source.destroy()
+    destination.destroy()
+    return result
+}
+
+/** Runs the 3x3 or the 5x5 convolve intrinsic, depending on the number of [coefficients]. */
+private fun invokeConvolveKernel(
+    coefficients: FloatArray,
+    context: RenderScript,
+    baseElement: Element,
+    inputAllocation: Allocation?,
+    restriction: Range2d?,
+    outAllocation: Allocation?
+) {
+    // Built on demand by whichever kernel runs; null means that no restriction was given.
+    fun launchOptions(): Script.LaunchOptions? = restriction?.let {
+        Script.LaunchOptions().setX(it.startX, it.endX).setY(it.startY, it.endY)
+    }
+
+    when (coefficients.size) {
+        9 -> {
+            val convolve = ScriptIntrinsicConvolve3x3.create(context, baseElement)
+            convolve.setCoefficients(coefficients)
+            convolve.setInput(inputAllocation)
+            val window = launchOptions()
+            if (window == null) {
+                convolve.forEach(outAllocation)
+            } else {
+                convolve.forEach(outAllocation, window)
+            }
+            convolve.destroy()
+        }
+        25 -> {
+            val convolve = ScriptIntrinsicConvolve5x5.create(context, baseElement)
+            convolve.setCoefficients(coefficients)
+            convolve.setInput(inputAllocation)
+            val window = launchOptions()
+            if (window == null) {
+                convolve.forEach(outAllocation)
+            } else {
+                convolve.forEach(outAllocation, window)
+            }
+            convolve.destroy()
+        }
+        else -> throw IllegalArgumentException(
+            "RenderScriptToolkit tests. Only 3x3 and 5x5 convolutions are supported. ${coefficients.size} coefficients provided."
+        )
+    }
+}
diff --git a/toolkit/test/IntrinsicHistogram.kt b/toolkit/test/IntrinsicHistogram.kt
new file mode 100644
index 0000000..25cc55d
--- /dev/null
+++ b/toolkit/test/IntrinsicHistogram.kt
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicHistogram
+import android.renderscript.Type
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a Histogram operation using the RenderScript Intrinsics.
+ *
+ * The output allocation holds 256 cells of the I32 vector element for [vectorSize].
+ *
+ * @param inputArray The bytes copied into the [sizeX] by [sizeY] input allocation.
+ * @param vectorSize The vector size of the U8 cells of [inputArray].
+ * @param restriction When not null, the X and Y ranges given to the script as launch options.
+ * @return The content of the output allocation, as 256 * paddedSize(vectorSize) integers.
+ */
+fun intrinsicHistogram(
+    context: RenderScript,
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    restriction: Range2d?
+): IntArray {
+    val cellElement = renderScriptVectorElementForU8(context, vectorSize)
+    val histogram = ScriptIntrinsicHistogram.create(context, cellElement)
+    val cellType = Type.Builder(context, cellElement).setX(sizeX).setY(sizeY).create()
+    val source = Allocation.createTyped(context, cellType)
+    val binElement = renderScriptVectorElementForI32(context, vectorSize)
+    val bins = Allocation.createSized(context, binElement, 256)
+    source.copyFrom(inputArray)
+    histogram.setOutput(bins)
+    if (restriction == null) {
+        histogram.forEach(source)
+    } else {
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        histogram.forEach(source, window)
+    }
+
+    val counts = IntArray(256 * paddedSize(vectorSize))
+    bins.copyTo(counts)
+    source.destroy()
+    bins.destroy()
+    cellType.destroy()
+    histogram.destroy()
+    return counts
+}
+
+/**
+ * Does a Histogram operation on [bitmap] using the RenderScript Intrinsics.
+ *
+ * Returns 256 * vectorSizeOfBitmap(bitmap) integers copied from the output allocation.
+ */
+fun intrinsicHistogram(
+    context: RenderScript,
+    bitmap: Bitmap,
+    restriction: Range2d?
+): IntArray {
+    val histogram =
+        ScriptIntrinsicHistogram.create(context, renderScriptElementForBitmap(context, bitmap))
+    val source = Allocation.createFromBitmap(context, bitmap).also { it.copyFrom(bitmap) }
+    val vectorSize = vectorSizeOfBitmap(bitmap)
+    val binElement = renderScriptVectorElementForI32(context, vectorSize)
+    val bins = Allocation.createSized(context, binElement, 256)
+    histogram.setOutput(bins)
+    if (restriction == null) {
+        histogram.forEach(source)
+    } else {
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        histogram.forEach(source, window)
+    }
+
+    val counts = IntArray(256 * vectorSize)
+    bins.copyTo(counts)
+    source.destroy()
+    bins.destroy()
+    histogram.destroy()
+    return counts
+}
+
+/**
+ * Does a Histogram Dot operation using the RenderScript Intrinsics; returns 256 output values.
+ */
+fun intrinsicHistogramDot(
+    context: RenderScript,
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    coefficients: FloatArray?,
+    restriction: Range2d?
+): IntArray {
+    val cellElement = renderScriptVectorElementForU8(context, vectorSize)
+    val histogram = ScriptIntrinsicHistogram.create(context, cellElement)
+    val cellType = Type.Builder(context, cellElement).setX(sizeX).setY(sizeY).create()
+    val source = Allocation.createTyped(context, cellType)
+    val bins = Allocation.createSized(context, Element.I32(context), 256)
+    source.copyFrom(inputArray)
+
+    if (coefficients != null) {
+        require(coefficients.size == vectorSize) {
+            "RenderScriptToolkit tests. $vectorSize coefficients are required for histogram. " +
+                "${coefficients.size} provided."
+        }
+        // The size was checked to be vectorSize, so the entries past it default to zero.
+        histogram.setDotCoefficients(
+            coefficients[0],
+            coefficients.getOrElse(1) { 0f },
+            coefficients.getOrElse(2) { 0f },
+            coefficients.getOrElse(3) { 0f }
+        )
+    }
+    histogram.setOutput(bins)
+    if (restriction == null) {
+        histogram.forEach_Dot(source)
+    } else {
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        histogram.forEach_Dot(source, window)
+    }
+    val counts = IntArray(256)
+    bins.copyTo(counts)
+    source.destroy()
+    bins.destroy()
+    cellType.destroy()
+    histogram.destroy()
+    return counts
+}
+
+/**
+ * Does a Histogram Dot operation on [bitmap] using the RenderScript Intrinsics.
+ *
+ * When [coefficients] is not null, it must hold exactly four values; they are set as the dot
+ * coefficients of the script. Returns the 256 values of the output allocation.
+ */
+fun intrinsicHistogramDot(
+    context: RenderScript,
+    bitmap: Bitmap,
+    coefficients: FloatArray?,
+    restriction: Range2d?
+): IntArray {
+    val histogram =
+        ScriptIntrinsicHistogram.create(context, renderScriptElementForBitmap(context, bitmap))
+    val source = Allocation.createFromBitmap(context, bitmap).also { it.copyFrom(bitmap) }
+    val bins = Allocation.createSized(context, Element.I32(context), 256)
+
+    if (coefficients != null) {
+        require(coefficients.size == 4) {
+            "RenderScriptToolkit tests. Four coefficients are required for histogram. " +
+                "${coefficients.size} provided."
+        }
+        val (first, second, third, fourth) = coefficients
+        histogram.setDotCoefficients(first, second, third, fourth)
+    }
+    histogram.setOutput(bins)
+    if (restriction == null) {
+        histogram.forEach_Dot(source)
+    } else {
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        histogram.forEach_Dot(source, window)
+    }
+    val counts = IntArray(256)
+    bins.copyTo(counts)
+    source.destroy()
+    bins.destroy()
+    histogram.destroy()
+    return counts
+}
diff --git a/toolkit/test/IntrinsicLut.kt b/toolkit/test/IntrinsicLut.kt
new file mode 100644
index 0000000..1ed03ac
--- /dev/null
+++ b/toolkit/test/IntrinsicLut.kt
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicLUT
+import android.renderscript.Type
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a LookUpTable operation using the RenderScript Intrinsics.
+ *
+ * [inputArray] is copied into a [sizeX] by [sizeY] allocation of U8_4 cells. Each of the four
+ * tables is read at indexes 0 to 255, its bytes being converted to unsigned values. With a
+ * [restriction], the output allocation starts out as all zeros.
+ */
+@ExperimentalUnsignedTypes
+fun intrinsicLut(
+    context: RenderScript,
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    newRed: ByteArray,
+    newGreen: ByteArray,
+    newBlue: ByteArray,
+    newAlpha: ByteArray,
+    restriction: Range2d?
+): ByteArray {
+    val lut = ScriptIntrinsicLUT.create(context, Element.U8_4(context))
+    val cellType = Type.Builder(context, Element.U8_4(context)).setX(sizeX).setY(sizeY).create()
+    val source = Allocation.createTyped(context, cellType)
+    val destination = Allocation.createTyped(context, cellType)
+    source.copyFrom(inputArray)
+    val result = ByteArray(sizeX * sizeY * 4)
+
+    // The tables hold unsigned bytes; convert each entry to its 0..255 value.
+    for (index in 0 until 256) {
+        lut.setRed(index, newRed[index].toUByte().toInt())
+        lut.setGreen(index, newGreen[index].toUByte().toInt())
+        lut.setBlue(index, newBlue[index].toUByte().toInt())
+        lut.setAlpha(index, newAlpha[index].toUByte().toInt())
+    }
+    if (restriction == null) {
+        lut.forEach(source, destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        lut.forEach(source, destination, window)
+    }
+
+    destination.copyTo(result)
+    source.destroy()
+    destination.destroy()
+    cellType.destroy()
+    lut.destroy()
+    return result
+}
+
+/** Does a LookUpTable operation on [bitmap] using the RenderScript Intrinsics. */
+@ExperimentalUnsignedTypes
+fun intrinsicLut(
+    context: RenderScript,
+    bitmap: Bitmap,
+    newRed: ByteArray,
+    newGreen: ByteArray,
+    newBlue: ByteArray,
+    newAlpha: ByteArray,
+    restriction: Range2d?
+): ByteArray {
+    val lut = ScriptIntrinsicLUT.create(context, renderScriptElementForBitmap(context, bitmap))
+    val source = Allocation.createFromBitmap(context, bitmap).also { it.copyFrom(bitmap) }
+    val destination = Allocation.createTyped(context, source.type)
+    val result = ByteArray(bitmap.byteCount)
+
+    for (index in 0 until 256) {
+        lut.setRed(index, newRed[index].toUByte().toInt())
+        lut.setGreen(index, newGreen[index].toUByte().toInt())
+        lut.setBlue(index, newBlue[index].toUByte().toInt())
+        lut.setAlpha(index, newAlpha[index].toUByte().toInt())
+    }
+    if (restriction == null) {
+        lut.forEach(source, destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        lut.forEach(source, destination, window)
+    }
+
+    destination.copyTo(result)
+    source.destroy()
+    destination.destroy()
+    lut.destroy()
+    return result
+}
diff --git a/toolkit/test/IntrinsicLut3d.kt b/toolkit/test/IntrinsicLut3d.kt
new file mode 100644
index 0000000..48e785e
--- /dev/null
+++ b/toolkit/test/IntrinsicLut3d.kt
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsic3DLUT
+import android.renderscript.Type
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a 3D LookUpTable operation using the RenderScript Intrinsics.
+ *
+ * @param context The RenderScript context used to create the script, types and allocations.
+ * @param inputArray The bytes copied into the [sizeX] by [sizeY] U8_4 input allocation.
+ * @param cubeArray The bytes copied into the allocation that is set as the LUT of the script.
+ * @param cubeSize The X, Y and Z dimensions of the U8_4 allocation created for [cubeArray].
+ * @param restriction When not null, the X and Y ranges given to the script as launch options.
+ *   In that case, the output allocation starts out as all zeros.
+ * @return The content of the output allocation.
+ */
+fun intrinsicLut3d(
+    context: RenderScript,
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    cubeArray: ByteArray,
+    cubeSize: Dimension,
+    restriction: Range2d?
+): ByteArray {
+    val lut3d = ScriptIntrinsic3DLUT.create(context, Element.U8_4(context))
+    val cellType = Type.Builder(context, Element.U8_4(context)).setX(sizeX).setY(sizeY).create()
+    val source = Allocation.createTyped(context, cellType)
+    val destination = Allocation.createTyped(context, cellType)
+    source.copyFrom(inputArray)
+    val result = ByteArray(sizeX * sizeY * 4)
+
+    val cubeType = Type.Builder(context, Element.U8_4(context))
+        .setX(cubeSize.sizeX)
+        .setY(cubeSize.sizeY)
+        .setZ(cubeSize.sizeZ)
+        .create()
+    val cube = Allocation.createTyped(context, cubeType).also { it.copyFrom(cubeArray) }
+    lut3d.setLUT(cube)
+    if (restriction == null) {
+        lut3d.forEach(source, destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        lut3d.forEach(source, destination, window)
+    }
+
+    destination.copyTo(result)
+    source.destroy()
+    destination.destroy()
+    cube.destroy()
+    cellType.destroy()
+    cubeType.destroy()
+    lut3d.destroy()
+    return result
+}
+
+/** Does a 3D LookUpTable operation on [bitmap] using the RenderScript Intrinsics. */
+fun intrinsicLut3d(
+    context: RenderScript,
+    bitmap: Bitmap,
+    cubeArray: ByteArray,
+    cubeSize: Dimension,
+    restriction: Range2d?
+): ByteArray {
+    val bitmapElement = renderScriptElementForBitmap(context, bitmap)
+    val lut3d = ScriptIntrinsic3DLUT.create(context, bitmapElement)
+    val source = Allocation.createFromBitmap(context, bitmap).also { it.copyFrom(bitmap) }
+    val destination = Allocation.createTyped(context, source.type)
+    val result = ByteArray(bitmap.byteCount)
+
+    // Build the U8_4 cube allocation and set it as the LUT of the script.
+    val cubeType = Type.Builder(context, Element.U8_4(context))
+        .setX(cubeSize.sizeX)
+        .setY(cubeSize.sizeY)
+        .setZ(cubeSize.sizeZ)
+        .create()
+    val cube = Allocation.createTyped(context, cubeType).also { it.copyFrom(cubeArray) }
+    lut3d.setLUT(cube)
+    if (restriction == null) {
+        lut3d.forEach(source, destination)
+    } else {
+        // Copy the still-zeroed result array in, so the output starts out as all zeros.
+        destination.copyFrom(result)
+        val window = Script.LaunchOptions()
+            .setX(restriction.startX, restriction.endX)
+            .setY(restriction.startY, restriction.endY)
+        lut3d.forEach(source, destination, window)
+    }
+
+    destination.copyTo(result)
+    source.destroy()
+    destination.destroy()
+    cube.destroy()
+    cubeType.destroy()
+    lut3d.destroy()
+    return result
+}
diff --git a/toolkit/test/IntrinsicResize.kt b/toolkit/test/IntrinsicResize.kt
new file mode 100644
index 0000000..5cdf89a
--- /dev/null
+++ b/toolkit/test/IntrinsicResize.kt
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.Bitmap
+import android.renderscript.Allocation
+import android.renderscript.RenderScript
+import android.renderscript.Script
+import android.renderscript.ScriptIntrinsicResize
+import android.renderscript.Type
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Does a bicubic Resize of a byte-array image using the RenderScript Intrinsics.
+ *
+ * When [restriction] is non-null, the launch is limited to that range and the output
+ * allocation is zero-initialized first.
+ *
+ * @return the output image, outSizeX * outSizeY * paddedSize(vectorSize) bytes.
+ */
+fun intrinsicResize(
+    context: RenderScript,
+    inputArray: ByteArray,
+    vectorSize: Int,
+    inSizeX: Int,
+    inSizeY: Int,
+    outSizeX: Int,
+    outSizeY: Int,
+    restriction: Range2d?
+): ByteArray {
+    val script = ScriptIntrinsicResize.create(context)
+    // One builder produces both types; only the dimensions change between them.
+    val typeBuilder = Type.Builder(context, renderScriptVectorElementForU8(context, vectorSize))
+    val inputType = typeBuilder.setX(inSizeX).setY(inSizeY).create()
+    val source = Allocation.createTyped(context, inputType)
+    val outputType = typeBuilder.setX(outSizeX).setY(outSizeY).create()
+    val destination = Allocation.createTyped(context, outputType)
+    val result = ByteArray(outSizeX * outSizeY * paddedSize(vectorSize))
+
+    source.copyFrom(inputArray)
+    script.setInput(source)
+    if (restriction == null) {
+        script.forEach_bicubic(destination)
+    } else {
+        // result is still all zeros here; copying it in initializes the output to zero.
+        destination.copyFrom(result)
+        val options = Script.LaunchOptions()
+        options.setX(restriction.startX, restriction.endX)
+        options.setY(restriction.startY, restriction.endY)
+        script.forEach_bicubic(destination, options)
+    }
+    destination.copyTo(result)
+
+    source.destroy()
+    destination.destroy()
+    script.destroy()
+    inputType.destroy()
+    outputType.destroy()
+    return result
+}
+
+/**
+ * Bicubic-resizes [bitmap] with the RenderScript Resize intrinsic and returns the output bytes.
+ * Only ARGB_8888 and ALPHA_8 are handled; any other config fails via error().
+ */
+fun intrinsicResize(
+    context: RenderScript,
+    bitmap: Bitmap,
+    outSizeX: Int,
+    outSizeY: Int,
+    restriction: Range2d?
+): ByteArray {
+    val scriptResize = ScriptIntrinsicResize.create(context)
+    val inputAllocation = Allocation.createFromBitmap(context, bitmap)
+    inputAllocation.copyFrom(bitmap)
+
+    val vectorSize = when (bitmap.config) {
+        Bitmap.Config.ARGB_8888 -> 4
+        Bitmap.Config.ALPHA_8 -> 1
+        // Braces are required: without them only `bitmap` is interpolated and ".config" is appended.
+        else -> error("Unrecognized bitmap config ${bitmap.config}")
+    }
+    val outputArrayType = Type.Builder(context, renderScriptVectorElementForU8(context, vectorSize))
+        .setX(outSizeX).setY(outSizeY).create()
+    val outAllocation = Allocation.createTyped(context, outputArrayType)
+    val intrinsicOutArray = ByteArray(outSizeX * outSizeY * vectorSize)
+
+    scriptResize.setInput(inputAllocation)
+    if (restriction != null) {
+        outAllocation.copyFrom(intrinsicOutArray) // To initialize to zero
+        val options = Script.LaunchOptions()
+        options.setX(restriction.startX, restriction.endX)
+        options.setY(restriction.startY, restriction.endY)
+        scriptResize.forEach_bicubic(outAllocation, options)
+    } else {
+        scriptResize.forEach_bicubic(outAllocation)
+    }
+    outAllocation.copyTo(intrinsicOutArray)
+
+    inputAllocation.destroy()
+    outAllocation.destroy()
+    outputArrayType.destroy()
+    scriptResize.destroy()
+    return intrinsicOutArray
+}
diff --git a/toolkit/test/IntrinsicYuvToRgb.kt b/toolkit/test/IntrinsicYuvToRgb.kt
new file mode 100644
index 0000000..5e46f2e
--- /dev/null
+++ b/toolkit/test/IntrinsicYuvToRgb.kt
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.graphics.ImageFormat
+import android.renderscript.Allocation
+import android.renderscript.Element
+import android.renderscript.RenderScript
+import android.renderscript.ScriptIntrinsicYuvToRGB
+import android.renderscript.Type
+import android.renderscript.toolkit.YuvFormat
+
+/**
+ * Does a YUV to RGB operation using the RenderScript Intrinsics.
+ *
+ * [format] selects the NV21 or YV12 input layout; any other value makes require() throw
+ * IllegalArgumentException.
+ *
+ * @return the converted image, sizeX * sizeY * 4 bytes.
+ */
+fun intrinsicYuvToRgb(
+    context: RenderScript,
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    format: YuvFormat
+): ByteArray {
+    val script = ScriptIntrinsicYuvToRGB.create(context, Element.YUV(context))
+
+    // Input: a YUV-typed allocation in the requested layout.
+    val yuvBuilder = Type.Builder(context, Element.YUV(context)).setX(sizeX).setY(sizeY)
+    when (format) {
+        YuvFormat.NV21 -> yuvBuilder.setYuvFormat(ImageFormat.NV21)
+        YuvFormat.YV12 -> yuvBuilder.setYuvFormat(ImageFormat.YV12)
+        else -> require(false) { "Unknown YUV format $format" }
+    }
+    val yuvType = yuvBuilder.create()
+    val source = Allocation.createTyped(context, yuvType)
+
+    // Output: four bytes per pixel.
+    val rgbaType = Type.Builder(context, Element.U8_4(context)).setX(sizeX).setY(sizeY).create()
+    val destination = Allocation.createTyped(context, rgbaType)
+    val result = ByteArray(sizeX * sizeY * 4)
+
+    source.copyFrom(inputArray)
+    script.setInput(source)
+    script.forEach(destination)
+    destination.copyTo(result)
+
+    source.destroy()
+    destination.destroy()
+    yuvType.destroy()
+    rgbaType.destroy()
+    script.destroy()
+    return result
+}
diff --git a/toolkit/test/MainActivity.kt b/toolkit/test/MainActivity.kt
new file mode 100644
index 0000000..4092861
--- /dev/null
+++ b/toolkit/test/MainActivity.kt
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.os.Bundle
+import android.widget.TextView
+import androidx.appcompat.app.AppCompatActivity
+
+/** Test activity: runs tester.testAll() from onCreate and shows the result in a TextView. */
+@ExperimentalUnsignedTypes
+class MainActivity : AppCompatActivity() {
+    override fun onCreate(savedInstanceState: Bundle?) {
+        super.onCreate(savedInstanceState)
+        setContentView(R.layout.activity_main)
+
+        // Enable CloseGuard to debug resources not destroyed
+        // ("A resource failed to call destroy.").
+        try {
+            val closeGuard = Class.forName("dalvik.system.CloseGuard")
+            val setEnabled = closeGuard.getMethod("setEnabled", Boolean::class.javaPrimitiveType)
+            setEnabled.invoke(null, true)
+        } catch (e: ReflectiveOperationException) {
+            throw RuntimeException(e)
+        }
+
+        val validate = true
+        val tester = Tester(this, validate)
+        val iterationCount = if (validate) 1 else 28
+        val timings = TimingTracker(iterationCount, 0)
+        repeat(iterationCount) { index ->
+            println("*** Iteration ${index + 1} of $iterationCount ****")
+            //startMethodTracing("myTracing")
+            //startMethodTracingSampling("myTracing_sample", 8000000, 10)
+            val summary = tester.testAll(timings)
+            //stopMethodTracing()
+            findViewById<TextView>(R.id.sample_text).text = "$summary\n\n${timings.report()}"
+            timings.nextIteration()
+        }
+        tester.destroy()
+    }
+}
diff --git a/toolkit/test/ReferenceBlend.kt b/toolkit/test/ReferenceBlend.kt
new file mode 100644
index 0000000..ba60bc8
--- /dev/null
+++ b/toolkit/test/ReferenceBlend.kt
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.BlendingMode
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a Blend operation.
+ *
+ * The blended values are stored in the destination array; nothing is returned.
+ * See the class Rgba for details of arithmetic operation using that class.
+ */
+@ExperimentalUnsignedTypes
+fun referenceBlend(
+    mode: BlendingMode,
+    sourceArray: ByteArray,
+    destArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    restriction: Range2d?
+) {
+    val srcPixels = Rgba2dArray(sourceArray, sizeX, sizeY)
+    val dstPixels = Rgba2dArray(destArray, sizeX, sizeY)
+
+    // Calls combine on each corresponding pair of source and destination RGBA values
+    // and stores what it returns in the destination array. The source value is always
+    // passed first.
+    fun applyToAll(combine: (src: Rgba, dst: Rgba) -> Rgba) {
+        dstPixels.forEachCell(restriction) { x, y ->
+            dstPixels[x, y] = combine(srcPixels[x, y], dstPixels[x, y])
+        }
+    }
+
+    when (mode) {
+        BlendingMode.CLEAR -> applyToAll { _, _ -> Rgba(0, 0, 0, 0) }
+        BlendingMode.SRC -> applyToAll { s, _ -> s }
+        BlendingMode.DST -> Unit // Nothing to do: the destination stays as it is.
+        BlendingMode.SRC_OVER -> applyToAll(::blendOver)
+        BlendingMode.DST_OVER -> applyToAll { s, d -> blendOver(d, s) }
+        BlendingMode.SRC_IN -> applyToAll(::blendIn)
+        BlendingMode.DST_IN -> applyToAll { s, d -> blendIn(d, s) }
+        BlendingMode.SRC_OUT -> applyToAll(::blendOut)
+        BlendingMode.DST_OUT -> applyToAll { s, d -> blendOut(d, s) }
+        BlendingMode.SRC_ATOP -> applyToAll(::blendAtop)
+        BlendingMode.DST_ATOP -> applyToAll { s, d -> blendAtop(d, s) }
+        BlendingMode.XOR -> applyToAll { s, d -> s xor d }
+        BlendingMode.MULTIPLY -> applyToAll { s, d -> s * d }
+        BlendingMode.ADD -> applyToAll { s, d -> d + s }
+        BlendingMode.SUBTRACT -> applyToAll { s, d -> d - s }
+    }
+}
+
+@ExperimentalUnsignedTypes
+private fun blendOver(src: Rgba, dst: Rgba): Rgba = src + dst * (255 - src.a) // src, plus dst weighted by (255 - src.a)
+
+@ExperimentalUnsignedTypes
+private fun blendIn(src: Rgba, dst: Rgba): Rgba = src * dst.a // src weighted by the alpha of dst
+
+@ExperimentalUnsignedTypes
+private fun blendOut(src: Rgba, dst: Rgba): Rgba = src * (255 - dst.a) // src weighted by (255 - dst.a)
+
+@ExperimentalUnsignedTypes
+private fun blendAtop(src: Rgba, dst: Rgba): Rgba {
+    val mixed = src * dst.a + dst * (255 - src.a)
+    // Overwrite the alpha of the mix with the destination's alpha.
+    return mixed.also { it.a = dst.a }
+}
diff --git a/toolkit/test/ReferenceBlur.kt b/toolkit/test/ReferenceBlur.kt
new file mode 100644
index 0000000..66c2a05
--- /dev/null
+++ b/toolkit/test/ReferenceBlur.kt
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+import kotlin.math.max
+import kotlin.math.min
+import kotlin.math.pow
+import kotlin.math.sqrt
+
+/**
+ * Reference implementation of a Blur operation: a horizontal gaussian pass followed by
+ * a vertical one. [radius] must be in 1..25, otherwise IllegalArgumentException is thrown.
+ */
+@ExperimentalUnsignedTypes
+fun referenceBlur(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    radius: Int = 5,
+    restriction: Range2d?
+): ByteArray {
+    val maxRadius = 25
+    require(radius in 1..maxRadius) {
+        "RenderScriptToolkit blur. Radius should be between 1 and $maxRadius. $radius provided."
+    }
+    val weights = buildGaussian(radius)
+    // The blur itself runs on floats; convert the bytes first and convert back at the end.
+    val floats = FloatArray(inputArray.size) { byteToUnitFloat(inputArray[it].toUByte()) }
+    val source = FloatVector2dArray(floats, vectorSize, sizeX, sizeY)
+    val horizontal = horizontalBlur(source, weights, radius, restriction)
+    val blurred = verticalBlur(horizontal, weights, radius, restriction).values
+    return ByteArray(blurred.size) { unitFloatClampedToUByte(blurred[it]).toByte() }
+}
+
+/**
+ * Blurs along the horizontal direction using the specified gaussian weights.
+ *
+ * A non-null [restriction] is widened by [radius] rows above and below (clamped to the
+ * array) so that the vertical pass will have all the data it needs.
+ */
+private fun horizontalBlur(
+    input: FloatVector2dArray,
+    gaussian: FloatArray,
+    radius: Int,
+    restriction: Range2d?
+): FloatVector2dArray {
+    val widened: Range2d? = restriction?.let {
+        Range2d(
+            it.startX,
+            it.endX,
+            max(it.startY - radius, 0),
+            min(it.endY + radius, input.sizeY)
+        )
+    }
+
+    input.clipAccessToRange = true
+    val blurred = input.createSameSized()
+    blurred.forEach(widened) { x, y ->
+        for (tap in 0..2 * radius) {
+            // gaussian[tap] weights the sample at offset (tap - radius) from x.
+            blurred[x, y] += input[x + tap - radius, y] * gaussian[tap]
+        }
+    }
+    return blurred
+}
+
+/**
+ * Blurs along the vertical direction using the specified gaussian weights.
+ */
+private fun verticalBlur(
+    input: FloatVector2dArray,
+    gaussian: FloatArray,
+    radius: Int,
+    restriction: Range2d?
+): FloatVector2dArray {
+    input.clipAccessToRange = true
+    val blurred = input.createSameSized()
+    blurred.forEach(restriction) { x, y ->
+        for (tap in 0..2 * radius) {
+            // gaussian[tap] weights the sample at offset (tap - radius) from y.
+            blurred[x, y] += input[x, y + tap - radius] * gaussian[tap]
+        }
+    }
+    return blurred
+}
+
+/**
+ * Builds the normalized gaussian weights shared by the horizontal and vertical blur passes.
+ *
+ * sigma is 0.4 * radius + 0.6, and the weights are scaled so that they sum to 1.
+ *
+ * @return An array of (2 * radius + 1) floats.
+ */
+private fun buildGaussian(radius: Int): FloatArray {
+    val e: Float = kotlin.math.E.toFloat()
+    val pi: Float = kotlin.math.PI.toFloat()
+    val sigma: Float = 0.4f * radius.toFloat() + 0.6f
+    val scale: Float = 1.0f / (sqrt(2.0f * pi) * sigma)
+    val exponentFactor: Float = -1.0f / (2.0f * sigma * sigma)
+
+    // Index i holds the weight for offset (i - radius).
+    val weights = FloatArray(radius * 2 + 1) { i ->
+        val offset = (i - radius).toFloat()
+        scale * e.pow(offset * offset * exponentFactor)
+    }
+
+    // Sum in index order, then normalize so that the sum of the weights equals 1f.
+    var total = 0.0f
+    for (w in weights) total += w
+    val normalizeFactor: Float = 1.0f / total
+    for (i in weights.indices) {
+        weights[i] *= normalizeFactor
+    }
+    return weights
+}
diff --git a/toolkit/test/ReferenceColorMatrix.kt b/toolkit/test/ReferenceColorMatrix.kt
new file mode 100644
index 0000000..75f93af
--- /dev/null
+++ b/toolkit/test/ReferenceColorMatrix.kt
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a ColorMatrix operation: out = matrix * in + addVector per pixel.
+ */
+@ExperimentalUnsignedTypes
+fun referenceColorMatrix(
+    inputArray: ByteArray, inputVectorSize: Int,
+    sizeX: Int, sizeY: Int,
+    outputVectorSize: Int,
+    matrix: FloatArray, addVector: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    require(matrix.size == 16) {
+        "RenderScriptToolkit colorMatrix. Matrix should have 16 values. ${matrix.size} provided."
+    }
+    val source = Vector2dArray(inputArray.asUByteArray(), inputVectorSize, sizeX, sizeY)
+    val resultBytes = ByteArray(sizeX * sizeY * paddedSize(outputVectorSize))
+    val result = Vector2dArray(resultBytes.asUByteArray(), outputVectorSize, sizeX, sizeY)
+    result.forEach(restriction) { x, y ->
+        val pixel = source[x, y]
+        // Components at or beyond inputVectorSize are treated as 0.
+        val asFloats = FloatArray(4) { if (it < inputVectorSize) byteToUnitFloat(pixel[it]) else 0f }
+        val transformed = multiplyAndAdd(matrix, asFloats, addVector)
+        result[x, y] = UByteArray(paddedSize(result.vectorSize)) { unitFloatClampedToUByte(transformed[it]) }
+    }
+    return resultBytes
+}
+
+/** Returns addVector + matrix * inVector for a 4x4 [matrix] stored in column-major order. */
+private fun multiplyAndAdd(matrix: FloatArray, inVector: FloatArray, addVector: FloatArray): FloatArray {
+    val result = addVector.clone()
+    for (row in 0 until 4) {
+        var accumulated = result[row]
+        for (column in 0 until 4) accumulated += matrix[column * 4 + row] * inVector[column]
+        result[row] = accumulated
+    }
+    return result
+}
diff --git a/toolkit/test/ReferenceConvolve.kt b/toolkit/test/ReferenceConvolve.kt
new file mode 100644
index 0000000..b9181a9
--- /dev/null
+++ b/toolkit/test/ReferenceConvolve.kt
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a Convolve operation.
+ * [coefficients] must hold 9 (3x3) or 25 (5x5) values, else IllegalArgumentException is thrown.
+ */
+@ExperimentalUnsignedTypes
+fun referenceConvolve(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    coefficients: FloatArray,
+    restriction: Range2d?
+): ByteArray {
+    val source = Vector2dArray(inputArray.asUByteArray(), vectorSize, sizeX, sizeY)
+    val radius = when (val count = coefficients.size) {
+        9 -> 1
+        25 -> 2
+        else -> throw IllegalArgumentException(
+            "RenderScriptToolkit Convolve. Only 3x3 and 5x5 convolutions are supported. " +
+                "$count coefficients provided.")
+    }
+    source.clipReadToRange = true
+    val result = source.createSameSized()
+    source.forEach(restriction) { x, y ->
+        result[x, y] = convolveOne(source, x, y, coefficients, radius)
+    }
+    return result.values.asByteArray()
+}
+
+/** Sums the neighborhood of (x, y) weighted by [coefficients] (stored row by row), then clamps. */
+@ExperimentalUnsignedTypes
+private fun convolveOne(
+    inputAlloc: Vector2dArray,
+    x: Int,
+    y: Int,
+    coefficients: FloatArray,
+    radius: Int
+): UByteArray {
+    var total = FloatArray(paddedSize(inputAlloc.vectorSize))
+    val side = 2 * radius + 1
+    for (row in 0 until side) {
+        for (column in 0 until side) {
+            val neighbor = inputAlloc[x + column - radius, y + row - radius]
+            total += neighbor.toFloatArray() * coefficients[row * side + column]
+        }
+    }
+    return total.clampToUByte()
+}
diff --git a/toolkit/test/ReferenceHistogram.kt b/toolkit/test/ReferenceHistogram.kt
new file mode 100644
index 0000000..6bd9167
--- /dev/null
+++ b/toolkit/test/ReferenceHistogram.kt
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a Histogram operation.
+ *
+ * Returns an array of paddedSize(vectorSize) * 256 ints, interleaved by channel.
+ * For a four-channel RGBA input:
+ * Position 0 is the number of R with a value of 0,
+ * Position 1 is the number of G with a value of 0,
+ * Position 2 is the number of B with a value of 0,
+ * Position 3 is the number of A with a value of 0,
+ * Position 4 is the number of R with a value of 1, etc.
+ */
+@ExperimentalUnsignedTypes
+fun referenceHistogram(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    restriction: Range2d?
+): IntArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, sizeX, sizeY)
+    val stride = paddedSize(input.vectorSize)
+    val counts = IntArray(stride * 256)
+    input.forEach(restriction) { x, y ->
+        val pixel = input[x, y]
+        for (channel in 0 until vectorSize) {
+            counts[pixel[channel].toInt() * stride + channel]++
+        }
+    }
+    return counts
+}
+
+/**
+ * Reference implementation of a HistogramDot operation.
+ *
+ * Each RGBA input value is dot-multiplied first by the specified coefficients.
+ * The resulting value is converted to an integer and used for the histogram.
+ *
+ * [coefficients] defaults to (0.299, 0.587, 0.114, 0) when null. Every coefficient must be
+ * >= 0 and their sum must be <= 1, otherwise IllegalArgumentException is thrown.
+ * @return An array of 256 counts.
+ */
+@ExperimentalUnsignedTypes
+fun referenceHistogramDot(
+    inputArray: ByteArray,
+    vectorSize: Int,
+    sizeX: Int,
+    sizeY: Int,
+    coefficients: FloatArray?,
+    restriction: Range2d?
+): IntArray {
+    val weights = coefficients ?: floatArrayOf(0.299f, 0.587f, 0.114f, 0f)
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, sizeX, sizeY)
+    weights.forEach { c ->
+        require(c >= 0) {
+            "RenderScriptToolkit histogramDot. Coefficients must be positive. $c provided."
+        }
+    }
+    val coefficientSum = weights.sum()
+    require(coefficientSum <= 1f) {
+        "RenderScriptToolkit histogramDot. Coefficients should " +
+            "add to 1.0 or less. $coefficientSum provided."
+    }
+
+    // Fixed-point (8 fractional bits) versions of the coefficients, rounded to nearest.
+    val fixedPoint = IntArray(input.vectorSize) { (weights[it] * 256f + 0.5f).toInt() }
+
+    val counts = IntArray(256)
+    input.forEach(restriction) { x, y ->
+        val pixel = input[x, y]
+        // Integer math is used on purpose: floats would not give the same results as
+        // the existing intrinsics. Only the first vectorSize components are accumulated,
+        // which matters when vectorSize == 3.
+        val dot = (0 until vectorSize).fold(0) { acc, i -> acc + fixedPoint[i] * pixel[i].toInt() }
+        // Round up and normalize
+        counts[(dot + 0x7f) shr 8]++
+    }
+    return counts
+}
diff --git a/toolkit/test/ReferenceLut.kt b/toolkit/test/ReferenceLut.kt
new file mode 100644
index 0000000..cd832f0
--- /dev/null
+++ b/toolkit/test/ReferenceLut.kt
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.LookupTable
+import android.renderscript.toolkit.Range2d
+
+/**
+ * Reference implementation of a LookUpTable operation.
+ *
+ * The input is read as four bytes per pixel. Each of the four channels is replaced by the
+ * entry of its own table (red, green, blue, alpha, in that order) at the index given by
+ * the channel's value.
+ */
+@ExperimentalUnsignedTypes
+fun referenceLut(
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    table: LookupTable,
+    restriction: Range2d?
+): ByteArray {
+    val source = Vector2dArray(inputArray.asUByteArray(), 4, sizeX, sizeY)
+    val result = source.createSameSized()
+
+    // Table order matches the channel order of a pixel.
+    val tables = arrayOf(table.red, table.green, table.blue, table.alpha)
+    source.forEach(restriction) { x, y ->
+        val pixel = source[x, y]
+        result[x, y] = ByteArray(4) { tables[it][pixel[it].toInt()] }.asUByteArray()
+    }
+    return result.values.asByteArray()
+}
+
diff --git a/toolkit/test/ReferenceLut3d.kt b/toolkit/test/ReferenceLut3d.kt
new file mode 100644
index 0000000..afd977b
--- /dev/null
+++ b/toolkit/test/ReferenceLut3d.kt
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+import android.renderscript.toolkit.Rgba3dArray
+
+/**
+ * Reference implementation of a 3D LookUpTable operation: maps each RGBA pixel through [cube].
+ */
+@ExperimentalUnsignedTypes
+fun referenceLut3d(
+    inputArray: ByteArray,
+    sizeX: Int,
+    sizeY: Int,
+    cube: Rgba3dArray,
+    restriction: Range2d?
+): ByteArray {
+    val pixels = Vector2dArray(inputArray.asUByteArray(), 4, sizeX, sizeY)
+    val mapped = pixels.createSameSized()
+    pixels.forEach(restriction) { x, y ->
+        mapped[x, y] = lookup(pixels[x, y], cube)
+    }
+    return mapped.values.asByteArray()
+}
+
+/** Trilinearly interpolates [cube] at the position given by the RGB channels of [input]. */
+@ExperimentalUnsignedTypes
+private fun lookup(input: UByteArray, cube: Rgba3dArray): UByteArray {
+    // Scale the 0..255 channel values to cube coordinates, then find the two opposite
+    // corners of the size 1 cell that contains the resulting point.
+    val lastIndex = Int4(cube.sizeX - 1, cube.sizeY - 1, cube.sizeZ - 1, 0)
+    val position: Float4 = input.toFloat4() * lastIndex.toFloat4() / 255f
+    val low: Int4 = position.intFloor()
+    val high: Int4 = min(low + 1, lastIndex)
+    val t: Float4 = position - low.toFloat4()
+
+    // Fetch the RGBA values at the eight corners of the cell (suffix digits = x, y, z corner).
+    val c000 = cube[low.x, low.y, low.z].toFloat4()
+    val c100 = cube[high.x, low.y, low.z].toFloat4()
+    val c010 = cube[low.x, high.y, low.z].toFloat4()
+    val c110 = cube[high.x, high.y, low.z].toFloat4()
+    val c001 = cube[low.x, low.y, high.z].toFloat4()
+    val c101 = cube[high.x, low.y, high.z].toFloat4()
+    val c011 = cube[low.x, high.y, high.z].toFloat4()
+    val c111 = cube[high.x, high.y, high.z].toFloat4()
+
+    // Interpolate along x, then y, then z.
+    val x00 = mix(c000, c100, t.x)
+    val x10 = mix(c010, c110, t.x)
+    val x01 = mix(c001, c101, t.x)
+    val x11 = mix(c011, c111, t.x)
+
+    val y0 = mix(x00, x10, t.y)
+    val y1 = mix(x01, x11, t.y)
+    val rgb = mix(y0, y1, t.z)
+
+    // Preserve the alpha of the original value
+    return ubyteArrayOf(rgb.x.clampToUByte(), rgb.y.clampToUByte(), rgb.z.clampToUByte(), input[3])
+}
diff --git a/toolkit/test/ReferenceResize.kt b/toolkit/test/ReferenceResize.kt
new file mode 100644
index 0000000..023825e
--- /dev/null
+++ b/toolkit/test/ReferenceResize.kt
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.Range2d
+import kotlin.math.floor
+import kotlin.math.max
+
+var trace = false // Presumably a debug tracing switch for code elsewhere; this file never reads it.
+
+/**
+ * Reference implementation of a Resize operation.
+ *
+ * Every output pixel is the bicubic interpolation of the [inSizeX] by [inSizeY] input image,
+ * clamped to the range of an unsigned byte. [restriction] is handed to the output array's
+ * forEach; presumably it limits which pixels are computed, the rest staying at zero.
+ */
+@ExperimentalUnsignedTypes
+fun referenceResize(inputArray: ByteArray,
+                    vectorSize: Int,
+                    inSizeX: Int,
+                    inSizeY: Int,
+                    outSizeX: Int, outSizeY: Int,
+                    restriction: Range2d?): ByteArray {
+    val input = Vector2dArray(inputArray.asUByteArray(), vectorSize, inSizeX, inSizeY)
+    // Ratio of input to output size, used to map output coordinates back onto the input.
+    val scaleX: Float = input.sizeX.toFloat() / outSizeX.toFloat()
+    val scaleY: Float = input.sizeY.toFloat() / outSizeY.toFloat()
+    val outArray = UByteArray(outSizeX * outSizeY * paddedSize(input.vectorSize))
+    val out = Vector2dArray(outArray, input.vectorSize, outSizeX, outSizeY)
+    out.forEach(restriction) { x, y ->
+        out[x, y] = bicubicU4(x, y, input, scaleX, scaleY).clampToUByte()
+    }
+    return out.values.asByteArray()
+}
+
+/** Catmull-Rom cubic interpolation between [p1] and [p2] at fraction [x], with [p0] and [p3]
+ *  as the outer control points. The FloatArray operators are assumed to act per element. */
+private fun cubicInterpolateF(p0: FloatArray, p1: FloatArray, p2: FloatArray, p3: FloatArray,
+                              x: Float): FloatArray =
+    p1 + (p2 - p0 + (p0 * 2f - p1 * 5f + p2 * 4f - p3 + ((p1 - p2) * 3f + p3 - p0) * x) * x) * x * 0.5f
+
+/**
+ * Computes one output pixel of the resize by bicubic interpolation of [gIn].
+ *
+ * The center of output pixel ([x], [y]) is mapped to input coordinates using [scaleX] and
+ * [scaleY]. The 4 by 4 input pixels around that position are then combined: each of the
+ * four rows is interpolated horizontally, and the four row results vertically.
+ *
+ * Sample coordinates before the first or past the last row or column are clamped to the edge.
+ *
+ * @return the interpolated pixel as floats; the caller clamps it to bytes.
+ */
+@ExperimentalUnsignedTypes
+private fun bicubicU4(x: Int, y: Int, gIn: Vector2dArray, scaleX: Float, scaleY: Float): FloatArray {
+    // Position of the output pixel's center, expressed in input pixel coordinates.
+    val sourceX: Float = (x + 0.5f) * scaleX - 0.5f
+    val sourceY: Float = (y + 0.5f) * scaleY - 0.5f
+
+    // Fractional distance past the sample just before the position; the interpolation weight.
+    val fractionX: Float = sourceX - floor(sourceX)
+    val fractionY: Float = sourceY - floor(sourceY)
+
+    // The 4 by 4 neighbourhood starts one sample before the position.
+    val firstX: Int = floor(sourceX - 1).toInt()
+    val firstY: Int = floor(sourceY - 1).toInt()
+    val lastColumn: Int = gIn.sizeX - 1
+    val lastRow: Int = gIn.sizeY - 1
+
+    // The two leading samples are clamped against 0, the two trailing ones against the last index.
+    val columns = intArrayOf(
+        max(0, firstX + 0),
+        max(0, firstX + 1),
+        kotlin.math.min(lastColumn, firstX + 2),
+        kotlin.math.min(lastColumn, firstX + 3)
+    )
+    val rows = intArrayOf(
+        max(0, firstY + 0),
+        max(0, firstY + 1),
+        kotlin.math.min(lastRow, firstY + 2),
+        kotlin.math.min(lastRow, firstY + 3)
+    )
+
+    // Interpolate horizontally within each row, then vertically across the row results.
+    val perRow = rows.map { row ->
+        val samples = columns.map { column -> gIn[column, row].toFloatArray() }
+        cubicInterpolateF(samples[0], samples[1], samples[2], samples[3], fractionX)
+    }
+    return cubicInterpolateF(perRow[0], perRow[1], perRow[2], perRow[3], fractionY)
+}
+
+
+/* To be used if we implement Floats
+private fun bicubic_F4(x: Int, y: Int, gin: ByteArray, sizeX: Int, sizeY: Int, scaleX: Float, scaleY: Float): Float4 {
+    var xf: Float = (x + 0.5f) * scaleX - 0.5f
+    var yf: Float = (y + 0.5f) * scaleY - 0.5f
+
+    val startX: Int = floor(xf - 1).toInt()
+    val startY: Int = floor(yf - 1).toInt()
+    xf = xf - floor(xf)
+    yf = yf - floor(yf)
+    val maxX: Int = sizeX - 1
+    val maxY: Int = sizeY - 1
+
+    val xs0: Int = max(0, startX + 0)
+    val xs1: Int = max(0, startX + 1)
+    val xs2: Int = min(maxX, startX + 2)
+    val xs3: Int = min(maxX, startX + 3)
+
+    val ys0: Int = max(0, startY + 0)
+    val ys1: Int = max(0, startY + 1)
+    val ys2: Int = min(maxY, startY + 2)
+    val ys3: Int = min(maxY, startY + 3)
+
+    val p00: Float4 = rsGetElementAt_Float4(gIn, xs0, ys0)
+    val p01: Float4 = rsGetElementAt_Float4(gIn, xs1, ys0)
+    val p02: Float4 = rsGetElementAt_Float4(gIn, xs2, ys0)
+    val p03: Float4 = rsGetElementAt_Float4(gIn, xs3, ys0)
+    val p0: Float4  = cubicInterpolate_F4(p00, p01, p02, p03, xf)
+
+    val p10: Float4 = rsGetElementAt_Float4(gIn, xs0, ys1)
+    val p11: Float4 = rsGetElementAt_Float4(gIn, xs1, ys1)
+    val p12: Float4 = rsGetElementAt_Float4(gIn, xs2, ys1)
+    val p13: Float4 = rsGetElementAt_Float4(gIn, xs3, ys1)
+    val p1: Float4  = cubicInterpolate_F4(p10, p11, p12, p13, xf)
+
+    val p20: Float4 = rsGetElementAt_Float4(gIn, xs0, ys2)
+    val p21: Float4 = rsGetElementAt_Float4(gIn, xs1, ys2)
+    val p22: Float4 = rsGetElementAt_Float4(gIn, xs2, ys2)
+    val p23: Float4 = rsGetElementAt_Float4(gIn, xs3, ys2)
+    val p2: Float4  = cubicInterpolate_F4(p20, p21, p22, p23, xf)
+
+    val p30: Float4 = rsGetElementAt_Float4(gIn, xs0, ys3)
+    val p31: Float4 = rsGetElementAt_Float4(gIn, xs1, ys3)
+    val p32: Float4 = rsGetElementAt_Float4(gIn, xs2, ys3)
+    val p33: Float4 = rsGetElementAt_Float4(gIn, xs3, ys3)
+    val p3: Float4  = cubicInterpolate_F4(p30, p31, p32, p33, xf)
+
+    val p: Float4  = cubicInterpolate_F4(p0, p1, p2, p3, yf)
+
+    return p
+}
+*/
diff --git a/toolkit/test/ReferenceYuvToRgb.kt b/toolkit/test/ReferenceYuvToRgb.kt
new file mode 100644
index 0000000..4d91cf6
--- /dev/null
+++ b/toolkit/test/ReferenceYuvToRgb.kt
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+import android.renderscript.toolkit.YuvFormat
+import java.lang.IllegalArgumentException
+
+/**
+ * Reference implementation of a YUV to RGB operation.
+ *
+ * @param inputSignedArray the YUV image, laid out as described by [format].
+ * @return [sizeX] * [sizeY] RGBA pixels of four bytes each.
+ * @throws IllegalArgumentException if [sizeX] is odd or [format] is not handled.
+ */
+@ExperimentalUnsignedTypes
+fun referenceYuvToRgb(inputSignedArray: ByteArray, sizeX: Int, sizeY: Int, format: YuvFormat): ByteArray {
+    require(sizeX % 2 == 0) { "The width of the input should be even."}
+    val inputArray = inputSignedArray.asUByteArray()
+    val outputArray = ByteArray(sizeX * sizeY * 4)
+    val output = Vector2dArray(outputArray.asUByteArray(), 4, sizeX, sizeY)
+
+    when (format) {
+        YuvFormat.NV21 -> {
+            val startY = 0
+            val startU = sizeX * sizeY + 1
+            val startV = sizeX * sizeY // the chroma plane interleaves V and U bytes, V first
+            for (y in 0 until sizeY) {
+                for (x in 0 until sizeX) {
+                    val offsetY = y * sizeX + x
+                    val offsetUV = (y shr 1) * sizeX + (x shr 1) * 2 // one V,U pair per 2 x 2 pixels
+                    output[x, y] = yuvToRGBA4(
+                        inputArray[startY + offsetY],
+                        inputArray[startU + offsetUV],
+                        inputArray[startV + offsetUV]
+                    )
+                }
+            }
+        }
+
+        YuvFormat.YV12 -> {
+            /* According to https://developer.android.com/reference/kotlin/android/graphics/ImageFormat#yv12,
+             * strideX and strideUV should be aligned to 16 byte boundaries. If we do this, we
+             * won't get the same results as RenderScript. We may want to test & require that
+             * sizeX is a multiple of 16/32.
+             * NOTE(review): that page places the Cr (V) plane before the Cb (U) plane, while this
+             * code reads the first chroma plane as U; confirm which order the Toolkit expects. */
+            val strideX = roundUpTo16(sizeX) // bytes per row of the Y plane
+            val strideUV = roundUpTo16(strideX / 2) // bytes per row of each chroma plane
+            val startY = 0
+            val startU = strideX * sizeY
+            val startV = startU + strideUV * sizeY / 2
+            for (y in 0 until sizeY) {
+                for (x in 0 until sizeX) {
+                    val offsetY = y * strideX + x // Y rows are strideX apart, as startU assumes
+                    val offsetUV = (y shr 1) * strideUV + (x shr 1)
+                    output[x, y] = yuvToRGBA4(
+                        inputArray[startY + offsetY],
+                        inputArray[startU + offsetUV],
+                        inputArray[startV + offsetUV],
+                    )
+                }
+            }
+        }
+        else -> throw IllegalArgumentException("Unknown YUV format $format")
+    }
+
+    return outputArray
+}
+
+/** Converts one YUV sample to an opaque RGBA pixel with fixed point integer arithmetic:
+ *  the coefficients are scaled by 256, and 128 is added before shifting right by 8 to round.
+ *  Each channel is passed through clampToUByte. */
+@ExperimentalUnsignedTypes
+private fun yuvToRGBA4(y: UByte, u: UByte, v: UByte): UByteArray {
+    val luma = (y.toInt() - 16) * 298
+    val chromaU = u.toInt() - 128
+    val chromaV = v.toInt() - 128
+    val red = (luma + chromaV * 409 + 128) shr 8
+    val green = (luma - chromaU * 100 - chromaV * 208 + 128) shr 8
+    val blue = (luma + chromaU * 516 + 128) shr 8
+    return ubyteArrayOf(red.clampToUByte(), green.clampToUByte(), blue.clampToUByte(), 255.clampToUByte())
+}
+
+/* To be used if we support Float
+private fun yuvToRGBA_f4(y: UByte, u: UByte, v: UByte): UByteArray {
+    val yuv_U_values = floatArrayOf(0f, -0.392f * 0.003921569f, 2.02f * 0.003921569f, 0f)
+    val yuv_V_values = floatArrayOf(1.603f * 0.003921569f, -0.815f * 0.003921569f, 0f, 0f)
+
+    var color = FloatArray(4) {y.toFloat() * 0.003921569f}
+    val fU = FloatArray(4) {u.toFloat() - 128f}
+    val fV = FloatArray(4) {v.toFloat() - 128f}
+
+    color += fU * yuv_U_values;
+    color += fV * yuv_V_values;
+    //color = clamp(color, 0.f, 1.f);
+    return UByteArray(4) { unitFloatClampedToUByte(color[it]) }
+}
+*/
diff --git a/toolkit/test/TimingTracker.kt b/toolkit/test/TimingTracker.kt
new file mode 100644
index 0000000..81e90f2
--- /dev/null
+++ b/toolkit/test/TimingTracker.kt
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.testapp
+
+/**
+ * Records how long named pieces of work take, in microseconds, over several iterations.
+ */
+class TimingTracker(
+    private val numberOfIterations: Int = 1,
+    private val numberOfIterationsToIgnore: Int = 0
+) {
+    init {
+        require(numberOfIterations > numberOfIterationsToIgnore)
+    }
+    private val timings = mutableMapOf<String, IntArray>()
+    private var currentIteration: Int = 0
+    /** Advances to the next iteration; [measure] throws once numberOfIterations is reached. */
+    fun nextIteration() {
+        currentIteration++
+    }
+    /** Runs [workToTime] and returns its result. Past the ignored iterations, its duration is added to [name]. */
+    fun <T> measure(name: String, workToTime: () -> T): T {
+        val start = System.nanoTime()
+        val result = workToTime()
+        val end = System.nanoTime() // Read the clock before doing any bookkeeping.
+        if (currentIteration >= numberOfIterationsToIgnore) {
+            val timing = timings.getOrPut(name) { IntArray(numberOfIterations - numberOfIterationsToIgnore) }
+            timing[currentIteration - numberOfIterationsToIgnore] += ((end - start) / 1000).toInt()
+        }
+        return result
+    }
+    /** Prints the best time per name. Returns all timings, the timings relative to the overall
+     *  best, and the best time per name, separated by blank lines. */
+    fun report(): String {
+        // Guard against a best time of 0 microseconds, which would format as Infinity or NaN.
+        val minimum = (timings.values.mapNotNull { it.minOrNull() }.minOrNull() ?: Int.MAX_VALUE).coerceAtLeast(1)
+        val best = timings.map { (name, timing) -> name + ": " + timing.minOrNull() }
+        println(best.joinToString(separator = "\n"))
+        return (timings.map { (name, timing) -> name + ": " + timing.joinToString() }.joinToString() + "\n\n" +
+                timings.map { (name, timing) -> name + ": " + timing.joinToString { "%.2f".format(it.toFloat() / minimum) } }.joinToString() + "\n\n" +
+                best.joinToString())
+    }
+}
+
diff --git a/toolkit/test/res/drawable-nodpi/img800x450a.jpg b/toolkit/test/res/drawable-nodpi/img800x450a.jpg
new file mode 100644
index 0000000..6d5b623
--- /dev/null
+++ b/toolkit/test/res/drawable-nodpi/img800x450a.jpg
Binary files differ
diff --git a/toolkit/test/res/drawable-nodpi/img800x450b.jpg b/toolkit/test/res/drawable-nodpi/img800x450b.jpg
new file mode 100644
index 0000000..2013e07
--- /dev/null
+++ b/toolkit/test/res/drawable-nodpi/img800x450b.jpg
Binary files differ