Add a functional 3D LUT intrinsic.

Processing a 1600x1000 input takes ~23ms on manta.

Change-Id: I142d6dedded66df05aa5f49e3da409a34c6e1b6e
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index e22b730..75fc3f1 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -433,6 +433,8 @@
     return i;
 }
 
+extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
+                                             const Script *s, const Element *e);
 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
                                                    const Script *s, const Element *e);
 extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
@@ -453,6 +455,9 @@
 
     RsdCpuScriptImpl *i = NULL;
     switch (iid) {
+    case RS_SCRIPT_INTRINSIC_ID_3DLUT:
+        i = rsdIntrinsic_3DLUT(this, s, e);
+        break;
     case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
         i = rsdIntrinsic_Convolve3x3(this, s, e);
         break;