When running with <= 4 system threads, only run one renderthread slice at a time

If the user doesn't have hyperthreading or has an older machine with
fewer cores, it can be advantageous to run only one renderthread slice
at a time.

Note: Vulkan must run in unlimited mode because its order maintenance
operations require coordination between more than one thread (see
vkCommandBufferHostSyncGOOGLE).

Manual cherry-pick from aosp/1478711

Change-Id: I45ad0910fee4826037fb740a8b1f9087a7faf4c6
diff --git a/stream-servers/RenderThread.cpp b/stream-servers/RenderThread.cpp
index 39201fa..5d223ca 100644
--- a/stream-servers/RenderThread.cpp
+++ b/stream-servers/RenderThread.cpp
@@ -33,12 +33,17 @@
 #include "base/System.h"
 #include "base/Tracing.h"
 #include "base/StreamSerializing.h"
+#include "base/Lock.h"
 #include "base/MessageChannel.h"
 
 #define EMUGL_DEBUG_LEVEL 0
 #include "host-common/crash_reporter.h"
 #include "host-common/debug.h"
 
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
 #include <assert.h>
 #include <string.h>
 
@@ -61,6 +66,17 @@
     return false;
 }
 
+static int getCpuCoreCount() {  // Returns the processor count reported by the OS, never less than 1.
+#ifdef _WIN32
+    SYSTEM_INFO si = {};
+    ::GetSystemInfo(&si);
+    return si.dwNumberOfProcessors < 1 ? 1 : si.dwNumberOfProcessors;  // treat a zero count as 1
+#else
+    auto res = (int)sysconf(_SC_NPROCESSORS_ONLN);  // count of processors currently online
+    return res < 1 ? 1 : res;  // clamp non-positive results (e.g. a failed query) to 1
+#endif
+}
+
 static uint64_t currTimeUs(bool enable) {
     if (enable) {
         return android::base::getHighResTimeUs();
@@ -72,10 +88,18 @@
 // Start with a smaller buffer to not waste memory on a low-used render threads.
 static constexpr int kStreamBufferSize = 128 * 1024;
 
+// The system must have at least this many threads available to run unlimited.
+static constexpr int kMinThreadsToRunUnlimited = 5;
+
+// A thread run limiter that limits render threads to run one slice at a time.
+static android::base::Lock sThreadRunLimiter;
+
 RenderThread::RenderThread(RenderChannelImpl* channel,
                            android::base::Stream* loadStream)
     : android::base::Thread(android::base::ThreadFlags::MaskSignals, 2 * 1024 * 1024),
-      mChannel(channel) {
+      mChannel(channel),
+      mRunInLimitedMode(getCpuCoreCount() < kMinThreadsToRunUnlimited)
+{
     if (loadStream) {
         const bool success = loadStream->getByte();
         if (success) {
@@ -395,6 +419,10 @@
 
         do {
 
+            if (mRunInLimitedMode) {
+                sThreadRunLimiter.lock();
+            }
+
             progress = false;
             size_t last;
 
@@ -411,6 +439,10 @@
                 }
             }
 
+            if (mRunInLimitedMode) {
+                sThreadRunLimiter.unlock();
+            }
+
             // try to process some of the command buffer using the GLESv1
             // decoder
             //