Sync cache pixels before OpenMP loop (possible optimization)
diff --git a/MagickCore/effect.c b/MagickCore/effect.c
index 4d46774..924c47e 100644
--- a/MagickCore/effect.c
+++ b/MagickCore/effect.c
@@ -1635,7 +1635,7 @@
     kuwahara_image=DestroyImage(kuwahara_image);
   return(kuwahara_image);
 }
-
+
 /*
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %                                                                             %
@@ -1655,7 +1655,7 @@
 %  The format of the LocalContrastImage method is:
 %
 %      Image *LocalContrastImage(const Image *image, const double radius,
-%        const double strength, ExceptionInfo *exception)
+%        const double strength,ExceptionInfo *exception)
 %
 %  A description of each parameter follows:
 %
@@ -1692,7 +1692,6 @@
 
   ssize_t
     scanLineSize,
-    thread_count,
     width;
 
   /*
@@ -1717,20 +1716,10 @@
     }
   image_view=AcquireVirtualCacheView(image,exception);
   contrast_view=AcquireAuthenticCacheView(contrast_image,exception);
-  thread_count=1;
-#if defined(MAGICKCORE_OPENMP_SUPPORT)
-  #pragma omp parallel magick_threads(image,image,image->rows,1)
-  {
-    #pragma omp single
-    {
-      thread_count=omp_get_num_threads();
-    }
-  }
-#endif
   scanLineSize=(ssize_t) MagickMax(image->columns,image->rows);
   width=(ssize_t) scanLineSize*0.002f*fabs(radius);
   scanLineSize+=(2*width);
-  scanLinePixels_info=AcquireVirtualMemory((size_t) thread_count*
+  scanLinePixels_info=AcquireVirtualMemory((size_t) GetOpenMPMaximumThreads()*
     scanLineSize,sizeof(*scanLinePixels));
   if (scanLinePixels_info == (MemoryInfo *) NULL)
     {
@@ -1740,7 +1729,9 @@
       ThrowImageException(ResourceLimitError,"MemoryAllocationFailed");
     }
   scanLinePixels=(float *) GetVirtualMemoryBlob(scanLinePixels_info);
-  /* Create intermediate buffer */
+  /*
+    Create intermediate buffer.
+  */
   interImage_info=AcquireVirtualMemory(image->rows*(image->columns+(2*width)),
     sizeof(*interImage));
   if (interImage_info == (MemoryInfo *) NULL)
@@ -1753,8 +1744,9 @@
     }
   interImage=(float *) GetVirtualMemoryBlob(interImage_info);
   totalWeight=(float) ((width+1)*(width+1));
-
-  /* Vertical Pass */
+  /*
+    Vertical pass.
+  */
   {
     ssize_t
       x;
@@ -1765,6 +1757,9 @@
 #endif
     for (x=0; x < (ssize_t) image->columns; x++)
     {
+      const int
+        id = GetOpenMPThreadId();
+
       const Quantum
         *magick_restrict p;
 
@@ -1780,22 +1775,17 @@
         i;
 
       pixels=scanLinePixels;
-#if defined(MAGICKCORE_OPENMP_SUPPORT)
-      pixels+=scanLineSize*omp_get_thread_num();
-#endif
+      pixels+=id*scanLineSize;
       pix=pixels;
       p=GetCacheViewVirtualPixels(image_view,x,-width,1,image->rows+(2*width),
         exception);
-
       for (y=0; y < (ssize_t) image->rows+(2*width); y++)
       {
         *pix++=(float)GetPixelLuma(image,p);
         p+=image->number_channels;
       }
-
       out=interImage+x+width;
-
-      for (y = 0; y < (ssize_t) image->rows; y++)
+      for (y=0; y < (ssize_t) image->rows; y++)
       {
         float
           sum,
@@ -1822,12 +1812,13 @@
         if ((x > (ssize_t) image->columns-width-2) &&
             (x != (ssize_t) image->columns-1))
           *(out+((image->columns-x-1)*2))=*out;
-
         out+=image->columns+(width*2);
       }
     }
   }
-  /* Horizontal Pass */
+  /*
+    Horizontal pass.
+  */
   {
     ssize_t
       y;
@@ -1838,6 +1829,9 @@
 #endif
     for (y=0; y < (ssize_t) image->rows; y++)
     {
+      const int
+        id = GetOpenMPThreadId();
+
       const Quantum
         *magick_restrict p;
 
@@ -1855,17 +1849,13 @@
         i;
 
       pixels=scanLinePixels;
-#if defined(MAGICKCORE_OPENMP_SUPPORT)
-      pixels+=scanLineSize*omp_get_thread_num();
-#endif
+      pixels+=id*scanLineSize;
       p=GetCacheViewVirtualPixels(image_view,0,y,image->columns,1,
         exception);
       q=GetCacheViewAuthenticPixels(contrast_view,0,y,image->columns,1,
         exception);
-
       memcpy(pixels,interImage+(y*(image->columns+(2*width))),(image->columns+
         (2*width))*sizeof(float));
-
       for (x=0; x < (ssize_t) image->columns; x++)
       {
         float
@@ -1887,7 +1877,6 @@
           sum+=weight*(*pix++);
           weight-=1.0f;
         }
-
         /* Apply and write */
         srcVal=(float) GetPixelLuma(image,p);
         mult=(srcVal-(sum/totalWeight))*(strength/100.0f);