Sync cache pixels before OpenMP loop (possible optimization)
diff --git a/MagickCore/effect.c b/MagickCore/effect.c
index 4d46774..924c47e 100644
--- a/MagickCore/effect.c
+++ b/MagickCore/effect.c
@@ -1635,7 +1635,7 @@
kuwahara_image=DestroyImage(kuwahara_image);
return(kuwahara_image);
}
-
+
/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
@@ -1655,7 +1655,7 @@
% The format of the LocalContrastImage method is:
%
% Image *LocalContrastImage(const Image *image, const double radius,
-% const double strength, ExceptionInfo *exception)
+% const double strength,ExceptionInfo *exception)
%
% A description of each parameter follows:
%
@@ -1692,7 +1692,6 @@
ssize_t
scanLineSize,
- thread_count,
width;
/*
@@ -1717,20 +1716,10 @@
}
image_view=AcquireVirtualCacheView(image,exception);
contrast_view=AcquireAuthenticCacheView(contrast_image,exception);
- thread_count=1;
-#if defined(MAGICKCORE_OPENMP_SUPPORT)
- #pragma omp parallel magick_threads(image,image,image->rows,1)
- {
- #pragma omp single
- {
- thread_count=omp_get_num_threads();
- }
- }
-#endif
scanLineSize=(ssize_t) MagickMax(image->columns,image->rows);
width=(ssize_t) scanLineSize*0.002f*fabs(radius);
scanLineSize+=(2*width);
- scanLinePixels_info=AcquireVirtualMemory((size_t) thread_count*
+ scanLinePixels_info=AcquireVirtualMemory((size_t) GetOpenMPMaximumThreads()*
scanLineSize,sizeof(*scanLinePixels));
if (scanLinePixels_info == (MemoryInfo *) NULL)
{
@@ -1740,7 +1729,9 @@
ThrowImageException(ResourceLimitError,"MemoryAllocationFailed");
}
scanLinePixels=(float *) GetVirtualMemoryBlob(scanLinePixels_info);
- /* Create intermediate buffer */
+ /*
+ Create intermediate buffer.
+ */
interImage_info=AcquireVirtualMemory(image->rows*(image->columns+(2*width)),
sizeof(*interImage));
if (interImage_info == (MemoryInfo *) NULL)
@@ -1753,8 +1744,9 @@
}
interImage=(float *) GetVirtualMemoryBlob(interImage_info);
totalWeight=(float) ((width+1)*(width+1));
-
- /* Vertical Pass */
+ /*
+ Vertical pass.
+ */
{
ssize_t
x;
@@ -1765,6 +1757,9 @@
#endif
for (x=0; x < (ssize_t) image->columns; x++)
{
+ const int
+ id = GetOpenMPThreadId();
+
const Quantum
*magick_restrict p;
@@ -1780,22 +1775,17 @@
i;
pixels=scanLinePixels;
-#if defined(MAGICKCORE_OPENMP_SUPPORT)
- pixels+=scanLineSize*omp_get_thread_num();
-#endif
+ pixels+=id*scanLineSize;
pix=pixels;
p=GetCacheViewVirtualPixels(image_view,x,-width,1,image->rows+(2*width),
exception);
-
for (y=0; y < (ssize_t) image->rows+(2*width); y++)
{
*pix++=(float)GetPixelLuma(image,p);
p+=image->number_channels;
}
-
out=interImage+x+width;
-
- for (y = 0; y < (ssize_t) image->rows; y++)
+ for (y=0; y < (ssize_t) image->rows; y++)
{
float
sum,
@@ -1822,12 +1812,13 @@
if ((x > (ssize_t) image->columns-width-2) &&
(x != (ssize_t) image->columns-1))
*(out+((image->columns-x-1)*2))=*out;
-
out+=image->columns+(width*2);
}
}
}
- /* Horizontal Pass */
+ /*
+ Horizontal pass.
+ */
{
ssize_t
y;
@@ -1838,6 +1829,9 @@
#endif
for (y=0; y < (ssize_t) image->rows; y++)
{
+ const int
+ id = GetOpenMPThreadId();
+
const Quantum
*magick_restrict p;
@@ -1855,17 +1849,13 @@
i;
pixels=scanLinePixels;
-#if defined(MAGICKCORE_OPENMP_SUPPORT)
- pixels+=scanLineSize*omp_get_thread_num();
-#endif
+ pixels+=id*scanLineSize;
p=GetCacheViewVirtualPixels(image_view,0,y,image->columns,1,
exception);
q=GetCacheViewAuthenticPixels(contrast_view,0,y,image->columns,1,
exception);
-
memcpy(pixels,interImage+(y*(image->columns+(2*width))),(image->columns+
(2*width))*sizeof(float));
-
for (x=0; x < (ssize_t) image->columns; x++)
{
float
@@ -1887,7 +1877,6 @@
sum+=weight*(*pix++);
weight-=1.0f;
}
-
/* Apply and write */
srcVal=(float) GetPixelLuma(image,p);
mult=(srcVal-(sum/totalWeight))*(strength/100.0f);