Skip to content

Commit 15aa81b

Browse files
authored
perf: Speed up maketx --envlatl when multithreaded by over 10x. (#4825)
[As reported on Slack](https://academysoftwarefdn.slack.com/archives/C05782U3806/p1751642730171029), between OIIO 2.5 and 3.0, we had a big slowdown when doing a multithreaded `maketx --envlatl`, which was traced almost entirely to the new mutex lock in ImageBuf::IteratorBase::init_ib(), which were all happening inside the call stack of `resize_block_<float>()`. The reason was that it was calling ImageBuf::interppixel_NDC for every pixel, which in turn works by creating (internally) an ImageBuf::ConstIterator to sample the 4 nearby pixels. But every one of those ConstIterator constructors called init_ib, which briefly grabbed the mutex for the IB, which in addition to being wasteful on its own, caused the threads to block on each other. The solution is straightforward: there is no need to construct a new Iterator for every pixel. We can create the iterator once (a single call to the init_ib for each thread region of the image) and then for each pixel, just call its `rerange()` method to reset the set of pixels to loop over for the samples needed for that pixel. I also opportunistically eliminated a few redundant spec() calls in various routines in imagebuf.cpp. On my Mac laptop, when doing a maketx --latlong on a 16k image with 16 threads, it was previously taking 170.5s (including 117.8s for the initial resize and 42.8s for the MIP computations). With this change, the same operation takes 12.4s (including 3.7s for the initial resize and 1.6s for he MIP computations). That's almost a 14x speedup. YMMV, depending on platform, compiler, image size, and number of threads. Signed-off-by: Larry Gritz <[email protected]>
1 parent 691acac commit 15aa81b

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

src/libOpenImageIO/imagebuf.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2961,7 +2961,8 @@ ImageBuf::xbegin() const
29612961
int
29622962
ImageBuf::xend() const
29632963
{
2964-
return spec().x + spec().width;
2964+
const ImageSpec& spec(m_impl->spec());
2965+
return spec.x + spec.width;
29652966
}
29662967

29672968

@@ -2977,7 +2978,8 @@ ImageBuf::ybegin() const
29772978
int
29782979
ImageBuf::yend() const
29792980
{
2980-
return spec().y + spec().height;
2981+
const ImageSpec& spec(m_impl->spec());
2982+
return spec.y + spec.height;
29812983
}
29822984

29832985

@@ -2993,7 +2995,8 @@ ImageBuf::zbegin() const
29932995
int
29942996
ImageBuf::zend() const
29952997
{
2996-
return spec().z + std::max(spec().depth, 1);
2998+
const ImageSpec& spec(m_impl->spec());
2999+
return spec.z + std::max(spec.depth, 1);
29973000
}
29983001

29993002

@@ -3009,7 +3012,8 @@ ImageBuf::xmin() const
30093012
int
30103013
ImageBuf::xmax() const
30113014
{
3012-
return spec().x + spec().width - 1;
3015+
const ImageSpec& spec(m_impl->spec());
3016+
return spec.x + spec.width - 1;
30133017
}
30143018

30153019

@@ -3025,7 +3029,8 @@ ImageBuf::ymin() const
30253029
int
30263030
ImageBuf::ymax() const
30273031
{
3028-
return spec().y + spec().height - 1;
3032+
const ImageSpec& spec(m_impl->spec());
3033+
return spec.y + spec.height - 1;
30293034
}
30303035

30313036

@@ -3041,7 +3046,8 @@ ImageBuf::zmin() const
30413046
int
30423047
ImageBuf::zmax() const
30433048
{
3044-
return spec().z + std::max(spec().depth, 1) - 1;
3049+
const ImageSpec& spec(m_impl->spec());
3050+
return spec.z + std::max(spec.depth, 1) - 1;
30453051
}
30463052

30473053

src/libOpenImageIO/maketexture.cpp

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,19 @@ datestring(time_t t)
138138

139139
template<class SRCTYPE>
140140
static void
141-
interppixel_NDC_clamped(const ImageBuf& buf, float x, float y,
142-
span<float> pixel, bool envlatlmode)
141+
interppixel_NDC(const ImageBuf& buf, float x, float y, span<float> pixel,
142+
bool envlatlmode, ImageBuf::ConstIterator<SRCTYPE>& it,
143+
ImageBuf::WrapMode wrapmode)
143144
{
144-
int fx = buf.spec().full_x;
145-
int fy = buf.spec().full_y;
146-
int fw = buf.spec().full_width;
147-
int fh = buf.spec().full_height;
145+
const ImageSpec& spec(buf.spec());
146+
int fx = spec.full_x;
147+
int fy = spec.full_y;
148+
int fw = spec.full_width;
149+
int fh = spec.full_height;
148150
x = static_cast<float>(fx) + x * static_cast<float>(fw);
149151
y = static_cast<float>(fy) + y * static_cast<float>(fh);
150152

151-
int n = buf.spec().nchannels;
153+
int n = spec.nchannels;
152154
float *p0 = OIIO_ALLOCA(float, 4 * n), *p1 = p0 + n, *p2 = p1 + n,
153155
*p3 = p2 + n;
154156

@@ -160,8 +162,7 @@ interppixel_NDC_clamped(const ImageBuf& buf, float x, float y,
160162
yfrac = floorfrac(y, &ytexel);
161163

162164
// Get the four texels
163-
ImageBuf::ConstIterator<SRCTYPE> it(
164-
buf, ROI(xtexel, xtexel + 2, ytexel, ytexel + 2), ImageBuf::WrapClamp);
165+
it.rerange(xtexel, xtexel + 2, ytexel, ytexel + 2, 0, 1, wrapmode);
165166
for (int c = 0; c < n; ++c)
166167
p0[c] = it[c];
167168
++it;
@@ -181,8 +182,10 @@ interppixel_NDC_clamped(const ImageBuf& buf, float x, float y,
181182
// wrong will tend to over-represent the high latitudes in
182183
// low-res MIP levels. We fold the area weighting into our
183184
// linear interpolation by adjusting yfrac.
184-
int ynext = OIIO::clamp(ytexel + 1, buf.ymin(), buf.ymax());
185-
ytexel = OIIO::clamp(ytexel, buf.ymin(), buf.ymax());
185+
int ymin = spec.y;
186+
int ymax = ymin + spec.height - 1;
187+
int ynext = OIIO::clamp(ytexel + 1, ymin, ymax);
188+
ytexel = OIIO::clamp(ytexel, ymin, ymax);
186189
float w0 = (1.0f - yfrac)
187190
* sinf((float)M_PI * (ytexel + 0.5f) / (float)fh);
188191
float w1 = yfrac * sinf((float)M_PI * (ynext + 0.5f) / (float)fh);
@@ -218,15 +221,16 @@ resize_block_(ImageBuf& dst, const ImageBuf& src, ROI roi, bool envlatlmode)
218221
float yscale = 1.0f / (float)dstspec.full_height;
219222
int nchannels = dst.nchannels();
220223
OIIO_DASSERT(dst.spec().format == TypeFloat);
224+
ImageBuf::WrapMode wrapmode = src_is_crop ? ImageBuf::WrapBlack
225+
: ImageBuf::WrapClamp;
226+
ImageBuf::ConstIterator<SRCTYPE> srcit(src);
221227
ImageBuf::Iterator<float> d(dst, roi);
222228
for (int y = y0; y < y1; ++y) {
223229
float t = (y + 0.5f) * yscale + yoffset;
224230
for (int x = x0; x < x1; ++x, ++d) {
225231
float s = (x + 0.5f) * xscale + xoffset;
226-
if (src_is_crop)
227-
src.interppixel_NDC(s, t, pel);
228-
else
229-
interppixel_NDC_clamped<SRCTYPE>(src, s, t, pel, envlatlmode);
232+
interppixel_NDC<SRCTYPE>(src, s, t, pel, envlatlmode, srcit,
233+
wrapmode);
230234
for (int c = 0; c < nchannels; ++c)
231235
d[c] = pel[c];
232236
}
@@ -311,7 +315,7 @@ resize_block(ImageBuf& dst, const ImageBuf& src, ROI roi, bool envlatlmode,
311315
!envlatlmode && // not latlong wrap mode
312316
roi.xbegin == 0 && // Region x at origin
313317
dstspec.width == roi.width() && // Full width ROI
314-
dstspec.width == (srcspec.width / 2) && // Src is 2x resize
318+
(2 * dstspec.width) == srcspec.width && // Src is 2x resize
315319
dstspec.format == srcspec.format && // Same formats
316320
dstspec.x == 0 && dstspec.y == 0 && // Not a crop or overscan
317321
srcspec.x == 0 && srcspec.y == 0) {
@@ -387,15 +391,16 @@ lightprobe_to_envlatl(ImageBuf& dst, const ImageBuf& src, bool y_is_up,
387391

388392
span<float> pixel = OIIO_ALLOCA_SPAN(float, nchannels);
389393
float dw = dstspec.width, dh = dstspec.height;
394+
ImageBuf::ConstIterator<SRCTYPE> srcit(src);
390395
for (ImageBuf::Iterator<float> d(dst, roi); !d.done(); ++d) {
391396
Imath::V3f V = latlong_to_dir((d.x() + 0.5f) / dw,
392397
(dh - 1.0f - d.y() + 0.5f) / dh,
393398
y_is_up);
394399
float r = M_1_PI * acosf(V[2]) / hypotf(V[0], V[1]);
395400
float u = (V[0] * r + 1.0f) * 0.5f;
396401
float v = (V[1] * r + 1.0f) * 0.5f;
397-
interppixel_NDC_clamped<SRCTYPE>(src, float(u), float(v), pixel,
398-
false);
402+
interppixel_NDC<SRCTYPE>(src, float(u), float(v), pixel, false,
403+
srcit, ImageBuf::WrapClamp);
399404
for (int c = roi.chbegin; c < roi.chend; ++c)
400405
d[c] = pixel[c];
401406
}

0 commit comments

Comments
 (0)