Avoid short reads in read_to_end

ChrisDenton · ChrisDenton · commit 5473402d84bd · 2025-06-23T12:19:14.000Z
On Windows, the UTF-16 to UTF-8 translation is made simpler by ensuring we don't split code points.
diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs
@@ -405,7 +405,7 @@ where
 // - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads
 // - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems
 //   at the same time, i.e. small reads suffer from syscall overhead, all reads incur costs proportional to buffer size (#110650)
-//
+// - also avoid <4 byte reads as this may split UTF-8 code points, which can be a problem for Windows console reads (#142847)
 pub(crate) fn default_read_to_end<R: Read + ?Sized>(
     r: &mut R,
     buf: &mut Vec<u8>,
@@ -452,7 +452,7 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
     let mut consecutive_short_reads = 0;
 
     loop {
-        if buf.len() == buf.capacity() && buf.capacity() == start_cap {
+        if buf.spare_capacity_mut().len() < PROBE_SIZE && buf.capacity() == start_cap {
             // The buffer might be an exact fit. Let's read into a probe buffer
             // and see if it returns `Ok(0)`. If so, we've avoided an
             // unnecessary doubling of the capacity. But if not, append the
@@ -462,12 +462,13 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
             if read == 0 {
                 return Ok(buf.len() - start_len);
             }
+            // In the case of very short reads, continue to use the stack buffer
+            // until either we reach the end or we need to reallocate.
+            continue;
         }
 
-        if buf.len() == buf.capacity() {
-            // buf is full, need more space
-            buf.try_reserve(PROBE_SIZE)?;
-        }
+        // Avoid unnecessarily short reads by ensuring there's at least PROBE_SIZE space available.
+        buf.try_reserve(PROBE_SIZE)?;
 
         let mut spare = buf.spare_capacity_mut();
         let buf_len = cmp::min(spare.len(), max_read_size);