diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 68727a6c40ea0..0b575c13adf2a 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -127,10 +127,39 @@ impl FileLoader for RealFileLoader { let mut bytes = Lrc::new_uninit_slice(len as usize); let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap()); - file.read_buf_exact(buf.unfilled())?; + match file.read_buf_exact(buf.unfilled()) { + Ok(()) => {} + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => { + drop(bytes); + return fs::read(path).map(Vec::into); + } + Err(e) => return Err(e), + } // SAFETY: If the read_buf_exact call returns Ok(()), then we have // read len bytes and initialized the buffer. - Ok(unsafe { bytes.assume_init() }) + let bytes = unsafe { bytes.assume_init() }; + + // At this point, we've read all the bytes that filesystem metadata reported exist. + // But we are not guaranteed to be at the end of the file, because we did not attempt to do + // a read with a non-zero-sized buffer and get Ok(0). + // So we do a small read into a fixed-size buffer. If the read returns no bytes then we're + // already done, and we just return the Lrc we built above. + // If the read returns bytes, however, we just fall back to reading into a Vec then turning + // that into an Lrc, losing our nice peak memory behavior. This fallback code path should + // be rarely exercised. 
+ + let mut probe = [0u8; 32]; + let n = loop { + match file.read(&mut probe) { + Ok(0) => return Ok(bytes), + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + Ok(n) => break n, + } + }; + let mut bytes: Vec = bytes.iter().copied().chain(probe[..n].iter().copied()).collect(); + file.read_to_end(&mut bytes)?; + Ok(bytes.into()) } } diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index e393db0206417..a12f50c87a213 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -567,3 +567,30 @@ fn test_next_point() { assert_eq!(span.hi().0, 6); assert!(sm.span_to_snippet(span).is_err()); } + +#[cfg(target_os = "linux")] +#[test] +fn read_binary_file_handles_lying_stat() { + // read_binary_file tries to read the contents of a file into an Lrc<[u8]> while + // never having two copies of the data in memory at once. This is an optimization + // to support include_bytes! with large files. But since Rust allocators are + // sensitive to alignment, our implementation can't be bootstrapped off calling + // std::fs::read. So we test that we have the same behavior even on files where + // fs::metadata lies. + + // stat always says that /proc/self/cmdline is length 0, but it isn't. + let cmdline = Path::new("/proc/self/cmdline"); + let len = std::fs::metadata(cmdline).unwrap().len() as usize; + let real = std::fs::read(cmdline).unwrap(); + assert!(len < real.len()); + let bin = RealFileLoader.read_binary_file(cmdline).unwrap(); + assert_eq!(&real[..], &bin[..]); + + // stat always says that /sys/devices/system/cpu/kernel_max is the size of a block. 
+ let kernel_max = Path::new("/sys/devices/system/cpu/kernel_max"); + let len = std::fs::metadata(kernel_max).unwrap().len() as usize; + let real = std::fs::read(kernel_max).unwrap(); + assert!(len > real.len()); + let bin = RealFileLoader.read_binary_file(kernel_max).unwrap(); + assert_eq!(&real[..], &bin[..]); +}