diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index e68db4f44ca48..ddfec9f886a6a 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -596,7 +596,9 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P .map(|fmap| { ( escape_dep_filename(&fmap.name.prefer_local().to_string()), - fmap.source_len.0 as u64, + // This needs to be unnormalized, + // as external tools wouldn't know how rustc normalizes them + fmap.unnormalized_source_len as u64, fmap.checksum_hash, ) }) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 808d9fbbc2cef..6c796b3a9c8c5 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -1744,7 +1744,8 @@ impl<'a> CrateMetadataRef<'a> { src_hash, checksum_hash, start_pos: original_start_pos, - source_len, + normalized_source_len, + unnormalized_source_len, lines, multibyte_chars, normalized_pos, @@ -1804,7 +1805,8 @@ impl<'a> CrateMetadataRef<'a> { src_hash, checksum_hash, stable_id, - source_len.to_u32(), + normalized_source_len.to_u32(), + unnormalized_source_len, self.cnum, lines, multibyte_chars, @@ -1817,9 +1819,9 @@ impl<'a> CrateMetadataRef<'a> { translated (start_pos {:?} source_len {:?})", local_version.name, original_start_pos, - source_len, + normalized_source_len, local_version.start_pos, - local_version.source_len + local_version.normalized_source_len ); ImportedSourceFile { diff --git a/compiler/rustc_query_system/src/ich/impls_syntax.rs b/compiler/rustc_query_system/src/ich/impls_syntax.rs index 044b97c2fea19..118229ffc9902 100644 --- a/compiler/rustc_query_system/src/ich/impls_syntax.rs +++ b/compiler/rustc_query_system/src/ich/impls_syntax.rs @@ -54,7 +54,8 @@ impl<'a> HashStable> for SourceFile { checksum_hash: _, external_src: _, start_pos: _, - source_len: _, + normalized_source_len: _, + unnormalized_source_len: _, lines: _, ref multibyte_chars, ref normalized_pos, diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index afd4564f1b6fd..2e03ccb1aa1a3 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1723,8 +1723,10 @@ pub struct SourceFile { pub external_src: FreezeLock, /// The start position of this source in the `SourceMap`. pub start_pos: BytePos, - /// The byte length of this source. - pub source_len: RelativeBytePos, + /// The byte length of this source after normalization. + pub normalized_source_len: RelativeBytePos, + /// The byte length of this source before normalization. + pub unnormalized_source_len: u32, /// Locations of lines beginnings in the source code. pub lines: FreezeLock, /// Locations of multi-byte characters in the source code. @@ -1748,7 +1750,8 @@ impl Clone for SourceFile { checksum_hash: self.checksum_hash, external_src: self.external_src.clone(), start_pos: self.start_pos, - source_len: self.source_len, + normalized_source_len: self.normalized_source_len, + unnormalized_source_len: self.unnormalized_source_len, lines: self.lines.clone(), multibyte_chars: self.multibyte_chars.clone(), normalized_pos: self.normalized_pos.clone(), @@ -1764,7 +1767,8 @@ impl Encodable for SourceFile { self.src_hash.encode(s); self.checksum_hash.encode(s); // Do not encode `start_pos` as it's global state for this session. - self.source_len.encode(s); + self.normalized_source_len.encode(s); + self.unnormalized_source_len.encode(s); // We are always in `Lines` form by the time we reach here. assert!(self.lines.read().is_lines()); @@ -1837,7 +1841,8 @@ impl Decodable for SourceFile { let name: FileName = Decodable::decode(d); let src_hash: SourceFileHash = Decodable::decode(d); let checksum_hash: Option = Decodable::decode(d); - let source_len: RelativeBytePos = Decodable::decode(d); + let normalized_source_len: RelativeBytePos = Decodable::decode(d); + let unnormalized_source_len = Decodable::decode(d); let lines = { let num_lines: u32 = Decodable::decode(d); if num_lines > 0 { @@ -1859,7 +1864,8 @@ impl Decodable for SourceFile { SourceFile { name, start_pos: BytePos::from_u32(0), - source_len, + normalized_source_len, + unnormalized_source_len, src: None, src_hash, checksum_hash, @@ -1959,12 +1965,17 @@ impl SourceFile { SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes()) } }); + // Capture the original source length before normalization. + let unnormalized_source_len = u32::try_from(src.len()).map_err(|_| OffsetOverflowError)?; + if unnormalized_source_len > Self::MAX_FILE_SIZE { + return Err(OffsetOverflowError); + } + let normalized_pos = normalize_src(&mut src); let stable_id = StableSourceFileId::from_filename_in_current_crate(&name); - let source_len = src.len(); - let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?; - if source_len > Self::MAX_FILE_SIZE { + let normalized_source_len = u32::try_from(src.len()).map_err(|_| OffsetOverflowError)?; + if normalized_source_len > Self::MAX_FILE_SIZE { return Err(OffsetOverflowError); } @@ -1977,7 +1988,8 @@ impl SourceFile { checksum_hash, external_src: FreezeLock::frozen(ExternalSource::Unneeded), start_pos: BytePos::from_u32(0), - source_len: RelativeBytePos::from_u32(source_len), + normalized_source_len: RelativeBytePos::from_u32(normalized_source_len), + unnormalized_source_len, lines: FreezeLock::frozen(SourceFileLines::Lines(lines)), multibyte_chars, normalized_pos, @@ -2161,7 +2173,7 @@ impl SourceFile { #[inline] pub fn end_position(&self) -> BytePos { - self.absolute_position(self.source_len) + self.absolute_position(self.normalized_source_len) } /// Finds the line containing the given position. The return value is the @@ -2197,7 +2209,7 @@ impl SourceFile { #[inline] pub fn is_empty(&self) -> bool { - self.source_len.to_u32() == 0 + self.normalized_source_len.to_u32() == 0 } /// Calculates the original byte position relative to the start of the file diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 166842e374b66..17de34c8436f1 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -262,7 +262,7 @@ impl SourceMap { bytes, Span::new( file.start_pos, - BytePos(file.start_pos.0 + file.source_len.0), + BytePos(file.start_pos.0 + file.normalized_source_len.0), SyntaxContext::root(), None, ), @@ -353,14 +353,15 @@ impl SourceMap { src_hash: SourceFileHash, checksum_hash: Option, stable_id: StableSourceFileId, - source_len: u32, + normalized_source_len: u32, + unnormalized_source_len: u32, cnum: CrateNum, file_local_lines: FreezeLock, multibyte_chars: Vec, normalized_pos: Vec, metadata_index: u32, ) -> Arc { - let source_len = RelativeBytePos::from_u32(source_len); + let normalized_source_len = RelativeBytePos::from_u32(normalized_source_len); let source_file = SourceFile { name: filename, @@ -372,7 +373,8 @@ impl SourceMap { metadata_index, }), start_pos: BytePos(0), - source_len, + normalized_source_len, + unnormalized_source_len, lines: file_local_lines, multibyte_chars, normalized_pos, @@ -566,7 +568,7 @@ impl SourceMap { let start_index = local_begin.pos.to_usize(); let end_index = local_end.pos.to_usize(); - let source_len = local_begin.sf.source_len.to_usize(); + let source_len = local_begin.sf.normalized_source_len.to_usize(); if start_index > end_index || end_index > source_len { return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions { @@ -997,7 +999,7 @@ impl SourceMap { return 1; } - let source_len = local_begin.sf.source_len.to_usize(); + let source_len = local_begin.sf.normalized_source_len.to_usize(); debug!("source_len=`{:?}`", source_len); // Ensure indexes are also not malformed. if start_index > end_index || end_index > source_len - 1 { diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 589c2a3635481..c919aacf6b5f4 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -230,7 +230,8 @@ fn t10() { name, src_hash, checksum_hash, - source_len, + normalized_source_len, + unnormalized_source_len, lines, multibyte_chars, normalized_pos, @@ -243,7 +244,8 @@ fn t10() { src_hash, checksum_hash, stable_id, - source_len.to_u32(), + normalized_source_len.to_u32(), + unnormalized_source_len, CrateNum::ZERO, FreezeLock::new(lines.read().clone()), multibyte_chars, diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs index ed1db34463429..64c40e6116250 100644 --- a/compiler/rustc_span/src/tests.rs +++ b/compiler/rustc_span/src/tests.rs @@ -103,3 +103,17 @@ fn test_trim() { assert_eq!(span(well_before, before).trim_start(other), None); } + +#[test] +fn test_unnormalized_source_length() { + let source = "\u{feff}hello\r\nferries\r\n".to_owned(); + let sf = SourceFile::new( + FileName::Anon(Hash64::ZERO), + source, + SourceFileHashAlgorithm::Sha256, + Some(SourceFileHashAlgorithm::Sha256), + ) + .unwrap(); + assert_eq!(sf.unnormalized_source_len, 19); + assert_eq!(sf.normalized_source_len.0, 14); +} diff --git a/src/tools/clippy/clippy_config/src/conf.rs b/src/tools/clippy/clippy_config/src/conf.rs index 2a042e6c3d853..8cdd99ac44a8e 100644 --- a/src/tools/clippy/clippy_config/src/conf.rs +++ b/src/tools/clippy/clippy_config/src/conf.rs @@ -108,7 +108,7 @@ struct ConfError { impl ConfError { fn from_toml(file: &SourceFile, error: &toml::de::Error) -> Self { - let span = error.span().unwrap_or(0..file.source_len.0 as usize); + let span = error.span().unwrap_or(0..file.normalized_source_len.0 as usize); Self::spanned(file, error.message(), None, span) }