Skip to content

Commit 2354360

Browse files
Boshenclaude
andauthored
fix(package_json): re-read file for serde_json fallback in simd implementation (#808)
## Summary When `simd_json` fails to parse a `package.json` file, the fallback to `serde_json` now re-reads the file from disk instead of using a cloned byte buffer. This ensures accurate error messages with correct line and column numbers. ## Problem Previously, when `simd_json` parsing failed, the code used a cloned byte buffer for the `serde_json` fallback. However, `simd_json` mutates the buffer in-place during parsing, so even a cloned buffer taken before parsing might not reflect the original file content if the mutation happened before the clone. ## Solution - Updated `PackageJson::parse` to accept a `FileSystem` parameter - Modified the error fallback to re-read the file using `fs.read_to_string(&realpath)` before parsing with `serde_json` - This guarantees that error messages are based on the original, unmodified file content --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent 2e251b9 commit 2354360

File tree

4 files changed

+50
-27
lines changed

4 files changed

+50
-27
lines changed

benches/resolver.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::{
55
};
66

77
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
8-
use oxc_resolver::PackageJson;
8+
use oxc_resolver::{FileSystem as FileSystemTrait, FileSystemOs, PackageJson};
99
use rayon::prelude::*;
1010

1111
mod memory_fs;
@@ -356,6 +356,10 @@ fn bench_package_json_deserialization(c: &mut Criterion) {
356356

357357
let test_path = PathBuf::from("/test/package.json");
358358
let test_realpath = test_path.clone();
359+
#[cfg(feature = "yarn_pnp")]
360+
let fs = FileSystemOs::new(false);
361+
#[cfg(not(feature = "yarn_pnp"))]
362+
let fs = FileSystemOs::new();
359363

360364
let data = [
361365
("small", small_json.to_string()),
@@ -369,7 +373,7 @@ fn bench_package_json_deserialization(c: &mut Criterion) {
369373
b.iter_with_setup_wrapper(|runner| {
370374
let json = json.clone();
371375
runner.run(|| {
372-
PackageJson::parse(test_path.clone(), test_realpath.clone(), json)
376+
PackageJson::parse(&fs, test_path.clone(), test_realpath.clone(), json)
373377
.expect("Failed to parse JSON");
374378
});
375379
});

src/cache/cache_impl.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ impl<Fs: FileSystem> Cache<Fs> {
120120
} else {
121121
package_json_path.clone()
122122
};
123-
PackageJson::parse(package_json_path, real_path, package_json_string)
123+
PackageJson::parse(&self.fs, package_json_path, real_path, package_json_string)
124124
.map(|package_json| Some(Arc::new(package_json)))
125125
.map_err(ResolveError::Json)
126126
})

src/package_json/serde.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use std::{
1010
use serde_json::Value;
1111

1212
use super::{ImportsExportsKind, PackageType, SideEffects};
13-
use crate::{JSONError, ResolveError, path::PathUtil};
13+
use crate::{FileSystem, JSONError, ResolveError, path::PathUtil};
1414

1515
/// Serde implementation for the deserialized `package.json`.
1616
///
@@ -220,7 +220,12 @@ impl PackageJson {
220220
/// Parse a package.json file from JSON string
221221
///
222222
/// # Errors
223-
pub fn parse(path: PathBuf, realpath: PathBuf, json: String) -> Result<Self, JSONError> {
223+
pub fn parse<Fs: FileSystem>(
224+
_fs: &Fs,
225+
path: PathBuf,
226+
realpath: PathBuf,
227+
json: String,
228+
) -> Result<Self, JSONError> {
224229
// Strip BOM
225230
let json_string = if json.starts_with('\u{FEFF}') {
226231
json.trim_start_matches('\u{FEFF}')

src/package_json/simd.rs

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,18 @@ use std::{
77
path::{Path, PathBuf},
88
};
99

10+
use self_cell::MutBorrow;
1011
use simd_json::{BorrowedValue, prelude::*};
1112

1213
use super::{ImportsExportsKind, PackageType, SideEffects};
13-
use crate::{JSONError, ResolveError, path::PathUtil};
14+
use crate::{FileSystem, JSONError, ResolveError, path::PathUtil};
1415

1516
// Use simd_json's Object type which handles the hasher correctly based on features
1617
type BorrowedObject<'a> = simd_json::value::borrowed::Object<'a>;
1718

1819
self_cell::self_cell! {
1920
struct PackageJsonCell {
20-
owner: Vec<u8>,
21+
owner: MutBorrow<Vec<u8>>,
2122

2223
#[covariant]
2324
dependent: BorrowedValue,
@@ -253,7 +254,12 @@ impl PackageJson {
253254
///
254255
/// # Panics
255256
/// # Errors
256-
pub fn parse(path: PathBuf, realpath: PathBuf, json: String) -> Result<Self, JSONError> {
257+
pub fn parse<Fs: FileSystem>(
258+
fs: &Fs,
259+
path: PathBuf,
260+
realpath: PathBuf,
261+
json: String,
262+
) -> Result<Self, JSONError> {
257263
// Strip BOM in place by replacing with spaces (no reallocation)
258264
let mut json_bytes = json.into_bytes();
259265
if json_bytes.starts_with(b"\xEF\xBB\xBF") {
@@ -266,18 +272,34 @@ impl PackageJson {
266272
super::check_if_empty(&json_bytes, path.clone())?;
267273

268274
// Create the self-cell with the JSON bytes and parsed BorrowedValue
269-
let cell = PackageJsonCell::try_new(json_bytes.clone(), |bytes| {
270-
// We need a mutable slice from our owned data
271-
// SAFETY: We're creating a mutable reference to the owned data.
272-
// The self_cell ensures this reference is valid for the lifetime of the cell.
273-
let slice =
274-
unsafe { std::slice::from_raw_parts_mut(bytes.as_ptr().cast_mut(), bytes.len()) };
275-
simd_json::to_borrowed_value(slice)
275+
let cell = PackageJsonCell::try_new(MutBorrow::new(json_bytes), |bytes| {
276+
// Use MutBorrow to safely get mutable access for simd_json parsing
277+
simd_json::to_borrowed_value(bytes.borrow_mut())
276278
})
277279
.map_err(|simd_error| {
278-
// Fallback: parse with serde_json to get detailed error information
279-
// simd_json doesn't provide line/column info, so we re-parse to get it
280-
match serde_json::from_slice::<serde_json::Value>(&json_bytes) {
280+
// Fallback: re-read the file and parse with serde_json to get detailed error information
281+
// We re-read because simd_json may have mutated the buffer during its failed parse attempt
282+
// simd_json doesn't provide line/column info, so we use serde_json for better error messages
283+
let fallback_result = fs
284+
.read_to_string(&realpath)
285+
.map_err(|io_error| JSONError {
286+
path: path.clone(),
287+
message: format!("Failed to re-read file for error reporting: {io_error}"),
288+
line: 0,
289+
column: 0,
290+
})
291+
.and_then(|content| {
292+
serde_json::from_str::<serde_json::Value>(&content).map_err(|serde_error| {
293+
JSONError {
294+
path: path.clone(),
295+
message: serde_error.to_string(),
296+
line: serde_error.line(),
297+
column: serde_error.column(),
298+
}
299+
})
300+
});
301+
302+
match fallback_result {
281303
Ok(_) => {
282304
// serde_json succeeded but simd_json failed - this shouldn't happen
283305
// for valid JSON, but could indicate simd_json is more strict
@@ -288,15 +310,7 @@ impl PackageJson {
288310
column: 0,
289311
}
290312
}
291-
Err(serde_error) => {
292-
// Both parsers failed - use serde_json's detailed error
293-
JSONError {
294-
path: path.clone(),
295-
message: serde_error.to_string(),
296-
line: serde_error.line(),
297-
column: serde_error.column(),
298-
}
299-
}
313+
Err(error) => error,
300314
}
301315
})?;
302316

0 commit comments

Comments
 (0)