From d10f0dc174904023e0d90ff0c52b5d9e6f9d1e15 Mon Sep 17 00:00:00 2001 From: Rostyslav Toch Date: Wed, 24 Dec 2025 16:57:11 +0000 Subject: [PATCH 1/2] ptx: handle duplicate input files --- src/uu/ptx/src/ptx.rs | 19 ++++++++++++------- tests/by-util/test_ptx.rs | 8 ++++++++ tests/fixtures/ptx/one_word | 1 + 3 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 tests/fixtures/ptx/one_word diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index 9f8977f70f5..d44b09fb2c3 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -7,7 +7,7 @@ use std::cmp; use std::cmp::PartialEq; -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeSet, HashSet}; use std::ffi::{OsStr, OsString}; use std::fmt::Write as FmtWrite; use std::fs::File; @@ -279,10 +279,10 @@ struct FileContent { offset: usize, } -type FileMap = HashMap; +type FileMap = Vec<(OsString, FileContent)>; fn read_input(input_files: &[OsString], config: &Config) -> std::io::Result { - let mut file_map: FileMap = HashMap::new(); + let mut file_map: FileMap = FileMap::new(); let mut offset: usize = 0; let sentence_splitter = if let Some(re_str) = &config.sentence_regex { @@ -310,14 +310,14 @@ fn read_input(input_files: &[OsString], config: &Config) -> std::io::Result, which can be indexed in constant time. let chars_lines: Vec> = lines.iter().map(|x| x.chars().collect()).collect(); let size = lines.len(); - file_map.insert( + file_map.push(( filename.clone(), FileContent { lines, chars_lines, offset, }, - ); + )); offset += size; } Ok(file_map) @@ -792,8 +792,13 @@ fn write_traditional_output( } for word_ref in words { - let file_map_value: &FileContent = file_map - .get(&word_ref.filename) + let (_, file_map_value) = file_map + .iter() + .find(|(name, content)| { + name == &word_ref.filename + && word_ref.global_line_nr >= content.offset + && word_ref.global_line_nr < content.offset + content.lines.len() + }) .expect("Missing file in file map"); let FileContent { ref lines, diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs index acad875bb7b..9584dd52b5f 100644 --- a/tests/by-util/test_ptx.rs +++ b/tests/by-util/test_ptx.rs @@ -338,3 +338,11 @@ fn test_unicode_truncation_alignment() { .succeeds() .stdout_only(" / bar\n föö/\n"); } + +#[test] +fn test_duplicate_input_files() { + new_ucmd!() + .args(&["one_word", "one_word"]) + .succeeds() + .stdout_is(" rust\n rust\n"); +} diff --git a/tests/fixtures/ptx/one_word b/tests/fixtures/ptx/one_word new file mode 100644 index 00000000000..871732e64f9 --- /dev/null +++ b/tests/fixtures/ptx/one_word @@ -0,0 +1 @@ +rust From 3308729fce3a9565cdf2b497af92a52e6d4d6bf8 Mon Sep 17 00:00:00 2001 From: Rostyslav Toch Date: Wed, 24 Dec 2025 21:52:20 +0000 Subject: [PATCH 2/2] ptx: add comment explaining file map lookup logic --- src/uu/ptx/src/ptx.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs index d44b09fb2c3..c0797bdfcca 100644 --- a/src/uu/ptx/src/ptx.rs +++ b/src/uu/ptx/src/ptx.rs @@ -792,6 +792,10 @@ fn write_traditional_output( } for word_ref in words { + // Since `ptx` accepts duplicate file arguments (e.g., `ptx file file`), + // simply looking up by filename is ambiguous. + // We use the `global_line_nr` (which is unique across the entire input stream) + // to identify which file covers this line. let (_, file_map_value) = file_map .iter() .find(|(name, content)| {