1- use std:: { ops:: Range , path:: PathBuf } ;
2-
1+ use gix_diff:: blob:: intern:: TokenSource ;
32use gix_hash:: ObjectId ;
43use gix_object:: { bstr:: BStr , FindExt } ;
4+ use std:: { ops:: Range , path:: PathBuf } ;
55
66use super :: { process_changes, Change , Offset , UnblamedHunk } ;
7- use crate :: BlameEntry ;
7+ use crate :: { BlameEntry , Outcome } ;
88
99// TODO: do not instantiate anything, get everything passed as argument.
1010/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
@@ -60,28 +60,35 @@ pub fn file<E>(
6060 // TODO: remove
6161 worktree_root : PathBuf ,
6262 file_path : & BStr ,
63- ) -> Result < Vec < BlameEntry > , E > {
63+ ) -> Result < Outcome , E > {
6464 // TODO: `worktree_root` should be removed - read everything from Commit.
6565 // Worktree changes should be placed into a temporary commit.
6666 // TODO: remove this and deduplicate the respective code.
6767 use gix_object:: bstr:: ByteSlice ;
6868 let absolute_path = worktree_root. join ( gix_path:: from_bstr ( file_path) ) ;
6969
70- // TODO use `imara-diff` to tokenize this just like it will be tokenized when diffing.
71- let number_of_lines = std:: fs:: read_to_string ( absolute_path) . unwrap ( ) . lines ( ) . count ( ) ;
72-
7370 let mut traverse = traverse. into_iter ( ) . peekable ( ) ;
7471 let Some ( Ok ( suspect) ) = traverse. peek ( ) . map ( |res| res. as_ref ( ) . map ( |item| item. id ) ) else {
7572 todo ! ( "return actual error" ) ;
7673 } ;
7774
75+ let original_file_blob = std:: fs:: read ( absolute_path) . unwrap ( ) ;
76+ let num_lines_in_original = {
77+ let mut interner = gix_diff:: blob:: intern:: Interner :: new ( original_file_blob. len ( ) / 100 ) ;
78+ tokens_for_diffing ( & original_file_blob)
79+ . tokenize ( )
80+ . map ( |token| interner. intern ( token) )
81+ . count ( )
82+ } ;
83+
7884 let mut hunks_to_blame = vec ! [ UnblamedHunk :: new(
79- 0 ..number_of_lines . try_into( ) . unwrap( ) ,
85+ 0 ..num_lines_in_original . try_into( ) . unwrap( ) ,
8086 suspect,
8187 Offset :: Added ( 0 ) ,
8288 ) ] ;
8389
8490 let mut out = Vec :: new ( ) ;
91+ let mut buf = Vec :: with_capacity ( 512 ) ;
8592 ' outer: for item in traverse {
8693 let item = item?;
8794 let suspect = item. id ;
@@ -103,9 +110,8 @@ pub fn file<E>(
103110 break ;
104111 }
105112
106- let mut buffer = Vec :: new ( ) ;
107- let commit_id = odb. find_commit ( & suspect, & mut buffer) . unwrap ( ) . tree ( ) ;
108- let tree_iter = odb. find_tree_iter ( & commit_id, & mut buffer) . unwrap ( ) ;
113+ let commit_id = odb. find_commit ( & suspect, & mut buf) . unwrap ( ) . tree ( ) ;
114+ let tree_iter = odb. find_tree_iter ( & commit_id, & mut buf) . unwrap ( ) ;
109115
110116 let mut entry_buffer = Vec :: new ( ) ;
111117 let Some ( entry) = tree_iter
@@ -247,7 +253,10 @@ pub fn file<E>(
247253 // I don’t know yet whether it would make sense to use a data structure instead that preserves
248254 // order on insertion.
249255 out. sort_by ( |a, b| a. range_in_blamed_file . start . cmp ( & b. range_in_blamed_file . start ) ) ;
250- Ok ( coalesce_blame_entries ( out) )
256+ Ok ( Outcome {
257+ entries : coalesce_blame_entries ( out) ,
258+ blob : original_file_blob,
259+ } )
251260}
252261
253262/// This function merges adjacent blame entries. It merges entries that are adjacent both in the
@@ -416,9 +425,18 @@ fn blob_changes(
416425 . unwrap ( ) ;
417426
418427 let outcome = resource_cache. prepare_diff ( ) . unwrap ( ) ;
419- let input = outcome. interned_input ( ) ;
428+ let input = gix_diff:: blob:: intern:: InternedInput :: new (
429+ tokens_for_diffing ( outcome. old . data . as_slice ( ) . unwrap_or_default ( ) ) ,
430+ tokens_for_diffing ( outcome. new . data . as_slice ( ) . unwrap_or_default ( ) ) ,
431+ ) ;
420432 let number_of_lines_in_destination = input. after . len ( ) ;
421433 let change_recorder = ChangeRecorder :: new ( number_of_lines_in_destination. try_into ( ) . unwrap ( ) ) ;
422434
423435 gix_diff:: blob:: diff ( gix_diff:: blob:: Algorithm :: Histogram , & input, change_recorder)
424436}
437+
438+ /// Return a token source over `data` for use in diffing. These are usually lines, but it's important to unify them
439+ /// so that later access shows the right thing.
440+ pub ( crate ) fn tokens_for_diffing ( data : & [ u8 ] ) -> impl TokenSource < Token = & [ u8 ] > {
441+ gix_diff:: blob:: sources:: byte_lines_with_terminator ( data)
442+ }
0 commit comments