Skip to content

Commit b05f8ba

Browse files
CopilotByron
andcommitted
fix: refspec for shallow clones uses a single-branch (#2227)
When doing shallow clones (depth != NoChange), it now uses a single-branch refspec instead of fetching all branches. This matches Git's behavior and significantly reduces the repository size for shallow clones. For shallow clones: - If ref_name is specified: uses that branch - Otherwise: attempts to detect from Protocol V1 handshake or falls back to init.defaultBranch config or "main" This addresses issue #2227 where `gix clone --depth 1` was creating repositories ~130MB vs Git's ~70MB due to fetching all branches. Co-authored-by: Byron <63622+Byron@users.noreply.github.com>
1 parent 3313233 commit b05f8ba

File tree

2 files changed

+115
-6
lines changed

2 files changed

+115
-6
lines changed

gix/src/clone/fetch/mod.rs

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ pub enum Error {
4747
},
4848
#[error(transparent)]
4949
CommitterOrFallback(#[from] crate::config::time::Error),
50+
#[error(transparent)]
51+
RefMap(#[from] crate::remote::ref_map::Error),
5052
}
5153

5254
/// Modification
@@ -101,14 +103,81 @@ impl PrepareFetch {
101103
};
102104

103105
let mut remote = repo.remote_at(self.url.clone())?;
106+
107+
// For shallow clones without custom configuration, we'll use a single-branch refspec
108+
// to match git's behavior (matching git's single-branch behavior for shallow clones).
109+
let use_single_branch_for_shallow = self.shallow != remote::fetch::Shallow::NoChange
110+
&& self.configure_remote.is_none()
111+
&& remote.fetch_specs.is_empty();
112+
113+
let target_ref = if use_single_branch_for_shallow {
114+
// Determine target branch from user-specified ref_name or default branch
115+
if let Some(ref_name) = &self.ref_name {
116+
// User specified a branch, use that
117+
Some(format!("refs/heads/{}", ref_name.as_ref().as_bstr()))
118+
} else {
119+
// For shallow clones without a specified ref, we need to determine the default branch.
120+
// We'll connect to get HEAD information. For Protocol V2, we need to explicitly list refs.
121+
let mut connection = remote.connect(remote::Direction::Fetch).await?;
122+
123+
// Perform handshake and try to get HEAD from it (works for Protocol V1)
124+
let _ = connection.ref_map_by_ref(&mut progress, Default::default()).await?;
125+
126+
let target = if let Some(handshake) = &connection.handshake {
127+
// Protocol V1: refs are in handshake
128+
handshake.refs.as_ref().and_then(|refs| {
129+
refs.iter().find_map(|r| match r {
130+
gix_protocol::handshake::Ref::Symbolic {
131+
full_ref_name,
132+
target,
133+
..
134+
} if full_ref_name == "HEAD" => Some(target.to_string()),
135+
_ => None,
136+
})
137+
})
138+
} else {
139+
None
140+
};
141+
142+
// For Protocol V2 or if we couldn't determine HEAD, use the configured default branch
143+
let fallback_branch = target.or_else(|| {
144+
repo.config
145+
.resolved
146+
.string(crate::config::tree::Init::DEFAULT_BRANCH)
147+
.and_then(|name| name.to_str().ok().map(|s| format!("refs/heads/{}", s)))
148+
}).unwrap_or_else(|| "refs/heads/main".to_string());
149+
150+
// Drop the connection explicitly to release the borrow on remote
151+
drop(connection);
152+
153+
Some(fallback_branch)
154+
}
155+
} else {
156+
None
157+
};
158+
159+
// Set up refspec based on whether we're doing a single-branch shallow clone
104160
if remote.fetch_specs.is_empty() {
105-
remote = remote
106-
.with_refspecs(
107-
Some(format!("+refs/heads/*:refs/remotes/{remote_name}/*").as_str()),
108-
remote::Direction::Fetch,
109-
)
110-
.expect("valid static spec");
161+
if let Some(target_ref) = &target_ref {
162+
// Single-branch refspec for shallow clones
163+
let short_name = target_ref.strip_prefix("refs/heads/").unwrap_or(target_ref.as_str());
164+
remote = remote
165+
.with_refspecs(
166+
Some(format!("+{target_ref}:refs/remotes/{remote_name}/{short_name}").as_str()),
167+
remote::Direction::Fetch,
168+
)
169+
.expect("valid refspec");
170+
} else {
171+
// Wildcard refspec for non-shallow clones or when target couldn't be determined
172+
remote = remote
173+
.with_refspecs(
174+
Some(format!("+refs/heads/*:refs/remotes/{remote_name}/*").as_str()),
175+
remote::Direction::Fetch,
176+
)
177+
.expect("valid static spec");
178+
}
111179
}
180+
112181
let mut clone_fetch_tags = None;
113182
if let Some(f) = self.configure_remote.as_mut() {
114183
remote = f(remote).map_err(Error::RemoteConfiguration)?;
@@ -133,6 +202,7 @@ impl PrepareFetch {
133202
.expect("valid")
134203
.to_owned();
135204
let pending_pack: remote::fetch::Prepare<'_, '_, _> = {
205+
// For shallow clones, we already connected once, so we need to connect again
136206
let mut connection = remote.connect(remote::Direction::Fetch).await?;
137207
if let Some(f) = self.configure_connection.as_mut() {
138208
f(&mut connection).map_err(Error::RemoteConnection)?;

gix/tests/gix/clone.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,45 @@ mod blocking_io {
8383
Ok(())
8484
}
8585

86+
#[test]
87+
fn shallow_clone_uses_single_branch_refspec() -> crate::Result {
88+
let tmp = gix_testtools::tempfile::TempDir::new()?;
89+
let (repo, _out) = gix::prepare_clone_bare(remote::repo("base").path(), tmp.path())?
90+
.with_shallow(Shallow::DepthAtRemote(1.try_into()?))
91+
.fetch_only(gix::progress::Discard, &std::sync::atomic::AtomicBool::default())?;
92+
93+
assert!(repo.is_shallow(), "repository should be shallow");
94+
95+
// Verify that only a single-branch refspec was configured
96+
let remote = repo.find_remote("origin")?;
97+
let refspecs: Vec<_> = remote
98+
.refspecs(Direction::Fetch)
99+
.iter()
100+
.map(|spec| spec.to_ref().to_bstring())
101+
.collect();
102+
103+
assert_eq!(
104+
refspecs.len(),
105+
1,
106+
"shallow clone should have only one fetch refspec"
107+
);
108+
109+
// The refspec should be for a single branch (main), not a wildcard
110+
let refspec_str = refspecs[0].to_str().expect("valid utf8");
111+
assert!(
112+
!refspec_str.contains("*"),
113+
"shallow clone refspec should not use wildcard: {}",
114+
refspec_str
115+
);
116+
assert!(
117+
refspec_str.contains("refs/heads/main"),
118+
"shallow clone refspec should reference the main branch: {}",
119+
refspec_str
120+
);
121+
122+
Ok(())
123+
}
124+
86125
#[test]
87126
fn from_shallow_prohibited_with_option() -> crate::Result {
88127
let tmp = gix_testtools::tempfile::TempDir::new()?;

0 commit comments

Comments
 (0)