Skip to content

Commit 7119736

Browse files
committed
feat(rust/signed-doc): CatalystSignedDocument has CIDv1
1 parent 8e1cb5a commit 7119736

File tree

3 files changed

+382
-0
lines changed

3 files changed

+382
-0
lines changed

rust/signed_doc/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ tracing = "0.1.40"
3535
thiserror = "2.0.11"
3636
chrono = "0.4.42"
3737

38+
# CID v1 support
39+
cid = "0.11.1"
40+
multihash = { version = "0.19.3", features = ["serde-codec"] }
41+
sha2 = "0.10"
42+
multibase = "0.9.2"
43+
3844
[dev-dependencies]
3945
base64-url = "3.0.0"
4046
rand = "0.8.5"

rust/signed_doc/src/cid_v1.rs

Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
//! CID v1 (Content Identifier version 1) implementation for Catalyst Signed Documents.
2+
//!
3+
//! This module provides functionality to generate IPFS-compatible CID v1 identifiers
4+
//! for CBOR-encoded Catalyst Signed Documents.
5+
//!
6+
//! ## CID v1 Structure
7+
//!
8+
//! The binary format follows the IPFS specification:
9+
//! ```text
10+
//! <cidv1> = <version> || <multicodec> || <multihash>
11+
//! ```
12+
//!
13+
//! Where:
14+
//! - `version`: varint(1) - CID version 1
15+
//! - `multicodec`: varint(0x51) - CBOR codec
16+
//! - `multihash`: varint(0x12) || varint(32) || digest[32] - SHA2-256 multihash
17+
//!
18+
//! ## Constraints
19+
//!
20+
//! - **Hash function**: Only SHA2-256 is supported (32-byte digest)
21+
//! - **Codec**: Fixed to CBOR (0x51)
22+
//! - **Output size**: 36 bytes in binary format
23+
//!
24+
//! ## Example
25+
//!
26+
//! ```no_run
27+
//! # use catalyst_signed_doc::CatalystSignedDocument;
28+
//! # let doc: CatalystSignedDocument = todo!();
29+
//! let cid_string = doc.to_cid_v1_string()?;
30+
//! // Result: "bafirei..." (base32-encoded CID v1 using the CBOR 0x51 codec)
31+
//! # Ok::<(), anyhow::Error>(())
32+
//! ```
33+
34+
use sha2::{Digest, Sha256};
35+
36+
/// Multicodec code for plain CBOR, used as the content codec of every CID built in this module.
37+
///
38+
/// See: <https://github.com/multiformats/multicodec/blob/master/table.csv>
39+
const CBOR_CODEC: u64 = 0x51;
40+
41+
/// Multihash function code for SHA2-256; tags the digest handed to `Multihash::wrap`.
42+
const SHA2_256_CODE: u64 = 0x12;
43+
44+
/// Generates a CID v1 for the given CBOR bytes.
45+
///
46+
/// # Arguments
47+
///
48+
/// * `cbor_bytes` - The CBOR-encoded data to generate a CID for
49+
///
50+
/// # Returns
51+
///
52+
/// A `cid::Cid` object representing the CID v1
53+
///
54+
/// # Errors
55+
///
56+
/// Returns an error if multihash construction fails
57+
pub fn to_cid_v1(cbor_bytes: &[u8]) -> anyhow::Result<cid::Cid> {
58+
// Compute SHA2-256 hash
59+
let mut hasher = Sha256::new();
60+
hasher.update(cbor_bytes);
61+
let hash_digest = hasher.finalize();
62+
63+
// Create multihash from digest using the wrap() API
64+
// The generic parameter <64> is the max digest size we support
65+
let multihash = multihash::Multihash::<64>::wrap(SHA2_256_CODE, &hash_digest)?;
66+
67+
// Create CID v1 with CBOR codec
68+
let cid = cid::Cid::new_v1(CBOR_CODEC, multihash);
69+
70+
Ok(cid)
71+
}
72+
73+
/// Generates a CID v1 and returns it as a multibase-encoded string.
74+
///
75+
/// Uses base32 encoding (CID v1 default).
76+
///
77+
/// # Arguments
78+
///
79+
/// * `cbor_bytes` - The CBOR-encoded data to generate a CID for
80+
///
81+
/// # Returns
82+
///
83+
/// A base32-encoded CID v1 string (starting with 'b')
84+
///
85+
/// # Errors
86+
///
87+
/// Returns an error if CID generation fails
88+
pub fn to_cid_v1_string(cbor_bytes: &[u8]) -> anyhow::Result<String> {
89+
let cid = to_cid_v1(cbor_bytes)?;
90+
Ok(cid.to_string())
91+
}
92+
93+
/// Generates a CID v1 and returns it as raw bytes.
94+
///
95+
/// Binary format: `<version><multicodec><multihash>`
96+
///
97+
/// # Arguments
98+
///
99+
/// * `cbor_bytes` - The CBOR-encoded data to generate a CID for
100+
///
101+
/// # Returns
102+
///
103+
/// A 36-byte vector containing the binary CID v1
104+
///
105+
/// # Errors
106+
///
107+
/// Returns an error if CID generation fails
108+
pub fn to_cid_v1_bytes(cbor_bytes: &[u8]) -> anyhow::Result<Vec<u8>> {
109+
let cid = to_cid_v1(cbor_bytes)?;
110+
Ok(cid.to_bytes())
111+
}
112+
113+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Builder, CatalystSignedDocument, ContentType};

    /// SHA2-256 digest size in bytes.
    const SHA2_256_SIZE: usize = 32;

    /// Bytes that precede the digest in a binary CID v1 produced by this module:
    /// version (0x01), CBOR codec (0x51), SHA2-256 code (0x12), digest length (0x20).
    const CID_V1_HEADER: [u8; 4] = [0x01, 0x51, 0x12, 0x20];

    /// Total size of the binary CID v1: header plus digest (36 bytes).
    const CID_V1_BINARY_SIZE: usize = CID_V1_HEADER.len() + SHA2_256_SIZE;

    /// Base32 rendering of `CID_V1_HEADER` after the multibase 'b' prefix.
    ///
    /// The 32 header bits fill the first six base32 characters completely
    /// (`a f i r e i`), so this prefix does not depend on the digest.
    const CID_V1_STRING_PREFIX: &str = "bafirei";

    /// Builds a document whose `id` and `ver` are both `id` and whose JSON body is `content`.
    fn build_document(
        id: &str,
        content: &serde_json::Value,
    ) -> CatalystSignedDocument {
        Builder::new()
            .with_json_metadata(serde_json::json!({
                "id": id,
                "ver": id,
                "type": "ab7c2428-c353-4331-856e-385b2eb20546",
                "content-type": ContentType::Json,
            }))
            .expect("Should create metadata")
            .with_json_content(content)
            .expect("Should set content")
            .build()
            .expect("Should build document")
    }

    /// Helper function to create a test CatalystSignedDocument
    fn create_test_document() -> CatalystSignedDocument {
        build_document(
            "0197f398-9f43-7c23-a576-f765131b81f2",
            &serde_json::json!({ "test": "content" }),
        )
    }

    /// Test CID v1 generation from a CatalystSignedDocument
    #[test]
    fn test_cid_generation_from_document() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        // `expect` already fails the test on `Err`, so no separate `is_ok` assertion is needed.
        let cid = to_cid_v1(&cbor_bytes).expect("CID generation should succeed");

        assert_eq!(cid.version(), cid::Version::V1);
        assert_eq!(cid.codec(), CBOR_CODEC);
    }

    /// Test that binary format is exactly 36 bytes and carries the expected header
    #[test]
    fn test_binary_format_size_from_document() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid_bytes = to_cid_v1_bytes(&cbor_bytes).expect("CID bytes should be valid");

        assert_eq!(
            cid_bytes.len(),
            CID_V1_BINARY_SIZE,
            "CID v1 binary format should be exactly 36 bytes"
        );
        assert!(
            cid_bytes.starts_with(&CID_V1_HEADER),
            "CID v1 binary should start with version, CBOR codec, SHA2-256 code and digest length"
        );
    }

    /// Test determinism: same document produces same CID
    #[test]
    fn test_determinism_from_document() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid1 = to_cid_v1(&cbor_bytes).expect("First CID should be valid");
        let cid2 = to_cid_v1(&cbor_bytes).expect("Second CID should be valid");

        assert_eq!(cid1, cid2, "Same document should produce identical CIDs");
    }

    /// Test string format starts with 'b' (base32) for a CatalystSignedDocument
    #[test]
    fn test_string_format_from_document() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid_string = to_cid_v1_string(&cbor_bytes).expect("CID string should be valid");

        assert!(
            cid_string.starts_with('b'),
            "CID v1 base32 string should start with 'b'"
        );
    }

    /// Test that multihash has correct SHA2-256 properties
    #[test]
    fn test_multihash_properties_from_document() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid = to_cid_v1(&cbor_bytes).expect("CID should be valid");
        let multihash = cid.hash();

        assert_eq!(
            multihash.code(),
            SHA2_256_CODE,
            "Multihash code should be SHA2-256 (0x12)"
        );
        // Widening conversion: `usize::from` cannot truncate, unlike an `as` cast.
        assert_eq!(
            usize::from(multihash.size()),
            SHA2_256_SIZE,
            "Multihash digest size should be 32 bytes"
        );
    }

    /// Test that CID string can be parsed back to a CID object
    #[test]
    fn test_cid_string_round_trip() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid_string = to_cid_v1_string(&cbor_bytes).expect("CID string should be valid");

        // Parse the string back to a CID
        let parsed_cid =
            cid::Cid::try_from(cid_string.as_str()).expect("CID string should be parseable");

        // Generate CID directly for comparison
        let original_cid = to_cid_v1(&cbor_bytes).expect("CID should be valid");

        assert_eq!(
            parsed_cid, original_cid,
            "Parsed CID should match original CID"
        );
    }

    /// Test that different documents produce different CID strings
    #[test]
    fn test_different_documents_different_cid_strings() {
        let doc1 = create_test_document();
        let doc2 = build_document(
            "0197f398-9f43-7c23-a576-f765131b81f3",
            &serde_json::json!({ "different": "content" }),
        );

        let cid_string1 = to_cid_v1_string(&doc1.to_bytes().expect("Should serialize"))
            .expect("CID string 1 should be valid");
        let cid_string2 = to_cid_v1_string(&doc2.to_bytes().expect("Should serialize"))
            .expect("CID string 2 should be valid");

        assert_ne!(
            cid_string1, cid_string2,
            "Different documents should produce different CID strings"
        );
    }

    /// Test CID string properties (base32 encoding characteristics)
    #[test]
    fn test_cid_string_properties() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid_string = to_cid_v1_string(&cbor_bytes).expect("CID string should be valid");

        // Base32 strings start with 'b'
        assert!(
            cid_string.starts_with('b'),
            "CID v1 base32 string should start with 'b'"
        );

        // The fixed version/codec/hash header pins the leading characters as well.
        assert!(
            cid_string.starts_with(CID_V1_STRING_PREFIX),
            "CID v1 string for CBOR + SHA2-256 should start with 'bafirei'"
        );

        // Base32 encoding uses lowercase letters and digits 2-7
        assert!(
            cid_string
                .chars()
                .skip(1)
                .all(|c| matches!(c, 'a'..='z' | '2'..='7')),
            "CID v1 base32 string should only contain lowercase letters and digits 2-7"
        );

        // Should be non-empty and reasonably sized
        assert!(
            cid_string.len() > 10,
            "CID string should have reasonable length"
        );
    }

    /// Test CID string determinism (same document always produces same string)
    #[test]
    fn test_cid_string_determinism() {
        let doc = create_test_document();
        let cbor_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        let cid_string1 = to_cid_v1_string(&cbor_bytes).expect("First CID string should be valid");
        let cid_string2 =
            to_cid_v1_string(&cbor_bytes).expect("Second CID string should be valid");

        assert_eq!(
            cid_string1, cid_string2,
            "Same document should always produce the same CID string"
        );
    }

    /// Test full round-trip: document -> CID string -> CID -> bytes -> CID
    #[test]
    fn test_full_cid_round_trip() {
        let doc = create_test_document();
        let original_bytes = doc.to_bytes().expect("Should serialize to CBOR");

        // Convert to CID string
        let cid_string = to_cid_v1_string(&original_bytes).expect("CID string should be valid");

        // Parse string to CID
        let cid_from_string =
            cid::Cid::try_from(cid_string.as_str()).expect("Should parse CID from string");

        // Convert CID to bytes
        let cid_bytes = cid_from_string.to_bytes();

        // Parse bytes back to CID
        let cid_from_bytes =
            cid::Cid::try_from(cid_bytes.as_slice()).expect("Should parse CID from bytes");

        // Convert back to string
        let final_string = cid_from_bytes.to_string();

        assert_eq!(
            cid_string, final_string,
            "Full round-trip should preserve CID string"
        );
    }
}

0 commit comments

Comments
 (0)