|
| 1 | +//! (De)compression support. |
| 2 | +
|
| 3 | +use crate::error::ActiveStorageError; |
| 4 | +use crate::models; |
| 5 | + |
| 6 | +use axum::body::Bytes; |
| 7 | +use flate2::read::{GzDecoder, ZlibDecoder}; |
| 8 | +use std::io::Read; |
| 9 | + |
| 10 | +/// Decompresses some Bytes and returns the uncompressed data. |
| 11 | +/// |
| 12 | +/// # Arguments |
| 13 | +/// |
| 14 | +/// * `compression`: Compression algorithm |
| 15 | +/// * `data`: Compressed data [Bytes](axum::body::Bytes) |
| 16 | +pub fn decompress( |
| 17 | + compression: models::Compression, |
| 18 | + data: &Bytes, |
| 19 | +) -> Result<Bytes, ActiveStorageError> { |
| 20 | + let mut decoder: Box<dyn Read> = match compression { |
| 21 | + models::Compression::Gzip => Box::new(GzDecoder::<&[u8]>::new(data)), |
| 22 | + models::Compression::Zlib => Box::new(ZlibDecoder::<&[u8]>::new(data)), |
| 23 | + }; |
| 24 | + // The data returned by the S3 client does not have any alignment guarantees. In order to |
| 25 | + // reinterpret the data as an array of numbers with a higher alignment than 1, we need to |
| 26 | + // return the data in Bytes object in which the underlying data has a higher alignment. |
| 27 | + // For now we're hard-coding an alignment of 8 bytes, although this should depend on the |
| 28 | + // data type, and potentially whether there are any SIMD requirements. |
| 29 | + // Create an 8-byte aligned Vec<u8>. |
| 30 | + // FIXME: The compressed length will not be enough to store the uncompressed data, and may |
| 31 | + // result in a change in the underlying buffer to one that is not correctly aligned. |
| 32 | + let mut buf = maligned::align_first::<u8, maligned::A8>(data.len()); |
| 33 | + decoder.read_to_end(&mut buf)?; |
| 34 | + // Release any unnecessary capacity. |
| 35 | + buf.shrink_to(0); |
| 36 | + Ok(buf.into()) |
| 37 | +} |
| 38 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::read::{GzEncoder, ZlibEncoder};
    use flate2::Compression;

    /// Returns the bytes `hello world` compressed as a gzip stream.
    fn compress_gzip() -> Vec<u8> {
        // Adapted from flate2 documentation.
        let payload = b"hello world";
        let mut encoder = GzEncoder::new(&payload[..], Compression::fast());
        let mut compressed = Vec::<u8>::new();
        encoder.read_to_end(&mut compressed).unwrap();
        compressed
    }

    /// Returns the bytes `hello world` compressed as a zlib stream.
    fn compress_zlib() -> Vec<u8> {
        // Adapted from flate2 documentation.
        let payload = b"hello world";
        let mut encoder = ZlibEncoder::new(&payload[..], Compression::fast());
        let mut compressed = Vec::<u8>::new();
        encoder.read_to_end(&mut compressed).unwrap();
        compressed
    }

    /// A gzip stream round-trips and the output starts on an 8-byte boundary.
    #[test]
    fn test_decompress_gzip() {
        let input = Bytes::from(compress_gzip());
        let result = decompress(models::Compression::Gzip, &input).unwrap();
        assert_eq!(result, b"hello world".as_ref());
        assert_eq!(result.as_ptr().align_offset(8), 0);
    }

    /// A zlib stream round-trips and the output starts on an 8-byte boundary.
    #[test]
    fn test_decompress_zlib() {
        let input = Bytes::from(compress_zlib());
        let result = decompress(models::Compression::Zlib, &input).unwrap();
        assert_eq!(result, b"hello world".as_ref());
        assert_eq!(result.as_ptr().align_offset(8), 0);
    }

    /// Input that is not gzip is rejected with a decompression error.
    #[test]
    fn test_decompress_invalid_gzip() {
        let input = Bytes::from_static(b"invalid format");
        let outcome = decompress(models::Compression::Gzip, &input);
        match outcome.unwrap_err() {
            ActiveStorageError::Decompression(io_err) => {
                assert_eq!(io_err.kind(), std::io::ErrorKind::InvalidInput);
                assert_eq!(io_err.to_string(), "invalid gzip header");
            }
            other => panic!("unexpected error {}", other),
        }
    }

    /// Input that is not zlib is rejected with a decompression error.
    #[test]
    fn test_decompress_invalid_zlib() {
        let input = Bytes::from_static(b"invalid format");
        let outcome = decompress(models::Compression::Zlib, &input);
        match outcome.unwrap_err() {
            ActiveStorageError::Decompression(io_err) => {
                assert_eq!(io_err.kind(), std::io::ErrorKind::InvalidInput);
                assert_eq!(io_err.to_string(), "corrupt deflate stream");
            }
            other => panic!("unexpected error {}", other),
        }
    }
}
0 commit comments