Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions c/tests/test_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "testlib.h"
#include <tskit/core.h>
#include <math.h>
#include <string.h>

#include <unistd.h>

Expand Down Expand Up @@ -82,6 +83,141 @@ test_generate_uuid(void)
CU_ASSERT_STRING_NOT_EQUAL(uuid, other_uuid);
}

/* Store value into dest[0..7] in little-endian order, i.e. the least
 * significant byte goes to dest[0] and the most significant to dest[7]. */
static void
set_u64_le(uint8_t *dest, uint64_t value)
{
    unsigned int k;

    for (k = 0; k < 8; k++) {
        /* Emit the current low byte, then shift the next one into place. */
        dest[k] = (uint8_t)(value & 0xFF);
        value >>= 8;
    }
}

/* Exercises tsk_json_binary_metadata_get_blob on hand-built json+binary
 * buffers. A single buffer is framed as: magic "JBLB", a version byte, the
 * JSON length and the binary length (each 8 bytes, little-endian), the JSON
 * bytes and finally the binary payload. The buffer is then mutated in place
 * to cover further success cases and each error path, so the order of the
 * sections below matters. */
static void
test_json_binary_metadata_get_blob(void)
{
int ret;
char metadata[128];
const char *json;
tsk_size_t json_buffer_length;
const uint8_t *blob;
tsk_size_t blob_length;
uint8_t *bytes;
tsk_size_t metadata_length;
size_t header_length;
size_t json_length;
size_t payload_length;
size_t total_length;
const char json_payload[] = "{\"a\":1}";
const uint8_t binary_payload[] = { 0x01, 0x02, 0x03, 0x04 };
const uint8_t empty_payload[] = { 0 };

/* Case 1: non-empty JSON followed by a four byte binary payload. */
bytes = (uint8_t *) metadata;
/* 4 magic bytes + 1 version byte + two 8-byte length fields. */
header_length = 4 + 1 + 8 + 8;
json_length = strlen(json_payload);
payload_length = sizeof(binary_payload);
total_length = header_length + json_length + payload_length;
CU_ASSERT_FATAL(total_length <= sizeof(metadata));
memset(metadata, 0, sizeof(metadata));
bytes[0] = 'J';
bytes[1] = 'B';
bytes[2] = 'L';
bytes[3] = 'B';
bytes[4] = 1;
set_u64_le(bytes + 5, (uint64_t) json_length);
set_u64_le(bytes + 13, (uint64_t) payload_length);
memcpy(bytes + header_length, json_payload, json_length);
memcpy(bytes + header_length + json_length, binary_payload, payload_length);
metadata_length = (tsk_size_t) total_length;
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, 0);
/* The returned pointers must alias the input buffer (no copy is made). */
CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
if (json_length > 0) {
CU_ASSERT_EQUAL(memcmp(json, json_payload, json_length), 0);
}
CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
CU_ASSERT_EQUAL(memcmp(blob, binary_payload, payload_length), 0);

/* Case 2: same JSON, zero-length binary payload. The blob pointer is still
 * expected to point just past the JSON bytes. */
payload_length = 0;
total_length = header_length + json_length + payload_length;
CU_ASSERT_FATAL(total_length <= sizeof(metadata));
set_u64_le(bytes + 13, (uint64_t) payload_length);
metadata_length = (tsk_size_t) total_length;
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);

/* Case 3: zero-length JSON with a one byte binary payload placed directly
 * after the header. */
json_length = 0;
payload_length = sizeof(empty_payload);
total_length = header_length + json_length + payload_length;
CU_ASSERT_FATAL(total_length <= sizeof(metadata));
set_u64_le(bytes + 5, (uint64_t) json_length);
set_u64_le(bytes + 13, (uint64_t) payload_length);
memcpy(bytes + header_length + json_length, empty_payload, payload_length);
metadata_length = (tsk_size_t) total_length;
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
CU_ASSERT_EQUAL(memcmp(blob, empty_payload, payload_length), 0);

/* Error: buffer is one byte too short to contain the header. */
blob = NULL;
blob_length = 0;
json = NULL;
json_buffer_length = 0;
metadata_length = header_length - 1;
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);

/* Error: corrupted magic bytes (restored afterwards). */
metadata_length = (tsk_size_t) total_length;
bytes[0] = 'X';
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
bytes[0] = 'J';

/* Error: version byte other than 1 (restored afterwards). */
bytes[4] = 2;
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_VERSION_TOO_NEW);
bytes[4] = 1;

/* Error: the header still declares the case 3 lengths, but the buffer is
 * reported as one byte shorter than header + JSON + payload. */
metadata_length = (tsk_size_t)(total_length - 1);
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);

/* Error: each pointer argument in turn is passed as NULL. */
ret = tsk_json_binary_metadata_get_blob(
NULL, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, NULL, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, NULL, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, NULL, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_binary_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
}

static void
test_double_round(void)
{
Expand Down Expand Up @@ -652,6 +788,7 @@ main(int argc, char **argv)
{ "test_strerror", test_strerror },
{ "test_strerror_kastore", test_strerror_kastore },
{ "test_generate_uuid", test_generate_uuid },
{ "test_json_binary_metadata_get_blob", test_json_binary_metadata_get_blob },
{ "test_double_round", test_double_round },
{ "test_blkalloc", test_blkalloc },
{ "test_unknown_time", test_unknown_time },
Expand Down
69 changes: 69 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
#include <tskit/core.h>

#define UUID_NUM_BYTES 16
/* Size in bytes of the json+binary metadata header: 4 magic bytes, 1 version
 * byte, then the JSON length and the binary length, each stored as an 8-byte
 * little-endian unsigned integer (4 + 1 + 8 + 8 = 21). */
#define TSK_JSON_BINARY_HEADER_SIZE 21

/* Magic bytes that must appear at the start of json+binary encoded metadata. */
static const uint8_t TSK_JSON_BINARY_MAGIC[4] = { 'J', 'B', 'L', 'B' };

#if defined(_WIN32)

Expand Down Expand Up @@ -95,6 +98,22 @@ get_random_bytes(uint8_t *buf)

#endif

/* Decode the unsigned 64-bit integer stored in little-endian byte order in
 * p[0..7] (p[0] is the least significant byte). Reading byte by byte keeps
 * the result independent of host endianness and of the alignment of p. */
static uint64_t
tsk_load_u64_le(const uint8_t *p)
{
    uint64_t value = 0;
    int j;

    /* Walk from the most significant byte down, shifting earlier bytes up. */
    for (j = 7; j >= 0; j--) {
        value = (value << 8) | (uint64_t) p[j];
    }
    return value;
}

/* Generate a new UUID4 using a system-generated source of randomness.
* Note that this function writes a NULL terminator to the end of this
* string, so that the total length of the buffer must be 37 bytes.
Expand All @@ -121,6 +140,56 @@ tsk_generate_uuid(char *dest, int TSK_UNUSED(flags))
out:
return ret;
}

/* Validate the framing of json+binary encoded metadata and return pointers
 * to the JSON and binary sections inside the caller's buffer (nothing is
 * copied).
 *
 * Layout: 4 magic bytes, 1 version byte (only version 1 is accepted), the
 * JSON length and the binary length as 8-byte little-endian unsigned
 * integers, then the JSON bytes followed by the binary payload.
 *
 * Returns 0 on success. Returns TSK_ERR_BAD_PARAM_VALUE if any pointer
 * argument is NULL, TSK_ERR_FILE_FORMAT if the buffer is shorter than the
 * header, the magic does not match, or the declared section lengths do not
 * fit in the buffer, and TSK_ERR_FILE_VERSION_TOO_NEW if the version byte is
 * not 1. The output arguments are only written on success. */
int
tsk_json_binary_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
    const char **json, tsk_size_t *json_length, const uint8_t **blob,
    tsk_size_t *blob_length)
{
    int ret;
    uint8_t version;
    uint64_t json_length_u64;
    uint64_t binary_length_u64;
    uint64_t remaining;
    const uint8_t *bytes;
    const uint8_t *blob_start;
    const char *json_start;

    if (metadata == NULL || json == NULL || json_length == NULL || blob == NULL
        || blob_length == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    bytes = (const uint8_t *) metadata;
    if (metadata_length < TSK_JSON_BINARY_HEADER_SIZE) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    if (memcmp(bytes, TSK_JSON_BINARY_MAGIC, sizeof(TSK_JSON_BINARY_MAGIC)) != 0) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    version = bytes[4];
    if (version != 1) {
        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_NEW);
        goto out;
    }
    json_length_u64 = tsk_load_u64_le(bytes + 5);
    binary_length_u64 = tsk_load_u64_le(bytes + 13);
    /* The two lengths come straight from the (untrusted) buffer. Summing
     * them with the header size could wrap around in uint64_t and slip past
     * a "total <= metadata_length" test, so instead compare each length
     * against the number of bytes still available. The subtraction cannot
     * underflow because metadata_length >= header size was checked above. */
    remaining = (uint64_t) metadata_length - (uint64_t) TSK_JSON_BINARY_HEADER_SIZE;
    if (json_length_u64 > remaining) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    remaining -= json_length_u64;
    if (binary_length_u64 > remaining) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    /* Both sections are now known to lie inside the buffer, so the lengths
     * fit in tsk_size_t and the pointer arithmetic stays in bounds. Bytes
     * after the binary payload, if any, are ignored. */
    json_start = (const char *) bytes + TSK_JSON_BINARY_HEADER_SIZE;
    blob_start = bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64;
    *json = json_start;
    *json_length = (tsk_size_t) json_length_u64;
    *blob = blob_start;
    *blob_length = (tsk_size_t) binary_length_u64;
    ret = 0;
out:
    return ret;
}
static const char *
tsk_strerror_internal(int err)
{
Expand Down
25 changes: 25 additions & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,31 @@ bool tsk_isfinite(double val);
#define TSK_UUID_SIZE 36
int tsk_generate_uuid(char *dest, int flags);

/**
@brief Locate the JSON and binary sections of ``json+binary`` encoded metadata.

@rst
Metadata produced by :py:class:`tskit.metadata.JSONBinaryCodec` consists of a fixed-size
header followed by canonical JSON bytes and an optional binary payload. The header is
21 bytes long: the four magic bytes ``JBLB``, a one byte version (only version 1 is
accepted), and the JSON length and binary payload length, each stored as an 8 byte
little-endian unsigned integer. This helper validates the ``json+binary`` framing,
returning pointers to the embedded JSON and binary sections without copying.

The output pointers reference memory owned by the caller and remain valid only while
the original metadata buffer is alive. The JSON section is returned as a pointer and a
length into that buffer; this function does not add a NUL terminator. The output
arguments are only written on success.
@endrst

@param[in] metadata Pointer to the encoded metadata bytes.
@param[in] metadata_length Number of bytes available at ``metadata``.
@param[out] json On success, set to the start of the JSON bytes.
@param[out] json_length On success, set to the JSON length in bytes.
@param[out] blob On success, set to the start of the binary payload.
@param[out] blob_length On success, set to the payload length in bytes.
@return 0 on success, or a :ref:`TSK_ERR <c_api_errors>` code on failure:
    ``TSK_ERR_BAD_PARAM_VALUE`` if any pointer argument is NULL,
    ``TSK_ERR_FILE_FORMAT`` if the buffer is shorter than the header, the magic bytes
    do not match, or the buffer is shorter than the lengths declared in the header, and
    ``TSK_ERR_FILE_VERSION_TOO_NEW`` if the version byte is not 1.
*/
int tsk_json_binary_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
const char **json, tsk_size_t *json_length, const uint8_t **blob,
tsk_size_t *blob_length);

/* TODO most of these can probably be macros so they compile out as no-ops.
* Lets do the 64 bit tsk_size_t switch first though. */
void *tsk_malloc(tsk_size_t size);
Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
allowing greater flexibility in "disjoint union" situations.
(:user:`hyanwong`, :user:`petrelharp`, :issue:`3181`)

- Add ``json+binary`` metadata codec that allows storing binary data
alongside JSON metadata. (:user:`benjeffery`, :pr:`3306`)

**Bugfixes**

- In some tables with mutations out-of-order ``TableCollection.sort`` did not re-order
Expand Down
Loading