bser_parse.c

#include <stdio.h> #include <string.h> #include "bser.h" /** * BSER parsing is done lazily, and does not copy strings out of the buffer. * This is accomplished by initializing each new object that we encounter * as an "unparsed" object, which contains only a pointer to the data buffer * (which itself has a cursor). The first time an object is queried or a * value is requested of it, it accesses the buffer and reads only the data * it needs to satisfy the request. For unit types, this usually means reading * the tag and all the data. For strings, the length is read and a pointer * to the string data in the buffer is stored in the object. * * Parsing occurs in the order that the data appears in the buffer. That is, * if an object's data appears earlier in the buffer than a different object, * then the first object is guaranteed to be parsed first. The buffer is a * stream and cannot be randomly accessed. The in-order parsing happens in * the implementation of the 'array' and 'object' types, which are the only * composite types in the system. When these are initially parsed, the read * only the header information (including the size) and allocate an array to * use for storage and this storage is initialized to all unparsed object. * However, when an element or a field is accessed, the query methods ensures * that all elements or fields before the requested field are fully-parsed * (from first to last). */ static bser_t* new_unparsed(bser_buffer_t* buffer, bser_t* fill) { if (fill == NULL) { fill = bser_alloc(); } fill->type = BSER_TAG_UNPARSED; fill->value.unparsed = buffer; return fill; } static bser_t* new_error(const char* message, bser_t* fill) { if (fill == NULL) { fill = bser_alloc(); } fill->type = BSER_TAG_ERROR; fill->value.error_message = message; return fill; } static void new_compact_object(bser_buffer_t* buffer, bser_t* header, bser_t* fill) { assert(bser_is_array(header)); size_t sz = bser_array_size(header); bser_key_value_pair_t* fields = malloc(sizeof(*fields) * sz); if (fields == NULL) { new_error("Could not allocate enough memory to hold " "compact object fields", fill); } else { for (int i = 0; i < sz; ++i) { fields[i].key = *(bser_array_get(header, i)); new_unparsed(buffer, &fields[i].value); } bser_new_object(fields, sz, fill); } } static bser_buffer_t* new_read_buffer(void* data, size_t buflen, int offset) { bser_buffer_t* buffer = malloc(sizeof(*buffer)); buffer->data = data; buffer->datalen = buflen; buffer->cursor = offset; return buffer; } bser_t* bser_parse_content(void* data, size_t buflen, bser_t* fill) { bser_buffer_t* read_buffer = new_read_buffer(data, buflen, 0); /* Create lazily-parsed node that will self-parse when it is accessed */ return new_unparsed(read_buffer, fill); } static int integer_from_file(FILE* fp, int64_t* value) { int8_t v8 = 0; int16_t v16 = 0; int32_t v32 = 0; int64_t v64 = 0; switch (fgetc(fp)) { case BSER_TAG_INT8: if (fread(&v8, sizeof(v8), 1, fp) == 1) { *value = v8; return 0; } case BSER_TAG_INT16: if (fread(&v16, sizeof(v16), 1, fp) == 1) { *value = v16; return 0; } case BSER_TAG_INT32: if (fread(&v32, sizeof(v32), 1, fp) == 1) { *value = v32; return 0; } case BSER_TAG_INT64: if (fread(&v64, sizeof(v64), 1, fp) == 1) { *value = v64; return 0; } } return -1; } bser_t* bser_parse_buffer(void* buffer, size_t buflen, bser_t* fill) { uint8_t* magic = buffer; if (buflen < 2 || magic[0] != 0 || magic[1] != 1) { return new_error("Could not read bser magic values", fill); } else { bser_t length; bser_buffer_t* read_buffer = new_read_buffer(buffer, buflen, 2); new_unparsed(read_buffer, &length); if (!bser_is_integer(&length)) { return new_error("Could not read bser length", fill); } else { size_t sz = bser_integer_value(&length); if (read_buffer->cursor + sz > buflen) { return new_error("Truncated buffer", fill); } else { return new_unparsed(read_buffer, fill); } } } } bser_t* bser_parse_from_file(FILE* fp, bser_t* fill) { void* buf; uint8_t magic[2]; int64_t size; int status = fread(magic, 1, 2, fp); if (status != 2 || magic[0] != 0 || magic[1] != 1) { return new_error("Could not read bser magic values", fill); } else if (integer_from_file(fp, &size)) { return new_error("Could not read bser length", fill); } else if (size <= 0) { return new_error("Invalid bser length", fill); } else { buf = malloc(size); if (buf == NULL) { return new_error("Could not allocate memory to hold data", fill); } else if (fread(buf, 1, size, fp) != size) { return new_error("Could not read full bser data", fill); } else { return bser_parse_content(buf, size, fill); } } } static void parse_array(bser_t* fill, bser_buffer_t* buffer) { bser_t length; new_unparsed(buffer, &length); if (bser_is_integer(&length)) { size_t sz = (size_t)bser_integer_value(&length); bser_t* array = malloc(sizeof(*array) * sz); if (array == NULL) { new_error("Could not allocate enough memory to hold " "array elements", fill); } else { for (int i = 0; i < sz; ++i) { new_unparsed(buffer, &array[i]); } bser_new_array(array, sz, fill); } } else { new_error("Array does not have an integer length", fill); } } static void parse_object(bser_t* fill, bser_buffer_t* buffer) { bser_t length; new_unparsed(buffer, &length); if (bser_is_integer(&length)) { size_t sz = (size_t)bser_integer_value(&length); bser_key_value_pair_t* array = malloc(sizeof(*array) * sz); if (array == NULL) { new_error("Could not allocate enough memory to hold " "object fields", fill); } else { for (int i = 0; i < sz; ++i) { new_unparsed(buffer, &array[i].key); new_unparsed(buffer, &array[i].value); } bser_new_object(array, sz, fill); } } else { new_error("Object does not have an integer length", fill); } } static void parse_string(bser_t* fill, bser_buffer_t* buffer) { bser_t length; new_unparsed(buffer, &length); if (bser_is_integer(&length)) { size_t sz = (size_t)bser_integer_value(&length); const char* chars = (const char*)buffer->data + buffer->cursor; buffer->cursor += sz; bser_new_string(chars, sz, fill); } else { new_error("String does not have an integer length", fill); } } static void parse_compact_array(bser_t* fill, bser_buffer_t* buffer) { bser_t header; new_unparsed(buffer, &header); if (bser_is_array(&header)) { size_t header_length = bser_array_size(&header); for (int i = 0; i < header_length; ++i) { bser_parse_if_necessary(bser_array_get(&header, i)); } bser_t length; new_unparsed(buffer, &length); if (bser_is_integer(&length)) { size_t sz = (size_t)bser_integer_value(&length); bser_t* array = malloc(sizeof(*array) * sz); if (array == NULL) { new_error("Could not allocate enough memory to hold " "compact array elements", fill); } else { bser_new_array(array, sz, fill); for (int i = 0; i < sz; ++i) { new_compact_object(buffer, &header, &array[i]); } } } else { new_error("Compact array does not have an integer length", fill); } } else { new_error("Compact array does not have a header array", fill); } bser_free_contents(&header); } void bser_parse_generic(bser_t* fill, bser_buffer_t* buffer) { if (buffer->cursor >= buffer->datalen) { fill->type = BSER_TAG_ERROR; fill->value.error_message = "out of data"; } else { uint8_t* as_int = buffer->data; uint8_t tag = as_int[buffer->cursor++]; void* data = (char *)buffer->data + buffer->cursor; switch (tag) { case BSER_TAG_ARRAY: parse_array(fill, buffer); break; case BSER_TAG_OBJECT: parse_object(fill, buffer); break; case BSER_TAG_STRING: parse_string(fill, buffer); break; case BSER_TAG_INT8: bser_new_integer(*(int8_t*)data, fill); buffer->cursor += sizeof(int8_t); break; case BSER_TAG_INT16: bser_new_integer(*(int16_t*)data, fill); buffer->cursor += sizeof(int16_t); break; case BSER_TAG_INT32: bser_new_integer(*(int32_t*)data, fill); buffer->cursor += sizeof(int32_t); break; case BSER_TAG_INT64: bser_new_integer(*(int64_t*)data, fill); buffer->cursor += sizeof(int64_t); break; case BSER_TAG_REAL: bser_new_real(*(double*)data, fill); buffer->cursor += sizeof(double); break; case BSER_TAG_TRUE: bser_new_true(fill); break; case BSER_TAG_FALSE: bser_new_false(fill); break; case BSER_TAG_NULL: bser_new_null(fill); break; case BSER_TAG_COMPACT_ARRAY: parse_compact_array(fill, buffer); break; case BSER_TAG_NO_FIELD: fill->type = BSER_TAG_NO_FIELD; break; default: new_error("unknown tag in data stream", fill); break; } } } static int is_fully_parsed(bser_t* bser) { size_t index; if (bser_is_unparsed(bser)) { return 0; } else if (bser_is_array(bser) && (index = bser_array_size(bser)) > 0) { return is_fully_parsed(&bser->value.array.elements[index - 1]); } else if (bser_is_object(bser) && (index = bser_object_size(bser)) > 0) { return is_fully_parsed(&bser->value.object.fields[index - 1].value); } return 1; } static void fully_parse(bser_t* bser) { bser_parse_if_necessary(bser); if (!is_fully_parsed(bser)) { if (bser_is_array(bser)) { for (int i = 0; i < bser->value.array.length; ++i) { fully_parse(&bser->value.array.elements[i]); } } else if (bser_is_object(bser)) { for (int i = 0; i < bser->value.object.length; ++i) { struct bser_key_value_pair* pair = &bser->value.object.fields[i]; bser_parse_if_necessary(&pair->key); fully_parse(&pair->value); } } } } void bser_parse_array_elements_to(bser_t* array, size_t limit) { assert(bser_is_array(array)); if (limit > 0) { /* Search back to find first parsed, and parse all from there */ size_t index = limit - 1; while (!is_fully_parsed(&array->value.array.elements[index]) && index > 0) { --index; } for (int i = index; i < limit; ++i) { fully_parse(&array->value.array.elements[i]); } } } void bser_parse_object_fields_to(bser_t* obj, size_t limit) { assert(bser_is_object(obj)); if (limit > 0) { /* Search back to find first parsed, and parse all from there */ size_t index = limit - 1; while (!is_fully_parsed(&obj->value.object.fields[index].value) && index > 0) { --index; } for (int i = index; i < limit; ++i) { bser_key_value_pair_t* pair = bser_object_pair_at(obj, i); bser_parse_if_necessary(&pair->key); fully_parse(&pair->value); } } }

bser_parse.c (345 lines of code) (raw):