blob: fc8fd61b41728fa5737566f29f66c4656c8c00b7 [file] [log] [blame]
/*
* xplist.c
* XML plist implementation
*
* Copyright (c) 2010-2017 Nikias Bassen All Rights Reserved.
* Copyright (c) 2010-2015 Martin Szulecki All Rights Reserved.
* Copyright (c) 2008 Jonathan Beck All Rights Reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STRPTIME
#define _XOPEN_SOURCE 600
#endif
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <inttypes.h>
#include <math.h>
#include <limits.h>
#include <node.h>
#include <node_list.h>
#include "plist.h"
#include "base64.h"
#include "strbuf.h"
#include "time64.h"
#define XPLIST_KEY "key"
#define XPLIST_KEY_LEN 3
#define XPLIST_FALSE "false"
#define XPLIST_FALSE_LEN 5
#define XPLIST_TRUE "true"
#define XPLIST_TRUE_LEN 4
#define XPLIST_INT "integer"
#define XPLIST_INT_LEN 7
#define XPLIST_REAL "real"
#define XPLIST_REAL_LEN 4
#define XPLIST_DATE "date"
#define XPLIST_DATE_LEN 4
#define XPLIST_DATA "data"
#define XPLIST_DATA_LEN 4
#define XPLIST_STRING "string"
#define XPLIST_STRING_LEN 6
#define XPLIST_ARRAY "array"
#define XPLIST_ARRAY_LEN 5
#define XPLIST_DICT "dict"
#define XPLIST_DICT_LEN 4
#define MAC_EPOCH 978307200
#define MAX_DATA_BYTES_PER_LINE(__i) (((76 - (__i << 3)) >> 2) * 3)
static const char XML_PLIST_PROLOG[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n\
<plist version=\"1.0\">\n";
static const char XML_PLIST_EPILOG[] = "</plist>\n";
#ifdef DEBUG
static int plist_xml_debug = 0;
#define PLIST_XML_ERR(...) if (plist_xml_debug) { fprintf(stderr, "libplist[xmlparser] ERROR: " __VA_ARGS__); }
#else
#define PLIST_XML_ERR(...)
#endif
void plist_xml_init(void)
{
/* init XML stuff */
#ifdef DEBUG
char *env_debug = getenv("PLIST_XML_DEBUG");
if (env_debug && !strcmp(env_debug, "1")) {
plist_xml_debug = 1;
}
#endif
}
void plist_xml_deinit(void)
{
/* deinit XML stuff */
}
static size_t dtostr(char *buf, size_t bufsize, double realval)
{
double f = realval;
double ip = 0.0;
int64_t v;
size_t len;
size_t p;
double CORR = 0.0000005;
f = modf(f, &ip);
v = (int64_t)ip;
if (f < 0) {
if (((int)((f - CORR) * -10.0f)) >= 10) {
v--;
f = 0;
}
} else {
if (((int)((f + CORR) * 10.0f)) >= 10) {
v++;
f = 0;
}
}
len = snprintf(buf, bufsize, "%s%"PRIi64, ((f < 0) && (ip >= 0)) ? "-" : "", v);
if (len >= bufsize) {
return 0;
}
if (f < 0) {
f *= -1;
}
f += CORR;
p = len;
buf[p++] = '.';
while (p < bufsize && (p <= len+6)) {
f = modf(f*10, &ip);
v = (int)ip;
buf[p++] = (v + 0x30);
}
buf[p] = '\0';
return p;
}
static void node_to_xml(node_t* node, bytearray_t **outbuf, uint32_t depth)
{
plist_data_t node_data = NULL;
char isStruct = FALSE;
char tagOpen = FALSE;
const char *tag = NULL;
size_t tag_len = 0;
char *val = NULL;
size_t val_len = 0;
uint32_t i = 0;
if (!node)
return;
node_data = plist_get_data(node);
switch (node_data->type)
{
case PLIST_BOOLEAN:
{
if (node_data->boolval) {
tag = XPLIST_TRUE;
tag_len = XPLIST_TRUE_LEN;
} else {
tag = XPLIST_FALSE;
tag_len = XPLIST_FALSE_LEN;
}
}
break;
case PLIST_UINT:
tag = XPLIST_INT;
tag_len = XPLIST_INT_LEN;
val = (char*)malloc(64);
if (node_data->length == 16) {
val_len = snprintf(val, 64, "%"PRIu64, node_data->intval);
} else {
val_len = snprintf(val, 64, "%"PRIi64, node_data->intval);
}
break;
case PLIST_REAL:
tag = XPLIST_REAL;
tag_len = XPLIST_REAL_LEN;
val = (char*)malloc(64);
val_len = dtostr(val, 64, node_data->realval);
break;
case PLIST_STRING:
tag = XPLIST_STRING;
tag_len = XPLIST_STRING_LEN;
/* contents processed directly below */
break;
case PLIST_KEY:
tag = XPLIST_KEY;
tag_len = XPLIST_KEY_LEN;
/* contents processed directly below */
break;
case PLIST_DATA:
tag = XPLIST_DATA;
tag_len = XPLIST_DATA_LEN;
/* contents processed directly below */
break;
case PLIST_ARRAY:
tag = XPLIST_ARRAY;
tag_len = XPLIST_ARRAY_LEN;
isStruct = (node->children) ? TRUE : FALSE;
break;
case PLIST_DICT:
tag = XPLIST_DICT;
tag_len = XPLIST_DICT_LEN;
isStruct = (node->children) ? TRUE : FALSE;
break;
case PLIST_DATE:
tag = XPLIST_DATE;
tag_len = XPLIST_DATE_LEN;
{
Time64_T timev = (Time64_T)node_data->realval + MAC_EPOCH;
struct TM _btime;
struct TM *btime = gmtime64_r(&timev, &_btime);
if (btime) {
val = (char*)malloc(24);
memset(val, 0, 24);
struct tm _tmcopy;
copy_TM64_to_tm(btime, &_tmcopy);
val_len = strftime(val, 24, "%Y-%m-%dT%H:%M:%SZ", &_tmcopy);
if (val_len <= 0) {
free (val);
val = NULL;
}
}
}
break;
case PLIST_UID:
tag = XPLIST_DICT;
tag_len = XPLIST_DICT_LEN;
val = (char*)malloc(64);
if (node_data->length == 16) {
val_len = snprintf(val, 64, "%"PRIu64, node_data->intval);
} else {
val_len = snprintf(val, 64, "%"PRIi64, node_data->intval);
}
break;
default:
break;
}
for (i = 0; i < depth; i++) {
str_buf_append(*outbuf, "\t", 1);
}
/* append tag */
str_buf_append(*outbuf, "<", 1);
str_buf_append(*outbuf, tag, tag_len);
if (node_data->type == PLIST_STRING || node_data->type == PLIST_KEY) {
size_t j;
size_t len;
off_t start = 0;
off_t cur = 0;
str_buf_append(*outbuf, ">", 1);
tagOpen = TRUE;
/* make sure we convert the following predefined xml entities */
/* < = &lt; > = &gt; & = &amp; */
len = node_data->length;
for (j = 0; j < len; j++) {
switch (node_data->strval[j]) {
case '<':
str_buf_append(*outbuf, node_data->strval + start, cur - start);
str_buf_append(*outbuf, "&lt;", 4);
start = cur+1;
break;
case '>':
str_buf_append(*outbuf, node_data->strval + start, cur - start);
str_buf_append(*outbuf, "&gt;", 4);
start = cur+1;
break;
case '&':
str_buf_append(*outbuf, node_data->strval + start, cur - start);
str_buf_append(*outbuf, "&amp;", 5);
start = cur+1;
break;
default:
break;
}
cur++;
}
str_buf_append(*outbuf, node_data->strval + start, cur - start);
} else if (node_data->type == PLIST_DATA) {
str_buf_append(*outbuf, ">", 1);
tagOpen = TRUE;
str_buf_append(*outbuf, "\n", 1);
if (node_data->length > 0) {
uint32_t j = 0;
uint32_t indent = (depth > 8) ? 8 : depth;
uint32_t maxread = MAX_DATA_BYTES_PER_LINE(indent);
size_t count = 0;
size_t amount = (node_data->length / 3 * 4) + 4 + (((node_data->length / maxread) + 1) * (indent+1));
if ((*outbuf)->len + amount > (*outbuf)->capacity) {
str_buf_grow(*outbuf, amount);
}
while (j < node_data->length) {
for (i = 0; i < indent; i++) {
str_buf_append(*outbuf, "\t", 1);
}
count = (node_data->length-j < maxread) ? node_data->length-j : maxread;
assert((*outbuf)->len + count < (*outbuf)->capacity);
(*outbuf)->len += base64encode((char*)(*outbuf)->data + (*outbuf)->len, node_data->buff + j, count);
str_buf_append(*outbuf, "\n", 1);
j+=count;
}
}
for (i = 0; i < depth; i++) {
str_buf_append(*outbuf, "\t", 1);
}
} else if (node_data->type == PLIST_UID) {
/* special case for UID nodes: create a DICT */
str_buf_append(*outbuf, ">", 1);
tagOpen = TRUE;
str_buf_append(*outbuf, "\n", 1);
/* add CF$UID key */
for (i = 0; i < depth+1; i++) {
str_buf_append(*outbuf, "\t", 1);
}
str_buf_append(*outbuf, "<key>CF$UID</key>", 17);
str_buf_append(*outbuf, "\n", 1);
/* add UID value */
for (i = 0; i < depth+1; i++) {
str_buf_append(*outbuf, "\t", 1);
}
str_buf_append(*outbuf, "<integer>", 9);
str_buf_append(*outbuf, val, val_len);
str_buf_append(*outbuf, "</integer>", 10);
str_buf_append(*outbuf, "\n", 1);
for (i = 0; i < depth; i++) {
str_buf_append(*outbuf, "\t", 1);
}
} else if (val) {
str_buf_append(*outbuf, ">", 1);
tagOpen = TRUE;
str_buf_append(*outbuf, val, val_len);
} else if (isStruct) {
tagOpen = TRUE;
str_buf_append(*outbuf, ">", 1);
} else {
tagOpen = FALSE;
str_buf_append(*outbuf, "/>", 2);
}
free(val);
if (isStruct) {
/* add newline for structured types */
str_buf_append(*outbuf, "\n", 1);
/* add child nodes */
if (node_data->type == PLIST_DICT && node->children) {
assert((node->children->count % 2) == 0);
}
node_t *ch;
for (ch = node_first_child(node); ch; ch = node_next_sibling(ch)) {
node_to_xml(ch, outbuf, depth+1);
}
/* fix indent for structured types */
for (i = 0; i < depth; i++) {
str_buf_append(*outbuf, "\t", 1);
}
}
if (tagOpen) {
/* add closing tag */
str_buf_append(*outbuf, "</", 2);
str_buf_append(*outbuf, tag, tag_len);
str_buf_append(*outbuf, ">", 1);
}
str_buf_append(*outbuf, "\n", 1);
return;
}
static void parse_date(const char *strval, struct TM *btime)
{
if (!btime) return;
memset(btime, 0, sizeof(struct tm));
if (!strval) return;
#ifdef HAVE_STRPTIME
strptime((char*)strval, "%Y-%m-%dT%H:%M:%SZ", btime);
#else
#ifdef USE_TM64
#define PLIST_SSCANF_FORMAT "%lld-%d-%dT%d:%d:%dZ"
#else
#define PLIST_SSCANF_FORMAT "%d-%d-%dT%d:%d:%dZ"
#endif
sscanf(strval, PLIST_SSCANF_FORMAT, &btime->tm_year, &btime->tm_mon, &btime->tm_mday, &btime->tm_hour, &btime->tm_min, &btime->tm_sec);
btime->tm_year-=1900;
btime->tm_mon--;
#endif
btime->tm_isdst=0;
}
#define PO10i_LIMIT (INT64_MAX/10)
/* based on https://stackoverflow.com/a/4143288 */
static int num_digits_i(int64_t i)
{
int n;
int64_t po10;
n=1;
if (i < 0) {
i = -i;
n++;
}
po10=10;
while (i>=po10) {
n++;
if (po10 > PO10i_LIMIT) break;
po10*=10;
}
return n;
}
#define PO10u_LIMIT (UINT64_MAX/10)
/* based on https://stackoverflow.com/a/4143288 */
static int num_digits_u(uint64_t i)
{
int n;
uint64_t po10;
n=1;
po10=10;
while (i>=po10) {
n++;
if (po10 > PO10u_LIMIT) break;
po10*=10;
}
return n;
}
static void node_estimate_size(node_t *node, uint64_t *size, uint32_t depth)
{
plist_data_t data;
if (!node) {
return;
}
data = plist_get_data(node);
if (node->children) {
node_t *ch;
for (ch = node_first_child(node); ch; ch = node_next_sibling(ch)) {
node_estimate_size(ch, size, depth + 1);
}
switch (data->type) {
case PLIST_DICT:
*size += (XPLIST_DICT_LEN << 1) + 7;
break;
case PLIST_ARRAY:
*size += (XPLIST_ARRAY_LEN << 1) + 7;
break;
default:
break;
}
*size += (depth << 1);
} else {
uint32_t indent = (depth > 8) ? 8 : depth;
switch (data->type) {
case PLIST_DATA: {
uint32_t req_lines = (data->length / MAX_DATA_BYTES_PER_LINE(indent)) + 1;
uint32_t b64len = data->length + (data->length / 3);
b64len += b64len % 4;
*size += b64len;
*size += (XPLIST_DATA_LEN << 1) + 5 + (indent+1) * (req_lines+1) + 1;
} break;
case PLIST_STRING:
*size += data->length;
*size += (XPLIST_STRING_LEN << 1) + 6;
break;
case PLIST_KEY:
*size += data->length;
*size += (XPLIST_KEY_LEN << 1) + 6;
break;
case PLIST_UINT:
if (data->length == 16) {
*size += num_digits_u(data->intval);
} else {
*size += num_digits_i((int64_t)data->intval);
}
*size += (XPLIST_INT_LEN << 1) + 6;
break;
case PLIST_REAL:
*size += num_digits_i((int64_t)data->realval) + 7;
*size += (XPLIST_REAL_LEN << 1) + 6;
break;
case PLIST_DATE:
*size += 20; /* YYYY-MM-DDThh:mm:ssZ */
*size += (XPLIST_DATE_LEN << 1) + 6;
break;
case PLIST_BOOLEAN:
*size += ((data->boolval) ? XPLIST_TRUE_LEN : XPLIST_FALSE_LEN) + 4;
break;
case PLIST_DICT:
*size += XPLIST_DICT_LEN + 4; /* <dict/> */
break;
case PLIST_ARRAY:
*size += XPLIST_ARRAY_LEN + 4; /* <array/> */
break;
case PLIST_UID:
*size += num_digits_i((int64_t)data->intval);
*size += (XPLIST_DICT_LEN << 1) + 7;
*size += indent + ((indent+1) << 1);
*size += 18; /* <key>CF$UID</key> */
*size += (XPLIST_INT_LEN << 1) + 6;
break;
default:
break;
}
*size += indent;
}
}
PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length)
{
uint64_t size = 0;
node_estimate_size(plist, &size, 0);
size += sizeof(XML_PLIST_PROLOG) + sizeof(XML_PLIST_EPILOG) - 1;
strbuf_t *outbuf = str_buf_new(size);
str_buf_append(outbuf, XML_PLIST_PROLOG, sizeof(XML_PLIST_PROLOG)-1);
node_to_xml(plist, &outbuf, 0);
str_buf_append(outbuf, XML_PLIST_EPILOG, sizeof(XML_PLIST_EPILOG));
*plist_xml = outbuf->data;
*length = outbuf->len - 1;
outbuf->data = NULL;
str_buf_free(outbuf);
}
struct _parse_ctx {
const char *pos;
const char *end;
int err;
};
typedef struct _parse_ctx* parse_ctx;
static void parse_skip_ws(parse_ctx ctx)
{
while (ctx->pos < ctx->end && ((*(ctx->pos) == ' ') || (*(ctx->pos) == '\t') || (*(ctx->pos) == '\r') || (*(ctx->pos) == '\n'))) {
ctx->pos++;
}
}
static void find_char(parse_ctx ctx, char c, int skip_quotes)
{
while (ctx->pos < ctx->end && (*(ctx->pos) != c)) {
if (skip_quotes && (c != '"') && (*(ctx->pos) == '"')) {
ctx->pos++;
find_char(ctx, '"', 0);
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while looking for matching double quote\n");
return;
}
if (*(ctx->pos) != '"') {
PLIST_XML_ERR("Unmatched double quote\n");
return;
}
}
ctx->pos++;
}
}
static void find_str(parse_ctx ctx, const char *str, size_t len, int skip_quotes)
{
while (ctx->pos < (ctx->end - len)) {
if (!strncmp(ctx->pos, str, len)) {
break;
}
if (skip_quotes && (*(ctx->pos) == '"')) {
ctx->pos++;
find_char(ctx, '"', 0);
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while looking for matching double quote\n");
return;
}
if (*(ctx->pos) != '"') {
PLIST_XML_ERR("Unmatched double quote\n");
return;
}
}
ctx->pos++;
}
}
static void find_next(parse_ctx ctx, const char *nextchars, int numchars, int skip_quotes)
{
int i = 0;
while (ctx->pos < ctx->end) {
if (skip_quotes && (*(ctx->pos) == '"')) {
ctx->pos++;
find_char(ctx, '"', 0);
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while looking for matching double quote\n");
return;
}
if (*(ctx->pos) != '"') {
PLIST_XML_ERR("Unmatched double quote\n");
return;
}
}
for (i = 0; i < numchars; i++) {
if (*(ctx->pos) == nextchars[i]) {
return;
}
}
ctx->pos++;
}
}
typedef struct {
const char *begin;
size_t length;
int is_cdata;
void *next;
} text_part_t;
static text_part_t* text_part_init(text_part_t* part, const char *begin, size_t length, int is_cdata)
{
part->begin = begin;
part->length = length;
part->is_cdata = is_cdata;
part->next = NULL;
return part;
}
static void text_parts_free(text_part_t *tp)
{
while (tp) {
text_part_t *tmp = tp;
tp = tp->next;
free(tmp);
}
}
static text_part_t* text_part_append(text_part_t* parts, const char *begin, size_t length, int is_cdata)
{
text_part_t* newpart = malloc(sizeof(text_part_t));
assert(newpart);
parts->next = text_part_init(newpart, begin, length, is_cdata);
return newpart;
}
static text_part_t* get_text_parts(parse_ctx ctx, const char* tag, size_t tag_len, int skip_ws, text_part_t *parts)
{
const char *p = NULL;
const char *q = NULL;
text_part_t *last = NULL;
if (skip_ws) {
parse_skip_ws(ctx);
}
do {
p = ctx->pos;
find_char(ctx, '<', 0);
if (ctx->pos >= ctx->end || *ctx->pos != '<') {
PLIST_XML_ERR("EOF while looking for closing tag\n");
ctx->err++;
return NULL;
}
q = ctx->pos;
ctx->pos++;
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while parsing '%s'\n", p);
ctx->err++;
return NULL;
}
if (*ctx->pos == '!') {
ctx->pos++;
if (ctx->pos >= ctx->end-1) {
PLIST_XML_ERR("EOF while parsing <! special tag\n");
ctx->err++;
return NULL;
}
if (*ctx->pos == '-' && *(ctx->pos+1) == '-') {
if (last) {
last = text_part_append(last, p, q-p, 0);
} else if (parts) {
last = text_part_init(parts, p, q-p, 0);
}
ctx->pos += 2;
find_str(ctx, "-->", 3, 0);
if (ctx->pos > ctx->end-3 || strncmp(ctx->pos, "-->", 3) != 0) {
PLIST_XML_ERR("EOF while looking for end of comment\n");
ctx->err++;
return NULL;
}
ctx->pos += 3;
} else if (*ctx->pos == '[') {
ctx->pos++;
if (ctx->pos >= ctx->end - 8) {
PLIST_XML_ERR("EOF while parsing <[ tag\n");
ctx->err++;
return NULL;
}
if (strncmp(ctx->pos, "CDATA[", 6) == 0) {
if (q-p > 0) {
if (last) {
last = text_part_append(last, p, q-p, 0);
} else if (parts) {
last = text_part_init(parts, p, q-p, 0);
}
}
ctx->pos+=6;
p = ctx->pos;
find_str(ctx, "]]>", 3, 0);
if (ctx->pos > ctx->end-3 || strncmp(ctx->pos, "]]>", 3) != 0) {
PLIST_XML_ERR("EOF while looking for end of CDATA block\n");
ctx->err++;
return NULL;
}
q = ctx->pos;
if (last) {
last = text_part_append(last, p, q-p, 1);
} else if (parts) {
last = text_part_init(parts, p, q-p, 1);
}
ctx->pos += 3;
} else {
p = ctx->pos;
find_next(ctx, " \r\n\t>", 5, 1);
PLIST_XML_ERR("Invalid special tag <[%.*s> encountered inside <%s> tag\n", (int)(ctx->pos - p), p, tag);
ctx->err++;
return NULL;
}
} else {
p = ctx->pos;
find_next(ctx, " \r\n\t>", 5, 1);
PLIST_XML_ERR("Invalid special tag <!%.*s> encountered inside <%s> tag\n", (int)(ctx->pos - p), p, tag);
ctx->err++;
return NULL;
}
} else if (*ctx->pos == '/') {
break;
} else {
p = ctx->pos;
find_next(ctx, " \r\n\t>", 5, 1);
PLIST_XML_ERR("Invalid tag <%.*s> encountered inside <%s> tag\n", (int)(ctx->pos - p), p, tag);
ctx->err++;
return NULL;
}
} while (1);
ctx->pos++;
if (ctx->pos >= ctx->end-tag_len || strncmp(ctx->pos, tag, tag_len)) {
PLIST_XML_ERR("EOF or end tag mismatch\n");
ctx->err++;
return NULL;
}
ctx->pos+=tag_len;
parse_skip_ws(ctx);
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while parsing closing tag\n");
ctx->err++;
return NULL;
} else if (*ctx->pos != '>') {
PLIST_XML_ERR("Invalid closing tag; expected '>', found '%c'\n", *ctx->pos);
ctx->err++;
return NULL;
}
ctx->pos++;
if (q-p > 0) {
if (last) {
last = text_part_append(last, p, q-p, 0);
} else if (parts) {
last = text_part_init(parts, p, q-p, 0);
}
}
return parts;
}
static int unescape_entities(char *str, size_t *length)
{
size_t i = 0;
size_t len = *length;
while (len > 0 && i < len-1) {
if (str[i] == '&') {
char *entp = str + i + 1;
while (i < len && str[i] != ';') {
i++;
}
if (i >= len) {
PLIST_XML_ERR("Invalid entity sequence encountered (missing terminating ';')\n");
return -1;
}
if (str+i >= entp+1) {
int entlen = str+i - entp;
int bytelen = 1;
if (!strncmp(entp, "amp", 3)) {
/* the '&' is already there */
} else if (!strncmp(entp, "apos", 4)) {
*(entp-1) = '\'';
} else if (!strncmp(entp, "quot", 4)) {
*(entp-1) = '"';
} else if (!strncmp(entp, "lt", 2)) {
*(entp-1) = '<';
} else if (!strncmp(entp, "gt", 2)) {
*(entp-1) = '>';
} else if (*entp == '#') {
/* numerical character reference */
uint64_t val = 0;
char* ep = NULL;
if (entlen > 8) {
PLIST_XML_ERR("Invalid numerical character reference encountered, sequence too long: &%.*s;\n", entlen, entp);
return -1;
}
if (*(entp+1) == 'x' || *(entp+1) == 'X') {
if (entlen < 3) {
PLIST_XML_ERR("Invalid numerical character reference encountered, sequence too short: &%.*s;\n", entlen, entp);
return -1;
}
val = strtoull(entp+2, &ep, 16);
} else {
if (entlen < 2) {
PLIST_XML_ERR("Invalid numerical character reference encountered, sequence too short: &%.*s;\n", entlen, entp);
return -1;
}
val = strtoull(entp+1, &ep, 10);
}
if (val == 0 || val > 0x10FFFF || ep-entp != entlen) {
PLIST_XML_ERR("Invalid numerical character reference found: &%.*s;\n", entlen, entp);
return -1;
}
/* convert to UTF8 */
if (val >= 0x10000) {
/* four bytes */
*(entp-1) = (char)(0xF0 + ((val >> 18) & 0x7));
*(entp+0) = (char)(0x80 + ((val >> 12) & 0x3F));
*(entp+1) = (char)(0x80 + ((val >> 6) & 0x3F));
*(entp+2) = (char)(0x80 + (val & 0x3F));
entp+=3;
bytelen = 4;
} else if (val >= 0x800) {
/* three bytes */
*(entp-1) = (char)(0xE0 + ((val >> 12) & 0xF));
*(entp+0) = (char)(0x80 + ((val >> 6) & 0x3F));
*(entp+1) = (char)(0x80 + (val & 0x3F));
entp+=2;
bytelen = 3;
} else if (val >= 0x80) {
/* two bytes */
*(entp-1) = (char)(0xC0 + ((val >> 6) & 0x1F));
*(entp+0) = (char)(0x80 + (val & 0x3F));
entp++;
bytelen = 2;
} else {
/* one byte */
*(entp-1) = (char)(val & 0x7F);
}
} else {
PLIST_XML_ERR("Invalid entity encountered: &%.*s;\n", entlen, entp);
return -1;
}
memmove(entp, str+i+1, len - i);
i -= entlen+1 - bytelen;
len -= entlen+2 - bytelen;
continue;
} else {
PLIST_XML_ERR("Invalid empty entity sequence &;\n");
return -1;
}
}
i++;
}
*length = len;
return 0;
}
static char* text_parts_get_content(text_part_t *tp, int unesc_entities, size_t *length, int *requires_free)
{
char *str = NULL;
size_t total_length = 0;
if (!tp) {
return NULL;
}
char *p;
if (requires_free && !tp->next) {
if (tp->is_cdata || !unesc_entities) {
*requires_free = 0;
if (length) {
*length = tp->length;
}
return (char*)tp->begin;
}
}
text_part_t *tmp = tp;
while (tp && tp->begin) {
total_length += tp->length;
tp = tp->next;
}
str = malloc(total_length + 1);
assert(str);
p = str;
tp = tmp;
while (tp && tp->begin) {
size_t len = tp->length;
strncpy(p, tp->begin, len);
p[len] = '\0';
if (!tp->is_cdata && unesc_entities) {
if (unescape_entities(p, &len) < 0) {
free(str);
return NULL;
}
}
p += len;
tp = tp->next;
}
*p = '\0';
if (length) {
*length = p - str;
}
if (requires_free) {
*requires_free = 1;
}
return str;
}
static void node_from_xml(parse_ctx ctx, plist_t *plist)
{
char *tag = NULL;
char *keyname = NULL;
plist_t subnode = NULL;
const char *p = NULL;
plist_t parent = NULL;
int has_content = 0;
struct node_path_item {
const char *type;
void *prev;
};
struct node_path_item* node_path = NULL;
while (ctx->pos < ctx->end && !ctx->err) {
parse_skip_ws(ctx);
if (ctx->pos >= ctx->end) {
break;
}
if (*ctx->pos != '<') {
p = ctx->pos;
find_next(ctx, " \t\r\n", 4, 0);
PLIST_XML_ERR("Expected: opening tag, found: %.*s\n", (int)(ctx->pos - p), p);
ctx->err++;
goto err_out;
}
ctx->pos++;
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while parsing tag\n");
ctx->err++;
goto err_out;
}
if (*(ctx->pos) == '?') {
find_str(ctx, "?>", 2, 1);
if (ctx->pos > ctx->end-2) {
PLIST_XML_ERR("EOF while looking for <? tag closing marker\n");
ctx->err++;
goto err_out;
}
if (strncmp(ctx->pos, "?>", 2)) {
PLIST_XML_ERR("Couldn't find <? tag closing marker\n");
ctx->err++;
goto err_out;
}
ctx->pos += 2;
continue;
} else if (*(ctx->pos) == '!') {
/* comment or DTD */
if (((ctx->end - ctx->pos) > 3) && !strncmp(ctx->pos, "!--", 3)) {
ctx->pos += 3;
find_str(ctx,"-->", 3, 0);
if (ctx->pos > ctx->end-3 || strncmp(ctx->pos, "-->", 3)) {
PLIST_XML_ERR("Couldn't find end of comment\n");
ctx->err++;
goto err_out;
}
ctx->pos+=3;
} else if (((ctx->end - ctx->pos) > 8) && !strncmp(ctx->pos, "!DOCTYPE", 8)) {
int embedded_dtd = 0;
ctx->pos+=8;
while (ctx->pos < ctx->end) {
find_next(ctx, " \t\r\n[>", 6, 1);
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("EOF while parsing !DOCTYPE\n");
ctx->err++;
goto err_out;
}
if (*ctx->pos == '[') {
embedded_dtd = 1;
break;
} else if (*ctx->pos == '>') {
/* end of DOCTYPE found already */
ctx->pos++;
break;
} else {
parse_skip_ws(ctx);
}
}
if (embedded_dtd) {
find_str(ctx, "]>", 2, 1);
if (ctx->pos > ctx->end-2 || strncmp(ctx->pos, "]>", 2)) {
PLIST_XML_ERR("Couldn't find end of DOCTYPE\n");
ctx->err++;
goto err_out;
}
ctx->pos += 2;
}
} else {
p = ctx->pos;
find_next(ctx, " \r\n\t>", 5, 1);
PLIST_XML_ERR("Invalid or incomplete special tag <%.*s> encountered\n", (int)(ctx->pos - p), p);
ctx->err++;
goto err_out;
}
continue;
} else {
int is_empty = 0;
int closing_tag = 0;
p = ctx->pos;
find_next(ctx," \r\n\t<>", 6, 0);
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("Unexpected EOF while parsing XML\n");
ctx->err++;
goto err_out;
}
int taglen = ctx->pos - p;
tag = malloc(taglen + 1);
strncpy(tag, p, taglen);
tag[taglen] = '\0';
if (*ctx->pos != '>') {
find_next(ctx, "<>", 2, 1);
}
if (ctx->pos >= ctx->end) {
PLIST_XML_ERR("Unexpected EOF while parsing XML\n");
ctx->err++;
goto err_out;
}
if (*ctx->pos != '>') {
PLIST_XML_ERR("Missing '>' for tag <%s\n", tag);
ctx->err++;
goto err_out;
}
if (*(ctx->pos-1) == '/') {
int idx = ctx->pos - p - 1;
if (idx < taglen)
tag[idx] = '\0';
is_empty = 1;
}
ctx->pos++;
if (!strcmp(tag, "plist")) {
free(tag);
tag = NULL;
has_content = 0;
if (!node_path && *plist) {
/* we don't allow another top-level <plist> */
break;
}
if (is_empty) {
PLIST_XML_ERR("Empty plist tag\n");
ctx->err++;
goto err_out;
}
struct node_path_item *path_item = malloc(sizeof(struct node_path_item));
if (!path_item) {
PLIST_XML_ERR("out of memory when allocating node path item\n");
ctx->err++;
goto err_out;
}
path_item->type = "plist";
path_item->prev = node_path;
node_path = path_item;
continue;
} else if (!strcmp(tag, "/plist")) {
if (!has_content) {
PLIST_XML_ERR("encountered empty plist tag\n");
ctx->err++;
goto err_out;
}
if (!node_path) {
PLIST_XML_ERR("node path is empty while trying to match closing tag with opening tag\n");
ctx->err++;
goto err_out;
}
if (strcmp(node_path->type, tag+1) != 0) {
PLIST_XML_ERR("mismatching closing tag <%s> found for opening tag <%s>\n", tag, node_path->type);
ctx->err++;
goto err_out;
}
struct node_path_item *path_item = node_path;
node_path = node_path->prev;
free(path_item);
free(tag);
tag = NULL;
continue;
}
plist_data_t data = plist_new_plist_data();
subnode = plist_new_node(data);
has_content = 1;
if (!strcmp(tag, XPLIST_DICT)) {
data->type = PLIST_DICT;
} else if (!strcmp(tag, XPLIST_ARRAY)) {
data->type = PLIST_ARRAY;
} else if (!strcmp(tag, XPLIST_INT)) {
if (!is_empty) {
text_part_t first_part = { NULL, 0, 0, NULL };
text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part);
if (!tp) {
PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
if (tp->begin) {
int requires_free = 0;
char *str_content = text_parts_get_content(tp, 0, NULL, &requires_free);
if (!str_content) {
PLIST_XML_ERR("Could not get text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
char *str = str_content;
int is_negative = 0;
if ((str[0] == '-') || (str[0] == '+')) {
if (str[0] == '-') {
is_negative = 1;
}
str++;
}
data->intval = strtoull((char*)str, NULL, 0);
if (is_negative || (data->intval <= INT64_MAX)) {
uint64_t v = data->intval;
if (is_negative) {
v = -v;
}
data->intval = v;
data->length = 8;
} else {
data->length = 16;
}
if (requires_free) {
free(str_content);
}
} else {
is_empty = 1;
}
text_parts_free(tp->next);
}
if (is_empty) {
data->intval = 0;
data->length = 8;
}
data->type = PLIST_UINT;
} else if (!strcmp(tag, XPLIST_REAL)) {
if (!is_empty) {
text_part_t first_part = { NULL, 0, 0, NULL };
text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part);
if (!tp) {
PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
if (tp->begin) {
int requires_free = 0;
char *str_content = text_parts_get_content(tp, 0, NULL, &requires_free);
if (!str_content) {
PLIST_XML_ERR("Could not get text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
data->realval = atof(str_content);
if (requires_free) {
free(str_content);
}
}
text_parts_free(tp->next);
}
data->type = PLIST_REAL;
data->length = 8;
} else if (!strcmp(tag, XPLIST_TRUE)) {
if (!is_empty) {
get_text_parts(ctx, tag, taglen, 1, NULL);
}
data->type = PLIST_BOOLEAN;
data->boolval = 1;
data->length = 1;
} else if (!strcmp(tag, XPLIST_FALSE)) {
if (!is_empty) {
get_text_parts(ctx, tag, taglen, 1, NULL);
}
data->type = PLIST_BOOLEAN;
data->boolval = 0;
data->length = 1;
} else if (!strcmp(tag, XPLIST_STRING) || !strcmp(tag, XPLIST_KEY)) {
if (!is_empty) {
text_part_t first_part = { NULL, 0, 0, NULL };
text_part_t *tp = get_text_parts(ctx, tag, taglen, 0, &first_part);
char *str = NULL;
size_t length = 0;
if (!tp) {
PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
str = text_parts_get_content(tp, 1, &length, NULL);
text_parts_free(first_part.next);
if (!str) {
PLIST_XML_ERR("Could not get text content for '%s' node\n", tag);
ctx->err++;
goto err_out;
}
if (!strcmp(tag, "key") && !keyname && parent && (plist_get_node_type(parent) == PLIST_DICT)) {
keyname = str;
free(tag);
tag = NULL;
plist_free(subnode);
subnode = NULL;
continue;
} else {
data->strval = str;
data->length = length;
}
} else {
data->strval = strdup("");
data->length = 0;
}
data->type = PLIST_STRING;
} else if (!strcmp(tag, XPLIST_DATA)) {
if (!is_empty) {
text_part_t first_part = { NULL, 0, 0, NULL };
text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part);
if (!tp) {
PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
if (tp->begin) {
int requires_free = 0;
char *str_content = text_parts_get_content(tp, 0, NULL, &requires_free);
if (!str_content) {
PLIST_XML_ERR("Could not get text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
size_t size = tp->length;
if (size > 0) {
data->buff = base64decode(str_content, &size);
data->length = size;
}
if (requires_free) {
free(str_content);
}
}
text_parts_free(tp->next);
}
data->type = PLIST_DATA;
} else if (!strcmp(tag, XPLIST_DATE)) {
if (!is_empty) {
text_part_t first_part = { NULL, 0, 0, NULL };
text_part_t *tp = get_text_parts(ctx, tag, taglen, 1, &first_part);
if (!tp) {
PLIST_XML_ERR("Could not parse text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
Time64_T timev = 0;
if (tp->begin) {
int requires_free = 0;
size_t length = 0;
char *str_content = text_parts_get_content(tp, 0, &length, &requires_free);
if (!str_content) {
PLIST_XML_ERR("Could not get text content for '%s' node\n", tag);
text_parts_free(first_part.next);
ctx->err++;
goto err_out;
}
if ((length >= 11) && (length < 32)) {
/* we need to copy here and 0-terminate because sscanf will read the entire string (whole rest of XML data) which can be huge */
char strval[32];
struct TM btime;
strncpy(strval, str_content, length);
strval[tp->length] = '\0';
parse_date(strval, &btime);
timev = timegm64(&btime);
} else {
PLIST_XML_ERR("Invalid text content in date node\n");
}
if (requires_free) {
free(str_content);
}
}
text_parts_free(tp->next);
data->realval = (double)(timev - MAC_EPOCH);
}
data->length = sizeof(double);
data->type = PLIST_DATE;
} else if (tag[0] == '/') {
closing_tag = 1;
} else {
PLIST_XML_ERR("Unexpected tag <%s%s> encountered\n", tag, (is_empty) ? "/" : "");
ctx->pos = ctx->end;
ctx->err++;
goto err_out;
}
if (subnode && !closing_tag) {
if (!*plist) {
/* first node, make this node the parent node */
*plist = subnode;
if (data->type != PLIST_DICT && data->type != PLIST_ARRAY) {
/* if the first node is not a structered node, we're done */
subnode = NULL;
goto err_out;
}
parent = subnode;
} else if (parent) {
switch (plist_get_node_type(parent)) {
case PLIST_DICT:
if (!keyname) {
PLIST_XML_ERR("missing key name while adding dict item\n");
ctx->err++;
goto err_out;
}
plist_dict_set_item(parent, keyname, subnode);
break;
case PLIST_ARRAY:
plist_array_append_item(parent, subnode);
break;
default:
/* should not happen */
PLIST_XML_ERR("parent is not a structured node\n");
ctx->err++;
goto err_out;
}
}
if (!is_empty && (data->type == PLIST_DICT || data->type == PLIST_ARRAY)) {
struct node_path_item *path_item = malloc(sizeof(struct node_path_item));
if (!path_item) {
PLIST_XML_ERR("out of memory when allocating node path item\n");
ctx->err++;
goto err_out;
}
path_item->type = (data->type == PLIST_DICT) ? XPLIST_DICT : XPLIST_ARRAY;
path_item->prev = node_path;
node_path = path_item;
parent = subnode;
}
subnode = NULL;
} else if (closing_tag) {
if (!node_path) {
PLIST_XML_ERR("node path is empty while trying to match closing tag with opening tag\n");
ctx->err++;
goto err_out;
}
if (strcmp(node_path->type, tag+1) != 0) {
PLIST_XML_ERR("unexpected %s found (for opening %s)\n", tag, node_path->type);
ctx->err++;
goto err_out;
}
struct node_path_item *path_item = node_path;
node_path = node_path->prev;
free(path_item);
parent = ((node_t*)parent)->parent;
if (!parent) {
goto err_out;
}
}
free(tag);
tag = NULL;
free(keyname);
keyname = NULL;
plist_free(subnode);
subnode = NULL;
}
}
if (node_path) {
PLIST_XML_ERR("EOF encountered while </%s> was expected\n", node_path->type);
ctx->err++;
}
err_out:
free(tag);
free(keyname);
plist_free(subnode);
/* clean up node_path if required */
while (node_path) {
struct node_path_item *path_item = node_path;
node_path = path_item->prev;
free(path_item);
}
if (ctx->err) {
plist_free(*plist);
*plist = NULL;
}
}
PLIST_API void plist_from_xml(const char *plist_xml, uint32_t length, plist_t * plist)
{
if (!plist_xml || (length == 0)) {
*plist = NULL;
return;
}
struct _parse_ctx ctx = { plist_xml, plist_xml + length, 0 };
node_from_xml(&ctx, plist);
}