The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#define PERL_NO_GET_CONTEXT 1
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#define PERL_VERSION_DECIMAL(r,v,s) (r*1000000 + v*1000 + s)
#define PERL_DECIMAL_VERSION \
	PERL_VERSION_DECIMAL(PERL_REVISION,PERL_VERSION,PERL_SUBVERSION)
#define PERL_VERSION_GE(r,v,s) \
	(PERL_DECIMAL_VERSION >= PERL_VERSION_DECIMAL(r,v,s))

#ifndef cBOOL
# define cBOOL(x) ((bool)!!(x))
#endif /* !cBOOL */

#ifndef hv_fetchs
# define hv_fetchs(hv, keystr, lval) \
		hv_fetch(hv, ""keystr"", sizeof(keystr)-1, lval)
#endif /* !hv_fetchs */

#ifndef newSVpvs
# define newSVpvs(string) newSVpvn(""string"", sizeof(string)-1)
#endif /* !newSVpvs */

#ifndef sv_catpvs_nomg
# define sv_catpvs_nomg(sv, string) \
	sv_catpvn_nomg(sv, ""string"", sizeof(string)-1)
#endif /* !sv_catpvs_nomg */

#if PERL_VERSION_GE(5,19,4)
typedef SSize_t array_ix_t;
#else /* <5.19.4 */
typedef I32 array_ix_t;
#endif /* <5.19.4 */

/* parameter classification */

#define sv_is_glob(sv) (SvTYPE(sv) == SVt_PVGV)

#if PERL_VERSION_GE(5,11,0)
# define sv_is_regexp(sv) (SvTYPE(sv) == SVt_REGEXP)
#else /* <5.11.0 */
# define sv_is_regexp(sv) 0
#endif /* <5.11.0 */

#define sv_is_undef(sv) (!sv_is_glob(sv) && !sv_is_regexp(sv) && !SvOK(sv))

#define sv_is_string(sv) \
	(!sv_is_glob(sv) && !sv_is_regexp(sv) && \
	 (SvFLAGS(sv) & (SVf_IOK|SVf_NOK|SVf_POK|SVp_IOK|SVp_NOK|SVp_POK)))

/* exceptions */

#define throw_utf8_error() croak("broken internal UTF-8 encoding\n")
#define throw_syntax_error(p) croak("Pond syntax error\n")
#define throw_constraint_error(MSG) croak("Pond constraint error: "MSG"\n")
#define throw_data_error(MSG) croak("Pond data error: "MSG"\n")

/*
 * string walking
 *
 * The parser deals with strings that are internally encoded using Perl's
 * extended form of UTF-8.  It is not assumed that the encoding is
 * well-formed; encoding errors will result in an exception.  The encoding
 * octets are treated as U8 type.
 *
 * Characters that are known to be in the ASCII range are in some places
 * processed as U8.  General Unicode characters are processed as U32, with
 * the intent that the entire ISO-10646 31-bit range be handleable.  Any
 * codepoint is accepted for processing, even the surrogates (which are
 * not legal in true UTF-8 encoding).  Perl's extended UTF-8 extends to
 * 72-bit codepoints; encodings beyond the 31-bit range are translated to
 * codepoint U+80000000, whereby they are all treated as invalid.
 *
 * char_unicode() returns the codepoint represented by the character being
 * pointed at, or throws an exception if the encoding is malformed.
 *
 * To move on to the character following the one pointed at, use the core
 * macro UTF8SKIP(), as in (p + UTF8SKIP(p)).  It assumes that the character
 * is properly encoded, so it is essential that char_unicode() has been
 * called on it first.
 *
 * Given an input SV (that is meant to be a string), pass it through
 * upgrade_sv() to return an SV that contains the string in UTF-8.  This
 * could be either the same SV (if it is already UTF-8-encoded or contains
 * no non-ASCII characters) or a mortal upgraded copy.
 */

#define char_unicode(p) THX_char_unicode(aTHX_ p)
static U32 THX_char_unicode(pTHX_ U8 *p)
{
	U32 val = *p;
	U8 req_c1;
	int ncont;
	int i;
	if(!(val & 0x80)) return val;
	if(!(val & 0x40)) throw_utf8_error();
	if(!(val & 0x20)) {
		if(!(val & 0x1e)) throw_utf8_error();
		val &= 0x1f;
		ncont = 1;
		req_c1 = 0x00;
	} else if(!(val & 0x10)) {
		val &= 0x0f;
		ncont = 2;
		req_c1 = 0x20;
	} else if(!(val & 0x08)) {
		val &= 0x07;
		ncont = 3;
		req_c1 = 0x30;
	} else if(!(val & 0x04)) {
		val &= 0x03;
		ncont = 4;
		req_c1 = 0x38;
	} else if(!(val & 0x02)) {
		val &= 0x01;
		ncont = 5;
		req_c1 = 0x3c;
	} else if(!(val & 0x01)) {
		if(!(p[1] & 0x3e)) throw_utf8_error();
		for(i = 6; i--; )
			if((*++p & 0xc0) != 0x80)
				throw_utf8_error();
		return 0x80000000;
	} else {
		U8 first_six = 0;
		for(i = 6; i--; ) {
			U8 ext = *++p;
			if((ext & 0xc0) != 0x80)
				throw_utf8_error();
			first_six |= ext;
		}
		if(!(first_six & 0x3f))
			throw_utf8_error();
		for(i = 6; i--; )
			if((*++p & 0xc0) != 0x80)
				throw_utf8_error();
		return 0x80000000;
	}
	if(val == 0 && !(p[1] & req_c1))
		throw_utf8_error();
	for(i = ncont; i--; ) {
		U8 ext = *++p;
		if((ext & 0xc0) != 0x80)
			throw_utf8_error();
		val = UTF8_ACCUMULATE(val, ext);
	}
	return val;
}

#define sv_cat_unichar(str, val) THX_sv_cat_unichar(aTHX_ str, val)
static void THX_sv_cat_unichar(pTHX_ SV *str, U32 val)
{
	STRLEN vlen;
	U8 *vstart, *voldend, *vnewend;
	vlen = SvCUR(str);
	vstart = (U8*)SvGROW(str, vlen+6+1);
	voldend = vstart + vlen;
	vnewend = uvuni_to_utf8_flags(voldend, val, UNICODE_ALLOW_ANY);
	*vnewend = 0;
	SvCUR_set(str, vnewend - vstart);
}

#define upgrade_sv(input) THX_upgrade_sv(aTHX_ input)
static SV *THX_upgrade_sv(pTHX_ SV *input)
{
	U8 *p, *end;
	STRLEN len;
	if(SvUTF8(input)) return input;
	p = (U8*)SvPV(input, len);
	for(end = p + len; p != end; p++) {
		if(*p & 0x80) {
			SV *output = sv_mortalcopy(input);
			sv_utf8_upgrade(output);
			return output;
		}
	}
	return input;
}

/*
 * Pond reading
 */

#define CHARATTR_WSP       0x01
#define CHARATTR_DQSPECIAL 0x02
#define CHARATTR_CONTROL   0x04
#define CHARATTR_HEXDIGIT  0x08
#define CHARATTR_WORDSTART 0x10
#define CHARATTR_WORDCONT  0x20
#define CHARATTR_DECDIGIT  0x40
#define CHARATTR_OCTDIGIT  0x80

static U8 const asciichar_attr[128] = {
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* NUL to BEL */
	0x04, 0x05, 0x05, 0x04, 0x05, 0x05, 0x04, 0x04, /* BS to SI */
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* DLE to ETB */
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, /* CAN to US */
	0x01, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, /* SP to ' */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( to / */
	0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, /* 0 to 7 */
	0x68, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 to ? */
	0x02, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x30, /* @ to G */
	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* H to O */
	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* P to W */
	0x30, 0x30, 0x30, 0x00, 0x02, 0x00, 0x00, 0x30, /* X to _ */
	0x00, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x30, /* ` to g */
	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* h to o */
	0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* p to w */
	0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x04, /* x to DEL */
};

static int char_is_wsp(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_WSP);
}

static int char_is_dqspecial(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_DQSPECIAL);
}

static int char_is_control(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_CONTROL);
}

static int unichar_is_control(U32 c)
{
	return (c >= 0x80) ? c <= 0xa0 : (asciichar_attr[c] & CHARATTR_CONTROL);
}

static int char_is_wordstart(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_WORDSTART);
}

static int char_is_wordcont(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_WORDCONT);
}

static int char_is_decdigit(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_DECDIGIT);
}

static int char_is_octdigit(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_OCTDIGIT);
}

static int char_is_hexdigit(U8 c)
{
	return !(c & 0x80) && (asciichar_attr[c] & CHARATTR_HEXDIGIT);
}

static int hexdigit_value(U8 c)
{
	return c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10;
}

static U8 *parse_opt_wsp(U8 *p)
{
	while(char_is_wsp(*p))
		p++;
	return p;
}

static U8 const asciichar_backslash[128] = {
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* NUL to BEL */
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* BS to SI */
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* DLE to ETB */
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* CAN to US */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* SP to ' */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* ( to / */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0 to 7 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 8 to ? */
	0x40, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* @ to G */
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* H to O */
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, /* P to W */
	0xfd, 0xfd, 0xfd, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* X to _ */
	0x60, 0x07, 0x08, 0xfd, 0xfd, 0x1b, 0x0c, 0xfd, /* ` to g */
	0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0xfd, 0x0a, 0xfd, /* h to o */
	0xfd, 0xfd, 0x0d, 0xfd, 0x09, 0xfd, 0xfd, 0xfd, /* p to w */
	0xfe, 0xfd, 0xfd, 0x7b, 0x7c, 0x7d, 0x7e, 0xfd, /* x to DEL */
};

#define parse_dqstring(end, pp) THX_parse_dqstring(aTHX_ end, pp)
static SV *THX_parse_dqstring(pTHX_ U8 *end, U8 **pp)
{
	U8 *p = *pp;
	SV *datum = sv_2mortal(newSVpvs(""));
	SvUTF8_on(datum);
	while(1) {
		U8 c = *p, e;
		if(p == end || char_is_control(c)) throw_syntax_error(p);
		if(!char_is_dqspecial(c)) {
			U8 *q = p;
			do {
				U32 val = char_unicode(q);
				if(unichar_is_control(val))
					throw_syntax_error(q);
				q += UTF8SKIP(q);
				c = *q;
			} while(q != end && !char_is_dqspecial(c));
			sv_catpvn_nomg(datum, (char*)p, q-p);
			p = q;
			continue;
		}
		if(c == '"') break;
		if(c != '\\') throw_syntax_error(p);
		c = *++p;
		if(p == end) throw_syntax_error(p);
		if(c & 0x80) {
			U32 val = char_unicode(p);
			if(unichar_is_control(val)) throw_syntax_error(q);
			/* character will be treated as literal anyway */
			continue;
		}
		e = asciichar_backslash[c];
		if(e == 0xff) {
			U32 val = c & 7;
			c = *++p;
			if(char_is_octdigit(c)) {
				p++;
				val = (val << 3) | (c & 7);
				c = *p;
				if(char_is_octdigit(c)) {
					p++;
					val = (val << 3) | (c & 7);
				}
			}
			sv_cat_unichar(datum, val);
		} else if(e == 0xfe) {
			U32 val;
			c = *++p;
			if(char_is_hexdigit(c)) {
				p++;
				val = hexdigit_value(c);
				c = *p;
				if(char_is_hexdigit(c)) {
					p++;
					val = (val << 4) | hexdigit_value(c);
				}
			} else if(c == '{') {
				p++;
				c = *p;
				if(!char_is_hexdigit(c))
					throw_syntax_error(p);
				val = 0;
				do {
					if(val & 0x78000000)
						throw_constraint_error(
							"invalid character");
					val = (val << 4) | hexdigit_value(c);
					c = *++p;
				} while(char_is_hexdigit(c));
				if(c != '}') throw_syntax_error(p);
				p++;
			} else {
				throw_syntax_error(p);
			}
			sv_cat_unichar(datum, val);
		} else if(e == 0xfd) {
			throw_syntax_error(p);
		} else {
			p++;
			sv_catpvn_nomg(datum, (char*)&e, 1);
		}
	}
	*pp = p+1;
	return datum;
}

#define parse_sqstring(end, pp) THX_parse_sqstring(aTHX_ end, pp)
static SV *THX_parse_sqstring(pTHX_ U8 *end, U8 **pp)
{
	U8 *p = *pp;
	SV *datum = sv_2mortal(newSVpvs(""));
	SvUTF8_on(datum);
	while(1) {
		U8 c = *p;
		if(p == end || char_is_control(c)) throw_syntax_error(p);
		if(c == '\'') break;
		if(c != '\\') {
			U8 *q = p;
			do {
				U32 val = char_unicode(q);
				if(unichar_is_control(val))
					throw_syntax_error(q);
				q += UTF8SKIP(q);
				c = *q;
			} while(q != end && c != '\'' && c != '\\');
			sv_catpvn_nomg(datum, (char*)p, q-p);
			p = q;
		} else {
			c = p[1];
			if(c == '\\' || c == '\'')
				p++;
			sv_catpvn_nomg(datum, (char*)p, 1);
			p++;
		}
	}
	*pp = p+1;
	return datum;
}

#define array_to_hash(array) THX_array_to_hash(aTHX_ array)
static SV *THX_array_to_hash(pTHX_ AV *array)
{
	HV *hash;
	SV *href;
	array_ix_t alen, i;
	alen = av_len(array);
	if(!(alen & 1))
		throw_constraint_error(
			"odd number of elements in hash constructor");
	hash = newHV();
	href = sv_2mortal(newRV_noinc((SV*)hash));
	for(i = 0; i <= alen; i += 2) {
		SV **key_ptr = av_fetch(array, i, 0);
		STRLEN key_len;
		char *key_str;
		SV *value;
		if(!key_ptr || !sv_is_string(*key_ptr))
			throw_constraint_error("non-string hash key");
		key_str = SvPV(*key_ptr, key_len);
		value = *av_fetch(array, i+1, 0);
		if(!hv_store(hash, key_str, -key_len, SvREFCNT_inc(value), 0))
			SvREFCNT_dec(value);
	}
	return href;
}

#define parse_datum(end, pp) THX_parse_datum(aTHX_ end, pp)
static SV *THX_parse_datum(pTHX_ U8 *end, U8 **pp);
static SV *THX_parse_datum(pTHX_ U8 *end, U8 **pp)
{
	U8 *p = *pp;
	U8 c = *p;
	SV *datum;
	if(c == '"') {
		p++;
		datum = parse_dqstring(end, &p);
	} else if(c == '\'') {
		p++;
		datum = parse_sqstring(end, &p);
	} else if(c == '[' || c == '{') {
		int is_hash = c == '{';
		U8 close = is_hash ? '}' : ']';
		AV *array = newAV();
		sv_2mortal((SV*)array);
		p++;
		while(1) {
			p = parse_opt_wsp(p);
			if(*p == close) break;
			av_push(array, SvREFCNT_inc(parse_datum(end, &p)));
			p = parse_opt_wsp(p);
			if(*p == close) break;
			if(*p == ',') {
				p++;
			} else if(p[0] == '=' && p[1] == '>') {
				p += 2;
			} else {
				throw_syntax_error(p);
			}
		}
		p++;
		datum = is_hash ? array_to_hash(array) :
			sv_2mortal(newRV_inc((SV*)array));
	} else if(c & 0x80) {
		throw_syntax_error(p);
	} else {
		U8 attr = asciichar_attr[c];
		if(attr & CHARATTR_WORDSTART) {
			U8 *start = p++;
			U8 *q;
			while(char_is_wordcont(*p))
				p++;
			q = parse_opt_wsp(p);
			if(!(q[0] == '=' && q[1] == '>'))
				throw_syntax_error(q);
			datum = sv_2mortal(newSVpvn((char*)start, p-start));
		} else if(attr & CHARATTR_DECDIGIT) {
			U8 *start = p++;
			if(c == '0') {
				if(char_is_decdigit(*p)) throw_syntax_error(p);
			} else {
				while(char_is_decdigit(*p))
					p++;
			}
			datum = sv_2mortal(newSVpvn((char*)start, p-start));
		} else {
			throw_syntax_error(p);
		}
	}
	*pp = p;
	return datum;
}

/*
 * Pond writing
 */

struct writer_options {
	int indent;
	int undef_is_empty, unicode;
};

static int pvn_is_integer(U8 *p, STRLEN len)
{
	U8 *e = p + len;
	if(len == 0 || len > 9) return 0;
	if(*p == '0') return len == 1;
	for(; p != e; p++) {
		if(!char_is_decdigit(*p)) return 0;
	}
	return 1;
}

#define ASCIICHAR_QUOTE_LITERAL 0x00
#define ASCIICHAR_QUOTE_HEXPAIR 0x01

static U8 const asciichar_quote[128] = {
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* NUL to BEL */
	0x01, 0x74, 0x6e, 0x01, 0x01, 0x01, 0x01, 0x01, /* BS to SI */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* DLE to ETB */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* CAN to US */
	0x00, 0x00, 0x22, 0x00, 0x24, 0x00, 0x00, 0x00, /* SP to ' */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( to / */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0 to 7 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 to ? */
	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* @ to G */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* H to O */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* P to W */
	0x00, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, /* X to _ */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ` to g */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* h to o */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* p to w */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, /* x to DEL */
};

static char const hexdig[16] = "0123456789abcdef";

#define serialise_as_string(wo, out, datum) \
	THX_serialise_as_string(aTHX_ wo, out, datum)
static void THX_serialise_as_string(pTHX_ struct writer_options *wo,
	SV *out, SV *datum)
{
	U8 *p;
	STRLEN len;
	p = (U8*)SvPV(datum, len);
	if(pvn_is_integer(p, len)) {
		sv_catpvn_nomg(out, (char *)p, len);
	} else {
		U8 *e = p + len;
		U8 *lstart = p;
		sv_catpvs_nomg(out, "\"");
		while(p != e) {
			U8 c = *p;
			if(c & 0x80) {
				U32 val = char_unicode(p);
				if(val == 0x80000000)
					throw_data_error("invalid character");
				if(val <= 0xa0 || !wo->unicode) {
					if(lstart != p)
						sv_catpvn_nomg(out,
							(char*)lstart,
							p-lstart);
				}
				p += UTF8SKIP(p);
				if(val <= 0xa0) {
					c = val;
					p--;
					goto hexpair;
				}
				if(!wo->unicode) {
					char hexbuf[12];
					sprintf(hexbuf, "\\x{%02x}",
						(unsigned)val);
					sv_catpvn_nomg(out, hexbuf,
						strlen(hexbuf));
					lstart = p;
				}
			} else {
				U8 quote = asciichar_quote[c];
				if(quote == ASCIICHAR_QUOTE_LITERAL) {
					p++;
					continue;
				}
				if(lstart != p)
					sv_catpvn_nomg(out, (char*)lstart,
							p-lstart);
				if(quote == ASCIICHAR_QUOTE_HEXPAIR) {
					char hexbuf[4];
					hexpair:
					hexbuf[0] = '\\';
					hexbuf[1] = 'x';
					hexbuf[2] = hexdig[c >> 4];
					hexbuf[3] = hexdig[c & 0xf];
					sv_catpvn_nomg(out, hexbuf, 4);
				} else {
					char bsbuf[2];
					bsbuf[0] = '\\';
					bsbuf[1] = (char)quote;
					sv_catpvn_nomg(out, bsbuf, 2);
				}
				lstart = ++p;
			}
		}
		if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart);
		sv_catpvs_nomg(out, "\"");
	}
}

static int pvn_is_bareword(U8 *p, STRLEN len)
{
	U8 *e = p + len;
	if(!char_is_wordstart(*p)) return 0;
	while(++p != e) {
		if(!char_is_wordcont(*p)) return 0;
	}
	return 1;
}

#define serialise_as_bareword(wo, out, datum) \
	THX_serialise_as_bareword(aTHX_ wo, out, datum)
static void THX_serialise_as_bareword(pTHX_ struct writer_options *wo,
	SV *out, SV *datum)
{
	U8 *p;
	STRLEN len;
	p = (U8*)SvPV(datum, len);
	if(pvn_is_bareword(p, len)) {
		sv_catpvn_nomg(out, (char *)p, len);
	} else {
		serialise_as_string(wo, out, datum);
	}
}

#define serialise_newline(wo, out) THX_serialise_newline(aTHX_ wo, out)
static void THX_serialise_newline(pTHX_ struct writer_options *wo, SV *out)
{
	int indent = wo->indent;
	if(indent != -1) {
		STRLEN cur = SvCUR(out);
		char *p = SvGROW(out, cur+indent+2) + cur;
		*p++ = '\n';
		memset(p, ' ', indent);
		p[indent] = 0;
		SvCUR_set(out, cur+1+indent);
	}
}

#define serialise_datum(wo, out, datum) \
	THX_serialise_datum(aTHX_ wo, out, datum)
static void THX_serialise_datum(pTHX_ struct writer_options *wo,
	SV *out, SV *datum);

#define serialise_array(wo, out, adatum) \
	THX_serialise_array(aTHX_ wo, out, adatum)
static void THX_serialise_array(pTHX_ struct writer_options *wo,
	SV *out, AV *adatum)
{
	array_ix_t alen = av_len(adatum), pos;
	if(alen == -1) {
		sv_catpvs_nomg(out, "[]");
		return;
	}
	sv_catpvs_nomg(out, "[");
	if(wo->indent != -1) wo->indent += 4;
	serialise_newline(wo, out);
	for(pos = 0; ; pos++) {
		serialise_datum(wo, out,
			*av_fetch(adatum, pos, 0));
		if(pos == alen && wo->indent == -1)
			break;
		sv_catpvs_nomg(out, ",");
		if(pos == alen)
			break;
		serialise_newline(wo, out);
	}
	if(wo->indent != -1) wo->indent -= 4;
	serialise_newline(wo, out);
	sv_catpvs_nomg(out, "]");
}

#define serialise_hash(wo, out, hdatum) \
	THX_serialise_hash(aTHX_ wo, out, hdatum)
static void THX_serialise_hash(pTHX_ struct writer_options *wo,
	SV *out, HV *hdatum)
{
	AV *keys;
	U32 nelem = hv_iterinit(hdatum), pos;
	if(nelem == 0) {
		sv_catpvs_nomg(out, "{}");
		return;
	}
	keys = newAV();
	sv_2mortal((SV*)keys);
	av_extend(keys, nelem-1);
	for(pos = nelem; pos--; ) {
		SV *keysv = upgrade_sv(
			hv_iterkeysv(hv_iternext(hdatum)));
		av_push(keys, SvREFCNT_inc(keysv));
	}
	sortsv(AvARRAY(keys), nelem, Perl_sv_cmp);
	sv_catpvs_nomg(out, "{");
	if(wo->indent != -1) wo->indent += 4;
	serialise_newline(wo, out);
	for(pos = 0; ; pos++) {
		SV *keysv = *av_fetch(keys, pos, 0);
		STRLEN klen;
		char *key;
		serialise_as_bareword(wo, out, keysv);
		if(wo->indent == -1) {
			sv_catpvs_nomg(out, "=>");
		} else {
			sv_catpvs_nomg(out, " => ");
		}
		key = SvPV(keysv, klen);
		serialise_datum(wo, out, *hv_fetch(hdatum, key, -klen, 0));
		if(pos == nelem-1 && wo->indent == -1)
			break;
		sv_catpvs_nomg(out, ",");
		if(pos == nelem-1)
			break;
		serialise_newline(wo, out);
	}
	if(wo->indent != -1) wo->indent -= 4;
	serialise_newline(wo, out);
	sv_catpvs_nomg(out, "}");
}

static void THX_serialise_datum(pTHX_ struct writer_options *wo,
	SV *out, SV *datum)
{
	if(sv_is_undef(datum) && wo->undef_is_empty) {
		sv_catpvs_nomg(out, "\"\"");
	} else if(sv_is_string(datum)) {
		datum = upgrade_sv(datum);
		serialise_as_string(wo, out, datum);
	} else {
		if(!SvROK(datum))
			throw_data_error("unsupported data type");
		datum = SvRV(datum);
		if(SvOBJECT(datum))
			throw_data_error("unsupported data type");
		if(SvTYPE(datum) == SVt_PVAV) {
			serialise_array(wo, out, (AV*)datum);
		} else if(SvTYPE(datum) == SVt_PVHV) {
			serialise_hash(wo, out, (HV*)datum);
		} else {
			throw_data_error("unsupported data type");
		}
	}
}

MODULE = Data::Pond PACKAGE = Data::Pond

PROTOTYPES: DISABLE

SV *
pond_read_datum(SV *text_sv)
PROTOTYPE: $
PREINIT:
	STRLEN text_len;
	U8 *p, *end;
CODE:
	if(!sv_is_string(text_sv)) throw_data_error("text isn't a string");
	text_sv = upgrade_sv(text_sv);
	p = (U8*)SvPV(text_sv, text_len);
	end = p + text_len;
	p = parse_opt_wsp(p);
	RETVAL = parse_datum(end, &p);
	p = parse_opt_wsp(p);
	if(p != end) throw_syntax_error(p);
	SvREFCNT_inc(RETVAL);
OUTPUT:
	RETVAL

SV *
pond_write_datum(SV *datum, SV *options = 0)
PROTOTYPE: $;$
PREINIT:
	struct writer_options wo = { -1, 0, 0 };
CODE:
	if(options) {
		HV *opthash;
		SV **item_ptr;
		if(!SvROK(options))
			throw_data_error("option hash isn't a hash");
		options = SvRV(options);
		if(SvOBJECT(options) || SvTYPE(options) != SVt_PVHV)
			throw_data_error("option hash isn't a hash");
		opthash = (HV*)options;
		if((item_ptr = hv_fetchs(opthash, "indent", 0))) {
			SV *item = *item_ptr;
			if(!sv_is_undef(item)) {
				if(!sv_is_string(item))
					throw_data_error(
						"indent option isn't a number");
				wo.indent = SvIV(item);
				if(wo.indent < 0)
					throw_data_error(
						"indent option is negative");
			}
		}
		if((item_ptr = hv_fetchs(opthash, "undef_is_empty", 0))) {
			SV *item = *item_ptr;
			wo.undef_is_empty = cBOOL(SvTRUE(item));
		}
		if((item_ptr = hv_fetchs(opthash, "unicode", 0))) {
			SV *item = *item_ptr;
			wo.unicode = cBOOL(SvTRUE(item));
		}
	}
	RETVAL = sv_2mortal(newSVpvs(""));
	SvUTF8_on(RETVAL);
	serialise_datum(&wo, RETVAL, datum);
	SvREFCNT_inc(RETVAL);
OUTPUT:
	RETVAL