/* Copyright (c) 2007-2017 H.Merijn Brand. All rights reserved.
* Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
* This program is free software; you can redistribute it and/or
* modify it under the same terms as Perl itself.
*/
#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
#define DPPP_PL_parser_NO_DUMMY
#define NEED_my_snprintf
#define NEED_pv_escape
#define NEED_pv_pretty
#ifndef PERLIO_F_UTF8
# define PERLIO_F_UTF8 0x00008000
# endif
#ifndef MAXINT
# define MAXINT ((int)(~(unsigned)0 >> 1))
# endif
#include "ppport.h"
/* True when the string buffer of SV s is well-formed UTF-8 according to
 * is_utf8_string (). Evaluates s twice. */
#define is_utf8_sv(s) is_utf8_string ((U8 *)SvPV_nolen (s), SvCUR (s))
/* Compile-time debug level: the "#if MAINT_DEBUG > n" sections in this file
 * add tracing to stderr */
#define MAINT_DEBUG 0
/* Size in bytes of the output buffer embedded in csv_t */
#define BUFFER_SIZE 1024
/* NOTE(review): the CSV_XS_TYPE_* values are not referenced in the part of
 * the file visible here; presumably they relate to csv_t.types - confirm */
#define CSV_XS_TYPE_WARN 1
#define CSV_XS_TYPE_PV 0
#define CSV_XS_TYPE_IV 1
#define CSV_XS_TYPE_NV 2
/* maximum length for EOL, SEP, and QUOTE - keep in sync with .pm */
#define MAX_ATTR_LEN 16
/* Per-field flag bits collected in the local `f` while parsing and pushed
 * onto the fflags array when keep_meta_info is set:
 * QUO - the field was quoted
 * BIN - consulted by AV_PUSH when deciding to turn on the UTF-8 flag
 * EIF - error-in-field (loose quote accepted inside an unquoted field)
 * MIS - not used in the part of the file visible here */
#define CSV_FLAGS_QUO 0x0001
#define CSV_FLAGS_BIN 0x0002
#define CSV_FLAGS_EIF 0x0004
#define CSV_FLAGS_MIS 0x0010
/* Bits for csv_t.has_hooks; SetupCsv () sets AFTER_PARSE / BEFORE_PRINT when
 * the matching entry in the callbacks hash is a code reference */
#define HOOK_ERROR 0x0001
#define HOOK_AFTER_PARSE 0x0002
#define HOOK_BEFORE_PRINT 0x0004
/* Character constants, written in octal */
#define CH_TAB '\011'
#define CH_NL '\012'
#define CH_CR '\015'
#define CH_SPACE '\040'
#define CH_DEL '\177'
/* CH_EOLX, CH_SEPX and CH_QUOTEX are pseudo characters outside the byte range
 * that stand for a complete (possibly multi-byte) eol, sep or quote sequence.
 * CH_EOL, CH_SEP and CH_QUOTE are the first byte of the respective attribute
 * and require a csv_t pointer named `csv` in scope. */
#define CH_EOLX 1215
#define CH_EOL *csv->eol
#define CH_SEPX 8888
#define CH_SEP *csv->sep
#define CH_QUOTEX 8889
#define CH_QUOTE *csv->quo
/* Bit or-ed into csv_t.useIO by CsvGet () once the input is exhausted */
#define useIO_EOF 0x10
/* Perl-style negated if */
#define unless(expr) if (!(expr))
/* True when f is a reference (get-magic is fetched first if f is magical)
 * to an array, a hash or a code value respectively. f is evaluated more
 * than once. */
#define _is_arrayref(f) ( f && \
(SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVAV )
#define _is_hashref(f) ( f && \
(SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVHV )
#define _is_coderef(f) ( f && \
(SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVCV )
/* Set sv via sv_setpvn () with a NULL pointer (used where a field has to
 * become undef) */
#define SvSetUndef(sv) sv_setpvn (sv, NULL, 0)
/* Set sv to the empty string and invoke its set-magic */
#define SvSetEmpty(sv) sv_setpvn_mg (sv, "", 0)
/* Validate that the variable `self` is a hash reference (croak otherwise)
 * and store the referenced hash in the variable `hv`; both must be in scope
 * at the point of use */
#define CSV_XS_SELF \
if (!self || !SvOK (self) || !SvROK (self) || \
SvTYPE (SvRV (self)) != SVt_PVHV) \
croak ("self is not a hash ref"); \
hv = (HV *)SvRV (self)
/* Keep in sync with .pm! */
/* Attribute selectors: the idx argument of cx_xs_cache_set () is compared
 * against these values to decide which csv_t member to update.
 * NOTE(review): CACHE_ID_sep_len, CACHE_ID_eol_len, CACHE_ID_eol_is_cr and
 * CACHE_ID_quo_len have no case in the cx_xs_cache_set () switch; the
 * lengths are derived there from the string value instead. */
#define CACHE_ID_quote_char 0
#define CACHE_ID_escape_char 1
#define CACHE_ID_sep_char 2
#define CACHE_ID_binary 3
#define CACHE_ID_keep_meta_info 4
#define CACHE_ID_always_quote 5
#define CACHE_ID_allow_loose_quotes 6
#define CACHE_ID_allow_loose_escapes 7
#define CACHE_ID_allow_unquoted_escape 8
#define CACHE_ID_allow_whitespace 9
#define CACHE_ID_blank_is_undef 10
#define CACHE_ID_sep 39
#define CACHE_ID_sep_len 38
#define CACHE_ID_eol 11
#define CACHE_ID_eol_len 12
#define CACHE_ID_eol_is_cr 13
#define CACHE_ID_quo 15
#define CACHE_ID_quo_len 16
#define CACHE_ID_verbatim 22
#define CACHE_ID_empty_is_undef 23
#define CACHE_ID_auto_diag 24
#define CACHE_ID_quote_space 25
#define CACHE_ID_quote_empty 37
#define CACHE_ID__is_bound 26
#define CACHE_ID__has_ahead 30
#define CACHE_ID_escape_null 31
#define CACHE_ID_quote_binary 32
#define CACHE_ID_diag_verbose 33
#define CACHE_ID_has_error_input 34
#define CACHE_ID_decode_utf8 35
#define CACHE_ID__has_hooks 36
#define CACHE_ID_strict 58
/* Short names for the unsigned types used throughout this file */
#define byte unsigned char
#define ulng unsigned long
/* Parser/combiner state for one object.
 * SetupCsv () fills this structure from the object hash and stores a raw
 * byte copy of it in the hash under "_CACHE"; later calls memcpy () that
 * copy back. The layout therefore has to be identical wherever the cache
 * is read or written. */
typedef struct {
byte quote_char; /* NOTE(review): not referenced in the visible code, which uses quo[0] via CH_QUOTE */
byte escape_char; /* 0 means no escape character */
byte fld_idx; /* field counter: reset by Parse (), incremented by NewField, stored as _ERROR_FLD */
byte binary;
byte keep_meta_info;
byte always_quote;
byte useIO; /* Also used to indicate EOF */
byte eol_is_cr; /* set when eol is exactly one CR */
byte allow_loose_quotes;
byte allow_loose_escapes;
byte allow_unquoted_escape;
byte allow_whitespace;
byte blank_is_undef;
byte empty_is_undef;
byte verbatim;
byte auto_diag; /* when set (and pself is set) SetDiag () calls Text::CSV_XS::error_diag */
byte quote_space;
byte escape_null;
byte quote_binary;
byte first_safe_char; /* 0x21 when quote_space is set, 0x20 otherwise (see SetupCsv) */
byte diag_verbose;
byte has_error_input; /* set when _ERROR_INPUT has been stored in the object hash */
byte decode_utf8;
byte has_hooks; /* HOOK_* bits */
byte quote_empty;
byte strict;
short strict_n; /* not referenced in the part of the file visible here */
long is_bound; /* number of bound columns; 0 when no columns are bound */
ulng recno; /* not referenced in the part of the file visible here */
byte * cache; /* string buffer of the _CACHE SV */
SV * pself; /* object reference as passed to SetupCsv (); invocant for error_diag */
HV * self; /* the object hash */
SV * bound; /* the _BOUND_COLUMNS array reference when is_bound is set */
char * types; /* string value of the _types attribute */
byte eol_len;
byte sep_len; /* 0 for a single-byte separator, else the length of sep */
byte quo_len; /* 0 for a single-byte quote, else the length of quo */
byte types_len;
char * bptr; /* current input buffer (string buffer of tmp) */
SV * tmp; /* value returned by the last getline call in CsvGet () */
int utf8; /* set when UTF-8 data or attributes have been seen */
byte has_ahead; /* incremented when Parse () steps back one char after detecting a CR line ending */
byte eolx; /* set by SetupCsv () when the eol has to be matched explicitly on input */
int eol_pos; /* -1: none; >= 0: offset where the eol was stripped; -2: pending eol handed out (see CsvGet) */
STRLEN size; /* number of bytes available in bptr */
STRLEN used; /* read position in bptr when parsing; fill level of buffer when combining */
byte eol[MAX_ATTR_LEN];
byte sep[MAX_ATTR_LEN];
byte quo[MAX_ATTR_LEN];
char buffer[BUFFER_SIZE]; /* output buffer, flushed by Print () */
} csv_t;
/* Option readers used by cx_SetupCsv (). They require a HV * named `self`
 * and an SV ** named `svp` in scope, and o has to be a string literal
 * (hv_fetchs). Each yields the truth value (bool_opt*) or integer value
 * (num_opt*) of attribute o, or the default d when the key is absent. */
#define bool_opt_def(o,d) \
(((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvTRUE (*svp) : d)
#define bool_opt(o) bool_opt_def (o, 0)
#define num_opt_def(o,d) \
(((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvIV (*svp) : d)
#define num_opt(o) num_opt_def (o, 0)
/* One entry of the diagnostics table: numeric code plus its message */
typedef struct {
    int xs_errno;
    char *xs_errstr;
} xs_error_t;

/* Table of all diagnostics known to the XS layer. cx_SvDiag () scans it
 * linearly until it finds the requested code or reaches the terminating
 * { 0, "" } entry, so that entry must stay last; unknown codes therefore
 * map to the empty message. */
static const xs_error_t xs_errors[] = {
    /* Generic errors */
    { 1000, "INI - constructor failed" },
    { 1001, "INI - sep_char is equal to quote_char or escape_char" },
    { 1002, "INI - allow_whitespace with escape_char or quote_char SP or TAB" },
    { 1003, "INI - \r or \n in main attr not allowed" },
    { 1004, "INI - callbacks should be undef or a hashref" },
    { 1005, "INI - EOL too long" },
    { 1006, "INI - SEP too long" },
    { 1007, "INI - QUOTE too long" },
    { 1008, "INI - SEP undefined" },
    { 1010, "INI - the header is empty" },
    { 1011, "INI - the header contains more than one valid separator" },
    { 1012, "INI - the header contains an empty field" },
    /* message used to read "nun-unique" */
    { 1013, "INI - the header contains non-unique fields" },
    { 1014, "INI - header called on undefined stream" },
    /* Syntax errors */
    { 1500, "PRM - Invalid/unsupported argument(s)" },
    { 1501, "PRM - The key attribute is passed as an unsupported type" },
    /* Parse errors */
    { 2010, "ECR - QUO char inside quotes followed by CR not part of EOL" },
    { 2011, "ECR - Characters after end of quoted field" },
    { 2012, "EOF - End of data in parsing input stream" },
    { 2013, "ESP - Specification error for fragments RFC7111" },
    { 2014, "ENF - Inconsistent number of fields" },
    /* EIQ - Error Inside Quotes */
    { 2021, "EIQ - NL char inside quotes, binary off" },
    { 2022, "EIQ - CR char inside quotes, binary off" },
    { 2023, "EIQ - QUO character not allowed" },
    { 2024, "EIQ - EOF cannot be escaped, not even inside quotes" },
    { 2025, "EIQ - Loose unescaped escape" },
    { 2026, "EIQ - Binary character inside quoted field, binary off" },
    { 2027, "EIQ - Quoted field not terminated" },
    /* EIF - Error Inside Field */
    { 2030, "EIF - NL char inside unquoted verbatim, binary off" },
    { 2031, "EIF - CR char is first char of field, not part of EOL" },
    { 2032, "EIF - CR char inside unquoted, not part of EOL" },
    { 2034, "EIF - Loose unescaped quote" },
    { 2035, "EIF - Escaped EOF in unquoted field" },
    { 2036, "EIF - ESC error" },
    { 2037, "EIF - Binary character in unquoted field, binary off" },
    /* Combine errors */
    { 2110, "ECB - Binary character in Combine, binary off" },
    /* IO errors */
    { 2200, "EIO - print to IO failed. See errno" },
    /* Hash-Ref errors */
    { 3001, "EHR - Unsupported syntax for column_names ()" },
    { 3002, "EHR - getline_hr () called before column_names ()" },
    { 3003, "EHR - bind_columns () and column_names () fields count mismatch" },
    { 3004, "EHR - bind_columns () only accepts refs to scalars" },
    { 3006, "EHR - bind_columns () did not pass enough refs for parsed fields" },
    { 3007, "EHR - bind_columns needs refs to writable scalars" },
    { 3008, "EHR - unexpected error in bound fields" },
    { 3009, "EHR - print_hr () called before column_names ()" },
    { 3010, "EHR - print_hr () called with invalid arguments" },
    /* Sentinel: terminates the lookup loop in cx_SvDiag () */
    { 0, "" },
};
/* Code of the most recent SetDiag () call; reset to 0 by SetupCsv () */
static int last_error = 0;
/* SVs passed to call_sv (..., G_METHOD) by CsvGet () and Print ().
 * They are not initialised in the part of the file visible here. */
static SV *m_getline, *m_print;
/* True when c is the pseudo character that stands for a matched EOL */
#define is_EOL(c) (c == CH_EOLX)
/* True when c starts a separator. For a single-byte separator
 * (csv->sep_len == 0) comparing with the first byte is sufficient. For a
 * multi-byte separator the remaining bytes have to follow in the input
 * buffer; on a match they are consumed (csv->used is advanced) and c is
 * overwritten with CH_SEPX. c must therefore be a modifiable lvalue and a
 * csv_t pointer named `csv` must be in scope. */
#define __is_SEPX(c) (c == CH_SEP && (csv->sep_len == 0 || (\
csv->size - csv->used >= csv->sep_len - 1 &&\
!memcmp (csv->bptr + csv->used, csv->sep + 1, csv->sep_len - 1) &&\
(csv->used += csv->sep_len - 1) &&\
(c = CH_SEPX))))
#if MAINT_DEBUG > 1
/* Tracing variant of __is_SEPX, only compiled with MAINT_DEBUG > 1.
 * NOTE(review): csv->size and csv->used are STRLEN but printed with %d */
static byte _is_SEPX (unsigned int *c, csv_t *csv, int line) {
unsigned int b = __is_SEPX (*c);
(void)fprintf (stderr, "# %4d - is_SEPX:\t%d (%d)\n", line, b, csv->sep_len);
if (csv->sep_len)
(void)fprintf (stderr,
"# len: %d, siz: %d, usd: %d, c: %03x, *sep: %03x\n",
csv->sep_len, csv->size, csv->used, *c, CH_SEP);
return b;
} /* _is_SEPX */
#define is_SEP(c) _is_SEPX (&c, csv, __LINE__)
#else
#define is_SEP(c) __is_SEPX (c)
#endif
/* Same as __is_SEPX for the quote sequence; never true when no quote
 * character is set (CH_QUOTE is 0). On a multi-byte match c becomes
 * CH_QUOTEX. */
#define __is_QUOTEX(c) (CH_QUOTE && c == CH_QUOTE && (csv->quo_len == 0 || (\
csv->size - csv->used >= csv->quo_len - 1 &&\
!memcmp (csv->bptr + csv->used, csv->quo + 1, csv->quo_len - 1) &&\
(csv->used += csv->quo_len - 1) &&\
(c = CH_QUOTEX))))
#if MAINT_DEBUG > 1
/* Tracing variant of __is_QUOTEX, only compiled with MAINT_DEBUG > 1.
 * NOTE(review): csv->size and csv->used are STRLEN but printed with %d */
static byte _is_QUOTEX (unsigned int *c, csv_t *csv, int line) {
unsigned int b = __is_QUOTEX (*c);
(void)fprintf (stderr, "# %4d - is_QUOTEX:\t%d (%d)\n", line, b, csv->quo_len);
if (csv->quo_len)
(void)fprintf (stderr,
"# len: %d, siz: %d, usd: %d, c: %03x, *quo: %03x\n",
csv->quo_len, csv->size, csv->used, *c, CH_QUOTE);
return b;
} /* _is_QUOTEX */
#define is_QUOTE(c) _is_QUOTEX (&c, csv, __LINE__)
#else
#define is_QUOTE(c) __is_QUOTEX (c)
#endif
/* True when ch is a space or a tab that is not in use as (first byte of)
 * the separator, the quote, or as the escape character */
#define is_whitespace(ch) \
( (ch) != CH_SEP && \
(ch) != CH_QUOTE && \
(ch) != csv->escape_char && \
( (ch) == CH_SPACE || \
(ch) == CH_TAB \
) \
)
#define SvDiag(xse) cx_SvDiag (aTHX_ xse)
/* Build a new SV for diagnostic code xse: its string value is the message
 * from xs_errors (the empty string when the code is not listed) and its
 * integer value is xse itself. Returns whatever newSVpv () returned. */
static SV *cx_SvDiag (pTHX_ int xse) {
    const xs_error_t *entry;
    SV *diag;

    /* Stop on the matching entry or on the { 0, "" } terminator */
    for (entry = xs_errors; entry->xs_errno; entry++) {
        if (entry->xs_errno == xse)
            break;
    }

    diag = newSVpv (entry->xs_errstr, 0);
    if (!diag)
        return (diag);

    /* Make the value dual: message as string, code as integer */
    (void)SvUPGRADE (diag, SVt_PVIV);
    SvIV_set (diag, xse);
    SvIOK_on (diag);
    return (diag);
} /* SvDiag */
/* This function should be altered to deal with the optional extra argument
 * that holds the replacement message */
#define SetDiag(csv,xse) cx_SetDiag (aTHX_ csv, xse)
/* Record diagnostic xse in last_error and in the object hash (_ERROR_DIAG).
 * Code 0 additionally resets _ERROR_POS, _ERROR_FLD and _ERROR_INPUT;
 * code 2012 additionally sets _EOF. When auto_diag is on and the object
 * reference is known, Text::CSV_XS::error_diag is invoked with the object
 * as only argument. Returns the SV stored as _ERROR_DIAG. */
static SV *cx_SetDiag (pTHX_ csv_t *csv, int xse) {
    dSP;
    SV *diag = SvDiag (xse);

    last_error = xse;
    (void)hv_store (csv->self, "_ERROR_DIAG", 11, diag, 0);

    switch (xse) {
        case 0:
            /* No error: wipe the details of a previous one */
            (void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (0), 0);
            (void)hv_store (csv->self, "_ERROR_FLD", 10, newSViv (0), 0);
            (void)hv_store (csv->self, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
            csv->has_error_input = 0;
            break;
        case 2012: /* EOF */
            (void)hv_store (csv->self, "_EOF", 4, &PL_sv_yes, 0);
            break;
        default:
            break;
    }

    unless (csv->pself && csv->auto_diag)
        return (diag);

    ENTER;
    SAVETMPS;
    PUSHMARK (SP);
    XPUSHs (csv->pself);
    PUTBACK;
    call_pv ("Text::CSV_XS::error_diag", G_VOID | G_DISCARD);
    FREETMPS;
    LEAVE;
    return (diag);
} /* SetDiag */
#define xs_cache_set(hv,idx,val) cx_xs_cache_set (aTHX_ hv, idx, val)
/* Update one attribute in the cached csv_t stored under "_CACHE" in hv.
 *
 * hv   object hash; nothing happens when it has no _CACHE entry
 * idx  one of the CACHE_ID_* selectors; unknown values only cause a warning
 * val  new value: its string value is used for character/string attributes,
 *      its integer (or truncated numeric) value for flags and counters
 *
 * String attributes (sep, quo, eol) live in fixed arrays of MAX_ATTR_LEN
 * bytes. Values longer than that are rejected with a warning rather than
 * copied, as they would overwrite the members that follow in csv_t. */
static void cx_xs_cache_set (pTHX_ HV *hv, int idx, SV *val) {
    SV **svp;
    byte *cache;
    csv_t csvs;
    csv_t *csv = &csvs;
    IV iv;
    char *cp = "\0";
    STRLEN len = 0;

    unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp)
        return;

    /* Work on a local copy; it is written back as a whole at the end */
    cache = (byte *)SvPV_nolen (*svp);
    memcpy (csv, cache, sizeof (csv_t));

    if (SvPOK (val))
        cp = SvPV (val, len);
    if (SvIOK (val))
        iv = SvIV (val);
    else if (SvNOK (val)) /* Needed for 5.6.x but safe for 5.8.x+ */
        iv = (IV)SvNV (val); /* uncoverable statement ancient perl required */
    else
        iv = *cp;

    switch (idx) {
        /* single char/byte */
        case CACHE_ID_sep_char:
            CH_SEP = *cp;
            csv->sep_len = 0;
            break;
        case CACHE_ID_quote_char:
            CH_QUOTE = *cp;
            csv->quo_len = 0;
            break;
        case CACHE_ID_escape_char: csv->escape_char = *cp; break;

        /* boolean/numeric */
        case CACHE_ID_binary: csv->binary = iv; break;
        case CACHE_ID_keep_meta_info: csv->keep_meta_info = iv; break;
        case CACHE_ID_always_quote: csv->always_quote = iv; break;
        case CACHE_ID_quote_empty: csv->quote_empty = iv; break;
        case CACHE_ID_quote_space: csv->quote_space = iv; break;
        case CACHE_ID_escape_null: csv->escape_null = iv; break;
        case CACHE_ID_quote_binary: csv->quote_binary = iv; break;
        case CACHE_ID_decode_utf8: csv->decode_utf8 = iv; break;
        case CACHE_ID_allow_loose_escapes: csv->allow_loose_escapes = iv; break;
        case CACHE_ID_allow_loose_quotes: csv->allow_loose_quotes = iv; break;
        case CACHE_ID_allow_unquoted_escape: csv->allow_unquoted_escape = iv; break;
        case CACHE_ID_allow_whitespace: csv->allow_whitespace = iv; break;
        case CACHE_ID_blank_is_undef: csv->blank_is_undef = iv; break;
        case CACHE_ID_empty_is_undef: csv->empty_is_undef = iv; break;
        case CACHE_ID_strict: csv->strict = iv; break;
        case CACHE_ID_verbatim: csv->verbatim = iv; break;
        case CACHE_ID_auto_diag: csv->auto_diag = iv; break;
        case CACHE_ID_diag_verbose: csv->diag_verbose = iv; break;
        case CACHE_ID__has_ahead: csv->has_ahead = iv; break;
        case CACHE_ID__has_hooks: csv->has_hooks = iv; break;
        case CACHE_ID_has_error_input: csv->has_error_input = iv; break;

        /* a 4-byte IV */
        case CACHE_ID__is_bound: csv->is_bound = iv; break;

        /* string */
        case CACHE_ID_sep:
        case CACHE_ID_quo:
        case CACHE_ID_eol:
            if (len > MAX_ATTR_LEN) {
                warn ("Cache value for index %d too long (max %d), ignored\n",
                    idx, MAX_ATTR_LEN);
                break;
            }
            if (idx == CACHE_ID_sep) {
                memcpy (csv->sep, cp, len);
                /* length 0 marks a single-byte separator */
                csv->sep_len = len == 1 ? 0 : len;
            }
            else if (idx == CACHE_ID_quo) {
                memcpy (csv->quo, cp, len);
                /* length 0 marks a single-byte quote */
                csv->quo_len = len == 1 ? 0 : len;
            }
            else {
                memcpy (csv->eol, cp, len);
                csv->eol_len = len;
                csv->eol_is_cr = len == 1 && *cp == CH_CR ? 1 : 0;
            }
            break;

        default:
            warn ("Unknown cache index %d ignored\n", idx);
    }

    csv->cache = cache;
    memcpy (cache, csv, sizeof (csv_t));
} /* cache_set */
#define _pretty_str(s,l) cx_pretty_str (aTHX_ s, l)
/* Return a printable, escaped rendering of the l bytes at s as produced by
 * pv_pretty (). The text lives in a mortal SV, so the returned pointer is
 * only meant for immediate use. */
static char *cx_pretty_str (pTHX_ byte *s, STRLEN l) {
    const U32 flags = (PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT);
    SV *dsv = newSVpvs ("");

    dsv = sv_2mortal (dsv);
    return (pv_pretty (dsv, (char *)s, l, 0, NULL, NULL, flags));
} /* _pretty_str */
/* Output helpers for cx_xs_cache_diag (): each emits one line through
 * warn () with the attribute name left-aligned in 21 columns.
 * _cache_show_byte prints the value in hex and decimal,
 * _cache_show_char prints it in hex and as an escaped character (c has to
 * be an lvalue, its address is taken),
 * _cache_show_str prints the length l and the escaped string str. */
#define _cache_show_byte(trim,c) \
warn (" %-21s %02x:%3d\n", trim, c, c)
#define _cache_show_char(trim,c) \
warn (" %-21s %02x:%s\n", trim, c, _pretty_str (&c, 1))
#define _cache_show_str(trim,l,str) \
warn (" %-21s %02d:%s\n", trim, l, _pretty_str (str, l))
#define xs_cache_diag(hv) cx_xs_cache_diag (aTHX_ hv)
/* Dump the cached csv_t of the object hash hv through warn (), one
 * attribute per line. Emits "CACHE: invalid" when hv has no _CACHE entry. */
static void cx_xs_cache_diag (pTHX_ HV *hv) {
    SV **entry = hv_fetchs (hv, "_CACHE", FALSE);
    csv_t snapshot;
    csv_t *csv = &snapshot; /* the CH_* macros expect this name */

    if (!entry || !*entry) {
        warn ("CACHE: invalid\n");
        return;
    }

    /* Report from a private copy of the cache */
    memcpy (csv, (byte *)SvPV_nolen (*entry), sizeof (csv_t));

    warn ("CACHE:\n");

    /* single characters */
    _cache_show_char ("quote_char", CH_QUOTE);
    _cache_show_char ("escape_char", csv->escape_char);
    _cache_show_char ("sep_char", CH_SEP);

    /* flags and counters */
    _cache_show_byte ("binary", csv->binary);
    _cache_show_byte ("decode_utf8", csv->decode_utf8);
    _cache_show_byte ("allow_loose_escapes", csv->allow_loose_escapes);
    _cache_show_byte ("allow_loose_quotes", csv->allow_loose_quotes);
    _cache_show_byte ("allow_unquoted_escape", csv->allow_unquoted_escape);
    _cache_show_byte ("allow_whitespace", csv->allow_whitespace);
    _cache_show_byte ("always_quote", csv->always_quote);
    _cache_show_byte ("quote_empty", csv->quote_empty);
    _cache_show_byte ("quote_space", csv->quote_space);
    _cache_show_byte ("escape_null", csv->escape_null);
    _cache_show_byte ("quote_binary", csv->quote_binary);
    _cache_show_byte ("auto_diag", csv->auto_diag);
    _cache_show_byte ("diag_verbose", csv->diag_verbose);
    _cache_show_byte ("strict", csv->strict);
    _cache_show_byte ("has_error_input", csv->has_error_input);
    _cache_show_byte ("blank_is_undef", csv->blank_is_undef);
    _cache_show_byte ("empty_is_undef", csv->empty_is_undef);
    _cache_show_byte ("has_ahead", csv->has_ahead);
    _cache_show_byte ("keep_meta_info", csv->keep_meta_info);
    _cache_show_byte ("verbatim", csv->verbatim);
    _cache_show_byte ("has_hooks", csv->has_hooks);

    /* line ending */
    _cache_show_byte ("eol_is_cr", csv->eol_is_cr);
    _cache_show_byte ("eol_len", csv->eol_len);
    _cache_show_str ("eol", csv->eol_len, csv->eol);

    /* multi-byte separator and quote are only shown when in use */
    _cache_show_byte ("sep_len", csv->sep_len);
    if (csv->sep_len > 1) {
        _cache_show_str ("sep", csv->sep_len, csv->sep);
    }
    _cache_show_byte ("quo_len", csv->quo_len);
    if (csv->quo_len > 1) {
        _cache_show_str ("quote", csv->quo_len, csv->quo);
    }
} /* xs_cache_diag */
#define set_eol_is_cr(csv) cx_set_eol_is_cr (aTHX_ csv)
/* Switch the line ending to a single CR: update csv, copy the whole
 * structure to its cache buffer and store the new "eol" value in the
 * object hash. */
static void cx_set_eol_is_cr (pTHX_ csv_t *csv) {
    SV *eol_sv;

    csv->eol_len = 1;
    csv->eol_is_cr = 1;
    csv->eol[0] = CH_CR;

    /* keep the cached copy in step with the live structure */
    memcpy (csv->cache, csv, sizeof (csv_t));

    eol_sv = newSVpvn ((char *)csv->eol, 1);
    (void)hv_store (csv->self, "eol", 3, eol_sv, 0);
} /* set_eol_is_cr */
#define SetupCsv(csv,self,pself) cx_SetupCsv (aTHX_ csv, self, pself)
/* Initialise csv for a parse or combine run on the object hash self.
 *
 * When self already holds a "_CACHE" entry, csv is restored from that byte
 * copy. Otherwise csv is built from the attributes in self and a read-only
 * SV holding a copy of the structure is stored as "_CACHE".
 * In both cases the per-run members (utf8, size, used, first_safe_char,
 * bound, eol_pos, eolx) are (re)computed afterwards and last_error is
 * reset.
 *
 * The sep, quote and eol strings are stored in arrays of MAX_ATTR_LEN
 * bytes. A longer value is not copied (the attribute keeps its default),
 * because copying it would overwrite the members that follow in csv_t.
 *
 * csv    structure to fill
 * self   the object hash
 * pself  the object reference, kept for error_diag callbacks */
static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself) {
    SV **svp;
    STRLEN len;
    char *ptr;

    last_error = 0;

    if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
        byte *cache = (byte *)SvPVX (*svp);
        memcpy (csv, cache, sizeof (csv_t));
    }
    else {
        SV *sv_cache;

        memset (csv, 0, sizeof (csv_t)); /* Reset everything */

        csv->self = self;
        csv->pself = pself;

        /* separator: single char first, optional multi-byte override */
        CH_SEP = ',';
        if ((svp = hv_fetchs (self, "sep_char", FALSE)) && *svp && SvOK (*svp))
            CH_SEP = *SvPV (*svp, len);
        if ((svp = hv_fetchs (self, "sep", FALSE)) && *svp && SvOK (*svp)) {
            ptr = SvPV (*svp, len);
            if (len <= MAX_ATTR_LEN) {
                memcpy (csv->sep, ptr, len);
                if (len > 1)
                    csv->sep_len = len;
            }
        }

        /* quote: undef or empty disables quoting */
        CH_QUOTE = '"';
        if ((svp = hv_fetchs (self, "quote_char", FALSE)) && *svp) {
            if (SvOK (*svp)) {
                ptr = SvPV (*svp, len);
                CH_QUOTE = len ? *ptr : (char)0;
            }
            else
                CH_QUOTE = (char)0;
        }
        if ((svp = hv_fetchs (self, "quote", FALSE)) && *svp && SvOK (*svp)) {
            ptr = SvPV (*svp, len);
            if (len <= MAX_ATTR_LEN) {
                memcpy (csv->quo, ptr, len);
                if (len > 1)
                    csv->quo_len = len;
            }
        }

        /* escape: undef or empty disables escaping */
        csv->escape_char = '"';
        if ((svp = hv_fetchs (self, "escape_char", FALSE)) && *svp) {
            if (SvOK (*svp)) {
                ptr = SvPV (*svp, len);
                csv->escape_char = len ? *ptr : (char)0;
            }
            else
                csv->escape_char = (char)0;
        }

        if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
            char *eol = SvPV (*svp, len);
            if (len <= MAX_ATTR_LEN) {
                memcpy (csv->eol, eol, len);
                csv->eol_len = len;
                if (len == 1 && *csv->eol == CH_CR)
                    csv->eol_is_cr = 1;
            }
        }

        if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
            csv->types = SvPV (*svp, len);
            csv->types_len = len;
        }

        if ((svp = hv_fetchs (self, "_is_bound", FALSE)) && *svp && SvOK (*svp))
            csv->is_bound = SvIV(*svp);

        if ((svp = hv_fetchs (self, "callbacks", FALSE)) && _is_hashref (*svp)) {
            HV *cb = (HV *)SvRV (*svp);
            if ((svp = hv_fetchs (cb, "after_parse", FALSE)) && _is_coderef (*svp))
                csv->has_hooks |= HOOK_AFTER_PARSE;
            if ((svp = hv_fetchs (cb, "before_print", FALSE)) && _is_coderef (*svp))
                csv->has_hooks |= HOOK_BEFORE_PRINT;
        }

        csv->binary = bool_opt ("binary");
        csv->decode_utf8 = bool_opt ("decode_utf8");
        csv->always_quote = bool_opt ("always_quote");
        csv->strict = bool_opt ("strict");
        csv->quote_empty = bool_opt ("quote_empty");
        csv->quote_space = bool_opt_def ("quote_space", 1);
        csv->escape_null = bool_opt_def ("escape_null", 1);
        csv->quote_binary = bool_opt_def ("quote_binary", 1);
        csv->allow_loose_quotes = bool_opt ("allow_loose_quotes");
        csv->allow_loose_escapes = bool_opt ("allow_loose_escapes");
        csv->allow_unquoted_escape = bool_opt ("allow_unquoted_escape");
        csv->allow_whitespace = bool_opt ("allow_whitespace");
        csv->blank_is_undef = bool_opt ("blank_is_undef");
        csv->empty_is_undef = bool_opt ("empty_is_undef");
        csv->verbatim = bool_opt ("verbatim");
        csv->auto_diag = num_opt ("auto_diag");
        csv->diag_verbose = num_opt ("diag_verbose");
        csv->keep_meta_info = num_opt ("keep_meta_info");

        /* without an escape character a NUL cannot be escaped */
        unless (csv->escape_char) csv->escape_null = 0;

        /* The cache SV is created first so that csv->cache can point at
         * its buffer; the structure is then copied again so the cached
         * copy contains that pointer as well. */
        sv_cache = newSVpvn ((char *)csv, sizeof (csv_t));
        csv->cache = (byte *)SvPVX (sv_cache);
        SvREADONLY_on (sv_cache);
        memcpy (csv->cache, csv, sizeof (csv_t));
        (void)hv_store (self, "_CACHE", 6, sv_cache, 0);
    }

    /* per-run state */
    csv->utf8 = 0;
    csv->size = 0;
    csv->used = 0;
    csv->first_safe_char = csv->quote_space ? 0x21 : 0x20;

    if (csv->is_bound) {
        if ((svp = hv_fetchs (self, "_BOUND_COLUMNS", FALSE)) && _is_arrayref (*svp))
            csv->bound = *svp;
        else
            csv->is_bound = 0;
    }

    /* eolx: the eol has to be matched explicitly when it is set and is
     * anything other than a lone CR or NL, or when verbatim is on */
    csv->eol_pos = -1;
    csv->eolx = csv->eol_len
        ? csv->verbatim || csv->eol_len >= 2
            ? 1
            : csv->eol[0] == CH_CR || csv->eol[0] == CH_NL
                ? 0
                : 1
        : 0;

    if (csv->sep_len > 1 && is_utf8_string ((U8 *)(csv->sep), csv->sep_len))
        csv->utf8 = 1;
    if (csv->quo_len > 1 && is_utf8_string ((U8 *)(csv->quo), csv->quo_len))
        csv->utf8 = 1;
} /* SetupCsv */
#define Print(csv,dst) cx_Print (aTHX_ csv, dst)
/* Flush csv->buffer (csv->used bytes) to dst.
 *
 * Without useIO the bytes are appended to the scalar dst refers to and
 * TRUE is returned. With useIO the bytes are passed to the m_print method
 * of dst; the result is the value returned by that method (0 when it
 * returned nothing), and diagnostic 2200 is set when it returned false.
 *
 * In UTF-8 mode with useIO, trailing bytes that would make the chunk
 * invalid UTF-8 (at most 16) are held back and moved to the start of the
 * buffer, so csv->used is left at that count instead of 0. */
static int cx_Print (pTHX_ csv_t *csv, SV *dst) {
    int keep = 0;
    int result;

    unless (csv->useIO) {
        sv_catpvn (SvRV (dst), csv->buffer, csv->used);
        if (csv->utf8 && csv->decode_utf8
                && SvROK (dst) && is_utf8_sv (SvRV (dst)))
            SvUTF8_on (SvRV (dst));
        csv->used = 0;
        return TRUE;
    }

    {
        SV *chunk = sv_2mortal (newSVpvn (csv->buffer, csv->used));
        dSP;

        PUSHMARK (sp);
        EXTEND (sp, 2);
        PUSHs ((dst));

        if (csv->utf8) {
            STRLEN left;
            char *bytes;
            int k;

            bytes = SvPV (chunk, left);
            /* chop bytes off the end until the chunk is valid UTF-8 */
            for (; left > 0 && !is_utf8_sv (chunk) && keep < 16; keep++) {
                bytes[--left] = (char)0;
                SvCUR_set (chunk, left);
            }
            /* the chopped bytes open the next chunk */
            for (k = 0; k < keep; k++)
                csv->buffer[k] = csv->buffer[csv->used - keep + k];
            SvUTF8_on (chunk);
        }

        PUSHs (chunk);
        PUTBACK;
        result = call_sv (m_print, G_METHOD);
        SPAGAIN;
        if (result) {
            result = POPi;
            if (!result)
                (void)SetDiag (csv, 2200);
        }
        PUTBACK;
    }

    csv->used = keep;
    return result;
} /* Print */
/* Append byte c to the output buffer of csv. When the buffer holds
 * sizeof (buffer) - 1 bytes it is flushed with Print () first.
 * NOTE: contains a `return FALSE`, so a failed flush leaves the function
 * in which the macro is expanded. Expands to a braced block. */
#define CSV_PUT(csv,dst,c) { \
if ((csv)->used == sizeof ((csv)->buffer) - 1) { \
unless (Print ((csv), (dst))) \
return FALSE; \
} \
(csv)->buffer[(csv)->used++] = (c); \
}
#define bound_field(csv,i,keep) cx_bound_field (aTHX_ csv, i, keep)
/* Return the scalar bound to column i (0-based), or NULL on failure.
 *
 * csv   parser state; csv->bound is the array reference with the bound
 *       scalar references and csv->is_bound their count
 * i     column index
 * keep  true:  return the scalar untouched (used when combining)
 *       false: the scalar is set to the empty string first and has to be
 *              writable (used when parsing)
 *
 * Sets diagnostic 3006 when i is beyond the number of bound columns and
 * 3008 when the slot is missing, is not a reference, or (for !keep) refers
 * to a read-only scalar. */
static SV *cx_bound_field (pTHX_ csv_t *csv, int i, int keep) {
    SV *sv = csv->bound;

    /* fprintf (stderr, "# New bind %d/%d\n", i, csv->is_bound);\ */
    if (i >= csv->is_bound) {
        (void)SetDiag (csv, 3006);
        return (NULL);
    }

    if (sv && SvROK (sv)) {
        AV *av = (AV *)(SvRV (sv));
        /* av_fetch () returns NULL for a non-existing element, so the
         * result must not be dereferenced unchecked */
        SV **svp = av_fetch (av, i, FALSE);

        /* fprintf (stderr, "# Bind %d/%d/%d\n", i, csv->is_bound, av_len (av)); */
        sv = svp ? *svp : NULL;
        if (sv && SvROK (sv)) {
            sv = SvRV (sv);
            if (keep)
                return (sv);
            unless (SvREADONLY (sv)) {
                SvSetEmpty (sv);
                return (sv);
            }
        }
    }

    (void)SetDiag (csv, 3008);
    return (NULL);
} /* bound_field */
#define was_quoted(mf,idx) cx_was_quoted (aTHX_ mf, idx)
/* Return 1 when element idx of the field-flags array mf exists, holds an
 * integer and has the CSV_FLAGS_QUO bit set; 0 otherwise. */
static int cx_was_quoted (pTHX_ AV *mf, int idx) {
    SV **flagp = av_fetch (mf, idx, FALSE);

    if (!flagp)
        return 0;
    if (!SvIOK (*flagp))
        return 0;
    if (SvIV (*flagp) & CSV_FLAGS_QUO)
        return 1;
    return 0;
} /* was_quoted */
/* Should be extended for EBCDIC ? */
/* True when ch is a control character other than TAB, DEL, or anything
 * above DEL. The argument is parenthesised so that expressions can be
 * passed safely; it is evaluated up to three times, so it must not have
 * side effects. */
#define is_csv_binary(ch) (((ch) < CH_SPACE || (ch) >= CH_DEL) && (ch) != CH_TAB)
#define Combine(csv,dst,fields) cx_Combine (aTHX_ csv, dst, fields)
/* Serialise one record to dst.
 *
 * csv     state as prepared by SetupCsv ()
 * dst     output target handed to Print (): an IO object when csv->useIO
 *         is set, a reference to the result scalar otherwise
 * fields  the values to join; when it is empty and columns are bound, the
 *         bound scalars are used instead
 *
 * Fields are separated by the separator and quoted when always_quote is
 * set, when the saved field flags (keep_meta_info >= 10) say the field was
 * quoted, when an empty field has to be quoted (quote_empty), or when the
 * value contains characters that need quoting. Undefined fields produce no
 * output at all. The record is terminated with eol when that is set.
 *
 * Returns FALSE when flushing fails or when a binary character is found
 * while binary is off (diagnostic 2110, the offending field is stored as
 * _ERROR_INPUT); otherwise the result of the final Print (), or TRUE when
 * nothing was left to flush. */
static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields) {
    int i, n, bound = 0;
    int aq = (int)csv->always_quote;
    int qe = (int)csv->quote_empty;
    int kmi = (int)csv->keep_meta_info;
    AV *qm = NULL;

    n = av_len (fields);
    if (n < 0 && csv->is_bound) {
        n = csv->is_bound - 1;
        bound = 1;
    }

    /* Field flags of a previous parse are only used when they cover all
     * fields of this record */
    if (kmi >= 10) {
        SV **svp;
        if ((svp = hv_fetchs (csv->self, "_FFLAGS", FALSE)) && _is_arrayref (*svp)) {
            AV *avp = (AV *)SvRV (*svp);
            if (avp && av_len (avp) >= n)
                qm = avp;
        }
    }

    for (i = 0; i <= n; i++) {
        SV *sv;

        if (i > 0) {
            CSV_PUT (csv, dst, CH_SEP);
            if (csv->sep_len) {
                int x;
                for (x = 1; x < (int)csv->sep_len; x++)
                    CSV_PUT (csv, dst, csv->sep[x]);
            }
        }

        if (bound)
            sv = bound_field (csv, i, 1);
        else {
            SV **svp = av_fetch (fields, i, 0);
            sv = svp && *svp ? *svp : NULL;
        }

        if (sv) {
            STRLEN len;
            char *ptr;
            int quoteMe;

            unless ((SvOK (sv) || (
                    (SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv)))
                    )) continue;
            ptr = SvPV (sv, len);
            if (len == 0)
                quoteMe = aq ? 1 : qe ? 1 : qm ? was_quoted (qm, i) : 0;
            else {
                if (SvUTF8 (sv)) {
                    csv->utf8 = 1;
                    csv->binary = 1;
                }

                quoteMe = aq ? 1 : qm ? was_quoted (qm, i) : 0;

                /* Do we need quoting? We do quote, if the user requested
                 * (always_quote), if binary or blank characters are found
                 * and if the string contains quote or escape characters.
                 */
                if (!quoteMe &&
                        ( quoteMe = (!SvIOK (sv) && !SvNOK (sv) && CH_QUOTE))) {
                    char *ptr2;
                    STRLEN l;

                    for (ptr2 = ptr, l = len; l; ++ptr2, --l) {
                        byte c = *ptr2;

                        if ((CH_QUOTE && c == CH_QUOTE) ||
                            (CH_SEP && c == CH_SEP) ||
                            (csv->escape_char && c == csv->escape_char) ||
                            (csv->quote_binary ? (c >= 0x7f && c <= 0xa0) ||
                                                 c < csv->first_safe_char
                                               : c == CH_NL || c == CH_CR ||
                                                 (csv->quote_space && (
                                                 c == CH_SPACE || c == CH_TAB)))) {
                            /* Binary character */
                            break;
                        }
                    }
                    quoteMe = (l > 0);
                }
            }

            if (quoteMe) {
                CSV_PUT (csv, dst, CH_QUOTE);
                if (csv->quo_len) {
                    int x;
                    for (x = 1; x < (int)csv->quo_len; x++)
                        CSV_PUT (csv, dst, csv->quo[x]);
                }
            }

            while (len-- > 0) {
                char c = *ptr++;
                int e = 0;

                if (!csv->binary && is_csv_binary (c)) {
                    SvREFCNT_inc (sv);
                    csv->has_error_input = 1;
                    unless (hv_store (csv->self, "_ERROR_INPUT", 12, sv, 0))
                        SvREFCNT_dec (sv); /* uncoverable statement memory fail */
                    (void)SetDiag (csv, 2110);
                    return FALSE;
                }

                /* At this point len is the number of bytes that follow c.
                 * The rest of a multi-byte quote is only compared when
                 * that many bytes are left, so memcmp () never reads
                 * beyond the end of the field. */
                if (CH_QUOTE && (byte)c == CH_QUOTE && (csv->quo_len == 0 ||
                        (len >= (STRLEN)(csv->quo_len - 1) &&
                         memcmp (ptr, csv->quo +1, csv->quo_len - 1) == 0)))
                    e = 1;
                else
                if (c == csv->escape_char && csv->escape_char)
                    e = 1;
                else
                if (c == (char)0 && csv->escape_null) {
                    e = 1;
                    c = '0';
                }
                if (e && csv->escape_char)
                    CSV_PUT (csv, dst, csv->escape_char);
                CSV_PUT (csv, dst, c);
            }

            if (quoteMe) {
                CSV_PUT (csv, dst, CH_QUOTE);
                if (csv->quo_len) {
                    int x;
                    for (x = 1; x < (int)csv->quo_len; x++)
                        CSV_PUT (csv, dst, csv->quo[x]);
                }
            }
        }
    }

    if (csv->eol_len) {
        STRLEN len = csv->eol_len;
        byte *ptr = csv->eol;

        while (len--)
            CSV_PUT (csv, dst, *ptr++);
    }

    if (csv->used)
        return Print (csv, dst);
    return TRUE;
} /* Combine */
#define ParseError(csv,xse,pos) cx_ParseError (aTHX_ csv, xse, pos)
/* Record a parse failure in the object hash: the position pos, the index
 * of the current field, and - when an input line is available - that line
 * as _ERROR_INPUT. Finally the diagnostic xse itself is set. */
static void cx_ParseError (pTHX_ csv_t *csv, int xse, int pos) {
    SV *line = csv->tmp;

    (void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (pos), 0);
    (void)hv_store (csv->self, "_ERROR_FLD", 10, newSViv (csv->fld_idx), 0);

    if (line) {
        csv->has_error_input = 1;
        /* the hash now holds the line as well: account for that */
        if (hv_store (csv->self, "_ERROR_INPUT", 12, line, 0))
            SvREFCNT_inc (line);
    }

    (void)SetDiag (csv, xse);
} /* ParseError */
#define CsvGet(csv,src) cx_CsvGet (aTHX_ csv, src)
/* Refill the input buffer of csv and return its first character.
 *
 * Called (through CSV_GET1) once the current buffer is exhausted. Returns
 * EOF when not reading from an IO handle or when no more data is
 * available, CH_EOLX for a line ending that has to be reported as a unit,
 * and the next input byte otherwise.
 *
 * csv  parser state; bptr, size, used, tmp and eol_pos are updated
 * src  the object on which the m_getline method is called */
static int cx_CsvGet (pTHX_ csv_t *csv, SV *src) {
/* Parsing a string: there is nothing to fetch */
unless (csv->useIO)
return EOF;
/* The eol was stripped from the line that has just been consumed
 * (eol_pos >= 0): hand it out now as one pseudo character. The buffer is
 * replaced by the eol string and eol_pos becomes -2 to mark that state. */
if (csv->tmp && csv->eol_pos >= 0) {
csv->eol_pos = -2;
sv_setpvn (csv->tmp, (char *)csv->eol, csv->eol_len);
csv->bptr = SvPV (csv->tmp, csv->size);
csv->used = 0;
return CH_EOLX;
}
/* Fetch the next chunk by calling the getline method on src */
{ STRLEN result;
dSP;
PUSHMARK (sp);
EXTEND (sp, 1);
PUSHs (src);
PUTBACK;
result = call_sv (m_getline, G_METHOD);
SPAGAIN;
csv->eol_pos = -1;
/* no return value at all is treated like end of input */
csv->tmp = result ? POPs : NULL;
PUTBACK;
#if MAINT_DEBUG > 4
fprintf (stderr, "getline () returned:\n");
sv_dump (csv->tmp);
#endif
}
if (csv->tmp && SvOK (csv->tmp)) {
STRLEN tmp_len;
csv->bptr = SvPV (csv->tmp, tmp_len);
csv->used = 0;
csv->size = tmp_len;
/* With an explicit eol (eolx): check whether the chunk ends in it */
if (csv->eolx && csv->size >= csv->eol_len) {
int i, match = 1;
/* compare backwards from the end of the chunk */
for (i = 1; i <= (int)csv->eol_len; i++) {
unless (csv->bptr[csv->size - i] == csv->eol[csv->eol_len - i]) {
match = 0;
break;
}
}
if (match) {
#if MAINT_DEBUG > 4
fprintf (stderr, "# EOLX match, size: %d\n", csv->size);
#endif
/* Cut the eol off the chunk; unless verbatim, remember where it
 * was so the next call reports it as CH_EOLX */
csv->size -= csv->eol_len;
unless (csv->verbatim)
csv->eol_pos = csv->size;
csv->bptr[csv->size] = (char)0;
SvCUR_set (csv->tmp, csv->size);
/* the chunk consisted of the eol only */
unless (csv->verbatim || csv->size)
return CH_EOLX;
}
}
if (SvUTF8 (csv->tmp)) csv->utf8 = 1;
/* NOTE(review): this tests the length before eol stripping (tmp_len),
 * not csv->size */
if (tmp_len)
return ((byte)csv->bptr[csv->used++]);
}
/* Nothing (more) to read: flag EOF in useIO */
csv->useIO |= useIO_EOF;
return EOF;
} /* CsvGet */
/* Abort parsing with diagnostic diag_code: release the field under
 * construction (`sv`) unless it is a bound column, record the error at the
 * position of the last character read, and return FALSE from the function
 * in which the macro is expanded. Both macros expand to the same code. */
#define ERROR_INSIDE_QUOTES(diag_code) { \
unless (csv->is_bound) SvREFCNT_dec (sv); \
ParseError (csv, diag_code, csv->used - 1); \
return FALSE; \
}
#define ERROR_INSIDE_FIELD(diag_code) { \
unless (csv->is_bound) SvREFCNT_dec (sv); \
ParseError (csv, diag_code, csv->used - 1); \
return FALSE; \
}
/* Trace helpers: write to stderr with MAINT_DEBUG > 4, expand to nothing
 * otherwise */
#if MAINT_DEBUG > 4
#define PUT_RPT fprintf (stderr, "# CSV_PUT @ %4d: 0x%02x '%c'\n", __LINE__, c, isprint (c) ? c : '?')
#define PUT_SEPX_RPT1 fprintf (stderr, "# PUT SEPX @ %4d\n", __LINE__)
#define PUT_SEPX_RPT2 fprintf (stderr, "# Done putting SEPX\n")
#define PUT_QUOX_RPT1 fprintf (stderr, "# PUT QUOX @ %4d\n", __LINE__)
#define PUT_QUOX_RPT2 fprintf (stderr, "# Done putting QUOX\n")
#define PUT_EOLX_RPT1 fprintf (stderr, "# PUT EOLX @ %4d\n", __LINE__)
#define PUT_EOLX_RPT2 fprintf (stderr, "# Done putting EOLX\n")
#define PUSH_RPT fprintf (stderr, "# AV_PUSHd @ %4d\n", __LINE__); sv_dump (sv)
#else
#define PUT_RPT
#define PUT_SEPX_RPT1
#define PUT_SEPX_RPT2
#define PUT_QUOX_RPT1
#define PUT_QUOX_RPT2
#define PUT_EOLX_RPT1
#define PUT_EOLX_RPT2
#define PUSH_RPT
#endif
/* Append the single byte c to the field under construction. Uses the
 * variables `sv` and `len` of the expanding function; the buffer is grown
 * to hold the new byte plus one more. */
#define CSV_PUT_SV1(c) { \
len = SvCUR ((sv)); \
SvGROW ((sv), len + 2); \
*SvEND ((sv)) = c; \
PUT_RPT; \
SvCUR_set ((sv), len + 1); \
}
/* Append character c to the field under construction. The pseudo
 * characters CH_EOLX, CH_SEPX and CH_QUOTEX are expanded to the complete
 * eol, sep and quote byte sequences. For CH_EOLX the input buffer is
 * additionally marked as consumed (size = 0) when it only held the pending
 * eol (eol_pos == -2), and eol_pos is reset to -1. */
#define CSV_PUT_SV(c) { \
if (c == CH_EOLX) { \
int x; PUT_EOLX_RPT1; \
if (csv->eol_pos == -2) \
csv->size = 0; \
for (x = 0; x < (int)csv->eol_len; x++) \
CSV_PUT_SV1 (csv->eol[x]); \
csv->eol_pos = -1; \
PUT_EOLX_RPT2; \
} \
else if (c == CH_SEPX) { \
int x; PUT_SEPX_RPT1; \
for (x = 0; x < (int)csv->sep_len; x++) \
CSV_PUT_SV1 (csv->sep[x]); \
PUT_SEPX_RPT2; \
} \
else if (c == CH_QUOTEX) { \
int x; PUT_QUOX_RPT1; \
for (x = 0; x < (int)csv->quo_len; x++) \
CSV_PUT_SV1 (csv->quo[x]); \
PUT_QUOX_RPT2; \
} \
else \
CSV_PUT_SV1 (c); \
}
/* Next input character: taken from the current buffer when bytes are
 * left, otherwise obtained through CsvGet () (which may return EOF or
 * CH_EOLX). Requires `csv` and `src` in scope. */
#define CSV_GET1 \
(csv->used < csv->size ? (byte)csv->bptr[csv->used++] : CsvGet (csv, src))
#if MAINT_DEBUG > 3
/* Tracing wrapper around CSV_GET1, only compiled with MAINT_DEBUG > 3 */
int CSV_GET_ (pTHX_ csv_t *csv, SV *src, int l) {
int c;
fprintf (stderr, "# 1-CSV_GET @ %4d: (used: %d, size: %d, eol_pos: %d, eolx = %d)\n", l, csv->used, csv->size, csv->eol_pos, csv->eolx);
c = CSV_GET1;
fprintf (stderr, "# 2-CSV_GET @ %4d: 0x%02x '%c'\n", l, c, isprint (c) ? c : '?');
return (c);
} /* CSV_GET_ */
#define CSV_GET CSV_GET_ (aTHX_ csv, src, __LINE__)
#else
#define CSV_GET CSV_GET1
#endif
/* Finish the field under construction. Uses `sv`, `f`, `fields`, `fflags`
 * and `waitingForField` of the expanding function.
 * - the value is NUL-terminated and its UTF-8 flag cleared
 * - an empty value becomes undef when empty_is_undef is set, or when
 *   blank_is_undef is set and the field was not quoted
 * - otherwise trailing whitespace is stripped from unquoted fields when
 *   allow_whitespace is set, and the UTF-8 flag is turned on when the
 *   field carries CSV_FLAGS_BIN, decode_utf8 is set and either csv->utf8
 *   is set or the bytes are valid UTF-8
 * - set-magic is invoked, the value is pushed onto fields (unless columns
 *   are bound) and the flags onto fflags (with keep_meta_info)
 * - sv is cleared and waitingForField set for the next field */
#define AV_PUSH { \
*SvEND (sv) = (char)0; \
SvUTF8_off (sv); \
if (SvCUR (sv) == 0 && ( \
csv->empty_is_undef || \
(!(f & CSV_FLAGS_QUO) && csv->blank_is_undef))) \
SvSetUndef (sv); \
else { \
if (csv->allow_whitespace && ! (f & CSV_FLAGS_QUO)) \
strip_trail_whitespace (sv); \
if (f & CSV_FLAGS_BIN && csv->decode_utf8 \
&& (csv->utf8 || is_utf8_sv (sv))) \
SvUTF8_on (sv); \
} \
SvSETMAGIC (sv); \
unless (csv->is_bound) av_push (fields, sv); \
PUSH_RPT; \
sv = NULL; \
if (csv->keep_meta_info && fflags) \
av_push (fflags, newSViv (f)); \
waitingForField = 1; \
}
#define strip_trail_whitespace(sv) cx_strip_trail_whitespace (aTHX_ sv)
/* Remove trailing spaces and tabs from the string value of sv in place:
 * the removed bytes are overwritten with NUL and the length is adjusted.
 * A value that consists of whitespace only ends up as the empty string;
 * the length check in the loop keeps the scan from running off the front
 * of the buffer in that case. */
static void cx_strip_trail_whitespace (pTHX_ SV *sv) {
    STRLEN len;
    char *s = SvPV (sv, len);

    unless (s && len) return;
    while (len > 0 && (s[len - 1] == CH_SPACE || s[len - 1] == CH_TAB))
        s[--len] = (char)0;
    SvCUR_set (sv, len);
} /* strip_trail_whitespace */
/* Make sure a field SV is available in `sv` before a character is
 * processed. When sv is NULL a new field is started: the next bound
 * column (bound_field, which advances `fnum`) when columns are bound, a
 * fresh empty SV otherwise. The field flags `f` are reset and the field
 * counter is incremented.
 * NOTE: returns FALSE from the expanding function when no SV could be
 * obtained. */
#define NewField \
unless (sv) { \
if (csv->is_bound) \
sv = bound_field (csv, fnum++, 0); \
else \
sv = newSVpvs (""); \
unless (sv) return FALSE; \
f = 0; csv->fld_idx++; \
}
#if MAINT_DEBUG
/* Debug aid: the first characters seen by the current Parse () call */
static char str_parsed[40];
#endif
#if MAINT_DEBUG > 1
/* Debug aid: printable form of the separator in use. A multi-byte
 * separator is rendered as space separated hex bytes, a single-byte one as
 * the character plus its hex code.
 * The text is built in a static buffer, as the pointer is returned to the
 * caller; it is overwritten by the next call. Each hex byte takes three
 * characters, so MAX_ATTR_LEN bytes fit in the buffer with room to spare. */
static char *_sep_string (csv_t *csv) {
    static char sep[64];

    if (csv->sep_len) {
        int x;
        for (x = 0; x < csv->sep_len; x++)
            sprintf (sep + x * 3, "%02x ", csv->sep[x]);
    }
    else
        sprintf (sep, "'%c' (0x%02x)", CH_SEP, CH_SEP);
    return sep;
} /* _sep_string */
#endif
#define Parse(csv,src,fields,fflags) cx_Parse (aTHX_ csv, src, fields, fflags)
static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags) {
int c, f = 0;
int waitingForField = 1;
SV *sv = NULL;
STRLEN len;
int seenSomething = FALSE;
int fnum = 0;
int spl = -1;
#if MAINT_DEBUG
memset (str_parsed, 0, 40);
#endif
csv->fld_idx = 0;
while ((c = CSV_GET) != EOF) {
NewField;
seenSomething = TRUE;
spl++;
#if MAINT_DEBUG
if (spl < 39) str_parsed[spl] = c;
#endif
restart:
if (is_SEP (c)) {
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%03x pos %d = SEP %s\t'%s'\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
_sep_string (csv), csv->bptr + csv->used);
#endif
if (waitingForField) {
/* ,1,"foo, 3",,bar,
* ^ ^
*/
if (csv->blank_is_undef || csv->empty_is_undef)
SvSetUndef (sv);
else
SvSetEmpty (sv);
unless (csv->is_bound)
av_push (fields, sv);
sv = NULL;
if (csv->keep_meta_info && fflags)
av_push (fflags, newSViv (f));
}
else
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo, 3",,bar,
* ^
*/
CSV_PUT_SV (c)
}
else {
/* ,1,"foo, 3",,bar,
* ^ ^ ^
*/
AV_PUSH;
}
} /* SEP char */
else
if (is_QUOTE (c)) {
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%03x pos %d = QUO '%c'\t\t'%s'\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
csv->bptr + csv->used);
#endif
if (waitingForField) {
/* ,1,"foo, 3",,bar,\r\n
* ^
*/
f |= CSV_FLAGS_QUO;
waitingForField = 0;
continue;
}
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo, 3",,bar,\r\n
* ^
*/
int quoesc = 0;
int c2 = CSV_GET;
if (csv->allow_whitespace) {
/* , 1 , "foo, 3" , , bar , \r\n
* ^
*/
while (is_whitespace (c2)) {
if (csv->allow_loose_quotes &&
!(csv->escape_char && c2 == csv->escape_char)) {
/* This feels like a brittle fix for RT115953, where
* ["foo "bar" baz"] got parsed as [foo "bar"baz]
* when both allow_whitespace and allow_loose_quotes
* are true and escape does not equal quote
*/
CSV_PUT_SV (c);
c = c2;
}
c2 = CSV_GET;
}
}
if (is_SEP (c2)) {
/* ,1,"foo, 3",,bar,\r\n
* ^
*/
AV_PUSH;
continue;
}
if (c2 == CH_NL || c2 == CH_EOLX) {
/* ,1,"foo, 3",,"bar"\n
* ^
*/
AV_PUSH;
return TRUE;
}
/* ---
* if QUOTE eq ESCAPE
* AND ( c2 eq QUOTE 1,"abc""def",2
* OR c2 eq ESCAPE 1,"abc""def",2 (QUO eq ESC)
* OR c2 eq NULL ) 1,"abc"0def",2
* ---
*/
if (csv->escape_char && c == csv->escape_char) {
quoesc = 1;
if (c2 == '0') {
/* ,1,"foo, 3"056",,bar,\r\n
* ^
*/
CSV_PUT_SV (0)
continue;
}
if (is_QUOTE (c2)) {
/* ,1,"foo, 3""56",,bar,\r\n
* ^
*/
if (csv->utf8)
f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
continue;
}
if (csv->allow_loose_escapes && c2 != CH_CR) {
/* ,1,"foo, 3"56",,bar,\r\n
* ^
*/
CSV_PUT_SV (c);
c = c2;
goto restart;
}
}
if (c2 == CH_CR) {
int c3;
if (csv->eol_is_cr) {
/* ,1,"foo, 3"\r
* ^
*/
AV_PUSH;
return TRUE;
}
c3 = CSV_GET;
if (c3 == CH_NL) { /* \r is not optional before EOLX! */
/* ,1,"foo, 3"\r\n
* ^
*/
AV_PUSH;
return TRUE;
}
if (csv->useIO && csv->eol_len == 0 && !is_csv_binary (c3)) {
/* ,1,"foo\n 3",,"bar"\r
* baz,4
* ^
*/
set_eol_is_cr (csv);
csv->used--;
csv->has_ahead++;
AV_PUSH;
return TRUE;
}
ParseError (csv, quoesc ? 2023 : 2010, csv->used - 2);
return FALSE;
}
if (c2 == EOF) {
/* ,1,"foo, 3"
* ^
*/
AV_PUSH;
return TRUE;
}
if (csv->allow_loose_quotes && !quoesc) {
/* ,1,"foo, 3"456",,bar,\r\n
* ^
*/
CSV_PUT_SV (c);
c = c2;
goto restart;
}
/* 1,"foo" ",3
* ^
*/
if (quoesc) {
csv->used--;
ERROR_INSIDE_QUOTES (2023);
}
ERROR_INSIDE_QUOTES (2011);
}
/* !waitingForField, !InsideQuotes */
if (csv->allow_loose_quotes) { /* 1,foo "boo" d'uh,1 */
f |= CSV_FLAGS_EIF; /* Mark as error-in-field */
CSV_PUT_SV (c);
}
else
ERROR_INSIDE_FIELD (2034);
} /* QUO char */
else
if (c == csv->escape_char && csv->escape_char) {
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%03x pos %d = ESC '%c'\t'%s%\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
csv->bptr + csv->used);
#endif
/* This means quote_char != escape_char */
if (waitingForField) {
waitingForField = 0;
if (csv->allow_unquoted_escape) {
/* The escape character is the first character of an
* unquoted field */
/* ... get and store next character */
int c2 = CSV_GET;
SvSetEmpty (sv);
if (c2 == EOF) {
csv->used--;
ERROR_INSIDE_FIELD (2035);
}
if (c2 == '0')
CSV_PUT_SV (0)
else
if ( is_QUOTE (c2) || is_SEP (c2) ||
c2 == csv->escape_char || csv->allow_loose_escapes) {
if (csv->utf8)
f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
}
else {
csv->used--;
ERROR_INSIDE_QUOTES (2025);
}
}
}
else
if (f & CSV_FLAGS_QUO) {
int c2 = CSV_GET;
if (c2 == EOF) {
csv->used--;
ERROR_INSIDE_QUOTES (2024);
}
if (c2 == '0')
CSV_PUT_SV (0)
else
if ( is_QUOTE (c2) || is_SEP (c2) ||
c2 == csv->escape_char || csv->allow_loose_escapes) {
if (csv->utf8)
f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
}
else {
csv->used--;
ERROR_INSIDE_QUOTES (2025);
}
}
else
if (sv) {
int c2 = CSV_GET;
if (c2 == EOF) {
csv->used--;
ERROR_INSIDE_FIELD (2035);
}
CSV_PUT_SV (c2);
}
else
ERROR_INSIDE_FIELD (2036); /* uncoverable statement I think there's no way to get here */
} /* ESC char */
else
if (c == CH_NL || is_EOL (c)) {
EOLX:
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%03x pos %d = NL\t'%s'\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
csv->bptr + csv->used);
#endif
if (waitingForField) {
/* ,1,"foo, 3",,bar,
* ^
*/
if (csv->blank_is_undef || csv->empty_is_undef)
SvSetUndef (sv);
else
SvSetEmpty (sv);
unless (csv->is_bound)
av_push (fields, sv);
if (csv->keep_meta_info && fflags)
av_push (fflags, newSViv (f));
return TRUE;
}
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo\n 3",,bar,
* ^
*/
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2021);
CSV_PUT_SV (c);
}
else
if (csv->verbatim) {
/* ,1,foo\n 3,,bar,
* This feature should be deprecated
*/
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_FIELD (2030);
CSV_PUT_SV (c);
}
else {
/* sep=,
* ^
*/
if (csv->recno == 0 && csv->fld_idx == 1 && csv->useIO &&
(csv->bptr[0] == 's' || csv->bptr[0] == 'S') &&
(csv->bptr[1] == 'e' || csv->bptr[1] == 'E') &&
(csv->bptr[2] == 'p' || csv->bptr[2] == 'P') &&
csv->bptr[3] == '=') {
char *sep = csv->bptr + 4;
int len = csv->used - 5;
if (len <= MAX_ATTR_LEN) {
sep[len] = (char)0;
memcpy (csv->sep, sep, len);
csv->sep_len = len == 1 ? 0 : len;
return Parse (csv, src, fields, fflags);
}
}
/* ,1,"foo\n 3",,bar
* ^
*/
AV_PUSH;
return TRUE;
}
} /* CH_NL */
else
if (c == CH_CR && !(csv->verbatim)) {
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%03x pos %d = CR\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl);
#endif
if (waitingForField) {
int c2;
waitingForField = 0;
if (csv->eol_is_cr) {
/* ,1,"foo\n 3",,bar,\r
* ^
*/
c = CH_NL;
goto restart;
}
c2 = CSV_GET;
if (c2 == EOF) {
/* ,1,"foo\n 3",,bar,\r
* ^
*/
c = EOF;
goto restart;
}
if (c2 == CH_NL) { /* \r is not optional before EOLX! */
/* ,1,"foo\n 3",,bar,\r\n
* ^
*/
c = c2;
goto restart;
}
if (csv->useIO && csv->eol_len == 0 && !is_csv_binary (c2)) {
/* ,1,"foo\n 3",,bar,\r
* baz,4
* ^
*/
set_eol_is_cr (csv);
csv->used--;
csv->has_ahead++;
AV_PUSH;
return TRUE;
}
/* ,1,"foo\n 3",,bar,\r\t
* ^
*/
csv->used--;
ERROR_INSIDE_FIELD (2031);
}
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo\r 3",,bar,\r\t
* ^
*/
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2022);
CSV_PUT_SV (c);
}
else {
int c2;
if (csv->eol_is_cr) {
/* ,1,"foo\n 3",,bar\r
* ^
*/
AV_PUSH;
return TRUE;
}
c2 = CSV_GET;
if (c2 == CH_NL) { /* \r is not optional before EOLX! */
/* ,1,"foo\n 3",,bar\r\n
* ^
*/
AV_PUSH;
return TRUE;
}
if (csv->useIO && csv->eol_len == 0 && !is_csv_binary (c2)) {
/* ,1,"foo\n 3",,bar\r
* baz,4
* ^
*/
set_eol_is_cr (csv);
csv->used--;
csv->has_ahead++;
AV_PUSH;
return TRUE;
}
/* ,1,"foo\n 3",,bar\r\t
* ^
*/
ERROR_INSIDE_FIELD (2032);
}
} /* CH_CR */
else {
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%03x pos %d = CCC '%c'\t\t'%s'\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
csv->bptr + csv->used);
#endif
/* Needed for non-IO parse, where EOL is not set during read */
if (csv->eolx && c == CH_EOL &&
csv->size - csv->used >= csv->eol_len - 1 &&
!memcmp (csv->bptr + csv->used, csv->eol + 1, csv->eol_len - 1) &&
(csv->used += csv->eol_len - 1)) {
c = CH_EOLX;
goto EOLX;
}
if (waitingForField) {
if (csv->allow_whitespace && is_whitespace (c)) {
do {
c = CSV_GET;
} while (is_whitespace (c));
if (c == EOF)
break;
goto restart;
}
waitingForField = 0;
goto restart;
}
if (f & CSV_FLAGS_QUO) {
if (is_csv_binary (c)) {
f |= CSV_FLAGS_BIN;
unless (csv->binary || csv->utf8)
ERROR_INSIDE_QUOTES (2026);
}
CSV_PUT_SV (c);
}
else {
if (is_csv_binary (c)) {
f |= CSV_FLAGS_BIN;
unless (csv->binary || csv->utf8)
ERROR_INSIDE_FIELD (2037);
}
CSV_PUT_SV (c);
}
}
/* continue */
if (csv->verbatim && csv->useIO && csv->used == csv->size)
break;
}
if (waitingForField) {
if (seenSomething || !csv->useIO) {
NewField;
if (csv->blank_is_undef || csv->empty_is_undef)
SvSetUndef (sv);
else
SvSetEmpty (sv);
unless (csv->is_bound)
av_push (fields, sv);
if (csv->keep_meta_info && fflags)
av_push (fflags, newSViv (f));
return TRUE;
}
(void)SetDiag (csv, 2012);
return FALSE;
}
if (f & CSV_FLAGS_QUO)
ERROR_INSIDE_QUOTES (2027);
if (sv)
AV_PUSH;
return TRUE;
} /* Parse */
#define c_xsParse(csv,hv,av,avf,src,useIO) cx_c_xsParse (aTHX_ csv, hv, av, avf, src, useIO)
/* Parse a single record into `av` and write the parser state back.
 *
 *  csv    parser state, passed BY VALUE; the modified copy is written to
 *         csv.cache with memcpy () once parsing is done
 *  hv     the object hash; receives _RECNO, _EOF and, when reading from
 *         IO, _AHEAD and _FFLAGS
 *  av     receives the parsed fields
 *  avf    receives the per-field flags (may be NULL); when reading from IO
 *         it is either stored in hv as _FFLAGS or released here
 *  src    the string to parse, or the IO handle when useIO is true
 *  useIO  true to read from `src` as a handle instead of as a string
 *
 * Returns the result of Parse (), forced to FALSE when `strict` is set and
 * the number of fields differs from the first record seen.
 */
static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
    int result;

    ENTER;
    if (csv.eolx || csv.eol_is_cr) {
        /* local $/ = $eol */
        SAVEGENERICSV (PL_rs);
        PL_rs = newSVpvn ((char *)csv.eol, csv.eol_len);
    }

    if ((csv.useIO = useIO)) {
        csv.tmp = NULL;
        if (csv.has_ahead) {
            /* A previous call left unparsed data behind: start with that */
            SV **svp = hv_fetchs (hv, "_AHEAD", FALSE);
            if (svp && *svp) {
                csv.tmp  = *svp;
                csv.bptr = SvPV (csv.tmp, csv.size);
                csv.used = 0;
            }
        }
    }
    else {
        csv.tmp  = src;
        csv.utf8 = SvUTF8 (src);
        csv.bptr = SvPV (src, csv.size);
    }

    if (csv.has_error_input) {
        (void)hv_store (hv, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
        csv.has_error_input = 0;
    }

    result = Parse (&csv, src, av, avf);

    (void)hv_store (hv, "_RECNO", 6, newSViv (++csv.recno), 0);
    (void)hv_store (hv, "_EOF",   4, &PL_sv_no,             0);

    if (csv.strict) {
        /* The first record defines the expected number of fields */
        unless (csv.strict_n)
            csv.strict_n = (short)csv.fld_idx;
        if (csv.fld_idx != csv.strict_n) {
            ParseError (&csv, 2014, csv.used);
            result = FALSE;
        }
    }

    if (csv.useIO) {
        if (csv.tmp && csv.used < csv.size && csv.has_ahead) {
            /* Keep the unconsumed rest of the buffer for the next call */
            SV *sv = newSVpvn (csv.bptr + csv.used, csv.size - csv.used);
            (void)hv_store (hv, "_AHEAD", 6, sv, 0);
        }
        else {
            csv.has_ahead = 0;
            if (csv.useIO & useIO_EOF)
                (void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
        }
    }

    /* Write the (possibly changed) state back into the cache */
    memcpy (csv.cache, &csv, sizeof (csv_t));

    if (csv.useIO && avf) {
        if (csv.keep_meta_info)
            (void)hv_store (hv, "_FFLAGS", 7, newRV_noinc ((SV *)avf), 0);
        else {
            av_undef (avf);
            sv_free ((SV *)avf);
        }
    }

    if (result && csv.types) {
        /* Coerce the fields to the requested column types */
        I32     i;
        STRLEN  len = av_len (av);
        SV    **svp;

        for (i = 0; i <= (I32)len && i <= (I32)csv.types_len; i++) {
            if ((svp = av_fetch (av, i, 0)) && *svp && SvOK (*svp)) {
                switch (csv.types[i]) {
                    case CSV_XS_TYPE_IV:
#ifdef CSV_XS_TYPE_WARN
                        sv_setiv (*svp, SvIV (*svp));
#else
                        if (SvTRUE (*svp))
                            sv_setiv (*svp, SvIV (*svp));
                        else
                            sv_setiv (*svp, 0);
#endif
                        break;

                    case CSV_XS_TYPE_NV:
#ifdef CSV_XS_TYPE_WARN
                        sv_setnv (*svp, SvNV (*svp));
#else
                        if (SvTRUE (*svp))
                            sv_setnv (*svp, SvNV (*svp));
                        else
                            sv_setnv (*svp, 0.0);
#endif
                        break;

                    default:
                        break;
                }
            }
        }
    }

    LEAVE;
    return result;
} /* c_xsParse */
/* Invoke the user callback registered under `cb_name`, if there is one.
 *
 * The callback is looked up as $hv->{callbacks}{$cb_name}. When that is a
 * code reference it is called in scalar context with two arguments: a
 * reference to the object hash `hv` and a reference to the array `av`.
 *
 * Returns 0 when no usable callback exists or when the callback returned a
 * reference to the string "skip"; otherwise the count returned by
 * call_sv ().
 */
static int hook (pTHX_ HV *hv, char *cb_name, AV *av) {
    SV **svp;
    HV  *cb;
    int  res;

#if MAINT_DEBUG > 1
    fprintf (stderr, "# HOOK %s %p\n", cb_name, (void *)av);
#endif

    unless ((svp = hv_fetchs (hv, "callbacks", FALSE)) && _is_hashref (*svp))
        return 0; /* uncoverable statement defensive programming */

    cb  = (HV *)SvRV (*svp);
    svp = hv_fetch (cb, cb_name, strlen (cb_name), FALSE);
    unless (svp && _is_coderef (*svp))
        return 0;

    {   dSP;
        ENTER;
        SAVETMPS;
        PUSHMARK (SP);
        /* The references own a counted reference to their target and are
         * mortal, so FREETMPS below releases them again. Pushing a bare
         * newRV_noinc () here would leave the RV allocated forever while
         * pointing at a target it never counted. */
        XPUSHs (sv_2mortal (newRV_inc ((SV *)hv)));
        XPUSHs (sv_2mortal (newRV_inc ((SV *)av)));
        PUTBACK;
        res = call_sv (*svp, G_SCALAR);
        SPAGAIN;
        if (res) {
            SV *rv = POPs;
            /* return \"skip" asks the caller to drop this record */
            if (SvROK (rv) && (rv = SvRV (rv)) && SvPOK (rv)) {
                if (strcmp (SvPV_nolen (rv), "skip") == 0)
                    res = 0;
            }
        }
        PUTBACK;
        FREETMPS;
        LEAVE;
    }
    return res;
} /* hook */
#define xsParse(self,hv,av,avf,src,useIO) cx_xsParse (aTHX_ self, hv, av, avf, src, useIO)
/* Set up the parser state for `self`, parse one record through
 * c_xsParse () and, on success, run the after_parse hook when one is
 * registered.
 *
 * Returns true when the record was parsed, or when parsing failed while
 * last_error is not set.
 */
static int cx_xsParse (pTHX_ SV *self, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
    csv_t csv;
    int   parsed;

    SetupCsv (&csv, hv, self);
    parsed = c_xsParse (csv, hv, av, avf, src, useIO);

    unless (parsed)
        return !last_error;

    if (csv.has_hooks & HOOK_AFTER_PARSE)
        (void)hook (aTHX_ hv, "after_parse", av);
    return 1;
} /* xsParse */
/* Remove and free every element of `av`, popping from the end until the
 * array is empty. The API also offers av_clear and av_undef, but they have
 * more overhead. */
#define av_empty(av) cx_av_empty (aTHX_ av)
static void cx_av_empty (pTHX_ AV *av) {
    for (;;) {
        if (av_len (av) < 0)
            break;
        sv_free (av_pop (av));
    }
} /* av_empty */
#define xsParse_all(self,hv,io,off,len) cx_xsParse_all (aTHX_ self, hv, io, off, len)
/* Read records from `io` until parsing fails or enough rows are collected.
 *
 *  off  when an integer >= 0: number of leading records to skip;
 *       when negative: keep only the last -off records
 *  len  when an integer: maximum number of rows to return
 *
 * Records for which the after_parse hook returns 0 are dropped.
 * Returns a mortal reference to an array of array references.
 */
static SV *cx_xsParse_all (pTHX_ SV *self, HV *hv, SV *io, SV *off, SV *len) {
    csv_t csv;
    int   n = 0, skip = 0, length = MAXINT, tail = MAXINT;
    AV   *avr = newAV ();
    AV   *row = newAV ();

    SetupCsv (&csv, hv, self);

    if (SvIOK (off)) {
        skip = SvIV (off);
        if (skip < 0) {
            tail = -skip;
            skip = -1;
        }
    }

    if (SvIOK (len))
        length = SvIV (len);

    while (c_xsParse (csv, hv, row, NULL, io, 1)) {

        SetupCsv (&csv, hv, self);

        if (skip > 0) {
            skip--;
            av_empty (row); /* re-use */
            continue;
        }

        if (n++ >= tail) {
            SvREFCNT_dec (av_shift (avr));
            n--;
        }

        if (csv.has_hooks & HOOK_AFTER_PARSE) {
            unless (hook (aTHX_ hv, "after_parse", row)) {
                av_empty (row); /* re-use */
                continue;
            }
        }
        av_push (avr, newRV_noinc ((SV *)row));
        row = NULL; /* now owned by avr */

        if (n >= length && skip >= 0)
            break; /* We have enough */

        row = newAV ();
    }

    /* The loop ended on a failed parse: the spare row was never handed to
     * avr and would otherwise leak. */
    if (row)
        SvREFCNT_dec ((SV *)row);

    while (n > length) {
        SvREFCNT_dec (av_pop (avr));
        n--;
    }

    return (SV *)sv_2mortal (newRV_noinc ((SV *)avr));
} /* xsParse_all */
#define xsCombine(self,hv,av,io,useIO) cx_xsCombine (aTHX_ self, hv, av, io, useIO)
/* Combine the fields in `av` into one record through Combine ().
 *
 * When useIO is true and a before_print hook is registered, the hook runs
 * first. On perl 5.8.0 and up PL_ors_sv is cleared for the duration of the
 * call when an eol is set, and restored afterwards. When the result is not
 * written to IO and the utf8 flag of the parser state is set, `io` is
 * upgraded to UTF-8.
 *
 * Returns the result of Combine ().
 */
static int cx_xsCombine (pTHX_ SV *self, HV *hv, AV *av, SV *io, bool useIO) {
    csv_t csv;
    int   ok;
#if (PERL_BCDVERSION >= 0x5008000)
    SV   *saved_ors = PL_ors_sv;
#endif

    SetupCsv (&csv, hv, self);
    csv.useIO = useIO;

#if (PERL_BCDVERSION >= 0x5008000)
    if (*csv.eol)
        PL_ors_sv = NULL;
#endif

    if (useIO) {
        if (csv.has_hooks & HOOK_BEFORE_PRINT)
            (void)hook (aTHX_ hv, "before_print", av);
    }

    ok = Combine (&csv, io, av);

#if (PERL_BCDVERSION >= 0x5008000)
    PL_ors_sv = saved_ors;
#endif

    if (ok && !useIO) {
        if (csv.utf8)
            sv_utf8_upgrade (io);
    }
    return ok;
} /* xsCombine */
MODULE = Text::CSV_XS PACKAGE = Text::CSV_XS
PROTOTYPES: DISABLE
BOOT:
/* Create the SVs holding the strings "getline" and "print" once at load
 * time (presumably the method names invoked on IO handles - confirm at
 * the places where m_getline / m_print are used), and load IO::Handle
 * without calling its import. */
m_getline = newSVpvs ("getline");
m_print = newSVpvs ("print");
Perl_load_module (aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs ("IO::Handle"), NULL, NULL, NULL);
void
SetDiag (self, xse, ...)
    SV  *self
    int  xse
  PPCODE:
    HV    *hv;
    csv_t  csv;
    /* Set the diagnostic `xse` and return the diagnostic SV.
     * With an object as `self` the diagnostic is set on that object,
     * otherwise a fresh mortal diagnostic is returned. An optional third
     * argument replaces the message text for a non-zero code. */
    if (SvOK (self) && SvROK (self)) {
        CSV_XS_SELF;
        SetupCsv (&csv, hv, self);
        ST (0) = SetDiag (&csv, xse);
    }
    else
        ST (0) = sv_2mortal (SvDiag (xse));
    /* self and xse are always present, so ST (2) only exists when more
     * than two items were passed */
    if (xse && items > 2 && SvPOK (ST (2))) {
        sv_setpvn (ST (0), SvPVX (ST (2)), SvCUR (ST (2)));
        /* setting the string dropped the integer flag: turn it on again */
        SvIOK_on (ST (0));
    }
    XSRETURN (1);
    /* XS SetDiag */
void
error_input (self)
SV *self
PPCODE:
if (self && SvOK (self) && SvROK (self) && SvTYPE (SvRV (self)) == SVt_PVHV) {
HV *hv = (HV *)SvRV (self);
SV **sv = hv_fetchs (hv, "_ERROR_INPUT", FALSE);
if (SvOK (*sv))
ST (0) = *sv;
else
ST (0) = newSV (0);
}
else
ST (0) = newSV (0);
XSRETURN (1);
/* XS error_input */
void
Combine (self, dst, fields, useIO)
SV *self
SV *dst
SV *fields
bool useIO
PPCODE:
HV *hv;
AV *av;
CSV_XS_SELF;
av = (AV *)SvRV (fields);
ST (0) = xsCombine (self, hv, av, dst, useIO) ? &PL_sv_yes : &PL_sv_undef;
XSRETURN (1);
/* XS Combine */
void
Parse (self, src, fields, fflags)
SV *self
SV *src
SV *fields
SV *fflags
PPCODE:
HV *hv;
AV *av;
AV *avf;
CSV_XS_SELF;
av = (AV *)SvRV (fields);
avf = (AV *)SvRV (fflags);
ST (0) = xsParse (self, hv, av, avf, src, 0) ? &PL_sv_yes : &PL_sv_no;
XSRETURN (1);
/* XS Parse */
void
print (self, io, fields)
    SV *self
    SV *io
    SV *fields
  PPCODE:
    HV *hv;
    AV *av;
    /* Combine `fields` and write the record to `io`; returns yes or no.
     * `fields` must be an array reference, or undef for an empty
     * record; anything else croaks. */
    CSV_XS_SELF;
    if (fields == &PL_sv_undef)
        /* temporary empty list: mortal, so it is released again instead
         * of leaking on every call */
        av = (AV *)sv_2mortal ((SV *)newAV ());
    else {
        unless (_is_arrayref (fields))
            croak ("Expected fields to be an array ref");
        av = (AV *)SvRV (fields);
    }
    ST (0) = xsCombine (self, hv, av, io, 1) ? &PL_sv_yes : &PL_sv_no;
    XSRETURN (1);
    /* XS print */
void
getline (self, io)
SV *self
SV *io
PPCODE:
HV *hv;
AV *av;
AV *avf;
CSV_XS_SELF;
av = newAV ();
avf = newAV ();
ST (0) = xsParse (self, hv, av, avf, io, 1)
? sv_2mortal (newRV_noinc ((SV *)av))
: &PL_sv_undef;
XSRETURN (1);
/* XS getline */
void
getline_all (self, io, ...)
SV *self
SV *io
PPCODE:
HV *hv;
SV *offset, *length;
CSV_XS_SELF;
offset = items > 2 ? ST (2) : &PL_sv_undef;
length = items > 3 ? ST (3) : &PL_sv_undef;
ST (0) = xsParse_all (self, hv, io, offset, length);
XSRETURN (1);
/* XS getline_all */
void
_cache_set (self, idx, val)
SV *self
int idx
SV *val
PPCODE:
/* Pass `idx` and `val` on to xs_cache_set () for the object hash of
 * `self`. ST (0) is not assigned here, so the single value returned is
 * whatever is in the first stack slot (presumably still `self`). */
HV *hv;
CSV_XS_SELF;
xs_cache_set (hv, idx, val);
XSRETURN (1);
/* XS _cache_set */
void
_cache_diag (self)
SV *self
PPCODE:
/* Call xs_cache_diag () for the object hash of `self`. ST (0) is not
 * assigned here, so the single value returned is whatever is in the
 * first stack slot (presumably still `self`). */
HV *hv;
CSV_XS_SELF;
xs_cache_diag (hv);
XSRETURN (1);
/* XS _cache_diag */