/* The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event. */
#include "parser.h"
#include<stdio.h>
#ifdef DARWIN
  #include "stdlib.h"
#endif
#ifdef NOSTRING
  // Fallback byte fill for builds compiled with NOSTRING (no <string.h>):
  // writes the value c into each of the n bytes starting at s.
  void memset(char *s, int c, int n) {
    int i;
    for( i = 0; i < n; i++ ) {
      s[i] = c;
    }
  }
#else
  #include <string.h>
#endif

// Compare the first n bytes of a and b.
// Returns 0 when every byte matches; otherwise returns the 1-based index of
// the first byte that differs (so the result is never 0 for a mismatch).
int dh_memcmp(char *a,char *b,int n) {
  int i;
  for( i = 0; i < n; i++ ) {
    if( a[i] != b[i] ) return i + 1;
  }
  return 0;
}

// Allocate a zero-filled node and link it to newparent.
// Every successful call stamps the node with the next value of a
// function-local counter (the first node created this way gets pos 1).
// Returns NULL when the allocation fails.
struct nodec *new_nodecp( struct nodec *newparent ) {
  static int pos = 0;
  int size = sizeof( struct nodec );
  struct nodec *self = (struct nodec *) malloc( size );
  if( !self ) return NULL; // a failed malloc used to be handed straight to memset
  memset( (char *) self, 0, size );
  self->parent = newparent;
  self->pos    = ++pos;
  return self;
}

// Allocate a zero-filled node with no parent (used for the tree root).
// Returns NULL when the allocation fails.
struct nodec *new_nodec() {
  int size = sizeof( struct nodec );
  struct nodec *self = (struct nodec *) malloc( size );
  if( !self ) return NULL; // a failed malloc used to be handed straight to memset
  memset( (char *) self, 0, size );
  return self;
}

// Free node, its attribute list and, recursively, every node reachable
// through its firstchild / next chain. The name/value pointers held by the
// records are not freed here.
// Passing NULL is a no-op, mirroring free().
void del_nodec( struct nodec *node ) {
  struct nodec *curnode;
  struct nodec *next;
  struct attc *curatt;
  struct attc *nexta;

  if( !node ) return;

  for( curnode = node->firstchild; curnode; curnode = next ) {
    next = curnode->next; // must be read before the child is freed
    del_nodec( curnode );
  }

  for( curatt = node->firstatt; curatt; curatt = nexta ) {
    nexta = curatt->next; // must be read before the attribute is freed
    free( curatt );
  }

  free( node );
}

// Allocate a zero-filled attribute record owned by node newparent.
// The record is not linked into the node's attribute list here.
// Returns NULL when the allocation fails.
struct attc* new_attc( struct nodec *newparent ) {
  int size = sizeof( struct attc );
  struct attc *self = (struct attc *) malloc( size );
  if( !self ) return NULL; // a failed malloc used to be handed straight to memset
  memset( (char *) self, 0, size );
  self->parent = newparent;
  return self;
}

// Uncomment to make the parsers print state names and the current
// character through printf while they run.
//#define DEBUG

// Parser state identifiers.
// parserc_parse() stores one of these in last_state when it reaches the
// terminating NUL of its input, and on the next call uses the saved value to
// jump back to the label of the same name. 0 means "no saved state".
// Each value has to be distinct and non-zero; ST_bang (24) was simply given
// the next free number, which is why it is out of numeric order below.
#define ST_val_1 1
#define ST_val_x 2
#define ST_comment_1dash 3
#define ST_comment_2dash 4
#define ST_comment 5
#define ST_comment_x 6
#define ST_pi 7
#define ST_bang 24
#define ST_cdata 8
#define ST_name_1 9
#define ST_name_x 10
#define ST_name_gap 11
#define ST_att_name1 12
#define ST_att_space 13
#define ST_att_name 14
#define ST_att_nameqs 15
#define ST_att_nameqsdone 16
#define ST_att_eq1 17
#define ST_att_eqx 18
#define ST_att_quot 19
#define ST_att_quots 20
#define ST_att_tick 21
#define ST_ename_1 22
#define ST_ename_x 23

int parserc_parse( struct parserc *self, char *xmlin ) {
    // Variables that represent current 'state'
    struct nodec *root    = NULL;
    char  *tagname        = NULL; int    tagname_len    = 0;
    char  *attname        = NULL; int    attname_len    = 0;
    char  *attval         = NULL; int    attval_len     = 0;
    int    att_has_val    = 0;
    struct nodec *curnode = NULL;
    struct attc  *curatt  = NULL;
    int    last_state     = 0;
    self->rootpos = xmlin;
    
    // Variables used temporarily during processing
    struct nodec *temp;
    char   *cpos          = &xmlin[0];
    int    res            = 0;
    int    dent;
    register int let;
    
    if( self->last_state ) {
      #ifdef DEBUG
      printf( "Resuming parse in state %i\n", self->last_state );
      #endif
      self->err = 0;
      root = self->rootnode;
      curnode = self->curnode;
      curatt = self->curatt;
      tagname = self->tagname; tagname_len = self->tagname_len;
      attname = self->attname; attname_len = self->attname_len;
      attval = self->attval; attval_len = self->attval_len;
      att_has_val = self->att_has_val;
      switch( self->last_state ) {
        case ST_val_1: goto val_1;
        case ST_val_x: goto val_x;
        case ST_comment_1dash: goto comment_1dash;
        case ST_comment_2dash: goto comment_2dash;
        case ST_comment: goto comment;
        case ST_comment_x: goto comment_x;
        case ST_pi: goto pi;
        case ST_bang: goto bang;
        case ST_cdata: goto cdata;
        case ST_name_1: goto name_1;
        case ST_name_x: goto name_x;
        case ST_name_gap: goto name_gap;
        case ST_att_name1: goto att_name1;
        case ST_att_space: goto att_space;
        case ST_att_name: goto att_name;
        case ST_att_nameqs: goto att_nameqs;
        case ST_att_nameqsdone: goto att_nameqsdone;
        case ST_att_eq1: goto att_eq1;
        case ST_att_eqx: goto att_eqx;
        case ST_att_quot: goto att_quot;
        case ST_att_quots: goto att_quots;
        case ST_att_tick: goto att_tick;
        case ST_ename_1: goto ename_1;
        case ST_ename_x: goto ename_x;
      }
    }
    else {
      self->err = 0;
      curnode = root = self->rootnode = new_nodec();
    }
    
    #ifdef DEBUG
    printf("Entry to C Parser\n");
    #endif
    
    val_1:
      #ifdef DEBUG
      printf("val_1: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_val_1; goto done;
        case '<': goto val_x;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 1;
      }
      curnode->numvals++;
      cpos++;
      
    val_x:
      #ifdef DEBUG
      printf("val_x: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_val_x; goto done;
        case '<':
          switch( *(cpos+1) ) {
            case '!':
              if( *(cpos+2) == '[' ) { // <![
                //if( !strncmp( cpos+3, "CDATA", 5 ) ) {
                if( *(cpos+3) == 'C' &&
                    *(cpos+4) == 'D' &&
                    *(cpos+5) == 'A' &&
                    *(cpos+6) == 'T' &&
                    *(cpos+7) == 'A'    ) {
                  cpos += 9;
                  curnode->type = 1;
                  goto cdata;
                }
                else {
                  cpos++; cpos++;
                  goto val_x;//actually goto error...
                }
              }
              else if( *(cpos+2) == '-' && // <!--
                *(cpos+3) == '-' ) {
                  cpos += 4;
                  goto comment;
              }
              else {
                cpos++;
                goto bang;
              }
            case '?':
              cpos+=2;
              goto pi;
          }
          tagname_len = 0; // for safety
          cpos++;
          goto name_1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto val_x;
      
    comment_1dash:
      cpos++;
      let = *cpos;
      if( let == '-' ) goto comment_2dash;
      if( !let ) { last_state = ST_comment_1dash; goto done; }
      goto comment_x;
      
    comment_2dash:
      cpos++;
      let = *cpos;
      if( let == '>' ) {
        cpos++;
        goto val_1;
      }
      if( !let ) { last_state = ST_comment_2dash; goto done; }
      goto comment_x;
      
    comment:
      let = *cpos;
      switch( let ) {
        case 0:   last_state = ST_comment; goto done;
        case '-': goto comment_1dash;
      }
      if( !curnode->numcoms ) {
        curnode->comment = cpos;
        curnode->comlen = 1;
      }
      curnode->numcoms++;
      cpos++;
    
    comment_x:
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_comment_x; goto done;
        case '-': goto comment_1dash;
      }
      if( curnode->numcoms == 1 ) curnode->comlen++;
      cpos++;
      goto comment_x;
      
    pi:
      let = *cpos;
      if( let == '?' && *(cpos+1) == '>' ) {
        cpos += 2;
        goto val_1;
      }
      if( !let ) { last_state = ST_pi; goto done; }
      cpos++;
      goto pi;

    bang:
      let = *cpos;
      if( let == '>' ) {
        cpos++;
        goto val_1;
      }
      if( !let ) { last_state = ST_bang; goto done; }
      cpos++;
      goto bang;
    
    cdata:
      let = *cpos;
      if( !let ) { last_state = ST_cdata; goto done; }
      if( let == ']' && *(cpos+1) == ']' && *(cpos+2) == '>' ) {
        cpos += 3;
        goto val_1;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 0;
        curnode->numvals = 1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto cdata;
      
    name_1:
      #ifdef DEBUG
      printf("name_1: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_name_1; goto done;        
        case ' ':
        case 0x0d:
        case 0x0a:
          cpos++;
          goto name_1;
        case '/': // regular closing tag
          tagname_len = 0; // needed to reset
          cpos++;
          goto ename_1;
      }
      tagname       = cpos;
      tagname_len   = 1;
      cpos++;
      goto name_x;
      
    name_x:
      #ifdef DEBUG
      printf("name_x: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_name_x; goto done;
        case ' ':
        case 0x0d:
        case 0x0a:
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          attname_len = 0;
          cpos++;
          goto name_gap;
        case '>':
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          cpos++;
          goto val_1;
        case '/': // self closing
          temp = nodec_addchildr( curnode, tagname, tagname_len );
          temp->z = cpos +1 - xmlin;
          tagname_len            = 0;
          cpos+=2;
          goto val_1;
      }
      
      tagname_len++;
      cpos++;
      goto name_x;
          
    name_gap:
      let = *cpos;
      switch( *cpos ) {
        case 0: last_state = ST_name_gap; goto done;
        case ' ':
        case 0x0d:
        case 0x0a:
          cpos++;
          goto name_gap;
        case '>':
          cpos++;
          goto val_1;
        case '/': // self closing
          curnode->z = cpos+1-xmlin;
          curnode = curnode->parent;
          if( !curnode ) goto done;
          cpos+=2; // am assuming next char is >
          goto val_1;
        case '=':
          cpos++;
          goto name_gap;//actually goto error
      }
        
    att_name1:
      #ifdef DEBUG
      printf("attname1: %c\n", *cpos);
      #endif
      att_has_val = 0;
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_name1; goto done;
        case 0x27://'
          cpos++;
          attname = cpos;
          attname_len = 0;
          goto att_nameqs;
      }
      attname = cpos;
      attname_len = 1;
      cpos++;
      goto att_name;
      
    att_space:
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_space; goto done;
        case ' ':
        case 0x0d:
        case 0x0a:
          cpos++;
          goto att_space;
        case '=':
          att_has_val = 1;
          cpos++;
          goto att_eq1;
      }
      // we have another attribute name, so continue
      
    att_name:
      #ifdef DEBUG
      printf("attname: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_name; goto done;
        case '/': // self closing     !! /> is assumed !!
          curatt = nodec_addattr( curnode, attname, attname_len );
          if( !att_has_val ) { curatt->value = -1; curatt->vallen = 0; }
          attname_len            = 0;
          
          curnode->z = cpos+1-xmlin;
          curnode = curnode->parent;
          if( !curnode ) goto done;
          cpos += 2;
          goto val_1;
        case ' ':
          if( *(cpos+1) == '=' ) {
            cpos++;
            goto att_name;
          }
          curatt = nodec_addattr( curnode, attname, attname_len );
          attname_len = 0;
          cpos++;
          goto att_space;
        case '>':
          curatt = nodec_addattr( curnode, attname, attname_len );
          if( !att_has_val ) { curatt->value = -1; curatt->vallen = 0; }
          attname_len = 0;
          cpos++;
          goto val_1;
        case '=':
          attval_len = 0;
          curatt = nodec_addattr( curnode, attname, attname_len );
          attname_len = 0;
          cpos++;
          goto att_eq1;
      }
      
      if( !attname_len ) attname = cpos;
      attname_len++;
      cpos++;
      goto att_name;
      
    att_nameqs:
      #ifdef DEBUG
      printf("nameqs: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_nameqs; goto done;
        case 0x27://'
          cpos++;
          goto att_nameqsdone;
      }
      attname_len++;
      cpos++;
      goto att_nameqs;
      
    att_nameqsdone:
      #ifdef DEBUG
      printf("nameqsdone: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_nameqsdone; goto done;
        case '=':
          attval_len = 0;
          curatt = nodec_addattr( curnode, attname, attname_len );
          attname_len = 0;
          cpos++;
          goto att_eq1;
      }
      goto att_nameqsdone;
      
    att_eq1:
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_eq1; goto done;
        case '/': // self closing
          if( *(cpos+1) == '>' ) {
            curnode->z = cpos+1-xmlin;
            curnode = curnode->parent;
            if( !curnode ) goto done;
            cpos+=2;
            goto att_eq1;
          }
          break;
        case '"':  cpos++; goto att_quot;
        case 0x27: cpos++; goto att_quots; //'
        case '`':  cpos++; goto att_tick;
        case '>':  cpos++; goto val_1;
        case ' ':  cpos++; goto att_eq1;
      }  
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto att_eqx;
      
    att_eqx:
      let = *cpos;
      switch( let ) {
        case 0: last_state = ST_att_eqx; goto done;
        case '/': // self closing
          if( *(cpos+1) == '>' ) {
            curnode->z = cpos+1-xmlin;
            curnode = curnode->parent;
            if( !curnode ) goto done; // bad error condition
            curatt->value = attval;
            curatt->vallen = attval_len;
            attval_len    = 0;
            cpos += 2;
            goto val_1;
          }
          break;
        case '>':
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len    = 0;
          cpos++;
          goto val_1;
        case ' ':
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len    = 0;
          cpos++;
          goto name_gap;
      }
      
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto att_eqx;
      
    att_quot:
      let = *cpos;
      
      if( let == '"' ) {
        if( attval_len ) {
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len = 0;
        }
        cpos++;
        goto name_gap;
      }
      if( !let ) { last_state = ST_att_quot; goto done; }
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto att_quot;
      
    att_quots:
      let = *cpos;
      
      if( let == 0x27 ) { // '
        if( attval_len ) {
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len = 0;
        }
        cpos++;
        goto name_gap;
      }
      if( !let ) { last_state = ST_att_quots; goto done; }
      
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto att_quots;
      
    att_tick:
      let = *cpos;
      
      if( let == '`' ) {
        if( attval_len ) {
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len = 0;
        }
        cpos++;
        goto name_gap;
      }
      if( !let ) { last_state = ST_att_tick; goto done; }
      
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto att_tick;
      
    ename_1:
      let = *cpos;
      if( let == '>' ) {
        curnode->namelen = tagname_len;
        curnode->z = cpos-xmlin;
        curnode = curnode->parent; // jump up
        if( !curnode ) goto done;
        tagname_len++;
        cpos++;
        root->err = -1;
        goto error;
      }
      if( !let ) { last_state = ST_ename_1; goto done; }
      tagname       = cpos;
      tagname_len   = 1;
      cpos++;
      // continue
      
    ename_x: // ending name
      let = *cpos;
      if( let == '>' ) {
        if( curnode->namelen != tagname_len ) {
          goto error;
        }
        if( res = dh_memcmp( curnode->name, tagname, tagname_len ) ) {
          #ifdef DEBUG
          printf("Closing node not equal: curnode->name=%.*s - opening tag=%.*s\n", tagname_len, curnode->name, tagname_len, tagname );
          #endif
          cpos -= tagname_len;
          cpos += res - 1;
          goto error;
        }
        curnode->z = cpos-xmlin;
        curnode = curnode->parent; // jump up
        if( !curnode ) goto done;
        tagname_len++;
        cpos++;
        
        goto val_1;
      }
      if( !let ) { last_state = ST_ename_x; goto done; }
      tagname_len++;
      cpos++;
      goto ename_x;
    error:
      self->err = - ( int ) ( cpos - &xmlin[0] );
      return self->err;
    done:
      #ifdef DEBUG
      printf("done\n", *cpos);
      #endif
      
      // store the current state of the parser
      self->last_state = last_state;
      self->curnode = curnode;
      self->curatt = curatt;
      self->tagname = tagname; self->tagname_len = tagname_len;
      self->attname = attname; self->attname_len = attname_len;
      self->attval  = attval;  self->attval_len  = attval_len;
      self->att_has_val = att_has_val;
      
      #ifdef DEBUG
      printf("returning\n", *cpos);
      #endif
      return 0;//no error
}

// Reduced variant of parserc_parse().
//
// Builds the same kind of nodec/attc tree under self->rootnode, but only the
// two text states (u_val_1 / u_val_x) look for the terminating NUL; every
// other state keeps reading until it sees the character it is waiting for,
// so truncated or malformed input is read past its end. Comments, processing
// instructions, "<!...>" constructs, quoted attribute names and backtick
// values are not recognised, and closing tag names are not compared with the
// opening name.
//
// Resuming is not supported: returns -1 if self->last_state is non-zero.
// Otherwise returns 0 and stores the final state in self.
int parserc_parse_unsafely( struct parserc *self, char *xmlin ) {
    // Variables that represent current 'state'
    char  *tagname        = NULL; int    tagname_len    = 0;
    char  *attname        = NULL; int    attname_len    = 0;
    char  *attval         = NULL; int    attval_len     = 0;
    int    att_has_val    = 0;
    struct nodec *curnode = NULL;
    struct attc  *curatt  = NULL;
    int    last_state     = 0;
    
    // Variables used temporarily during processing
    char   *cpos          = &xmlin[0];
    register int let;
    
    self->rootpos = xmlin;
    
    if( self->last_state ) {
      return -1; // unsafe doesn't support this
    }
    else {
      self->err = 0;
      curnode = self->rootnode = new_nodec();
    }
    
    #ifdef DEBUG
    printf("Entry to C Parser\n");
    #endif
    
    u_val_1: // content
      #ifdef DEBUG
      printf("val_1: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case 0: last_state = ST_val_1; goto u_done;
        case '<': goto u_val_x;
      }
      // Only the first text run of a node is recorded as its value
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 1;
      }
      curnode->numvals++;
      cpos++;
      
    u_val_x: // content
      #ifdef DEBUG
      printf("val_x: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case 0: last_state = ST_val_x; goto u_done;
        case '<':
          if( *(cpos+1) == '!' &&
              *(cpos+2) == '[' &&
              *(cpos+3) == 'C' &&
              *(cpos+4) == 'D' &&
              *(cpos+5) == 'A' &&
              *(cpos+6) == 'T' &&
              *(cpos+7) == 'A'    ) {
            cpos += 9; // "<![CDATA" plus the following '['
            curnode->type = 1;
            goto u_cdata;
          }
          
          tagname_len = 0; // for safety
          cpos++;
          goto u_name_1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto u_val_x;
    
    // CDATA body: recorded as the node value until "]]>" (no NUL check)
    u_cdata:
      if( *cpos == ']' && *(cpos+1) == ']' && *(cpos+2) == '>' ) {
        cpos += 3;
        goto u_val_1;
      }
      if( !curnode->numvals ) {
        curnode->value = cpos;
        curnode->vallen = 0;
        curnode->numvals = 1;
      }
      if( curnode->numvals == 1 ) curnode->vallen++;
      cpos++;
      goto u_cdata;
      
    u_name_1: // node name
      #ifdef DEBUG
      printf("name_1: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case '/': // regular closing tag
          tagname_len = 0; // needed to reset
          cpos++;
          goto u_ename_1;
      }
      tagname       = cpos;
      tagname_len   = 1;
      cpos++;
      goto u_name_x;
      
    u_name_x: // node name
      #ifdef DEBUG
      printf("name_x: %c\n", *cpos);
      #endif
      switch( *cpos ) {
        case ' ':
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          attname_len = 0;
          cpos++;
          goto u_name_gap;
        case '>':
          curnode     = nodec_addchildr( curnode, tagname, tagname_len );
          cpos++;
          goto u_val_1;
        case '/': // self closing
          nodec_addchildr( curnode, tagname, tagname_len );
          tagname_len = 0;
          cpos+=2;
          goto u_val_1;
      }
      
      tagname_len++;
      cpos++;
      goto u_name_x;
          
    u_name_gap: // node name gap
      switch( *cpos ) {
        case ' ':
          // Stay inside the tag. This case used to fall through to '>',
          // which ended the tag at the first extra space and turned any
          // following attributes into text content.
          cpos++;
          goto u_name_gap;
        case '>':
          cpos++;
          goto u_val_1;
        case '/': // self closing
          curnode = curnode->parent;
          if( !curnode ) goto u_done;
          cpos += 2; // am assuming next char is >
          goto u_val_1;
      }
      // any other character starts an attribute name: fall through
        
    u_att_name1:
      #ifdef DEBUG
      printf("attname1: %c\n", *cpos);
      #endif
      att_has_val = 0;
      attname = cpos;
      attname_len = 1;
      cpos++;
      goto u_att_name;
      
    u_att_space:
      if( *cpos == '=' ) {
          att_has_val = 1;
          cpos++;
          goto u_att_eq1;
      }
      // we have another attribute name, so continue
      
    u_att_name:
      #ifdef DEBUG
      printf("attname: %c\n", *cpos);
      #endif
      let = *cpos;
      switch( let ) {
        case '/': // self closing     !! /> is assumed !!
          curatt = nodec_addattr( curnode, attname, attname_len );
          // (char *) -1 is the sentinel for "attribute without a value";
          // the cast is required, a bare int is not assignable to a pointer.
          if( !att_has_val ) { curatt->value = (char *) -1; curatt->vallen = 0; }
          attname_len = 0;
          
          curnode = curnode->parent;
          if( !curnode ) goto u_done;
          cpos += 2;
          goto u_val_1;
        case ' ':
          if( *(cpos+1) == '=' ) {
            cpos++;
            goto u_att_name;
          }
          curatt = nodec_addattr( curnode, attname, attname_len );
          attname_len = 0;
          cpos++;
          goto u_att_space;
        case '>':
          curatt = nodec_addattr( curnode, attname, attname_len );
          if( !att_has_val ) { curatt->value = (char *) -1; curatt->vallen = 0; }
          attname_len = 0;
          cpos++;
          goto u_val_1;
        case '=':
          attval_len = 0;
          curatt = nodec_addattr( curnode, attname, attname_len );
          attname_len = 0;
          cpos++;
          goto u_att_eq1;
      }
      
      if( !attname_len ) attname = cpos;
      attname_len++;
      cpos++;
      goto u_att_name;
      
    // First character after '='
    u_att_eq1:
      switch( *cpos ) {
        case '/': // self closing
          if( *(cpos+1) == '>' ) {
            curnode = curnode->parent;
            if( !curnode ) goto u_done;
            cpos += 2;
            // The tag is closed, so what follows is content of the parent
            // (same as the "/>" case in u_att_eqx), not an attribute value.
            goto u_val_1;
          }
          break;
        case '"':  cpos++; goto u_att_quot;
        case 0x27: cpos++; goto u_att_quots; //'
        case '>':  cpos++; goto u_val_1;
        case ' ':  cpos++; goto u_att_eq1;
      }  
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto u_att_eqx;
      
    // Remaining characters of an unquoted attribute value
    u_att_eqx:
      switch( *cpos ) {
        case '/': // self closing
          if( *(cpos+1) == '>' ) {
            curnode = curnode->parent;
            if( !curnode ) goto u_done; // bad error condition
            curatt->value = attval;
            curatt->vallen = attval_len;
            attval_len    = 0;
            cpos += 2;
            goto u_val_1;
          }
          break;
        case '>':
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len    = 0;
          cpos++;
          goto u_val_1;
        case ' ':
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len    = 0;
          cpos++;
          goto u_name_gap;
      }
      
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto u_att_eqx;
      
    // Inside a double-quoted attribute value (no NUL check)
    u_att_quot:
      if( *cpos == '"' ) {
        if( attval_len ) {
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len = 0;
        }
        cpos++;
        goto u_name_gap;
      }
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto u_att_quot;
      
    // Inside a single-quoted attribute value (no NUL check)
    u_att_quots:
      if( *cpos == 0x27 ) { // '
        if( attval_len ) {
          curatt->value = attval;
          curatt->vallen = attval_len;
          attval_len = 0;
        }
        cpos++;
        goto u_name_gap;
      }
      if( !attval_len ) attval = cpos;
      attval_len++;
      cpos++;
      goto u_att_quots;
      
    // First character of a closing tag name (just after "</")
    u_ename_1:
      tagname       = cpos;
      tagname_len   = 1;
      cpos++;
      // continue
      
    u_ename_x: // ending name
      let = *cpos;
      if( let == '>' ) {
        // The closing name is not checked against the opening name here
        curnode->z = cpos-xmlin;
        curnode = curnode->parent; // jump up
        if( !curnode ) goto u_done;
        tagname_len++;
        cpos++;
        
        goto u_val_1;
      }
      tagname_len++;
      cpos++;
      goto u_ename_x;
    
    u_done:
      #ifdef DEBUG
      printf("done\n");
      #endif
      
      // store the current state of the parser
      self->last_state = last_state;
      self->curnode = curnode;
      self->curatt = curatt;
      self->tagname = tagname; self->tagname_len = tagname_len;
      self->attname = attname; self->attname_len = attname_len;
      self->attval  = attval;  self->attval_len  = attval_len;
      self->att_has_val = att_has_val;
      
      #ifdef DEBUG
      printf("returning\n");
      #endif
      return 0;//no error
}

// Pair of single-byte fields named high and low.
// NOTE(review): nothing in the visible part of this file uses this type;
// presumably it holds the two bytes of a 16-bit character - confirm against
// the callers before relying on that.
struct utfchar {
  char high;
  char low;
};

// Create a child of self named by newname/newnamelen and append it to the
// end of self's child list. The name pointer is stored as given, not copied.
// Returns the new child.
struct nodec *nodec_addchildr(  struct nodec *self, char *newname, int newnamelen ) {
  struct nodec *child = new_nodecp( self );
  child->name    = newname;
  child->namelen = newnamelen;

  // An empty list gets its head set; otherwise chain after the current tail.
  if( self->numchildren != 0 ) self->lastchild->next = child;
  else                         self->firstchild      = child;

  self->lastchild = child;
  self->numchildren++;
  return child;
}

// Create an attribute of self named by newname/newnamelen and append it to
// the end of self's attribute list. The name pointer is stored as given, not
// copied. Returns the new attribute.
struct attc *nodec_addattr( struct nodec *self, char *newname, int newnamelen ) {
  struct attc *att = new_attc( self );
  att->name    = newname;
  att->namelen = newnamelen;

  // An empty list gets its head set; otherwise chain after the current tail.
  if( self->numatt ) self->lastatt->next = att;
  else               self->firstatt      = att;

  self->lastatt = att;
  self->numatt++;
  return att;
}