The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
# Reads data from flat text file dumped from database
# and builds universal catalog .XML file.
#
use strict;
use XAO::Utils;
use XAO::Web;
use XML::Writer;

XAO::Utils::set_debug(1);

use constant SITENAME => '<%PROJECT%>';
##
# The catalog name and ID
#
my $catalog_name = "Type anything here!";
my $catalog_id	 = "0001";

##
# File names 
#
my $flatfile = shift;

if (!$flatfile) {
    print "Usage:\n scan-test.pl data_file.txt > output.xml\n";
    exit 0;
}

my @cat;
my @prod;
my $category_id = 0;

##
# Getting flat file specifications
#
my $site=XAO::Web->new(sitename => SITENAME);

my $flatfile_spec = 
    $site->config->embedded('hash')->{product_structure}->{flatfile_spec};

my %Product_fields;
my %Category_fields;

# Configuration processing
fields_prepare($flatfile_spec);
# Incoming data reading to internal data
load_flatfile($flatfile);
# Making XML document from internal data
make_xml();

1;

##
# Make two hashes Field_name=>Field_position for
# Product fields and Category fields separately.
sub fields_prepare {
    my $spec = shift;
    my $fields_ref = $spec->{fields};
    my $category_ref = $spec->{category_hierarchy};
    for (my $i=0;$i<@$fields_ref;$i++) {
	$Product_fields{$fields_ref->[$i]} = $i;
    }
    split_fields($category_ref);
}
##
# Split flatfile fields to Product fielda and 
# Category fields using category hierarchy description
sub split_fields {
    my $level = shift;
    foreach my $key (keys %$level) {
	if (exists($Product_fields{$key})) {
	    $Category_fields{$key} = $Product_fields{$key};
	    delete($Product_fields{$key});
	}
	my $child = $level->{$key};
	if ((ref($child) eq 'HASH') &&
	    (keys(%$child) > 0)) {
	    split_fields($child);
	}
    }    
}

##
# Reading flatfile to Categories hash and Products array. 
#
sub load_flatfile {
    my $file = shift;
    open(F,$file) || die "Can't open '$file': $!\n";
    while(my $str=<F>) {
	chop $str while $str=~/[\r\n\s]$/;
	# Remove unprintable character with code higher 200
	$str =~ s/[\200-\377]//g;
    
	my @line = split(/\t/,$str);

        my $category = category_level('',
	      $flatfile_spec->{category_hierarchy},
	      \@line
	);
	my $hash_ref = {category => $category }; 
	# product processing
        foreach my $key (keys %Product_fields) {
	    $hash_ref->{$key} = $line[$Product_fields{$key}]; 
	}
	push(@prod, $hash_ref);

    }
    close(F);
}
##
# Making XML document from Categories hash and Products array. 
#
sub make_xml {
    my $xml=XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 1);
    $xml->xmlDecl;
    ##
    # Writing out categories
    #
    $xml->startTag('catalog', 
	id => $catalog_id, 
	name => $catalog_name
    );
    
    $xml->startTag('categories');
    for(my $key=0;$key<@cat;$key++) {
	my $attr_ref = $cat[$key];
	$xml->emptyTag('catdesc', %$attr_ref);
    }
    ##
    # Finishing 'categories'
    #
    $xml->endTag('categories');
    ##
    # Writing out products
    #    
    $xml->startTag('products');

    for (my $key=0;$key<@prod;$key++) {
	my $hash_ref = $prod[$key];
	my $category = $hash_ref->{category};
	delete($hash_ref->{category});
	$xml->startTag('product', %$hash_ref);
	$xml->emptyTag('category', id => $category) if $category;
	$xml->endTag('product');
    }
    $xml->endTag('products');
    $xml->endTag('catalog');
}
##
# Categories hierarchy processing to make parent-child
# categories pairs in categories hash.
sub category_level {
    my $parent_key = shift;
    my $level = shift;
    my $data = shift;
    my $cat_name = '';
    my $parent_id = '';
    my $parent_name = '';
    my $this_category_id;
    
    if ($parent_key ne '') {
        if (exists($Category_fields{$parent_key}) && 
    		    scalar($Category_fields{$parent_key})) {
    	    $parent_name = $data->[$Category_fields{$parent_key}] || '';

	    if (defined($parent_name)) {
		for (my $i=0;$i<@cat;$i++){
		    $parent_id = $i if (($cat[$i]->{name} || '') eq $parent_name);
		}
	    }
	}
    }
    
    foreach my $key (keys %$level) {
	if (exists($Category_fields{$key}) && scalar($Category_fields{$key})) {
	    $cat_name = $data->[$Category_fields{$key}] || '';
	    my $found = '';
	    for (my $i=0;$i<@cat;$i++){
		if (
		     (($cat[$i]->{name} || '') eq $cat_name) && 
		     (($cat[$i]->{parent_id} || '') eq $parent_id)
		   ) {
		    $this_category_id = $i;
		    $found = 1;
		}
	    }
	    if ($found eq '') {
		$cat[$category_id] = {
			    id		=> $category_id,
			    name	=> $cat_name,
			    parent_id 	=> $parent_id,
			    parent_name => $parent_name,
		    };
		$this_category_id = $category_id;
		$category_id++;
	    }
	    my $child = $level->{$key};
	    if ((ref($child) eq 'HASH') &&
		(keys(%$child) > 0)) {
		$this_category_id = category_level($key,$child,$data);
	    }
	}else{
	    die "Can't get category by '$Category_fields{$key}'\nCheck your Config.pm!\n";
	}
    }
    return $this_category_id;
}