The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!perl 

#------------------------------------------------------------------------------
# parse the ZX Spectrum ROM disassembly file
package Parsezx48;

use strict;
use warnings;

use Fcntl 'O_RDONLY';
use Tie::File;
use Asm::Z80::Table;

#------------------------------------------------------------------------------
# collect info from disassembly file
use Class::XSAccessor {
	constructor	=> '_new',
	accessors	=> [
		'label_by_addr',	# hash of addr => label name
		'label_by_name',	# hash of label name => addr
		'label_by_line',	# hash of line number => label name
		'header',			# file header
		'footer',			# file footer
		'instr',			# array of instr by addr
	],
};

use Class::XSAccessor {
	class		=> 'My::Instr',
	constructor	=> 'new',
	accessors	=> [
		'addr',				# address
		'size',				# size
		'label',			# label, if any
		'block_comment',	# block comment before instruction
		'line_comment',		# line comment after instruction
		'opcode',			# assembly instruction
		'is_data',			# true if it is a def* opcode
	],
};

#------------------------------------------------------------------------------
# parse the given file
sub new { 
	my($class, $rom_asm_file) = @_;
	my $self = $class->_new( label_by_addr 	=> {}, 
							 label_by_name 	=> {},
							 label_by_line 	=> {},
							 instr 			=> []);
	$self->read_asm($rom_asm_file);
	
	return $self;
}

#------------------------------------------------------------------------------
# read assembly file
sub read_asm {
	my($self, $rom_asm_file) = @_;
	
	my @file;
	tie @file, 'Tie::File', $rom_asm_file, mode => O_RDONLY;
	
	# read file header and footer
	my $p_min = $self->read_header(\@file);
	my $p_max = $self->read_footer(\@file);
	
	# first pass : read all the labels
	$self->read_labels(\@file, $p_min, $p_max);
	
	# read all instr
	my $addr = 0;
	my $instr;
	for (my $p = $p_min ; $p <= $p_max ; $p++ ) {
		($instr, $p) = $self->read_instr($addr, \@file, $p, $p_max) or die;
		
		$self->instr->[$addr] and die;
		$self->instr->[$addr] = $instr;
		
		$addr += $instr->size;
	}		
}

#------------------------------------------------------------------------------
# read file header
sub read_header {
	my($self, $file) = @_;

	my $header = '';
	my $found_org;
	for my $p (0 .. $#$file) {
		if ($file->[$p] =~ / ^ \s* org /ix) {
			$found_org++;
		}
		elsif ( $found_org ?
					$file->[$p] =~ / ^ (?:     \s* $ | (\#) ) /x :
					$file->[$p] =~ / ^ (?: ; | \s* $ | (\#) ) /x ) {
			$header .= ($1 ? ';' : '') . $file->[$p] . "\n";	# comment '#'
		}
		else {
			$self->header($header) unless $header eq ''; 
			return $p;
		}
	}
	return 0;	# no header;
}

#------------------------------------------------------------------------------
# read file footer
sub read_footer {
	my($self, $file) = @_;

	my $footer = '';
	for my $p (reverse 0 .. $#$file) {
		if ($file->[$p] =~ / ^ (?: ; | \s* $ | (\#) ) /x ) {
			$footer = ($1 ? ';' : '') . $file->[$p] . "\n" .	# comment '#'
					  $footer;
		}
		else {
			$self->footer($footer) unless $footer eq ''; 
			return $p;
		}
	}
	return $#$file;	# no footer;
}

#------------------------------------------------------------------------------
# read labels
sub read_labels {
	my($self, $file, $p_min, $p_max) = @_;

	for my $p ($p_min .. $p_max) {
		if ($file->[$p] =~ /^([a-z_]\w*)/i) {
			my $label = $1;
			next if $label =~ /^org$/i;
			$label =~ /^[LX]([0-9A-F]{4})$/ 
				or die "unrecoginzed label format at line ".($p+1);
			my $addr = hex($1);
			
			# search for label name
			my $label_name = $label;		# default
			for my $q (reverse $p_min .. $p-1) {
				last unless $file->[$q] =~ /^\s*(;|$)/;
				if ($file->[$q] =~ /^;;\s+(\S+)/) {
					$label_name = $1;
					last;
				}
			}
			
			# make label name valid
			$label_name =~ s/\W/_/g;
			
			# cannot exist
			exists $self->label_by_addr->{$addr}
				and die "two labels at the same address at line ".($p+1); 
			exists $self->label_by_name->{uc($label_name)}
				and die "two labels with the same name at line ".($p+1); 
			exists $self->label_by_line->{$p}
				and die "two labels at the same line at line ".($p+1); 
				
			$self->label_by_addr->{$addr} = $label_name;
			$self->label_by_name->{uc($label_name)} = $addr;
			$self->label_by_line->{$p} = $label_name;
		}
	}
}

#------------------------------------------------------------------------------
# read the next opcode
sub read_instr {
	my($self, $addr, $file, $p, $p_max) = @_;
	
	my $instr = My::Instr->new(addr => $addr);
	
	# block comment
	my $block_comment = '';
	while ($p <= $p_max &&
		   $file->[$p] =~ /^\s*(;|$|org)/i) {
		$block_comment .= $file->[$p] . "\n" unless $file->[$p] =~ /^\s*org/i;
		$p++;
	}
	$instr->block_comment($block_comment) unless $block_comment eq '';
	
	# first opcode line
	if ($p <= $p_max &&
	    $file->[$p] =~ / ^ ( [a-z_]\w* [:\s] \s* | \s+ ) 
						   ( \w+ .*? )?
						   ( ; .*)? $ /ix) {
		my($label, $opcode, $comment) = ($1, $2, $3);
		
		# get label from $self, already processed
		if ($label =~ /\w+/) {
			my $label_name = $self->label_by_line->{$p} 
				or die "label $label not found at line ".($p+1);
			$instr->label($label_name);
			
			$addr == $self->label_by_name->{uc($instr->label)}
				or die "address $addr does not match label $label_name".
					   " at line ".($p+1);			
		}
		
		# convert opcode format to lower case, 0xhhhh numbers, include labels
		if (! $opcode) {
			# need to read opcode from next line
			$comment and die "comment unexpected at line ".($p+1);
			$p++;
			$p > $p_max and die "opcode not found at line ".($p+1);
			$file->[$p] =~ / ^ \s+ ( \w+ .*? )
						           ( ; .*)? $ /ix 
				or die "cannot parse line ".($p+1);
			($opcode, $comment) = ($1, $2);
		}
		
		my @tokens = $self->lexer($opcode);
		$opcode = join('', @tokens);

		$instr->opcode($opcode);
		$instr->is_data( $opcode =~ /^def/i );
		$instr->size($self->opcode_size($p, @tokens));
		
		# line comment
		if (defined $comment) {
			for ($comment) {
				s/^;\s?//;
				s/\s+$//;
			}
			$instr->line_comment($comment) unless $comment eq '';
		}
	}
	else {
		die "cannot parse instruction at line ".($p+1);
	}

	# following comment lines
	while ($p+1 <= $p_max &&
		   $file->[$p+1] =~ /^\s*(;.*)/) {
		my $comment = $1;
		$comment =~ s/^;\s?//; $comment =~ s/\s+$//;
		if ($comment ne '') {
			defined($instr->line_comment) or $instr->line_comment('');
			$instr->line_comment($instr->line_comment . "\n" . $comment);
		}
		
		$p++;
	}
	
	return ($instr, $p);
}

#------------------------------------------------------------------------------
# convert opcode into tokens
sub lexer {
	my($self, $opcode) = @_;
	my @tokens;
	
	for ($opcode) {
		s/\s+$//;
		while (! / \G \z /gcxi) {
			if (/ \G \s+ /gcxi) {
				push @tokens, " ";
			}
			elsif (/ \G \$ ([0-9A-F]+) \b /gcxi) {
				push @tokens, "0x".$1;
			}
			elsif (/ \G (\d [0-9A-F]*) h \b /gcxi) {
				push @tokens, "0x".$1;
			}
			elsif (/ \G [LX] ([0-9A-F]{4}) \b /gcxi) {
				my $addr = hex($1);
				my $label_name = $self->label_by_addr->{$addr} or die;
				push @tokens, $label_name;
			}
			elsif (/ \G (' [^']* ') /gcxi) {
				push @tokens, $1;
			}
			elsif (/ \G (" [^"]* ") /gcxi) {
				push @tokens, $1;
			}
			elsif (/ \G (af' | \w+ | .) /gcxi) {
				push @tokens, lc($1);
			}
			else {
				die 'not reached';
			}
		}
	}
	return @tokens;
}

#------------------------------------------------------------------------------
# compute size of assembly opcode
sub opcode_size {
	my($self, $p, @tokens) = @_;
	my $opcode = join('', @tokens);
	
	@tokens = grep {! /^\s*$/} @tokens;
	if (@tokens && $tokens[0] =~ /^def/i) {
		# compute size of defb, defw
		my $token = shift @tokens;
		if ($token eq 'defm') {
			@tokens == 1 
				or die "cannot parse '$opcode' at line ".($p+1);
			return length($tokens[0]) - 2;	# length minus quotes
		}
		else {
			my $size = 
				$token eq 'defb' ? 1 : 
				$token eq 'defw' ? 2 : 
				die("cannot parse token '$token' from '$opcode' at line ".($p+1));
			my $count = 1;
			for (@tokens) { $count++ if $_ eq ',' }
			return $count * $size;
		}
	}
	else {
		my $table = Asm::Z80::Table->asm_table;
		while (defined(my $token = shift @tokens)) {
			$token = oct($token) if $token =~ /^0x/;
		
			if (exists $table->{$token}) {
				$table = $table->{$token};
			}
			elsif ( exists($table->{NN}  ) || 
					exists($table->{N}   ) || 
					exists($table->{DIS} ) || 
					exists($table->{NDIS}) ) {
				$table = $table->{NN}  || 
						 $table->{N}   || 
						 $table->{DIS} || 
						 $table->{NDIS};
				
				# $token is first term of expression
				my $paren = $token eq '(' ? 1 : 0;
				while (@tokens) {
					last if $tokens[0] eq ',';					# term separator
					last if $tokens[0] eq ')' && $paren == 0;	# end of expression
					
					$token = shift @tokens;
					$paren++ if $token eq '(';
					$paren-- if $token eq ')';
				}
			}
			else {			
				die "cannot parse token '$token' from opcode '$opcode' at line ".
				    ($p+1);
			}
		}
		
		return scalar @{$table->{''}};
	}
}

#------------------------------------------------------------------------------
# send the disassembly to the given file
sub write {
	my($self, $file) = @_;
	
	my $fh;
	if ($file) {
		open($fh, ">", $file) or die "write $file: $!";
	}
	else {
		$fh = \*STDOUT;
	}
	
	print $fh $self->header if defined $self->header;
	
	print $fh " " x 8, "org ", sprintf("0x%04X", $self->instr->[0]->addr), "\n\n"
		if @{$self->instr};
	
	for my $instr (@{$self->instr}) {
		next unless $instr;
		
		print $fh $instr->block_comment if defined $instr->block_comment;
		
		if (defined $instr->label) {
			print $fh $instr->label, ":\n";
		}
		if (! defined $instr->line_comment) {
			print $fh " " x 8, $instr->opcode, "\n";
		}
		else {
			my @line_comment = split(/\n/, $instr->line_comment);
			
			if (! @line_comment ||
			    length($instr->opcode) >= 24) {
				print $fh " " x 8, $instr->opcode, "\n";
			}
			else {
				print $fh " " x 8, sprintf("%-24s", $instr->opcode),
						  "; ", shift(@line_comment), "\n";
			}
		
			while (@line_comment) {
				print $fh " " x 32, "; ", shift(@line_comment), "\n";
			}
		}
	}
	print $fh $self->footer if defined $self->footer;
}

1;