#!/usr/bin/perl -wT

use strict;

# 6809 assembler
# by Ciaran Anscomb, 2010

# A 3+ pass assembler.  Kind of.
# Pass 1: Read in text, store macros while reading.
# Pass 2: Divide text into sections.
# Pass 3: Assemble each section in turn, expanding any macro calls.
#         Repeat until addresses are stable.

# Disclaimer:
# Possibly not fit for any purpose.  You're free to use, copy, modify and
# redistribute so long as I don't get the blame for anything.

use Text::Tabs;
$tabstop = 8;

my $VERSION = "1.1";

my $output_format = 'binary';
my $output_filename = undef;
my $symbols_filename = undef;
my $listing_filename = undef;
my $verbose = 0;
my $quiet = 0;
my $exec_label = undef;

sub helptext {
	print <<EOF;
Usage: $0 [OPTION]... SOURCE-FILE...
Assembles 6809 source code.

  -I PATH           add to include path

  -B, --bin         output to binary file (default)
  -H, --hex         output to (currently malformed) Intel hex record file
  -D, --dragondos   output to DragonDOS binary file
  -C, --coco        output to CoCo segmented binary file
  -e, --exec=ADDR   EXEC address (for output formats that support one)

  -o, --output=FILE    set output filename
  -l, --listing=FILE   create listing file
  -s, --symbols=FILE   create symbol table

  -v, --verbose   show what assembler is doing at each stage
  -q, --quiet     suppress warnings
      --help      show this help and exit

If more than one SOURCE-FILE is specified, they are assembled as though
they were all in one file.
EOF
}

my @include_path = ( "." );
my @files = ();

while (scalar(@ARGV) > 0) {
	my $opt = $ARGV[0];
	if ($opt eq "--") {
		shift @ARGV; last;
	} elsif ($opt =~ /^-I(.+)$/) {
		push @include_path, $1;
	} elsif ($opt eq "-I") {
		shift @ARGV;
		if ($ARGV[0] =~ /^(.*)$/) {
			push @include_path, $1;  # de-taint
		}
	} elsif ($opt =~ /^--output=(.*)$/) {
		$output_filename = $1;
	} elsif ($opt eq "-o" || $opt eq "--output") {
		shift @ARGV;
		if ($ARGV[0] =~ /^(.*)$/) {
			$output_filename = $1;  # de-taint
		}
	} elsif ($opt =~ /^--symbols=(.*)$/) {
		$symbols_filename = $1;
	} elsif ($opt eq "-s" || $opt eq "--symbols") {
		shift @ARGV;
		if ($ARGV[0] =~ /^(.*)$/) {
			$symbols_filename = $1;  # de-taint
		}
	} elsif ($opt =~ /^--listing=(.*)$/) {
		$listing_filename = $1;
	} elsif ($opt eq "-l" || $opt eq "--listing") {
		shift @ARGV;
		if ($ARGV[0] =~ /^(.*)$/) {
			$listing_filename = $1;  # de-taint
		}
	} elsif ($opt eq "-B" || $opt eq "--bin") {
		$output_format = 'binary';
	} elsif ($opt eq "-H" || $opt eq "--hex") {
		$output_format = 'hex';
	} elsif ($opt eq "-D" || $opt eq "--dragondos") {
		$output_format = 'dragondos';
	} elsif ($opt eq "-C" || $opt eq "--coco") {
		$output_format = 'coco';
	} elsif ($opt eq "-e" || $opt eq "--exec") {
		shift @ARGV;
		if ($ARGV[0] =~ /^(.*)$/) {
			$exec_label = $1;  # de-taint
		}
	} elsif ($opt =~ /^--exec=(.*)$/) {
		$exec_label = $1;
	} elsif ($opt eq "-v" || $opt eq "--verbose") {
		$verbose++;
	} elsif ($opt eq "-q" || $opt eq "--quiet") {
		$quiet = 1;
	} elsif ($opt eq "--version") {
		print "asm6809.pl $VERSION\n";
		exit 0;
	} elsif ($opt eq "--help") {
		helptext();
		exit 0;
	} elsif ($opt =~ /^-/) {
		print STDERR "$0: unrecognised option '$opt'\n";
		print STDERR "Try '$0 --help' for more information.\n";
		exit 1;
	} else {
		push @files, $opt;
	}
	shift @ARGV;
}

# Append rest of command line to file list.
push @files, @ARGV;

if (scalar(@files) < 1) {
	print STDERR "$0: source filename required\n";
	print STDERR "Try '$0 --help' for more information.\n";
	exit 1;
}

my %modes = (
	'export' => 'export',
	'org'   => 'equ',
	'equ'   => 'equ',
	'setdp' => 'setdp',
	'rmb'   => 'rmb',
	'fcb'	=> 'fcb',
	'fdb'	=> 'fdb',
	'fcc'	=> 'fcc',
	'pshs'  => 'stack',
	'puls'  => 'stack',
	'pshu'  => 'stack',
	'pulu'  => 'stack',
	'exg'   => 'pair',
	'tfr'   => 'pair',
);

my %opcodes = (
	abx => { inherent => '3a' },
	adca => { immediate => '89%b', direct => '99%b', indexed => 'a9%i', extended => 'b9%w' },
	adcb => { immediate => 'c9%b', direct => 'd9%b', indexed => 'e9%i', extended => 'f9%w' },
	adda => { immediate => '8b%b', direct => '9b%b', indexed => 'ab%i', extended => 'bb%w' },
	addb => { immediate => 'cb%b', direct => 'db%b', indexed => 'eb%i', extended => 'fb%w' },
	addd => { immediate => 'c3%w', direct => 'd3%b', indexed => 'e3%i', extended => 'f3%w' },
	anda => { immediate => '84%b', direct => '94%b', indexed => 'a4%i', extended => 'b4%w' },
	andb => { immediate => 'c4%b', direct => 'd4%b', indexed => 'e4%i', extended => 'f4%w' },
	andcc => { immediate => '1c%b' },
	asl => { direct => '08%b', indexed => '68%i', extended => '78%w' },
	asla => { inherent => '48' },
	aslb => { inherent => '58' },
	asr => { direct => '07%b', indexed => '67%i', extended => '77%w' },
	asra => { inherent => '47' },
	asrb => { inherent => '57' },
	bcc => { relative => '24%b' },
	bcs => { relative => '25%b' },
	beq => { relative => '27%b' },
	bge => { relative => '2c%b' },
	bgt => { relative => '2e%b' },
	bhi => { relative => '22%b' },
	bhs => { relative => '24%b' },
	bita => { immediate => '85%b', direct => '95%b', indexed => 'a5%i', extended => 'b5%w' },
	bitb => { immediate => 'c5%b', direct => 'd5%b', indexed => 'e5%i', extended => 'f5%w' },
	ble => { relative => '2f%b' },
	blo => { relative => '25%b' },
	bls => { relative => '23%b' },
	blt => { relative => '2d%b' },
	bmi => { relative => '2b%b' },
	bne => { relative => '26%b' },
	bpl => { relative => '2a%b' },
	bra => { relative => '20%b' },
	brn => { relative => '21%b' },
	bsr => { relative => '8d%b' },
	bvc => { relative => '28%b' },
	bvs => { relative => '29%b' },
	clr => { direct => '0f%b', indexed => '6f%i', extended => '7f%w' },
	clra => { inherent => '4f' },
	clrb => { inherent => '5f' },
	cmpa => { immediate => '81%b', direct => '91%b', indexed => 'a1%i', extended => 'b1%w' },
	cmpb => { immediate => 'c1%b', direct => 'd1%b', indexed => 'e1%i', extended => 'f1%w' },
	cmpd => { immediate => '1083%w', direct => '1093%b', indexed => '10a3%i', extended => '10b3%w' },
	cmps => { immediate => '118c%w', direct => '119c%b', indexed => '11ac%i', extended => '11bc%w' },
	cmpu => { immediate => '1183%w', direct => '1193%b', indexed => '11a3%i', extended => '11b3%w' },
	cmpx => { immediate => '8c%w', direct => '9c%b', indexed => 'ac%i', extended => 'bc%w' },
	cmpy => { immediate => '108c%w', direct => '109c%b', indexed => '10ac%i', extended => '10bc%w' },
	com => { direct => '03%b', indexed => '63%i', extended => '73%w' },
	coma => { inherent => '43' },
	comb => { inherent => '53' },
	cwai => { immediate => '3c%b' },
	daa => { inherent => '19' },
	dec => { direct => '0a%b', indexed => '6a%i', extended => '7a%w' },
	deca => { inherent => '4a' },
	decb => { inherent => '5a' },
	eora => { immediate => '88%b', direct => '98%b', indexed => 'a8%i', extended => 'b8%w' },
	eorb => { immediate => 'c8%b', direct => 'd8%b', indexed => 'e8%i', extended => 'f8%w' },
	exg => { pair => '1e%b' },
	inc => { direct => '0c%b', indexed => '6c%i', extended => '7c%w' },
	inca => { inherent => '4c' },
	incb => { inherent => '5c' },
	jmp => { direct => '0e%b', indexed => '6e%i', extended => '7e%w' },
	jsr => { direct => '9d%b', indexed => 'ad%i', extended => 'bd%w' },
	lbcc => { longrelative => '1024%w' },
	lbcs => { longrelative => '1025%w' },
	lbeq => { longrelative => '1027%w' },
	lbge => { longrelative => '102c%w' },
	lbgt => { longrelative => '102e%w' },
	lbhi => { longrelative => '1022%w' },
	lbhs => { longrelative => '1024%w' },
	lble => { longrelative => '102f%w' },
	lblo => { longrelative => '1025%w' },
	lbls => { longrelative => '1023%w' },
	lblt => { longrelative => '102d%w' },
	lbmi => { longrelative => '102b%w' },
	lbne => { longrelative => '1026%w' },
	lbpl => { longrelative => '102a%w' },
	lbra => { longrelative => '16%w' },
	lbrn => { longrelative => '1021%w' },
	lbsr => { longrelative => '17%w' },
	lbvc => { longrelative => '1028%w' },
	lbvs => { longrelative => '1029%w' },
	lda => { immediate => '86%b', direct => '96%b', indexed => 'a6%i', extended => 'b6%w' },
	ldb => { immediate => 'c6%b', direct => 'd6%b', indexed => 'e6%i', extended => 'f6%w' },
	ldd => { immediate => 'cc%w', direct => 'dc%b', indexed => 'ec%i', extended => 'fc%w' },
	lds => { immediate => '10ce%w', direct => '10de%b', indexed => '10ee%i', extended => '10fe%w' },
	ldu => { immediate => 'ce%w', direct => 'de%b', indexed => 'ee%i', extended => 'fe%w' },
	ldx => { immediate => '8e%w', direct => '9e%b', indexed => 'ae%i', extended => 'be%w' },
	ldy => { immediate => '108e%w', direct => '109e%b', indexed => '10ae%i', extended => '10be%w' },
	leas => { indexed => '32%i' },
	leau => { indexed => '33%i' },
	leax => { indexed => '30%i' },
	leay => { indexed => '31%i' },
	lsl => { direct => '08%b', indexed => '68%i', extended => '78%w' },
	lsla => { inherent => '48' },
	lslb => { inherent => '58' },
	lsr => { direct => '04%b', indexed => '64%i', extended => '74%w' },
	lsra => { inherent => '44' },
	lsrb => { inherent => '54' },
	mul => { inherent => '3d' },
	neg => { direct => '00%b', indexed => '60%i', extended => '70%w' },
	nega => { inherent => '40' },
	negb => { inherent => '50' },
	nop => { inherent => '12' },
	ora => { immediate => '8a%b', direct => '9a%b', indexed => 'aa%i', extended => 'ba%w' },
	orb => { immediate => 'ca%b', direct => 'da%b', indexed => 'ea%i', extended => 'fa%w' },
	orcc => { immediate => '1a%b' },
	pshs => { stack => '34%s' },
	pshu => { stack => '36%u' },
	puls => { stack => '35%s' },
	pulu => { stack => '37%u' },
	rol => { direct => '09%b', indexed => '69%i', extended => '79%w' },
	rola => { inherent => '49' },
	rolb => { inherent => '59' },
	ror => { direct => '06%b', indexed => '66%i', extended => '76%w' },
	rora => { inherent => '46' },
	rorb => { inherent => '56' },
	rti => { inherent => '3b' },
	rts => { inherent => '39' },
	sbca => { immediate => '82%b', direct => '92%b', indexed => 'a2%i', extended => 'b2%w' },
	sbcb => { immediate => 'c2%b', direct => 'd2%b', indexed => 'e2%i', extended => 'f2%w' },
	sex => { inherent => '1d' },
	sta => { direct => '97%b', indexed => 'a7%i', extended => 'b7%w' },
	stb => { direct => 'd7%b', indexed => 'e7%i', extended => 'f7%w' },
	std => { direct => 'dd%b', indexed => 'ed%i', extended => 'fd%w' },
	sts => { direct => '10df%b', indexed => '10ef%i', extended => '10ff%w' },
	stu => { direct => 'df%b', indexed => 'ef%i', extended => 'ff%w' },
	stx => { direct => '9f%b', indexed => 'af%i', extended => 'bf%w' },
	sty => { direct => '109f%b', indexed => '10af%i', extended => '10bf%w' },
	suba => { immediate => '80%b', direct => '90%b', indexed => 'a0%i', extended => 'b0%w' },
	subb => { immediate => 'c0%b', direct => 'd0%b', indexed => 'e0%i', extended => 'f0%w' },
	subd => { immediate => '83%w', direct => '93%b', indexed => 'a3%i', extended => 'b3%w' },
	swi => { inherent => '3f' },
	swi2 => { inherent => '103f' },
	swi3 => { inherent => '113f' },
	sync => { inherent => '13' },
	tfr => { pair => '1f%b' },
	tst => { direct => '0d%b', indexed => '6d%i', extended => '7d%w' },
	tsta => { inherent => '4d' },
	tstb => { inherent => '5d' },
);

my %arg_parse = (
	export => \&parse_export,
	equ => \&parse_equ,
	setdp => \&parse_setdp,
	rmb => \&parse_rmb,
	fcb => \&parse_fcb,
	fdb => \&parse_fdb,
	fcc => \&parse_fcc,
	stack => \&parse_stack,
	pair => \&parse_pair,
	address => \&parse_address,
	immediate => \&parse_immediate,
	direct => \&parse_direct,
	indexed => \&parse_indexed,
	extended => \&parse_extended,
	inherent => \&parse_inherent,
	relative => \&parse_relative,
	longrelative => \&parse_longrelative,
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

### Pass 1
# First pass is purely reading in files, building macros

my %macros = ();
my $defining_macro = undef;
my $file_ctx = { };

my @syntax_errors = ();  # syntax errors fatal after first pass
my @errors = ();  # other errors and warnings go in here
my %label_errors = ();  # only report missing labels once

my $input = [];
for (@files) {
	read_file($_, $input);
}
die_if_syntax_error();

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

### Pass 2
# Second pass splits input into sections

my @section_list;
my %sections;

{

	my $section_name = "";
	@section_list = ( $section_name );
	%sections = ( $section_name => [] );
	my $section = $sections{$section_name};

	$file_ctx = { filename => "", lno => 0 };

	for my $cmd (@$input) {
		if (exists $$cmd{pragma}) {
			if ($$cmd{pragma} eq 'line') {
				$file_ctx = { %{$$cmd{file_ctx}} };
				push @$section, { pragma => 'line', file_ctx => { %{$file_ctx} } };
			} else {
				syntax_error("internal error: bad pragma");
			}
			next;
		}
		if (exists $$cmd{opcode} && $$cmd{opcode} eq 'section') {
			# Actually changing section?
			if ($section_name ne $$cmd{arg}) {
				$section_name = $$cmd{arg};  # XXX validate
				# New section?
				if (!exists $sections{$section_name}) {
					$sections{$section_name} = [];
					push @section_list, $section_name;
				}
				$section = $sections{$section_name};
				push @$section, { pragma => 'line', file_ctx => { %{$file_ctx} } };
			}
			$$file_ctx{lno}++;
			delete $$cmd{opcode};
			push @$section, $cmd;
			next;
		}
		$$file_ctx{lno}++;
		push @$section, $cmd;
	}
	undef $input;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

### Pass 3+
# Assemble sections until stable

my $pass = 2;

my $unresolved = 1;
my @listing;
my $pc = 0;
my $dp = undef;
my $exec_addr = undef;
my %equs = ();
my %export_labels = ();
my $asection;  # array ref of section currently being assembled
my $as_index;  # track where in the section array we are

while ($unresolved && $pass < 10) {
	$pass++;
	print STDERR "=== Pass $pass\n" if ($verbose);

	$unresolved = 0;
	@listing = ();
	@errors = ();
	%label_errors = ();
	$pc = 0;

	$file_ctx = { filename => "", lno => 0 };

	for my $sname (@section_list) {
		print STDERR "==> SECTION $sname\n" if ($verbose && $sname);
		$asection = $sections{$sname};
		$as_index = -1;
		for my $cmd (@$asection) {
			$as_index++;
			if (exists $$cmd{pragma}) {
				if ($$cmd{pragma} eq 'line') {
					$file_ctx = { %{$$cmd{file_ctx}} };
					push @listing, $cmd;
				} else {
					syntax_error("internal error: bad pragma");
				}
				next;
			}
			$$file_ctx{lno}++;

			process_cmd($cmd);

		}
	}

	if (defined $exec_label) {
		$exec_addr = eval_expr($exec_label);
		$unresolved = 1 if (!defined $exec_addr);
	}

	die_if_syntax_error();

}

for (@errors) {
	print STDERR "$_\n";
}

if ($unresolved) {
	print STDERR "error: unresolved after $pass passes\n";
	exit 1;
}

undef %sections;
undef @section_list;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

my %region_start = ();
my %region_end = ();

$file_ctx = { filename => "", lno => 0 };

my $listing_file = undef;
if (defined $listing_filename) {
	open($listing_file, ">", $listing_filename);
}

#
# Place instruction text into regions.
# Generate listing file if required.
#

for my $cmd (@listing) {
	my ($start,$end) = (undef,undef);
	my $addr = "";
	my $text = $$cmd{text} || "";
	my $line = expand($$cmd{line} || "");
	my $label = $$cmd{label} || "";
	my $opcode = $$cmd{opcode} || "";
	if ($label && $$cmd{label} !~ /^\d+$/) {
		$start = $equs{$$cmd{label}};
		$addr = sprintf("\%04X", $equs{$$cmd{label}} & 0xffff);
	} elsif ($text || $label || $opcode eq 'org') {
		$start = $$cmd{addr};
		$addr = sprintf("\%04X", $start || 0);
	}
	if (defined $listing_file) {
		printf $listing_file "\%-6s\%-14s  \%s\n", $addr, uc $text, $line;
	}
	if ($text) {
		my $end = $start + $$cmd{size};
		if (exists $region_end{$start}) {
			my $region = $region_end{$start};
			$$region{text} .= $text;
			$$region{end} = $end;
			delete $region_end{$start};
			$region_end{$end} = $region;
		} elsif (exists $region_start{$end}) {
			my $region = $region_start{$end};
			$$region{text} = $text . $$region{text};
			$$region{start} = $start;
			delete $region_start{$end};
			$region_start{$start} = $region;
		} else {
			my $region = {
				text => $text,
				start => $start,
				end => $end,
			};
			$region_start{$start} = $region;
			$region_end{$end} = $region;
		}
	}
}

undef @listing;

if (defined $listing_file) {
	close $listing_file;
}

#
# Generate symbols file, if required.
#

if (defined $symbols_filename) {
	if (open(my $symbols_file, ">", $symbols_filename)) {
		for my $label (keys %export_labels) {
			if (exists $macros{lc($label)}) {
				print $symbols_file "$label\tmacro\n";
				for (@{$macros{lc($label)}}) {
					print $symbols_file "$_->{line}\n";
				}
				print $symbols_file "\tendm\n";
			} elsif (exists $equs{$label}) {
				print $symbols_file "$label\tequ\t$equs{$label}\n";
			}
		}
		close $symbols_file;
	} else {
		die "Couldn't open $symbols_filename for writing: $!\n";
	}
}
undef %export_labels;
undef %macros;

exit 0 if (!defined $output_filename);

#
# Generate output file.
#

my $output_file = undef;
unless (open($output_file, ">", $output_filename)) {
	die "Couldn't open $output_filename for writing: $!\n";
}

my @regions = sort { $a <=> $b } keys %region_start;

if ($output_format eq 'dragondos') {
	my $start = $regions[0];
	my $region0 = $region_start{$start};
	my $regionl = $region_start{$regions[$#regions]};
	my $length = ($regions[$#regions] + (length($$regionl{text}) / 2)) - $start;
	my $exec = (defined $exec_addr) ? $exec_addr : $start;
	print $output_file pack("CCnnnC", 0x55, 0x02, $start, $length, $exec, 0xaa);
}

my $last_region_end = undef;
for my $r (sort { $a <=> $b } keys %region_start) {
	my $region = $region_start{$r};
	my $addr = $r;
	if ($output_format eq 'binary' || $output_format eq 'dragondos') {
		if (defined $last_region_end) {
			my $skip = $addr - $last_region_end;
			print $output_file "\0" x $skip;
		}
		print $output_file pack("H*", $$region{text});
	} elsif ($output_format eq 'hex') {
		my $size = length($$region{text}) / 2;
		my $offset = 0;
		while ($size > 0) {
			my $bytes = ($size > 32) ? 32 : $size;
			printf $output_file ":\%02X\%04X00\%s00\n", $bytes, $addr, uc substr($$region{text}, $offset, $bytes * 2);
			$offset += $bytes * 2;
			$addr += $bytes;
			$size -= $bytes;
		}
	} elsif ($output_format eq 'coco') {
		print $output_file pack("xnnH*", length($$region{text}) / 2, $addr, $$region{text});
	}
	$last_region_end = $$region{end};
}

if ($output_format eq 'hex') {
	print $output_file ":00000001FF\n";
} elsif ($output_format eq 'coco' && defined $exec_addr) {
	print $output_file pack("Cxxn", 255, $exec_addr);
}
close $output_file;

exit 0;

############################################################################

# Assemble one line, calling expand_macro if required.
sub process_cmd {
	my ($cmd) = @_;

	push @listing, $cmd;

	my $label = $$cmd{label} || "";
	my $opcode = $$cmd{opcode} || "";
	return if (!$label && !$opcode);

	# Is this a macro?
	if (exists $macros{$opcode}) {
		verify_address($cmd, $pc);
		expand_macro($macros{$opcode}, $$cmd{arg});
		return;
	}

	# Determine the mode of an opcode.
	my $mode = "";
	if ($opcode) {
		# Mostly simple precendence rules:
		if (exists $modes{$opcode}) {
			$mode = $modes{$opcode};
		} elsif (exists $opcodes{$opcode}->{inherent}) {
			$mode = 'inherent';
		} elsif (exists $opcodes{$opcode}->{relative}) {
			$mode = 'relative';
		} elsif (exists $opcodes{$opcode}->{longrelative}) {
			$mode = 'longrelative';
		} elsif (exists $$cmd{arg}) {
			# Otherwise, format of argument determines mode:
			my $arg = $$cmd{arg};
			if ($arg =~ /^#/) {
				# XXX generalise this checking
				if (exists $opcodes{$opcode}->{immediate}) {
					$mode = 'immediate';
				} else {
					syntax_error("error: immediate addressing not available for '$opcode'");
				}
			} elsif ($arg =~ /^\[.*\]$/) {
				$mode = 'indexed';
			} elsif ($arg =~ /,/) {
				$mode = 'indexed';
			} elsif ($arg =~ /^</) {
				$mode = 'direct';
			} elsif ($arg =~ /^>/) {
				$mode = 'extended';
			} elsif ($arg) {
				$mode = 'address';  # either direct or extended 
			} else {
				syntax_error("error: unknown opcode '$opcode'");
			}
		}
	}

	# Copy appropriate template into the hash for parsing.
	if ($mode) {
		$$cmd{mode} = $mode;
		if (exists $opcodes{$opcode}->{$mode}) {
			$$cmd{tmpl} = $opcodes{$opcode}->{$mode};
		}
	}

	# Verify address/label: EQU is a special case.
	verify_address($cmd, $pc) if ($mode ne 'equ');

	# Call appropriate parse routine, fills in text & size.
	if (exists $arg_parse{$mode}) {
		$arg_parse{$mode}($cmd);
	}

	# Address of next instruction.
	$pc += $$cmd{size} if (exists $$cmd{size});
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Check instruction address against supplied PC
sub verify_address {
	my ($cmd, $pc) = @_;

	# If the address of this instruction has changed since last
	# time, mark unresolved.
	if (exists $$cmd{addr} && $$cmd{addr} != $pc) {
		$unresolved = 1;
	}
	$$cmd{addr} = $pc;

	# For non-local labels, if the address of a label has changed
	# since last time, mark unresolved.
	my $label = $$cmd{label} || "";
	if ($label && $label !~ /^\d+$/) {
		if (exists $equs{$label} && $equs{$label} != $pc) {
			$unresolved = 1;
		}
		$equs{$label} = $pc;
	}
}

# Expand a macro performing substitution, calling process_cmd for each line.
sub expand_macro {
	my ($macro, $arg) = @_;

	# Intelligently split comma-separated list of args
	my @args = ();
	if (defined $arg) {
		$arg =~ s/^\s+//;
		while ($arg ne "") {
			last if ($arg =~ /^;/);  # skip comments
			if ($arg =~ /^,/) {
				# empty expression not allowed
				syntax_error("error: bad expression in macro");
				return;
			} elsif ($arg =~ /^([\/\"!])(.*?)(\1)\s*(.*)?$/) {
				# string
				$arg = $4;
				push @args, $2;
			} elsif ($arg =~ /^([^,;]*)(.*)$/) {
				# expression
				$arg = $2;
				push @args, $1;
			}
			$arg =~ s/^,\s*//;
		}
	}

	for my $cmd (@$macro) {
		# Copy macro cmd and perform substitutions:
		my $new_cmd = { %{$cmd} };
		$$new_cmd{label} =~ s/\&(\d+)/$args[$1-1]/g if (exists $$new_cmd{label});
		$$new_cmd{opcode} =~ s/\&(\d+)/$args[$1-1]/g if (exists $$new_cmd{opcode});
		$$new_cmd{arg} =~ s/\&(\d+)/$args[$1-1]/g if (exists $$new_cmd{arg});
		$$new_cmd{line} =~ s/\&(\d+)/$args[$1-1]/g if (exists $$new_cmd{line});
		process_cmd($new_cmd);
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Tries to convert an expression into one eval-able by Perl.  Any undefined
# label references cause $unresolved to be set and undef returned.
sub eval_expr {
	my ($expr) = @_;
	my $evalstr = "";
	my $invalid = 0;
	$expr =~ s/^\s+//;
	my $nest = 0;
	my $expect_value = 1;

	while ($expr ne "") {
		if ($expect_value) {
			if ($expr =~ /^(\(+)\s*(.*)$/) {
				$evalstr .= $1;
				$nest += length($1);
				$expr = $2;
			} elsif ($expr =~ /^([~\-])\s*(.*)$/) {
				$evalstr .= $1;
				$expr = $2;
			} elsif ($expr =~ /^(\*)\s*(.*)$/) {
				$evalstr .= $pc;
				$expr = $2;
				$expect_value = 0;
			} elsif ($expr =~ /^\$([\da-fA-F]+)\s*(.*)$/) {
				$evalstr .= hex($1);
				$expr = $2;
				$expect_value = 0;
			} elsif ($expr =~ /^\%([01]+)\s*(.*)$/) {
				$evalstr .= oct("0b$1");
				$expr = $2;
				$expect_value = 0;
			} elsif ($expr =~ /^\@([0-7]+)\s*(.*)$/) {
				$evalstr .= oct("0$1");
				$expr = $2;
				$expect_value = 0;
			} elsif ($expr =~ /^'(.)\s*(.*)$/) {
				$evalstr .= ord($1);
				$expr = $2;
				$expect_value = 0;
			} elsif ($expr =~ /^(\d+)([fb])\s*(.*)$/i) {
				my $num .= $1;
				my $dir = uc $2;
				$expr = $3;
				my $instr = undef;
				# Search backwards or forwards for local label
				if ($dir eq 'B') {
					for (my $i = $as_index; $i >= 0; $i--) {
						if  (exists $asection->[$i]->{label} && $asection->[$i]->{label} eq $num) {
							$instr = $asection->[$i];
							last;
						}
					}
				} else {
					for (my $i = $as_index + 1; $i < $#$asection; $i++) {
						if  (exists $asection->[$i]->{label} && $asection->[$i]->{label} eq $num) {
							$instr = $asection->[$i];
							last;
						}
					}
				}
				if (!defined $instr) {
					error("error: unknown local label '$num$dir'");
					$unresolved = 1;
					$invalid = 1;
				} elsif (!exists $$instr{addr}) {
					$unresolved = 1;
					$evalstr .= "0";
				} else {
					$evalstr .= (0+($$instr{addr}));
				}
				$expect_value = 0;
			} elsif ($expr =~ /^(\d+(\.\d+)?)\s*(.*)$/) {
				$evalstr .= (0+$1);
				$expr = $3;
				$expect_value = 0;
			} elsif ($expr =~ /^(\w+)\s*(.*)$/) {
				$expr = $2;
				if (exists $equs{$1}) {
					$evalstr .= (0+$equs{$1});
				} else {
					$evalstr .= (0);
					if (!exists $label_errors{$1}) {
						error("error: unknown label '$1'");
						$label_errors{$1} = 1;
					}
					$unresolved = 1;
					$invalid = 1;
				}
				$expect_value = 0;
			} else {
				syntax_error("error: bad expression");
				return undef;
			}
		} else {
			if ($expr =~ /^(\)+)\s*(.*)$/) {
				$evalstr .= $1;
				$nest -= length($1);
				$expr = $2;
			} elsif ($expr =~ /^([\+\-\*\/\&\|\^]|<<|>>)\s*(.*)$/) {
				$evalstr .= $1;
				$expr = $2;
				$expect_value = 1;
			} else {
				syntax_error("error: bad expression");
				return undef;
			}
		}
	}
	if ($nest != 0) {
		syntax_error("error: bad expression");
		return undef;
	}

	return undef if ($invalid);
	my $ret = undef;
	eval "\$ret = ($evalstr);";
	return $ret;
}

############################################################################

sub parse_export {
	my ($cmd) = @_;
	if (exists $$cmd{arg}) {
		my $arg = lc $$cmd{arg};
		$export_labels{$arg} = 1;
	} else {
		syntax_error("error: bad label in export");
	}
}

sub parse_equ {
	my ($instr) = @_;
	my $arg = lc $$instr{arg};
	my $val = eval_expr($arg);
	return undef if (!defined $val);
	if (exists $$instr{addr} && $$instr{addr} != $val) {
		$unresolved = 1;
	}
	$$instr{addr} = $val;
	if ($$instr{opcode} eq 'org') {
		$pc = $val;
	}
	if (exists $$instr{label} && $$instr{label} !~ /^\d+$/) {
		$equs{$$instr{label}} = $val;
	}
}

sub parse_setdp {
	my ($instr) = @_;
	my $arg = lc $$instr{arg};
	my $val = eval_expr($arg);
	return undef if (!defined $val);
	$dp = $val & 0xff;
}

sub parse_rmb {
	my ($instr) = @_;
	my $arg = lc $$instr{arg};
	$arg =~ s/\s+//g;
	my $num = eval_expr($arg);
	return undef if (!defined $num);
	$$instr{size} = $num;
}

sub parse_fcb {
	my ($instr) = @_;
	my $arg = $$instr{arg};
	my $text = "";
	for my $i (split(/,/, $arg)) {
		my $num = eval_expr($i);
		if (defined $num) {
			$text .= sprintf("%02x", $num & 0xff);
		} else {
			$unresolved = 1;
		}
	}
	$$instr{text} = $text;
	$$instr{size} = length($text) / 2;
}

sub parse_fdb {
	my ($instr) = @_;
	my $arg = lc $$instr{arg};
	my $text = "";
	for my $i (split(/,/, $arg)) {
		my $num = eval_expr($i);
		if (defined $num) {
			$text .= sprintf("%04x", $num & 0xffff);
		} else {
			$unresolved = 1;
		}
	}
	$$instr{text} = $text;
	$$instr{size} = length($text) / 2;
}

sub parse_fcc {
	my ($instr) = @_;
	my $arg = $$instr{arg};
	my $text = "";
	my $c;
	my $tmp = "";
	my $valid = 1;
	$arg =~ s/^\s+//;
	while (defined $arg && $arg ne "") {
		last if ($arg =~ /^;/);  # skip comments
		if ($arg =~ /^,/) {
			# empty expression not allowed
			syntax_error("error: bad expression");
			return;
		} elsif ($arg =~ /^([\/\"!])(.*?)(\1)\s*(.*)?$/) {
			# string
			$arg = $4;
			my $string = $2;
			$string =~ s/(.)/sprintf("%02x", ord($1))/ge;
			$text .= $string;
		} elsif ($arg =~ /^([^,;]*)(.*)$/) {
			# expression
			$arg = $2;
			my $val = eval_expr($1);
			if (defined $val) {
				$text .= sprintf("%02x", $val & 0xff);
			} else {
				$text .= "xx";
				$valid = 0;
			}
		}
		$arg =~ s/^,\s*//;
	}
	$$instr{size} = length($text) / 2;
	return undef unless $valid;
	$$instr{text} = $text;
}

sub parse_stack {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $arg = lc $$instr{arg};
	my $sstack = 0;
	my $ustack = 0;
	my %regs = (
		cc => 0x01, a => 0x02, b => 0x04, d => 0x06, dp => 0x08,
		x => 0x10, y => 0x20, pc => 0x80,
	);
	$arg =~ s/\s+//g;
	for my $reg (split(/,/, $arg)) {
		if ($reg eq 's') {
			$ustack |= 0x40;
		} elsif ($reg eq 'u') {
			$sstack |= 0x40;
		} elsif (exists $regs{$reg}) {
			$ustack |= $regs{$reg};
			$sstack |= $regs{$reg};
		} else {
			syntax_error("error: bad register in stack operation");
		}
	}
	$sstack = sprintf("%02x", $sstack);
	$ustack = sprintf("%02x", $ustack);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%s/$sstack/g;
	$$instr{text} =~ s/\%u/$ustack/g;
	$$instr{size} = length($$instr{text}) / 2;
}

sub parse_pair {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $arg = lc $$instr{arg};
	my %regs = (
		d => 0, x => 1,  y => 2,   u => 3, s => 4, pc => 5,
		a => 8, b => 9, cc => 10, dp => 11,
	);
	$arg =~ s/\s+//g;
	my $warn_numerical = 0;
	my ($src,$dest) = split(/,/, $arg);
	if ($src =~ /^\d+$/) {
		$warn_numerical = 1;
	} elsif (exists $regs{$src}) {
		$src = $regs{$src};
	} else {
		syntax_error("error: bad source register");
		$src = 0;
	}
	if ($dest =~ /^\d+$/) {
		$warn_numerical = 1;
	} elsif (exists $regs{$dest}) {
		$dest = $regs{$dest};
	} else {
		syntax_error("error: bad destination register");
		$dest = 0;
	}
	error("warning: numerical values used in tfr or exg") if ($warn_numerical && !$quiet);
	if (($src & 8) != ($dest & 8)) {
		error("warning: register sizes don't match in tfr or exg") if (!$quiet);
	}
	my $pair = sprintf("%01x%01x", $src & 15, $dest & 15);
	$$instr{size} = length($tmpl) / 2;
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%b/$pair/g;
}

sub parse_address {
	my ($instr) = @_;
	my $opcode = $$instr{opcode};
	my $arg = lc $$instr{arg};
	my $addr = eval_expr($arg);
	if (!defined $addr) {
		$unresolved = 1;
		if (exists $opcodes{$opcode}->{extended}) {
			$$instr{size} = length($opcodes{$opcode}->{extended}) / 2;
		} elsif (exists $opcodes{$opcode}->{direct}) {
			$$instr{size} = (length($opcodes{$opcode}->{direct}) / 2) + 1;
		} elsif (exists $opcodes{$opcode}->{indexed}) {
			$$instr{size} = (length($opcodes{$opcode}->{indexed}) / 2) + 2;
		} else {
			syntax_error("error: unknown opcode '$opcode'");
		}
		return undef;
	}
	if (defined $dp
		&& $dp == (($addr >> 8) & 0xff)
		&& exists $opcodes{$opcode}->{direct}) {
		$$instr{tmpl} = $opcodes{$opcode}->{direct};
		return parse_direct($instr);
	} elsif (exists $opcodes{$opcode}->{extended}) {
		$$instr{tmpl} = $opcodes{$opcode}->{extended};
		return parse_extended($instr);
	} else {
		$$instr{tmpl} = $opcodes{$opcode}->{indexed};
		return parse_indexed($instr);
	}
}

sub parse_immediate {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $arg = lc $$instr{arg};
	$arg =~ s/\s+//g;
	$arg =~ s/^#//;
	my $size = length($tmpl) / 2;
	$size++ if ($tmpl =~ /\%w/);
	$$instr{size} = $size;
	my $val = eval_expr($arg);
	return undef if (!defined $val);
	my $as_byte = sprintf("%02x", $val & 0xff);
	my $as_word = sprintf("%04x", $val & 0xffff);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%b/$as_byte/g;
	$$instr{text} =~ s/\%w/$as_word/g;
}

sub parse_direct {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $arg = lc $$instr{arg};
	$arg =~ s/^<//;
	my $size = length($tmpl) / 2;
	$$instr{size} = $size;
	my $addr = eval_expr($arg);
	return undef if (!defined $addr);
	my $as_byte = sprintf("%02x", $addr & 0xff);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%b/$as_byte/g;
}

sub parse_indexed {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $arg = lc $$instr{arg};
	my %aregs = ( a => 0x06, b => 0x05, d => 0x0b );
	my %regs = ( x => 0x00, y => 0x20, u => 0x40, s => 0x60 );
	$arg =~ s/\s+//g;
	my $indirect = 0;
	my $want_bits = 0;
	my $text = "";
	if ($arg =~ /^\[(.*)\]$/) {
		$arg = $1;
		$indirect = 0x10;
	}
	# Allow bracketing a second time to reverse the meaning
	if ($arg =~ /^\[(.*)\]$/) {
		$arg = $1;
		$indirect = 0;
	}
	if ($arg =~ /^>>(.*)$/) {
		$want_bits = 16;
		$arg = $1;
	} elsif ($arg =~ /^>(.*)$/) {
		$want_bits = 8;
		$arg = $1;
	} elsif ($arg =~ /^<+(.*)$/) {
		$arg = $1;
	}
	my $base_size = length($tmpl) / 2;  # includes postbyte
	$$instr{size} = $base_size + 2;  # maximum
	my ($offset,$reg) = split(/,/, $arg);
	if ($arg !~ /,/) {
		if (!$indirect) {
			error("warning: invalid extended non-indirect address") if (!$quiet);
		}
		my $addr = eval_expr($arg);
		return undef if (!defined $addr);
		$text = sprintf("\%02x\%04x", 0x8f | $indirect, $addr & 0xffff);
	} elsif ($reg eq 'pcr') {
		my $addr = eval_expr($offset);
		if ($want_bits < 16) {
			my $next_pc = $pc + $base_size + 1;
			my $offset = $addr - $next_pc;
			if (defined $addr && $offset >= -128 && $offset <= 127) {
				$text = sprintf("\%02x\%02x", 0x8c | $indirect, $offset & 0xff);
			} else {
				$want_bits = 16;
			}
		}
		if ($want_bits == 16) {
			my $next_pc = $pc + $base_size + 2;
			my $offset = $addr - $next_pc;
			$text = sprintf("\%02x\%04x", 0x8d | $indirect, $offset & 0xffff);
		}
	} elsif ($offset eq "") {
		$$instr{size} -= 2;
		if ($reg =~ /([xyus])\+\+/) {
			$text = sprintf("\%02x", 0x81 | $regs{$1} | $indirect);
		} elsif ($reg =~ /--([xyus])/) {
			$text = sprintf("\%02x", 0x83 | $regs{$1} | $indirect);
		} elsif ($reg =~ /([xyus])\+/) {
			if ($indirect) {
				error("warning: invalid indirect post-increment") if (!$quiet);
			}
			$text = sprintf("\%02x", 0x80 | $regs{$1} | $indirect);
		} elsif ($reg =~ /-([xyus])/) {
			if ($indirect) {
				error("warning: invalid indirect pre-decrement") if (!$quiet);
			}
			$text = sprintf("\%02x", 0x82 | $regs{$1} | $indirect);
		} elsif ($reg =~ /([xyus])/) {
			$text = sprintf("\%02x", 0x84 | $regs{$1} | $indirect);
		} else {
			syntax_error("error: unknown index register or operation");
			return undef;
		}
	} elsif ($offset =~ /^([abd])$/) {
		my $areg = $1;
		$$instr{size} -= 2;
		if ($reg =~ /^([xyus])$/) {
			$reg = $1;
		} else {
			syntax_errors("error: unknown index register or operation");
			return undef;
		}
		$text = sprintf("\%02x", 0x80 | $regs{$reg} | $aregs{$areg} | $indirect);
	} else {
		if ($reg =~ /^([xyus])$/) {
			$reg = $1;
		} else {
			syntax_error("error: unknown index register or operation");
			return undef;
		}
		my $val = eval_expr($offset);
		return undef if (!defined $val);
		$want_bits = 8 if ($want_bits < 8 && $indirect);
		if ($want_bits < 8) {
			if (defined $val && $val >= -16 && $val <= 15) {
				if ($val == 0) {
					$text = sprintf("\%02x", 0x84 | $regs{$reg} | $indirect);
				} else {
					$val &= 0x1f;
					$text = sprintf("\%02x", $regs{$reg} | $val);
				}
			} else {
				$want_bits = 8;
			}
		}
		if ($want_bits == 8 || $want_bits == 16) {
			if (defined $val && $val >= -128 && $val <= 127) {
				$val &= 0xff;
				$text = sprintf("\%02x\%02x", 0x88 | $regs{$reg} | $indirect, $val);
			} else {
				$want_bits = 16;
			}
		}
		if ($want_bits == 16) {
			$val &= 0xffff;
			$text = sprintf("\%02x\%04x", 0x89 | $regs{$reg} | $indirect, $val);
		}
	}
	return undef if (!$text);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%i/$text/g;
	$$instr{size} = length($$instr{text}) / 2;
}

sub parse_extended {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $arg = lc $$instr{arg};
	$arg =~ s/\s+//g;
	$arg =~ s/^>//;
	my $size = (length($tmpl) / 2) + 1;
	my $addr = eval_expr($arg);
	$$instr{size} = $size;
	return undef if (!defined $addr);
	my $as_word = sprintf("%04x", $addr & 0xffff);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%w/$as_word/g;
}

sub parse_inherent {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	$$instr{size} = length($tmpl) / 2;
	$$instr{text} = $tmpl;
}

sub parse_relative {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $size = length($tmpl) / 2;
	$$instr{size} = $size;
	my $arg = lc $$instr{arg};
	my $addr = eval_expr($arg);
	return undef if (!defined $addr);
	my $next_pc = $pc + $size;
	my $offset = $addr - $next_pc;
	if ($offset < -128 || $offset > 127) {
		error("error: branch destination out of range");
		$unresolved = 1;
	}
	my $off_text = sprintf("%02x", $offset & 0xff);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%b/$off_text/g;
}

sub parse_longrelative {
	my ($instr) = @_;
	my $tmpl = $$instr{tmpl};
	my $size = (length($tmpl) / 2) + 1;
	$$instr{size} = $size;
	my $arg = lc $$instr{arg};
	my $addr = eval_expr($arg);
	return undef if (!defined $addr);
	my $next_pc = $pc + $size;
	my $offset = $addr - $next_pc;
	if ($offset >= -128 && $offset <= 127) {
		error("warning: long branch could be optimised") if (!$quiet);
	}
	my $off_text = sprintf("%04x", $offset & 0xffff);
	$$instr{text} = $tmpl;
	$$instr{text} =~ s/\%w/$off_text/g;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Read in a file and scan lines into an array.
# Globals:
#    $file_ctx - current file context (filename,lno)

sub read_file {
	my ($filename, $input) = @_;
	my $fd;
	OPENFILE: {
		for (@include_path) {
			last OPENFILE if open($fd, "<", "$_/$filename");
		}
		die "Couldn't open $filename: $!\n";
	}
	$input ||= [];
	print STDERR "=== Reading file '$filename'\n" if ($verbose);
	$file_ctx = { filename => $filename, lno => 0 };
	push @$input, { pragma => 'line', file_ctx => { %{$file_ctx} } };
	while (my $line = <$fd>) {
		chomp $line;
		$$file_ctx{lno}++;

		# Blank or pure comment lines
		if ($line =~ /^\s*([;*].*)$/) {
			push @$input, { line => $line };
			next;
		}

		my $cmd = scan_line($line, $defining_macro);
		my $opcode = $$cmd{opcode} || "";

		# Include file
		if ($opcode eq 'include') {
			# Stop defining any macro
			undef $defining_macro;
			my $include_filename = $$cmd{arg};
			# Keep label
			delete $$cmd{opcode};
			delete $$cmd{arg};
			push @$input, $cmd;
			if ($include_filename =~ /^([\/\"\'])?(.*)\1\s*$/) {
				$include_filename = $2;
			}
			my $old_file_ctx = { %{$file_ctx} };
			read_file($include_filename, $input);
			$file_ctx = $old_file_ctx;
			push @$input, { pragma => 'line', file_ctx => { %{$file_ctx} } };
			next;
		}

		# Start defining a macro
		if ($opcode eq 'macro') {
			my $new_macro = $$cmd{label};
			if (!defined $new_macro || $new_macro !~ /^\w+/) {
				syntax_error("error: bad macro name");
				next;
			}
			push @$input, { line => $$cmd{line} };
			$defining_macro = $new_macro;
			next;
		}

		# Finish defining a macro
		if ($opcode eq 'endm') {
			if (!defined $defining_macro) {
				syntax_error("error: endm with no macro");
				next;
			}
			# A label on the endm should be included in the macro
			if (exists $$cmd{label}) {
				push @{$macros{$defining_macro}}, { label => $$cmd{label} };
			}
			push @$input, { line => $$cmd{line} };
			undef $defining_macro;
			next;
		}

		# Add line as appropriate
		if (defined $defining_macro) {
			push @$input, { line => $$cmd{line} };
			push @{$macros{$defining_macro}}, $cmd;
		} else {
			push @$input, $cmd;
		}

	}

	# Stop defining any macro
	undef $defining_macro;

	close $fd;
	return $input;
}

# Break a line into (label,opcode,arg) and ensure label & opcode don't have
# any dodgy characters in them.  Allows \&\d+ when $defining_macro is set.
# Returns a hash of the parts.

sub scan_line {
	my ($line, $defining_macro) = @_;

	my $cmd = { line => $line };
	my ($label, $opcode, $arg) = split(/\s+/, $line, 3);
	$label = "" unless (defined $label);
	$opcode = "" unless (defined $opcode);
	$opcode = "" if ($opcode =~ /^;/);
	$arg = "" unless (defined $arg);

	if (!defined $defining_macro) {
		if ($label) {
			if ($label =~ /^([\w\@]+)$/) {
				$label = lc $1;  # de-taint label
				$$cmd{label} = $label;
			} else {
				syntax_error("error: bad label");
				return;
			}
		}
		if ($opcode) {
			if ($opcode =~ /^(\w+)$/) {
				$opcode = lc $1;  # de-taint opcode
				$$cmd{opcode} = $opcode;
			} else {
				syntax_error("error: bad opcode");
				return;
			}
		}
	} else {
		if ($label) {
			if ($label =~ /^(((\&\d+)|[\w\@])+)$/) {
				$label = lc $1;  # de-taint label
				$$cmd{label} = $label;
			} else {
				syntax_error("error: bad label in macro");
				return;
			}
		}
		if ($opcode) {
			if ($opcode =~ /^(((\&\d+)|\w)+)$/) {
				$opcode = lc $1;  # de-taint opcode
				$$cmd{opcode} = $opcode;
			} else {
				syntax_error("error: bad opcode in macro");
				return;
			}
		}
	}

	if (defined $arg) {
		# FCC is a special case.  Comments can safely be stripped from
		# arguments to all other opcodes.
		if ($opcode ne 'fcc') {
			$arg =~ s/\s+;.*//;
		}

		# Leave $$cmd{arg} tainted, nothing should use it unparsed
		$$cmd{arg} = $arg;
	}

	return $cmd;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

sub syntax_error {
	my ($msg) = @_;
	push @syntax_errors, "$$file_ctx{filename}:$$file_ctx{lno}: $msg";
}

sub die_if_syntax_error {
	if (scalar(@syntax_errors) > 0) {
		for (@syntax_errors) {
			print STDERR "$_\n";
		}
		exit 1;
	}
}

sub error {
	my ($msg) = @_;
	push @errors, "$$file_ctx{filename}:$$file_ctx{lno}: $msg";
}
