#!/usr/bin/perl -T use strict; # 6809 assembler # by Ciaran Anscomb, 2010 # Disclaimer: # Possibly not fit for any purpose. You're free to use, copy, modify and # redistribute so long as I don't get the blame for anything. use Text::Tabs; $tabstop = 8; my $VERSION = "0.11"; my $output_format = 'binary'; my $output_filename = undef; my $symbols_filename = undef; my $listing_filename = undef; my $verbose = 0; my $quiet = 0; my $exec_label = undef; sub helptext { print < 0) { my $opt = $ARGV[0]; if ($opt eq "--") { shift @ARGV; last; } elsif ($opt =~ /^--output=(.*)$/) { $output_filename = $1; } elsif ($opt eq "-o" || $opt eq "--output") { shift @ARGV; if ($ARGV[0] =~ /^(.*)$/) { $output_filename = $1; # de-taint } } elsif ($opt =~ /^--symbols=(.*)$/) { $symbols_filename = $1; } elsif ($opt eq "-s" || $opt eq "--symbols") { shift @ARGV; if ($ARGV[0] =~ /^(.*)$/) { $symbols_filename = $1; # de-taint } } elsif ($opt =~ /^--listing=(.*)$/) { $listing_filename = $1; } elsif ($opt eq "-l" || $opt eq "--listing") { shift @ARGV; if ($ARGV[0] =~ /^(.*)$/) { $listing_filename = $1; # de-taint } } elsif ($opt eq "-B" || $opt eq "--bin") { $output_format = 'binary'; } elsif ($opt eq "-H" || $opt eq "--hex") { $output_format = 'hex'; } elsif ($opt eq "-D" || $opt eq "--dragondos") { $output_format = 'dragondos'; } elsif ($opt eq "-C" || $opt eq "--coco") { $output_format = 'coco'; } elsif ($opt eq "-e" || $opt eq "--exec") { shift @ARGV; if ($ARGV[0] =~ /^(.*)$/) { $exec_label = $1; # de-taint } } elsif ($opt =~ /^--exec=(.*)$/) { $exec_label = $1; } elsif ($opt eq "-v" || $opt eq "--verbose") { $verbose++; } elsif ($opt eq "-q" || $opt eq "--quiet") { $quiet = 1; } elsif ($opt eq "--version") { print "asm6809.pl $VERSION\n"; exit 0; } elsif ($opt eq "--help") { helptext(); exit 0; } elsif ($opt =~ /^-/) { print STDERR "$0: unrecognised option '$opt'\n"; print STDERR "Try '$0 --help' for more information.\n"; exit 1; } else { last; } shift @ARGV; } if (scalar(@ARGV) < 1) { print STDERR "$0: source filename required\n"; print STDERR "Try '$0 --help' for more information.\n"; exit 1; } my $filename = shift @ARGV; my %modes = ( 'org' => 'equ', 'equ' => 'equ', 'section' => 'section', 'setdp' => 'setdp', 'rmb' => 'rmb', 'fcb' => 'fcb', 'fdb' => 'fdb', 'fcc' => 'fcc', 'pshs' => 'stack', 'puls' => 'stack', 'pshu' => 'stack', 'pulu' => 'stack', 'exg' => 'pair', 'tfr' => 'pair', ); my %tmpls = ( stack => { 'pshs' => { tmpl => '34%s' }, 'puls' => { tmpl => '35%s' }, 'pshu' => { tmpl => '36%u' }, 'pulu' => { tmpl => '37%u' }, }, pair => { 'exg' => { tmpl => '1e%b' }, 'tfr' => { tmpl => '1f%b' }, }, immediate => { 'orcc' => { tmpl => '1a%b' }, 'andcc' => { tmpl => '1c%b' }, # 'cwai' => { tmpl => '3c%b' }, # 'suba' => { tmpl => '80%b' }, 'cmpa' => { tmpl => '81%b' }, 'sbca' => { tmpl => '82%b' }, 'subd' => { tmpl => '83%w' }, 'anda' => { tmpl => '84%b' }, 'bita' => { tmpl => '85%b' }, 'lda' => { tmpl => '86%b' }, 'eora' => { tmpl => '88%b' }, 'adca' => { tmpl => '89%b' }, 'ora' => { tmpl => '8a%b' }, 'adda' => { tmpl => '8b%b' }, 'cmpx' => { tmpl => '8c%w' }, 'ldx' => { tmpl => '8e%w' }, # 'subb' => { tmpl => 'c0%b' }, 'cmpb' => { tmpl => 'c1%b' }, 'sbcb' => { tmpl => 'c2%b' }, 'addd' => { tmpl => 'c3%w' }, 'andb' => { tmpl => 'c4%b' }, 'bitb' => { tmpl => 'c5%b' }, 'ldb' => { tmpl => 'c6%b' }, 'eorb' => { tmpl => 'c8%b' }, 'adcb' => { tmpl => 'c9%b' }, 'orb' => { tmpl => 'ca%b' }, 'addb' => { tmpl => 'cb%b' }, 'ldd' => { tmpl => 'cc%w' }, 'ldu' => { tmpl => 'ce%w' }, # 'cmpd' => { tmpl => '1083%w' }, 'cmpy' => { tmpl => '108c%w' }, 'ldy' => { tmpl => '108e%w' }, # 'lds' => { tmpl => '10ce%w' }, # 'cmpu' => { tmpl => '1183%w' }, 'cmps' => { tmpl => '118c%w' }, }, direct => { 'neg' => { tmpl => '00%b' }, 'com' => { tmpl => '03%b' }, 'lsr' => { tmpl => '04%b' }, 'ror' => { tmpl => '06%b' }, 'asr' => { tmpl => '07%b' }, 'asl' => { tmpl => '08%b' }, 'lsl' => { tmpl => '08%b' }, 'rol' => { tmpl => '09%b' }, 'dec' => { tmpl => '0a%b' }, 'inc' => { tmpl => '0c%b' }, 'tst' => { tmpl => '0d%b' }, 'jmp' => { tmpl => '0e%b' }, 'clr' => { tmpl => '0f%b' }, # 'suba' => { tmpl => '90%b' }, 'cmpa' => { tmpl => '91%b' }, 'sbca' => { tmpl => '92%b' }, 'subd' => { tmpl => '93%b' }, 'anda' => { tmpl => '94%b' }, 'bita' => { tmpl => '95%b' }, 'lda' => { tmpl => '96%b' }, 'sta' => { tmpl => '97%b' }, 'eora' => { tmpl => '98%b' }, 'adca' => { tmpl => '99%b' }, 'ora' => { tmpl => '9a%b' }, 'adda' => { tmpl => '9b%b' }, 'cmpx' => { tmpl => '9c%b' }, 'jsr' => { tmpl => '9d%b' }, 'ldx' => { tmpl => '9e%b' }, 'stx' => { tmpl => '9f%b' }, # 'subb' => { tmpl => 'd0%b' }, 'cmpb' => { tmpl => 'd1%b' }, 'sbcb' => { tmpl => 'd2%b' }, 'addd' => { tmpl => 'd3%b' }, 'andb' => { tmpl => 'd4%b' }, 'bitb' => { tmpl => 'd5%b' }, 'ldb' => { tmpl => 'd6%b' }, 'stb' => { tmpl => 'd7%b' }, 'eorb' => { tmpl => 'd8%b' }, 'adcb' => { tmpl => 'd9%b' }, 'orb' => { tmpl => 'da%b' }, 'addb' => { tmpl => 'db%b' }, 'ldd' => { tmpl => 'dc%b' }, 'std' => { tmpl => 'dd%b' }, 'ldu' => { tmpl => 'de%b' }, 'stu' => { tmpl => 'df%b' }, # 'cmpd' => { tmpl => '1093%b' }, 'cmpy' => { tmpl => '109c%b' }, 'ldy' => { tmpl => '109e%b' }, 'sty' => { tmpl => '109f%b' }, # 'lds' => { tmpl => '10de%b' }, 'sts' => { tmpl => '10df%b' }, # 'cmpu' => { tmpl => '1193%b' }, 'cmps' => { tmpl => '119c%b' }, }, indexed => { 'leax' => { tmpl => '30%i' }, 'leay' => { tmpl => '31%i' }, 'leas' => { tmpl => '32%i' }, 'leau' => { tmpl => '33%i' }, # 'neg' => { tmpl => '60%i' }, 'com' => { tmpl => '63%i' }, 'lsr' => { tmpl => '64%i' }, 'ror' => { tmpl => '66%i' }, 'asr' => { tmpl => '67%i' }, 'asl' => { tmpl => '68%i' }, 'lsl' => { tmpl => '68%i' }, 'rol' => { tmpl => '69%i' }, 'dec' => { tmpl => '6a%i' }, 'inc' => { tmpl => '6c%i' }, 'tst' => { tmpl => '6d%i' }, 'jmp' => { tmpl => '6e%i' }, 'clr' => { tmpl => '6f%i' }, # 'suba' => { tmpl => 'a0%i' }, 'cmpa' => { tmpl => 'a1%i' }, 'sbca' => { tmpl => 'a2%i' }, 'subd' => { tmpl => 'a3%i' }, 'anda' => { tmpl => 'a4%i' }, 'bita' => { tmpl => 'a5%i' }, 'lda' => { tmpl => 'a6%i' }, 'sta' => { tmpl => 'a7%i' }, 'eora' => { tmpl => 'a8%i' }, 'adca' => { tmpl => 'a9%i' }, 'ora' => { tmpl => 'aa%i' }, 'adda' => { tmpl => 'ab%i' }, 'cmpx' => { tmpl => 'ac%i' }, 'jsr' => { tmpl => 'ad%i' }, 'ldx' => { tmpl => 'ae%i' }, 'stx' => { tmpl => 'af%i' }, # 'subb' => { tmpl => 'e0%i' }, 'cmpb' => { tmpl => 'e1%i' }, 'sbcb' => { tmpl => 'e2%i' }, 'addd' => { tmpl => 'e3%i' }, 'andb' => { tmpl => 'e4%i' }, 'bitb' => { tmpl => 'e5%i' }, 'ldb' => { tmpl => 'e6%i' }, 'stb' => { tmpl => 'e7%i' }, 'eorb' => { tmpl => 'e8%i' }, 'adcb' => { tmpl => 'e9%i' }, 'orb' => { tmpl => 'ea%i' }, 'addb' => { tmpl => 'eb%i' }, 'ldd' => { tmpl => 'ec%i' }, 'std' => { tmpl => 'ed%i' }, 'ldu' => { tmpl => 'ee%i' }, 'stu' => { tmpl => 'ef%i' }, # 'cmpd' => { tmpl => '10a3%i' }, 'cmpy' => { tmpl => '10ac%i' }, 'ldy' => { tmpl => '10ae%i' }, 'sty' => { tmpl => '10af%i' }, # 'lds' => { tmpl => '10ee%i' }, 'sts' => { tmpl => '10ef%i' }, # 'cmpu' => { tmpl => '11a3%i' }, 'cmps' => { tmpl => '11ac%i' }, }, extended => { 'neg' => { tmpl => '70%w' }, 'com' => { tmpl => '73%w' }, 'lsr' => { tmpl => '74%w' }, 'ror' => { tmpl => '76%w' }, 'asr' => { tmpl => '77%w' }, 'asl' => { tmpl => '78%w' }, 'lsl' => { tmpl => '78%w' }, 'rol' => { tmpl => '79%w' }, 'dec' => { tmpl => '7a%w' }, 'inc' => { tmpl => '7c%w' }, 'tst' => { tmpl => '7d%w' }, 'jmp' => { tmpl => '7e%w' }, 'clr' => { tmpl => '7f%w' }, # 'suba' => { tmpl => 'b0%w' }, 'cmpa' => { tmpl => 'b1%w' }, 'sbca' => { tmpl => 'b2%w' }, 'subd' => { tmpl => 'b3%w' }, 'anda' => { tmpl => 'b4%w' }, 'bita' => { tmpl => 'b5%w' }, 'lda' => { tmpl => 'b6%w' }, 'sta' => { tmpl => 'b7%w' }, 'eora' => { tmpl => 'b8%w' }, 'adca' => { tmpl => 'b9%w' }, 'ora' => { tmpl => 'ba%w' }, 'adda' => { tmpl => 'bb%w' }, 'cmpx' => { tmpl => 'bc%w' }, 'jsr' => { tmpl => 'bd%w' }, 'ldx' => { tmpl => 'be%w' }, 'stx' => { tmpl => 'bf%w' }, # 'subb' => { tmpl => 'f0%w' }, 'cmpb' => { tmpl => 'f1%w' }, 'sbcb' => { tmpl => 'f2%w' }, 'addd' => { tmpl => 'f3%w' }, 'andb' => { tmpl => 'f4%w' }, 'bitb' => { tmpl => 'f5%w' }, 'ldb' => { tmpl => 'f6%w' }, 'stb' => { tmpl => 'f7%w' }, 'eorb' => { tmpl => 'f8%w' }, 'adcb' => { tmpl => 'f9%w' }, 'orb' => { tmpl => 'fa%w' }, 'addb' => { tmpl => 'fb%w' }, 'ldd' => { tmpl => 'fc%w' }, 'std' => { tmpl => 'fd%w' }, 'ldu' => { tmpl => 'fe%w' }, 'stu' => { tmpl => 'ff%w' }, # 'cmpd' => { tmpl => '10b3%w' }, 'cmpy' => { tmpl => '10bc%w' }, 'ldy' => { tmpl => '10be%w' }, 'sty' => { tmpl => '10bf%w' }, # 'lds' => { tmpl => '10fe%w' }, 'sts' => { tmpl => '10ff%w' }, # 'cmpu' => { tmpl => '11b3%w' }, 'cmps' => { tmpl => '11bc%w' }, }, inherent => { 'nop' => { tmpl => '12' }, 'sync' => { tmpl => '13' }, 'daa' => { tmpl => '19' }, 'sex' => { tmpl => '1d' }, # 'rts' => { tmpl => '39' }, 'abx' => { tmpl => '3a' }, 'rti' => { tmpl => '3b' }, 'mul' => { tmpl => '3d' }, 'swi' => { tmpl => '3f' }, # 'nega' => { tmpl => '40' }, 'coma' => { tmpl => '43' }, 'lsra' => { tmpl => '44' }, 'rora' => { tmpl => '46' }, 'asra' => { tmpl => '47' }, 'asla' => { tmpl => '48' }, 'lsla' => { tmpl => '48' }, 'rola' => { tmpl => '49' }, 'deca' => { tmpl => '4a' }, 'inca' => { tmpl => '4c' }, 'tsta' => { tmpl => '4d' }, 'clra' => { tmpl => '4f' }, # 'negb' => { tmpl => '50' }, 'comb' => { tmpl => '53' }, 'lsrb' => { tmpl => '54' }, 'rorb' => { tmpl => '56' }, 'asrb' => { tmpl => '57' }, 'aslb' => { tmpl => '58' }, 'lslb' => { tmpl => '58' }, 'rolb' => { tmpl => '59' }, 'decb' => { tmpl => '5a' }, 'incb' => { tmpl => '5c' }, 'tstb' => { tmpl => '5d' }, 'clrb' => { tmpl => '5f' }, # 'swi2' => { tmpl => '103f' }, # 'swi3' => { tmpl => '113f' }, }, relative => { 'bra' => { tmpl => '20%b' }, 'brn' => { tmpl => '21%b' }, 'bhi' => { tmpl => '22%b' }, 'bls' => { tmpl => '23%b' }, 'bhs' => { tmpl => '24%b' }, 'bcc' => { tmpl => '24%b' }, 'blo' => { tmpl => '25%b' }, 'bcs' => { tmpl => '25%b' }, 'bne' => { tmpl => '26%b' }, 'beq' => { tmpl => '27%b' }, 'bvc' => { tmpl => '28%b' }, 'bvs' => { tmpl => '29%b' }, 'bpl' => { tmpl => '2a%b' }, 'bmi' => { tmpl => '2b%b' }, 'bge' => { tmpl => '2c%b' }, 'blt' => { tmpl => '2d%b' }, 'bgt' => { tmpl => '2e%b' }, 'ble' => { tmpl => '2f%b' }, 'bsr' => { tmpl => '8d%b' }, }, longrelative => { 'lbra' => { tmpl => '16%w' }, 'lbsr' => { tmpl => '17%w' }, 'lbrn' => { tmpl => '1021%w' }, 'lbhi' => { tmpl => '1022%w' }, 'lbls' => { tmpl => '1023%w' }, 'lbhs' => { tmpl => '1024%w' }, 'lbcc' => { tmpl => '1024%w' }, 'lbcs' => { tmpl => '1025%w' }, 'lblo' => { tmpl => '1025%w' }, 'lbne' => { tmpl => '1026%w' }, 'lbeq' => { tmpl => '1027%w' }, 'lbvc' => { tmpl => '1028%w' }, 'lbvs' => { tmpl => '1029%w' }, 'lbpl' => { tmpl => '102a%w' }, 'lbmi' => { tmpl => '102b%w' }, 'lbge' => { tmpl => '102c%w' }, 'lblt' => { tmpl => '102d%w' }, 'lbgt' => { tmpl => '102e%w' }, 'lble' => { tmpl => '102f%w' }, }, ); my %arg_parse = ( equ => \&parse_equ, section => \&parse_section, setdp => \&parse_setdp, rmb => \&parse_rmb, fcb => \&parse_fcb, fdb => \&parse_fdb, fcc => \&parse_fcc, stack => \&parse_stack, pair => \&parse_pair, address => \&parse_address, immediate => \&parse_immediate, direct => \&parse_direct, indexed => \&parse_indexed, extended => \&parse_extended, inherent => \&parse_inherent, relative => \&parse_relative, longrelative => \&parse_longrelative, ); my @instrs = (); my %export_labels = (); my $num_instrs = 0; my @file_stack = (); { my $file; print STDERR "=== Reading file '$filename'\n" if ($verbose); unless (open($file, "<", $filename)) { die "Couldn't open $filename: $!\n"; } push @file_stack, { file => $file, filename => $filename, line_number => 0, }; } my @line_text = (); my $line_number = 0; my %equs = (); my %macros = (); my %have_label = (); # to check for duplicates in pass 1 my %file_changes = (); my $pc = 0; my $unresolved = 0; my $dp = undef; my $ino = 0; my $exec_addr = undef; my %section_pc = (); my $section = "main"; my $infile; my $defining_macro = undef; my $expanding_macro = undef; my @instr_queue = (); my @line_queue = (); my @syntax_errors = (); # syntax errors fatal after first pass my @errors = (); # other errors and warnings go in here my %label_errors = (); # only report missing labels once # # First pass - read in all files, generating a list of instructions. # Main work done by process_line sub below. # my $pass = 1; print STDERR "=== Pass $pass\n" if ($verbose); while (my $file_ctx = pop @file_stack) { $infile = $$file_ctx{file}; $filename = $$file_ctx{filename}; $line_number = $$file_ctx{line_number}; $file_changes{$ino} = $file_ctx; while (my $line = <$infile>) { chomp $line; $line_number++; process_line($line); while (my $instr = shift @instr_queue) { my $mode = undef; my $opcode = undef; if (exists $$instr{opcode}) { $opcode = $$instr{opcode}; if (exists $modes{$opcode}) { $mode = $modes{$opcode}; } elsif (exists $tmpls{inherent}->{$opcode}) { $mode = 'inherent'; } elsif (exists $tmpls{relative}->{$opcode}) { $mode = 'relative'; } elsif (exists $tmpls{longrelative}->{$opcode}) { $mode = 'longrelative'; } elsif (exists $$instr{arg}) { my $arg = $$instr{arg}; if ($arg =~ /^#/) { $mode = 'immediate'; } elsif ($arg =~ /^\[.*\]$/) { $mode = 'indexed'; } elsif ($arg =~ /,/) { $mode = 'indexed'; } elsif ($arg =~ /^/) { $mode = 'extended'; } elsif ($arg) { $mode = 'address'; # either direct or extended } else { push @syntax_errors, "$filename:$line_number: error: unknown opcode '$opcode'"; } } } if (defined $mode) { $$instr{mode} = $mode; if (exists $tmpls{$mode}->{$opcode}) { $$instr{tmpl} = $tmpls{$mode}->{$opcode}->{tmpl}; } } $instrs[$ino] = $instr; process_instr($instr); $pc += $$instr{size} if (exists $$instr{size}); $line_text[$ino] = shift @line_queue; $ino++; } } close $infile; if (defined $exec_label) { $exec_addr = eval_expr($exec_label); $unresolved = 1 if (!defined $exec_addr); } } # Syntax errors are fatal here - others might resolve themselves later if (scalar(@syntax_errors) > 0) { for (@syntax_errors) { print STDERR "$_\n"; } exit 1; } $num_instrs = $ino; # # Reprocess list of instructions until no instruction sizes or labels change. # while ($unresolved && $pass < 10) { $pass++; print STDERR "=== Pass $pass\n" if ($verbose); # Suppress non-fatal reporting until after the last pass @errors = (); %label_errors = (); $pc = 0; $unresolved = 0; $dp = undef; for ($ino = 0; $ino < $num_instrs; $ino++) { if (exists $file_changes{$ino}) { $filename = $file_changes{$ino}->{filename}; $line_number = $file_changes{$ino}->{line_number}; } $line_number++ if (defined $line_text[$ino]); next if (!defined $instrs[$ino]); my $instr = $instrs[$ino]; process_instr($instr); $pc += $$instr{size} if (exists $$instr{size}); } if (defined $exec_label) { $exec_addr = eval_expr($exec_label); $unresolved = 1 if (!defined $exec_addr); } } for (@errors) { print STDERR "$_\n"; } if ($unresolved) { print "error: unresolved after $pass passes\n"; exit 1; } my %region_start = (); my %region_end = (); my $listing_file = undef; if (defined $listing_filename) { open($listing_file, ">", $listing_filename); } # # Place instruction text into regions. # Generate listing file if required. # $pc = 0; for ($ino = 0; $ino < $num_instrs; $ino++) { my $instr = $instrs[$ino]; my ($start,$end) = (undef,undef); my $addr = ""; my $text = ""; my $line = expand($line_text[$ino] || ""); $text = $$instr{text} || ""; if ($$instr{label} && $$instr{label} !~ /^\d+$/) { $start = $equs{$$instr{label}}; $addr = sprintf("\%04X", $equs{$$instr{label}} & 0xffff); } elsif ($text || $$instr{label}) { $start = $$instr{addr}; $addr = sprintf("\%04X", $start); } if (defined $listing_file) { printf $listing_file "\%-6s\%-14s \%s\n", $addr, uc $text, $line; } if ($$instr{text}) { my $end = $start + $$instr{size}; if (exists $region_end{$start}) { my $region = $region_end{$start}; $$region{text} .= $text; $$region{end} = $end; delete $region_end{$start}; $region_end{$end} = $region; } elsif (exists $region_start{$end}) { my $region = $region_start{$end}; $$region{text} = $text . $$region{text}; $$region{start} = $start; delete $region_start{$end}; $region_start{$start} = $region; } else { my $region = { text => $text, start => $start, end => $end, }; $region_start{$start} = $region; $region_end{$end} = $region; } } } # # Generate symbols file, if required. # if (defined $symbols_filename) { my $symbols_file = undef; if (open($symbols_file, ">", $symbols_filename)) { for my $label (keys %export_labels) { if (exists $macros{lc($label)}) { print $symbols_file "$label\tmacro\n"; for (@{$macros{lc($label)}}) { print $symbols_file "$_\n"; } print $symbols_file "\tendm\n"; } elsif (exists $equs{$label}) { print $symbols_file "$label\tequ\t$equs{$label}\n"; } } } else { die "Couldn't open $symbols_filename for writing: $!\n"; } } exit 0 if (!defined $output_filename); # # Generate output file. # my $output_file = undef; unless (open($output_file, ">", $output_filename)) { die "Couldn't open $output_filename for writing: $!\n"; } my @regions = sort { $a <=> $b } keys %region_start; if ($output_format eq 'dragondos') { my $start = $regions[0]; my $region0 = $region_start{$start}; my $regionl = $region_start{$regions[$#regions]}; my $length = ($regions[$#regions] + (length($$regionl{text}) / 2)) - $start; my $exec = (defined $exec_addr) ? $exec_addr : $start; print $output_file pack("CCnnnC", 0x55, 0x02, $start, $length, $exec, 0xaa); } my $last_region_end = undef; for my $r (sort { $a <=> $b } keys %region_start) { my $region = $region_start{$r}; my $addr = $r; if ($output_format eq 'binary' || $output_format eq 'dragondos') { if (defined $last_region_end) { my $skip = $addr - $last_region_end; print $output_file "\0" x $skip; } print $output_file pack("H*", $$region{text}); } elsif ($output_format eq 'hex') { my $size = length($$region{text}) / 2; my $offset = 0; while ($size > 0) { my $bytes = ($size > 32) ? 32 : $size; printf $output_file ":\%02X\%04X00\%s00\n", $bytes, $addr, uc substr($$region{text}, $offset, $bytes * 2); $offset += $bytes * 2; $addr += $bytes; $size -= $bytes; } } elsif ($output_format eq 'coco') { print $output_file pack("xnnH*", length($$region{text}) / 2, $addr, $$region{text}); } $last_region_end = $$region{end}; } if ($output_format eq 'hex') { print $output_file ":00000001FF\n"; } elsif ($output_format eq 'coco' && defined $exec_addr) { print $output_file pack("Cxxn", 255, $exec_addr); } close $output_file; exit 0; ############################################################################ # # During the first pass, this is called once per line read. Also calls # itself if macro expansion is required (therefore macros must be defined # before they are used). # sub process_line { my ($line) = @_; push @line_queue, $line; my ($label,$opcode,$arg) = split(/\s+/, $line, 3); if (!defined $label || $label =~ /^[;\*]/) { push @instr_queue, {}; return; } if (defined $defining_macro) { if (defined $opcode && $opcode eq "macro") { push @syntax_errors, "$filename:$line_number: nested macro definitions not allowed"; return; } # Push an empty instruction on the queue for each macro # line so it still gets printed in a listing. push @instr_queue, {}; if (defined $opcode && $opcode eq "endm") { undef $defining_macro; return; } push @{$macros{$defining_macro}}, $line; return; } if ($label =~ /^([\w\@]*)$/) { $label = lc $1; # de-taint label } else { push @syntax_errors, "$filename:$line_number: error: bad label"; return; } if (defined $opcode && $opcode eq 'macro') { if ($label eq "") { push @syntax_errors, "$filename:$line_number: error: no name for macro"; } else { push @instr_queue, {}; $defining_macro = lc($label); } return; } my $instr = {}; push @instr_queue, $instr; if ($label eq "") { return if (!defined $opcode || $opcode eq "" || $opcode =~ /^[;\*]/); } else { if (exists $have_label{$label}) { push @syntax_errors, "$filename:$line_number: error: label '$label' previously defined at $have_label{$label}"; return; } $have_label{$label} = "$filename:$line_number" unless ($label =~ /^\d+$/ || $label =~ /\@$/); $$instr{label} = $label; return if (!defined $opcode || $opcode eq "" || $opcode =~ /^;/); } if (exists $macros{lc($opcode)}) { $expanding_macro = lc($opcode); my @args = split_args($arg); for my $mac_line (@{$macros{$expanding_macro}}) { my $mac_copy = $mac_line; $mac_copy =~ s/\&(\d+)/$args[$1-1]/g; process_line($mac_copy); } undef $expanding_macro; return; } if ($opcode eq 'include') { push @file_stack, { file => $infile, filename => $filename, line_number => $line_number, }; if ($arg =~ m#^([\/\"])([^\1]*)(\1)#) { $filename = $2; print STDERR "=== Reading file '$filename'\n" if ($verbose); if (open(my $new, "<", $filename)) { $infile = $new; $line_number = 0; $file_changes{$ino+1} = { filename => $filename, line_number => $line_number, }; return; } push @syntax_errors, "$filename:$line_number: error: couldn't open '$filename' for reading: $!"; return; } push @syntax_errors, "$filename:$line_number: error: bad filename"; return; } if ($opcode eq 'export') { $export_labels{$arg} = 1; return; } if ($opcode =~ /^(\w+)$/) { $$instr{opcode} = $1; # de-taint opcode } else { push @syntax_errors, "$filename:$line_number: error: bad opcode"; return; } return if (!defined $arg || $arg =~ /^;/); # TODO: Possibly check for any valid $arg here? # For now just lazily strip comments from non-fcc args if ($opcode ne 'fcc') { $arg =~ s/\s+;.*//; } # arg deliberately left tainted here - nothing should use it directly $$instr{arg} = $arg; } ############################################################################ sub process_instr { my ($instr) = @_; my $mode = $$instr{mode}; if (!defined $mode || $mode ne 'equ') { if (exists $$instr{addr} && $$instr{addr} != $pc) { $unresolved = 1; } $$instr{addr} = $pc; if (exists $$instr{label} && $$instr{label} !~ /^\d+$/) { my $label = $$instr{label}; if (exists $equs{$label} && $equs{$label} != $pc) { $unresolved = 1; } $equs{$label} = $pc; } } if (defined $mode && exists $arg_parse{$mode}) { $arg_parse{$mode}($instr); } } # Tries to convert an expression into one eval-able by Perl. Any undefined # label references cause $unresolved to be set and undef returned. sub eval_expr { my ($expr) = @_; my $evalstr = ""; my $invalid = 0; $expr =~ s/^\s+//; my $nest = 0; my $expect_value = 1; while ($expr ne "") { if ($expect_value) { if ($expr =~ /^(\(+)\s*(.*)$/) { $evalstr .= $1; $nest += length($1); $expr = $2; } elsif ($expr =~ /^([~\-])\s*(.*)$/) { $evalstr .= $1; $expr = $2; } elsif ($expr =~ /^(\*)\s*(.*)$/) { $evalstr .= $pc; $expr = $2; $expect_value = 0; } elsif ($expr =~ /^\$([\da-fA-F]+)\s*(.*)$/) { $evalstr .= hex($1); $expr = $2; $expect_value = 0; } elsif ($expr =~ /^\%([01]+)\s*(.*)$/) { $evalstr .= oct("0b$1"); $expr = $2; $expect_value = 0; } elsif ($expr =~ /^\@([0-7]+)\s*(.*)$/) { $evalstr .= oct("0$1"); $expr = $2; $expect_value = 0; } elsif ($expr =~ /^'(.)\s*(.*)$/) { $evalstr .= ord($1); $expr = $2; $expect_value = 0; } elsif ($expr =~ /^(\d+)([fb])\s*(.*)$/i) { my $num .= $1; my $dir = uc $2; $expr = $3; my $instr = undef; # Search backwards or forwards for local label if ($dir eq 'B') { for (my $i = $ino; $i >= 0; $i--) { if (exists $instrs[$i]->{label} && $instrs[$i]->{label} eq $num) { $instr = $instrs[$i]; last; } } } else { for (my $i = $ino + 1; $i < $num_instrs; $i++) { if (exists $instrs[$i]->{label} && $instrs[$i]->{label} eq $num) { $instr = $instrs[$i]; last; } } } if (!defined $instr) { push @errors, "$filename:$line_number: error: unknown local label '$num$dir'"; $unresolved = 1; $invalid = 1; } else { $evalstr .= (0+$$instr{addr}); } $expect_value = 0; } elsif ($expr =~ /^(\d+(\.\d+)?)\s*(.*)$/) { $evalstr .= (0+$1); $expr = $3; $expect_value = 0; } elsif ($expr =~ /^(\w+)\s*(.*)$/) { $expr = $2; if (exists $equs{$1}) { $evalstr .= (0+$equs{$1}); } else { $evalstr .= (0); if (!exists $label_errors{$1}) { push @errors, "$filename:$line_number: error: unknown label '$1'"; $label_errors{$1} = 1; } $unresolved = 1; $invalid = 1; } $expect_value = 0; } else { push @syntax_errors, "$filename:$line_number: error: bad expression"; return undef; } } else { if ($expr =~ /^(\)+)\s*(.*)$/) { $evalstr .= $1; $nest -= length($1); $expr = $2; } elsif ($expr =~ /^([\+\-\*\/\&\|\^]|<<|>>)\s*(.*)$/) { $evalstr .= $1; $expr = $2; $expect_value = 1; } else { push @syntax_errors, "$filename:$line_number: error: bad expression"; return undef; } } } if ($nest != 0) { push @syntax_errors, "$filename:$line_number: error: bad expression"; return undef; } return undef if ($invalid); my $ret = undef; eval "\$ret = ($evalstr);"; return $ret; } sub split_args { my ($arg) = @_; $arg =~ s/^\s+//; my @args = (); while ($arg ne "") { last if ($arg =~ /^;/); # skip comments if ($arg =~ /^,/) { # empty expression not allowed push @syntax_errors, "$filename:$line_number: error: bad expression"; return; } elsif ($arg =~ /^([\/\"!])(.*?)(\1)\s*(.*)?$/) { # string $arg = $4; push @args, $2; } elsif ($arg =~ /^([^,;]*)(.*)$/) { # expression $arg = $2; push @args, $1; } $arg =~ s/^,\s*//; } return @args; } sub parse_equ { my ($instr) = @_; my $arg = lc $$instr{arg}; my $val = eval_expr($arg); return undef if (!defined $val); if (exists $$instr{addr} && $$instr{addr} != $val) { $unresolved = 1; } $$instr{addr} = $val; if ($$instr{opcode} eq 'org') { $pc = $val; } if (exists $$instr{label} && $$instr{label} !~ /^\d+$/) { $equs{$$instr{label}} = $val; } } sub parse_section { my ($instr) = @_; my $arg = $$instr{arg}; $arg =~ s/^\s+//; $arg =~ s/\s+$//; $section_pc{$section} = $pc; $section = $arg; $pc = $section_pc{$section}; } sub parse_setdp { my ($instr) = @_; my $arg = lc $$instr{arg}; my $val = eval_expr($arg); return undef if (!defined $val); $dp = $val & 0xff; } sub parse_rmb { my ($instr) = @_; my $arg = lc $$instr{arg}; $arg =~ s/\s+//g; my $num = eval_expr($arg); return undef if (!defined $num); $$instr{size} = $num; } sub parse_fcb { my ($instr) = @_; my $arg = $$instr{arg}; my $text = ""; for my $i (split(/,/, $arg)) { my $num = eval_expr($i); if (defined $num) { $text .= sprintf("%02x", $num & 0xff); } else { $unresolved = 1; } } $$instr{text} = $text; $$instr{size} = length($text) / 2; } sub parse_fdb { my ($instr) = @_; my $arg = lc $$instr{arg}; my $text = ""; for my $i (split(/,/, $arg)) { my $num = eval_expr($i); if (defined $num) { $text .= sprintf("%04x", $num & 0xffff); } else { $unresolved = 1; } } $$instr{text} = $text; $$instr{size} = length($text) / 2; } sub parse_fcc { my ($instr) = @_; my $arg = $$instr{arg}; my $text = ""; my $c; my $tmp = ""; my $valid = 1; $arg =~ s/^\s+//; while (defined $arg && $arg ne "") { last if ($arg =~ /^;/); # skip comments if ($arg =~ /^,/) { # empty expression not allowed push @syntax_errors, "$filename:$line_number: error: bad expression"; return; } elsif ($arg =~ /^([\/\"!])(.*?)(\1)\s*(.*)?$/) { # string $arg = $4; my $string = $2; $string =~ s/(.)/sprintf("%02x", ord($1))/ge; $text .= $string; } elsif ($arg =~ /^([^,;]*)(.*)$/) { # expression $arg = $2; my $val = eval_expr($1); if (defined $val) { $text .= sprintf("%02x", $val & 0xff); } else { $text .= "xx"; $valid = 0; } } $arg =~ s/^,\s*//; } $$instr{size} = length($text) / 2; return undef unless $valid; $$instr{text} = $text; } sub parse_stack { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $arg = lc $$instr{arg}; my $sstack = 0; my $ustack = 0; my %regs = ( cc => 0x01, a => 0x02, b => 0x04, d => 0x06, dp => 0x08, x => 0x10, y => 0x20, pc => 0x80, ); $arg =~ s/\s+//g; for my $reg (split(/,/, $arg)) { if ($reg eq 's') { $ustack |= 0x40; } elsif ($reg eq 'u') { $sstack |= 0x40; } elsif (exists $regs{$reg}) { $ustack |= $regs{$reg}; $sstack |= $regs{$reg}; } else { push @syntax_errors, "$filename:$line_number: error: bad register in stack operation"; } } $sstack = sprintf("%02x", $sstack); $ustack = sprintf("%02x", $ustack); $$instr{text} = $tmpl; $$instr{text} =~ s/\%s/$sstack/g; $$instr{text} =~ s/\%u/$ustack/g; $$instr{size} = length($$instr{text}) / 2; } sub parse_pair { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $arg = lc $$instr{arg}; my %regs = ( d => 0, x => 1, y => 2, u => 3, s => 4, pc => 5, a => 8, b => 9, cc => 10, dp => 11, ); $arg =~ s/\s+//g; my $warn_numerical = 0; my ($src,$dest) = split(/,/, $arg); if ($src =~ /^\d+$/) { $warn_numerical = 1; } elsif (exists $regs{$src}) { $src = $regs{$src}; } else { push @syntax_errors, "$filename:$line_number: error: bad source register"; $src = 0; } if ($dest =~ /^\d+$/) { $warn_numerical = 1; } elsif (exists $regs{$dest}) { $dest = $regs{$dest}; } else { push @syntax_errors, "$filename:$line_number: error: bad destination register"; $dest = 0; } push @errors, "$filename:$line_number: warning: numerical values used in tfr or exg" if ($warn_numerical && !$quiet); if (($src & 8) != ($dest & 8)) { push @errors, "$filename:$line_number: warning: register sizes don't match in tfr or exg" if (!$quiet); } my $pair = sprintf("%01x%01x", $src & 15, $dest & 15); $$instr{size} = length($tmpl) / 2; $$instr{text} = $tmpl; $$instr{text} =~ s/\%b/$pair/g; } sub parse_address { my ($instr) = @_; my $opcode = $$instr{opcode}; my $arg = lc $$instr{arg}; my $addr = eval_expr($arg); if (!defined $addr) { $unresolved = 1; if (exists $tmpls{extended}->{$opcode}) { $$instr{size} = length($tmpls{extended}->{$opcode}->{tmpl}) / 2; } elsif (exists $tmpls{direct}->{$opcode}) { $$instr{size} = (length($tmpls{direct}->{$opcode}->{tmpl}) / 2) + 1; } elsif (exists $tmpls{indexed}->{$opcode}) { $$instr{size} = (length($tmpls{indexed}->{$opcode}->{tmpl}) / 2) + 2; } else { push @syntax_errors, "$filename:$line_number: error: unknown opcode '$opcode'"; } return undef; } if (defined $dp && $dp == (($addr >> 8) & 0xff) && exists $tmpls{direct}->{$opcode}) { $$instr{tmpl} = $tmpls{direct}->{$opcode}->{tmpl}; return parse_direct($instr); } elsif (exists $tmpls{extended}->{$opcode}->{tmpl}) { $$instr{tmpl} = $tmpls{extended}->{$opcode}->{tmpl}; return parse_extended($instr); } else { $$instr{tmpl} = $tmpls{indexed}->{$opcode}->{tmpl}; return parse_indexed($instr); } } sub parse_immediate { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $arg = lc $$instr{arg}; $arg =~ s/\s+//g; $arg =~ s/^#//; my $size = length($tmpl) / 2; $size++ if ($tmpl =~ /\%w/); $$instr{size} = $size; my $val = eval_expr($arg); return undef if (!defined $val); my $as_byte = sprintf("%02x", $val & 0xff); my $as_word = sprintf("%04x", $val & 0xffff); $$instr{text} = $tmpl; $$instr{text} =~ s/\%b/$as_byte/g; $$instr{text} =~ s/\%w/$as_word/g; } sub parse_direct { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $arg = lc $$instr{arg}; $arg =~ s/^ 0x06, b => 0x05, d => 0x0b ); my %regs = ( x => 0x00, y => 0x20, u => 0x40, s => 0x60 ); $arg =~ s/\s+//g; my $indirect = 0; my $want_bits = 0; my $text = ""; if ($arg =~ /^\[(.*)\]$/) { $arg = $1; $indirect = 0x10; } # Allow bracketing a second time to reverse the meaning if ($arg =~ /^\[(.*)\]$/) { $arg = $1; $indirect = 0; } if ($arg =~ /^>>(.*)$/) { $want_bits = 16; $arg = $1; } elsif ($arg =~ /^>(.*)$/) { $want_bits = 8; $arg = $1; } elsif ($arg =~ /^<+(.*)$/) { $arg = $1; } my $base_size = length($tmpl) / 2; # includes postbyte $$instr{size} = $base_size + 2; # maximum my ($offset,$reg) = split(/,/, $arg); if ($arg !~ /,/) { if (!$indirect) { push @errors, "$filename:$line_number: warning: invalid extended non-indirect address" if (!$quiet); } my $addr = eval_expr($arg); return undef if (!defined $addr); $text = sprintf("\%02x\%04x", 0x8f | $indirect, $addr & 0xffff); } elsif ($reg eq 'pcr') { my $addr = eval_expr($offset); if ($want_bits < 16) { my $next_pc = $pc + $base_size + 1; my $offset = $addr - $next_pc; if (defined $addr && $offset >= -128 && $offset <= 127) { $text = sprintf("\%02x\%02x", 0x8c | $indirect, $offset & 0xff); } else { $want_bits = 16; } } if ($want_bits == 16) { my $next_pc = $pc + $base_size + 2; my $offset = $addr - $next_pc; $text = sprintf("\%02x\%04x", 0x8d | $indirect, $offset & 0xffff); } } elsif ($offset eq "") { $$instr{size} -= 2; if ($reg =~ /([xyus])\+\+/) { $text = sprintf("\%02x", 0x81 | $regs{$1} | $indirect); } elsif ($reg =~ /--([xyus])/) { $text = sprintf("\%02x", 0x83 | $regs{$1} | $indirect); } elsif ($reg =~ /([xyus])\+/) { if ($indirect) { push @errors, "$filename:$line_number: warning: invalid indirect post-increment" if (!$quiet); } $text = sprintf("\%02x", 0x80 | $regs{$1} | $indirect); } elsif ($reg =~ /-([xyus])/) { if ($indirect) { push @errors, "$filename:$line_number: warning: invalid indirect pre-decrement" if (!$quiet); } $text = sprintf("\%02x", 0x82 | $regs{$1} | $indirect); } elsif ($reg =~ /([xyus])/) { $text = sprintf("\%02x", 0x84 | $regs{$1} | $indirect); } else { push @syntax_errors, "$filename:$line_number: error: unknown index register or operation"; return undef; } } elsif ($offset =~ /^([abd])$/) { my $areg = $1; $$instr{size} -= 2; if ($reg =~ /^([xyus])$/) { $reg = $1; } else { push @syntax_errors, "$filename:$line_number: error: unknown index register or operation"; return undef; } $text = sprintf("\%02x", 0x80 | $regs{$reg} | $aregs{$areg} | $indirect); } else { if ($reg =~ /^([xyus])$/) { $reg = $1; } else { push @syntax_errors, "$filename:$line_number: error: unknown index register or operation"; return undef; } my $val = eval_expr($offset); return undef if (!defined $val); $want_bits = 8 if ($want_bits < 8 && $indirect); if ($want_bits < 8) { if (defined $val && $val >= -16 && $val <= 15) { if ($val == 0) { $text = sprintf("\%02x", 0x84 | $regs{$reg} | $indirect); } else { $val &= 0x1f; $text = sprintf("\%02x", $regs{$reg} | $val); } } else { $want_bits = 8; } } if ($want_bits == 8 || $want_bits == 16) { if (defined $val && $val >= -128 && $val <= 127) { $val &= 0xff; $text = sprintf("\%02x\%02x", 0x88 | $regs{$reg} | $indirect, $val); } else { $want_bits = 16; } } if ($want_bits == 16) { $val &= 0xffff; $text = sprintf("\%02x\%04x", 0x89 | $regs{$reg} | $indirect, $val); } } return undef if (!$text); $$instr{text} = $tmpl; $$instr{text} =~ s/\%i/$text/g; $$instr{size} = length($$instr{text}) / 2; } sub parse_extended { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $arg = lc $$instr{arg}; $arg =~ s/\s+//g; $arg =~ s/^>//; my $size = (length($tmpl) / 2) + 1; my $addr = eval_expr($arg); $$instr{size} = $size; return undef if (!defined $addr); my $as_word = sprintf("%04x", $addr & 0xffff); $$instr{text} = $tmpl; $$instr{text} =~ s/\%w/$as_word/g; } sub parse_inherent { my ($instr) = @_; my $tmpl = $$instr{tmpl}; $$instr{size} = length($tmpl) / 2; $$instr{text} = $tmpl; } sub parse_relative { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $size = length($tmpl) / 2; $$instr{size} = $size; my $arg = lc $$instr{arg}; my $addr = eval_expr($arg); return undef if (!defined $addr); my $next_pc = $pc + $size; my $offset = $addr - $next_pc; if ($offset < -128 || $offset > 127) { push @errors, "$filename:$line_number: error: branch destination out of range"; $unresolved = 1; } my $off_text = sprintf("%02x", $offset & 0xff); $$instr{text} = $tmpl; $$instr{text} =~ s/\%b/$off_text/g; } sub parse_longrelative { my ($instr) = @_; my $tmpl = $$instr{tmpl}; my $size = (length($tmpl) / 2) + 1; $$instr{size} = $size; my $arg = lc $$instr{arg}; my $addr = eval_expr($arg); return undef if (!defined $addr); my $next_pc = $pc + $size; my $offset = $addr - $next_pc; if ($offset >= -128 && $offset <= 127) { push @errors, "$filename:$line_number: warning: long branch could be optimised" if (!$quiet); } my $off_text = sprintf("%04x", $offset & 0xffff); $$instr{text} = $tmpl; $$instr{text} =~ s/\%w/$off_text/g; }