muforth/target/AVR/asm.mu4

( This file is part of muFORTH: http://pages.nimblemachines.com/muforth

  Copyright 2002-2008 David Frech. All rights reserved, and all wrongs
  reversed. (See the file COPYRIGHT for details.)

( Assembler for AVR 8 bit microcontroller.)

" AVR assembler " file[#

( Debug)
: _     char _ hold ;
: ####  # # # # ;
: .b  radix @ push  binary  <# #### _ #### _ #### _ #### #> type  pop radix ! ;

comment
==========================================================================
  If I decide I want to *carefully* support the different flavors of the AVR
  instruction set, I should put the common core insns into .assembler. and
  make separate chains for each extension. Then the name of the device would
  be a word which would choose a search sequence, including only the insns
  that belong to that CPU.
==========================================================================

hex

forth
: op,  |, ;
-- : op,  ( compile)  dup |,  ( and print)  .b  cr ;

comment
==========================================================================
  Weird encodings, of constants and registers, in different and alarming ways.

  Rd     d_dddd -> 0000_000d_dddd_0000  ( dest reg)
  Rs     s_ssss -> 0000_00s0_0000_ssss  ( source reg)
  I6    ii_iiii -> 0000_0ii0_0000_iiii  ( 6 bit io offset: 0-63)
  I5     i_iiii -> 0000_0000_iiii_i000  ( 5 bit io offset: 0-31)
  Rh       dddd -> 0000_0000_dddd_0000  ( high regs, 16 to 31)
  Rp         dd -> 0000_0000_00dd_0000  ( pointer/pairs, 0=24/25, 1=26/27 etc)
  K8  kkkk_kkkk -> 0000_kkkk_0000_kkkk  ( 8 bit immed data)
  K6    kk_kkkk -> 0000_0000_kk00_kkkk  ( 6 bit immed data - word insns)
* Ah    kk_kkkk -> 0000_000k_kkkk_000k  ( long jmp/call high 6 bits)
  S7   sss_ssss -> 0000_00ss_ssss_s000  ( 7 bit signed branch offset)
  U6    uu_uuuu -> 00u0_uu00_0000_0uuu  ( 6 bit unsigned offset for ld/st Y,Z)
  T3        bbb -> 0000_0000_0bbb_0000  ( status bit)

*Unused in ATtiny, and in ATmega smaller than 16k.
==========================================================================

( I've put similar words next to each other to make them easier to read.)

: S3  ( const op - bits)  swap  07 and  4 << or ;
: Rd  ( reg op - bits)    swap  1f and  4 << or ;
: I5  ( io op - bits)     swap  1f and  3 << or ;
: S7  ( off op - bits)    swap  7f and  3 << or ;
: Rs  ( reg op - bits)    over  10 and  5 << or  swap 0f and  or ;
: I6  ( io op - bits)     over  30 and  5 << or  swap 0f and  or ;
: K8  ( const op - bits)  over 0f0 and  4 << or  swap 0f and  or ;
: K6  ( const op - bits)  over  30 and  2 << or  swap 0f and  or ;
: K3  ( const op - bits)  swap  07 and  or ;

: U6   ( off op - bits)
   over 20 and 8 << or  over 18 and 7 << or  swap 7 and  or ;

: Rp? ( reg op - index op)
   swap  #24 - dup 0< if error" only even regs 24 to 30 allowed" then
   u2/ swap ;

: Rh? ( reg op - index op)
   swap  #16 - dup 0< if error" only regs 16 to 31 allowed"  then  swap ;

: Rp  ( reg op - bits)  Rp?  Rd ;
: Rh  ( reg op - bits)  Rh?  Rd ;


: Ah  ( addr op - lo hibits)
   over #16 u>>  dup 3e and 3 <<  swap 1 and  or  or ;

: ljmp  ( a32)    constant does>  ( a32 op)  Ah op, op, ;

( 2opw - pppp pppp dddd ssss. d,r >> 1)
: movw  ( Rs Rd -)  u2/ 4 <<  swap u2/ or  0100 or op, ;

( Branch and jump plumbing.)

( Note: dest and src are byte addresses; the relative jump offset is a
  _word_ offset. Also, src points to word _following_ the branch!)
: >offset  ( dest src - offset)  - 2/ ;

( Computes the jump offset from src to dest; checks if within range;
  if so, masks to range-1, and returns -1; else returns offset and 0.
  Range check is done by adding range/2 to offset and checking if that
  is u< range.)

: inrange?  ( offset range - offset f | offset' t)
   push  dup r@ u2/ +   r@ u< if  pop 1- and  -1 ^ then
                                  pop drop     0 ;

( Conditional jumps - cjmps - have a range of 80. Relative jmps - rjmps
  have a range of 1000.)

: checkrange  ( offset range - offset | throw)
   inrange?  if ^ then   error" relative jump out of range" ;

( Called when |here points _at_ rjmp/call, not past it, so we fix up
  the offset.)
: _rjmp  ( dest op)  swap  |here >offset 1-  1000 checkrange  or op, ;

( XXX: fix to generate long call/jmp when necessary)
: rjmp  ( dest)  constant does>  ( dest op)  _rjmp ;

assembler

( Once again, issues of using the manufacturer's mnemonics - which are
  absolutely terrible - or inventing my own.)

( 2op - pppp ppsd dddd ssss)
forth
: 2op    ( Rs Rd)   constant does>  (  Rs Rd op)  Rd Rs op, ;
assembler
0400 2op cpc   (       Rd - Rs - carry)  ( cmpc)
0800 2op sbc   ( Rd <- Rd - Rs - carry)  ( subc)
0c00 2op add
1000 2op cpse  (       Rd - Rs)  ( cmpskipeq)
1400 2op cp    (       Rd - Rs)  ( cmp)
1800 2op sub   ( Rd <- Rd - Rs)
1c00 2op adc
2000 2op and
2400 2op eor
2800 2op or
2c00 2op mov
: tst  dup \a and ;  ( tst == and Rd,Rd)
: clr  dup \a eor ;  ( clr == eor Rd,Rd)

( bi - byte immed. pppp kkkk dddd kkkk - r16-31)
forth
: bi     ( imm Rh)  constant does>  ( imm Rh op)  Rh K8 op, ;
assembler
( No addi or adci! Interesting.)
3000 bi cpi  (       Rh - K)         ( cmpi)
4000 bi sbci ( Rh <- Rh - K - carry) ( subci)
5000 bi subi ( Rh <- Rh - K)
6000 bi ori
7000 bi andi
e000 bi ldi

( 1op - pppp pppd dddd pppp)
forth
: 1op    ( Rd)      constant does>  (     Rd op)  Rd    op, ;
assembler
9400 1op com
9401 1op neg
9402 1op swap  ( swap nibbles)
9403 1op inc
9405 1op asr
9406 1op lsr
9407 1op ror
940a 1op dec
: lsl  dup \a add ;  ( lsl == add Rd,Rd)
: rol  dup \a adc ;  ( rol == adc Rd,Rd)

( long jump and call - pppp pppk kkkk pppk, kkkk kkkk kkkk kkkk)
940c ljmp ljmp
940e ljmp lcall

( STATUS bit set & clear)
forth
: statusbit  ( bit)  constant does>  ( bit op)  S3 op, ;
assembler
9408 statusbit bset ( set STATUS bit)
9488 statusbit bclr ( clr STATUS bit)

( Indexed load & store.)
( X can be post-inc, pre-dec, offset by 0. Y & Z can be post-inc, pre-dec,
  and offset by 0-63.)
forth
: ld/st           ( Rd)  constant does>  (  Rd op)  Rd ;
: ix       (     op+Rd)  constant does>  (     op ix)      or op, ;
: ix_off   ( op+Rd off)  constant does>  ( op off ix)   U6 or op, ;
: ix_data  ( op+Rd a16)  constant does>  ( op a16 ix)  rot or op, op, ;

assembler
8000 ld/st ld  ( Rd - bits)
8200 ld/st st  ( Rd - bits)

( Alternate syntax)
: <-  \a ld ;
: ->  \a st ;

( suffixes - these do the action)
0000 ix_off  ,z   ( op+Rd u6)
0008 ix_off  ,y   ( op+Rd u6)
1000 ix_data  $   ( op+Rd a16)  ( data space - compiles op then following addr)
1001 ix       z+
1002 ix      -z
1004 ix     pmz  ( prog mem)
1005 ix     pmz+ ( prog mem)
1009 ix       y+
100a ix      -y
100c ix       x  ( XXX: should this be x@ or something?)
100d ix       x+
100e ix      -x
100f ix     stk  ( for push and pop!)

: push  ( Rd)  \a st \a stk ;
: pop   ( Rd)  \a ld \a stk ;

1e constant z
1f constant zh
: top   \a z ;
: toph  \a zh ;
: sp+  \a y+ ;
: -sp  \a -y ;

( wi - word immed. pppp pppp kkdd kkkk. dd=0..3 -> r24,26,28,30)
forth
: wi     ( imm Rp)  constant does>  ( imm Rp op)  Rp K6 op, ;
assembler
9600 wi adiw  ( addiw)
9700 wi sbiw  ( subiw)

( iobit - set/clear bit in io space - pppp pppp aaaa abbb)
( These can address 0-31 in io space.)
forth
: iobit  ( bit io)  constant does>  ( bit io op)  I5 K3 op, ;
assembler
9800 iobit cbi   ( clear bit io)
9900 iobit sbic  ( skip if bit io clear)
9a00 iobit sbi   ( set bit io)
9b00 iobit sbis  ( skip if bit io set)

( io - pppp piid dddd iiii)
( These address 0-63 in io space.)
forth
: io     ( io Rd)   constant does>  (  io Rd op)  Rd I6 op, ;
assembler
b000 io in    ( Rd <- io[i])
b800 io out   ( Rd -> io[i])

( rel jump and call - pppp kkkk kkkk kkkk)
c000 rjmp rjmp
d000 rjmp rcall

( Load, store, and test bits in registers.)
forth
: regbit  ( bit reg)  constant does>  ( bit reg op)  Rd K3 op, ;
assembler
f800 regbit bld  ( load from Status.T)
fa00 regbit bst  ( store to Status.T)
                 ( the difference is 0200! like all ld/st pairs)
fc00 regbit sbrc  ( skip if bit in reg clear)
fe00 regbit sbrs  ( skip if bit in reg set)

( Zero ops.)
forth
: 0op  constant does> op, ;
assembler
0000 0op nop
c000 0op nop2  ( rjmp to following word; takes 2 cycles!)
9508 0op ret
9518 0op reti
9509 0op icall
9409 0op ijmp
9588 0op sleep
95a8 0op wdr    ( watchdog reset)

comment
=============================================================================
Digression on the AVR's condition code idiosyncrasies, and an explanation
of how carry and borrow work.

First, let's define how condition code flags represent the results of a
signed subtract (or compare). If we execute X - Y, where both are _signed_
values, then the N (negative), V (overflow) and Z (zero) bits are set as
follows:

Let S = N xor V. Then

  (LT)  X < Y  ==   S
  (GE)  X >= Y ==  !S
  (GT)  X > Y  ==  !S and !Z
               == !(S or   Z)  (deMorgan's law)
  (LE)  X <= Y ==   S or   Z

I've written on the left the traditional "conditional branch" names for
each relation.

There are two conditional branch instructions on the AVR: each tests a
single bit in the status register; one branches if the bit is set, the
other if clear.

AVR defines a bit S == N xor V. So S set is equivalent to "<"; S clear
means ">=". The only way to test "<=" or ">" is to switch the order of the
operands; we have no way of testing (S or Z) (LE) or !(S or Z) (GT)
directly. Slightly odd, but it works.

We have a similar issue with subtract or compare of _unsigned_ values. But
before discussing that we have to understand what the carry bit represents.
Since the natural way of doing subtraction to _add_ the two's complement of
the subtrahend - ie:

  X - Y == X + (-Y)

and since two's complement is the one's complement plus one:

  -Y == ~Y + 1

we can write subtract as

  X - Y == X + ~Y + 1

and subtract with a _borrow_ as

  X - Y - 1 == X + ~Y

Note something odd here: the carry in complemented: we carry in a one when
there is no carry in (ie, no borrow); and carry in a zero when there _is_ a
borrow. So, carry = ~borrow.

There are only two architectures that I know of that represent carry this
way: ARM and 6502. All others use a synthesized carry, where carry =
borrow. Since the AVR is one of these latter, in the following we'll
understand C = 1 to mean borrow, and C = 0 to mean no borrow.

So let's define how X - Y affects the condition codes, if X and Y are
_unsigned_.

  (LO)  X < Y   ==   C
  (HS)  X >= Y  ==  !C
  (HI)  X > Y   ==  !C and !Z
                == !(C or   Z)   (deMorgan's law)
  (LS)  X <= Y  ==   C or   Z

Again I've written traditional conditional branch names for these
conditions. LO == lower; HS == higher or same; HI == higher; LS == lower or
same.

NOTE: On AVR "HS" does **NOT** mean "higher or same" but instead "half
carry bit set"!! This is another COMPLETELY RETARDED choice of mnemonics.
BRHS and BRHC mean "branch if half carry set" and "branch if half carry
clear", resp. The branch if higher or same is BRSH (branch if same or
higher). Barf!! Has this caused bugs? I'm sure of it.

Here again, since we can only test one bit (C) we have to synthesize HI and
LS by swapping the arguments to the compare or subtract.
=============================================================================

assembler
( Status register bit names)

( XXX: rename these? so that C doesn't conflict with "0C? call them what?
  .C? C.? And that brings up other issues with number parsing which should
  also be fixed! Or at least documented... .C and C. are not parsed as
  numbers, because of the way the loops work. Is this right? And what about
  sign and radix? Do we prefer #-10 or -#10? Or should both work? Right now
  sign must precede radix.)

0 constant .C
1 constant .Z
2 constant .N
3 constant .V
4 constant .S
5 constant .H
6 constant .T
7 constant .I

: u<  \a .C ;  ( carry set)
: 0=  \a .Z ;  ( zero  set)
: 0<  \a .N ;  ( negative set)
: <   \a .S ;  ( signed bit set; ie N xor V == 1)

( Conditional branches: test status reg bit; branch if set/clear)
( branch if bit clear: 1111 01kk kkkk ksss)
( branch if bit set:   1111 00kk kkkk ksss)
: not  400 xor ;  ( toggle sense of condition code)

forth

( Calculate offset encoding for the jmp, by first inspecting the opcode
  to see what kind of jmp it is.)

( src points to word _after_ jmp)  ( this code doesn't work if checkrange
  calcs its own offset)

( checkrange masks the offset to fit into range)
: offsetbits  ( jmp offset - jmp bits)  over f000 and f000 = if
   ( short)   80 checkrange  3 <<  ^  then
    ( rel)  1000 checkrange  ;

: resolve>  (s src dest)  ( doesn't change type of jmp - short or relative)
   over >offset push  ( src)  2 - ( point to jmp)
   dup |@ pop ( 'jmp jmp offset)  offsetbits  or  swap |! ;

( <resolve can _in theory_ compile a short jump around an rjmp. If the
  offset is too large to fit in 7 bits, 0404 xor with branch - makes it
  skip following - and then compile rjmp. For now, let's get this going
  with short branches.

  I say _in theory_ because we are *not* doing this for three reasons:

    * It's simpler. This is important.

    * resolve is symmetric if we don't rewrite jumps - we can't rewrite
      forward jumps because there isn't room to add an rjmp afterwards
      since we've already compiled code there;

    * There really should be no need. Code words should be short. We can
      jump +/-64 words. That is plenty.)

: <resolve  ( dest src)  swap resolve> ;

( Compile a conditional jump; on entry stack has status bit, possibly or'ed
  with 400 to reverse its sense, so we should xor into opcode. By default
  opcode generated is jump if status bit _clear_.)

: cjmp,  ( statusbit - src)  f400 xor op,  |here ;

( Unconditional jmp)
: rjmp,   ( - src)  c000 op,  |here ;

assembler
( These are different from the pure Forth ones. Forward conditional branches
  are always _short_, but backwards branches can be short or long. A long
  branch is formed by inverting the condition to branch _around_ the
  longer branch.

  Thus we need words to compile forward conditional branches, forward
  unconditional branches, and backwards branches of either kind.)

( if compiles "branch if bit clr")
: if      (s bit - src)    cjmp, ;
: skip    (s - src)    rjmp, ;
: then    (s src)          |here resolve> ;
: else    (s src1 - src2)  rjmp, swap  \a then ;
: begin   (s - dest)       |here ;
: until   (s bit dest -)   \a if  <resolve ;
: again   (s dest -)       rjmp,  <resolve ;
: while   (s bit dest - src dest)   \a if  swap ;
: repeat  (s src dest -)   \a again  \a then ;

( XXX testing)
: .  . ;
: .s  .s ;
: .b  .b ;

: \f   .forth. chain' execute ;

: label  |here ( dup . cr) machine constant forth  __asm__ ;
outside
: code   \a label  does>  \a rcall ;
: '   .target. chain' execute ;
: forward  rjmp, ;
: resolve  |here resolve> ;
: host  \ [ ;

: macro  assembler : __macro__ ;

compiler
: ;m     \ ;  __outside__ ;
assembler
: (   \f ( ;
: ;c   __outside__ ;
:  color=gray>--   \f -- ;
: -F   -F ;
: decimal  decimal ;
: equ machine constant forth ;
forth
: }  __asm__ ;

#]file