/* Output routines for GCC for ARM/Thumb
   Copyright (C) 1996 Cygnus Software Technologies Ltd
   The basis of this contribution was generated by
   		Richard Earnshaw, Advanced RISC Machines Ltd

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <stdio.h>
#include <string.h>
#include "config.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "regs.h"
#include "output.h"
#include "insn-flags.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "expr.h"


/* Flag consulted by thumb_function_prologue: when it is nonzero and the
   function has pretend args, the prologue pushes the argument registers
   instead of merely adjusting the stack pointer.  Presumably set elsewhere
   when the function takes anonymous arguments -- confirm at the setter.  */
int current_function_anonymous_args = 0;

/* Used to parse -mstructure_size_boundary command line option.  */
char * structure_size_string = NULL;
int    arm_structure_size_boundary = 32; /* Used to be 8 */


/* Predicates */
/* Return nonzero if OP is not a constant and either true_regnum reports
   no hard register for it (-1) or OP is itself a pseudo register.
   MODE is not examined.  */
int
reload_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  int hard_regno = true_regnum (op);

  /* Constants never qualify.  */
  if (CONSTANT_P (op))
    return 0;

  /* Anything that does not resolve to a hard register is accepted.  */
  if (hard_regno == -1)
    return 1;

  /* Otherwise only a pseudo register will do.  */
  return GET_CODE (op) == REG && REGNO (op) >= FIRST_PSEUDO_REGISTER;
}

/* Return nonzero if OP can be the right-hand side of a cmp instruction:
   either a CONST_INT in the range 0..255 or anything that
   register_operand accepts in MODE.  */
int
thumb_cmp_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* The unsigned comparison rejects negative values as well as large ones.  */
  if (GET_CODE (op) == CONST_INT
      && (unsigned HOST_WIDE_INT) (INTVAL (op)) < 256)
    return 1;

  return register_operand (op, mode) != 0;
}

/* Return nonzero if every set bit of VAL lies inside some window
   (0xff << i) with 0 <= i <= 24, i.e. VAL is an 8-bit value shifted
   left by at most 24 bits.  Zero qualifies (it matches at i == 0).  */
int
thumb_shiftable_const (val)
     HOST_WIDE_INT val;
{
  /* Work on an unsigned copy so that the masking and the comparison are
     both carried out in one unsigned type.  */
  unsigned HOST_WIDE_INT x = val;
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  for (i = 0; i < 25; i++)
    if ((x & (mask << i)) == x)
      return 1;

  return 0;
}

/* Report whether the current function can do without an epilogue.
   At present this always returns 0.  The disabled code would return 1
   when the function has no frame, no outgoing or pretend argument space,
   and none of registers 8..12 is both live and call-saved.  */
int
thumb_trivial_epilogue ()
{
#if 0
  int regno;

  if (get_frame_size () 
      || current_function_outgoing_args_size
      || current_function_pretend_args_size)
    return 0;

  for (regno = 8; regno < 13; regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      return 0;

  return 1;
#else
  /* ??? If this function ever returns 1, we get a function without any
     epilogue at all.  It appears that the intent was to cause a "return"
     insn to be emitted, but that does not happen.  */
  return 0;
#endif
}


/* Routines for handling the constant pool */
/* This is unashamedly hacked from the version in sh.c, since the problem is
   extremely similar.  */

/* Thumb instructions cannot load a large constant into a register,
   constants have to come from a pc relative load.  The reference of a pc
   relative load instruction must be less than 1k in front of the instruction.
   This means that we often have to dump a constant inside a function, and
   generate code to branch around it.
 
   It is important to minimize this, since the branches will slow things
   down and make things bigger.
 
   Worst case code looks like:
 
	ldr	rn, L1
	b	L2
	align
	L1:   .long value
	L2:
	..
 
	ldr	rn, L3
	b	L4
	align
	L3:   .long value
	L4:
	..
 
   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.
 
 
   The algorithm is:
 
   scan, find an instruction which needs a pcrel move.  Look forward, find the
   last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the find and the barrier.
 
   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 2 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:
 
	ldr	rn, L1
	..
	ldr	rn, L3
	b	L4
	align
	L1:	.long value
	L3:	.long value
	L4:
 
   Then the second move becomes the target for the shortening process.
 
 */
 
/* One entry of the constant pool that is being accumulated.  */
typedef struct
{
  rtx value;			/* Value in table */
  HOST_WIDE_INT next_offset;	/* Byte offset from the pool label of the
				   end of this entry, i.e. of the start of
				   the entry that follows it */
  enum machine_mode mode;	/* Mode of value */
} pool_node;

/* The maximum number of constants that can fit into one pool, since
   the pc relative range is 0...1020 bytes and constants are at least 4
   bytes long */

#define MAX_POOL_SIZE (1020/4)
/* The entries collected so far; only the first pool_size are valid.  */
static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries in use; reset to zero by dump_table.  */
static int pool_size;
/* Label placed in front of the table by dump_table; created by
   add_constant when the first entry of a pool is added.  */
static rtx pool_vector_label;

/* Add the constant X of mode MODE to the pool and return its byte offset
   from pool_vector_label.

   An SImode MEM whose address is a constant-pool symbol is first replaced
   by the pooled constant itself.  If an equal value of the same mode is
   already in the pool its offset is returned and nothing is added.  When
   the first entry of a pool is created a fresh pool_vector_label is
   generated.

   Aborts if the pool is full, since pool_vector is a fixed-size array.  */
 
static HOST_WIDE_INT
add_constant (x, mode)
     rtx x;
     enum machine_mode mode;
{
  int i;
  HOST_WIDE_INT offset;

  if (mode == SImode && GET_CODE (x) == MEM && CONSTANT_P (XEXP (x, 0))
      && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
    x = get_pool_constant (XEXP (x, 0));

  /* First see if we've already got it */
 
  for (i = 0; i < pool_size; i++)
    {
      if (GET_CODE (x) == GET_CODE (pool_vector[i].value)
          && mode == pool_vector[i].mode)
        {
          if (GET_CODE (x) == CODE_LABEL)
            {
              /* Labels are only the same if field 3 of each matches.  */
              if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
                continue;
            }
          /* next_offset is the end of the entry, so step back over it.  */
          if (rtx_equal_p (x, pool_vector[i].value))
            return pool_vector[i].next_offset - GET_MODE_SIZE (mode);
        }
    }
 
  /* Need a new one.  Refuse to run off the end of the static table
     rather than silently corrupting whatever follows it.  */
 
  if (pool_size >= (int) (sizeof pool_vector / sizeof pool_vector[0]))
    abort ();

  pool_vector[pool_size].next_offset = GET_MODE_SIZE (mode);
  offset = 0;
  if (pool_size == 0)
    pool_vector_label = gen_label_rtx ();
  else
    pool_vector[pool_size].next_offset 
      += (offset = pool_vector[pool_size - 1].next_offset);

  pool_vector[pool_size].value = x;
  pool_vector[pool_size].mode = mode;
  pool_size++;
  return offset;
}
 
/* Emit the accumulated literal table into the insn stream after SCAN
   and empty the pool.  The emitted sequence is: a fresh label, an
   alignment insn, pool_vector_label, one consttable insn per entry
   (4 or 8 bytes wide; any other size aborts), an end-of-table insn and
   a barrier.  */
                    
static void         
dump_table (scan)
     rtx scan;
{
  rtx tail = scan;
  pool_node *entry;

  tail = emit_label_after (gen_label_rtx (), tail);
  tail = emit_insn_after (gen_align_4 (), tail);
  tail = emit_label_after (pool_vector_label, tail);

  for (entry = pool_vector; entry < pool_vector + pool_size; entry++)
    {
      int bytes = GET_MODE_SIZE (entry->mode);

      if (bytes == 4)
	tail = emit_insn_after (gen_consttable_4 (entry->value), tail);
      else if (bytes == 8)
	tail = emit_insn_after (gen_consttable_8 (entry->value), tail);
      else
	abort ();
    }

  tail = emit_insn_after (gen_consttable_end (), tail);
  emit_barrier_after (tail);

  /* The pool has been written out; start afresh.  */
  pool_size = 0;
}

/* Return nonzero if SRC, the source of a move in mode MODE, has to be
   replaced by a load from the literal pool.  That is the case for:
     - any constant that is not a CONST_INT;
     - a CONST_INT that satisfies none of the 'I', 'J' and (unless MODE
       is DImode) 'K' constraints;
     - an SImode MEM whose address is a constant-pool SYMBOL_REF.  */
static
int
fixit (src, mode)
     rtx src;
     enum machine_mode mode;
{
  if (CONSTANT_P (src))
    {
      if (GET_CODE (src) != CONST_INT)
	return 1;

      if (! (CONST_OK_FOR_LETTER_P (INTVAL (src), 'I')
	     || CONST_OK_FOR_LETTER_P (INTVAL (src), 'J')
	     || (mode != DImode
		 && CONST_OK_FOR_LETTER_P (INTVAL (src), 'K'))))
	return 1;
    }

  return (mode == SImode
	  && GET_CODE (src) == MEM
	  && GET_CODE (XEXP (src, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (src, 0)));
}

/* Find the first barrier within MAX_COUNT_SI bytes of FROM, or create one.

   The insn chain is walked forward from FROM, summing insn lengths.  If a
   BARRIER is met before the limit is reached it is returned.  Otherwise a
   jump around a new barrier is inserted (just before any jump, note or
   label at the stopping point) and that new barrier is returned; the
   jump's target label is emitted directly after the barrier.  */

#define MAX_COUNT_SI 1000
 
static rtx
find_barrier (from)
     rtx from;
{
  int count = 0;
  rtx found_barrier;
  rtx label;

  while (from && count < MAX_COUNT_SI)
    {
      if (GET_CODE (from) == BARRIER)
	return from;

      /* Count the length of this insn.  A set from a constant-pool
	 address is counted as 2 bytes (presumably the size of the load
	 that will replace it -- confirm); anything else uses its length
	 attribute.  */
      if (GET_CODE (from) == INSN
	  && GET_CODE (PATTERN (from)) == SET
	  && CONSTANT_P (SET_SRC (PATTERN (from)))
	  && CONSTANT_POOL_ADDRESS_P (SET_SRC (PATTERN (from))))
	count += 2;
      else
	count += get_attr_length (from);

      from = NEXT_INSN (from);
    }

  /* We didn't find a barrier in time to
     dump our stuff, so we'll make one */
  label = gen_label_rtx ();
  
  if (from)
    from = PREV_INSN (from);
  else
    from = get_last_insn ();
  
  /* Walk back to be just before any jump */
  while (GET_CODE (from) == JUMP_INSN
	 || GET_CODE (from) == NOTE
	 || GET_CODE (from) == CODE_LABEL)
    from = PREV_INSN (from);
  
  /* Branch over the spot where the table will go.  */
  from = emit_jump_insn_after (gen_jump (label), from);
  JUMP_LABEL (from) = label;
  found_barrier = emit_barrier_after (from);
  emit_label_after (label, found_barrier);
  return found_barrier;
}

/* Return nonzero if INSN is a move whose source must be fetched from the
   literal pool.  Only live (non-deleted) INSNs whose pattern is a single
   SET are considered, sets of the pc are never touched, and the final
   decision is delegated to fixit.  */
 
static int
broken_move (insn)
     rtx insn;
{
  rtx body;

  if (INSN_DELETED_P (insn) || GET_CODE (insn) != INSN)
    return 0;

  body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;

  /* Leave assignments to the program counter alone.  */
  if (SET_DEST (body) == pc_rtx)
    return 0;

  return fixit (SET_SRC (body), GET_MODE (SET_DEST (body)));
}

#ifdef DBX_DEBUGGING_INFO

/* Walk BLOCK, its siblings on the BLOCK_CHAIN and, recursively, their
   sub-blocks.  In every block that is marked as used, each variable whose
   DECL_RTL equals ORIG has its DECL_RTL replaced with NEW.  Declarations
   that are ignored, external, not VAR_DECLs, or nameless non-type
   declarations are left alone.  Blocks that are not marked used are
   skipped together with their sub-blocks.  */

static void
replace_symbols_in_block (tree block, rtx orig, rtx new)
{
  tree sym;

  while (block)
    {
      if (TREE_USED (block))
	{
	  for (sym = BLOCK_VARS (block); sym; sym = TREE_CHAIN (sym))
	    if (! (DECL_NAME (sym) == 0 && TREE_CODE (sym) != TYPE_DECL)
		&& ! DECL_IGNORED_P (sym)
		&& TREE_CODE (sym) == VAR_DECL
		&& ! DECL_EXTERNAL (sym)
		&& rtx_equal_p (DECL_RTL (sym), orig))
	      DECL_RTL (sym) = new;

	  replace_symbols_in_block (BLOCK_SUBBLOCKS (block), orig, new);
	}

      block = BLOCK_CHAIN (block);
    }
}
#endif

/* Walk the insn chain starting at FIRST and move constants that cannot be
   loaded directly into literal tables placed in the instruction stream.

   For every move that broken_move flags, find_barrier locates (or creates)
   a barrier further on.  Each flagged move from there up to the barrier is
   replaced by a load from a pool entry, addressed as pool_vector_label
   plus the offset returned by add_constant, and the table is then emitted
   after the barrier by dump_table.  */
void
thumb_reorg (first)
     rtx first;
{
  rtx insn;
  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      if (broken_move (insn))
	{
	  /* This is a broken move instruction, scan ahead looking for
	     a barrier to stick the constant table behind */
	  rtx scan;
	  rtx barrier = find_barrier (insn);

	  /* Now find all the moves between the points and modify them */
	  for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
	    {
	      if (broken_move (scan))
		{
		  /* This is a broken move instruction, add it to the pool */
		  rtx pat = PATTERN (scan);
		  rtx src = SET_SRC (pat);
		  rtx dst = SET_DEST (pat);
		  enum machine_mode mode = GET_MODE (dst);
		  HOST_WIDE_INT offset;
		  rtx newinsn;
		  rtx newsrc;

		  /* If this is an HImode constant load, convert it into
		     an SImode constant load.  Since the register is always
		     32 bits this is safe.  We have to do this, since the
		     load pc-relative instruction only does a 32-bit load. */
		  if (mode == HImode)
		    {
		      mode = SImode;
		      if (GET_CODE (dst) != REG)
			abort ();
		      PUT_MODE (dst, SImode);
		    }

		  /* The replacement source is a MEM at label + offset.  */
		  offset = add_constant (src, mode);
		  newsrc = gen_rtx (MEM, mode,
				    plus_constant (gen_rtx (LABEL_REF,
							    VOIDmode, 
							    pool_vector_label),
						   offset));

		  /* Build a jump insn wrapper around the move instead
		     of an ordinary insn, because we want to have room for
		     the target label rtx in fld[7], which an ordinary
		     insn doesn't have. */
		  newinsn = emit_jump_insn_after (gen_rtx (SET, VOIDmode,
							   dst, newsrc), scan);
		  JUMP_LABEL (newinsn) = pool_vector_label;

		  /* But it's still an ordinary insn */
		  PUT_CODE (newinsn, INSN);

#ifdef DBX_DEBUGGING_INFO
		  /* If debugging information is going to be emitted then we must
		     make sure that any references to symbols which are removed by
		     the above code are also removed in the descriptions of the
		     function's variables.  Failure to do this means that the
		     debugging information emitted could refer to symbols which
		     are not emitted by output_constant_pool() because
		     mark_constant_pool() never sees them as being used.  */
		  
		  if (optimize > 0			          /* These are the tests used in output_constant_pool() */
		      && flag_expensive_optimizations             /*  to decide if the constant pool will be marked.  */
		      && write_symbols == DBX_DEBUG               /* Only necessary if debugging info is being emitted.  */
		      && GET_CODE (src) == MEM                    /* Only necessary for references to memory ... */
		      && GET_CODE (XEXP (src, 0)) == SYMBOL_REF)  /*  ... whose address is given by a symbol.  */
		    {
		      replace_symbols_in_block (DECL_INITIAL (current_function_decl), src, newsrc);
		    }
#endif
		  
		  /* Kill old insn, and carry on scanning from its
		     replacement.  */
		  delete_insn (scan);
		  scan = newinsn;
		}
	    }
	  dump_table (barrier);
	}
    }
}


/* Routines for generating rtl */

/* Expand a block copy of a constant number of bytes.  operands[0] and
   operands[1] are the destination and source MEMs and operands[2] is a
   CONST_INT giving the length.  Both addresses are first copied into
   SImode registers.  The bulk is moved with the movmem12b and movmem8b
   patterns; what is left is moved with at most one word, one halfword
   and one byte move.

   NOTE(review): the tail offset starts again at zero after the block
   moves, so those patterns presumably advance both address registers --
   confirm against their definitions.  */
void
thumb_expand_movstrqi (operands)
     rtx *operands;
{
  rtx dst_addr = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx src_addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT remaining = INTVAL (operands[2]);
  HOST_WIDE_INT tail_offset = 0;
  rtx tmp;

  /* As many 12-byte chunks as will fit ...  */
  for (; remaining >= 12; remaining -= 12)
    emit_insn (gen_movmem12b (dst_addr, src_addr));

  /* ... then at most one 8-byte chunk.  */
  if (remaining >= 8)
    {
      emit_insn (gen_movmem8b (dst_addr, src_addr));
      remaining -= 8;
    }

  /* One word, through a fresh pseudo.  */
  if (remaining >= 4)
    {
      tmp = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (tmp, gen_rtx (MEM, SImode, src_addr)));
      emit_insn (gen_movsi (gen_rtx (MEM, SImode, dst_addr), tmp));
      remaining -= 4;
      tail_offset += 4;
    }

  /* One halfword.  */
  if (remaining >= 2)
    {
      tmp = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (tmp,
			    gen_rtx (MEM, HImode,
				     plus_constant (src_addr, tail_offset))));
      emit_insn (gen_movhi (gen_rtx (MEM, HImode,
				     plus_constant (dst_addr, tail_offset)),
			    tmp));
      remaining -= 2;
      tail_offset += 2;
    }

  /* And the final odd byte, if any.  */
  if (remaining != 0)
    {
      tmp = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (tmp,
			    gen_rtx (MEM, QImode,
				     plus_constant (src_addr, tail_offset))));
      emit_insn (gen_movqi (gen_rtx (MEM, QImode,
				     plus_constant (dst_addr, tail_offset)),
			    tmp));
    }
}


/* Routines for reloading */

/* Placeholder for an SImode output reload: it does no work and
   unconditionally aborts the compiler if it is ever called.  OPERANDS is
   not examined.  */
void
thumb_reload_out_si (operands)
     rtx operands;
{
  abort ();
}

/* Return non-zero if FUNC must be entered in ARM mode.  This is so for
   every public function when TARGET_CALLEE_INTERWORKING is in effect.
   FUNC must be a FUNCTION_DECL; anything else aborts.  */
int
is_called_in_ARM_mode (func)
     tree func;
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  /* Ignore the problem about functions whose address is taken.  */
  return (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) ? TRUE : FALSE;
}


/* Routines for emitting code */

/* When flag_print_asm_name is set, write an assembler comment giving the
   address recorded for INSN in insn_addresses as a four-digit hex
   number.  Otherwise do nothing.  */
void
final_prescan_insn(insn)
     rtx insn;
{
  extern int *insn_addresses;

  if (! flag_print_asm_name)
    return;

  fprintf (asm_out_file, "%s 0x%04x\n", ASM_COMMENT_START,
	   insn_addresses[INSN_UID (insn)]);
}


static void thumb_pushpop ( FILE *, int, int ); /* Forward declaration.  */

/* Return the index of the least significant set bit of MASK (bit 0 is
   the lowest).  MASK must be nonzero: with no bit set there is no answer,
   so the function aborts instead of shifting past the width of an int.
   The scan is done on an unsigned copy so that bit 31 is handled without
   signed overflow.  */
#ifdef __GNUC__
inline
#endif
static int
number_of_first_bit_set (mask)
     int mask;
{
  unsigned int bits = (unsigned int) mask;
  int bit;

  if (bits == 0)
    abort ();

  for (bit = 0; (bits & 1) == 0; ++ bit)
    bits >>= 1;

  return bit;
}

/* Register numbers used by the function entry/exit output code below.
   The four ARG_n registers are r0-r3; thumb_exit uses them as scratch
   registers to pop values into.  */
#define ARG_1_REGISTER   0
#define ARG_2_REGISTER   1
#define ARG_3_REGISTER   2
#define ARG_4_REGISTER   3
#define WORK_REGISTER    7
#define FRAME_POINTER	11
#define IP_REGISTER	12
#define STACK_POINTER	STACK_POINTER_REGNUM
#define LINK_REGISTER	14
#define PROGRAM_COUNTER 15

/* Generate code to return from a thumb function.  If
   'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   F is the assembler output stream.  The values still to be popped are
   the return address (when it is on the stack) and, when TARGET_BACKTRACE
   is set, the saved frame pointer and stack pointer.  They are popped
   into whichever argument registers do not hold part of the return value
   and then moved into place; the sequence ends with a BX, except in the
   simple case where a plain "pop {pc}" is emitted.  */
static void
thumb_exit (f, reg_containing_return_addr)
     FILE * f;
     int    reg_containing_return_addr;
{
  int regs_available_for_popping;
  int regs_to_pop;
  int pops_needed;
  int reg;			/* Not referenced in this function.  */
  int available;
  int required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;
  
  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LINK_REGISTER;
      ++ pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore frame pointer and stack pointer.  */
      regs_to_pop |= (1 << FRAME_POINTER) | (1 << STACK_POINTER);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and return.*/
  if (pops_needed == 0)
    {
      asm_fprintf (f, "\tbx\t%s\n", reg_names [reg_containing_return_addr]);

      return;
    }

  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC. */
  else if (   ! TARGET_THUMB_INTERWORK
	   && ! TARGET_BACKTRACE
	   && ! is_called_in_ARM_mode (current_function_decl))
    {
      asm_fprintf (f, "\tpop\t{pc}\n" );

      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt. */
  regs_available_for_popping = 0;
  
#ifdef RTX_CODE
  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining regs_ever_live[] because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */

  if (current_function_return_rtx != 0)
      mode = GET_MODE (current_function_return_rtx);
  else
#endif
      mode = DECL_MODE (DECL_RESULT (current_function_decl));

  size = GET_MODE_SIZE (mode);

  if (size == 0)
    {
      /* In a void function we can use any argument register.
	 In a function that returns a structure on the stack
	 we can use the second and third argument registers.  */
      if (mode == VOIDmode)
	regs_available_for_popping =
	    (1 << ARG_1_REGISTER)
	  | (1 << ARG_2_REGISTER)
	  | (1 << ARG_3_REGISTER);
      else
	regs_available_for_popping =
	    (1 << ARG_2_REGISTER)
	  | (1 << ARG_3_REGISTER);
    }
  /* A value of up to 4 bytes leaves the second and third argument
     registers free; one of up to 8 bytes leaves only the third.  For
     anything larger no register is marked available here.  */
  else if (size <= 4) regs_available_for_popping =
			  (1 << ARG_2_REGISTER)
			| (1 << ARG_3_REGISTER);
  else if (size <= 8) regs_available_for_popping =
			(1 << ARG_3_REGISTER);
  
  /* Match registers to be popped with registers into which we pop them.
     Each iteration clears the lowest set bit of both masks.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~ available;
  
  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == ARG_4_REGISTER)
	{
	  asm_fprintf (f, "\tmov\t%s, %s\n",
		       reg_names [LINK_REGISTER], reg_names [ARG_4_REGISTER]);
	  reg_containing_return_addr = LINK_REGISTER;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;
	  
	  asm_fprintf (f, "\tmov\t%s, %s\n",
		       reg_names [IP_REGISTER], reg_names [ARG_4_REGISTER]);
	}
      
      if (reg_containing_return_addr != ARG_4_REGISTER)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << ARG_4_REGISTER;
	  
	  -- pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~ (1 << LINK_REGISTER);
      
      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~ (1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;
      
      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\tfp, %s\n", reg_names [frame_pointer]);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~ (1 << frame_pointer);
      regs_to_pop &= ~ (1 << FRAME_POINTER);
      
      if (regs_available_for_popping)
	{
	  int stack_pointer;
	  
	  /* We popped the stack pointer as well, find the register that
	     contains it.*/
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\tsp, %s\n", reg_names [stack_pointer]);
	  
	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }
  
  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;
      
      asm_fprintf (f, "\tmov\t%s, %s\n",
		   reg_names [LINK_REGISTER],
		   reg_names [reg_containing_return_addr]);
      
      reg_containing_return_addr = LINK_REGISTER;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;
      int  move_to;
      
      thumb_pushpop (f, regs_available_for_popping, FALSE);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%s, %s\n",
		   reg_names [move_to], reg_names [popped_into]);

      regs_to_pop &= ~ (1 << move_to);

      -- pops_needed;
    }
  
  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;
      
      thumb_pushpop (f, regs_available_for_popping, FALSE);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\tsp, %s\n", reg_names [popped_into]);

      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      /* a4 is about to be overwritten with its saved value, so a return
	 address still held there must go to the link register first.  */
      if (reg_containing_return_addr != LINK_REGISTER)
	{
	  asm_fprintf (f, "\tmov\t%s, %s\n",
		       reg_names [LINK_REGISTER], reg_names [ARG_4_REGISTER]);
	  reg_containing_return_addr = LINK_REGISTER;
	}
    
      asm_fprintf (f, "\tmov\t%s, %s\n",
		   reg_names [ARG_4_REGISTER], reg_names [IP_REGISTER]);
    }
  
  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%s\n", reg_names [reg_containing_return_addr]);
}

/* Emit code to push or pop registers to or from the stack.

   F is the assembler output stream, MASK has one bit per register to be
   transferred and PUSH is nonzero for a push, zero for a pop.  Only the
   low registers (bits 0-7), the link register (bit 14, pushes only) and
   the PC (bit 15, pops only) are looked at; other bits of MASK are
   ignored.

   A pop of the PC alone is handed over to thumb_exit.  A pop of low
   registers plus the PC is emitted directly, unless interworking or
   backtracing is in effect, in which case the low registers are popped
   here and thumb_exit finishes the return.

   Register names are written through "%s" rather than being used as the
   format string themselves, so their contents are never interpreted as
   format directives.  */
static void
thumb_pushpop (f, mask, push)
     FILE * f;
     int mask;
     int push;
{
  int regno;
  int lo_mask = mask & 0xFF;

  if (lo_mask == 0 && ! push && (mask & (1 << 15)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit() */
      
      thumb_exit (f, -1);
      return;
    }
      
  asm_fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  
  for (regno = 0; regno < 8; regno ++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%s", reg_names[regno]);
	  
	  /* A separator is needed only if more low registers follow.  */
	  if ((lo_mask & ~1) != 0)
	    asm_fprintf (f, ", ");
	}
    }
  
  if (push && (mask & (1 << 14)))
    {
      /* Catch pushing the LR.  */

      if (mask & 0xFF)
	asm_fprintf (f, ", ");
      
      asm_fprintf (f, "%s", reg_names[14]);
    }
  else if (!push && (mask & (1 << 15)))
    {
      /* Catch popping the PC.  */
      
      if (TARGET_THUMB_INTERWORK || TARGET_BACKTRACE)
	{
	  /* The PC is never popped directly; close the list of low
	     registers and let thumb_exit produce the return.  */
	  
	  asm_fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    asm_fprintf (f, ", ");
	  
	  asm_fprintf (f, "%s", reg_names[15]);
	}
    }
       
  asm_fprintf (f, "}\n");
}

/* Return 1 if any jump insn of the current function, other than a jump
   table, has its far_jump attribute set to FAR_JUMP_YES; 0 otherwise.  */

int
far_jump_used_p (void)
{
  rtx insn;
  
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) != JUMP_INSN)
	continue;

      /* Ignore tablejump patterns.  */
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
	continue;

      if (get_attr_far_jump (insn) == FAR_JUMP_YES)
	return 1;
    }

  return 0;
}

/* Set to 1 by output_return once it has emitted a return sequence.  */
static int return_used_this_function = 0;

/* Write the assembler for a function return to asm_out_file and return
   the empty string (there is no further template text to output).

   The low registers (0-7) that are live and call-saved are popped.  The
   return itself is a "pop {..., pc}", unless interworking or backtracing
   is in effect or the function was entered in ARM mode, in which case
   thumb_exit is used.  A leaf function with nothing to pop and no far
   jump returns through register 14.

   Register names are written through "%s" rather than being used as the
   format string themselves.  */
char *
output_return ()
{
  int regno;
  int live_regs_mask = 0;

  return_used_this_function = 1;

  for (regno = 0; regno < 8; regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      live_regs_mask |= 1 << regno;

  if (live_regs_mask == 0)
    {
      if (leaf_function_p () && ! far_jump_used_p())
	{
	  /* The return address is still in the link register.  */
	  thumb_exit (asm_out_file, 14);	      
	}
      else if (   TARGET_THUMB_INTERWORK
	       || TARGET_BACKTRACE
	       || is_called_in_ARM_mode (current_function_decl))
	{
	  thumb_exit (asm_out_file, -1);
	}
      else
	asm_fprintf (asm_out_file, "\tpop\t{pc}\n");
    }
  else
    {
      asm_fprintf (asm_out_file,  "\tpop\t{");
      
      for (regno = 0; live_regs_mask; regno ++, live_regs_mask >>= 1)
	if (live_regs_mask & 1)
	  {
	    asm_fprintf (asm_out_file, "%s", reg_names[regno]);
	    /* Separate from any register still to come.  */
	    if (live_regs_mask & ~1)
		asm_fprintf (asm_out_file, ", ");
	  }

      if (   TARGET_THUMB_INTERWORK
	  || TARGET_BACKTRACE
	  || is_called_in_ARM_mode (current_function_decl))
	{
	  /* Pop only the saved registers here; thumb_exit fetches the
	     return address from the stack and returns.  */
	  asm_fprintf (asm_out_file, "}\n");
	  thumb_exit (asm_out_file, -1);
	}
      else
	asm_fprintf (asm_out_file, ", pc}\n");
    }
  
  return "";
}

/* Write to F, as assembler text, the part of the prologue of the current
   function that is not expanded as RTL.  In order, this emits:
     - when the function can be called in ARM mode, a short ARM sequence
       that switches to Thumb mode, followed by a ".real_start_of" label;
     - a push of the argument registers holding anonymous arguments, or
       a stack adjustment reserving the pretend-argument space;
     - a push of the call-saved low registers in use, plus bit 14 when
       required;
     - when TARGET_BACKTRACE is set, the code that builds a stack
       backtrace structure;
     - pushes of the call-saved high registers (r8-r12) in use, each
       first copied into a low register.
   FRAME_SIZE is only used to compute AMOUNT.  */
void
thumb_function_prologue (f, frame_size)
     FILE *f;
     int frame_size;
{
  /* NOTE(review): AMOUNT is computed here but never referenced again in
     this function.  */
  int amount = frame_size + current_function_outgoing_args_size;
  int live_regs_mask = 0;
  int high_regs_pushed = 0;
  int store_arg_regs = 0;
  int regno;

  if (is_called_in_ARM_mode (current_function_decl))
    {
      char * name;
      /* The function's DECL_RTL must be a MEM whose address is a
	 SYMBOL_REF; the symbol's string is the function name.  */
      if (GET_CODE (DECL_RTL (current_function_decl)) != MEM)
	abort();
      if (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) != SYMBOL_REF)
	abort();
      name = XSTR  (XEXP (DECL_RTL (current_function_decl), 0), 0);
      
      /* Generate code sequence to switch us into Thumb mode.  */
      /* The .code 32 directive has already been emitted by
	 ASM_DECLARE_FUNCTION_NAME */
      asm_fprintf (f, "\torr\tr12, pc, #1\n");
      asm_fprintf (f, "\tbx\tr12\n");

      /* Generate a label, so that the debugger will notice the
	 change in instruction sets.  This label is also used by
	 the assembler to bypass the ARM code when this function
	 is called from a Thumb encoded function elsewhere in the
	 same file.  Hence the definition of STUB_NAME here must
	 agree with the definition in gas/config/tc-arm.c  */
      
#define STUB_NAME ".real_start_of"
      
      asm_fprintf (f, "\t.code\t16\n");
      asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
      asm_fprintf (f, "\t.thumb_func\n");
      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
    }
    
  /* The argument registers are only stored when the function takes
     anonymous arguments and there is pretend-argument space.  */
  if (current_function_anonymous_args && current_function_pretend_args_size)
    store_arg_regs = 1;

  if (current_function_pretend_args_size)
    {
      if (store_arg_regs)
	{
	  /* Push registers (4 - size / 4) through 3, where size is the
	     pretend-argument size in bytes.  */
	  asm_fprintf (f, "\tpush\t{");
	  for (regno = 4 - current_function_pretend_args_size / 4 ; regno < 4;
	       regno++)
	    asm_fprintf (f, "%s%s", reg_names[regno], regno == 3 ? "" : ", ");
	  asm_fprintf (f, "}\n");
	}
      else
	/* Nothing to store: just reserve the space.  */
	asm_fprintf (f, "\tsub\t%Rsp, %Rsp, #%d\n", 
		     current_function_pretend_args_size);
    }

  /* Collect the low registers (0-7) that are used somewhere in the
     function and are not call-clobbered.  */
  for (regno = 0; regno < 8; regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      live_regs_mask |= 1 << regno;

  /* Bit 14 is added whenever any low register is saved, the function is
     not a leaf, or a far jump is used.  This is the same condition that
     thumb_unexpanded_epilogue computes as had_to_push_lr.  */
  if (live_regs_mask || ! leaf_function_p () || far_jump_used_p())
    live_regs_mask |= 1 << 14;

  if (TARGET_BACKTRACE)
    {
      char * name;
      int    offset;
      int    work_register = 0;
      
      
      /* We have been asked to create a stack backtrace structure.
         The code looks like this:
	 
	 0   .align 2
	 0   func:
         0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Get a work register.
         4     add   R7, SP, #20     Get the stack pointer before the push.
         6     str   R7, [SP, #8]    Store the stack pointer (before reserving the space).
         8     mov   R7, PC          Get hold of the start of this code plus 12.
        10     str   R7, [SP, #16]   Store it.
        12     mov   R7, FP          Get hold of the current frame pointer.
        14     str   R7, [SP, #4]    Store it.
        16     mov   R7, LR          Get hold of the current return address.
        18     str   R7, [SP, #12]   Store it.
        20     add   R7, SP, #16     Point at the start of the backtrace structure.
        22     mov   FP, R7          Put this value into the frame pointer.  */

      if ((live_regs_mask & 0xFF) == 0)
	{
	  /* See if the a4 register is free.  */

	  if (regs_ever_live[ 3 ] == 0)
	    work_register = 3;
	  else	  /* We must push a register of our own */
	    live_regs_mask |= (1 << 7);
	}

      /* A value of 0 here means "not chosen yet".  */
      if (work_register == 0)
	{
	  /* Select a register from the list that will be pushed to use as our work register. */

	  /* Scans from register 7 downwards, so the highest-numbered
	     pushed low register is selected.  */
	  for (work_register = 8; work_register--;)
	    if ((1 << work_register) & live_regs_mask)
	      break;
	}
      
      name = reg_names[ work_register ];
      
      asm_fprintf (f, "\tsub\tsp, sp, #16\t@ Create stack backtrace structure\n");
      
      if (live_regs_mask)
	thumb_pushpop (f, live_regs_mask, 1);
      
      /* From here on WORK_REGISTER is reused as a single-bit probe;
	 the chosen register's name is already held in NAME.  OFFSET
	 becomes 4 bytes for every register just pushed.  */
      for (offset = 0, work_register = 1 << 15; work_register; work_register >>= 1)
	if (work_register & live_regs_mask)
	  offset += 4;
      
      asm_fprintf (f, "\tadd\t%s, sp, #%d\n",
		   name, offset + 16 + current_function_pretend_args_size);
      
      asm_fprintf (f, "\tstr\t%s, [sp, #%d]\n", name, offset + 4);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      
      if (live_regs_mask)
	{
	  asm_fprintf (f, "\tmov\t%s, pc\n", name);
	  asm_fprintf (f, "\tstr\t%s, [sp, #%d]\n", name, offset + 12);
	  asm_fprintf (f, "\tmov\t%s, fp\n", name);
	  asm_fprintf (f, "\tstr\t%s, [sp, #%d]\n", name, offset);
	}
      else
	{
	  /* No push instruction was emitted, so the FP store is placed
	     before the PC fetch instead of after it.  */
	  asm_fprintf (f, "\tmov\t%s, fp\n", name);
	  asm_fprintf (f, "\tstr\t%s, [sp, #%d]\n", name, offset);
	  asm_fprintf (f, "\tmov\t%s, pc\n", name);
	  asm_fprintf (f, "\tstr\t%s, [sp, #%d]\n", name, offset + 12);
	}
      
      asm_fprintf (f, "\tmov\t%s, lr\n", name);
      asm_fprintf (f, "\tstr\t%s, [sp, #%d]\n", name, offset + 8);
      asm_fprintf (f, "\tadd\t%s, sp, #%d\n", name, offset + 12);
      asm_fprintf (f, "\tmov\tfp, %s\t\t@ Backtrace structure created\n", name);
    }
  else if (live_regs_mask)
    thumb_pushpop (f, live_regs_mask, 1);

  /* Count the high registers (8-12) that are used and are not
     call-clobbered.  */
  for (regno = 8; regno < 13; regno++)
    {
      if (regs_ever_live[regno] && ! call_used_regs[regno])
	high_regs_pushed++;
    }

  if (high_regs_pushed)
    {
      int pushable_regs = 0;
      int mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* Find the highest-numbered high register that must be saved.  */
      for (next_hi_reg = 12; next_hi_reg > 7; next_hi_reg--)
	{
	  if (regs_ever_live[next_hi_reg] && ! call_used_regs[next_hi_reg])
	    break;
	}

      /* The low registers already saved above can be used to carry the
	 high register values.  */
      pushable_regs = mask;

      if (pushable_regs == 0)
	{
	  /* desperation time -- this probably will never happen */
	  /* Use register 3 as the carrier, keeping its old value in
	     register 12 while we do so.  */
	  if (regs_ever_live[3] || ! call_used_regs[3])
	    asm_fprintf (f, "\tmov\t%s, %s\n", reg_names[12], reg_names[3]);
	  mask = 1 << 3;
	}

      /* Each iteration copies high registers (highest first) into the
	 registers of MASK (highest first) and then pushes MASK.  */
      while (high_regs_pushed > 0)
	{
	  for (regno = 7; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (f, "\tmov\t%s, %s\n", reg_names[regno],
			       reg_names[next_hi_reg]);
		  high_regs_pushed--;
		  if (high_regs_pushed)
		    /* Step down to the next high register to save.  */
		    for (next_hi_reg--; next_hi_reg > 7; next_hi_reg--)
		      {
			if (regs_ever_live[next_hi_reg]
			    && ! call_used_regs[next_hi_reg])
			  break;
		      }
		  else
		    {
		      /* That was the last one: drop the registers below
			 REGNO from MASK so that only the registers
			 actually loaded are pushed.  */
		      mask &= ~ ((1 << regno) - 1);
		      break;
		    }
		}
	    }
	  thumb_pushpop (f, mask, 1);
	}

      /* Restore register 3 if it was borrowed above.  */
      if (pushable_regs == 0 && (regs_ever_live[3] || ! call_used_regs[3]))
	    asm_fprintf (f, "\tmov\t%s, %s\n", reg_names[3], reg_names[12]);
    }
}

/* Emit the RTL part of the prologue.  The stack pointer is lowered by
   the frame size plus the outgoing argument area, and the frame pointer
   is set up when one is needed.  A blockage insn is always emitted
   last.  */
void
thumb_expand_prologue ()
{
  HOST_WIDE_INT frame_bytes = (get_frame_size ()
			       + current_function_outgoing_args_size);

  if (frame_bytes != 0)
    {
      if (frame_bytes >= 512)
	{
	  /* The adjustment is not done with an immediate here; the
	     negated size is loaded into a low register and added.  */
	  int saved_low_regs = 0;
	  int borrowed;
	  int r;
	  rtx scratch;
	  rtx spare = 0;

	  /* Low registers that are used and not call-clobbered.  */
	  for (r = 0; r < 8; r++)
	    {
	      if (regs_ever_live[r] && ! call_used_regs[r])
		saved_low_regs |= 1 << r;
	    }

	  borrowed = (saved_low_regs == 0);

	  if (borrowed)
	    {
	      /* Very unlikely: no such low register exists.  Park the
		 contents of register 4 in register 12 and use
		 register 4 as the scratch.  */
	      spare = gen_rtx (REG, SImode, 12);
	      scratch = gen_rtx (REG, SImode, 4);
	      emit_insn (gen_movsi (spare, scratch));
	    }
	  else
	    {
	      /* Use the lowest-numbered register in the mask.  */
	      r = 0;
	      while (! (saved_low_regs & (1 << r)))
		r++;
	      scratch = gen_rtx (REG, SImode, r);
	    }

	  emit_insn (gen_movsi (scratch, GEN_INT (-frame_bytes)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				 scratch));

	  /* Give the borrowed register its value back.  */
	  if (borrowed)
	    emit_insn (gen_movsi (scratch, spare));
	}
      else
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-frame_bytes)));
    }

  if (frame_pointer_needed)
    {
      if (current_function_outgoing_args_size == 0)
	emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
      else
	{
	  /* The frame pointer is set to the stack pointer plus the size
	     of the outgoing argument area.  */
	  rtx args_bytes = GEN_INT (current_function_outgoing_args_size);

	  if (current_function_outgoing_args_size >= 1024)
	    {
	      emit_insn (gen_movsi (frame_pointer_rtx, args_bytes));
	      emit_insn (gen_addsi3 (frame_pointer_rtx, frame_pointer_rtx,
				     stack_pointer_rtx));
	    }
	  else
	    emit_insn (gen_addsi3 (frame_pointer_rtx, stack_pointer_rtx,
				   args_bytes));
	}
    }

  /* The blockage is emitted unconditionally; it is not restricted to
     profile_flag / profile_block_flag.  */
  emit_insn (gen_blockage ());
}

/* Emit the RTL part of the epilogue: raise the stack pointer by the
   frame size plus the outgoing argument area.  Nothing at all is
   emitted when that total is zero; otherwise the adjustment is followed
   by a blockage insn.  */
void
thumb_expand_epilogue ()
{
  HOST_WIDE_INT frame_bytes = (get_frame_size ()
			       + current_function_outgoing_args_size);

  if (frame_bytes == 0)
    return;

  if (frame_bytes >= 512)
    {
      /* Load the size into register 3 and add that.  The original
	 author notes that this register is always free in the
	 epilogue.  */
      rtx scratch = gen_rtx (REG, SImode, 3);

      emit_insn (gen_movsi (scratch, GEN_INT (frame_bytes)));
      emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			     scratch));
    }
  else
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (frame_bytes)));

  /* Emitted whenever the stack was adjusted; it is not restricted to
     profile_flag / profile_block_flag.  */
  emit_insn (gen_blockage ());
}

/* Per-function epilogue hook.  The only effect is to clear
   return_used_this_function; nothing is written to F, and FRAME_SIZE is
   not used.  */
void
thumb_function_epilogue (f, frame_size)
     FILE *f;
     int frame_size;
{
  /* ??? Probably not safe to set this here, since it assumes that a
     function will be emitted as assembly immediately after we generate
     RTL for it.  This does not happen for inline functions.  */
  return_used_this_function = 0;
  /* The block below is disabled; when enabled it would write an
     assembler comment marking the epilogue.  */
#if 0 /* TODO : comment not really needed */
  fprintf (f, "%s THUMB Epilogue\n", ASM_COMMENT_START);
#endif
}

/* The bits which aren't usefully expanded as rtl.

   Write to asm_out_file the assembler text that undoes the register
   saving done by thumb_function_prologue and returns from the function:
   the saved high registers are popped via low registers, then the saved
   low registers are popped, and finally control returns either by
   popping into the program counter or through thumb_exit.  Nothing is
   emitted when return_used_this_function is set.  Always returns the
   empty string.  */
char *
thumb_unexpanded_epilogue ()
{
  int regno;
  int live_regs_mask = 0;
  int high_regs_pushed = 0;
  int leaf_function = leaf_function_p ();
  int had_to_push_lr;

  if (return_used_this_function)
    return "";

  /* Recompute the same register sets as the prologue: low registers
     (0-7) and high registers (8-12) that are used and are not
     call-clobbered.  */
  for (regno = 0; regno < 8; regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      live_regs_mask |= 1 << regno;

  for (regno = 8; regno < 13; regno++)
    {
      if (regs_ever_live[regno] && ! call_used_regs[regno])
	high_regs_pushed ++;
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  eg the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */
  
  if (high_regs_pushed)
    {
      int mask = live_regs_mask;
      int next_hi_reg;
      int size;
      int mode;
       
#ifdef RTX_CODE
      /* If we can deduce the registers used from the function's return value.
	 This is more reliable than examining regs_ever_live[] because that
	 will be set if the register is ever used in the function, not just if
	 the register is used to hold a return value.  */

      if (current_function_return_rtx != 0)
	{
	  mode = GET_MODE (current_function_return_rtx);
	}
      else
#endif
	{
	  mode = DECL_MODE (DECL_RESULT (current_function_decl));
	}

      size = GET_MODE_SIZE (mode);

      /* Unless we are returning a type of size > 12 register r3 is available.  */
      if (size < 13)
	mask |=  1 << 3;

      if (mask == 0)
	{
	  /* Oh dear!  We have no low registers into which we can pop high registers!  */

	  fatal ("No low registers available for popping high registers");
	}
      
      /* Find the lowest-numbered high register that was saved.  */
      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (regs_ever_live[next_hi_reg] && ! call_used_regs[next_hi_reg])
	  break;

      while (high_regs_pushed)
	{
	  /* Find low register(s) into which the high register(s) can be popped.  */
	  /* One high register is accounted for per register in MASK;
	     the scan stops early once none remain.  */
	  for (regno = 0; regno < 8; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  /* Keep only the registers up to and including REGNO.  */
	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s). */
	  thumb_pushpop (asm_out_file, mask, 0);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno < 8; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%s, %s\n", 
			       reg_names[next_hi_reg], reg_names[regno]);
		  /* Advance to the next saved high register.  */
		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (regs_ever_live[next_hi_reg] && 
			! call_used_regs[next_hi_reg])
		      break;
		}
	    }
	}
    }

  /* Same condition as the one thumb_function_prologue uses when
     deciding to add bit 14 to its push mask.  */
  had_to_push_lr = (live_regs_mask || ! leaf_function || far_jump_used_p());
  
  if (TARGET_BACKTRACE && ((live_regs_mask & 0xFF) == 0) && regs_ever_live[ ARG_4_REGISTER ] != 0)
    {
      /* The stack backtrace structure creation code had to
	 push R7 in order to get a work register, so we pop
	 it now.   */
      
      /* NOTE(review): WORK_REGISTER is defined outside this chunk; it
	 is presumably register 7, matching the prologue -- confirm.  */
      live_regs_mask |= (1 << WORK_REGISTER);
    }
  
  if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Return by popping straight into the PC, unless the function
	 can be called in ARM mode.  */
      if (had_to_push_lr
	  && ! is_called_in_ARM_mode (current_function_decl))
	live_regs_mask |= 1 << PROGRAM_COUNTER;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE);
      
      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function or
	 it is still on the stack because we do not want to
	 return by doing a pop {pc}.  */
      
      /* thumb_exit is passed -1 when the return address was pushed and
	 the function can be called in ARM mode, and LINK_REGISTER
	 otherwise.  */
      if ((live_regs_mask & (1 << PROGRAM_COUNTER)) == 0)
	thumb_exit (asm_out_file,
		    (had_to_push_lr
		     && is_called_in_ARM_mode (current_function_decl)) ?
		    -1 : LINK_REGISTER);
    }
  else
    {
      /* Pop everything but the return address.  */
      live_regs_mask &= ~ (1 << PROGRAM_COUNTER);
      
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE);

      if (had_to_push_lr)
	{
	  /* Get the return address into a temporary register.  */
	  thumb_pushpop (asm_out_file, 1 << ARG_4_REGISTER, 0);
	}
      
      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%s, %s, #%d\n",
		   reg_names [STACK_POINTER],
		   reg_names [STACK_POINTER],
		   current_function_pretend_args_size);
      
      /* Return through whichever register now holds the return
	 address.  */
      thumb_exit (asm_out_file, had_to_push_lr ? ARG_4_REGISTER : LINK_REGISTER);
    }

  return "";
}

/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.

   OPERANDS[0] must be a REG (the lower register of the destination
   pair) and OPERANDS[1] must be a MEM; anything else is a fatal error.
   OPERANDS[2] is overwritten with a MEM for <address> + 4 in every case
   except register + register addressing.  The two loads are written to
   the assembler output in an order that does not clobber the address
   before its last use.  Always returns the empty string.  */

char *
thumb_load_double_from_address (operands)
     rtx * operands;
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;
  
  if (GET_CODE (operands[0]) != REG)
    fatal ("thumb_load_double_from_address: destination is not a register");
  
  if (GET_CODE (operands[1]) != MEM)
    fatal ("thumb_load_double_from_address: source is not a computed memory address");

  /* Get the memory address.  */
  
  addr = XEXP (operands[1], 0);
      
  /* Work out how the memory address is computed.  */

  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = gen_rtx (MEM, SImode, plus_constant (XEXP (operands[1], 0), 4));

      /* If the address register is also the lower destination register,
	 load the higher register first so that the address survives
	 until the second load.  */
      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2\t\t%@ created by thumb_load_double_from_address", operands);
	  output_asm_insn ("ldr\t%0, %1\t\t%@ created by thumb_load_double_from_address", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1\t\t%@ created by thumb_load_double_from_address", operands);
	  output_asm_insn ("ldr\t%H0, %2\t\t%@ created by thumb_load_double_from_address", operands);
	}
      break;
      
    case CONST:
      /* Compute <address> + 4 for the high order load.  */
	  
      operands[2] = gen_rtx (MEM, SImode, plus_constant (XEXP (operands[1], 0), 4));
	  
      output_asm_insn ("ldr\t%0, %1\t\t%@ created by thumb_load_double_from_address", operands);
      output_asm_insn ("ldr\t%H0, %2\t\t%@ created by thumb_load_double_from_address", operands);
      break;
	  
    case PLUS:
      arg1   = XEXP (addr, 0);
      arg2   = XEXP (addr, 1);
	    
      /* Whichever operand is constant is the offset; the other one is
	 the base.  */
      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;
  
      if (GET_CODE (base) != REG)
	fatal ("thumb_load_double_from_address: base is not a register");

      /* Catch the case of <address> = <reg> + <reg> */
  
      if (GET_CODE (offset) == REG)
	{
	  int reg_offset = REGNO (offset);
	  int reg_base   = REGNO (base);
	  int reg_dest   = REGNO (operands[0]);
	  
	  /* These three instructions are written with fprintf rather
	     than output_asm_insn, so each format must supply its own
	     terminating newline.  Without it the instructions share a
	     line and everything after the first assembler comment
	     marker is treated as comment text.  */

	  /* Add the base and offset registers together into the higher destination register.  */
	  
	  fprintf (asm_out_file, "\tadd\t%s, %s, %s\t\t%s created by thumb_load_double_from_address\n",
		   reg_names[ reg_dest + 1 ],
		   reg_names[ reg_base     ],
		   reg_names[ reg_offset   ],
		   ASM_COMMENT_START);
	  
	  /* Load the lower destination register from the address in the higher destination register.  */
	  
	  fprintf (asm_out_file, "\tldr\t%s, [%s, #0]\t\t%s created by thumb_load_double_from_address\n",
		   reg_names[ reg_dest ],
		   reg_names[ reg_dest + 1],
		   ASM_COMMENT_START);
	  
	  /* Load the higher destination register from its own address plus 4.  */
	  
	  fprintf (asm_out_file, "\tldr\t%s, [%s, #4]\t\t%s created by thumb_load_double_from_address\n",
		   reg_names[ reg_dest + 1 ],
		   reg_names[ reg_dest + 1 ],
		   ASM_COMMENT_START);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  
	  operands[2] = gen_rtx (MEM, SImode, plus_constant (XEXP (operands[1], 0), 4));
	  
	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
      
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2\t\t%@ created by thumb_load_double_from_address", operands);
	      output_asm_insn ("ldr\t%0, %1\t\t%@ created by thumb_load_double_from_address", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1\t\t%@ created by thumb_load_double_from_address", operands);
	      output_asm_insn ("ldr\t%H0, %2\t\t%@ created by thumb_load_double_from_address", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value directly.  */
      operands[2] = gen_rtx (MEM, SImode, plus_constant (XEXP (operands[1], 0), 4));
	  
      output_asm_insn ("ldr\t%H0, %2\t\t%@ created by thumb_load_double_from_address", operands);
      output_asm_insn ("ldr\t%0, %1\t\t%@ created by thumb_load_double_from_address", operands);
      break;
      
    default:
      /* Dump the offending operand before giving up.  */
      debug_rtx (operands[1]);
      fatal ("thumb_load_double_from_address: Unhandled address calculation");
      break;
    }
  
  return "";
}

/* Output an ldmia/stmia pair that copies N words (N is 2 or 3) from the
   address in operand 1 to the address in operand 0, both with
   writeback.  The transfer registers are operands 2..N+1; they are
   first reordered in place so that their register numbers do not
   decrease from operand 2 upwards.  Aborts for any other N.  Always
   returns the empty string.  */
char *
output_move_mem_multiple (n, operands)
     int n;
     rtx *operands;
{
  /* Compare-exchange schedule.  The first step alone orders two
     registers; all three steps order three.  */
  static int schedule[3][2] = { { 2, 3 }, { 3, 4 }, { 2, 3 } };
  int steps = 0;
  int step;

  if (n == 2)
    steps = 1;
  else if (n == 3)
    steps = 3;
  else
    abort ();

  for (step = 0; step < steps; step++)
    {
      int lower = schedule[step][0];
      int upper = schedule[step][1];

      if (REGNO (operands[lower]) > REGNO (operands[upper]))
	{
	  rtx held = operands[lower];

	  operands[lower] = operands[upper];
	  operands[upper] = held;
	}
    }

  if (n == 2)
    {
      output_asm_insn ("ldmia\t%1!, {%2, %3}", operands);
      output_asm_insn ("stmia\t%0!, {%2, %3}", operands);
    }
  else if (n == 3)
    {
      output_asm_insn ("ldmia\t%1!, {%2, %3, %4}", operands);
      output_asm_insn ("stmia\t%0!, {%2, %3, %4}", operands);
    }

  return "";
}

  
/* Return the epilogue size.  This is a fixed placeholder value (the
   original comment calls it "the answer to ...."); it is not derived
   from the current function.  */
int
thumb_epilogue_size ()
{
  int placeholder_size = 42;

  return placeholder_size;
}

/* Condition code suffixes, indexed by the value that
   thumb_condition_code computes.  The entries are laid out in pairs
   (eq/ne, cs/cc, mi/pl, vs/vc, hi/ls, ge/lt, gt/le), so flipping bit 0
   of an index selects the other member of the pair; that is how
   thumb_condition_code applies its INVERT argument.  Indices 4 to 7
   are not produced by thumb_condition_code.  */
static char *conds[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", 
  "hi", "ls", "ge", "lt", "gt", "le"
};

/* Return the condition code suffix for the comparison rtx X.  INVERT is
   exclusive-ORed into the table index, so a value of 1 selects the
   other member of the pair in conds[].  Aborts if X is not one of the
   ten comparison codes handled below.  */
static char *
thumb_condition_code (x, invert)
     rtx x;
     int invert;
{
  int comparison = GET_CODE (x);
  int slot;

  /* Equality.  */
  if (comparison == EQ)
    slot = 0;
  else if (comparison == NE)
    slot = 1;
  /* Unsigned orderings.  */
  else if (comparison == GEU)
    slot = 2;
  else if (comparison == LTU)
    slot = 3;
  else if (comparison == GTU)
    slot = 8;
  else if (comparison == LEU)
    slot = 9;
  /* Signed orderings.  */
  else if (comparison == GE)
    slot = 10;
  else if (comparison == LT)
    slot = 11;
  else if (comparison == GT)
    slot = 12;
  else if (comparison == LE)
    slot = 13;
  else
    abort ();

  return conds[slot ^ invert];
}

/* Print operand X to F.  CODE is the modifier letter from the output
   template, or zero when there is none:

     '@'  the assembler comment start string (X is not used)
     '_'  the user label prefix (X is not used)
     'd'  the condition code suffix for comparison X
     'D'  the suffix for the inverse of comparison X
     'Q'  the register of a pair that holds the least significant word
     'R'  the register of a pair that holds the most significant word
     'H'  the higher numbered register of a pair

   Any other non-zero CODE aborts.  With no modifier, X must be a REG, a
   MEM or a CONST_INT; anything else aborts.  */
void
thumb_print_operand (f, x, code)
     FILE *f;
     rtx x;
     int code;
{
  int step;

  switch (code)
    {
    case 0:
      /* No modifier: print the plain operand below.  */
      break;

    case '@':
      fputs (ASM_COMMENT_START, f);
      return;

    case '_':
      fputs (user_label_prefix, f);
      return;

    case 'D':
    case 'd':
      /* 'D' asks for the inverted condition, 'd' for the condition
	 itself.  Nothing is printed when there is no operand.  */
      if (x)
	fputs (thumb_condition_code (x, code == 'D' ? 1 : 0), f);
      return;

      /* About 'Q', 'R' and 'H':

	 For a DI or DF value held in a register pair, 'Q' names the
	 register holding the least significant word and 'R' the one
	 holding the most significant word.  'H' names the higher
	 numbered register of the pair.  When WORDS_BIG_ENDIAN is true
	 the most significant word is in the lower numbered register,
	 so 'H' and 'Q' name the same register; when it is false, 'H'
	 and 'R' do.

	 'Q' and 'R' are for operations where the significance of each
	 word matters, such as adding two values together.  'H' is for
	 operations where only the register order matters.  Loading a
	 pair from memory is an example: the word at the lower address
	 goes to the lower numbered register whatever the endian-ness,
	 but the order of the two loads can matter if the address is
	 held in one of the registers being loaded.  */
    case 'Q':
    case 'R':
    case 'H':
      if (REGNO (x) > 15)
	abort ();

      if (code == 'H')
	step = 1;
      else if (code == 'Q')
	step = WORDS_BIG_ENDIAN ? 1 : 0;
      else
	step = WORDS_BIG_ENDIAN ? 0 : 1;

      fputs (reg_names[REGNO (x) + step], f);
      return;

    default:
      abort ();
    }

  switch (GET_CODE (x))
    {
    case REG:
      fputs (reg_names[REGNO (x)], f);
      break;

    case MEM:
      output_address (XEXP (x, 0));
      break;

    case CONST_INT:
      /* Constants are printed with a leading '#'.  */
      fputc ('#', f);
      output_addr_const (f, x);
      break;

    default:
      abort ();
    }
}

#ifdef AOF_ASSEMBLER
/* Number used in the name of the next code area.  */
int arm_text_section_count = 1;

/* Return the directives that start a new, uniquely numbered code area.
   When IN_READONLY is non-zero an empty string is returned and the
   counter is left alone.  Otherwise the result is held in a static
   buffer that the next call overwrites.  */
char *
aof_text_section (in_readonly)
     int in_readonly;
{
  static char directive[100];

  if (! in_readonly)
    {
      sprintf (directive, "\tCODE16\n\tAREA |C$$code%d|, CODE, READONLY",
	       arm_text_section_count);
      arm_text_section_count++;
      return directive;
    }

  return "";
}

/* Number used in the name of the next data area.  */
static int arm_data_section_count = 1;

/* Return the directive that starts a new, uniquely numbered data area.
   The result is held in a static buffer that the next call
   overwrites.  */
char *
aof_data_section ()
{
  static char directive[100];
  int area_number = arm_data_section_count;

  arm_data_section_count = area_number + 1;
  sprintf (directive, "\tAREA |C$$data%d|, DATA", area_number);

  return directive;
}

/* The AOF thumb assembler is religiously strict about declarations of
   imported and exported symbols, so that it is impossible to declare a
   function as imported near the beginning of the file, and then to export
   it later on.  It is, however, possible to delay the decision until all
   the functions in the file have been compiled.  To get around this, we
   maintain a list of the imports and exports, and delete from it any that
   are subsequently defined.  At the end of compilation we spit the
   remainder of the list out before the END directive.  */

/* One entry in the singly linked list of pending imports.  */
struct import
{
  /* Next entry, or NULL at the end of the list.  */
  struct import *next;
  /* Symbol name.  The pointer is stored as given (the string is not
     copied), and the list functions compare names by pointer value.  */
  char *name;
};

/* Head of the pending imports list; NULL when the list is empty.  */
static struct import *imports_list = NULL;

/* Record NAME as a pending import.  Nothing happens if an entry with
   the same NAME pointer is already on the list; otherwise a new entry
   is allocated and placed at the head.  Names are compared by address,
   not by contents, and the string itself is not copied.  */
void
thumb_aof_add_import (name)
     char *name;
{
  struct import *entry = imports_list;

  while (entry)
    {
      if (entry->name == name)
	return;
      entry = entry->next;
    }

  entry = (struct import *) xmalloc (sizeof (struct import));
  entry->name = name;
  entry->next = imports_list;
  imports_list = entry;
}

/* Unlink from the pending imports list the first entry whose name
   pointer equals NAME, if there is one.  Names are compared by address,
   not by contents.  The unlinked entry is not freed.  */
void
thumb_aof_delete_import (name)
     char *name;
{
  struct import **link = &imports_list;

  /* Walk the chain of "next" pointers until one of them points at the
     entry to remove, or the end of the list is reached.  */
  while (*link && (*link)->name != name)
    link = & (*link)->next;

  if (*link)
    *link = (*link)->next;
}

/* Write an IMPORT directive to F for every entry still on the pending
   imports list.  The list head itself is advanced while writing, so the
   list is empty afterwards; the entries are not freed.  */
void
thumb_aof_dump_imports (f)
     FILE *f;
{
  for (; imports_list; imports_list = imports_list->next)
    {
      fputs ("\tIMPORT\t", f);
      assemble_name (f, imports_list->name);
      fputc ('\n', f);
    }
}
#endif

/* Decide whether a value of type TYPE is returned in memory (result 1)
   or in a register (result 0).  This is called by the macro
   RETURN_IN_MEMORY.

   Non-aggregates always go in a register, and aggregates larger than
   four bytes always go in memory.  Small structs and unions are
   examined field by field; any other small aggregate goes in memory.  */

int
thumb_return_in_memory (type)
     tree type;
{
  tree member;

  /* All simple types are returned in registers.  */
  if (! AGGREGATE_TYPE_P (type))
    return 0;

  /* All aggregates bigger than one word are returned in memory.  */
  if (int_size_in_bytes (type) > 4)
    return 1;

  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      /* For a struct the APCS says that we must return in a register if
	 every addressable element has an offset of zero.  In practice
	 that allows at most one element that is not a bit-field, and
	 it has to be the first element of the structure.  */

      /* Skip to the first real field.  C++ can put things other than
	 FIELD_DECLs on the chain.  */
      member = TYPE_FIELDS (type);
      while (member && TREE_CODE (member) != FIELD_DECL)
	member = TREE_CHAIN (member);

      /* An empty structure, allowed by an extension to ANSI C.  */
      if (member == NULL)
	return 0;

      /* Every field after the first must be a bit-field.  */
      while ((member = TREE_CHAIN (member)) != NULL)
	{
	  if (TREE_CODE (member) == FIELD_DECL
	      && ! DECL_BIT_FIELD_TYPE (member))
	    return 1;
	}

      return 0;

    case UNION_TYPE:
      /* A union can be returned in a register provided that none of
	 its members would itself be returned in memory.  */
      for (member = TYPE_FIELDS (type); member; member = TREE_CHAIN (member))
	{
	  if (TREE_CODE (member) == FIELD_DECL
	      && RETURN_IN_MEMORY (TREE_TYPE (member)))
	    return 1;
	}

      return 0;

    default:
      /* XXX Not sure what should be done for other aggregates, so put
	 them in memory.  */
      return 1;
    }
}

/* Validate the command line options handled in this file.  A structure
   size boundary string, if one was given, is parsed and accepted only
   when its value is 8 or 32; any other value draws a warning and leaves
   arm_structure_size_boundary alone.  A request for position
   independent code draws a warning and is switched off.  */
void
thumb_override_options ()
{
  if (structure_size_string != NULL)
    {
      int boundary = strtol (structure_size_string, NULL, 0);

      switch (boundary)
	{
	case 8:
	case 32:
	  arm_structure_size_boundary = boundary;
	  break;

	default:
	  warning ("Structure size boundary can only be set to 8 or 32");
	  break;
	}
    }

  if (flag_pic)
    {
      warning ("Position independent code not supported.  Ignored");
      flag_pic = 0;
    }
}