;; Machine description of the Mitsubishi M32R cpu for GNU C compiler
;; Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.

;; This file is part of GNU CC.

;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; See file "rtl.def" for documentation on define_insn, match_*, et al.

;; unspec usage
;; 0 - blockage
;; 1 - flush_icache
;; 2 - load_sda_base
;; 3 - setting carry in addx/subx instructions.

;; Insn type.  Used to default other attribute values.
;; Classification of each insn.  For the names ending in 2 or 4 the digit
;; is the insn size in bytes that the "length" attribute derives from the
;; type.  Insns that do not set "type" explicitly are classified as "misc".
(define_attr "type"
  "int2,int4,load2,load4,load8,store2,store4,store8,shift2,shift4,mul2,div4,uncond_branch,branch,call,multi,misc"
  (const_string "misc"))

;; Length in bytes.
;; Default insn length computed from "type": 2 for the *2 types, 4 for the
;; *4 types and for branches and calls, 8 for "multi".  Every other type
;; (load8, store8, misc) falls through to the final default of 4.  Patterns
;; may override the computed value with an explicit (set_attr "length" ...).
(define_attr "length" ""
  (cond [(eq_attr "type" "int2,load2,store2,shift2,mul2")
	 (const_int 2)

	 (eq_attr "type" "int4,load4,store4,shift4,div4")
	 (const_int 4)

	 (eq_attr "type" "multi")
	 (const_int 8)

	 (eq_attr "type" "uncond_branch,branch,call")
	 (const_int 4)]

	 (const_int 4)))

;; The length here is the length of a single asm.  Unfortunately it might be
;; 2 or 4 so we must allow for 4.  That's ok though.
;; Attribute values assumed for asm statements: a length of 4 and the
;; "multi" type.
(define_asm_attributes
  [(set_attr "length" "4")
   (set_attr "type" "multi")])


;; Whether an instruction is 16-bit or 32-bit
;; "short" for the two-byte types (int2, load2, store2, shift2, mul2),
;; "long" for every other type.  The function unit descriptions below key
;; off this attribute.
(define_attr "insn_size" "short,long"
  (if_then_else (eq_attr "type" "int2,load2,store2,shift2,mul2")
		(const_string "short")
		(const_string "long")))

;; Constant attribute: "yes" when TARGET_DEBUG is nonzero.
(define_attr "debug" "no,yes"
  (const (symbol_ref "(TARGET_DEBUG != 0)")))

;; Constant attribute: "yes" when optimize_size is nonzero.
(define_attr "opt_size" "no,yes"
  (const (symbol_ref "(optimize_size != 0)")))

;; Constant attribute: "yes" when TARGET_M32R is nonzero.  Every function
;; unit below is conditional on it.
(define_attr "m32r" "no,yes"
  (const (symbol_ref "(TARGET_M32R != 0)")))




;; ::::::::::::::::::::
;; ::
;; :: Function Units
;; ::
;; ::::::::::::::::::::

;; On most RISC machines, there are instructions whose results are not
;; available for a specific number of cycles.  Common cases are instructions
;; that load data from memory.  On many machines, a pipeline stall will result
;; if the data is referenced too soon after the load instruction.

;; In addition, many newer microprocessors have multiple function units,
;; usually one for integer and one for floating point, and often will incur
;; pipeline stalls when a result that is needed is not yet ready.

;; The descriptions in this section allow the specification of how much time
;; must elapse between the execution of an instruction and the time when its
;; result is used.  It also allows specification of when the execution of an
;; instruction will delay execution of similar instructions due to function
;; unit conflicts.

;; For the purposes of the specifications in this section, a machine is divided
;; into "function units", each of which execute a specific class of
;; instructions in first-in-first-out order.  Function units that accept one
;; instruction each cycle and allow a result to be used in the succeeding
;; instruction (usually via forwarding) need not be specified.  Classic RISC
;; microprocessors will normally have a single function unit, which we can call
;; `memory'.  The newer "superscalar" processors will often have function units
;; for floating point operations, usually at least a floating point adder and
;; multiplier.

;; Each usage of a function unit by a class of insns is specified with a
;; `define_function_unit' expression, which looks like this:

;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY
;;   ISSUE-DELAY [CONFLICT-LIST])

;; NAME is a string giving the name of the function unit.

;; MULTIPLICITY is an integer specifying the number of identical units in the
;; processor.  If more than one unit is specified, they will be scheduled
;; independently.  Only truly independent units should be counted; a pipelined
;; unit should be specified as a single unit.  (The only common example of a
;; machine that has multiple function units for a single instruction class that
;; are truly independent and not pipelined are the two multiply and two
;; increment units of the CDC 6600.)

;; SIMULTANEITY specifies the maximum number of insns that can be executing in
;; each instance of the function unit simultaneously or zero if the unit is
;; pipelined and has no limit.

;; All `define_function_unit' definitions referring to function unit NAME must
;; have the same name and values for MULTIPLICITY and SIMULTANEITY.

;; TEST is an attribute test that selects the insns we are describing in this
;; definition.  Note that an insn may use more than one function unit and a
;; function unit may be specified in more than one `define_function_unit'.

;; READY-DELAY is an integer that specifies the number of cycles after which
;; the result of the instruction can be used without introducing any stalls.

;; ISSUE-DELAY is an integer that specifies the number of cycles after the
;; instruction matching the TEST expression begins using this unit until a
;; subsequent instruction can begin.  A cost of N indicates an N-1 cycle delay.
;; A subsequent instruction may also be delayed if an earlier instruction has a
;; longer READY-DELAY value.  This blocking effect is computed using the
;; SIMULTANEITY, READY-DELAY, ISSUE-DELAY, and CONFLICT-LIST terms.  For a
;; normal non-pipelined function unit, SIMULTANEITY is one, the unit is taken
;; to block for the READY-DELAY cycles of the executing insn, and smaller
;; values of ISSUE-DELAY are ignored.

;; CONFLICT-LIST is an optional list giving detailed conflict costs for this
;; unit.  If specified, it is a list of condition test expressions to be
;; applied to insns chosen to execute in NAME following the particular insn
;; matching TEST that is already executing in NAME.  For each insn in the list,
;; ISSUE-DELAY specifies the conflict cost; for insns not in the list, the cost
;; is zero.  If not specified, CONFLICT-LIST defaults to all instructions that
;; use the function unit.

;; Typical uses of this vector are where a floating point function unit can
;; pipeline either single- or double-precision operations, but not both, or
;; where a memory unit can pipeline loads, but not stores, etc.

;; As an example, consider a classic RISC machine where the result of a load
;; instruction is not available for two cycles (a single "delay" instruction is
;; required) and where only one load instruction can be executed
;; simultaneously.  This would be specified as:

;; (define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0)

;; For the case of a floating point function unit that can pipeline
;; either single or double precision, but not both, the following could be
;; specified:
;;
;; (define_function_unit "fp" 1 0
;;   (eq_attr "type" "sp_fp") 4 4
;;   [(eq_attr "type" "dp_fp")])
;;
;; (define_function_unit "fp" 1 0
;;   (eq_attr "type" "dp_fp") 4 4
;;   [(eq_attr "type" "sp_fp")])

;; Note: The scheduler attempts to avoid function unit conflicts and uses all
;; the specifications in the `define_function_unit' expression.  It has
;; recently come to our attention that these specifications may not allow
;; modeling of some of the newer "superscalar" processors that have insns using
;; multiple pipelined units.  These insns will cause a potential conflict for
;; the second unit used during their execution and there is no way of
;; representing that conflict.  We welcome any examples of how function unit
;; conflicts work in such processors and suggestions for their representation.

;; Function units of the M32R
;; Units that take one cycle do not need to be specified.

;; (define_function_unit {name} {multiplicity} {simultaneity} {test}
;;                       {ready-delay} {issue-delay} [{conflict-list}])

;; Hack to get GCC to better pack the instructions.
;; We pretend there is a separate long function unit that conflicts with
;; both the left and right 16 bit insn slots.

;; "short" unit (multiplicity 2, simultaneity 2): short insns other than
;; load2.  Ready delay 1, issue delay 0; the conflict list names the long
;; insns.
(define_function_unit "short" 2 2
  (and (eq_attr "m32r" "yes")
       (and (eq_attr "insn_size" "short")
	    (eq_attr "type" "!load2")))
  1 0
  [(eq_attr "insn_size" "long")])

;; "short" unit, load2 insns: same as above except for a ready delay of 3.
(define_function_unit "short" 2 2	;; load delay of 1 clock for mem execution + 1 clock for WB
  (and (eq_attr "m32r" "yes")
       (eq_attr "type" "load2"))
  3 0
  [(eq_attr "insn_size" "long")])

;; "long" unit (multiplicity 1, simultaneity 1): long insns that are
;; neither load4 nor load8.  Ready delay 1, issue delay 0; the conflict
;; list names the short insns.
(define_function_unit "long" 1 1
  (and (eq_attr "m32r" "yes")
       (and (eq_attr "insn_size" "long")
	    (eq_attr "type" "!load4,load8")))
  1 0
  [(eq_attr "insn_size" "short")])

;; "long" unit, load4 and load8 insns: same as above except for a ready
;; delay of 3.
(define_function_unit "long" 1 1	;; load delay of 1 clock for mem execution + 1 clock for WB
  (and (eq_attr "m32r" "yes")
       (and (eq_attr "insn_size" "long")
	    (eq_attr "type" "load4,load8")))
  3 0
  [(eq_attr "insn_size" "short")])



;; Instruction grouping


;; Expand prologue as RTL
;; The RTL template is only a placeholder: all the work is done by
;; m32r_expand_prologue, and DONE keeps the template from being emitted.
(define_expand "prologue"
  [(const_int 1)]
  ""
  "
{
  m32r_expand_prologue ();
  DONE;
}")


;; Move instructions.
;;
;; For QI and HI moves, the register must contain the full properly
;; sign-extended value.  nonzero_bits assumes this [otherwise
;; SHORT_IMMEDIATES_SIGN_EXTEND must be used, but the comment for it
;; says it's a kludge and the .md files should be fixed instead].

;; QImode move expander.  The only preparation needed is to put the
;; source of a store into a register.
(define_expand "movqi"
  [(set (match_operand:QI 0 "general_operand" "")
	(match_operand:QI 1 "general_operand" ""))]
  ""
  "
{
  rtx dest = operands[0];

  /* Only mem = const and mem = mem need help: a store must take its
     value from a register.  Objects in the small data area are handled
     too.  */
  if (GET_CODE (dest) == MEM)
    operands[1] = force_reg (QImode, operands[1]);
}")

;; QImode move.  At least one operand must be a register.  The seven
;; alternatives are, in order: register copy, two ldi immediate forms
;; (2 and 4 bytes), two ldub loads (2 and 4 bytes) and two stb stores
;; (2 and 4 bytes).  The "type" and "length" lists are positional and must
;; stay in step with the constraint alternatives.
(define_insn "*movqi_insn"
  [(set (match_operand:QI 0 "move_dest_operand" "=r,r,r,r,r,T,m")
	(match_operand:QI 1 "move_src_operand" "r,I,JQR,T,m,r,r"))]
  "register_operand (operands[0], QImode) || register_operand (operands[1], QImode)"
  "@
   mv %0,%1
   ldi %0,%#%1
   ldi %0,%#%1
   ldub %0,%1
   ldub %0,%1
   stb %1,%0
   stb %1,%0"
  [(set_attr "type" "int2,int2,int4,load2,load4,store2,store4")
   (set_attr "length" "2,2,4,2,4,2,4")])

;; HImode move expander.  The only preparation needed is to put the
;; source of a store into a register.
(define_expand "movhi"
  [(set (match_operand:HI 0 "general_operand" "")
	(match_operand:HI 1 "general_operand" ""))]
  ""
  "
{
  rtx dest = operands[0];

  /* Only mem = const and mem = mem need help: a store must take its
     value from a register.  */
  if (GET_CODE (dest) == MEM)
    operands[1] = force_reg (HImode, operands[1]);
}")

;; HImode move.  At least one operand must be a register.  The eight
;; alternatives are, in order: register copy, two ldi immediate forms
;; (2 and 4 bytes), an ld24 immediate, two lduh loads (2 and 4 bytes) and
;; two sth stores (2 and 4 bytes).  The "type" and "length" lists are
;; positional and must stay in step with the constraint alternatives.
(define_insn "*movhi_insn"
  [(set (match_operand:HI 0 "move_dest_operand" "=r,r,r,r,r,r,T,m")
	(match_operand:HI 1 "move_src_operand" "r,I,JQR,K,T,m,r,r"))]
  "register_operand (operands[0], HImode) || register_operand (operands[1], HImode)"
  "@
   mv %0,%1
   ldi %0,%#%1
   ldi %0,%#%1
   ld24 %0,%#%1
   lduh %0,%1
   lduh %0,%1
   sth %1,%0
   sth %1,%0"
  [(set_attr "type" "int2,int2,int4,int4,load2,load4,store2,store4")
   (set_attr "length" "2,2,4,4,2,4,2,4")])

;; Build the RTL for storing register operand 1 at the address in register
;; operand 0 after pre-decrementing it.  There is no preparation code; the
;; expander exists only to construct this RTL.
(define_expand "movsi_push"
  [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "")))
	(match_operand:SI 1 "register_operand" ""))]
  ""
  "")

;; Build the RTL for loading register operand 0 from the address in
;; register operand 1, which is post-incremented.  There is no preparation
;; code; the expander exists only to construct this RTL.
(define_expand "movsi_pop"
  [(set (match_operand:SI 0 "register_operand" "")
	(mem:SI (post_inc:SI (match_operand:SI 1 "register_operand" ""))))]
  ""
  "")

;; SImode move expander.  Besides forcing the source of a store into a
;; register, it hands small data area references to movsi_sda and 32 bit
;; symbolic addresses to movsi_addr32.
(define_expand "movsi"
  [(set (match_operand:SI 0 "general_operand" "")
	(match_operand:SI 1 "general_operand" ""))]
  ""
  "
{
  /* Only mem = const and mem = mem need help: a store must take its
     value from a register.  */
  if (GET_CODE (operands[0]) == MEM)
    operands[1] = force_reg (SImode, operands[1]);

  if (small_data_operand (operands[1], SImode))
    {
      /* Reference to an object in the small data area.  */
      emit_insn (gen_movsi_sda (operands[0], operands[1]));
      DONE;
    }
  else if (addr32_operand (operands[1], SImode))
    {
      /* Medium or large code model: symbols have to be loaded with
	 seth/add3.  */
      emit_insn (gen_movsi_addr32 (operands[0], operands[1]));
      DONE;
    }
}")

;; ??? Do we need a const_double constraint here for large unsigned values?
;; SImode move.  At least one operand must be a register.  The output code
;; dispatches on the operand codes rather than on the alternative number:
;;   register destination: mv, ld, ldi, ld24 or seth depending on the
;;     source, or "#" when the source needs two insns and must be split;
;;   memory destination with a register source: st.
;; Anything else is reported through fatal_insn.  The "type" and "length"
;; lists are positional and must stay in step with the constraint
;; alternatives.
(define_insn "*movsi_insn"
  [(set (match_operand:SI 0 "move_dest_operand" "=r,r,r,r,r,r,r,r,r,T,U,m")
	(match_operand:SI 1 "move_src_operand" "r,I,J,MQ,L,n,T,U,m,r,r,r"))]
  "register_operand (operands[0], SImode) || register_operand (operands[1], SImode)"
  "*
{
  /* The destination decides which group of insns applies, so both tests
     must look at operands[0].  Testing operands[1] for SUBREG here would
     send a store of a SUBREG down the register-destination path and
     print mv with a memory destination.  */
  if (GET_CODE (operands[0]) == REG || GET_CODE (operands[0]) == SUBREG)
    {
      switch (GET_CODE (operands[1]))
	{
	  HOST_WIDE_INT value;

	  default:
	    break;

	  case REG:
	  case SUBREG:
	    return \"mv %0,%1\";

	  case MEM:
	    return \"ld %0,%1\";

	  case CONST_INT:
	    value = INTVAL (operands[1]);
	    if (INT16_P (value))
	      return \"ldi %0,%#%1\\t; %X1\";

	    if (UINT24_P (value))
	      return \"ld24 %0,%#%1\\t; %X1\";

	    if (UPPER16_P (value))
	      return \"seth %0,%#%T1\\t; %X1\";

	    /* Needs two insns; leave it to the splitter.  */
	    return \"#\";

	  case CONST:
	  case SYMBOL_REF:
	  case LABEL_REF:
	    if (TARGET_ADDR24)
	      return \"ld24 %0,%#%1\";

	    /* Needs two insns; leave it to the splitter.  */
	    return \"#\";
	}
    }

  else if (GET_CODE (operands[0]) == MEM
	   && (GET_CODE (operands[1]) == REG || GET_CODE (operands[1]) == SUBREG))
    return \"st %1,%0\";

  fatal_insn (\"bad movsi insn\", insn);
}"
  [(set_attr "type" "int2,int2,int4,int4,int4,multi,load2,load2,load4,store2,store2,store4")
   (set_attr "length" "2,2,4,4,4,8,2,2,4,2,2,4")])

; Try to use a four byte / two byte pair for constants not loadable with
; ldi, ld24, seth.

;; Load a constant that needs two insns.  Three strategies are tried in
;; order: load the complement and invert it, load a 24 bit value and shift
;; it left, or fall through to the template below (load the upper part,
;; then ior in the low 16 bits).
(define_split
 [(set (match_operand:SI 0 "register_operand" "")
       (match_operand:SI 1 "two_insn_const_operand" ""))]
  ""
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 0) (ior:SI (match_dup 0) (match_dup 3)))]
  "
{
  unsigned HOST_WIDE_INT value = INTVAL (operands[1]);
  unsigned HOST_WIDE_INT mask = 0x01fffffe;
  int count = 1;

  /* Two instructions are emitted in every case, but 2 byte instructions
     are preferred wherever possible.  We can assume the constant isn't
     loadable with any of ldi, ld24, or seth.  */

  /* First choice: load the 24 bit unsigned complement and invert it.  */
  if (UINT24_P (~value))
    {
      emit_insn (gen_movsi (operands[0], GEN_INT (~value)));
      emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
      DONE;
    }

  /* Second choice: load a 24 bit unsigned value and shift it left by
     1 to 7 bits.  The initial mask 0x01fffffe is just beyond the range
     of ld24.  */
  while (count < 8)
    {
      if ((value & ~mask) == 0)
	{
	  emit_insn (gen_movsi (operands[0], GEN_INT (value >> count)));
	  emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (count)));
	  DONE;
	}
      ++count;
      mask <<= 1;
    }

  /* No two byte insn helps, so fall back to seth/or3.  ~0xffff is used
     rather than 0xffff0000 because the latter fails on a 64-bit host.  */
  operands[2] = GEN_INT (value & ~0xffff);
  operands[3] = GEN_INT (value & 0xffff);
}")

;; With TARGET_ADDR32, split the load of a seth_add3_operand into a
;; high / lo_sum pair on the same destination register.
;; NOTE(review): presumably these two halves are matched by set_hi_si and
;; lo_sum_si (seth, add3) - confirm against the predicates.
(define_split
  [(set (match_operand:SI 0 "register_operand" "")
	(match_operand:SI 1 "seth_add3_operand" "i"))]
  "TARGET_ADDR32"
  [(set (match_dup 0)
	(high:SI (match_dup 1)))
   (set (match_dup 0)
	(lo_sum:SI (match_dup 0)
		   (match_dup 1)))]
  "")

;; Small data area support.
;; The address of _SDA_BASE_ is loaded into a register and all objects in
;; the small data area are indexed off that.  This is done for each reference
;; but cse will clean things up for us.  We let the compiler choose the
;; register to use so we needn't allocate (and maybe even fix) a special
;; register to use.  Since the load and store insns have a 16 bit offset the
;; total size of the data area can be 64K.  However, if the data area lives
;; above 16M (24 bits), _SDA_BASE_ will have to be loaded with seth/add3 which
;; would then yield 3 instructions to reference an object [though there would
;; be no net loss if two or more objects were referenced].  The 3 insns can be
;; reduced back to 2 if the size of the small data area were reduced to 32K
;; [then seth + ld/st would work for any object in the area].  Doing this
;; would require special handling of _SDA_BASE_ (its value would be
;; (.sdata + 32K) & 0xffff0000) and reloc computations would be different
;; [I think].  What to do about this is deferred until later and for now we
;; require .sdata to be in the first 16M.

;; Load the address of a small data area object: first load the SDA base
;; (unspec 2) into operand 2, then form the lo_sum of that base and the
;; object.
(define_expand "movsi_sda"
  [(set (match_dup 2)
	(unspec [(const_int 0)] 2))
   (set (match_operand:SI 0 "register_operand" "")
	(lo_sum:SI (match_dup 2)
		   (match_operand:SI 1 "small_data_operand" "")))]
  ""
  "
{
  /* During and after reload the destination doubles as the base
     register; before that a fresh pseudo is used.  */
  operands[2] = (reload_in_progress || reload_completed
		 ? operands[0]
		 : gen_reg_rtx (SImode));
}")

;; Matches the first set generated by movsi_sda (unspec 2, load_sda_base)
;; and loads the symbol _SDA_BASE_ with a single 4 byte ld24.
(define_insn "*load_sda_base"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec [(const_int 0)] 2))]
  ""
  "ld24 %0,#_SDA_BASE_"
  [(set_attr "type" "int4")
   (set_attr "length" "4")])

;; 32 bit address support.

;; Load a 32 bit address in two steps: the high part goes into operand 2,
;; then operand 0 receives the lo_sum of operand 2 and the address.
(define_expand "movsi_addr32"
  [(set (match_dup 2)
	; seth_add3_operand is used instead of addr32_operand because the
	; latter is too restrictive; the more general predicate is safer.
	(high:SI (match_operand:SI 1 "seth_add3_operand" "")))
   (set (match_operand:SI 0 "register_operand" "")
	(lo_sum:SI (match_dup 2) (match_dup 1)))]
  ""
  "
{
  /* During and after reload the destination doubles as the intermediate
     register; before that a fresh pseudo is used.  */
  operands[2] = (reload_in_progress || reload_completed
		 ? operands[0]
		 : gen_reg_rtx (SImode));
}")

;; Set a register to the high part of a symbolic operand with a 4 byte
;; seth, using the shigh() operator on the symbol.
(define_insn "set_hi_si"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(high:SI (match_operand 1 "symbolic_operand" "")))]
  ""
  "seth %0,%#shigh(%1)"
  [(set_attr "type" "int4")
   (set_attr "length" "4")])

;; lo_sum of a register and an immediate, emitted as a 4 byte add3.  The
;; immediate is printed through the %B operand code.
(define_insn "lo_sum_si"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(lo_sum:SI (match_operand:SI 1 "register_operand" "r")
		   (match_operand:SI 2 "immediate_operand" "in")))]
  ""
  "add3 %0,%1,%#%B2"
  [(set_attr "type" "int4")
   (set_attr "length" "4")])

;; DImode move expander.  The only preparation needed is to put the
;; source of a store into a register.
(define_expand "movdi"
  [(set (match_operand:DI 0 "general_operand" "")
	(match_operand:DI 1 "general_operand" ""))]
  ""
  "
{
  rtx dest = operands[0];

  /* Only mem = const and mem = mem need help: a store must take its
     value from a register.  */
  if (GET_CODE (dest) == MEM)
    operands[1] = force_reg (DImode, operands[1]);
}")

;; DImode move.  At least one operand must be a register.  The output is
;; always "#": no assembly is printed directly, and the define_split that
;; follows rewrites the move once reload has completed.  The "type" and
;; "length" lists are positional and must stay in step with the constraint
;; alternatives.
(define_insn "*movdi_insn"
  [(set (match_operand:DI 0 "move_dest_operand" "=r,r,r,r,m")
	(match_operand:DI 1 "move_double_src_operand" "r,nG,F,m,r"))]
  "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)"
  "#"
  [(set_attr "type" "multi,multi,multi,load8,store8")
   (set_attr "length" "4,4,16,6,6")])

;; After reload, replace a DImode move by whatever
;; gen_split_move_double builds for it.
(define_split
  [(set (match_operand:DI 0 "move_dest_operand" "")
	(match_operand:DI 1 "move_double_src_operand" ""))]
  "reload_completed"
  [(match_dup 2)]
  "
{
  /* The helper returns the complete replacement, which becomes the
     single element of the new pattern.  */
  operands[2] = gen_split_move_double (operands);
}")

;; Floating point move insns.

;; SFmode move expander.  The only preparation needed is to put the
;; source of a store into a register.
(define_expand "movsf"
  [(set (match_operand:SF 0 "general_operand" "")
	(match_operand:SF 1 "general_operand" ""))]
  ""
  "
{
  rtx dest = operands[0];

  /* Only mem = const and mem = mem need help: a store must take its
     value from a register.  */
  if (GET_CODE (dest) == MEM)
    operands[1] = force_reg (SFmode, operands[1]);
}")

;; SFmode move.  At least one operand must be a register.  Alternatives in
;; order: register copy, floating constant (output "#", handled by the
;; define_split that follows), two ld loads (2 and 4 bytes) and two st
;; stores (2 and 4 bytes).  The "type" and "length" lists are positional
;; and must stay in step with the constraint alternatives.
(define_insn "*movsf_insn"
  [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,r,T,m")
	(match_operand:SF 1 "move_src_operand" "r,F,T,m,r,r"))]
  "register_operand (operands[0], SFmode) || register_operand (operands[1], SFmode)"
  "@
   mv %0,%1
   #
   ld %0,%1
   ld %0,%1
   st %1,%0
   st %1,%0"
  ;; ??? Length of alternative 1 is either 2, 4 or 8.
  [(set_attr "type" "int2,multi,load2,load4,store2,store4")
   (set_attr "length" "2,8,2,4,2,4")])

;; After reload, turn the load of an SFmode constant into a move between
;; word 0 of the destination and word 0 of the constant.
(define_split
  [(set (match_operand:SF 0 "register_operand" "")
	(match_operand:SF 1 "const_double_operand" ""))]
  "reload_completed"
  [(set (match_dup 2) (match_dup 3))]
  "
{
  int i;

  /* operands[2] and operands[3] are word 0 of operands[0] and
     operands[1] respectively.  */
  for (i = 0; i < 2; i++)
    operands[i + 2] = operand_subword (operands[i], 0, 0, SFmode);
}")

;; DFmode move expander.  The only preparation needed is to put the
;; source of a store into a register.
(define_expand "movdf"
  [(set (match_operand:DF 0 "general_operand" "")
	(match_operand:DF 1 "general_operand" ""))]
  ""
  "
{
  rtx dest = operands[0];

  /* Only mem = const and mem = mem need help: a store must take its
     value from a register.  */
  if (GET_CODE (dest) == MEM)
    operands[1] = force_reg (DFmode, operands[1]);
}")

;; DFmode move.  At least one operand must be a register.  The output is
;; always "#": no assembly is printed directly, and the define_split that
;; follows rewrites the move once reload has completed.  The "type" and
;; "length" lists are positional and must stay in step with the constraint
;; alternatives.
(define_insn "*movdf_insn"
  [(set (match_operand:DF 0 "move_dest_operand" "=r,r,r,m")
	(match_operand:DF 1 "move_double_src_operand" "r,F,m,r"))]
  "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
  "#"
  [(set_attr "type" "multi,multi,load8,store8")
   (set_attr "length" "4,16,6,6")])

;; After reload, replace a DFmode move by whatever
;; gen_split_move_double builds for it.
(define_split
  [(set (match_operand:DF 0 "move_dest_operand" "")
	(match_operand:DF 1 "move_double_src_operand" ""))]
  "reload_completed"
  [(match_dup 2)]
  "
{
  /* The helper returns the complete replacement, which becomes the
     single element of the new pattern.  */
  operands[2] = gen_split_move_double (operands);
}")

;; Zero extension instructions.

;; Zero extend QImode to HImode.  A register source is masked with
;; and3 ...,255 (4 bytes); a memory source is loaded with ldub (2 or 4
;; bytes depending on the alternative).
(define_insn "zero_extendqihi2"
  [(set (match_operand:HI 0 "register_operand" "=r,r,r")
	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,T,m")))]
  ""
  "@
   and3 %0,%1,%#255
   ldub %0,%1
   ldub %0,%1"
  [(set_attr "type" "int4,load2,load4")
   (set_attr "length" "4,2,4")])

;; Zero extend QImode to SImode.  A register source is masked with
;; and3 ...,255 (4 bytes); a memory source is loaded with ldub (2 or 4
;; bytes depending on the alternative).
(define_insn "zero_extendqisi2"
  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,T,m")))]
  ""
  "@
   and3 %0,%1,%#255
   ldub %0,%1
   ldub %0,%1"
  [(set_attr "type" "int4,load2,load4")
   (set_attr "length" "4,2,4")])

;; Zero extend HImode to SImode.  A register source is masked with
;; and3 ...,65535 (4 bytes); a memory source is loaded with lduh (2 or 4
;; bytes depending on the alternative).
(define_insn "zero_extendhisi2"
  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,T,m")))]
  ""
  "@
   and3 %0,%1,%#65535
   lduh %0,%1
   lduh %0,%1"
  [(set_attr "type" "int4,load2,load4")
   (set_attr "length" "4,2,4")])

;; Sign extension instructions.
;; ??? See v850.md.

;; These patterns originally accepted general_operands, however, slightly
;; better code is generated by only accepting register_operands, and then
;; letting combine generate the lds[hb] insns.
;; [This comment copied from sparc.md, I think.]

;; Sign extend a QImode register to HImode by shifting an SImode view of
;; the source left by 24 into a temporary and then arithmetically right by
;; 24 into an SImode view of the destination.
(define_expand "extendqihi2"
  [(set (match_operand:HI 0 "register_operand" "")
	(sign_extend:HI (match_operand:QI 1 "register_operand" "")))]
  ""
  "
{
  rtx scratch = gen_reg_rtx (SImode);
  rtx count = GEN_INT (24);
  int src_word = 0;
  int dest_word = 0;
  rtx wide_src;

  /* Look through a SUBREG on either operand, remembering the word it
     selected.  */
  if (GET_CODE (operand1) == SUBREG)
    {
      src_word = SUBREG_WORD (operand1);
      operand1 = XEXP (operand1, 0);
    }
  if (GET_CODE (operand0) == SUBREG)
    {
      dest_word = SUBREG_WORD (operand0);
      operand0 = XEXP (operand0, 0);
    }

  /* Shift the byte to the top of the temporary ...  */
  wide_src = gen_rtx (SUBREG, SImode, operand1, src_word);
  emit_insn (gen_ashlsi3 (scratch, wide_src, count));

  /* ... and bring it back down with the sign replicated.  The
     destination is accessed in SImode.  */
  if (GET_MODE (operand0) != SImode)
    operand0 = gen_rtx (SUBREG, SImode, operand0, dest_word);
  emit_insn (gen_ashrsi3 (operand0, scratch, count));
  DONE;
}")

;; Sign extending QImode to HImode load from memory with ldb; the two
;; alternatives differ only in length (2 or 4 bytes).
(define_insn "*sign_extendqihi2_insn"
  [(set (match_operand:HI 0 "register_operand" "=r,r")
	(sign_extend:HI (match_operand:QI 1 "memory_operand" "T,m")))]
  ""
  "ldb %0,%1"
  [(set_attr "type" "load2,load4")
   (set_attr "length" "2,4")])

;; Sign extend a QImode register to SImode by shifting an SImode view of
;; the source left by 24 into a temporary and then arithmetically right by
;; 24 into the destination.
(define_expand "extendqisi2"
  [(set (match_operand:SI 0 "register_operand" "")
	(sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
  ""
  "
{
  rtx scratch = gen_reg_rtx (SImode);
  rtx count = GEN_INT (24);
  int src_word = 0;
  rtx wide_src;

  /* Look through a SUBREG on the source, remembering the word it
     selected.  */
  if (GET_CODE (operand1) == SUBREG)
    {
      src_word = SUBREG_WORD (operand1);
      operand1 = XEXP (operand1, 0);
    }

  /* Shift the byte to the top of the temporary, then bring it back
     down with the sign replicated.  */
  wide_src = gen_rtx (SUBREG, SImode, operand1, src_word);
  emit_insn (gen_ashlsi3 (scratch, wide_src, count));
  emit_insn (gen_ashrsi3 (operand0, scratch, count));
  DONE;
}")

;; Sign extending QImode to SImode load from memory with ldb; the two
;; alternatives differ only in length (2 or 4 bytes).
(define_insn "*sign_extendqisi2_insn"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(sign_extend:SI (match_operand:QI 1 "memory_operand" "T,m")))]
  ""
  "ldb %0,%1"
  [(set_attr "type" "load2,load4")
   (set_attr "length" "2,4")])

;; Sign extend an HImode register to SImode by shifting an SImode view of
;; the source left by 16 into a temporary and then arithmetically right by
;; 16 into the destination.
(define_expand "extendhisi2"
  [(set (match_operand:SI 0 "register_operand" "")
	(sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
  ""
  "
{
  rtx scratch = gen_reg_rtx (SImode);
  rtx count = GEN_INT (16);
  int src_word = 0;
  rtx wide_src;

  /* Look through a SUBREG on the source, remembering the word it
     selected.  */
  if (GET_CODE (operand1) == SUBREG)
    {
      src_word = SUBREG_WORD (operand1);
      operand1 = XEXP (operand1, 0);
    }

  /* Shift the halfword to the top of the temporary, then bring it back
     down with the sign replicated.  */
  wide_src = gen_rtx (SUBREG, SImode, operand1, src_word);
  emit_insn (gen_ashlsi3 (scratch, wide_src, count));
  emit_insn (gen_ashrsi3 (operand0, scratch, count));
  DONE;
}")

;; Sign extending HImode to SImode load from memory with ldh; the two
;; alternatives differ only in length (2 or 4 bytes).
(define_insn "*sign_extendhisi2_insn"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(sign_extend:SI (match_operand:HI 1 "memory_operand" "T,m")))]
  ""
  "ldh %0,%1"
  [(set_attr "type" "load2,load4")
   (set_attr "length" "2,4")])

;; Arithmetic instructions.

; ??? Adding an alternative to split add3 of small constants into two
; insns yields better instruction packing but slower code.  Adds of small
; values are done a lot.

;; SImode addition.  Alternatives in order: two-operand add of a register
;; (2 bytes), two-operand addi of an immediate (2 bytes) and three-operand
;; add3 with an immediate (4 bytes).  The first two require operand 1 to
;; be in the destination register; the "%" lets operands 1 and 2 be
;; exchanged to achieve that.
(define_insn "addsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
	(plus:SI (match_operand:SI 1 "register_operand" "%0,0,r")
		 (match_operand:SI 2 "nonmemory_operand" "r,I,J")))]
  ""
  "@
   add %0,%2
   addi %0,%#%2
   add3 %0,%1,%#%2"
  [(set_attr "type" "int2,int2,int4")
   (set_attr "length" "2,2,4")])

;(define_split
;  [(set (match_operand:SI 0 "register_operand" "")
;	(plus:SI (match_operand:SI 1 "register_operand" "")
;		 (match_operand:SI 2 "int8_operand" "")))]
;  "reload_completed
;   && REGNO (operands[0]) != REGNO (operands[1])
;   && INT8_P (INTVAL (operands[2]))
;   && INTVAL (operands[2]) != 0"
;  [(set (match_dup 0) (match_dup 1))
;   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))]
;  "")

;; DImode addition.  Register 17 is clobbered.  The output is always "#":
;; the define_split that follows expands it after reload into a
;; carry-clearing insn and two add-with-carry insns, which is where the
;; length of 6 comes from (three 2 byte insns).
(define_insn "adddi3"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(plus:DI (match_operand:DI 1 "register_operand" "%0")
		 (match_operand:DI 2 "register_operand" "r")))
   (clobber (reg:SI 17))]
  ""
  "#"
  [(set_attr "type" "multi")
   (set_attr "length" "6")])

;; ??? The cmp clears the condition bit.  Can we speed up somehow?
;; After reload, expand a DImode add into three insns: clear the clobbered
;; operand 3, then add the two words of operand 2 into the two words of
;; operand 0, each add also consuming and redefining operand 3.
;; operands[4]/[5] are handled first, operands[6]/[7] second.
(define_split
  [(set (match_operand:DI 0 "register_operand" "")
	(plus:DI (match_operand:DI 1 "register_operand" "")
		 (match_operand:DI 2 "register_operand" "")))
   (clobber (match_operand 3 "" ""))]
  "reload_completed"
  [(parallel [(set (match_dup 3)
		   (const_int 0))
	      (use (match_dup 4))])
   (parallel [(set (match_dup 4)
		   (plus:SI (match_dup 4)
			    (plus:SI (match_dup 5)
				     (match_dup 3))))
	      (set (match_dup 3)
		   (unspec [(const_int 0)] 3))])
   (parallel [(set (match_dup 6)
		   (plus:SI (match_dup 6)
			    (plus:SI (match_dup 7)
				     (match_dup 3))))
	      (set (match_dup 3)
		   (unspec [(const_int 0)] 3))])]
  "
{
  /* Word numbers of the half that is added first and of the half that
     is added second.  They swap with WORDS_BIG_ENDIAN.  */
  int first_word = (WORDS_BIG_ENDIAN != 0);
  int second_word = (WORDS_BIG_ENDIAN == 0);

  operands[4] = operand_subword (operands[0], first_word, 0, DImode);
  operands[5] = operand_subword (operands[2], first_word, 0, DImode);
  operands[6] = operand_subword (operands[0], second_word, 0, DImode);
  operands[7] = operand_subword (operands[2], second_word, 0, DImode);
}")

;; Set register 17 to zero by comparing a register with itself.  The
;; (use ...) only supplies a register to name in the cmp instruction.
(define_insn "*clear_c"
  [(set (reg:SI 17)
	(const_int 0))
   (use (match_operand:SI 0 "register_operand" "r"))]
  ""
  "cmp %0,%0"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; Add with carry: operand 0 = operand 1 + (operand 2 + register 17),
;; emitted as addx.  Register 17 is also set, to unspec 3 (see the unspec
;; list at the top of the file).  Operand 1 must be in the destination
;; register.
(define_insn "*add_carry"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI (match_operand:SI 1 "register_operand" "%0")
		 (plus:SI (match_operand:SI 2 "register_operand" "r")
			  (reg:SI 17))))
   (set (reg:SI 17)
	(unspec [(const_int 0)] 3))]
  ""
  "addx %0,%2"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; SImode subtraction of two registers with the two-operand sub; operand 1
;; must be in the destination register.
(define_insn "subsi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI (match_operand:SI 1 "register_operand" "0")
		  (match_operand:SI 2 "register_operand" "r")))]
  ""
  "sub %0,%2"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; DImode subtraction.  Register 17 is clobbered.  The output is always
;; "#": the define_split that follows expands it after reload into a
;; carry-clearing insn and two subtract-with-carry insns, which is where
;; the length of 6 comes from (three 2 byte insns).
(define_insn "subdi3"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(minus:DI (match_operand:DI 1 "register_operand" "0")
		  (match_operand:DI 2 "register_operand" "r")))
   (clobber (reg:SI 17))]
  ""
  "#"
  [(set_attr "type" "multi")
   (set_attr "length" "6")])

;; ??? The cmp clears the condition bit.  Can we speed up somehow?
;; After reload, expand a DImode subtract into three insns: clear the
;; clobbered operand 3, then subtract the two words of operand 2 from the
;; two words of operand 0, each subtract also consuming and redefining
;; operand 3.  operands[4]/[5] are handled first, operands[6]/[7] second.
(define_split
  [(set (match_operand:DI 0 "register_operand" "")
	(minus:DI (match_operand:DI 1 "register_operand" "")
		  (match_operand:DI 2 "register_operand" "")))
   (clobber (match_operand 3 "" ""))]
  "reload_completed"
  [(parallel [(set (match_dup 3)
		   (const_int 0))
	      (use (match_dup 4))])
   (parallel [(set (match_dup 4)
		   (minus:SI (match_dup 4)
			     (minus:SI (match_dup 5)
				       (match_dup 3))))
	      (set (match_dup 3)
		   (unspec [(const_int 0)] 3))])
   (parallel [(set (match_dup 6)
		   (minus:SI (match_dup 6)
			     (minus:SI (match_dup 7)
				       (match_dup 3))))
	      (set (match_dup 3)
		   (unspec [(const_int 0)] 3))])]
  "
{
  /* Word numbers of the half that is subtracted first and of the half
     that is subtracted second.  They swap with WORDS_BIG_ENDIAN.  */
  int first_word = (WORDS_BIG_ENDIAN != 0);
  int second_word = (WORDS_BIG_ENDIAN == 0);

  operands[4] = operand_subword (operands[0], first_word, 0, DImode);
  operands[5] = operand_subword (operands[2], first_word, 0, DImode);
  operands[6] = operand_subword (operands[0], second_word, 0, DImode);
  operands[7] = operand_subword (operands[2], second_word, 0, DImode);
}")

;; Subtract with carry, emitted as subx.  Register 17 is read and then
;; set to unspec 3 (see the unspec list at the top of the file).  Operand 1
;; must be in the destination register.
;;
;; Operand 1 must not carry the "%" modifier: "%" declares an operand pair
;; commutative, and exchanging the operands of a minus would compute
;; operand 2 - operand 1 instead.
;;
;; NOTE(review): the RTL reads operand 1 - (operand 2 - carry), whereas
;; subx presumably computes operand 1 - operand 2 - carry.  The DImode
;; subtract splitter generates exactly this shape, so the two would have
;; to be changed together - confirm against the M32R manual.
(define_insn "*sub_carry"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI (match_operand:SI 1 "register_operand" "0")
		  (minus:SI (match_operand:SI 2 "register_operand" "r")
			    (reg:SI 17))))
   (set (reg:SI 17)
	(unspec [(const_int 0)] 3))]
  ""
  "subx %0,%2"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

; Multiply/Divide instructions.

;; Widening multiply of two sign-extended HImode registers to SImode.
;; Emits two instructions (mullo, then mvfacmi into the destination) for a
;; total length of 4.
(define_insn "mulhisi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
		 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
  ""
  "mullo %1,%2\;mvfacmi %0"
  [(set_attr "type" "multi")
   (set_attr "length" "4")])

;; SImode multiply with the two-operand mul (2 bytes).  Operand 1 must be
;; in the destination register; "%" allows operands 1 and 2 to be
;; exchanged to achieve that.
(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(mult:SI (match_operand:SI 1 "register_operand" "%0")
		 (match_operand:SI 2 "register_operand" "r")))]
  ""
  "mul %0,%2"
  [(set_attr "type" "mul2")
   (set_attr "length" "2")])

;; Signed SImode division with the two-operand div (4 bytes).  The
;; dividend, operand 1, must be in the destination register.
(define_insn "divsi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(div:SI (match_operand:SI 1 "register_operand" "0")
		(match_operand:SI 2 "register_operand" "r")))]
  ""
  "div %0,%2"
  [(set_attr "type" "div4")
   (set_attr "length" "4")])

;; Unsigned SImode division with the two-operand divu (4 bytes).  The
;; dividend, operand 1, must be in the destination register.
(define_insn "udivsi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(udiv:SI (match_operand:SI 1 "register_operand" "0")
		 (match_operand:SI 2 "register_operand" "r")))]
  ""
  "divu %0,%2"
  [(set_attr "type" "div4")
   (set_attr "length" "4")])

;; Signed SImode remainder with the two-operand rem (4 bytes).  The
;; dividend, operand 1, must be in the destination register.
(define_insn "modsi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(mod:SI (match_operand:SI 1 "register_operand" "0")
		(match_operand:SI 2 "register_operand" "r")))]
  ""
  "rem %0,%2"
  [(set_attr "type" "div4")
   (set_attr "length" "4")])

;; Unsigned SImode remainder with the two-operand remu (4 bytes).  The
;; dividend, operand 1, must be in the destination register.
(define_insn "umodsi3"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(umod:SI (match_operand:SI 1 "register_operand" "0")
		 (match_operand:SI 2 "register_operand" "r")))]
  ""
  "remu %0,%2"
  [(set_attr "type" "div4")
   (set_attr "length" "4")])

;; Boolean instructions.
;;
;; We don't define the DImode versions as expand_binop does a good enough job.
;; And if it doesn't it should be fixed.

;; SImode and.  Alternative 0 is the two-operand register form (2 bytes,
;; operand 1 in the destination register); alternative 1 is the
;; three-operand and3 with an immediate (4 bytes), which also prints the
;; constant through %X in a trailing assembler comment.
(define_insn "andsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(and:SI (match_operand:SI 1 "register_operand" "%0,r")
		(match_operand:SI 2 "nonmemory_operand" "r,K")))]
  ""
  "@
   and %0,%2
   and3 %0,%1,%#%2\\t; %X2"
  [(set_attr "type" "int2,int4")
   (set_attr "length" "2,4")])

;; SImode inclusive or.  Alternative 0 is the two-operand register form
;; (2 bytes, operand 1 in the destination register); alternative 1 is the
;; three-operand or3 with an immediate (4 bytes), which also prints the
;; constant through %X in a trailing assembler comment.
(define_insn "iorsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(ior:SI (match_operand:SI 1 "register_operand" "%0,r")
		(match_operand:SI 2 "nonmemory_operand" "r,K")))]
  ""
  "@
   or %0,%2
   or3 %0,%1,%#%2\\t; %X2"
  [(set_attr "type" "int2,int4")
   (set_attr "length" "2,4")])

;; SImode exclusive or.  Alternative 0 is the two-operand register form
;; (2 bytes, operand 1 in the destination register); alternative 1 is the
;; three-operand xor3 with an immediate (4 bytes), which also prints the
;; constant through %X in a trailing assembler comment.
(define_insn "xorsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(xor:SI (match_operand:SI 1 "register_operand" "%0,r")
		(match_operand:SI 2 "nonmemory_operand" "r,K")))]
  ""
  "@
   xor %0,%2
   xor3 %0,%1,%#%2\\t; %X2"
  [(set_attr "type" "int2,int4")
   (set_attr "length" "2,4")])

;; SImode arithmetic negation.  Source and destination are independent
;; registers; the instruction is a 2-byte `neg'.
(define_insn "negsi2"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(neg:SI (match_operand:SI 1 "register_operand" "r")))]
  ""
  "neg %0,%1"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; SImode one's complement (bitwise NOT).  Source and destination are
;; independent registers; the instruction is a 2-byte `not'.
(define_insn "one_cmplsi2"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(not:SI (match_operand:SI 1 "register_operand" "r")))]
  ""
  "not %0,%1"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; Shift instructions.

;; SImode left shift.  Alternatives, in order:
;;   0: count in a register, source tied to the destination (`sll', 2 bytes)
;;   1: "O" immediate count, source tied to the destination (`slli', 2 bytes)
;;   2: "K" immediate count, separate source register (`sll3', 4 bytes)
(define_insn "ashlsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
	(ashift:SI (match_operand:SI 1 "register_operand" "0,0,r")
		   (match_operand:SI 2 "reg_or_uint16_operand" "r,O,K")))]
  ""
  "@
   sll %0,%2
   slli %0,%#%2
   sll3 %0,%1,%#%2"
  [(set_attr "type" "shift2,shift2,shift4")
   (set_attr "length" "2,2,4")])

;; SImode arithmetic right shift.  Alternatives, in order:
;;   0: count in a register, source tied to the destination (`sra', 2 bytes)
;;   1: "O" immediate count, source tied to the destination (`srai', 2 bytes)
;;   2: "K" immediate count, separate source register (`sra3', 4 bytes)
(define_insn "ashrsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
	(ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
		     (match_operand:SI 2 "reg_or_uint16_operand" "r,O,K")))]
  ""
  "@
   sra %0,%2
   srai %0,%#%2
   sra3 %0,%1,%#%2"
  [(set_attr "type" "shift2,shift2,shift4")
   (set_attr "length" "2,2,4")])

;; SImode logical right shift.  Alternatives, in order:
;;   0: count in a register, source tied to the destination (`srl', 2 bytes)
;;   1: "O" immediate count, source tied to the destination (`srli', 2 bytes)
;;   2: "K" immediate count, separate source register (`srl3', 4 bytes)
(define_insn "lshrsi3"
  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
	(lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
		     (match_operand:SI 2 "reg_or_uint16_operand" "r,O,K")))]
  ""
  "@
   srl %0,%2
   srli %0,%#%2
   srl3 %0,%1,%#%2"
  [(set_attr "type" "shift2,shift2,shift4")
   (set_attr "length" "2,2,4")])

;; Compare instructions.
;; This controls RTL generation and register allocation.

;; We generate RTL for comparisons and branches by having the cmpxx 
;; patterns store away the operands.  Then the bcc patterns
;; emit RTL for both the compare and the branch.
;;
;; On the m32r it is more efficient to use the bxxz instructions and
;; thus merge the compare and branch into one instruction, so they are
;; preferred.

;; Compare expander.  No RTL is emitted here: the two operands are only
;; recorded in m32r_compare_op0 / m32r_compare_op1, and DONE is executed
;; before the (set (reg:SI 17) ...) template would be generated.
(define_expand "cmpsi"
  [(set (reg:SI 17)
	(compare:SI (match_operand:SI 0 "register_operand" "")
		    (match_operand:SI 1 "nonmemory_operand" "")))]
  ""
  "
{
  m32r_compare_op0 = operands[0];
  m32r_compare_op1 = operands[1];
  DONE;
}")


;; The cmp_xxx_insn patterns set the condition bit to the result of the
;; comparison.  There isn't a "compare equal" instruction so cmp_eqsi_insn
;; is quite inefficient.  However, it is rarely used.

;; Set the condition bit (reg 17) to operand 0 == operand 1.
;; Alternative 0 compares two registers, alternative 1 a register with a
;; "P" constant.  Except for a compare against constant zero, a value is
;; first built in a register and then tested with `cmpui ...,#1'.
;; Operand 2 is an earlyclobber scratch register.
(define_insn "cmp_eqsi_insn"
  [(set (reg:SI 17)
	(eq:SI (match_operand:SI 0 "register_operand" "r,r")
	       (match_operand:SI 1 "reg_or_cmp_int16_operand" "r,P")))
   (clobber (match_scratch:SI 2 "=&r,&r"))]
  ""
  "*
{
  /* Register/register: copy operand 0 to the scratch, subtract operand 1
     and test the difference.  */
  if (which_alternative == 0)
    return \"mv %2,%0\;sub %2,%1\;cmpui %2,#1\";

  /* Register/constant.  A zero constant needs no arithmetic at all.  */
  if (INTVAL (operands [1]) == 0)
    return \"cmpui %0, #1\";

  /* Scratch and operand 0 are the same register: adjust it in place.
     NOTE(review): %N1 presumably prints the negated constant -- confirm
     against the operand printer.  */
  if (REGNO (operands [2]) == REGNO (operands [0]))
    return \"addi %0,%#%N1\;cmpui %2,#1\";

  /* General case: form the adjusted value in the scratch register.  */
  return \"add3 %2,%0,%#%N1\;cmpui %2,#1\";
}"
  [(set_attr "type" "multi,multi")
   (set_attr "length" "8,8")])

;; Set the condition bit (reg 17) to the signed test operand 0 < operand 1.
;; Alternative 0 compares two registers with 2-byte `cmp'; alternative 1
;; compares a register with a "J" immediate using 4-byte `cmpi'.
(define_insn "cmp_ltsi_insn"
  [(set (reg:SI 17)
	(lt:SI (match_operand:SI 0 "register_operand" "r,r")
	       (match_operand:SI 1 "reg_or_int16_operand" "r,J")))]
  ""
  "@
   cmp %0,%1
   cmpi %0,%#%1"
  [(set_attr "type" "int2,int4")
   (set_attr "length" "2,4")])

;; Set the condition bit (reg 17) to the unsigned test operand 0 < operand 1.
;; Alternative 0 compares two registers with 2-byte `cmpu'; alternative 1
;; compares a register with a "K" immediate using 4-byte `cmpui'.
(define_insn "cmp_ltusi_insn"
  [(set (reg:SI 17)
	(ltu:SI (match_operand:SI 0 "register_operand" "r,r")
	        (match_operand:SI 1 "reg_or_uint16_operand" "r,K")))]
  ""
  "@
   cmpu %0,%1
   cmpui %0,%#%1"
  [(set_attr "type" "int2,int4")
   (set_attr "length" "2,4")])

;; reg == small constant comparisons are best handled by putting the result
;; of the comparison in a tmp reg and then using beqz/bnez.
;; ??? The result register doesn't contain 0/STORE_FLAG_VALUE,
;; it contains 0/non-zero.

;; Compute a zero/non-zero "not equal to constant" result in a register.
;; Alternative 0: source tied to the destination, "N" constant, 2-byte `addi'.
;; Alternative 1: separate source register, "P" constant, 4-byte `add3'.
;; NOTE(review): %N2 presumably prints the negated constant, so the result
;; is operand 1 minus the constant -- confirm against the operand printer.
(define_insn "cmp_ne_small_const_insn"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(ne:SI (match_operand:SI 1 "register_operand" "0,r")
	       (match_operand:SI 2 "cmp_int16_operand" "N,P")))]
  ""
  "@
   addi %0,%#%N2
   add3 %0,%1,%#%N2"
  [(set_attr "type" "int2,int4")
   (set_attr "length" "2,4")])

;; These control RTL generation for conditional jump insns.

;; Branch-if-equal expander.  Operand 0 is the target label.  The condition
;; substituted at (match_dup 1) is whatever gen_compare returns for EQ on
;; the operands previously recorded in m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "beq"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)EQ, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-not-equal expander.  Operand 0 is the target label.  The
;; condition substituted at (match_dup 1) is whatever gen_compare returns
;; for NE on the operands recorded in m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bne"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)NE, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-greater (signed) expander.  Operand 0 is the target label.  The
;; condition substituted at (match_dup 1) is whatever gen_compare returns
;; for GT on the operands recorded in m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bgt"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)GT, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-less-or-equal (signed) expander.  Operand 0 is the target
;; label.  The condition substituted at (match_dup 1) is whatever
;; gen_compare returns for LE on the operands recorded in
;; m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "ble"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)LE, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-greater-or-equal (signed) expander.  Operand 0 is the target
;; label.  The condition substituted at (match_dup 1) is whatever
;; gen_compare returns for GE on the operands recorded in
;; m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bge"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)GE, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-less (signed) expander.  Operand 0 is the target label.  The
;; condition substituted at (match_dup 1) is whatever gen_compare returns
;; for LT on the operands recorded in m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "blt"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)LT, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-greater (unsigned) expander.  Operand 0 is the target label.
;; The condition substituted at (match_dup 1) is whatever gen_compare
;; returns for GTU on the operands recorded in
;; m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bgtu"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)GTU, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-less-or-equal (unsigned) expander.  Operand 0 is the target
;; label.  The condition substituted at (match_dup 1) is whatever
;; gen_compare returns for LEU on the operands recorded in
;; m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bleu"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)LEU, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-greater-or-equal (unsigned) expander.  Operand 0 is the target
;; label.  The condition substituted at (match_dup 1) is whatever
;; gen_compare returns for GEU on the operands recorded in
;; m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bgeu"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)GEU, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Branch-if-less (unsigned) expander.  Operand 0 is the target label.  The
;; condition substituted at (match_dup 1) is whatever gen_compare returns
;; for LTU on the operands recorded in m32r_compare_op0/m32r_compare_op1.
;; The trailing FALSE flag is interpreted by gen_compare, defined elsewhere.
(define_expand "bltu"
  [(set (pc)
	(if_then_else (match_dup 1)
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "
{
  operands[1] = gen_compare ((int)LTU, m32r_compare_op0, m32r_compare_op1, FALSE);
}")

;; Now match both normal and inverted jump.

;; Conditional branch on the condition bit (reg 17), taken when the
;; comparison of reg 17 with zero holds.  NE selects `bc', EQ selects `bnc';
;; the `.s' suffix is appended when the computed length is 2 bytes.
(define_insn "*branch_insn"
  [(set (pc)
	(if_then_else (match_operator 1 "eqne_comparison_operator"
				      [(reg 17) (const_int 0)])
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "*
{
  /* The template is built in a static buffer because it is returned to
     the caller.  */
  static char instruction[40];
  char *opcode = (GET_CODE (operands[1]) == NE) ? \"bc\" : \"bnc\";
  char *suffix = (get_attr_length (insn) == 2) ? \".s\" : \"\";

  sprintf (instruction, \"%s%s %%l0\", opcode, suffix);
  return instruction;
}"
  [(set_attr "type" "branch")
   ; We use 400/800 instead of 512/1024 to account for inaccurate insn
   ; lengths and insn alignments that are complex to track.
   ; Hyper-precision is not important here.  It may matter more once the
   ; chip supports parallel execution, but until then this is simple and
   ; suffices.
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 400))
					   (const_int 800))
				      (const_int 2)
				      (const_int 4)))])

;; Inverted form of the condition-bit branch: the label sits in the "else"
;; arm, so the branch is taken when the comparison of reg 17 with zero does
;; NOT hold.  EQ therefore selects `bc' and NE selects `bnc'; the `.s'
;; suffix is appended when the computed length is 2 bytes.
(define_insn "*rev_branch_insn"
  [(set (pc)
	(if_then_else (match_operator 1 "eqne_comparison_operator"
				      [(reg 17) (const_int 0)])
		      (pc)
		      (label_ref (match_operand 0 "" ""))))]
  ;"REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))"
  ""
  "*
{
  /* The template is built in a static buffer because it is returned to
     the caller.  */
  static char instruction[40];
  char *opcode = (GET_CODE (operands[1]) == EQ) ? \"bc\" : \"bnc\";
  char *suffix = (get_attr_length (insn) == 2) ? \".s\" : \"\";

  sprintf (instruction, \"%s%s %%l0\", opcode, suffix);
  return instruction;
}"
  [(set_attr "type" "branch")
   ; We use 400/800 instead of 512/1024 to account for inaccurate insn
   ; lengths and insn alignments that are complex to track.
   ; Hyper-precision is not important here.  It may matter more once the
   ; chip supports parallel execution, but until then this is simple and
   ; suffices.
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 400))
					   (const_int 800))
				      (const_int 2)
				      (const_int 4)))])

; reg/reg compare and branch insns

;; Fused compare-and-branch on two registers (EQ or NE), branching to the
;; label when the comparison holds.  When the length attribute is 4 a single
;; beq/bne is emitted; otherwise the opposite test skips over a `bra' to the
;; real target.
(define_insn "*reg_branch_insn"
  [(set (pc)
	(if_then_else (match_operator 1 "eqne_comparison_operator"
				      [(match_operand:SI 2 "register_operand" "r")
				       (match_operand:SI 3 "register_operand" "r")])
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "*
{
  int branch_if_equal = (GET_CODE (operands[1]) == EQ);

  /* Target not reachable with beq/bne: invert the test around a bra.  */
  if (get_attr_length (insn) != 4)
    return branch_if_equal
	   ? \"bne %2,%3,1f\;bra %l0\;1:\"
	   : \"beq %2,%3,1f\;bra %l0\;1:\";

  return branch_if_equal ? \"beq %2,%3,%l0\" : \"bne %2,%3,%l0\";
}"
  [(set_attr "type" "branch")
  ; We use 25000/50000 instead of 32768/65536 to account for slot filling
  ; which is complex to track and inaccurate length specs.
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 25000))
					   (const_int 50000))
				      (const_int 4)
				      (const_int 8)))])

;; Inverted fused compare-and-branch on two registers: the label sits in the
;; "else" arm, so the branch is taken when the EQ/NE comparison does NOT
;; hold.  When the length attribute is 4 a single beq/bne is emitted;
;; otherwise the opposite test skips over a `bra' to the real target.
(define_insn "*rev_reg_branch_insn"
  [(set (pc)
	(if_then_else (match_operator 1 "eqne_comparison_operator"
				      [(match_operand:SI 2 "register_operand" "r")
				       (match_operand:SI 3 "register_operand" "r")])
		      (pc)
		      (label_ref (match_operand 0 "" ""))))]
  ""
  "*
{
  /* An NE condition in the inverted pattern means: branch when equal.  */
  int condition_is_ne = (GET_CODE (operands[1]) == NE);

  /* Target not reachable with beq/bne: invert the test around a bra.  */
  if (get_attr_length (insn) != 4)
    return condition_is_ne
	   ? \"bne %2,%3,1f\;bra %l0\;1:\"
	   : \"beq %2,%3,1f\;bra %l0\;1:\";

  return condition_is_ne ? \"beq %2,%3,%l0\" : \"bne %2,%3,%l0\";
}"
  [(set_attr "type" "branch")
  ; We use 25000/50000 instead of 32768/65536 to account for slot filling
  ; which is complex to track and inaccurate length specs.
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 25000))
					   (const_int 50000))
				      (const_int 4)
				      (const_int 8)))])

; reg/zero compare and branch insns

;; Fused compare-against-zero and branch, taken when the signed comparison
;; of operand 2 with zero holds.  The mnemonic is assembled as b<cond>z.
;; When the length attribute is 4 the branch goes straight to the label;
;; otherwise the inverse condition skips over a `bra' to the real target.
(define_insn "*zero_branch_insn"
  [(set (pc)
	(if_then_else (match_operator 1 "signed_comparison_operator"
				      [(match_operand:SI 2 "register_operand" "r")
				       (const_int 0)])
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
  "*
{
  char *br,*invbr;
  char asmtext[40];

  /* br is the condition suffix for the comparison itself, invbr the
     suffix for its logical inverse.  */
  switch (GET_CODE (operands[1]))
    {
      case EQ : br = \"eq\"; invbr = \"ne\"; break;
      case NE : br = \"ne\"; invbr = \"eq\"; break;
      case LE : br = \"le\"; invbr = \"gt\"; break;
      case GT : br = \"gt\"; invbr = \"le\"; break;
      case LT : br = \"lt\"; invbr = \"ge\"; break;
      case GE : br = \"ge\"; invbr = \"lt\"; break;

      /* Any other code would leave br/invbr uninitialised and feed
	 garbage to sprintf below; stop instead.  */
      default : abort ();
    }

  /* Is branch target reachable with bxxz?  */
  if (get_attr_length (insn) == 4)
    {
      sprintf (asmtext, \"b%sz %%2,%%l0\", br);
      output_asm_insn (asmtext, operands);
    }
  else
    {
      sprintf (asmtext, \"b%sz %%2,1f\;bra %%l0\;1:\", invbr);
      output_asm_insn (asmtext, operands);
    }
  /* The text has already been emitted, so return an empty template.  */
  return \"\";
}"
  [(set_attr "type" "branch")
  ; We use 25000/50000 instead of 32768/65536 to account for slot filling
  ; which is complex to track and inaccurate length specs.
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 25000))
					   (const_int 50000))
				      (const_int 4)
				      (const_int 8)))])

;; Inverted fused compare-against-zero and branch: the label sits in the
;; "else" arm, so the branch is taken when the comparison of operand 2 with
;; zero does NOT hold.  When the length attribute is 4 the inverse condition
;; branches straight to the label; otherwise the condition itself skips
;; over a `bra' to the real target.
;; NOTE(review): the predicate is eqne_comparison_operator, so the signed
;; cases in the switch below look unreachable for this pattern -- confirm
;; whether signed_comparison_operator was intended, as in the non-inverted
;; pattern.
(define_insn "*rev_zero_branch_insn"
  [(set (pc)
	(if_then_else (match_operator 1 "eqne_comparison_operator"
				      [(match_operand:SI 2 "register_operand" "r")
				       (const_int 0)])
		      (pc)
		      (label_ref (match_operand 0 "" ""))))]
  ""
  "*
{
  char *br,*invbr;
  char asmtext[40];

  /* br is the condition suffix for the comparison itself, invbr the
     suffix for its logical inverse.  */
  switch (GET_CODE (operands[1]))
    {
      case EQ : br = \"eq\"; invbr = \"ne\"; break;
      case NE : br = \"ne\"; invbr = \"eq\"; break;
      case LE : br = \"le\"; invbr = \"gt\"; break;
      case GT : br = \"gt\"; invbr = \"le\"; break;
      case LT : br = \"lt\"; invbr = \"ge\"; break;
      case GE : br = \"ge\"; invbr = \"lt\"; break;

      /* Any other code would leave br/invbr uninitialised and feed
	 garbage to sprintf below; stop instead.  */
      default : abort ();
    }

  /* Is branch target reachable with bxxz?  */
  if (get_attr_length (insn) == 4)
    {
      sprintf (asmtext, \"b%sz %%2,%%l0\", invbr);
      output_asm_insn (asmtext, operands);
    }
  else
    {
      sprintf (asmtext, \"b%sz %%2,1f\;bra %%l0\;1:\", br);
      output_asm_insn (asmtext, operands);
    }
  /* The text has already been emitted, so return an empty template.  */
  return \"\";
}"
  [(set_attr "type" "branch")
  ; We use 25000/50000 instead of 32768/65536 to account for slot filling
  ; which is complex to track and inaccurate length specs.
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 25000))
					   (const_int 50000))
				      (const_int 4)
				      (const_int 8)))])

;; Unconditional and other jump instructions.

;; Unconditional direct jump to a label.  The length attribute is 2 bytes
;; when (label - pc + 400) is below 800 as an unsigned value, i.e. the
;; displacement lies in [-400, 400), and 4 bytes otherwise.
(define_insn "jump"
  [(set (pc) (label_ref (match_operand 0 "" "")))]
  ""
  "bra %l0"
  [(set_attr "type" "uncond_branch")
   (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
						 (const_int 400))
					   (const_int 800))
				      (const_int 2)
				      (const_int 4)))])

;; Jump to the address given by operand 0, printed with the %a (address)
;; operand modifier.  2-byte `jmp'.
(define_insn "indirect_jump"
  [(set (pc) (match_operand:SI 0 "address_operand" "p"))]
  ""
  "jmp %a0"
  [(set_attr "type" "uncond_branch")
   (set_attr "length" "2")])
 
;; Jump through a dispatch table: same `jmp %a0' as indirect_jump, with an
;; additional (use ...) of the label given as operand 1.
(define_insn "tablejump"
  [(set (pc) (match_operand:SI 0 "address_operand" "p"))
   (use (label_ref (match_operand 1 "" "")))]
  ""
  "jmp %a0"
  [(set_attr "type" "uncond_branch")
   (set_attr "length" "2")])

;; Call expander for calls whose value is not used.  It has no preparation
;; code; it simply generates the call in parallel with a clobber of hard
;; register 14, which the *call_via_* insns then match.
(define_expand "call"
  ;; operands[1] is stack_size_rtx
  ;; operands[2] is next_arg_register
  [(parallel [(call (match_operand:SI 0 "call_operand" "")
		    (match_operand 1 "" ""))
	     (clobber (reg:SI 14))])]
  ""
  "")

;; Call through a register holding the function address: 2-byte `jl %0'.
;; Hard register 14 is clobbered.
(define_insn "*call_via_reg"
  [(call (mem:SI (match_operand:SI 0 "register_operand" "r"))
	 (match_operand 1 "" ""))
   (clobber (reg:SI 14))]
  ""
  "jl %0"
  [(set_attr "type" "call")
   (set_attr "length" "2")])

;; Call to a symbolic address.  When call26_operand accepts the address a
;; single `bl' is emitted; otherwise the address is built in r14 with
;; seth/add3 and the call is made with `jl r14'.  The length attribute
;; tests the same predicate.
(define_insn "*call_via_label"
  [(call (mem:SI (match_operand:SI 0 "call_address_operand" ""))
	 (match_operand 1 "" ""))
   (clobber (reg:SI 14))]
  ""
  "*
{
  /* Direct case: the target is acceptable to call26_operand.  */
  if (call26_operand (operands[0], FUNCTION_MODE))
    return \"bl %0\";

  /* We may not be able to reach with a `bl' insn so punt and leave it to
     the linker.
     We do this here, rather than doing a force_reg in the define_expand
     so these insns won't be separated, say by scheduling, thus simplifying
     the linker.  */
  return \"seth r14,%T0\;add3 r14,r14,%B0\;jl r14\";
}"
  [(set_attr "type" "call")
   (set (attr "length")
	(if_then_else (eq (symbol_ref "call26_operand (operands[0], FUNCTION_MODE)")
			  (const_int 0))
		      (const_int 12) ; 10 + 2 for nop filler
		      ; The return address must be on a 4 byte boundary so
		      ; there's no point in using a value of 2 here.  A 2 byte
		      ; insn may go in the left slot but we currently can't
		      ; use such knowledge.
		      (const_int 4)))])

;; Call expander for calls whose value is stored in operand 0.  It has no
;; preparation code; it simply generates the set-from-call in parallel with
;; a clobber of hard register 14, which the *call_value_via_* insns match.
(define_expand "call_value"
  ;; operand 2 is stack_size_rtx
  ;; operand 3 is next_arg_register
  [(parallel [(set (match_operand 0 "register_operand" "=r")
		   (call (match_operand:SI 1 "call_operand" "")
			 (match_operand 2 "" "")))
	     (clobber (reg:SI 14))])]
  ""
  "")

;; Value-returning call through a register holding the function address:
;; 2-byte `jl %1'.  Hard register 14 is clobbered.
(define_insn "*call_value_via_reg"
  [(set (match_operand 0 "register_operand" "=r")
	(call (mem:SI (match_operand:SI 1 "register_operand" "r"))
	      (match_operand 2 "" "")))
   (clobber (reg:SI 14))]
  ""
  "jl %1"
  [(set_attr "type" "call")
   (set_attr "length" "2")])

;; Value-returning call to a symbolic address.  When call26_operand accepts
;; the address a single `bl' is emitted; otherwise the address is built in
;; r14 with seth/add3 and the call is made with `jl r14'.  The length
;; attribute tests the same predicate.
(define_insn "*call_value_via_label"
  [(set (match_operand 0 "register_operand" "=r")
	(call (mem:SI (match_operand:SI 1 "call_address_operand" ""))
	      (match_operand 2 "" "")))
   (clobber (reg:SI 14))]
  ""
  "*
{
  /* Direct case: the target is acceptable to call26_operand.  */
  if (call26_operand (operands[1], FUNCTION_MODE))
    return \"bl %1\";

  /* We may not be able to reach with a `bl' insn so punt and leave it to
     the linker.
     We do this here, rather than doing a force_reg in the define_expand
     so these insns won't be separated, say by scheduling, thus simplifying
     the linker.  */
  return \"seth r14,%T1\;add3 r14,r14,%B1\;jl r14\";
}"
  [(set_attr "type" "call")
   (set (attr "length")
	(if_then_else (eq (symbol_ref "call26_operand (operands[1], FUNCTION_MODE)")
			  (const_int 0))
		      (const_int 12) ; 10 + 2 for nop filler
		      ; The return address must be on a 4 byte boundary so
		      ; there's no point in using a value of 2 here.  A 2 byte
		      ; insn may go in the left slot but we currently can't
		      ; use such knowledge.
		      (const_int 4)))])

;; No-operation insn: the pattern is the bare (const_int 0) and the output
;; is a 2-byte `nop'.
(define_insn "nop"
  [(const_int 0)]
  ""
  "nop"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
;; all of memory.  This blocks insns from being moved across this point.

;; Scheduling barrier: unspec_volatile number 0 with an empty output
;; template, so nothing is written to the assembly file.  No attributes are
;; set, so "type" and "length" take their defaults.
(define_insn "blockage"
  [(unspec_volatile [(const_int 0)] 0)]
  ""
  "")

;; Special pattern to flush the icache.

;; Uses unspec_volatile number 1, the value the "unspec usage" table at the
;; top of this file reserves for flush_icache (0 belongs to blockage).
;; Operand 0 is a memory operand; the output is a 2-byte `nop' followed by
;; the text "; flush-icache".
(define_insn "flush_icache"
  [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 1)]
  ""
  "* return \"nop ; flush-icache\";"
  [(set_attr "type" "int2")
   (set_attr "length" "2")])

;; Conditional move instructions
;; Based on those done for the d10v


;; Conditional-move expander.  It FAILs unless zero_and_one accepts the two
;; arms (operands 2 and 3).  Otherwise operand 1 is replaced by the result
;; of gen_compare on the operands recorded in
;; m32r_compare_op0/m32r_compare_op1, and a blockage insn is emitted ahead
;; of the conditional-move pattern itself.
;; NOTE(review): the trailing TRUE flag is interpreted by gen_compare,
;; defined elsewhere -- confirm its meaning there.
(define_expand "movsicc"
  [
   (set (match_operand:SI 0 "register_operand" "r")
	(if_then_else:SI (match_operand 1 "" "")
			 (match_operand:SI 2 "conditional_move_operand" "O")
			 (match_operand:SI 3 "conditional_move_operand" "O")
        )
   )
  ]
  ""
  "
{
  if (! zero_and_one (operands [2], operands [3]))
    FAIL;

  /* Generate the comparision that will set the carry flag.  */
  operands[1] = gen_compare ((int)GET_CODE (operands[1]), m32r_compare_op0,
			     m32r_compare_op1, TRUE);

  /* See other movsicc pattern below for reason why.  */
  emit_insn (gen_blockage());
}")

;; Generate the conditional instructions based on how the carry flag is examined.
;; Conditional move of two "O" constants selected by a carry comparison.
;; The insn condition requires zero_and_one to accept the two arms, and the
;; assembler text is produced by emit_cond_move.
;; Operand 0 is the destination of the set, so its constraint carries the
;; "=" output modifier.
(define_insn "*movsicc_internal"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(if_then_else:SI (match_operand 1 "carry_compare_operand" "")
			 (match_operand:SI 2 "conditional_move_operand" "O")
			 (match_operand:SI 3 "conditional_move_operand" "O")
        )
   )]
  "zero_and_one (operands [2], operands[3])"
  "* return emit_cond_move (operands, insn);"
  [(set_attr "type" "multi")
   (set_attr "length" "8")
  ]
)

;; Copy the condition bit (reg 17) into a general register with the 2-byte
;; instruction `mvfc %0, cbr'.
(define_insn "movcc_insn"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(reg:SI 17))]
  ""
  "mvfc %0, cbr"
  [(set_attr "type" "misc")
   (set_attr "length" "2")]
)


;; Split up troublesome insns for better scheduling.

;; Peepholes go at the end.

;; ??? Setting the type attribute may not be useful, but for completeness
;; we do it.

;; Rewrite a store to (reg + 4) as the 2-byte `st %1,@+%0' form.
;; The leading "0 &&" in the condition means this peephole is currently
;; disabled and never matches.
(define_peephole
  [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
			 (const_int 4)))
        (match_operand:SI 1 "register_operand" "r"))]
  "0 && dead_or_set_p (insn, operands[0])"
  "st %1,@+%0"
  [(set_attr "type" "store2")
   (set_attr "length" "2")])

;; This case is triggered by compiling this code:
;; 
;; extern void sub(int *);
;; void main (void)
;; {
;;   int i=2,j=3,k;
;;   while (i < j)  sub(&k);
;;   i = j / k;
;;   sub(&i);
;;   i = j - k;
;;   sub(&i);
;; }
;;
;; Without the peephole the following assembler is generated for the
;; divide and subtract expressions:
;;
;;         div r5,r4     
;;         mv r4,r5      
;;         st r4,@(4,sp) 
;;         bl sub
;; 
;; Similar code is produced for the subtract expression.  With this
;; peephole the redundant move is eliminated.
;;
;; This optimisation only works if PRESERVE_DEATH_INFO_REGNO_P is
;; defined in m32r.h

;; Fold a register copy followed by a store of the copy into a single store
;; of the original register: `st %1,@(%3,%2)'.
;; The leading "0 &&" in the condition means this peephole is currently
;; disabled and never matches.
(define_peephole
  [(set (match_operand:SI 0 "register_operand" "r")
	(match_operand:SI 1 "register_operand" "r"))
   (set (mem:SI (plus:SI (match_operand:SI 2 "register_operand" "r")
			 (match_operand:SI 3 "immediate_operand" "J")))
	(match_dup 0))]
  "0 && dead_or_set_p (insn, operands [0])"
  "st %1,@(%3,%2)"
  [(set_attr "type" "store4")
   (set_attr "length" "4")])

;; Block moves, see m32r.c for more details.
;; Argument 0 is the destination
;; Argument 1 is the source
;; Argument 2 is the length
;; Argument 3 is the alignment

;; Block-move expander.  All the work is delegated to
;; m32r_expand_block_move, after which DONE prevents the template RTL from
;; being emitted.  The always-true-looking `if' wrapper is deliberate (see
;; the inline comment): it keeps the code after DONE formally reachable.
(define_expand "movstrsi"
  [(parallel [(set (match_operand:BLK 0 "general_operand" "")
		   (match_operand:BLK 1 "general_operand" ""))
	      (use (match_operand:SI  2 "immediate_operand" ""))
	      (use (match_operand:SI  3 "immediate_operand" ""))])]
  ""
  "
{
  if (operands[0])		/* avoid unused code messages */
    {
      m32r_expand_block_move (operands);
      DONE;
    }
}")

;; Insn generated by block moves

;; One block-move step.  Operands 0 and 1 are registers holding the
;; destination and source addresses, operand 2 is the byte count ("J").
;; The pattern also records the pointer updates: the destination register
;; advances by (count - 4) and the source register by count.  Two
;; earlyclobber scratch registers are reserved as temporaries.  The
;; assembler text comes from m32r_output_block_move; the length of 72 is
;; the maximum, as noted on the attribute.
(define_insn "movstrsi_internal"
  [(set (mem:BLK (match_operand:SI 0 "register_operand" "r"))	;; destination
	(mem:BLK (match_operand:SI 1 "register_operand" "r")))	;; source
   (use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move
   (set (match_dup 0) (plus:SI (match_dup 0) (minus:SI (match_dup 2) (const_int 4))))
   (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))
   (clobber (match_scratch:SI 3 "=&r"))				;; temp 1
   (clobber (match_scratch:SI 4 "=&r"))]			;; temp 2
  ""
  "* return m32r_output_block_move (insn, operands);"
  [(set_attr "type"	"store8")
   (set_attr "length"	"72")]) ;; Maximum