mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-05-08 05:52:31 +02:00

+ darwin/ppc64 support + val/str/read(ln)/write(ln) support for enums + simple cse at the node tree level + if-node simplify support + simple ssa support for memory locations + support for optional overflow/rangecheck boolean parameters for operators * a lot of unification of the ppc32/ppc64 code generators ........ r6380 | jonas | 2007-02-08 21:25:36 +0100 (Thu, 08 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/ncgld.pas M /branches/fpc_2_3/compiler/tgobj.pas A /branches/fpc_2_3/tests/webtbs/tw8283.pp + support for replacing the memory location of a temp (including local variables) with that of another temp to avoid unnecessary copies (mantis #8283) ........ r6381 | jonas | 2007-02-08 22:53:36 +0100 (Thu, 08 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/nflw.pas A /branches/fpc_2_3/tests/webtbs/tw8282.pp + simplify support for ifn (based on patch by Florian) ........ r6386 | peter | 2007-02-09 13:48:53 +0100 (Fri, 09 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/htypechk.pas M /branches/fpc_2_3/compiler/ncal.pas M /branches/fpc_2_3/compiler/symconst.pas * overflow,rangecheck optional parameters for operators, patch from 8281 ........ r6391 | jonas | 2007-02-09 23:52:13 +0100 (Fri, 09 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc/agppcgas.pas M /branches/fpc_2_3/compiler/powerpc64/cpunode.pas D /branches/fpc_2_3/compiler/powerpc64/nppcinl.pas M /branches/fpc_2_3/compiler/ppcgen/ngppcinl.pas * merged fsqrt(s) support to common powerpc unit, activate for ppc32 if -Op970 is used (still default for ppc64, since default cpu there is already ppc970) ........ 
r6394 | jonas | 2007-02-10 18:58:47 +0100 (Sat, 10 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc/cgcpu.pas M /branches/fpc_2_3/compiler/powerpc64/cgcpu.pas M /branches/fpc_2_3/compiler/ppcgen/cgppc.pas * adapted a_jmp_name for darwin/ppc64 * merged g_intf_wrapper for ppc32 and ppc64, and added darwin/ppc64 support to it ........ r6396 | jonas | 2007-02-10 20:16:06 +0100 (Sat, 10 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/cgobj.pas + darwin/ppc64 support for g_indirect_sym_load ........ r6397 | jonas | 2007-02-10 20:22:49 +0100 (Sat, 10 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc/cgcpu.pas M /branches/fpc_2_3/compiler/powerpc64/cgcpu.pas M /branches/fpc_2_3/compiler/ppcgen/cgppc.pas + darwin/ppc64 support to ppc64's fixref * moved ppc32 a_load_store to cgppc and use it for darwin/ppc64 as well (its relocatable symbols are only 32 bits large) ........ r6399 | jonas | 2007-02-10 22:02:37 +0100 (Sat, 10 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/systems.pas + system_x86_64_darwin identifier + set default source system for system_x86_64_darwin and system_powerpc64_darwin ........ r6404 | jonas | 2007-02-10 23:01:23 +0100 (Sat, 10 Feb 2007) | 5 lines Changed paths: M /branches/fpc_2_3/compiler/aasmdata.pas M /branches/fpc_2_3/compiler/aggas.pas M /branches/fpc_2_3/compiler/cgobj.pas M /branches/fpc_2_3/compiler/cgutils.pas M /branches/fpc_2_3/compiler/cresstr.pas M /branches/fpc_2_3/compiler/dbgdwarf.pas M /branches/fpc_2_3/compiler/dbgstabs.pas M /branches/fpc_2_3/compiler/ncgutil.pas M /branches/fpc_2_3/compiler/ogelf.pas M /branches/fpc_2_3/compiler/pdecvar.pas M /branches/fpc_2_3/compiler/pmodules.pas M /branches/fpc_2_3/compiler/symdef.pas M /branches/fpc_2_3/compiler/systems.pas + system_x86_64_darwin identifier + systems_darwin set which collects all darwin variants + added support for darwin/ppc64 and darwin/x86_64 where needed in the generic code ........ 
r6406 | jonas | 2007-02-10 23:24:32 +0100 (Sat, 10 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/cgobj.pas * ifdef cpu64 -> ifdef cpu64bit ........ r6409 | jonas | 2007-02-11 00:34:04 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/pdecvar.pas * fixed ppc64 compilation ........ r6413 | jonas | 2007-02-11 12:41:27 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/bsd/system.pp M /branches/fpc_2_3/rtl/darwin/powerpc/sig_cpu.inc M /branches/fpc_2_3/rtl/darwin/signal.inc + darwin/ppc64 support for signal routines ........ r6415 | jonas | 2007-02-11 13:54:53 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/systems/i_linux.pas * set abi of linux/ppc64 to abi_powerpc_sysv ........ r6416 | jonas | 2007-02-11 13:55:51 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc64/cputarg.pas M /branches/fpc_2_3/compiler/systems/i_bsd.pas M /branches/fpc_2_3/compiler/systems/t_bsd.pas + darwin/ppc64 source and target information ........ r6418 | jonas | 2007-02-11 14:19:55 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/powerpc64/math.inc * darwin/ppc64 compilation fixes ........ r6419 | jonas | 2007-02-11 14:22:22 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc/cgcpu.pas M /branches/fpc_2_3/compiler/powerpc64/cgcpu.pas M /branches/fpc_2_3/compiler/ppcgen/cgppc.pas * darwin/ppc64 needs the 32 bit version of a_loadaddr_ref_reg ........ r6420 | jonas | 2007-02-11 14:22:55 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/utils/fpcm/fpcmmain.pp + darwin/ppc64 support ........ r6426 | jonas | 2007-02-11 16:13:19 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc64/rappcgas.pas * fixed refaddr parsing for darwin/ppc64 ........ 
r6427 | jonas | 2007-02-11 16:14:21 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc/agppcgas.pas M /branches/fpc_2_3/compiler/powerpc64/agppcgas.pas A /branches/fpc_2_3/compiler/ppcgen/agppcutl.pas * moved ppc32/ppc64 assembler writer helpers to a common unit ........ r6430 | jonas | 2007-02-11 17:53:23 +0100 (Sun, 11 Feb 2007) | 4 lines Changed paths: D /branches/fpc_2_3/rtl/darwin/powerpc/sig_cpu.inc D /branches/fpc_2_3/rtl/darwin/powerpc/sighnd.inc A /branches/fpc_2_3/rtl/darwin/powerpc64 A /branches/fpc_2_3/rtl/darwin/powerpc64/sig_cpu.inc A /branches/fpc_2_3/rtl/darwin/powerpc64/sighnd.inc A /branches/fpc_2_3/rtl/darwin/ppcgen A /branches/fpc_2_3/rtl/darwin/ppcgen/ppchnd.inc (from /branches/fpc_2_3/rtl/darwin/powerpc/sighnd.inc:6422) A /branches/fpc_2_3/rtl/darwin/ppcgen/sig_ppc.inc (from /branches/fpc_2_3/rtl/darwin/powerpc/sig_cpu.inc:6422) M /branches/fpc_2_3/rtl/darwin/signal.inc * fixed ppc/ppc64 signal include handling (both real files are in ppcgen, dummies in powerpc and powerpc64 which include those files) (1st step because pre-commit filter can't handle replaced files) ........ r6431 | jonas | 2007-02-11 17:53:47 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: A /branches/fpc_2_3/rtl/darwin/powerpc/sig_cpu.inc A /branches/fpc_2_3/rtl/darwin/powerpc/sighnd.inc * second step of signal include patch ........ r6432 | jonas | 2007-02-11 19:00:12 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/systems/t_bsd.pas * changed darwin checks to use systems_darwin constant ........ r6433 | jonas | 2007-02-11 19:05:38 +0100 (Sun, 11 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc64/cgcpu.pas * handle non-multiple-of-4 offsets with 64 bit loads/stores for darwin/ppc64 ........ 
r6434 | jonas | 2007-02-11 19:05:56 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: D /branches/fpc_2_3/compiler/powerpc/agppcgas.pas D /branches/fpc_2_3/compiler/powerpc64/agppcgas.pas A /branches/fpc_2_3/compiler/ppcgen/agppcgas.pas (from /branches/fpc_2_3/compiler/ppcgen/agppcutl.pas:6427) D /branches/fpc_2_3/compiler/ppcgen/agppcutl.pas * completely merged ppc assembler writers ........ r6435 | jonas | 2007-02-11 19:06:40 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/darwin/console.pp M /branches/fpc_2_3/rtl/darwin/termiosproc.inc * fixed 64 bit compilation ........ r6436 | jonas | 2007-02-11 19:09:28 +0100 (Sun, 11 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/packages/extra/Makefile.fpc * universal interfaces aren't 64 bit ready yet -> only compile for darwin/ppc and darwin/i386 ........ r6438 | jonas | 2007-02-11 19:22:34 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: A /branches/fpc_2_3/tests/test/cg/obj/darwin/powerpc64 A /branches/fpc_2_3/tests/test/cg/obj/darwin/powerpc64/ctest.o A /branches/fpc_2_3/tests/test/cg/obj/darwin/powerpc64/tcext3.o A /branches/fpc_2_3/tests/test/cg/obj/darwin/powerpc64/tcext4.o A /branches/fpc_2_3/tests/test/cg/obj/darwin/powerpc64/tcext5.o + compiled for darwin/ppc64 ........ r6439 | jonas | 2007-02-11 20:24:42 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ppcgen/cgppc.pas * patch from Thomas to fix linux/ppc64 ........ r6440 | jonas | 2007-02-11 20:25:15 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/systems.pas * fixed setting source OS for darwin/ppc64 ........ r6444 | florian | 2007-02-11 22:24:20 +0100 (Sun, 11 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/globtype.pas M /branches/fpc_2_3/compiler/nopt.pas M /branches/fpc_2_3/compiler/nutils.pas M /branches/fpc_2_3/compiler/optcse.pas M /branches/fpc_2_3/compiler/psub.pas + first node cse implementation ........ 
r6445 | jonas | 2007-02-11 22:30:07 +0100 (Sun, 11 Feb 2007) | 6 lines Changed paths: M /branches/fpc_2_3/compiler/cresstr.pas * hack to work around strange darwin/ppc64 linker bug: it seems to have problems if you put a global symbol at the end of a section without any data following (at least in case of the resource strings section) -> add dummy byte at the end for darwin/ppc64 (otherwise it messes up the address of the first symbol stub entry) ........ r6449 | jonas | 2007-02-11 23:23:44 +0100 (Sun, 11 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/systems/i_bsd.pas * cpupowerpc is defined for both ppc32 and ppc64 -> changed to cpupowerpc32 to avoid defining source wrongly on ppc64 ........ r6450 | jonas | 2007-02-11 23:26:34 +0100 (Sun, 11 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/ppcgen/ngppcset.pas * disable jump tables for darwin/ppc64 for now, don't work yet for some reason ........ r6451 | florian | 2007-02-11 23:54:37 +0100 (Sun, 11 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/ncal.pas M /branches/fpc_2_3/compiler/nutils.pas M /branches/fpc_2_3/compiler/optcse.pas * improved cse * better complexity calculation for subscript nodes with classes or interfaces ........ r6456 | jonas | 2007-02-12 19:33:22 +0100 (Mon, 12 Feb 2007) | 4 lines Changed paths: M /branches/fpc_2_3/compiler/nutils.pas + support for notn,shln,shrn,equaln,unequaln,gtn,gten,ltn,lten in node_cplexity() * mark muln,divn,modn as more complex ........ r6469 | jonas | 2007-02-13 15:56:01 +0100 (Tue, 13 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/optcse.pas * fixed when cross-compiling a 64 bit compiler from a 32 bit platform ........ r6471 | jonas | 2007-02-13 16:17:16 +0100 (Tue, 13 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc64/cputarg.pas * include stabs support (can work on darwin/ppc64, but doesn't work yet) ........ 
r6473 | jonas | 2007-02-13 16:45:48 +0100 (Tue, 13 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc64/cgcpu.pas M /branches/fpc_2_3/compiler/powerpc64/cpupara.pas * R2 is a volatile and usable register under darwin/ppc64 * R13 is a reserved non-volatile register under darwin/ppc64 (tls) ........ r6479 | jonas | 2007-02-13 20:40:50 +0100 (Tue, 13 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/systems/i_bsd.pas * maxCrecordalign seems to have to be 8 rather 4, in spite of what the ABI docs say (although they are contradictory to some extent) ........ r6487 | jonas | 2007-02-14 15:57:40 +0100 (Wed, 14 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/tests/webtbs/tw8153a.pp * fixed for darwin/ppc64 ........ r6488 | jonas | 2007-02-14 15:58:56 +0100 (Wed, 14 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/tests/webtbs/tw7851a.pp * fixed for darwin/ppc64 ........ r6494 | jonas | 2007-02-15 19:36:55 +0100 (Thu, 15 Feb 2007) | 3 lines Changed paths: M /branches/fpc_2_3/compiler/systems/i_bsd.pas * set default debug info for darwin/ppc64 to dwarf2 since it works better than stabs currently ........ r6500 | jonas | 2007-02-15 21:38:16 +0100 (Thu, 15 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/version.pas * updated version to 2.3.0 ........ r6505 | jonas | 2007-02-15 22:39:28 +0100 (Thu, 15 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/version.pas * changed version to 2.3.1 ........ r6511 | jonas | 2007-02-16 15:17:24 +0100 (Fri, 16 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/powerpc64/itcpugas.pas * system_powerpc_darwin -> system_powerpc64_darwin ........ 
r6546 | daniel | 2007-02-18 15:48:54 +0100 (Sun, 18 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ncginl.pas M /branches/fpc_2_3/compiler/ncgld.pas M /branches/fpc_2_3/compiler/ncgrtti.pas M /branches/fpc_2_3/compiler/ncnv.pas M /branches/fpc_2_3/compiler/ninl.pas M /branches/fpc_2_3/compiler/nld.pas M /branches/fpc_2_3/compiler/nutils.pas M /branches/fpc_2_3/compiler/pinline.pas M /branches/fpc_2_3/rtl/inc/astrings.inc M /branches/fpc_2_3/rtl/inc/compproc.inc M /branches/fpc_2_3/rtl/inc/sstrings.inc M /branches/fpc_2_3/rtl/inc/text.inc M /branches/fpc_2_3/rtl/inc/wstrings.inc + Val/str/read/write support for enumeration types. ........ r6547 | daniel | 2007-02-18 17:01:20 +0100 (Sun, 18 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/inc/sstrings.inc * Fix val code that I broke. ........ r6571 | daniel | 2007-02-20 09:27:44 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/inc/astrings.inc M /branches/fpc_2_3/rtl/inc/sstrings.inc M /branches/fpc_2_3/rtl/inc/text.inc M /branches/fpc_2_3/rtl/inc/wstrings.inc * o2s -> ord2str, s2o -> str2ord ........ r6572 | daniel | 2007-02-20 09:33:30 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ncgld.pas M /branches/fpc_2_3/compiler/ncgrtti.pas M /branches/fpc_2_3/compiler/ninl.pas M /branches/fpc_2_3/compiler/nld.pas * o2s -> ord2str, s2o -> str2ord ........ r6574 | daniel | 2007-02-20 12:07:58 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/inc/compproc.inc * o2s -> ord2str, s2o -> str2ord ........ r6578 | daniel | 2007-02-20 22:18:49 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/rtl/inc/text.inc * Change longint to valsint. ........ r6579 | daniel | 2007-02-20 22:29:09 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ninl.pas * Handle ordinal currency types. ........ 
r6580 | jonas | 2007-02-20 22:29:11 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ncgrtti.pas * fixed compilation for cpurequiresproperalignment ........ r6581 | jonas | 2007-02-20 22:30:21 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ninl.pas * fixed typo ........ r6582 | daniel | 2007-02-20 22:36:19 +0100 (Tue, 20 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/ninl.pas * Set is_real to true. ........ r6590 | jonas | 2007-02-21 20:23:54 +0100 (Wed, 21 Feb 2007) | 2 lines Changed paths: M /branches/fpc_2_3/compiler/systems/i_bsd.pas * set tf_dwarf_only_local_labels for darwin/ppc64 git-svn-id: trunk@6720 -
2192 lines
79 KiB
ObjectPascal
2192 lines
79 KiB
ObjectPascal
{
|
|
Copyright (c) 1998-2002 by Florian Klaempfl
|
|
|
|
This unit implements the code generator for the PowerPC
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
****************************************************************************
|
|
}
|
|
unit cgcpu;
|
|
|
|
{$I fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
globtype, symtype, symdef, symsym,
|
|
cgbase, cgobj,cgppc,
|
|
aasmbase, aasmcpu, aasmtai,aasmdata,
|
|
cpubase, cpuinfo, cgutils, rgcpu,
|
|
parabase;
|
|
|
|
type
|
|
tcgppc = class(tcgppcgen)
  procedure init_register_allocators; override;
  procedure done_register_allocators; override;

  { Parameter passing: by default the parameter is pushed. nr is the
    number of the parameter (counted from left to right), which makes it
    possible to move the parameter into a register if the cpu supports
    register calling conventions. }
  procedure a_param_ref(list: TAsmList; size: tcgsize; const r: treference;
    const paraloc: tcgpara); override;

  { calls }
  procedure a_call_name(list: TAsmList; const s: string); override;
  procedure a_call_reg(list: TAsmList; reg: tregister); override;

  { operations with two operands }
  procedure a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
    a: aint; reg: TRegister); override;
  procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
    src, dst: TRegister); override;

  { operations with three operands }
  procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; a: aint; src, dst: tregister); override;
  procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
    size: tcgsize; src1, src2, dst: tregister); override;

  { move instructions }
  procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: aint;
    reg: tregister); override;
  { loads the memory pointed to by ref into register reg }
  procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize;
    const Ref: treference; reg: tregister); override;
  procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize;
    reg1, reg2: tregister); override;

  { subset register loads }
  procedure a_load_subsetreg_reg(list : TAsmList; subsetsize, tosize: tcgsize;
    const sreg: tsubsetregister; destreg: tregister); override;
  procedure a_load_const_subsetreg(list: TAsmlist; subsetsize: tcgsize;
    a: aint; const sreg: tsubsetregister); override;

  { comparison operations }
  procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize;
    cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel); override;
  procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize;
    cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;

  { jumps }
  procedure a_jmp_name(list: TAsmList; const s: string); override;
  procedure a_jmp_always(list: TAsmList; l: tasmlabel); override;
  procedure a_jmp_flags(list: TAsmList; const f: TResFlags;
    l: tasmlabel); override;

  procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags;
    reg: TRegister); override;

  { procedure entry/exit and profiling support }
  procedure g_profilecode(list: TAsmList); override;
  procedure g_proc_entry(list: TAsmList; localsize: longint;
    nostackframe: boolean); override;
  procedure g_proc_exit(list: TAsmList; parasize: longint;
    nostackframe: boolean); override;
  procedure g_save_standard_registers(list: TAsmList); override;
  procedure g_restore_standard_registers(list: TAsmList); override;

  procedure a_loadaddr_ref_reg(list: TAsmList; const ref: treference;
    r: tregister); override;

  procedure g_concatcopy(list: TAsmList; const source, dest: treference;
    len: aint); override;

private

  procedure a_load_regconst_subsetreg_intern(list : TAsmList;
    fromsize, subsetsize: tcgsize; fromreg: tregister;
    const sreg: tsubsetregister; slopt: tsubsetloadopt); override;

  procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize;
    dst: tregister);

  { Turns ref into a valid reference for the PowerPC; the base is set to
    the value of the index if (base = R_NO).
    The result is true if the reference contained a base, an index and an
    offset or symbol. In that case the base has been replaced by a tempreg
    holding the sum of part of the original reference, and the caller has
    to free that tempreg. }
  function fixref(list: TAsmList; var ref: treference): boolean; override;

  function load_got_symbol(list : TAsmList; symbol : string) : tregister;

  { tells whether a reference can be used immediately in a powerpc
    instruction }
  function issimpleref(const ref: treference): boolean;

  { the code shared by a_load_reg_ref and a_load_ref_reg }
  procedure a_load_store(list: TAsmList; op: tasmop; reg: tregister;
    ref: treference); override;

  { yields the lowest numbered FP register in use and the number of used
    FP registers for the current procedure }
  procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  { yields the lowest numbered GP register in use and the number of used
    GP registers for the current procedure }
  procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);

  { Generates code to call a method with the given string name. The
    boolean options control code generation:
      prependDot  - if true, a single dot character is prepended to the string
      addNOP      - if true, a single NOP instruction is added after the call
      includeCall - if true, the method is marked as having a call, not if
                    false. This is particularly useful to prevent generation
                    of a larger stack frame for the register save and restore
                    helper functions. }
  procedure a_call_name_direct(list: TAsmList; s: string; prependDot : boolean;
    addNOP : boolean; includeCall : boolean = true);

  procedure a_jmp_name_direct(list : TAsmList; s : string; prependDot : boolean);

  { emits code to store the given value a into the TOC (if it is not in
    there already) and to load it from there as well }
  procedure loadConstantPIC(list : TAsmList; size : TCGSize; a : aint;
    reg : TRegister);

  procedure profilecode_savepara(para : tparavarsym; list : TAsmList);
  procedure profilecode_restorepara(para : tparavarsym; list : TAsmList);
end;
|
|
|
|
{ Lookup table from a shift operation (OP_SAR..OP_SHR) to a shift instruction opcode; the boolean first index selects one of the two rows. }
{ NOTE(review): judging by the mnemonics, false selects the word (32 bit) and true the doubleword (64 bit) immediate forms - confirm at the use sites. }
const
|
|
TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
|
|
(A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
|
|
);
|
|
|
|
implementation
|
|
|
|
uses
|
|
sysutils, cclasses,
|
|
globals, verbose, systems, cutils,
|
|
symconst, fmodule,
|
|
rgobj, tgobj, cpupi, procinfo, paramgr, cpupara;
|
|
|
|
{ Renders the fields of a reference (base, index, refaddr, offset and, when
  one is assigned, the symbol name) as a single line of text. }
function ref2string(const ref : treference) : string;
var
  description : string;
begin
  description := 'base : ' + inttostr(ord(ref.base));
  description := description + ' index : ' + inttostr(ord(ref.index));
  description := description + ' refaddr : ' + inttostr(ord(ref.refaddr));
  description := description + ' offset : ' + inttostr(ref.offset);
  description := description + ' symbol : ';
  { the symbol is optional, only append its name when there is one }
  if assigned(ref.symbol) then
    description := description + ref.symbol.name;
  ref2string := description;
end;
|
|
|
|
{ Maps a TCgSize value to its textual name by table lookup. }
function cgsize2string(const size : TCgSize) : string;
const
  size_names : array[TCgSize] of string[8] = (
    'OS_NO',
    'OS_8', 'OS_16', 'OS_32', 'OS_64', 'OS_128',
    'OS_S8', 'OS_S16', 'OS_S32', 'OS_S64', 'OS_S128',
    'OS_F32', 'OS_F64', 'OS_F80', 'OS_C64', 'OS_F128',
    'OS_M8', 'OS_M16', 'OS_M32', 'OS_M64', 'OS_M128',
    'OS_MS8', 'OS_MS16', 'OS_MS32', 'OS_MS64', 'OS_MS128'
  );
begin
  cgsize2string := size_names[size];
end;
|
|
|
|
{ Maps a TOpCg value to its textual name by table lookup. }
function cgop2string(const op : TOpCg) : String;
const
  op_names : array[TOpCg] of string[6] = (
    'None', 'Move',
    'Add', 'And', 'Div', 'IDiv', 'IMul', 'Mul',
    'Neg', 'Not', 'Or',
    'Sar', 'Shl', 'Shr',
    'Sub', 'Xor'
  );
begin
  cgop2string := op_names[op];
end;
|
|
|
|
{ Tells whether the given integer size is a signed one. Only the 8, 16, 32
  and 64 bit integer sizes are accepted; every other size raises internal
  error 2006050701. }
function is_signed_cgsize(const size : TCgSize) : Boolean;
begin
  if size in [OS_S8, OS_S16, OS_S32, OS_S64] then
    is_signed_cgsize := true
  else if size in [OS_8, OS_16, OS_32, OS_64] then
    is_signed_cgsize := false
  else
    internalerror(2006050701);
end;
|
|
|
|
{$ifopt r+}
|
|
{$r-}
|
|
{$define rangeon}
|
|
{$endif}
|
|
|
|
{$ifopt q+}
|
|
{$q-}
|
|
{$define overflowon}
|
|
{$endif}
|
|
{ Helper which calculates the "magic" values needed to replace an unsigned
  division by the constant d with a multiplication. See the PowerPC compiler
  developer manual for more information.

    N           - number of bits of the division
    d           - the divisor, must be > 0
    magic_m     - receives the magic multiplier
    magic_add   - receives true if the "add" indicator got set
    magic_shift - receives the shift amount

  NOTE(review): the calculation relies on unsigned wrap-around at the width
  of aWord, so it presumably expects N to be the bit width of aWord - confirm
  against the callers. }
procedure getmagic_unsignedN(const N : byte; const d : aWord;
  out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
var
  exponent : aInt;
  limit, gap, quot_nc, rem_nc, quot_d, rem_d, half_range : aWord;
begin
  assert(d > 0);

  { 2^(N-1) }
  half_range := aWord(1) shl (N-1);

  magic_add := false;
  limit := - 1 - (-d) mod d;
  exponent := N-1;
  { quot_nc/rem_nc: quotient and remainder of 2^exponent by limit }
  quot_nc := half_range div limit;
  rem_nc := half_range - quot_nc*limit;
  { quot_d/rem_d: quotient and remainder of (2^exponent - 1) by d }
  quot_d := (half_range-1) div d;
  rem_d := (half_range-1) - quot_d*d;
  repeat
    inc(exponent);

    { step quot_nc/rem_nc to the next power of two }
    if (rem_nc >= (limit - rem_nc)) then
    begin
      quot_nc := 2 * quot_nc + 1;
      rem_nc := 2*rem_nc - limit;
    end
    else
    begin
      quot_nc := 2*quot_nc;
      rem_nc := 2*rem_nc;
    end;

    { step quot_d/rem_d to the next power of two; magic_add is set as soon
      as the doubled quotient no longer fits }
    if ((rem_d + 1) >= (d - rem_d)) then
    begin
      if (quot_d >= (half_range-1)) then
        magic_add := true;
      quot_d := 2*quot_d + 1;
      rem_d := 2*rem_d + 1 - d;
    end
    else
    begin
      if (quot_d >= half_range) then
        magic_add := true;
      quot_d := 2*quot_d;
      rem_d := 2*rem_d + 1;
    end;

    gap := d - 1 - rem_d;
  until (exponent >= (2*N)) or (quot_nc > gap) or ((quot_nc = gap) and (rem_nc <> 0));

  magic_m := quot_d + 1;          { resulting magic number }
  magic_shift := exponent - N;    { resulting shift }
end;
|
|
|
|
{ Helper which calculates the "magic" values needed to replace a signed
  division by the constant d with a multiplication. See the PowerPC compiler
  developer manual for more information.

    N       - number of bits of the division
    d       - the divisor, must be < -1 or > 1
    magic_m - receives the magic multiplier (negated for negative d)
    magic_s - receives the shift amount }
procedure getmagic_signedN(const N : byte; const d : aInt;
  out magic_m : aInt; out magic_s : aInt);
var
  exponent : aInt;
  abs_d, abs_nc, gap, quot_nc, rem_nc, quot_d, rem_d, bound : aWord;
  half_range : aWord;
begin
  assert((d < -1) or (d > 1));

  { 2^(N-1) }
  half_range := aWord(1) shl (N-1);

  abs_d := abs(d);
  bound := half_range + (aWord(d) shr (N-1));
  abs_nc := bound - 1 - bound mod abs_d;      { absolute value of nc }
  exponent := (N-1);
  { quot_nc/rem_nc: quotient and remainder of 2^exponent by abs(nc) }
  quot_nc := half_range div abs_nc;
  rem_nc := half_range - quot_nc*abs_nc;
  { quot_d/rem_d: quotient and remainder of 2^exponent by abs(d) }
  quot_d := half_range div abs_d;
  rem_d := half_range - quot_d*abs_d;
  repeat
    inc(exponent);

    { step quot_nc/rem_nc to the next power of two }
    quot_nc := 2*quot_nc;
    rem_nc := 2*rem_nc;
    if (rem_nc >= abs_nc) then    { must be an unsigned comparison }
    begin
      inc(quot_nc);
      dec(rem_nc, abs_nc);
    end;

    { step quot_d/rem_d to the next power of two }
    quot_d := 2*quot_d;
    rem_d := 2*rem_d;
    if (rem_d >= abs_d) then      { must be an unsigned comparison }
    begin
      inc(quot_d);
      dec(rem_d, abs_d);
    end;

    gap := abs_d - rem_d;
  until (quot_nc > gap) or ((quot_nc = gap) and (rem_nc <> 0));

  { resulting magic number, carrying the sign of the divisor }
  magic_m := quot_d + 1;
  if (d < 0) then
    magic_m := -magic_m;
  { resulting shift }
  magic_s := exponent - N;
end;
|
|
{$ifdef rangeon}
|
|
{$r+}
|
|
{$undef rangeon}
|
|
{$endif}
|
|
|
|
{$ifdef overflowon}
|
|
{$q+}
|
|
{$undef overflowon}
|
|
{$endif}
|
|
|
|
{ Finds positive and negative powers of two.

    value - the number to examine
    power - receives the exponent if value (or -value) is a power of two,
            and 0 otherwise
    neg   - receives true if value is negative, whether or not it is a
            power of two

  Returns true if value or -value is a power of two. Zero is not a power
  of two, so the result is false for it. }
function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
var
  i : longint;
  hl : aInt;
begin
  { defaults for the "not a power of two" case, so that neither the result
    nor the out parameters are ever left undefined }
  result := false;
  power := 0;
  neg := false;
  { also try to find negative powers of two by negating the value if it is
    negative. low(aInt) is special because it can not be negated, so simply
    return the appropriate values for it }
  if (value < 0) then begin
    neg := true;
    if (value = low(aInt)) then begin
      power := sizeof(aInt)*8-1;
      result := true;
      exit;
    end;
    value := -value;
  end;

  { zero has no bit set at all, and a value with more than one bit set is
    not a power of two either }
  if (value = 0) or ((value and (value-1)) <> 0) then
    exit;

  { exactly one bit is set: locate it }
  hl := 1;
  for i := 0 to (sizeof(aInt)*8-1) do begin
    if (hl = value) then begin
      result := true;
      power := i;
      exit;
    end;
    hl := hl shl 1;
  end;
end;
|
|
|
|
{ returns the number of instructions required to load the given integer into a register.
|
|
This is basically a stripped down version of a_load_const_reg, increasing a counter
|
|
instead of emitting instructions. }
|
|
function getInstructionLength(a : aint) : longint;
|
|
|
|
{ Adds the number of instructions counted for the 32 bit value a to length. }
{ NOTE(review): the boolean result presumably tells whether the load sign-extends into the upper 32 bits - confirm against a_load_const_reg. }
function get32bitlength(a : longint; var length : longint) : boolean; inline;
|
|
var
|
|
is_half_signed : byte;
|
|
begin
|
|
{ if the lower 16 bits are zero, do a single LIS }
|
|
if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
|
|
inc(length);
|
|
get32bitlength := longint(a) < 0;
|
|
end else begin
|
|
{ is_half_signed is 1 if the low half is negative when viewed as a smallint, 0 otherwise }
is_half_signed := ord(smallint(lo(a)) < 0);
|
|
{ one instruction is always counted in this branch }
inc(length);
|
|
{ a second instruction is only counted if the high half plus is_half_signed is non-zero }
if smallint(hi(a) + is_half_signed) <> 0 then
|
|
inc(length);
|
|
get32bitlength := (smallint(a) < 0) or (a < 0);
|
|
end;
|
|
end;
|
|
|
|
var
|
|
extendssign : boolean;
|
|
|
|
begin
|
|
result := 0;
|
|
{ low half zero and high half non-zero: count the high half plus one extra instruction }
{ NOTE(review): assumes aint is 64 bit so that lo/hi yield 32 bit halves - confirm }
if (lo(a) = 0) and (hi(a) <> 0) then begin
|
|
get32bitlength(hi(a), result);
|
|
inc(result);
|
|
end else begin
|
|
{ count the low half first and remember the flag it reports }
extendssign := get32bitlength(lo(a), result);
|
|
{ flag set although the high half is zero: one extra instruction }
if (extendssign) and (hi(a) = 0) then
|
|
inc(result)
|
|
{ otherwise the high half is counted, plus one extra instruction, unless it is -1 with the flag set or 0 with the flag clear }
else if (not
|
|
((extendssign and (longint(hi(a)) = -1)) or
|
|
((not extendssign) and (hi(a)=0)))
|
|
) then begin
|
|
get32bitlength(hi(a), result);
|
|
inc(result);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{ Creates the register allocators for the integer, floating point and
  multimedia register files. The order of the registers in each set is kept
  exactly as it is passed to trgcpu.create. }
procedure tcgppc.init_register_allocators;
begin
  inherited init_register_allocators;

  if (target_info.system = system_powerpc64_darwin) then
    { special for darwin/ppc64: r2 available volatile, r13 = tls }
    rg[R_INTREGISTER] := trgcpu.create(R_INTREGISTER, R_SUBWHOLE,
      [RS_R2, RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
       RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
       RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
       RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
       RS_R14], first_int_imreg, [])
  else
    { all other targets: r2 is not handed out, r13 is }
    rg[R_INTREGISTER] := trgcpu.create(R_INTREGISTER, R_SUBWHOLE,
      [RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
       RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
       RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
       RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
       RS_R14, RS_R13], first_int_imreg, []);

  { floating point registers, identical for all targets }
  rg[R_FPUREGISTER] := trgcpu.create(R_FPUREGISTER, R_SUBNONE,
    [RS_F0, RS_F1, RS_F2, RS_F3, RS_F4, RS_F5, RS_F6, RS_F7, RS_F8, RS_F9,
     RS_F10, RS_F11, RS_F12, RS_F13, RS_F31, RS_F30, RS_F29, RS_F28, RS_F27,
     RS_F26, RS_F25, RS_F24, RS_F23, RS_F22, RS_F21, RS_F20, RS_F19, RS_F18,
     RS_F17, RS_F16, RS_F15, RS_F14], first_fpu_imreg, []);

  { multimedia registers: only a placeholder set so far }
{$WARNING FIX ME}
  rg[R_MMREGISTER] := trgcpu.create(R_MMREGISTER, R_SUBNONE,
    [RS_M0, RS_M1, RS_M2], first_mm_imreg, []);
end;
|
|
|
|
{ Releases the three register allocators created by init_register_allocators
  and then lets the inherited implementation clean up. }
procedure tcgppc.done_register_allocators;
begin
  { integer, floating point and multimedia allocators, in creation order }
  rg[R_INTREGISTER].free;
  rg[R_FPUREGISTER].free;
  rg[R_MMREGISTER].free;

  inherited done_register_allocators;
end;
|
|
|
|
{ Passes the memory operand r as a parameter by walking the chain of parameter
  locations in paraloc and loading/copying the matching part of the memory
  into each of them.

  size    - size of the parameter; OS_NO selects the "non-integral size" path
            which loads the remaining bytes piecewise
  r       - memory location holding the parameter value
  paraloc - describes where the parameter has to be placed }
procedure tcgppc.a_param_ref(list: TAsmList; size: tcgsize; const r:
  treference; const paraloc: tcgpara);

var
  srcref, dstref: treference;
  curloc: pcgparalocation;
  bytesleft: aint;
  shifttail : boolean;

begin
  srcref := r;
  bytesleft := paraloc.intsize;
  shifttail := false;
  curloc := paraloc.location;
  while assigned(curloc) do begin
    case curloc^.loc of
      LOC_REGISTER, LOC_CREGISTER:
        if (size <> OS_NO) then
          a_load_ref_reg(list, size, curloc^.size, srcref,
            curloc^.register)
        else begin
          { load a non-integral sized memory location into the register. The
            remaining memory is bytesleft bytes large.
            Always assume that this memory area is properly aligned, i.e. for
            "odd" sizes start with loading the larger quantities first }
          case bytesleft of
            1,2,4,8 :
              a_load_ref_reg(list, int_cgsize(bytesleft), curloc^.size, srcref,
                curloc^.register);
            3 : begin
              { 2 bytes via R12, 1 byte directly, then merge }
              a_reg_alloc(list, NR_R12);
              a_load_ref_reg(list, OS_16, curloc^.size, srcref,
                NR_R12);
              inc(srcref.offset, tcgsize2size[OS_16]);
              a_load_ref_reg(list, OS_8, curloc^.size, srcref,
                curloc^.register);
              list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, curloc^.register, NR_R12, 8, 40));
              a_reg_dealloc(list, NR_R12);
            end;
            5 : begin
              { 4 bytes via R12, 1 byte directly, then merge }
              a_reg_alloc(list, NR_R12);
              a_load_ref_reg(list, OS_32, curloc^.size, srcref, NR_R12);
              inc(srcref.offset, tcgsize2size[OS_32]);
              a_load_ref_reg(list, OS_8, curloc^.size, srcref, curloc^.register);
              list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, curloc^.register, NR_R12, 8, 24));
              a_reg_dealloc(list, NR_R12);
            end;
            6 : begin
              { 4 bytes via R12, 2 bytes directly, then merge }
              a_reg_alloc(list, NR_R12);
              a_load_ref_reg(list, OS_32, curloc^.size, srcref, NR_R12);
              inc(srcref.offset, tcgsize2size[OS_32]);
              a_load_ref_reg(list, OS_16, curloc^.size, srcref, curloc^.register);
              list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, curloc^.register, NR_R12, 16, 16));
              a_reg_dealloc(list, NR_R12);
            end;
            7 : begin
              { 4 bytes via R12, 2 bytes via R0, 1 byte directly; R12 is first
                merged into R0, the result then into the target register }
              a_reg_alloc(list, NR_R12);
              a_reg_alloc(list, NR_R0);
              a_load_ref_reg(list, OS_32, curloc^.size, srcref, NR_R12);
              inc(srcref.offset, tcgsize2size[OS_32]);
              a_load_ref_reg(list, OS_16, curloc^.size, srcref, NR_R0);
              inc(srcref.offset, tcgsize2size[OS_16]);
              a_load_ref_reg(list, OS_8, curloc^.size, srcref, curloc^.register);
              list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
              list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, curloc^.register, NR_R0, 8, 8));
              a_reg_dealloc(list, NR_R0);
              a_reg_dealloc(list, NR_R12);
            end;
            else begin
              { still > 8 bytes to load, so fill one complete register now }
              a_load_ref_reg(list, curloc^.size, curloc^.size, srcref,
                curloc^.register);
              { the block is > 8 bytes, so any bytes which are not a multiple
                of the register size have to be stored beginning with the MSB }
              shifttail := true;
            end;
          end;
          { NOTE(review): the shift count multiplies the number of missing
            bytes with tcgsize2size[OS_INT]; presumably this is meant as
            "bits per byte" and only works because OS_INT is 8 bytes here -
            confirm }
          if (shifttail) and (bytesleft < tcgsize2size[OS_INT]) then
            a_op_const_reg(list, OP_SHL, OS_INT,
              (tcgsize2size[OS_INT] - bytesleft) * tcgsize2size[OS_INT],
              curloc^.register);
        end;
      LOC_REFERENCE:
        begin
          { everything that is left is copied to memory in one go, so this
            has to be the last location of the chain }
          reference_reset_base(dstref, curloc^.reference.index,
            curloc^.reference.offset);
          g_concatcopy(list, srcref, dstref, bytesleft);
          if assigned(curloc^.next) then
            internalerror(2005010710);
        end;
      LOC_FPUREGISTER, LOC_CFPUREGISTER:
        case curloc^.size of
          OS_F32, OS_F64:
            a_loadfpu_ref_reg(list, curloc^.size, curloc^.size, srcref, curloc^.register);
          else
            internalerror(2002072801);
        end;
      LOC_VOID:
        { nothing to do }
        ;
      else
        internalerror(2002081103);
    end;
    { advance to the part of the memory which belongs to the next location }
    inc(srcref.offset, tcgsize2size[curloc^.size]);
    dec(bytesleft, tcgsize2size[curloc^.size]);
    curloc := curloc^.next;
  end;
end;
|
|
|
|
{ calling a procedure by name }
|
|
|
|
{ Emits a call to the routine with assembler name s. On darwin/ppc64 the call
  goes through the stub returned by get_darwin_call_stub; on all other targets
  it is a direct call with prepended dot and trailing NOP. }
procedure tcgppc.a_call_name(list: TAsmList; const s: string);
begin
  if (target_info.system = system_powerpc64_darwin) then
    begin
      list.concat(taicpu.op_sym(A_BL,get_darwin_call_stub(s)));
      include(current_procinfo.flags,pi_do_call);
    end
  else
    a_call_name_direct(list, s, true, true);
end;
|
|
|
|
|
|
{ Emits a BL to the symbol s.

  prependDot  - call '.' + s instead of s
  addNOP      - emit a NOP directly after the branch
  includeCall - mark the current procedure as performing a call (pi_do_call) }
procedure tcgppc.a_call_name_direct(list: TAsmList; s: string; prependDot : boolean; addNOP : boolean; includeCall : boolean);
begin
  if (prependDot) then
    s := '.' + s;

  { the branch itself, optionally followed by a NOP }
  list.concat(taicpu.op_sym(A_BL, current_asmdata.RefAsmSymbol(s)));
  if (addNOP) then
    list.concat(taicpu.op_none(A_NOP));

  if (includeCall) then
    include(current_procinfo.flags, pi_do_call);
end;
|
|
|
|
|
|
{ calling a procedure by address }
|
|
|
|
{ Emits an indirect call through reg.

  On darwin/ppc64 the inherited implementation is used as is. On all other
  targets reg contains the address of a function descriptor: the TOC pointer
  is saved in the stack frame, entry point, new TOC pointer and environment
  pointer are loaded from the descriptor (or, when optimizing for size, the
  .ptrgl helper is called) and the own TOC pointer is restored after the
  call. }
procedure tcgppc.a_call_reg(list: TAsmList; reg: tregister);
var
  tmpref: treference;
  tempreg : TRegister;
begin
  if (target_info.system = system_powerpc64_darwin) then begin
    inherited a_call_reg(list,reg);
    include(current_procinfo.flags, pi_do_call);
    { there is no TOC handling on darwin: r2 is handed out by the register
      allocator as an ordinary volatile register there, so the RTOC reload
      below must not be executed (it used to be reached by falling through
      and overwrote r2 with the contents of the stack slot) }
    exit;
  end;

  if (not (cs_opt_size in current_settings.optimizerswitches)) then begin
    { allocate the temp in the list the instructions are added to }
    tempreg := cg.getintregister(list, OS_INT);
    { load actual function entry (reg contains the reference to the function descriptor)
      into tempreg }
    reference_reset_base(tmpref, reg, 0);
    a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, tempreg);

    { save TOC pointer in stackframe }
    reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
    a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);

    { move actual function pointer to CTR register }
    list.concat(taicpu.op_reg(A_MTCTR, tempreg));

    { load new TOC pointer from function descriptor into RTOC register }
    reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR]);
    a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);

    { load new environment pointer from function descriptor into R11 register }
    reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR]);
    a_reg_alloc(list, NR_R11);
    a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
    { call function }
    list.concat(taicpu.op_none(A_BCTRL));
    a_reg_dealloc(list, NR_R11);
  end else begin
    { call ptrgl helper routine which expects the pointer to the function descriptor
      in R11 }
    a_reg_alloc(list, NR_R11);
    a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
    a_call_name_direct(list, '.ptrgl', false, false);
    a_reg_dealloc(list, NR_R11);
  end;

  { we need to load the old RTOC from stackframe because we changed it }
  reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);

  include(current_procinfo.flags, pi_do_call);
end;
|
|
|
|
{********************** load instructions ********************}
|
|
|
|
{ Loads the constant a into reg. size must be one of the 8 to 64 bit integer
  sizes, otherwise an internal error is raised. Depending on the optimizer
  and PIC settings and on the number of instructions an immediate load would
  take, the constant is either loaded via loadConstantPIC or built in place
  with immediate instructions. }
procedure tcgppc.a_load_const_reg(list: TAsmList; size: TCGSize; a: aint;
  reg: TRegister);

  { loads a 32 bit constant into the given register, using an optimal instruction sequence.
    This is either LIS, LI or LI+ADDIS.
    Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
    sign extension was performed) }
  function load32bitconstant(list : TAsmList; size : TCGSize; a : longint;
    reg : TRegister) : boolean;
  var
    is_half_signed : byte;
    hi_adjusted : smallint;
  begin
    { if the lower 16 bits are zero, do a single LIS }
    if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
      list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(hi(a))));
      load32bitconstant := longint(a) < 0;
    end else begin
      is_half_signed := ord(smallint(lo(a)) < 0);
      { upper halfword, corrected for the sign extension done by LI }
      hi_adjusted := smallint(hi(a) + is_half_signed);
      list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a and $ffff)));
      if hi_adjusted <> 0 then begin
        list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, hi_adjusted));
        { LI yields a value in -32768..32767 and ADDIS adds hi_adjusted*65536
          as a 64 bit addition, so the sign of the register is the sign of
          hi_adjusted. The former test "(smallint(a) < 0) or (a < 0)" claimed
          sign extension for e.g. $00008000 although LI+ADDIS leaves the upper
          32 bits zero; a constant like $FFFFFFFF00008000 was then loaded
          without its upper half. }
        load32bitconstant := hi_adjusted < 0;
      end else
        { a single LI: sign extended from the lower halfword }
        load32bitconstant := smallint(lo(a)) < 0;
    end;
  end;

  { loads a 32 bit constant into R0, using an optimal instruction sequence.
    This is either LI, LIS or LIS+ORI.
    Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
    sign extension was performed) }
  function load32bitconstantR0(list : TAsmList; size : TCGSize; a : longint) : boolean;
  begin
    { if it's a value we can load with a single LI, do it }
    if (a >= low(smallint)) and (a <= high(smallint)) then begin
      list.concat(taicpu.op_reg_const(A_LI, NR_R0, smallint(a)));
    end else begin
      { load the upper halfword with LIS; the ORI for the lower halfword is
        only needed if that halfword is not zero }
      list.concat(taicpu.op_reg_const(A_LIS, NR_R0, smallint(a shr 16)));
      if (smallint(a) <> 0) then begin
        list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(a)));
      end;
    end;
    load32bitconstantR0 := a < 0;
  end;


  { emits the code to load a constant by emitting various instructions into the output
    code}
  procedure loadConstantNormal(list: TAsmList; size : TCgSize; a: aint; reg: TRegister);
  var
    extendssign : boolean;
  begin
    if (lo(a) = 0) and (hi(a) <> 0) then begin
      { load only upper 32 bits, and shift }
      load32bitconstant(list, size, longint(hi(a)), reg);
      list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));
    end else begin
      { load lower 32 bits }
      extendssign := load32bitconstant(list, size, longint(lo(a)), reg);
      if (extendssign) and (hi(a) = 0) then
        { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic
          sign extension, clear those bits }
        list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, reg, reg, 0, 32))
      else if (not
        ((extendssign and (longint(hi(a)) = -1)) or
         ((not extendssign) and (hi(a)=0)))
        ) then begin
        { only load the upper 32 bits, if the automatic sign extension is not okay,
          that is, _not_ if
          - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper
           32 bits should contain -1
          - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
           32 bits should contain 0 }
        a_reg_alloc(list, NR_R0);
        load32bitconstantR0(list, size, longint(hi(a)));
        { combine both registers }
        list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
        a_reg_dealloc(list, NR_R0);
      end;
    end;
  end;

{$IFDEF EXTDEBUG}
var
  astring : string;
{$ENDIF EXTDEBUG}

begin
{$IFDEF EXTDEBUG}
  astring := 'a_load_const_reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]) + ' ' + hexstr(a, 16);
  list.concat(tai_comment.create(strpnew(astring)));
{$ENDIF EXTDEBUG}
  if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
    internalerror(2002090902);
  { if PIC or the peephole optimizer is enabled, and the number of instructions
    which would be required to load the value is greater than 2, let
    loadConstantPIC load the value instead of building it with immediates }
  if (((cs_opt_peephole in current_settings.optimizerswitches) or (cs_create_pic in current_settings.moduleswitches)) and
    (getInstructionLength(a) > 2)) then
    loadConstantPIC(list, size, a, reg)
  else
    loadConstantNormal(list, size, a, reg);
end;
|
|
|
|
|
|
{ Loads a value of size fromsize from the memory location ref into reg,
  zero or sign extending it as selected by the load instruction. fromsize
  must be one of the 8 to 64 bit integer sizes. }
procedure tcgppc.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize;
  const ref: treference; reg: tregister);

const
  LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
  { indexed? updating? }
  (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
   ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
   ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
   ((A_LD, A_LDU), (A_LDX, A_LDUX)),
   { 128bit stuff too }
   ((A_NONE, A_NONE), (A_NONE, A_NONE)),
   { there's no load-byte-with-sign-extend :( }
   ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
   ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
   { there's no load-word-arithmetic-indexed with update, simulate it in code :( }
   ((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
   ((A_LD, A_LDU), (A_LDX, A_LDUX))
  );
var
  loadop: tasmop;
  fixedref: treference;

begin
{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('a_load_ref_reg ' + ref2string(ref))));
{$ENDIF EXTDEBUG}

  if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
    internalerror(2002090904);

  { work on a copy of the reference which fixref may rewrite }
  fixedref := ref;
  fixref(list, fixedref);

  { never load more than the destination holds; the caller is expected to
    have adjusted the reference already in this case }
  if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
    fromsize := tosize;

  { only the non-updating forms are selected here }
  loadop := loadinstr[fromsize, fixedref.index <> NR_NO, false];

  { there is no LWAU instruction, simulate using ADDI and LWA.
    NOTE(review): with the "updating" index fixed to false above, A_NOP does
    not appear to be selectable, so this branch looks unreachable - confirm }
  if (loadop = A_NOP) then begin
    list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, fixedref.offset));
    fixedref.offset := 0;
    loadop := A_LWA;
  end;

  a_load_store(list, loadop, reg, fixedref);

  { sign extend shortint if necessary, since there is no
    load instruction that does that automatically (JM) }
  if fromsize = OS_S8 then
    list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
end;
|
|
|
|
{ Moves reg1 into reg2, converting from fromsize to tosize.

  A plain register move is emitted unless the value has to be truncated,
  changes signedness at equal size, or is a smaller signed value which goes
  into a larger unsigned one that is not register sized; in those cases the
  matching sign extension or masking instruction for tosize is used. The
  emitted instruction is registered with the integer register allocator as a
  move instruction. }
procedure tcgppc.a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize;
  reg1, reg2: tregister);
var
  instr: TAiCpu;
begin
{$ifdef extdebug}
  list.concat(tai_comment.create(strpnew('a_load_reg_reg from : ' + cgsize2string(fromsize) + ' to ' + cgsize2string(tosize))));
{$endif}

  if (tcgsize2size[fromsize] > tcgsize2size[tosize]) or
    ((tcgsize2size[fromsize] = tcgsize2size[tosize]) and (fromsize <> tosize)) or
    { do we need to mask out the sign when loading from smaller signed to larger unsigned type? }
    ( is_signed_cgsize(fromsize) and (not is_signed_cgsize(tosize)) and
      (tcgsize2size[fromsize] < tcgsize2size[tosize]) and (tcgsize2size[tosize] <> tcgsize2size[OS_INT]) ) then begin
    case tosize of
      OS_S8:
        instr := taicpu.op_reg_reg(A_EXTSB,reg2,reg1);
      OS_S16:
        instr := taicpu.op_reg_reg(A_EXTSH,reg2,reg1);
      OS_S32:
        instr := taicpu.op_reg_reg(A_EXTSW,reg2,reg1);
      OS_8, OS_16, OS_32:
        { clear everything above the low tosize bytes }
        instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[tosize])*8);
      OS_S64, OS_64:
        instr := taicpu.op_reg_reg(A_MR, reg2, reg1);
      else
        { every other target size would leave instr undefined and add a
          garbage pointer to the list below }
        internalerror(2007021101);
    end;
  end else
    instr := taicpu.op_reg_reg(A_MR, reg2, reg1);

  list.concat(instr);
  rg[R_INTREGISTER].add_move_instruction(instr);
end;
|
|
|
|
{ Loads the bit field described by sreg into destreg and converts it from
  subsetsize to tosize. }
procedure tcgppc.a_load_subsetreg_reg(list : TAsmList; subsetsize, tosize: tcgsize; const sreg: tsubsetregister; destreg: tregister);
var
  extrdi_startbit : byte;
  covers_whole_size : boolean;
begin
{$ifdef extdebug}
  list.concat(tai_comment.create(strpnew('a_load_subsetreg_reg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' tosize = ' + cgsize2string(tosize))));
{$endif}
  { calculate the correct startbit for the extrdi instruction, do the extraction if required and then
    extend the sign correctly. (The latter is actually required only for signed subsets and if that
    subset is not >= the tosize). }
  extrdi_startbit := 64 - (sreg.bitlen + sreg.startbit);
  covers_whole_size := (sreg.startbit = 0) and
    (sreg.bitlen = tcgsize2size[subsetsize]*8);

  if covers_whole_size then begin
    { the field starts at bit 0 and is exactly subsetsize large: plain
      register to register conversions are sufficient }
    a_load_reg_reg(list, tcgsize2unsigned[sreg.subsetregsize], subsetsize, sreg.subsetreg, destreg);
    a_load_reg_reg(list, subsetsize, tosize, destreg, destreg);
  end else begin
    { extract the field into the low bits of destreg }
    list.concat(taicpu.op_reg_reg_const_const(A_EXTRDI, destreg, sreg.subsetreg, sreg.bitlen, extrdi_startbit));
    if (subsetsize in [OS_S8..OS_S128]) then begin
      if ((sreg.bitlen mod 8) <> 0) then begin
        { sign extend from an arbitrary bit position with a shift pair }
        a_op_const_reg(list,OP_SHL,OS_INT,64-sreg.bitlen,destreg);
        a_op_const_reg(list,OP_SAR,OS_INT,64-sreg.bitlen,destreg);
      end else begin
        { byte sized field: use the regular conversions }
        a_load_reg_reg(list, tcgsize2unsigned[subsetsize], subsetsize, destreg, destreg);
        a_load_reg_reg(list, subsetsize, tosize, destreg, destreg);
      end;
    end;
  end;
end;
|
|
|
|
{ Stores fromreg into the bit field described by sreg. SL_SETZERO and
  SL_SETMAX are left to the inherited implementation; a field as large as the
  whole register is written with a register move, everything else with a
  single INSRDI. }
procedure tcgppc.a_load_regconst_subsetreg_intern(list : TAsmList; fromsize, subsetsize: tcgsize; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
begin
{$ifdef extdebug}
  list.concat(tai_comment.create(strpnew('a_load_reg_subsetreg fromsize = ' + cgsize2string(fromsize) + ' subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + IntToStr(sreg.startbit))));
{$endif}
  if (slopt in [SL_SETZERO,SL_SETMAX]) then begin
    inherited a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,sreg,slopt);
    exit;
  end;

  if (sreg.bitlen = sizeof(aint)*8) then
    { the field covers the complete register }
    a_load_reg_reg(list, fromsize, subsetsize, fromreg, sreg.subsetreg)
  else
    { simply use the INSRDI instruction }
    list.concat(taicpu.op_reg_reg_const_const(A_INSRDI, sreg.subsetreg, fromreg, sreg.bitlen, (64 - (sreg.startbit + sreg.bitlen)) and 63));
end;
|
|
|
|
{ Stores the constant a into the bit field described by sreg by loading it
  into a temporary register first and inserting that register. }
procedure tcgppc.a_load_const_subsetreg(list: TAsmlist; subsetsize: tcgsize;
  a: aint; const sreg: tsubsetregister);
var
  constreg : TRegister;
begin
{$ifdef extdebug}
  list.concat(tai_comment.create(strpnew('a_load_const_subsetreg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' a = ' + intToStr(a))));
{$endif}
  { loading the constant into the lowest bits of a temp register and then
    inserting it is better on ppc64 than loading some usually large constants
    and doing masking and shifting }
  constreg := getintregister(list,subsetsize);
  a_load_const_reg(list,subsetsize,a,constreg);
  a_load_reg_subsetreg(list, subsetsize, subsetsize, constreg, sreg);
end;
|
|
|
|
{ Two operand form "reg := reg op a": forwards to the three operand variant
  with reg as both source and destination. }
procedure tcgppc.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a:
  aint; reg: TRegister);
begin
  a_op_const_reg_reg(list, op, size, a, reg, reg);
end;
|
|
|
|
{ Two operand form "dst := dst op src": forwards to the three operand variant
  with dst as second source and destination. }
procedure tcgppc.a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,
  dst: TRegister);
begin
  a_op_reg_reg_reg(list, op, size, src, dst, dst);
end;
|
|
|
|
{ Emits code for "dst := src op a" with a constant operand a.

  Cheap special cases (neutral/absorbing constants, powers of two, values
  which fit into an immediate field, division by constants when optimizing)
  are handled with immediate instructions. Everything else loads the constant
  into a scratch register and uses a_op_reg_reg_reg. All instructions are
  added to list. }
procedure tcgppc.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  size: tcgsize; a: aint; src, dst: tregister);
var
  useReg : boolean;

  procedure do_lo_hi(loOp, hiOp : TAsmOp);
  begin
    { Optimization for logical ops (excluding AND), trying to do this as efficiently
      as possible by only generating code for the affected halfwords. Note that all
      the instructions handled here must have "X op 0 = X" for every halfword. }
    usereg := false;
    if (aword(a) > high(dword)) then begin
      usereg := true;
    end else begin
      if (word(a) <> 0) then begin
        list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
        if (word(a shr 16) <> 0) then
          list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
      end else if (word(a shr 16) <> 0) then
        list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
    end;
  end;

  procedure do_lo_hi_and;
  begin
    { optimization logical and with immediate: only use "andi." for 16 bit
      ands, otherwise use register method. Doing this for 32 bit constants
      would not give any advantage to the register method (via useReg := true),
      requiring a scratch register and three instructions. }
    usereg := false;
    if (aword(a) > high(word)) then
      usereg := true
    else
      list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
  end;

  { emits "dst := src div a" without a divide instruction; all code is added
    to the list passed as parameter }
  procedure do_constant_div(list : TAsmList; size : TCgSize; a : aint; src, dst : TRegister;
    signed : boolean);
  const
    negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
  var
    magic, shift : int64;
    u_magic : qword;
    u_shift : byte;
    u_add : boolean;
    power : byte;
    isNegPower : boolean;

    divreg : tregister;
  begin
    if (a = 0) then begin
      internalerror(2005061701);
    end else if (a = 1) then begin
      cg.a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
    end else if (a = -1) and (signed) then begin
      { note: only in the signed case possible..., may overflow }
      list.concat(taicpu.op_reg_reg(negops[cs_check_overflow in current_settings.localswitches], dst, src));
    end else if (ispowerof2(a, power, isNegPower)) and
      { an unsigned divisor whose bit pattern is a negated power of two (e.g.
        $FFFFFFFFFFFFFFFE) is a huge number and no power of two at all; only
        low(aInt), i.e. 2^63, really is one. Those divisors must not be
        turned into a shift and use the multiplication below instead. }
      (signed or (not isNegPower) or (a = low(aInt))) then begin
      if (signed) then begin
        { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
        cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, power,
          src, dst);
        list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
        if (isNegPower) then
          list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
      end else begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, power, src, dst)
      end;
    end else begin
      { replace division by multiplication, both implementations }
      { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
      divreg := cg.getintregister(list, OS_INT);
      if (signed) then begin
        getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
        { load magic value }
        cg.a_load_const_reg(list, OS_INT, magic, divreg);
        { multiply }
        list.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
        { add/subtract numerator }
        if (a > 0) and (magic < 0) then begin
          cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, src, dst, dst);
        end else if (a < 0) and (magic > 0) then begin
          cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, src, dst, dst);
        end;
        { shift shift places to the right (arithmetic) }
        cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, shift, dst, dst);
        { extract and add sign bit }
        if (a >= 0) then begin
          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, src, divreg);
        end else begin
          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, dst, divreg);
        end;
        cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, dst, divreg, dst);
      end else begin
        getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
        { load magic in divreg }
        cg.a_load_const_reg(list, OS_INT, aint(u_magic), divreg);
        list.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
        if (u_add) then begin
          cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, dst, src, divreg);
          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 1, divreg, divreg);
          cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, divreg, dst, divreg);
          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift-1, divreg, dst);
        end else begin
          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift, dst, dst);
        end;
      end;
    end;
  end;

var
  scratchreg: tregister;
  shift : byte;
  shiftmask : longint;
  isneg : boolean;

begin
  { subtraction is the same as addition with negative constant }
  if op = OP_SUB then begin
    a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
    exit;
  end;
{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('a_op_const_reg_reg ' + cgop2string(op))));
{$ENDIF EXTDEBUG}

  { This case includes some peephole optimizations for the various operations,
   (e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
   independent of architecture? }

  { assume that we do not need a scratch register for the operation }
  useReg := false;
  case (op) of
    OP_DIV, OP_IDIV:
      if (cs_opt_level1 in current_settings.optimizerswitches) then
        do_constant_div(list, size, a, src, dst, op = OP_IDIV)
      else
        usereg := true;
    OP_IMUL, OP_MUL:
      { idea: factorize constant multiplicands and use adds/shifts with few factors;
        however, even a 64 bit multiply is already quite fast on PPC64 }
      if (a = 0) then
        a_load_const_reg(list, size, 0, dst)
      else if (a = -1) then
        { dst := -src; this used to negate dst itself, which is only right
          if src and dst are the same register }
        list.concat(taicpu.op_reg_reg(A_NEG, dst, src))
      else if (a = 1) then
        a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
      else if ispowerof2(a, shift, isneg) then begin
        list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
        if (isneg) then
          list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
      end else if (a >= low(smallint)) and (a <= high(smallint)) then
        list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
          smallint(a)))
      else
        usereg := true;
    OP_ADD:
      if (a = 0) then
        a_load_reg_reg(list, size, size, src, dst)
      else if (a >= low(smallint)) and (a <= high(smallint)) then
        list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
      else
        useReg := true;
    OP_OR:
      if (a = 0) then
        a_load_reg_reg(list, size, size, src, dst)
      else if (a = -1) then
        a_load_const_reg(list, size, -1, dst)
      else
        do_lo_hi(A_ORI, A_ORIS);
    OP_AND:
      if (a = 0) then
        a_load_const_reg(list, size, 0, dst)
      else if (a = -1) then
        a_load_reg_reg(list, size, size, src, dst)
      else
        do_lo_hi_and;
    OP_XOR:
      if (a = 0) then
        a_load_reg_reg(list, size, size, src, dst)
      else if (a = -1) then
        list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
      else
        do_lo_hi(A_XORI, A_XORIS);
    OP_SHL, OP_SHR, OP_SAR:
      begin
        { number of bits of a valid shift count }
        if (size in [OS_64, OS_S64]) then
          shift := 6
        else
          shift := 5;

        shiftmask := (1 shl shift)-1;
        if (a and shiftmask) <> 0 then begin
          list.concat(taicpu.op_reg_reg_const(
            TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask));
        end else
          a_load_reg_reg(list, size, size, src, dst);
        { shift counts beyond the operand size are not supported }
        if ((a shr shift) <> 0) then
          internalError(68991);
      end
    else
      internalerror(200109091);
  end;
  { if all else failed, load the constant in a register and then
    perform the operation }
  if (useReg) then begin
    scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
    a_load_const_reg(list, size, a, scratchreg);
    a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
  end else
    maybeadjustresult(list, op, size, dst);
end;
|
|
|
|
{ Emits code for "dst := src2 op src1", or "dst := op src1" for the unary
  operations OP_NEG and OP_NOT. 64 bit sizes use the doubleword instructions;
  all other sizes use the word instructions and have the result adjusted
  afterwards. }
procedure tcgppc.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  size: tcgsize; src1, src2, dst: tregister);
const
  op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
  (A_NONE, A_MR, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
   A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR);
  op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
  (A_NONE, A_MR, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
   A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
var
  is64bit : boolean;
begin
  is64bit := size in [OS_64, OS_S64];

  if op in [OP_NEG, OP_NOT] then begin
    { unary operations always work on the full register }
    list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
    if (op = OP_NOT) and not is64bit then
      { zero/sign extend result again, fromsize is not important here }
      a_load_reg_reg(list, OS_S64, size, dst, dst);
  end else if is64bit then
    list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
      src1))
  else begin
    list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
      src1));
    maybeadjustresult(list, op, size, dst);
  end;
end;
|
|
|
|
{*************** compare instructions ****************}
|
|
|
|
{ Compares reg with the constant a and jumps to l if the condition cmp_op
  holds. Sizes other than 32 and 64 bit are extended to 32 bit in reg first.
  An immediate compare is used if a fits into the immediate field, otherwise
  a is loaded into a temporary register. All instructions are added to
  list. }
procedure tcgppc.a_cmp_const_reg_label(list: TAsmList; size: tcgsize;
  cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
const
  {                  unsigned  useconst  32bit-op }
  cmpop_table : array[boolean, boolean, boolean] of TAsmOp = (
    ((A_CMPD, A_CMPW), (A_CMPDI, A_CMPWI)),
    ((A_CMPLD, A_CMPLW), (A_CMPLDI, A_CMPLWI))
  );

var
  tmpreg : TRegister;
  signed, useconst : boolean;
  opsize : TCgSize;
  op : TAsmOp;
begin
{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('a_cmp_const_reg_label ' + cgsize2string(size) + ' ' + booltostr(cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE]) + ' ' + inttostr(a) )));
{$ENDIF EXTDEBUG}

  signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
  { in the following case, we generate more efficient code when
    signed is true }
  if (cmp_op in [OC_EQ, OC_NE]) and
    (aword(a) > $FFFF) then
    signed := true;

  opsize := size;

  { do we need to change the operand size because ppc64 only supports 32 and
    64 bit compares? }
  if (not (size in [OS_32, OS_S32, OS_64, OS_S64])) then begin
    if (signed) then
      opsize := OS_S32
    else
      opsize := OS_32;
    { the extension has to precede the compare, so it must go into the same
      list and not into current_asmdata.CurrAsmList }
    a_load_reg_reg(list, size, opsize, reg, reg);
  end;

  { can we use immediate compares? }
  useconst := (signed and ( (a >= low(smallint)) and (a <= high(smallint)))) or
    ((not signed) and (aword(a) <= $FFFF));

  op := cmpop_table[not signed, useconst, opsize in [OS_32, OS_S32]];

  if (useconst) then begin
    list.concat(taicpu.op_reg_reg_const(op, NR_CR0, reg, a));
  end else begin
    { same here: allocate and load the temp in the list which receives the
      compare }
    tmpreg := getintregister(list, OS_INT);
    a_load_const_reg(list, opsize, a, tmpreg);
    list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg, tmpreg));
  end;

  a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
end;
|
|
|
|
{ Compares reg2 with reg1 (in that operand order) and branches to label "l"
  when "cmp_op" holds. The compare instruction is chosen from the kind of
  comparison (OC_GT/OC_LT/OC_GTE/OC_LTE use the non-logical compares) and
  from whether "size" is a 64 bit size. }
procedure tcgppc.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize;
  cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
const
  {            ordered-compare  64bit-op }
  cmp_opcodes : array[boolean, boolean] of tasmop = (
    (A_CMPLW, A_CMPLD),
    (A_CMPW, A_CMPD)
  );
var
  cmpinstr: tasmop;
begin
  {$IFDEF extdebug}
  list.concat(tai_comment.create(strpnew('a_cmp_reg_reg_label, size ' + cgsize2string(size) + ' op ' + inttostr(ord(cmp_op)))));
  {$ENDIF extdebug}

  {$note Commented out below check because of compiler weirdness}
  {
  if (not (size in [OS_32, OS_S32, OS_64, OS_S64])) then
    internalerror(200606041);
  }

  cmpinstr := cmp_opcodes[cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE],
    size in [OS_64, OS_S64]];

  list.concat(taicpu.op_reg_reg_reg(cmpinstr, NR_CR0, reg2, reg1));
  a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
end;
|
|
|
|
{ Emits an unconditional branch (A_B) to the symbol named "s". When
  "prependDot" is set, a '.' is put in front of the name first. }
procedure tcgppc.a_jmp_name_direct(list : TAsmList; s : string; prependDot : boolean);
var
  branch: taicpu;
begin
  if prependDot then
    s := '.' + s;

  branch := taicpu.op_sym(A_B, current_asmdata.RefAsmSymbol(s));
  { mark the instruction as a jump }
  branch.is_jmp := true;
  list.concat(branch);
end;
|
|
|
|
{ Emits an unconditional jump to the routine named "s". On darwin/ppc64 the
  jump goes to the symbol returned by get_darwin_call_stub; on all other
  targets it is a direct jump to the dot-prefixed name. }
procedure tcgppc.a_jmp_name(list: TAsmList; const s: string);
var
  stubjump: taicpu;
begin
  if (target_info.system <> system_powerpc64_darwin) then begin
    a_jmp_name_direct(list, s, true);
    exit;
  end;

  stubjump := taicpu.op_sym(A_B, get_darwin_call_stub(s));
  stubjump.is_jmp := true;
  list.concat(stubjump);
end;
|
|
|
|
{ Emits an unconditional branch to label "l". }
procedure tcgppc.a_jmp_always(list: TAsmList; l: tasmlabel);
begin
  { A_B with C_None: no condition attached to the branch }
  a_jmp(list, A_B, C_None, 0, l);
end;
|
|
|
|
{ Emits a conditional branch to label "l" that depends on the result flags
  "f". The flags are translated with flags_to_cond; the condition register
  field is passed to a_jmp relative to RS_CR0. }
procedure tcgppc.a_jmp_flags(list: TAsmList; const f: TResFlags; l:
  tasmlabel);
var
  branchcond: tasmcond;
begin
  branchcond := flags_to_cond(f);
  a_jmp(list, A_BC, branchcond.cond, branchcond.cr - RS_CR0, l);
end;
|
|
|
|
{ Materialises the truth value of the result flags "f" in register "reg":
  the condition register is copied to "reg", the bit belonging to the flag
  is rotated into the lowest bit position and isolated, and the result is
  inverted for the flags that test for a cleared bit (F_NE, F_GE, F_LE).
  Flags other than F_EQ/F_NE/F_LT/F_GE/F_GT/F_LE raise an internal error.
  The "size" parameter is not used by this implementation. }
procedure tcgppc.g_flags2reg(list: TAsmList; size: TCgSize; const f:
  TResFlags; reg: TRegister);
var
  crbit: byte;
  wantset: boolean;
begin
  { first bit of the condition register field that "f" refers to }
  crbit := ((f.cr - RS_CR0) * 4);

  { pick the bit inside the field and the value (set/cleared) it must have }
  case f.flag of
    F_LT, F_GE:
      wantset := f.flag = F_LT;
    F_GT, F_LE:
      begin
        inc(crbit);
        wantset := f.flag = F_GT;
      end;
    F_EQ, F_NE:
      begin
        inc(crbit, 2);
        wantset := f.flag = F_EQ;
      end;
    else
      internalerror(200112261);
  end;

  { copy the condition register into the destination register }
  list.concat(taicpu.op_reg(A_MFCR, reg));

  { rotate left so that the tested bit ends up in the last bit position,
    then mask everything else away }
  crbit := (crbit + 1) and 31;
  list.concat(taicpu.op_reg_reg_const_const_const(
    A_RLWINM, reg, reg, crbit, 31, 31));

  { the flag asks for a cleared bit: invert the extracted value }
  if not wantset then
    list.concat(taicpu.op_reg_reg_const(A_XORI, reg, reg, 1));
end;
|
|
|
|
{ *********** entry/exit code and address loading ************ }
|
|
|
|
{ Intentionally empty: the registers are saved by g_proc_entry. Emitting the
  save code here is not safe because of the point in time at which this
  method gets called. }
procedure tcgppc.g_save_standard_registers(list: TAsmList);
begin
end;
|
|
|
|
{ Intentionally empty: the registers are restored by g_proc_exit. Emitting
  the restore code here is not safe because of the point in time at which
  this method gets called. }
procedure tcgppc.g_restore_standard_registers(list: TAsmList);
begin
end;
|
|
|
|
{ Determines the lowest-numbered floating point register in RS_F14..RS_F31
  that is marked as used in the current procedure.

    firstfpr : that register, or RS_F31 when none is used
    fprcount : number of registers from firstfpr up to and including RS_F31,
               or 0 when none is used

  For procedures with the po_assembler option no register is reported. }
procedure tcgppc.calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
var
  candidate : TSuperRegister;
begin
  firstfpr := RS_F31;
  fprcount := 0;

  if (po_assembler in current_procinfo.procdef.procoptions) then
    exit;

  for candidate := RS_F14 to RS_F31 do
    if candidate in rg[R_FPUREGISTER].used_in_proc then begin
      firstfpr := candidate;
      fprcount := ord(RS_F31)-ord(candidate)+1;
      exit;
    end;
end;
|
|
|
|
{ Determines the lowest-numbered integer register in RS_R14..RS_R31 that is
  marked as used in the current procedure.

    firstgpr : that register, or RS_R31 when none is used
    gprcount : number of registers from firstgpr up to and including RS_R31,
               or 0 when none is used

  For procedures with the po_assembler option no register is reported. }
procedure tcgppc.calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
var
  candidate : TSuperRegister;
begin
  firstgpr := RS_R31;
  gprcount := 0;

  if (po_assembler in current_procinfo.procdef.procoptions) then
    exit;

  for candidate := RS_R14 to RS_R31 do
    if candidate in rg[R_INTREGISTER].used_in_proc then begin
      firstgpr := candidate;
      gprcount := ord(RS_R31)-ord(candidate)+1;
      exit;
    end;
end;
|
|
|
|
{ Callback used by g_profilecode: stores the parameter "para" from its
  callee-side register into its local location (para.localloc.reference).
  Integer and floating point registers are handled; multimedia registers
  raise an internal error; every other location kind is left untouched. }
procedure tcgppc.profilecode_savepara(para : tparavarsym; list : TAsmList);
begin
  if para.paraloc[calleeside].location^.loc in [LOC_REGISTER, LOC_CREGISTER] then
    a_load_reg_ref(list, OS_INT, para.paraloc[calleeside].Location^.size,
      para.paraloc[calleeside].Location^.register, para.localloc.reference)
  else if para.paraloc[calleeside].location^.loc in [LOC_FPUREGISTER, LOC_CFPUREGISTER] then
    a_loadfpu_reg_ref(list, para.paraloc[calleeside].Location^.size,
      para.paraloc[calleeside].Location^.size,
      para.paraloc[calleeside].Location^.register, para.localloc.reference)
  else if para.paraloc[calleeside].location^.loc in [LOC_MMREGISTER, LOC_CMMREGISTER] then
    { not supported }
    internalerror(2006041801);
end;
|
|
|
|
{ Callback used by g_profilecode: reloads the parameter "para" from its
  local location (para.localloc.reference) into its callee-side register.
  Integer and floating point registers are handled; multimedia registers
  raise an internal error; every other location kind is left untouched. }
procedure tcgppc.profilecode_restorepara(para : tparavarsym; list : TAsmList);
begin
  if para.paraloc[calleeside].Location^.loc in [LOC_REGISTER, LOC_CREGISTER] then
    a_load_ref_reg(list, para.paraloc[calleeside].Location^.size, OS_INT,
      para.localloc.reference, para.paraloc[calleeside].Location^.register)
  else if para.paraloc[calleeside].Location^.loc in [LOC_FPUREGISTER, LOC_CFPUREGISTER] then
    a_loadfpu_ref_reg(list, para.paraloc[calleeside].Location^.size,
      para.paraloc[calleeside].Location^.size,
      para.localloc.reference, para.paraloc[calleeside].Location^.register)
  else if para.paraloc[calleeside].Location^.loc in [LOC_MMREGISTER, LOC_CMMREGISTER] then
    { not supported }
    internalerror(2006041802);
end;
|
|
|
|
|
|
{ Emits the profiling hook: a call to '_mcount', bracketed by code that
  saves all register parameters of the current procedure beforehand and
  reloads them afterwards. }
procedure tcgppc.g_profilecode(list: TAsmList);
begin
  { spill the register parameters to their local locations }
  current_procinfo.procdef.paras.ForEachCall(TObjectListCallback(@profilecode_savepara), list);

  a_call_name_direct(list, '_mcount', false, true);

  { and bring them back into their registers }
  current_procinfo.procdef.paras.ForEachCall(TObjectListCallback(@profilecode_restorepara), list);
end;
|
|
|
|
{ Generates the entry code of a procedure/function.
|
|
|
|
This procedure may be called before, as well as after g_return_from_proc
|
|
is called. localsize is the sum of the size necessary for local variables
|
|
and the maximum possible combined size of ALL the parameters of a procedure
|
|
called by the current one
|
|
|
|
IMPORTANT: registers are not to be allocated through the register
|
|
allocator here, because the register colouring has already occurred !!
|
|
}
|
|
{ Emits the procedure prologue: optionally copies the link register to R0,
  saves the used non-volatile floating point and integer registers, keeps a
  copy of the old stack pointer when a frame pointer is needed, and creates
  the stack frame.

    list         : assembler list receiving the prologue
    localsize    : overwritten with the frame size computed by
                   calc_stackframe_size before it is used
    nostackframe : when set, no stack frame is created

  No registers may be obtained from the register allocator here; R0 is used
  as the only scratch register. }
procedure tcgppc.g_proc_entry(list: TAsmList; localsize: longint;
  nostackframe: boolean);
var
  firstregfpu, firstreggpr: TSuperRegister;
  needslinkreg: boolean;
  fprcount, gprcount : aint;
  href: treference;

  { Save standard registers, both FPR and GPR; does not support VMX/Altivec }
  procedure save_standard_registers;
  var
    sreg : TSuperRegister;
    saveref : TReference;
    storelr : boolean;
  begin
    { two strategies: explicit stores, or calls to the save helper routines
      when optimizing for size (cs_opt_size) }
    if not (cs_opt_size in current_settings.optimizerswitches) then begin
      { explicit stores below the stack pointer: FPRs first, then GPRs }
      reference_reset_base(saveref, NR_STACK_POINTER_REG, -8);
      if (fprcount > 0) then
        for sreg := RS_F31 downto firstregfpu do begin
          a_loadfpu_reg_ref(list, OS_FLOAT, OS_FLOAT,
            newreg(R_FPUREGISTER, sreg, R_SUBNONE), saveref);
          dec(saveref.offset, tcgsize2size[OS_FLOAT]);
        end;
      if (gprcount > 0) then
        for sreg := RS_R31 downto firstreggpr do begin
          a_load_reg_ref(list, OS_INT, OS_INT,
            newreg(R_INTREGISTER, sreg, R_SUBNONE), saveref);
          dec(saveref.offset, tcgsize2size[OS_INT]);
        end;
      { VMX registers not supported by FPC atm }

      { without the helpers the link register is always stored here }
      storelr := true;
    end else begin
      { only when no helper is called at all, the link register may have to
        be stored by the code below }
      storelr := not ((fprcount > 0) or (gprcount > 0));
      if ((fprcount > 0) and (gprcount > 0)) then begin
        a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
        a_call_name_direct(list, '_savegpr1_' + intToStr(32-gprcount), false, false, false);
        a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false, false);
      end else if (gprcount > 0) then
        a_call_name_direct(list, '_savegpr0_' + intToStr(32-gprcount), false, false, false)
      else if (fprcount > 0) then
        a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false, false);
    end;

    { we may need to store R0 (=LR) ourselves }
    if ((cs_profile in init_settings.moduleswitches) or storelr) and needslinkreg then begin
      reference_reset_base(saveref, NR_STACK_POINTER_REG, LA_LR_ELF);
      list.concat(taicpu.op_reg_ref(A_STD, NR_R0, saveref));
    end;
  end;

begin
  calcFirstUsedFPR(firstregfpu, fprcount);
  calcFirstUsedGPR(firstreggpr, gprcount);

  { calculate real stack frame size }
  localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
    gprcount, fprcount);

  { determine whether we need to save the link register }
  needslinkreg :=
    ((not (po_assembler in current_procinfo.procdef.procoptions)) and
     ((pi_do_call in current_procinfo.flags) or (cs_profile in init_settings.moduleswitches))) or
    ((cs_opt_size in current_settings.optimizerswitches) and ((fprcount > 0) or (gprcount > 0))) or
    ([cs_lineinfo, cs_debuginfo] * current_settings.moduleswitches <> []);

  a_reg_alloc(list, NR_STACK_POINTER_REG);
  a_reg_alloc(list, NR_R0);

  { move link register to r0 }
  if needslinkreg then
    list.concat(taicpu.op_reg(A_MFLR, NR_R0));

  save_standard_registers;

  { save old stack frame pointer }
  if tppcprocinfo(current_procinfo).needs_frame_pointer then begin
    a_reg_alloc(list, NR_OLD_STACK_POINTER_REG);
    list.concat(taicpu.op_reg_reg(A_MR, NR_OLD_STACK_POINTER_REG, NR_STACK_POINTER_REG));
  end;

  { create stack frame }
  if (not nostackframe) and (localsize > 0) and
     tppcprocinfo(current_procinfo).needstackframe then begin
    if (localsize > high(smallint)) then begin
      reference_reset_base(href, NR_NO, -localsize);

      { The frame size does not fit a 16 bit displacement, so the negated
        size is built in R0. This is done inline because no temp registers
        may be used any more at this point.

        Code template:
          lis   r0,ofs@highest
          ori   r0,r0,ofs@higher
          sldi  r0,r0,32
          oris  r0,r0,ofs@h
          ori   r0,r0,ofs@l
      }
      list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
      list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
      list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
      list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
      list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));

      list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
    end else begin
      { small frame: a single store-with-update is enough }
      reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
      a_load_store(list, A_STDU, NR_STACK_POINTER_REG, href);
    end;
  end;

  { CR register not used by FPC atm }

  { keep R1 allocated??? }
  a_reg_dealloc(list, NR_R0);
end;
|
|
|
|
{ Generates the exit code for a method.
|
|
|
|
This procedure may be called before, as well as after g_stackframe_entry
|
|
is called.
|
|
|
|
IMPORTANT: registers are not to be allocated through the register
|
|
allocator here, because the register colouring has already occurred !!
|
|
}
|
|
{ Emits the procedure epilogue: removes the stack frame, restores the used
  non-volatile floating point and integer registers and returns to the
  caller (either with an explicit blr or by jumping to a restore helper
  routine when optimizing for size).

    list         : assembler list receiving the epilogue
    parasize     : not used by this implementation
    nostackframe : when set, no stack frame is removed

  No registers may be obtained from the register allocator here; R0 is used
  as the only scratch register. }
procedure tcgppc.g_proc_exit(list: TAsmList; parasize: longint; nostackframe:
  boolean);
var
  firstregfpu, firstreggpr: TSuperRegister;
  needslinkreg : boolean;
  fprcount, gprcount: aint;
  href: treference;
  localsize : aint;

  { Restore standard registers, both FPR and GPR; does not support VMX/Altivec }
  procedure restore_standard_registers;
  var
    { tells whether the exit code (e.g. the blr instruction) still has to be
      emitted by hand }
    emitreturn : Boolean;
    loadref : treference;
    sreg : TSuperRegister;
  begin
    { two strategies: explicit loads, or the restore helper routines when
      optimizing for size (cs_opt_size) }
    if not (cs_opt_size in current_settings.optimizerswitches) then begin
      emitreturn := true;
      { explicit loads from below the stack pointer: FPRs first, GPRs next }
      reference_reset_base(loadref, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
      if (fprcount > 0) then
        for sreg := RS_F31 downto firstregfpu do begin
          a_loadfpu_ref_reg(list, OS_FLOAT, OS_FLOAT, loadref,
            newreg(R_FPUREGISTER, sreg, R_SUBNONE));
          dec(loadref.offset, tcgsize2size[OS_FLOAT]);
        end;
      if (gprcount > 0) then
        for sreg := RS_R31 downto firstreggpr do begin
          a_load_ref_reg(list, OS_INT, OS_INT, loadref,
            newreg(R_INTREGISTER, sreg, R_SUBNONE));
          dec(loadref.offset, tcgsize2size[OS_INT]);
        end;

      { VMX not supported by FPC atm }
    end else begin
      { the exit code is only needed when no helper is jumped to }
      emitreturn := not ((fprcount > 0) or (gprcount > 0));
      if ((fprcount > 0) and (gprcount > 0)) then begin
        a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
        a_call_name_direct(list, '_restgpr1_' + intToStr(32-gprcount), false, false, false);
        a_jmp_name_direct(list, '_restfpr_' + intToStr(32-fprcount), false);
      end else if (gprcount > 0) then
        a_jmp_name_direct(list, '_restgpr0_' + intToStr(32-gprcount), false)
      else if (fprcount > 0) then
        a_jmp_name_direct(list, '_restfpr_' + intToStr(32-fprcount), false);
    end;

    if not emitreturn then
      exit;

    { restore LR (if needed) }
    if needslinkreg then begin
      reference_reset_base(loadref, NR_STACK_POINTER_REG, LA_LR_ELF);
      list.concat(taicpu.op_reg_ref(A_LD, NR_R0, loadref));
      list.concat(taicpu.op_reg(A_MTLR, NR_R0));
    end;

    { generate return instruction }
    list.concat(taicpu.op_none(A_BLR));
  end;

begin
  calcFirstUsedFPR(firstregfpu, fprcount);
  calcFirstUsedGPR(firstreggpr, gprcount);

  { determine whether we need to restore the link register }
  needslinkreg :=
    ((not (po_assembler in current_procinfo.procdef.procoptions)) and
     ((pi_do_call in current_procinfo.flags) or (cs_profile in init_settings.moduleswitches))) or
    ((cs_opt_size in current_settings.optimizerswitches) and ((fprcount > 0) or (gprcount > 0))) or
    ([cs_lineinfo, cs_debuginfo] * current_settings.moduleswitches <> []);

  { calculate stack frame }
  localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
    gprcount, fprcount);
  { CR register not supported }

  { restore stack pointer }
  if (not nostackframe) and (localsize > 0) and
     tppcprocinfo(current_procinfo).needstackframe then begin
    if (localsize > high(smallint)) then begin
      reference_reset_base(href, NR_NO, localsize);

      { The frame size does not fit a 16 bit immediate, so it is built in
        R0. This is done inline because no temp registers may be used any
        more at this point.

        Code template:
          lis   r0,ofs@highest
          ori   r0,r0,ofs@higher
          sldi  r0,r0,32
          oris  r0,r0,ofs@h
          ori   r0,r0,ofs@l
      }
      list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
      list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
      list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
      list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
      list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));

      list.concat(taicpu.op_reg_reg_reg(A_ADD, NR_R1, NR_R1, NR_R0));
    end else begin
      { small frame: a single add-immediate is enough }
      list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
    end;
  end;

  restore_standard_registers;
end;
|
|
|
|
|
|
{ Loads the address described by "ref" into register "r".

    list : assembler list receiving the generated instructions
    ref  : reference whose address is wanted (a private copy is normalised
           with fixref, "ref" itself is not modified)
    r    : destination register

  On darwin/ppc64 the inherited implementation is used. Otherwise:
    - symbol or large offset: the 64 bit value is built with the
      lis/ori/sldi/oris/ori sequence (or a_load_const_reg when there is no
      symbol) and added to the base register, if any
    - small non-zero offset: base + offset, or just the offset
    - index register: base + index
    - base register only: register move, or nothing at all when the base
      already is "r"
    - empty reference: "r" is set to 0

  Fix compared to the previous version: a reference consisting only of a
  base register that is identical to "r" used to fall through to the last
  case and overwrite the address with 0. }
procedure tcgppc.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r:
  tregister);
var
  ref2, tmpref: treference;
  { register used to construct address }
  tempreg : TRegister;
begin
  if (target_info.system = system_powerpc64_darwin) then
    begin
      inherited a_loadaddr_ref_reg(list,ref,r);
      exit;
    end;

  ref2 := ref;
  fixref(list, ref2);
  { load a symbol }
  if (assigned(ref2.symbol) or (hasLargeOffset(ref2))) then begin
    { add the symbol's value to the base of the reference, and if the }
    { reference doesn't have a base, create one                       }
    reference_reset(tmpref);
    tmpref.offset := ref2.offset;
    tmpref.symbol := ref2.symbol;
    tmpref.relsymbol := ref2.relsymbol;
    { load 64 bit reference into r. If the reference already has a base register,
      first load the 64 bit value into a temp register, then add it to the result
      register rD }
    if (ref2.base <> NR_NO) then begin
      { already have a base register, so allocate a new one }
      tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
    end else begin
      tempreg := r;
    end;

    { code for loading a reference from a symbol into a register rD }
    (*
    lis   rX,SYM@highest
    ori   rX,rX,SYM@higher
    sldi  rX,rX,32
    oris  rX,rX,SYM@h
    ori   rX,rX,SYM@l
    *)
    {$IFDEF EXTDEBUG}
    list.concat(tai_comment.create(strpnew('loadaddr_ref_reg ')));
    {$ENDIF EXTDEBUG}
    if (assigned(tmpref.symbol)) then begin
      tmpref.refaddr := addr_highest;
      list.concat(taicpu.op_reg_ref(A_LIS, tempreg, tmpref));
      tmpref.refaddr := addr_higher;
      list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
      list.concat(taicpu.op_reg_reg_const(A_SLDI, tempreg, tempreg, 32));
      tmpref.refaddr := addr_high;
      list.concat(taicpu.op_reg_reg_ref(A_ORIS, tempreg, tempreg, tmpref));
      tmpref.refaddr := addr_low;
      list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
    end else
      a_load_const_reg(list, OS_ADDR, tmpref.offset, tempreg);

    { if there's already a base register, add the temp register contents to
      the base register }
    if (ref2.base <> NR_NO) then begin
      list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
    end;
  end else if (ref2.offset <> 0) then begin
    { no symbol, but offset <> 0 }
    if (ref2.base <> NR_NO) then begin
      a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
      { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
        occurs, so now only ref.offset has to be loaded }
    end else begin
      a_load_const_reg(list, OS_64, ref2.offset, r);
    end;
  end else if (ref2.index <> NR_NO) then begin
    list.concat(taicpu.op_reg_reg_reg(A_ADD, r, ref2.base, ref2.index))
  end else if (ref2.base <> NR_NO) then begin
    { plain [base] reference: the address is the base register itself. When
      the base already is the destination register there is nothing to do;
      in particular the register must not be cleared. }
    if (r <> ref2.base) then
      a_load_reg_reg(list, OS_ADDR, OS_ADDR, ref2.base, r)
  end else begin
    { completely empty reference: address 0 }
    list.concat(taicpu.op_reg_const(A_LI, r, 0));
  end;
end;
|
|
|
|
{ ************* concatcopy ************ }
|
|
|
|
const
|
|
maxmoveunit = 8;
|
|
|
|
|
|
{ Emits code that copies "len" bytes from "source" to "dest".

    - identical references: nothing is emitted
    - len <= maxmoveunit: a short sequence of 8/4/2/1 byte memory-to-memory
      moves
    - otherwise: 8 byte chunks are moved through F0, in a counted loop when
      there are more than four of them and unrolled otherwise; the remaining
      4/2/1 bytes are moved through R0

  The data is assumed never to overlap, so the copy always runs forward. }
procedure tcgppc.g_concatcopy(list: TAsmList; const source, dest: treference;
  len: aint);
var
  countreg: TRegister;
  src, dst: TReference;
  looplabel: tasmlabel;
  count, i: longint;

  { moves one leftover chunk of "bytes" bytes through R0 and advances both
    references past it }
  procedure copy_leftover(chunksize: tcgsize; bytes: longint);
  begin
    a_reg_alloc(list, NR_R0);
    a_load_ref_reg(list, chunksize, chunksize, src, NR_R0);
    a_load_reg_ref(list, chunksize, chunksize, NR_R0, dst);
    inc(src.offset, bytes);
    inc(dst.offset, bytes);
    a_reg_dealloc(list, NR_R0);
  end;

begin
  {$IFDEF extdebug}
  if len > high(aint) then
    internalerror(2002072704);
  list.concat(tai_comment.create(strpnew('g_concatcopy1 ' + inttostr(len) + ' bytes left ')));
  {$ENDIF extdebug}
  { source and destination are the same location: nothing to copy }
  if references_equal(source, dest) then
    exit;

  { Short copies get the shortest possible sequence. The data never overlaps,
    so a forward copy is always fine.
    NOTE: maybe use some scratch registers to pair load/store instructions }
  if (len <= maxmoveunit) then begin
    src := source;
    dst := dest;
    {$IFDEF extdebug}
    list.concat(tai_comment.create(strpnew('g_concatcopy3 ' + inttostr(src.offset) + ' ' + inttostr(dst.offset))));
    {$ENDIF extdebug}
    while (len <> 0) do
      if (len = 8) then begin
        a_load_ref_ref(list, OS_64, OS_64, src, dst);
        dec(len, 8);
      end else if (len >= 4) then begin
        a_load_ref_ref(list, OS_32, OS_32, src, dst);
        inc(src.offset, 4);
        inc(dst.offset, 4);
        dec(len, 4);
      end else if (len >= 2) then begin
        a_load_ref_ref(list, OS_16, OS_16, src, dst);
        inc(src.offset, 2);
        inc(dst.offset, 2);
        dec(len, 2);
      end else begin
        a_load_ref_ref(list, OS_8, OS_8, src, dst);
        inc(src.offset, 1);
        inc(dst.offset, 1);
        dec(len, 1);
      end;
    exit;
  end;
  {$IFDEF extdebug}
  list.concat(tai_comment.create(strpnew('g_concatcopy2 ' + inttostr(len) + ' bytes left ')));
  {$ENDIF extdebug}

  count := len div maxmoveunit;

  reference_reset(src);
  reference_reset(dst);

  { load the address of source into src.base }
  if (count > 4) or
     not issimpleref(source) or
     ((source.index <> NR_NO) and
      ((source.offset + len) > high(smallint))) then begin
    src.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
    a_loadaddr_ref_reg(list, source, src.base);
  end else
    src := source;

  { load the address of dest into dst.base }
  if (count > 4) or
     not issimpleref(dest) or
     ((dest.index <> NR_NO) and
      ((dest.offset + len) > high(smallint))) then begin
    dst.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
    a_loadaddr_ref_reg(list, dest, dst.base);
  end else
    dst := dest;

  { more than four 8 byte chunks: generate a counted loop }
  if count > 4 then begin
    { The offsets are zero after a_loadaddr_ref_reg and just have to be set
      to 8. An inc is used so that debugging is easier: should an offset be
      different from zero here, it is easy to notice in the generated
      assembler. }
    inc(dst.offset, 8);
    inc(src.offset, 8);
    list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
    list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
    countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
    a_load_const_reg(list, OS_64, count, countreg);
    { explicitly allocate F0 since it can be used safely here
      (for holding the data that is being copied) }
    a_reg_alloc(list, NR_F0);
    current_asmdata.getjumplabel(looplabel);
    a_label(list, looplabel);
    list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
    list.concat(taicpu.op_reg_ref(A_LFDU, NR_F0, src));
    list.concat(taicpu.op_reg_ref(A_STFDU, NR_F0, dst));
    a_jmp(list, A_BC, C_NE, 0, looplabel);
    a_reg_dealloc(list, NR_F0);
    len := len mod 8;
  end;

  { up to four 8 byte chunks: unrolled loop }
  count := len div 8;
  if count > 0 then begin
    a_reg_alloc(list, NR_F0);
    for i := 1 to count do begin
      a_loadfpu_ref_reg(list, OS_F64, OS_F64, src, NR_F0);
      a_loadfpu_reg_ref(list, OS_F64, OS_F64, NR_F0, dst);
      inc(src.offset, 8);
      inc(dst.offset, 8);
    end;
    a_reg_dealloc(list, NR_F0);
    len := len mod 8;
  end;

  { copy the leftovers }
  if (len and 4) <> 0 then
    copy_leftover(OS_32, 4);
  if (len and 2) <> 0 then
    copy_leftover(OS_16, 2);
  if (len and 1) <> 0 then
    copy_leftover(OS_8, 1);
end;
|
|
|
|
{***************** This is private property, keep out! :) *****************}
|
|
|
|
{ Re-extends the result register "dst" to "size" after an operation whose
  result may have exceeded that size. Only the operations in "resizingops"
  on 8, 16 and 32 bit sizes are affected; for everything else nothing is
  emitted. }
procedure tcgppc.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
const
  resizingops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
begin
  {$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('maybeadjustresult op = ' + cgop2string(op) + ' size = ' + cgsize2string(size))));
  {$ENDIF EXTDEBUG}

  if not (op in resizingops) then
    exit;
  if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32]) then
    exit;

  a_load_reg_reg(list, OS_64, size, dst, dst);
end;
|
|
|
|
{ Tells whether "ref" is a simple reference: it has no symbol and is either
  base + offset with the offset in the smallint range, or base + index with
  a zero offset. A reference with an index but without a base raises an
  internal error. }
function tcgppc.issimpleref(const ref: treference): boolean;
begin
  if (ref.base = NR_NO) and (ref.index <> NR_NO) then
    internalerror(200208101);

  if assigned(ref.symbol) then
    result := false
  else if (ref.index = NR_NO) then
    { base + displacement: the displacement must fit in 16 signed bits }
    result := (ref.offset >= low(smallint)) and (ref.offset <= high(smallint))
  else
    { base + index: no additional displacement allowed }
    result := (ref.offset = 0);
end;
|
|
|
|
{ Loads the address of "symbol" through a TOC entry and returns the register
  holding it.

  A per-unit entry symbol named '_$<unit>$got$<symbol>' is looked up; when it
  does not exist yet, it is defined and a matching '.toc' section entry is
  appended to the al_picdata list. The entry is then read with an ld
  relative to R2 into a newly allocated integer register. }
function tcgppc.load_got_symbol(list: TAsmList; symbol : string) : tregister;
var
  tocsym: tasmsymbol;
  tocref: treference;
  entryname : string;
begin
  maybe_new_object_file(current_asmdata.asmlists[al_picdata]);

  entryname := '_$' + current_asmdata.name + '$got$' + symbol;
  tocsym := current_asmdata.getasmsymbol(entryname);
  if not assigned(tocsym) then begin
    { first use of this symbol: create its TOC entry }
    tocsym := current_asmdata.DefineAsmSymbol(entryname, AB_COMMON, AT_DATA);
    current_asmdata.asmlists[al_picdata].concat(tai_section.create(sec_toc, '.toc', 8));
    current_asmdata.asmlists[al_picdata].concat(tai_symbol.create_global(tocsym,0));
    current_asmdata.asmlists[al_picdata].concat(tai_directive.create(asd_toc_entry, symbol + '[TC], ' + symbol));
  end;

  { reference to the entry, relative to R2 }
  reference_reset_symbol(tocref, tocsym, 0);
  tocref.base := NR_R2;
  tocref.refaddr := addr_pic;

  result := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  {$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('loading got reference for ' + symbol)));
  {$ENDIF EXTDEBUG}
  { the load is emitted directly instead of through a_load_ref_reg }
  list.concat(taicpu.op_reg_ref(A_LD, result, tocref));
end;
|
|
|
|
|
|
{ Rewrites ref in place so that it no longer combines a base register, an
  index register and an offset/symbol at the same time.

  list : assembler list that receives any helper instructions
  ref  : the reference to adjust (modified in place)

  Returns true only if base and index had to be added into a newly allocated
  temporary register (the last rewriting step below); the symbol replacements
  done for darwin and for PIC do not set the result.

  A reference whose refaddr is addr_pic is left untouched.

  Raises internalerror(2006010506) if, after all rewriting, an index register
  is still combined with a symbol or a non-zero offset. }
function tcgppc.fixref(list: TAsmList; var ref: treference): boolean;
{ symbol names must not be larger than this to be able to make a GOT reference out of them,
  otherwise they get truncated by the compiler resulting in failing of the assembling stage }
const
  MAX_GOT_SYMBOL_NAME_LENGTH_HACK = 120;
var
  tmpreg: tregister;
begin
  result := false;
  { Avoids recursion. }
  if (ref.refaddr = addr_pic) then exit;
{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('fixref0 ' + ref2string(ref))));
{$ENDIF EXTDEBUG}

  { darwin/ppc64: external symbols are replaced by the register returned by
    g_indirect_sym_load }
  if (target_info.system = system_powerpc64_darwin) and
     assigned(ref.symbol) and
     (ref.symbol.bind = AB_EXTERNAL) then
    begin
      tmpreg := g_indirect_sym_load(list,ref.symbol.name);
      if (ref.base = NR_NO) then
        ref.base := tmpreg
      else if (ref.index = NR_NO) then
        ref.index := tmpreg
      else
        begin
          { both register slots are taken: fold the old base into tmpreg }
          list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
          ref.base := tmpreg;
        end;
      ref.symbol := nil;
    end;

  { if we have to create PIC, add the symbol to the TOC/GOT }
{$WARNING Hack for avoiding too long manglednames enabled!!}
  if (target_info.system <> system_powerpc64_darwin) and
     (cs_create_pic in current_settings.moduleswitches) and (assigned(ref.symbol) and
     (length(ref.symbol.name) < MAX_GOT_SYMBOL_NAME_LENGTH_HACK)) then begin
    tmpreg := load_got_symbol(list, ref.symbol.name);
    if (ref.base = NR_NO) then
      ref.base := tmpreg
    else if (ref.index = NR_NO) then
      ref.index := tmpreg
    else begin
      { both register slots are taken: fold the old base into tmpreg }
      a_op_reg_reg_reg(list, OP_ADD, OS_ADDR, ref.base, tmpreg, tmpreg);
      ref.base := tmpreg;
    end;
    ref.symbol := nil;
{$IFDEF EXTDEBUG}
    list.concat(tai_comment.create(strpnew('fixref-pic ' + ref2string(ref))));
{$ENDIF EXTDEBUG}
  end;

  { normalise "index only" into "base only" }
  if (ref.base = NR_NO) then begin
    ref.base := ref.index;
    ref.index := NR_NO;
  end;

  { base + index + (offset or symbol): add base and index into a new
    register, which becomes the only (base) register of the reference }
  if (ref.base <> NR_NO) and (ref.index <> NR_NO) and
     ((ref.offset <> 0) or assigned(ref.symbol)) then begin
    result := true;
    tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
    a_op_reg_reg_reg(list, OP_ADD, OS_ADDR, ref.base, ref.index, tmpreg);
    ref.base := tmpreg;
    ref.index := NR_NO;
  end;

  { sanity check: this combination must have been removed above }
  if (ref.index <> NR_NO) and (assigned(ref.symbol) or (ref.offset <> 0)) then
    internalerror(2006010506);
{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('fixref1 ' + ref2string(ref))));
{$ENDIF EXTDEBUG}
end;
|
|
|
|
{ Emits the load or store instruction op for register reg and memory
  reference ref, adding helper instructions where the reference cannot be
  used by the instruction as it stands.

  list : assembler list the instructions are appended to
  op   : load/store opcode (non-indexed form)
  reg  : register that is loaded or stored
  ref  : memory reference; passed by value, so the adjustments made here are
         not visible to the caller

  Handling, in this order:
    - darwin/ppc64: fix up the offset if needed, then defer to the inherited
      implementation
    - addr_pic references: emitted as they are, after sanity checks
    - references with a symbol or a large offset (hasLargeOffset): the
      address is built in temporary registers
    - everything else: emitted as it is

  Raises internalerror for references that combine an index register with an
  offset or symbol, for malformed addr_pic references, and for opcodes that
  have no indexed counterpart in the table below. }
procedure tcgppc.a_load_store(list: TAsmList; op: tasmop; reg: tregister;
  ref: treference);

  procedure maybefixup64bitoffset;
  var
    tmpreg: tregister;
  begin
    { for some instructions we need to check that the offset is divisible by at
      least four. If not, add the bytes which are "off" to the base register and
      adjust the offset accordingly }
    case op of
      A_LD, A_LDU, A_STD, A_STDU, A_LWA :
        if ((ref.offset mod 4) <> 0) then begin
          tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);

          if (ref.base <> NR_NO) then begin
            a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
            ref.base := tmpreg;
          end else begin
            { no base register yet: the remainder alone becomes the base }
            list.concat(taicpu.op_reg_const(A_LI, tmpreg, ref.offset mod 4));
            ref.base := tmpreg;
          end;
          { what is left of the offset is a multiple of four }
          ref.offset := (ref.offset div 4) * 4;
        end;
    end;
  end;

var
  tmpreg, tmpreg2: tregister;
  tmpref: treference;
begin
  if (target_info.system = system_powerpc64_darwin) then
    begin
      { darwin/ppc64 works with 32 bit relocatable symbol addresses }
      maybefixup64bitoffset;
      inherited a_load_store(list,op,reg,ref);
      exit
    end;

  { at this point there must not be a combination of values in the ref treference
    which is not possible to directly map to instructions of the PowerPC architecture }
  if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
    internalerror(200310131);

  { if this is a PIC'ed address, handle it and exit }
  if (ref.refaddr = addr_pic) then begin
    if (ref.offset <> 0) then
      internalerror(2006010501);
    if (ref.index <> NR_NO) then
      internalerror(2006010502);
    if (not assigned(ref.symbol)) then
      internalerror(200601050);
    list.concat(taicpu.op_reg_ref(op, reg, ref));
    exit;
  end;

  maybefixup64bitoffset;
{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('a_load_store1 ' + BoolToStr(ref.refaddr = addr_pic))));
{$ENDIF EXTDEBUG}
  { if we have to load/store from a symbol or large addresses, use a temporary register
    containing the address }
  if (assigned(ref.symbol) or (hasLargeOffset(ref))) then begin
    { NOTE(review): tmpreg is only used below when a symbol is present or when
      there is no base register; kept here so that the allocation order stays
      as it was }
    tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);

    if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
      { a large offset without base register: the offset itself becomes the base }
      ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
      a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
      ref.offset := 0;
    end;

    reference_reset(tmpref);
    tmpref.symbol := ref.symbol;
    tmpref.relsymbol := ref.relsymbol;
    tmpref.offset := ref.offset;

    if (ref.base <> NR_NO) then begin
      { As long as the TOC isn't working we try to achieve highest speed (in this
        case by allowing instructions execute in parallel) as possible at the cost
        of using another temporary register. So the code template when there is
        a base register and a symbol is the following:

        lis rT1, SYM+offs@highest
        ori rT1, rT1, SYM+offs@higher
        lis rT2, SYM+offs@hi
        ori rT2, rT2, SYM+offs@lo
        rldimi rT2, rT1, 32, 0

        <op>X reg, base, rT2

        Without a symbol, the offset is simply loaded as a constant into rT2.
      }

      tmpreg2 := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
      if (assigned(tmpref.symbol)) then begin
        tmpref.refaddr := addr_highest;
        list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
        tmpref.refaddr := addr_higher;
        list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));

        tmpref.refaddr := addr_high;
        list.concat(taicpu.op_reg_ref(A_LIS, tmpreg2, tmpref));
        tmpref.refaddr := addr_low;
        list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg2, tmpreg2, tmpref));

        list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, tmpreg2, tmpreg, 32, 0));
      end else
        a_load_const_reg(list, OS_ADDR, tmpref.offset, tmpreg2);

      { access through base + index, the index being the address part that
        was just built }
      reference_reset(tmpref);
      tmpref.base := ref.base;
      tmpref.index := tmpreg2;
      case op of
        { the code generator doesn't generate update instructions anyway, so
          error out on those instructions }
        A_LBZ : op := A_LBZX;
        A_LHZ : op := A_LHZX;
        A_LWZ : op := A_LWZX;
        A_LD : op := A_LDX;
        A_LHA : op := A_LHAX;
        A_LWA : op := A_LWAX;
        A_LFS : op := A_LFSX;
        A_LFD : op := A_LFDX;

        A_STB : op := A_STBX;
        A_STH : op := A_STHX;
        A_STW : op := A_STWX;
        A_STD : op := A_STDX;

        A_STFS : op := A_STFSX;
        A_STFD : op := A_STFDX;
      else
        { unknown load/store opcode }
        internalerror(2005101302);
      end;
      list.concat(taicpu.op_reg_ref(op, reg, tmpref));
    end else begin
      { when accessing value from a reference without a base register, use the
        following code template:

        lis rT,SYM+offs@highesta
        ori rT,rT,SYM+offs@highera
        sldi rT,rT,32
        oris rT,rT,SYM+offs@ha
        <op> rD,SYM+offs@l(rT)
      }
      tmpref.refaddr := addr_highesta;
      list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
      tmpref.refaddr := addr_highera;
      list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
      list.concat(taicpu.op_reg_reg_const(A_SLDI, tmpreg, tmpreg, 32));
      tmpref.refaddr := addr_higha;
      list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref));

      tmpref.base := tmpreg;
      tmpref.refaddr := addr_low;
      list.concat(taicpu.op_reg_ref(op, reg, tmpref));
    end;
  end else begin
    { plain reference: can be encoded directly }
    list.concat(taicpu.op_reg_ref(op, reg, ref));
  end;
end;
|
|
|
|
{ Loads the constant a into reg by reading it from a TOC entry.

  list : assembler list the load is appended to
  size : not used by this routine
  a    : the constant value
  reg  : destination register

  The TOC entry is named '_$<current_asmdata.name>$toc$<a in hex>'. If no
  assembler symbol of that name is known yet, it is defined and a '.toc'
  section, the global symbol and a toc-entry directive holding the value are
  appended to the al_picdata list. The value is then loaded with
  cg.a_load_ref_reg through an R2-relative addr_pic reference. }
procedure tcgppc.loadConstantPIC(list : TAsmList; size : TCGSize; a : aint; reg : TRegister);
var
  tocsym: tasmsymbol;
  tocref: treference;
  entryname: string;
begin
  maybe_new_object_file(current_asmdata.asmlists[al_picdata]);

  entryname := '_$' + current_asmdata.name + '$toc$' + hexstr(a, sizeof(a)*2);
  tocsym := current_asmdata.getasmsymbol(entryname);
  if tocsym = nil then
  begin
    { first use of this constant: create its TOC entry }
    tocsym := current_asmdata.DefineAsmSymbol(entryname,AB_GLOBAL, AT_DATA);
    current_asmdata.asmlists[al_picdata].concat(tai_section.create(sec_toc, '.toc', 8));
    current_asmdata.asmlists[al_picdata].concat(tai_symbol.create_global(tocsym,0));
    current_asmdata.asmlists[al_picdata].concat(tai_directive.create(asd_toc_entry, entryname + '[TC], ' + inttostr(a)));
  end;

{$IFDEF EXTDEBUG}
  list.concat(tai_comment.create(strpnew('loading value from TOC reference for ' + entryname)));
{$ENDIF EXTDEBUG}

  { R2-relative reference to the TOC entry }
  reference_reset_symbol(tocref, tocsym, 0);
  tocref.base := NR_R2;
  tocref.refaddr := addr_pic;

  cg.a_load_ref_reg(list, OS_INT, OS_INT, tocref, reg);
end;
|
|
|
|
{ closing block of the file: stores a newly created tcgppc instance in cg }
begin
  cg := tcgppc.create;
end.
|