diff --git a/compiler/llvm/hlcgllvm.pas b/compiler/llvm/hlcgllvm.pas index 798cb38dd0..bb151e7db2 100644 --- a/compiler/llvm/hlcgllvm.pas +++ b/compiler/llvm/hlcgllvm.pas @@ -164,7 +164,7 @@ implementation verbose,cutils,globals,fmodule,constexp,systems, defutil,llvmdef,llvmsym, aasmtai,aasmcpu, - aasmllvm,llvmbase,tgllvm, + aasmllvm,llvmbase,llvminfo,tgllvm, symtable,symllvm, paramgr, pass_2,procinfo,llvmpi,cpuinfo,cgobj,cgllvm,cghlcpu, @@ -1169,6 +1169,7 @@ implementation pd: tprocdef; sourcepara, destpara, sizepara, alignpara, volatilepara: tcgpara; maxalign: longint; + indivalign: boolean; begin { perform small copies directly; not larger ones, because then llvm will try to load the entire large datastructure into registers and @@ -1180,7 +1181,11 @@ implementation a_load_ref_ref(list,size,size,source,dest); exit; end; - pd:=search_system_proc('llvm_memcpy64'); + indivalign:=llvmflag_memcpy_indiv_align in llvmversion_properties[current_settings.llvmversion]; + if indivalign then + pd:=search_system_proc('llvm_memcpy64_indivalign') + else + pd:=search_system_proc('llvm_memcpy64'); sourcepara.init; destpara.init; sizepara.init; @@ -1189,15 +1194,27 @@ implementation paramanager.getintparaloc(list,pd,1,destpara); paramanager.getintparaloc(list,pd,2,sourcepara); paramanager.getintparaloc(list,pd,3,sizepara); - paramanager.getintparaloc(list,pd,4,alignpara); - paramanager.getintparaloc(list,pd,5,volatilepara); + if indivalign then + begin + paramanager.getintparaloc(list,pd,4,volatilepara); + destpara.Alignment:=dest.alignment; + sourcepara.Alignment:=source.alignment; + end + else + begin + paramanager.getintparaloc(list,pd,4,alignpara); + paramanager.getintparaloc(list,pd,5,volatilepara); + maxalign:=newalignment(max(source.alignment,dest.alignment),min(source.alignment,dest.alignment)); + a_load_const_cgpara(list,u32inttype,maxalign,alignpara); + end; a_loadaddr_ref_cgpara(list,size,dest,destpara); a_loadaddr_ref_cgpara(list,size,source,sourcepara); 
a_load_const_cgpara(list,u64inttype,size.size,sizepara); - maxalign:=newalignment(max(source.alignment,dest.alignment),min(source.alignment,dest.alignment)); - a_load_const_cgpara(list,u32inttype,maxalign,alignpara); a_load_const_cgpara(list,llvmbool1type,ord((vol_read in source.volatility) or (vol_write in dest.volatility)),volatilepara); - g_call_system_proc(list,pd,[@destpara,@sourcepara,@sizepara,@alignpara,@volatilepara],nil).resetiftemp; + if indivalign then + g_call_system_proc(list,pd,[@destpara,@sourcepara,@sizepara,@volatilepara],nil).resetiftemp + else + g_call_system_proc(list,pd,[@destpara,@sourcepara,@sizepara,@alignpara,@volatilepara],nil).resetiftemp; sourcepara.done; destpara.done; sizepara.done; diff --git a/compiler/llvm/llvminfo.pas b/compiler/llvm/llvminfo.pas index 8863c8aeec..0138a00033 100644 --- a/compiler/llvm/llvminfo.pas +++ b/compiler/llvm/llvminfo.pas @@ -70,7 +70,8 @@ type llvmflag_load_getelptr_type, { the return type of loads and the base type of getelementptr must be specified } llvmflag_call_no_ptr, { with direct calls, the function type is not a function pointer } llvmflag_alias_double_type, { with "alias" declarations, have to print both aliasee and aliasee* types } - llvmflag_fembed_bitcode { support embedding bitcode in object files } + llvmflag_fembed_bitcode, { support embedding bitcode in object files } + llvmflag_memcpy_indiv_align { memcpy intrinsic supports separate alignment for source and dest } ); tllvmversionflags = set of tllvmversionflag; @@ -129,7 +130,7 @@ Const { llvmver_xc_9_2 } [llvmflag_load_getelptr_type,llvmflag_call_no_ptr,llvmflag_alias_double_type,llvmflag_fembed_bitcode], { llvmver_6_0 } [llvmflag_load_getelptr_type,llvmflag_call_no_ptr,llvmflag_alias_double_type,llvmflag_fembed_bitcode], { llvmver_xc_10_0 } [llvmflag_load_getelptr_type,llvmflag_call_no_ptr,llvmflag_alias_double_type,llvmflag_fembed_bitcode], - { llvmver_7_0 } 
[llvmflag_load_getelptr_type,llvmflag_call_no_ptr,llvmflag_alias_double_type,llvmflag_fembed_bitcode] + { llvmver_7_0 } [llvmflag_load_getelptr_type,llvmflag_call_no_ptr,llvmflag_alias_double_type,llvmflag_fembed_bitcode,llvmflag_memcpy_indiv_align] ); { Supported optimizations, only used for information } diff --git a/rtl/inc/llvmintr.inc b/rtl/inc/llvmintr.inc index 5f76e82dab..968ff9f89d 100644 --- a/rtl/inc/llvmintr.inc +++ b/rtl/inc/llvmintr.inc @@ -15,6 +15,8 @@ procedure llvm_memcpy64(dest, source: pointer; len: qword; align: cardinal; isvolatile: LLVMBool1); compilerproc; external name 'llvm.memcpy.p0i8.p0i8.i64'; +{ LLVM 7.0+, alignment specified as part of the dest and source parameters: unlike llvm_memcpy64 this variant has no separate align parameter. Both declarations bind the same external intrinsic name, so the code generator has to select the one matching the targeted LLVM version (see llvmflag_memcpy_indiv_align). } +procedure llvm_memcpy64_indivalign(dest, source: pointer; len: qword; isvolatile: LLVMBool1); compilerproc; external name 'llvm.memcpy.p0i8.p0i8.i64'; function llvm_frameaddress(level: longint): pointer; compilerproc; external name 'llvm.frameaddress';