- Adds a number of optimizations for 64bit integer operations on AVR. Patch from Christo Crause in issue #35691.

git-svn-id: trunk@42495 -
2025-09-06 02:31:30 +02:00 · 2019-07-25 15:36:24 +00:00 · 2019-07-25 15:36:24 +00:00 · 893507a5d6
commit 893507a5d6
parent 4b93eb64b3
1 changed files with 551 additions and 0 deletions
--- a/rtl/avr/int64p.inc
+++ b/rtl/avr/int64p.inc
@ -12,3 +12,554 @@
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}
+
+{$define FPC_SYSTEM_HAS_SHR_QWORD}
+// Simplistic version with checking if whole bytes can be shifted
+// Doesn't change bitshift portion even if possible because of byteshift
+// Shorter code but not shortest execution time version
+function fpc_shr_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
+  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
+label
+  byteshift, bitshift, finish;
+asm
+// value passed in R25...R18
+// shift passed in R16
+// return value in R25...R18
+
+  push R16
+
+  andi R16, 63    // mask 64 bit relevant value per generic routine
+byteshift:
+  breq finish     // shift = 0, finished
+  cpi R16, 8      // Check if shift is at least a byte
+  brlo bitshift
+  mov R18, R19    // if so, then shift all bytes right by 1 position
+  mov R19, R20
+  mov R20, R21
+  mov R21, R22
+  mov R22, R23
+  mov R23, R24
+  mov R24, R25
+  clr R25         // and clear the high byte
+  subi R16, 8     // subtract 8 bits from shift
+  rjmp byteshift  // check if another byte can be shifted
+
+bitshift:         // shift all 8 bytes right by 1 bit
+  lsr R25
+  ror R24
+  ror R23
+  ror R22
+  ror R21
+  ror R20
+  ror R19
+  ror R18
+
+  dec R16
+  brne bitshift   // until R16 = 0
+
+finish:
+  pop R16
+end;
+function fpc_shr_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHR_QWORD';
+
+{$define FPC_SYSTEM_HAS_SHL_QWORD}
+function fpc_shl_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
+[public, alias: 'FPC_SHL_QWORD']; compilerproc;
+label
+  byteshift, bitshift, finish;
+asm
+// value passed in R25...R18
+// shift passed in R16
+// return value in R25...R18
+  push R16
+
+  andi R16, 63    // mask 64 bit relevant value per generic routine
+byteshift:
+  breq finish     // shift = 0, finished
+  cpi R16, 8      // Check if shift is at least a byte
+  brlo bitshift
+  mov R25, R24    // if so, then shift all bytes left by 1 position
+  mov R24, R23
+  mov R23, R22
+  mov R22, R21
+  mov R21, R20
+  mov R20, R19
+  mov R19, R18
+  clr R18         // and clear the high byte
+  subi R16, 8     // subtract 8 bits from shift
+  rjmp byteshift  // check if another byte can be shifted
+
+bitshift:         // shift all 8 bytes left by 1 bit
+  lsl R18
+  rol R19
+  rol R20
+  rol R21
+  rol R22
+  rol R23
+  rol R24
+  rol R25
+
+  dec R16
+  brne bitshift   // until R16 = 0
+
+finish:
+  pop R16
+end;
+
+function fpc_shl_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHL_QWORD';
+
+{$define FPC_SYSTEM_HAS_SHL_INT64}
+function fpc_shl_int64(value: int64; shift: ALUUInt): int64;
+  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
+begin
+  Result := fpc_shl_qword(qword(value), shift);
+end;
+
+{$define FPC_SYSTEM_HAS_SHR_INT64}
+// shr of signed int is same as shr of unsigned int (logical shift right)
+function fpc_shr_int64(value: int64; shift: ALUUInt): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
+begin
+  Result := fpc_shr_qword(qword(value), shift);
+end;
+
+{$define FPC_SYSTEM_HAS_DIV_QWORD}
+function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
+label
+  start, div1, div2, div3, finish;
+asm
+// Symbol  Name        Register(s)
+// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
+// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
+// i       counter     R26
+//         1           R27
+
+  cp R25, R1
+  cpc R24, R1
+  cpc R23, R1
+  cpc R22, R1
+  cpc R21, R1
+  cpc R20, R1
+  cpc R19, R1
+  cpc R18, R1
+
+  brne .LNonZero
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call fpc_divbyzero
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall fpc_divbyzero
+{$endif CPUAVR_HAS_JMP_CALL}
+
+.LNonZero:
+
+  push R17
+  push R16
+  push R15
+  push R14
+  push R13
+  push R12
+  push R11
+  push R10
+  push R9
+  push R8
+  push R7
+  push R6
+  push R5
+  push R4
+  push R3
+  push R2
+
+  ldi R27, 1      // needed below for OR instruction
+
+start:            // Start of division...
+  clr R9          // clear remainder
+  clr R8
+  clr R7
+  clr R6
+  clr R5
+  clr R4
+  clr R3
+  clr R2
+  ldi R26, 64     // iterate over 64 bits
+
+div1:
+  lsl R10         // shift left A_L
+  rol R11
+  rol R12
+  rol R13
+  rol R14
+  rol R15
+  rol R16
+  rol R17
+
+  rol R2          // shift left P with carry from A shift
+  rol R3
+  rol R4
+  rol R5
+  rol R6
+  rol R7
+  rol R8
+  rol R9
+
+  sub R2, R18     // Subtract B from P, P <= P - B
+  sbc R3, R19
+  sbc R4, R20
+  sbc R5, R21
+  sbc R6, R22
+  sbc R7, R23
+  sbc R8, R24
+  sbc R9, R25
+
+  brlo div2
+  or R10, R27     // Set A[0] = 1
+  rjmp div3
+div2:             // negative branch, A[0] = 0 (default after shift), restore P
+
+  add R2, R18     // restore old value of P
+  adc R3, R19
+  adc R4, R20
+  adc R5, R21
+  adc R6, R22
+  adc R7, R23
+  adc R8, R24
+  adc R9, R25
+
+div3:
+  dec R26
+  breq finish
+  rjmp div1
+
+finish:
+  mov R25, R17    // Move answer from R17..10 to R25..18
+  mov R24, R16
+  mov R23, R15
+  mov R22, R14
+  mov R21, R13
+  mov R20, R12
+  mov R19, R11
+  mov R18, R10
+
+  pop R2
+  pop R3
+  pop R4
+  pop R5
+  pop R6
+  pop R7
+  pop R8
+  pop R9
+  pop R10
+  pop R11
+  pop R12
+  pop R13
+  pop R14
+  pop R15
+  pop R16
+  pop R17
+end;
+function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';
+
+{$define FPC_SYSTEM_HAS_MOD_QWORD}
+function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
+label
+  start, div1, div2, div3, finish;
+asm
+// Symbol  Name        Register(s)
+// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
+// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
+// i	   counter     R26
+//         1           R27
+
+  cp R25, R1
+  cpc R24, R1
+  cpc R23, R1
+  cpc R22, R1
+  cpc R21, R1
+  cpc R20, R1
+  cpc R19, R1
+  cpc R18, R1
+
+  brne .LNonZero
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call fpc_divbyzero
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall fpc_divbyzero
+{$endif CPUAVR_HAS_JMP_CALL}
+
+.LNonZero:
+
+  push R17
+  push R16
+  push R15
+  push R14
+  push R13
+  push R12
+  push R11
+  push R10
+  push R9
+  push R8
+  push R7
+  push R6
+  push R5
+  push R4
+  push R3
+  push R2
+
+  ldi R27, 1
+start:            // Start of division...
+  clr R9          // clear remainder
+  clr R8
+  clr R7
+  clr R6
+  clr R5
+  clr R4
+  clr R3
+  clr R2
+  ldi R26, 64     // iterate over 64 bits
+
+div1:
+  lsl R10         // shift left A_L
+  rol R11
+  rol R12
+  rol R13
+  rol R14
+  rol R15
+  rol R16
+  rol R17
+
+  rol R2          // shift left P with carry from A shift
+  rol R3
+  rol R4
+  rol R5
+  rol R6
+  rol R7
+  rol R8
+  rol R9
+
+  sub R2, R18     // Subtract B from P, P <= P - B
+  sbc R3, R19
+  sbc R4, R20
+  sbc R5, R21
+  sbc R6, R22
+  sbc R7, R23
+  sbc R8, R24
+  sbc R9, R25
+
+  brlo div2
+  or R10, R27     // Set A[0] = 1
+  rjmp div3
+div2:             // negative branch, A[0] = 0 (default after shift), restore P
+
+  add R2, R18     // restore old value of P
+  adc R3, R19
+  adc R4, R20
+  adc R5, R21
+  adc R6, R22
+  adc R7, R23
+  adc R8, R24
+  adc R9, R25
+
+div3:
+  dec R26
+  breq finish
+  rjmp div1
+
+finish:
+  mov R25, R9     // Move answer from R9..2 to R25..18
+  mov R24, R8
+  mov R23, R7
+  mov R22, R6
+  mov R21, R5
+  mov R20, R4
+  mov R19, R3
+  mov R18, R2
+
+  pop R2
+  pop R3
+  pop R4
+  pop R5
+  pop R6
+  pop R7
+  pop R8
+  pop R9
+  pop R10
+  pop R11
+  pop R12
+  pop R13
+  pop R14
+  pop R15
+  pop R16
+  pop R17
+end;
+function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';
+
+
+{$define FPC_SYSTEM_HAS_DIV_INT64}
+function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
+label
+  pos1, pos2, fin;
+asm
+// Convert n, z to unsigned int, then call div_qword,
+// Restore sign if high bits of n xor z is negative
+// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
+//         neg_result  R30
+//         one         R31
+
+  mov R30, R17    // store hi8(z)
+  eor R30, R25  // hi8(z) XOR hi8(n), answer must be negative if MSB set
+
+  // convert n to absolute
+  ldi R31, 1      // 1 in R31 used later
+  sub R25, r1     // subtract 0, just to check sign flag
+  brpl pos1
+  com R25
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  pos1:
+
+  sub R17, R1
+  brpl pos2
+  com R17
+  com R16
+  com R15
+  com R14
+  com R13
+  com R12
+  com R11
+  com R10
+  add R10, R31
+  adc R11, R1
+  adc R12, R1
+  adc R13, R1
+  adc R14, R1
+  adc R15, R1
+  adc R16, R1
+  adc R17, R1
+  pos2:
+
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call fpc_div_qword
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall fpc_div_qword
+{$endif CPUAVR_HAS_JMP_CALL}
+
+  sbrs R30, 7     // skip if bit 7 is cleared (result should be positive)
+  rjmp fin
+  com R25         // result from FPC_DIV_WORD in R25 ... R22
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+
+  ldi R31, 1
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  fin:
+end;
+
+{$define FPC_SYSTEM_HAS_MOD_INT64}
+function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
+label
+  pos1, pos2, fin;
+asm
+// Convert n, z to unsigned int, then call mod_qword,
+// Restore sign if high bits of n xor z is negative
+// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
+//         neg_result  R30
+//         one         R31
+
+  mov R30, R17  // store hi8(z)
+
+  // convert n to absolute
+  ldi R31, 1
+  sub R25, r1     // subtract 0, just to check sign flag
+  brpl pos1
+  com R25
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  pos1:
+
+  sub R17, R1
+  brpl pos2
+  com R17
+  com R16
+  com R15
+  com R14
+  com R13
+  com R12
+  com R11
+  com R10
+  add R10, R31
+  adc R11, R1
+  adc R12, R1
+  adc R13, R1
+  adc R14, R1
+  adc R15, R1
+  adc R16, R1
+  adc R17, R1
+  pos2:
+
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call fpc_mod_qword
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall fpc_mod_qword
+{$endif CPUAVR_HAS_JMP_CALL}
+
+  sbrs R30, 7     // Not finished if sign bit is set
+  rjmp fin
+  com R25         // Convert to 2's complement
+  com R24         // Complement all bits...
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  ldi R31, 1
+  add R18, R31    // ...and add 1 to answer
+  adc R19, R1
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  fin:
+end;