lazarus/components/virtualtreeview/include/intf/qt5/laz.vtgraphicsi.inc
2020-04-24 10:58:37 +00:00

908 lines
36 KiB
PHP

uses
qt5, qtobjects;
{$if Defined(CPU386)}
{$ASMMODE INTEL}
{$endif}
procedure AlphaBlendLineConstant(Source, Destination: Pointer; Count: Integer; ConstantAlpha, Bias: Integer);
// Blends a line of Count pixels from Source to Destination using a constant alpha value.
// The layout of a pixel must be BGRA where A is ignored (but is calculated as the other components).
// ConstantAlpha must be in the range 0..255 where 0 means totally transparent (destination pixel only)
// and 255 totally opaque (source pixel only).
// Bias is an additional value which gets added to every component and must be in the range -128..127
//
// Implementation notes:
// - If CPU386 is not defined the procedure has an empty body, i.e. nothing is blended.
//   NOTE(review): as far as I know FPC defines CPU386 only for i386 targets, which would make the
//   CPU64 branch below unreachable on x86_64 builds - confirm against the compiler documentation.
// - One pixel (4 bytes) is processed per loop iteration and the result is written to Destination.
// - The loop is bottom-tested (DEC/JNZ) and there is no guard for Count = 0, so the caller must
//   pass Count >= 1.
// - The 32 bit path uses MMX registers (opcodes are emitted via DB) and does not execute EMMS itself;
//   presumably the caller is expected to reset the MMX state afterwards.
// - Label @3 is not the target of any jump in this procedure.
{$if not Defined(CPU386)}
begin
end;
{$else}
asm
{$ifdef CPU64}
//windows
// RCX contains Source
// RDX contains Destination
// R8D contains Count
// R9D contains ConstantAlpha
// Bias is on the stack
//non windows
// RDI contains Source
// RSI contains Destination
// EDX contains Count
// ECX contains ConstantAlpha
// R8D contains Bias
//.NOFRAME
// Load XMM3 with the constant alpha value (replicate it for every component).
// Expand it to word size.
{$ifdef windows}
MOVD XMM3, R9D // ConstantAlpha
{$else}
MOVD XMM3, ECX // ConstantAlpha
{$endif}
PUNPCKLWD XMM3, XMM3
PUNPCKLDQ XMM3, XMM3
// Load XMM5 with the bias value.
{$ifdef windows}
MOVD XMM5, [Bias]
{$else}
MOVD XMM5, R8D //Bias
{$endif}
PUNPCKLWD XMM5, XMM5
PUNPCKLDQ XMM5, XMM5
// Load XMM4 with 128 to allow for saturated biasing.
MOV R10D, 128
MOVD XMM4, R10D
PUNPCKLWD XMM4, XMM4
PUNPCKLDQ XMM4, XMM4
@1: // The pixel loop calculates an entire pixel in one run.
// Note: The pixel byte values are expanded into the higher bytes of a word due
// to the way unpacking works. We compensate for this with an extra shift.
{$ifdef windows}
MOVD XMM1, DWORD PTR [RCX] // data is unaligned
MOVD XMM2, DWORD PTR [RDX] // data is unaligned
{$else}
MOVD XMM1, DWORD PTR [RDI] // data is unaligned
MOVD XMM2, DWORD PTR [RSI] // data is unaligned
{$endif}
PXOR XMM0, XMM0 // clear source pixel register for unpacking
PUNPCKLBW XMM0, XMM1{[RCX]} // unpack source pixel byte values into words
PSRLW XMM0, 8 // move higher bytes to lower bytes
PXOR XMM1, XMM1 // clear target pixel register for unpacking
PUNPCKLBW XMM1, XMM2{[RDX]} // unpack target pixel byte values into words
MOVQ XMM2, XMM1 // make a copy of the shifted values, we need them again
PSRLW XMM1, 8 // move higher bytes to lower bytes
// calculation is: target = (alpha * (source - target) + 256 * target) / 256
PSUBW XMM0, XMM1 // source - target
PMULLW XMM0, XMM3 // alpha * (source - target)
PADDW XMM0, XMM2 // add target (in shifted form)
PSRLW XMM0, 8 // divide by 256
// Bias is accounted for by conversion of range 0..255 to -128..127,
// doing a saturated add and convert back to 0..255.
PSUBW XMM0, XMM4
PADDSW XMM0, XMM5
PADDW XMM0, XMM4
PACKUSWB XMM0, XMM0 // convert words to bytes with saturation
{$ifdef windows}
MOVD DWORD PTR [RDX], XMM0 // store the result
{$else}
MOVD DWORD PTR [RSI], XMM0 // store the result
{$endif}
@3:
// Advance both pointers by one pixel and count down the remaining pixels.
{$ifdef windows}
ADD RCX, 4
ADD RDX, 4
DEC R8D
{$else}
ADD RDI, 4
ADD RSI, 4
DEC EDX
{$endif}
JNZ @1
{$else}
// EAX contains Source
// EDX contains Destination
// ECX contains Count
// ConstantAlpha and Bias are on the stack
PUSH ESI // save used registers
PUSH EDI
MOV ESI, EAX // ESI becomes the actual source pointer
MOV EDI, EDX // EDI becomes the actual target pointer
// Load MM6 with the constant alpha value (replicate it for every component).
// Expand it to word size.
MOV EAX, [ConstantAlpha]
DB $0F, $6E, $F0 /// MOVD MM6, EAX
DB $0F, $61, $F6 /// PUNPCKLWD MM6, MM6
DB $0F, $62, $F6 /// PUNPCKLDQ MM6, MM6
// Load MM5 with the bias value.
MOV EAX, [Bias]
DB $0F, $6E, $E8 /// MOVD MM5, EAX
DB $0F, $61, $ED /// PUNPCKLWD MM5, MM5
DB $0F, $62, $ED /// PUNPCKLDQ MM5, MM5
// Load MM4 with 128 to allow for saturated biasing.
MOV EAX, 128
DB $0F, $6E, $E0 /// MOVD MM4, EAX
DB $0F, $61, $E4 /// PUNPCKLWD MM4, MM4
DB $0F, $62, $E4 /// PUNPCKLDQ MM4, MM4
@1: // The pixel loop calculates an entire pixel in one run.
// Note: The pixel byte values are expanded into the higher bytes of a word due
// to the way unpacking works. We compensate for this with an extra shift.
DB $0F, $EF, $C0 /// PXOR MM0, MM0, clear source pixel register for unpacking
DB $0F, $60, $06 /// PUNPCKLBW MM0, [ESI], unpack source pixel byte values into words
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8, move higher bytes to lower bytes
DB $0F, $EF, $C9 /// PXOR MM1, MM1, clear target pixel register for unpacking
DB $0F, $60, $0F /// PUNPCKLBW MM1, [EDI], unpack target pixel byte values into words
DB $0F, $6F, $D1 /// MOVQ MM2, MM1, make a copy of the shifted values, we need them again
DB $0F, $71, $D1, $08 /// PSRLW MM1, 8, move higher bytes to lower bytes
// calculation is: target = (alpha * (source - target) + 256 * target) / 256
DB $0F, $F9, $C1 /// PSUBW MM0, MM1, source - target
DB $0F, $D5, $C6 /// PMULLW MM0, MM6, alpha * (source - target)
DB $0F, $FD, $C2 /// PADDW MM0, MM2, add target (in shifted form)
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8, divide by 256
// Bias is accounted for by conversion of range 0..255 to -128..127,
// doing a saturated add and convert back to 0..255.
DB $0F, $F9, $C4 /// PSUBW MM0, MM4
DB $0F, $ED, $C5 /// PADDSW MM0, MM5
DB $0F, $FD, $C4 /// PADDW MM0, MM4
DB $0F, $67, $C0 /// PACKUSWB MM0, MM0, convert words to bytes with saturation
DB $0F, $7E, $07 /// MOVD [EDI], MM0, store the result
@3:
// Advance both pointers by one pixel and count down the remaining pixels.
ADD ESI, 4
ADD EDI, 4
DEC ECX
JNZ @1
POP EDI
POP ESI
{$endif}
end;
{$endif}
//----------------------------------------------------------------------------------------------------------------------
procedure AlphaBlendLinePerPixel(Source, Destination: Pointer; Count, Bias: Integer);
// Blends a line of Count pixels from Source to Destination using the alpha value of the source pixels.
// The layout of a pixel must be BGRA.
// Bias is an additional value which gets added to every component and must be in the range -128..127
//
// Implementation notes:
// - If CPU386 is not defined the procedure has an empty body, i.e. nothing is blended.
// - One pixel (4 bytes) is processed per loop iteration; the alpha factor is taken from the highest
//   word of the unpacked source pixel and replicated for all four components.
// - The loop is bottom-tested (DEC/JNZ) and there is no guard for Count = 0, so the caller must
//   pass Count >= 1.
// - The 32 bit path uses MMX registers (opcodes are emitted via DB) and does not execute EMMS itself;
//   presumably the caller is expected to reset the MMX state afterwards.
// - Label @3 is not the target of any jump in this procedure.
{$if not Defined(CPU386)}
begin
end;
{$else}
asm
{$ifdef CPU64}
//windows
// RCX contains Source
// RDX contains Destination
// R8D contains Count
// R9D contains Bias
//non windows
// RDI contains Source
// RSI contains Destination
// EDX contains Count
// ECX contains Bias
//.NOFRAME
// Load XMM5 with the bias value.
{$ifdef windows}
MOVD XMM5, R9D // Bias
{$else}
MOVD XMM5, ECX // Bias
{$endif}
PUNPCKLWD XMM5, XMM5
PUNPCKLDQ XMM5, XMM5
// Load XMM4 with 128 to allow for saturated biasing.
MOV R10D, 128
MOVD XMM4, R10D
PUNPCKLWD XMM4, XMM4
PUNPCKLDQ XMM4, XMM4
@1: // The pixel loop calculates an entire pixel in one run.
// Note: The pixel byte values are expanded into the higher bytes of a word due
// to the way unpacking works. We compensate for this with an extra shift.
{$ifdef windows}
MOVD XMM1, DWORD PTR [RCX] // data is unaligned
MOVD XMM2, DWORD PTR [RDX] // data is unaligned
{$else}
MOVD XMM1, DWORD PTR [RDI] // data is unaligned
MOVD XMM2, DWORD PTR [RSI] // data is unaligned
{$endif}
PXOR XMM0, XMM0 // clear source pixel register for unpacking
PUNPCKLBW XMM0, XMM1{[RCX]} // unpack source pixel byte values into words
PSRLW XMM0, 8 // move higher bytes to lower bytes
PXOR XMM1, XMM1 // clear target pixel register for unpacking
PUNPCKLBW XMM1, XMM2{[RDX]} // unpack target pixel byte values into words
MOVQ XMM2, XMM1 // make a copy of the shifted values, we need them again
PSRLW XMM1, 8 // move higher bytes to lower bytes
// Load XMM3 with the source alpha value (replicate it for every component).
// Expand it to word size.
MOVQ XMM3, XMM0
PUNPCKHWD XMM3, XMM3
PUNPCKHDQ XMM3, XMM3
// calculation is: target = (alpha * (source - target) + 256 * target) / 256
PSUBW XMM0, XMM1 // source - target
PMULLW XMM0, XMM3 // alpha * (source - target)
PADDW XMM0, XMM2 // add target (in shifted form)
PSRLW XMM0, 8 // divide by 256
// Bias is accounted for by conversion of range 0..255 to -128..127,
// doing a saturated add and convert back to 0..255.
PSUBW XMM0, XMM4
PADDSW XMM0, XMM5
PADDW XMM0, XMM4
PACKUSWB XMM0, XMM0 // convert words to bytes with saturation
{$ifdef windows}
MOVD DWORD PTR [RDX], XMM0 // store the result
{$else}
MOVD DWORD PTR [RSI], XMM0 // store the result
{$endif}
@3:
// Advance both pointers by one pixel and count down the remaining pixels.
{$ifdef windows}
ADD RCX, 4
ADD RDX, 4
DEC R8D
{$else}
ADD RDI, 4
ADD RSI, 4
DEC EDX
{$endif}
JNZ @1
{$else}
// EAX contains Source
// EDX contains Destination
// ECX contains Count
// Bias is on the stack
PUSH ESI // save used registers
PUSH EDI
MOV ESI, EAX // ESI becomes the actual source pointer
MOV EDI, EDX // EDI becomes the actual target pointer
// Load MM5 with the bias value.
MOV EAX, [Bias]
DB $0F, $6E, $E8 /// MOVD MM5, EAX
DB $0F, $61, $ED /// PUNPCKLWD MM5, MM5
DB $0F, $62, $ED /// PUNPCKLDQ MM5, MM5
// Load MM4 with 128 to allow for saturated biasing.
MOV EAX, 128
DB $0F, $6E, $E0 /// MOVD MM4, EAX
DB $0F, $61, $E4 /// PUNPCKLWD MM4, MM4
DB $0F, $62, $E4 /// PUNPCKLDQ MM4, MM4
@1: // The pixel loop calculates an entire pixel in one run.
// Note: The pixel byte values are expanded into the higher bytes of a word due
// to the way unpacking works. We compensate for this with an extra shift.
DB $0F, $EF, $C0 /// PXOR MM0, MM0, clear source pixel register for unpacking
DB $0F, $60, $06 /// PUNPCKLBW MM0, [ESI], unpack source pixel byte values into words
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8, move higher bytes to lower bytes
DB $0F, $EF, $C9 /// PXOR MM1, MM1, clear target pixel register for unpacking
DB $0F, $60, $0F /// PUNPCKLBW MM1, [EDI], unpack target pixel byte values into words
DB $0F, $6F, $D1 /// MOVQ MM2, MM1, make a copy of the shifted values, we need them again
DB $0F, $71, $D1, $08 /// PSRLW MM1, 8, move higher bytes to lower bytes
// Load MM6 with the source alpha value (replicate it for every component).
// Expand it to word size.
DB $0F, $6F, $F0 /// MOVQ MM6, MM0
DB $0F, $69, $F6 /// PUNPCKHWD MM6, MM6
DB $0F, $6A, $F6 /// PUNPCKHDQ MM6, MM6
// calculation is: target = (alpha * (source - target) + 256 * target) / 256
DB $0F, $F9, $C1 /// PSUBW MM0, MM1, source - target
DB $0F, $D5, $C6 /// PMULLW MM0, MM6, alpha * (source - target)
DB $0F, $FD, $C2 /// PADDW MM0, MM2, add target (in shifted form)
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8, divide by 256
// Bias is accounted for by conversion of range 0..255 to -128..127,
// doing a saturated add and convert back to 0..255.
DB $0F, $F9, $C4 /// PSUBW MM0, MM4
DB $0F, $ED, $C5 /// PADDSW MM0, MM5
DB $0F, $FD, $C4 /// PADDW MM0, MM4
DB $0F, $67, $C0 /// PACKUSWB MM0, MM0, convert words to bytes with saturation
DB $0F, $7E, $07 /// MOVD [EDI], MM0, store the result
@3:
// Advance both pointers by one pixel and count down the remaining pixels.
ADD ESI, 4
ADD EDI, 4
DEC ECX
JNZ @1
POP EDI
POP ESI
{$endif}
end;
{$endif}
//----------------------------------------------------------------------------------------------------------------------
procedure AlphaBlendLineMaster(Source, Destination: Pointer; Count: Integer; ConstantAlpha, Bias: Integer);
// Blends a line of Count pixels from Source to Destination using the source pixel and a constant alpha value.
// The layout of a pixel must be BGRA.
// ConstantAlpha must be in the range 0..255.
// Bias is an additional value which gets added to every component and must be in the range -128..127
//
// Implementation notes:
// - If CPU386 is not defined the procedure has an empty body, i.e. nothing is blended.
// - The effective alpha per pixel is (source alpha * ConstantAlpha) shifted right by 8 bits.
// - The loop is bottom-tested (DEC/JNZ) and there is no guard for Count = 0, so the caller must
//   pass Count >= 1.
// - NOTE(review): the 64 bit path writes XMM6 without saving/restoring it. XMM6 is a callee-saved
//   register in the Microsoft x64 calling convention, which is what the ".SAVENV XMM6" todo below
//   refers to. Until that is solved this path should be considered incomplete on Win64.
// - The 32 bit path uses MMX registers (opcodes are emitted via DB) and does not execute EMMS itself;
//   presumably the caller is expected to reset the MMX state afterwards.
// - Label @3 is not the target of any jump in this procedure.
{$if not Defined(CPU386)}
begin
end;
{$else}
asm
{$ifdef CPU64}
//windows
// RCX contains Source
// RDX contains Destination
// R8D contains Count
// R9D contains ConstantAlpha
// Bias is on the stack
//non windows
// RDI contains Source
// RSI contains Destination
// EDX contains Count
// ECX contains ConstantAlpha
// R8D contains Bias
//.SAVENV XMM6 //todo see how implement in fpc
// Load XMM3 with the constant alpha value (replicate it for every component).
// Expand it to word size.
{$ifdef windows}
MOVD XMM3, R9D // ConstantAlpha
{$else}
MOVD XMM3, ECX // ConstantAlpha
{$endif}
PUNPCKLWD XMM3, XMM3
PUNPCKLDQ XMM3, XMM3
// Load XMM5 with the bias value.
{$ifdef windows}
MOV R10D, [Bias]
MOVD XMM5, R10D
{$else}
MOVD XMM5, R8D
{$endif}
PUNPCKLWD XMM5, XMM5
PUNPCKLDQ XMM5, XMM5
// Load XMM4 with 128 to allow for saturated biasing.
MOV R10D, 128
MOVD XMM4, R10D
PUNPCKLWD XMM4, XMM4
PUNPCKLDQ XMM4, XMM4
@1: // The pixel loop calculates an entire pixel in one run.
// Note: The pixel byte values are expanded into the higher bytes of a word due
// to the way unpacking works. We compensate for this with an extra shift.
{$ifdef windows}
MOVD XMM1, DWORD PTR [RCX] // data is unaligned
MOVD XMM2, DWORD PTR [RDX] // data is unaligned
{$else}
MOVD XMM1, DWORD PTR [RDI] // data is unaligned
MOVD XMM2, DWORD PTR [RSI] // data is unaligned
{$endif}
PXOR XMM0, XMM0 // clear source pixel register for unpacking
PUNPCKLBW XMM0, XMM1{[RCX]} // unpack source pixel byte values into words
PSRLW XMM0, 8 // move higher bytes to lower bytes
PXOR XMM1, XMM1 // clear target pixel register for unpacking
PUNPCKLBW XMM1, XMM2{[RCX]} // unpack target pixel byte values into words (XMM2 was loaded from the destination)
MOVQ XMM2, XMM1 // make a copy of the shifted values, we need them again
PSRLW XMM1, 8 // move higher bytes to lower bytes
// Load XMM6 with the source alpha value (replicate it for every component).
// Expand it to word size.
MOVQ XMM6, XMM0
PUNPCKHWD XMM6, XMM6
PUNPCKHDQ XMM6, XMM6
PMULLW XMM6, XMM3 // source alpha * master alpha
PSRLW XMM6, 8 // divide by 256
// calculation is: target = (alpha * master alpha * (source - target) + 256 * target) / 256
PSUBW XMM0, XMM1 // source - target
PMULLW XMM0, XMM6 // alpha * (source - target)
PADDW XMM0, XMM2 // add target (in shifted form)
PSRLW XMM0, 8 // divide by 256
// Bias is accounted for by conversion of range 0..255 to -128..127,
// doing a saturated add and convert back to 0..255.
PSUBW XMM0, XMM4
PADDSW XMM0, XMM5
PADDW XMM0, XMM4
PACKUSWB XMM0, XMM0 // convert words to bytes with saturation
{$ifdef windows}
MOVD DWORD PTR [RDX], XMM0 // store the result
{$else}
MOVD DWORD PTR [RSI], XMM0 // store the result
{$endif}
@3:
// Advance both pointers by one pixel and count down the remaining pixels.
{$ifdef windows}
ADD RCX, 4
ADD RDX, 4
DEC R8D
{$else}
ADD RDI, 4
ADD RSI, 4
DEC EDX
{$endif}
JNZ @1
{$else}
// EAX contains Source
// EDX contains Destination
// ECX contains Count
// ConstantAlpha and Bias are on the stack
PUSH ESI // save used registers
PUSH EDI
MOV ESI, EAX // ESI becomes the actual source pointer
MOV EDI, EDX // EDI becomes the actual target pointer
// Load MM6 with the constant alpha value (replicate it for every component).
// Expand it to word size.
MOV EAX, [ConstantAlpha]
DB $0F, $6E, $F0 /// MOVD MM6, EAX
DB $0F, $61, $F6 /// PUNPCKLWD MM6, MM6
DB $0F, $62, $F6 /// PUNPCKLDQ MM6, MM6
// Load MM5 with the bias value.
MOV EAX, [Bias]
DB $0F, $6E, $E8 /// MOVD MM5, EAX
DB $0F, $61, $ED /// PUNPCKLWD MM5, MM5
DB $0F, $62, $ED /// PUNPCKLDQ MM5, MM5
// Load MM4 with 128 to allow for saturated biasing.
MOV EAX, 128
DB $0F, $6E, $E0 /// MOVD MM4, EAX
DB $0F, $61, $E4 /// PUNPCKLWD MM4, MM4
DB $0F, $62, $E4 /// PUNPCKLDQ MM4, MM4
@1: // The pixel loop calculates an entire pixel in one run.
// Note: The pixel byte values are expanded into the higher bytes of a word due
// to the way unpacking works. We compensate for this with an extra shift.
DB $0F, $EF, $C0 /// PXOR MM0, MM0, clear source pixel register for unpacking
DB $0F, $60, $06 /// PUNPCKLBW MM0, [ESI], unpack source pixel byte values into words
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8, move higher bytes to lower bytes
DB $0F, $EF, $C9 /// PXOR MM1, MM1, clear target pixel register for unpacking
DB $0F, $60, $0F /// PUNPCKLBW MM1, [EDI], unpack target pixel byte values into words
DB $0F, $6F, $D1 /// MOVQ MM2, MM1, make a copy of the shifted values, we need them again
DB $0F, $71, $D1, $08 /// PSRLW MM1, 8, move higher bytes to lower bytes
// Load MM7 with the source alpha value (replicate it for every component).
// Expand it to word size.
DB $0F, $6F, $F8 /// MOVQ MM7, MM0
DB $0F, $69, $FF /// PUNPCKHWD MM7, MM7
DB $0F, $6A, $FF /// PUNPCKHDQ MM7, MM7
DB $0F, $D5, $FE /// PMULLW MM7, MM6, source alpha * master alpha
DB $0F, $71, $D7, $08 /// PSRLW MM7, 8, divide by 256
// calculation is: target = (alpha * master alpha * (source - target) + 256 * target) / 256
DB $0F, $F9, $C1 /// PSUBW MM0, MM1, source - target
DB $0F, $D5, $C7 /// PMULLW MM0, MM7, alpha * (source - target)
DB $0F, $FD, $C2 /// PADDW MM0, MM2, add target (in shifted form)
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8, divide by 256
// Bias is accounted for by conversion of range 0..255 to -128..127,
// doing a saturated add and convert back to 0..255.
DB $0F, $F9, $C4 /// PSUBW MM0, MM4
DB $0F, $ED, $C5 /// PADDSW MM0, MM5
DB $0F, $FD, $C4 /// PADDW MM0, MM4
DB $0F, $67, $C0 /// PACKUSWB MM0, MM0, convert words to bytes with saturation
DB $0F, $7E, $07 /// MOVD [EDI], MM0, store the result
@3:
// Advance both pointers by one pixel and count down the remaining pixels.
ADD ESI, 4
ADD EDI, 4
DEC ECX
JNZ @1
POP EDI
POP ESI
{$endif}
end;
{$endif}
//----------------------------------------------------------------------------------------------------------------------
procedure AlphaBlendLineMasterAndColor(Destination: Pointer; Count: Integer; ConstantAlpha, Color: Integer);
// Blends a line of Count pixels in Destination against the given color using a constant alpha value.
// The layout of a pixel must be BGRA and Color must be rrggbb00 (as stored by a COLORREF).
// ConstantAlpha must be in the range 0..255.
//
// Implementation notes:
// - If CPU386 is not defined the procedure has an empty body, i.e. nothing is blended.
// - Destination is modified in place, one pixel (4 bytes) per loop iteration.
// - The loop is bottom-tested (DEC/JNZ) and there is no guard for Count = 0, so the caller must
//   pass Count >= 1.
// - The register holding Color is modified (BSWAP/ROR) while factor 1 is computed.
// - The 32 bit path uses MMX registers (opcodes are emitted via DB) and does not execute EMMS itself;
//   presumably the caller is expected to reset the MMX state afterwards.
{$if not Defined(CPU386)}
begin
end;
{$else}
asm
{$ifdef CPU64}
//windows
// RCX contains Destination
// EDX contains Count
// R8D contains ConstantAlpha
// R9D contains Color
//non windows
// RDI contains Destination
// ESI contains Count
// EDX contains ConstantAlpha
// ECX contains Color
//.NOFRAME
// The used formula is: target = (alpha * color + (256 - alpha) * target) / 256.
// alpha * color (factor 1) and 256 - alpha (factor 2) are constant values which can be calculated in advance.
// The remaining calculation is therefore: target = (F1 + F2 * target) / 256
// Load XMM3 with the constant alpha value (replicate it for every component).
// Expand it to word size. (Every calculation here works on word sized operands.)
{$ifdef windows}
MOVD XMM3, R8D // ConstantAlpha
{$else}
MOVD XMM3, EDX // ConstantAlpha
{$endif}
PUNPCKLWD XMM3, XMM3
PUNPCKLDQ XMM3, XMM3
// Calculate factor 2.
MOV R10D, $100
MOVD XMM2, R10D
PUNPCKLWD XMM2, XMM2
PUNPCKLDQ XMM2, XMM2
PSUBW XMM2, XMM3 // XMM2 contains now: 256 - alpha = F2
// Now calculate factor 1. Alpha is still in XMM3, but the r and b components of Color must be swapped.
{$ifdef windows}
BSWAP R9D // Color
ROR R9D, 8
MOVD XMM1, R9D // Load the color and convert to word sized values.
{$else}
BSWAP ECX // Color
ROR ECX, 8
MOVD XMM1, ECX // Load the color and convert to word sized values.
{$endif}
PXOR XMM4, XMM4
PUNPCKLBW XMM1, XMM4
PMULLW XMM1, XMM3 // XMM1 contains now: color * alpha = F1
@1: // The pixel loop calculates an entire pixel in one run.
{$ifdef windows}
MOVD XMM0, DWORD PTR [RCX]
{$else}
MOVD XMM0, DWORD PTR [RDI]
{$endif}
PUNPCKLBW XMM0, XMM4
PMULLW XMM0, XMM2 // calculate F1 + F2 * target
PADDW XMM0, XMM1
PSRLW XMM0, 8 // divide by 256
PACKUSWB XMM0, XMM0 // convert words to bytes with saturation
{$ifdef windows}
MOVD DWORD PTR [RCX], XMM0 // store the result
ADD RCX, 4
DEC EDX
{$else}
MOVD DWORD PTR [RDI], XMM0 // store the result
ADD RDI, 4
DEC ESI
{$endif}
JNZ @1
{$else}
// EAX contains Destination
// EDX contains Count
// ECX contains ConstantAlpha
// Color is passed on the stack
// The used formula is: target = (alpha * color + (256 - alpha) * target) / 256.
// alpha * color (factor 1) and 256 - alpha (factor 2) are constant values which can be calculated in advance.
// The remaining calculation is therefore: target = (F1 + F2 * target) / 256
// Load MM3 with the constant alpha value (replicate it for every component).
// Expand it to word size. (Every calculation here works on word sized operands.)
DB $0F, $6E, $D9 /// MOVD MM3, ECX
DB $0F, $61, $DB /// PUNPCKLWD MM3, MM3
DB $0F, $62, $DB /// PUNPCKLDQ MM3, MM3
// Calculate factor 2.
MOV ECX, $100
DB $0F, $6E, $D1 /// MOVD MM2, ECX
DB $0F, $61, $D2 /// PUNPCKLWD MM2, MM2
DB $0F, $62, $D2 /// PUNPCKLDQ MM2, MM2
DB $0F, $F9, $D3 /// PSUBW MM2, MM3 // MM2 contains now: 256 - alpha = F2
// Now calculate factor 1. Alpha is still in MM3, but the r and b components of Color must be swapped.
MOV ECX, [Color]
BSWAP ECX
ROR ECX, 8
DB $0F, $6E, $C9 /// MOVD MM1, ECX // Load the color and convert to word sized values.
DB $0F, $EF, $E4 /// PXOR MM4, MM4
DB $0F, $60, $CC /// PUNPCKLBW MM1, MM4
DB $0F, $D5, $CB /// PMULLW MM1, MM3 // MM1 contains now: color * alpha = F1
@1: // The pixel loop calculates an entire pixel in one run.
DB $0F, $6E, $00 /// MOVD MM0, [EAX]
DB $0F, $60, $C4 /// PUNPCKLBW MM0, MM4
DB $0F, $D5, $C2 /// PMULLW MM0, MM2 // calculate F1 + F2 * target
DB $0F, $FD, $C1 /// PADDW MM0, MM1
DB $0F, $71, $D0, $08 /// PSRLW MM0, 8 // divide by 256
DB $0F, $67, $C0 /// PACKUSWB MM0, MM0 // convert words to bytes with saturation
DB $0F, $7E, $00 /// MOVD [EAX], MM0 // store the result
ADD EAX, 4
DEC EDX
JNZ @1
{$endif}
end;
{$endif}
//----------------------------------------------------------------------------------------------------------------------
procedure EMMS;
// Leaves the MMX state so the FPU can be used again after the MMX based blend routines ran.
// Only the 32 bit x86 build emits the EMMS opcode ($0F $77); if CPU386 is not defined, or if
// CPU64 is defined, the procedure is an empty stub (declared inline in the CPU64 case).
{$if not Defined(CPU386)}
begin
end;
{$else}
// Reset MMX state to use the FPU for other tasks again.
{$ifdef CPU64}
inline;
begin
end;
{$else}
asm
DB $0F, $77 /// EMMS
end;
{$endif}
{$endif}
//----------------------------------------------------------------------------------------------------------------------
function GetBitmapBitsFromDeviceContext(DC: HDC; out Width, Height: Integer): Pointer;
// Looks up the bitmap currently selected into DC and returns a pointer to its pixel data.
// Width and Height receive the bitmap dimensions taken from the DIB header; they are set to 0 and the
// result is nil if no bitmap is selected or its DIB section cannot be queried.
// With assertions enabled, a bitmap that is not 32 bpp or a nil result raises an assertion failure.
var
  Selected: HBITMAP;
  Info: TDIBSection;
begin
  Width := 0;
  Height := 0;
  Result := nil;

  Selected := GetCurrentObject(DC, OBJ_BITMAP);
  if Selected <> 0 then
    // Only accept the object if a complete DIB section description could be retrieved.
    if GetObject(Selected, SizeOf(Info), @Info) = SizeOf(Info) then
    begin
      Assert(Info.dsBm.bmPlanes * Info.dsBm.bmBitsPixel = 32, 'Alpha blending error: bitmap must use 32 bpp.');
      Result := Info.dsBm.bmBits;
      Width := Info.dsBmih.biWidth;
      Height := Info.dsBmih.biHeight;
    end;

  Assert(Result <> nil, 'Alpha blending DC error: no bitmap available.');
end;
//----------------------------------------------------------------------------------------------------------------------
function GetBitmapBitsFromBitmap(Bitmap: HBITMAP): Pointer;
// Returns a pointer to the pixel data of the given bitmap, or nil if Bitmap is 0 or its DIB section
// cannot be queried. With assertions enabled, a bitmap that is not 32 bpp raises an assertion failure.
var
  Info: TDIBSection;
begin
  Result := nil;
  if Bitmap = 0 then
    Exit;
  // Only accept the object if a complete DIB section description could be retrieved.
  if GetObject(Bitmap, SizeOf(Info), @Info) <> SizeOf(Info) then
    Exit;
  Assert(Info.dsBm.bmPlanes * Info.dsBm.bmBitsPixel = 32, 'Alpha blending error: bitmap must use 32 bpp.');
  Result := Info.dsBm.bmBits;
end;
function CalculateScanline(Bits: Pointer; Width, Height, Row: Integer): Pointer;
// Computes the start address of scanline Row in a 32 bpp pixel buffer that begins at Bits and is
// Width pixels wide. Rows are counted from the start of the buffer; Height is currently unused.
var
  LineBits: PtrInt;
begin
  // todo: Height is always > 0 in LCL, therefore the bottom-up DIB handling stays disabled:
  {
  if Height > 0 then // bottom-up DIB
    Row := Height - Row - 1;
  }

  // Length of one scanline in bits, rounded up to a DWORD (32 bit) boundary.
  LineBits := (Width * 32 + 31) and not 31;
  // Return DWORD aligned address of the requested scanline.
  Result := Bits + Row * LineBits div 8;
end;
//----------------------------------------------------------------------------------------------------------------------
procedure AlphaBlend(Source, Destination: HDC; const R: TRect; const Target: TPoint; Mode: TBlendMode; ConstantAlpha, Bias: Integer);
// Optimized alpha blend procedure using MMX instructions to perform as quick as possible.
// For this procedure to work properly it is important that both source and target bitmap use the 32 bit color format.
// R describes the source rectangle to work on. Nothing happens if R is empty.
// Target is the place (upper left corner) in the target bitmap where to blend to. Note that source width + X offset
// must be less or equal to the target width. Similar for the height.
// If Mode is bmConstantAlpha then the blend operation uses the given ConstantAlpha value for all pixels.
// If Mode is bmPerPixelAlpha then each pixel is blended using its individual alpha value (the alpha value of the source).
// If Mode is bmMasterAlpha then each pixel is blended using its individual alpha value multiplied by ConstantAlpha.
// When CPU64 is defined bmMasterAlpha is silently handled as bmConstantAlpha.
// If Mode is bmConstantAlphaAndColor then each destination pixel is blended using ConstantAlpha but also a constant
// color which will be obtained from Bias. In this case no offset value is added, otherwise Bias is used as offset.
// Blending of a color into target only (bmConstantAlphaAndColor) ignores Source (the DC). In the line based
// (non CPU64) implementation Target is ignored as well and R addresses the destination bitmap directly; the
// CPU64 implementation fills the rectangle at R.TopLeft + Target via the Qt painter of the destination context.
// CAUTION: This procedure does not check whether MMX instructions are actually available! Call it only if MMX is really
// usable.
var
  Y: Integer;
  SourceRun,
  TargetRun: PByte;
  SourceBits,
  DestBits: Pointer;
  SourceWidth,
  SourceHeight,
  DestWidth,
  DestHeight: Integer;
  {$IFDEF CPU64}
  ATransform: QTransformH;
  DX, DY: QReal;
  AColor: QColorH;
  ADst: TQtDeviceContext absolute Destination;
  AFillColor: TQColor;
  {$ENDIF}
  //BlendColor: TQColor;
begin
  if not IsRectEmpty(R) then
  begin
    {$ifdef CPU64}
    //avoid MasterAlpha due to incomplete AlphaBlendLineMaster. See comment in procedure
    if Mode = bmMasterAlpha then
      Mode := bmConstantAlpha;
    {$endif}
    // Note: it is tempting to optimize the special cases for constant alpha 0 and 255 by just ignoring source
    // (alpha = 0) or simply do a blit (alpha = 255). But this does not take the bias into account.
    case Mode of
      bmConstantAlpha:
        begin
          // Get a pointer to the bitmap bits for the source and target device contexts.
          // Note: this supposes that both contexts do actually have bitmaps assigned!
          SourceBits := GetBitmapBitsFromDeviceContext(Source, SourceWidth, SourceHeight);
          DestBits := GetBitmapBitsFromDeviceContext(Destination, DestWidth, DestHeight);
          if Assigned(SourceBits) and Assigned(DestBits) then
          begin
            for Y := 0 to R.Bottom - R.Top - 1 do
            begin
              // Source line starts at (R.Left, R.Top + Y), target line at (Target.X, Target.Y + Y).
              SourceRun := CalculateScanline(SourceBits, SourceWidth, SourceHeight, Y + R.Top);
              Inc(SourceRun, 4 * R.Left);
              TargetRun := CalculateScanline(DestBits, DestWidth, DestHeight, Y + Target.Y);
              Inc(TargetRun, 4 * Target.X);
              AlphaBlendLineConstant(SourceRun, TargetRun, R.Right - R.Left, ConstantAlpha, Bias);
            end;
          end;
          EMMS;
        end;
      bmPerPixelAlpha:
        begin
          SourceBits := GetBitmapBitsFromDeviceContext(Source, SourceWidth, SourceHeight);
          DestBits := GetBitmapBitsFromDeviceContext(Destination, DestWidth, DestHeight);
          if Assigned(SourceBits) and Assigned(DestBits) then
          begin
            for Y := 0 to R.Bottom - R.Top - 1 do
            begin
              SourceRun := CalculateScanline(SourceBits, SourceWidth, SourceHeight, Y + R.Top);
              Inc(SourceRun, 4 * R.Left);
              TargetRun := CalculateScanline(DestBits, DestWidth, DestHeight, Y + Target.Y);
              Inc(TargetRun, 4 * Target.X);
              AlphaBlendLinePerPixel(SourceRun, TargetRun, R.Right - R.Left, Bias);
            end;
          end;
          EMMS;
        end;
      bmMasterAlpha:
        begin
          SourceBits := GetBitmapBitsFromDeviceContext(Source, SourceWidth, SourceHeight);
          DestBits := GetBitmapBitsFromDeviceContext(Destination, DestWidth, DestHeight);
          if Assigned(SourceBits) and Assigned(DestBits) then
          begin
            for Y := 0 to R.Bottom - R.Top - 1 do
            begin
              // Use the same addressing as the other source/target modes: the source line is offset by
              // R.Left and the target line by Target.X. (Previously the source was offset by Target.X and
              // the target was not offset horizontally at all.)
              SourceRun := CalculateScanline(SourceBits, SourceWidth, SourceHeight, Y + R.Top);
              Inc(SourceRun, 4 * R.Left);
              TargetRun := CalculateScanline(DestBits, DestWidth, DestHeight, Y + Target.Y);
              Inc(TargetRun, 4 * Target.X);
              AlphaBlendLineMaster(SourceRun, TargetRun, R.Right - R.Left, ConstantAlpha, Bias);
            end;
          end;
          EMMS;
        end;
      bmConstantAlphaAndColor:
        begin
          //todo: see why is not working
          {
          QColor_fromRgb(@BlendColor,
            Bias and $000000FF,
            (Bias shr 8) and $000000FF,
            (Bias shr 16) and $000000FF,
            ConstantAlpha);
          QPainter_fillRect(TQTDeviceContext(Destination).Widget,
            R.Left + Target.x, R.Top + Target.y,
            R.Right - R.Left, R.Bottom - R.Top, @BlendColor);
          }
          // Source is ignored since there is a constant color value.
          {$IFDEF CPU64}
          if ADst <> nil then
          begin
            // Temporarily undo the painter translation, fill the rectangle with the blend color
            // (Bias holds the color components, ConstantAlpha the alpha), then restore the translation.
            ATransform := QPainter_transform(ADst.Widget);
            DX := QTransform_dx(ATransform);
            DY := QTransform_dy(ATransform);
            ADst.translate(-DX, -DY);
            AColor := QColor_create((Bias and $000000FF), ((Bias shr 8) and $000000FF), ((Bias shr 16) and $000000FF), ConstantAlpha);
            QColor_convertTo(AColor, @AFillColor, QColorRgb);
            QPainter_fillRect(ADst.Widget, R.Left + Target.x, R.Top + Target.y,
              R.Right - R.Left, R.Bottom - R.Top, PQColor(@AFillColor));
            ADst.translate(DX, DY);
            QColor_destroy(AColor);
          end;
          {$ELSE}
          DestBits := GetBitmapBitsFromDeviceContext(Destination, DestWidth, DestHeight);
          if Assigned(DestBits) then
          begin
            for Y := 0 to R.Bottom - R.Top - 1 do
            begin
              // R addresses the destination bitmap directly in this mode; Target is not used.
              TargetRun := CalculateScanline(DestBits, DestWidth, DestHeight, Y + R.Top);
              Inc(TargetRun, 4 * R.Left);
              AlphaBlendLineMasterAndColor(TargetRun, R.Right - R.Left, ConstantAlpha, Bias);
            end;
          end;
          EMMS;
          {$ENDIF}
        end;
    end;
  end;
end;