diff --git a/.gitattributes b/.gitattributes
index 118e35112a..8e13ac678f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -6933,6 +6933,7 @@ tests/test/tset4.pp svneol=native#text/plain
 tests/test/tset5.pp svneol=native#text/plain
 tests/test/tset5a.pp svneol=native#text/plain
 tests/test/tset6.pp svneol=native#text/plain
+tests/test/tset7.pp svneol=native#text/plain
 tests/test/tstack.pp svneol=native#text/plain
 tests/test/tstprocv.pp svneol=native#text/plain
 tests/test/tstring1.pp svneol=native#text/plain
diff --git a/compiler/cgobj.pas b/compiler/cgobj.pas
index 8f11d25b2b..b530f74a63 100644
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@@ -237,6 +237,24 @@ unit cgobj;
           procedure a_load_subsetref_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize : tcgsize; const fromsref: tsubsetreference; const tosreg: tsubsetregister); virtual;
           procedure a_load_subsetreg_subsetref(list: TAsmlist; fromsubsetsize, tosubsetsize : tcgsize; const fromsreg: tsubsetregister; const tosref: tsubsetreference); virtual;
 
+          { bit test instructions }
+          procedure a_bit_test_reg_reg_reg(list : TAsmList; bitnumbersize,valuesize,destsize: tcgsize;bitnumber,value,destreg: tregister); virtual;
+          procedure a_bit_test_const_ref_reg(list: TAsmList; destsize: tcgsize; bitnumber: aint; const ref: treference; destreg: tregister); virtual;
+          procedure a_bit_test_const_reg_reg(list: TAsmList; setregsize, destsize: tcgsize; bitnumber: aint; setreg, destreg: tregister); virtual;
+          procedure a_bit_test_const_subsetreg_reg(list: TAsmList; setregsize, destsize: tcgsize; bitnumber: aint; const setreg: tsubsetregister; destreg: tregister); virtual;
+          procedure a_bit_test_reg_ref_reg(list: TAsmList; bitnumbersize, destsize: tcgsize; bitnumber: tregister; const ref: treference; destreg: tregister); virtual;
+          procedure a_bit_test_reg_loc_reg(list: TAsmList; bitnumbersize, destsize: tcgsize; bitnumber: tregister; const loc: tlocation; destreg: tregister);
+          procedure a_bit_test_const_loc_reg(list: TAsmList; destsize: tcgsize; bitnumber: aint; const loc: tlocation; destreg: tregister);
+
+          { bit set/clear instructions }
+          procedure a_bit_set_reg_reg(list : TAsmList; doset: boolean; bitnumbersize, destsize: tcgsize; bitnumber,dest: tregister); virtual;
+          procedure a_bit_set_const_ref(list: TAsmList; doset: boolean;destsize: tcgsize; bitnumber: aint; const ref: treference); virtual;
+          procedure a_bit_set_const_reg(list: TAsmList; doset: boolean; destsize: tcgsize; bitnumber: aint; destreg: tregister); virtual;
+          procedure a_bit_set_const_subsetreg(list: TAsmList; doset: boolean; destsize: tcgsize; bitnumber: aint; const destreg: tsubsetregister); virtual;
+          procedure a_bit_set_reg_ref(list: TAsmList; doset: boolean; bitnumbersize: tcgsize; bitnumber: tregister; const ref: treference); virtual;
+          procedure a_bit_set_reg_loc(list: TAsmList; doset: boolean; bitnumbersize: tcgsize; bitnumber: tregister; const loc: tlocation);
+          procedure a_bit_set_const_loc(list: TAsmList; doset: boolean; bitnumber: aint; const loc: tlocation);
+
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
           procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
@@ -463,6 +481,10 @@ unit cgobj;
 
           procedure a_load_regconst_subsetref_intern(list : TAsmList; fromsize, subsetsize: tcgsize; fromreg: tregister; const sref: tsubsetreference; slopt: tsubsetloadopt); virtual;
           procedure a_load_regconst_subsetreg_intern(list : TAsmList; fromsize, subsetsize: tcgsize; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); virtual;
+
+          function get_bit_const_ref_sref(bitnumber: aint; const ref: treference): tsubsetreference;
+          function get_bit_const_reg_sreg(setregsize: tcgsize; bitnumber: aint; setreg: tregister): tsubsetregister;
+          function get_bit_reg_ref_sref(list: TAsmList; bitnumbersize: tcgsize; bitnumber: tregister; const ref: treference): tsubsetreference;
        end;
 
 {$ifndef cpu64bit}
@@ -1312,7 +1334,7 @@ implementation
         loadbitsize := tcgsize2size[loadsize]*8;
 
         { load the (first part) of the bit sequence }
-        valuereg := cg.getintregister(list,OS_INT);
+        valuereg := getintregister(list,OS_INT);
         a_load_ref_reg(list,loadsize,OS_INT,sref.ref,valuereg);
 
         if not extra_load then
@@ -1401,6 +1423,7 @@ implementation
         tmpreg, tmpindexreg, valuereg, extra_value_reg, maskreg: tregister;
         tosreg, fromsreg: tsubsetregister;
         tmpref: treference;
+        bitmask: aword;
         loadsize: tcgsize;
         loadbitsize: byte;
         extra_load: boolean;
@@ -1413,7 +1436,7 @@ implementation
         loadbitsize := tcgsize2size[loadsize]*8;
 
         { load the (first part) of the bit sequence }
-        valuereg := cg.getintregister(list,OS_INT);
+        valuereg := getintregister(list,OS_INT);
         a_load_ref_reg(list,loadsize,OS_INT,sref.ref,valuereg);
 
         { constant offset of bit sequence? }
@@ -1441,21 +1464,20 @@ implementation
                 if (sref.bitlen = AIntBits) then
                   internalerror(2006081711);
 
-                { calculated correct shiftcount for big endian }
-                tmpindexreg := getintregister(list,OS_INT);
-                a_load_reg_reg(list,OS_INT,OS_INT,sref.bitindexreg,tmpindexreg);
-                if (target_info.endian = endian_big) then
-                  begin
-                    a_op_const_reg(list,OP_SUB,OS_INT,loadbitsize-sref.bitlen,tmpindexreg);
-                    a_op_reg_reg(list,OP_NEG,OS_INT,tmpindexreg,tmpindexreg);
-                  end;
-
                 { zero the bits we have to insert }
                 if (slopt <> SL_SETMAX) then
                   begin
                     maskreg := getintregister(list,OS_INT);
-                    a_load_const_reg(list,OS_INT,aint((aword(1) shl sref.bitlen)-1),maskreg);
-                    a_op_reg_reg(list,OP_SHL,OS_INT,tmpindexreg,maskreg);
+                    if (target_info.endian = endian_big) then
+                      begin
+                        a_load_const_reg(list,OS_INT,aint((aword(1) shl sref.bitlen)-1) shl (loadbitsize-sref.bitlen),maskreg);
+                        a_op_reg_reg(list,OP_SHR,OS_INT,sref.bitindexreg,maskreg);
+                      end
+                    else
+                      begin
+                        a_load_const_reg(list,OS_INT,aint((aword(1) shl sref.bitlen)-1),maskreg);
+                        a_op_reg_reg(list,OP_SHL,OS_INT,sref.bitindexreg,maskreg);
+                      end;
                     a_op_reg_reg(list,OP_NOT,OS_INT,maskreg,maskreg);
                     a_op_reg_reg(list,OP_AND,OS_INT,maskreg,valuereg);
                   end;
@@ -1470,9 +1492,25 @@ implementation
                       a_load_const_reg(list,OS_INT,aint((aword(1) shl sref.bitlen) - 1), tmpreg)
                     else
                       a_load_const_reg(list,OS_INT,-1,tmpreg);
-                    if (slopt <> SL_REGNOSRCMASK) then
-                      a_op_const_reg(list,OP_AND,OS_INT,aint((aword(1) shl sref.bitlen)-1),tmpreg);
-                    a_op_reg_reg(list,OP_SHL,OS_INT,tmpindexreg,tmpreg);
+                    if (target_info.endian = endian_big) then
+                      begin
+                        a_op_const_reg(list,OP_SHL,OS_INT,loadbitsize-sref.bitlen,tmpreg);
+                        if not(slopt in [SL_REGNOSRCMASK,SL_SETMAX]) then
+                          begin
+                            if (loadbitsize <> AIntBits) then
+                              bitmask := (((aword(1) shl loadbitsize)-1) xor ((aword(1) shl (loadbitsize-sref.bitlen))-1))
+                            else
+                              bitmask := (high(aword) xor ((aword(1) shl (loadbitsize-sref.bitlen))-1));
+                            a_op_const_reg(list,OP_AND,OS_INT,bitmask,tmpreg);
+                          end;
+                        a_op_reg_reg(list,OP_SHR,OS_INT,sref.bitindexreg,tmpreg);
+                      end
+                    else
+                      begin
+                        if not(slopt in [SL_REGNOSRCMASK,SL_SETMAX]) then
+                          a_op_const_reg(list,OP_AND,OS_INT,aint((aword(1) shl sref.bitlen)-1),tmpreg);
+                        a_op_reg_reg(list,OP_SHL,OS_INT,sref.bitindexreg,tmpreg);
+                      end;
                     a_op_reg_reg(list,OP_OR,OS_INT,tmpreg,valuereg);
                   end;
               end;
@@ -1786,6 +1824,279 @@ implementation
 {$undef overflowon}
 {$endif}
 
+    { generic bit address calculation routines }
+
+    function tcg.get_bit_const_ref_sref(bitnumber: aint; const ref: treference): tsubsetreference;
+      begin
+        result.ref:=ref;
+        inc(result.ref.offset,bitnumber div 8);
+        result.bitindexreg:=NR_NO;
+        result.startbit:=bitnumber mod 8;
+        result.bitlen:=1;
+      end;
+
+
+    function tcg.get_bit_const_reg_sreg(setregsize: tcgsize; bitnumber: aint; setreg: tregister): tsubsetregister;
+      begin
+        result.subsetreg:=setreg;
+        result.subsetregsize:=setregsize;
+        { subsetregs always count from the least significant to the most significant bit }
+        if (target_info.endian=endian_big) then
+          result.startbit:=(tcgsize2size[setregsize]*8)-bitnumber-1
+        else
+          result.startbit:=bitnumber;
+        result.bitlen:=1;
+      end;
+
+
+    function tcg.get_bit_reg_ref_sref(list: TAsmList; bitnumbersize: tcgsize; bitnumber: tregister; const ref: treference): tsubsetreference;
+      var
+        tmpreg,
+        tmpaddrreg: tregister;
+      begin
+        result.ref:=ref;
+        result.startbit:=0;
+        result.bitlen:=1;
+  
+        tmpreg:=getintregister(list,bitnumbersize);
+        a_op_const_reg_reg(list,OP_SHR,bitnumbersize,3,bitnumber,tmpreg);
+        tmpaddrreg:=cg.getaddressregister(list);
+        a_load_reg_reg(list,bitnumbersize,OS_ADDR,tmpreg,tmpaddrreg);
+        if (result.ref.base=NR_NO) then
+          result.ref.base:=tmpaddrreg
+        else if (result.ref.index=NR_NO) then
+          result.ref.index:=tmpaddrreg
+        else
+          begin
+            a_op_reg_reg(list,OP_ADD,OS_ADDR,result.ref.index,tmpaddrreg);
+            result.ref.index:=tmpaddrreg;
+          end;
+        tmpreg:=getintregister(list,OS_INT);
+        a_op_const_reg_reg(list,OP_AND,OS_INT,7,bitnumber,tmpreg);
+        result.bitindexreg:=tmpreg;
+      end;
+
+
+    { bit testing routines }
+
+    procedure tcg.a_bit_test_reg_reg_reg(list : TAsmList; bitnumbersize,valuesize,destsize: tcgsize;bitnumber,value,destreg: tregister);
+      var
+        tmpvalue: tregister;
+      begin
+        tmpvalue:=cg.getintregister(list,valuesize);
+  
+        if (target_info.endian=endian_little) then
+          begin
+            { rotate value register "bitnumber" bits to the right }
+            a_op_reg_reg_reg(list,OP_SHR,valuesize,bitnumber,value,tmpvalue);
+            { extract the bit we want }
+            a_op_const_reg(list,OP_AND,valuesize,1,tmpvalue);
+          end
+        else
+          begin
+            { highest (leftmost) bit = bit 0 -> shl bitnumber results in wanted }
+            { bit in uppermost position, then move it to the lowest position    }
+            { "and" is not necessary since combination of shl/shr will clear    }
+            { all other bits                                                    }
+            a_op_reg_reg_reg(list,OP_SHL,valuesize,bitnumber,value,tmpvalue);
+            a_op_const_reg(list,OP_SHR,valuesize,tcgsize2size[valuesize]*8-1,tmpvalue);
+          end;
+        a_load_reg_reg(list,valuesize,destsize,tmpvalue,destreg);
+      end;
+
+
+    procedure tcg.a_bit_test_const_ref_reg(list: TAsmList; destsize: tcgsize; bitnumber: aint; const ref: treference; destreg: tregister);
+      begin
+        a_load_subsetref_reg(list,OS_8,destsize,get_bit_const_ref_sref(bitnumber,ref),destreg);
+      end;
+  
+    
+    procedure tcg.a_bit_test_const_reg_reg(list: TAsmList; setregsize, destsize: tcgsize; bitnumber: aint; setreg, destreg: tregister);
+      begin
+        a_load_subsetreg_reg(list,setregsize,destsize,get_bit_const_reg_sreg(setregsize,bitnumber,setreg),destreg);
+      end;
+  
+
+    procedure tcg.a_bit_test_const_subsetreg_reg(list: TAsmList; setregsize, destsize: tcgsize; bitnumber: aint; const setreg: tsubsetregister; destreg: tregister);
+      var
+        tmpsreg: tsubsetregister;
+      begin
+        { the first parameter is used to calculate the bit offset in }
+        { case of big endian, and therefore must be the size of the  }
+        { set and not of the whole subsetreg                         }
+        tmpsreg:=get_bit_const_reg_sreg(setregsize,bitnumber,setreg.subsetreg);
+        { now fix the size of the subsetreg }
+        tmpsreg.subsetregsize:=setreg.subsetregsize;
+        { correct offset of the set in the subsetreg }
+        inc(tmpsreg.startbit,setreg.startbit);
+        a_load_subsetreg_reg(list,setregsize,destsize,tmpsreg,destreg);
+      end;
+
+  
+    procedure tcg.a_bit_test_reg_ref_reg(list: TAsmList; bitnumbersize, destsize: tcgsize; bitnumber: tregister; const ref: treference; destreg: tregister);
+      begin
+        a_load_subsetref_reg(list,OS_8,destsize,get_bit_reg_ref_sref(list,bitnumbersize,bitnumber,ref),destreg);
+      end;
+  
+  
+    procedure tcg.a_bit_test_reg_loc_reg(list: TAsmList; bitnumbersize, destsize: tcgsize; bitnumber: tregister; const loc: tlocation; destreg: tregister);
+      var
+        tmpreg: tregister;
+      begin
+        case loc.loc of
+          LOC_REFERENCE,LOC_CREFERENCE:
+            a_bit_test_reg_ref_reg(list,bitnumbersize,destsize,bitnumber,loc.reference,destreg);
+          LOC_REGISTER,LOC_CREGISTER,
+          LOC_SUBSETREG,LOC_CSUBSETREG,
+          LOC_CONSTANT:
+            begin
+              case loc.loc of
+                LOC_REGISTER,LOC_CREGISTER:
+                  tmpreg:=loc.register;
+                LOC_SUBSETREG,LOC_CSUBSETREG:
+                  begin
+                    tmpreg:=getintregister(list,loc.size);
+                    a_load_subsetreg_reg(list,loc.size,loc.size,loc.sreg,tmpreg);
+                  end;
+                LOC_CONSTANT:
+                  begin
+                    tmpreg:=getintregister(list,loc.size);
+                    a_load_const_reg(list,loc.size,loc.value,tmpreg);
+                  end;
+              end;
+              a_bit_test_reg_reg_reg(list,bitnumbersize,loc.size,destsize,bitnumber,tmpreg,destreg);
+            end;
+          { LOC_SUBSETREF is not possible, because sets are not (yet) bitpacked }
+          else
+            internalerror(2007051701);
+        end;
+      end;
+  
+  
+    procedure tcg.a_bit_test_const_loc_reg(list: TAsmList; destsize: tcgsize; bitnumber: aint; const loc: tlocation; destreg: tregister);
+      begin
+        case loc.loc of
+          LOC_REFERENCE,LOC_CREFERENCE:
+            a_bit_test_const_ref_reg(list,destsize,bitnumber,loc.reference,destreg);
+          LOC_REGISTER,LOC_CREGISTER:
+            a_bit_test_const_reg_reg(list,loc.size,destsize,bitnumber,loc.register,destreg);
+          LOC_SUBSETREG,LOC_CSUBSETREG:
+            a_bit_test_const_subsetreg_reg(list,loc.size,destsize,bitnumber,loc.sreg,destreg);
+          { LOC_SUBSETREF is not possible, because sets are not (yet) bitpacked }
+          else
+            internalerror(2007051702);
+        end;
+      end;
+
+    { bit setting/clearing routines }
+
+    procedure tcg.a_bit_set_reg_reg(list : TAsmList; doset: boolean; bitnumbersize, destsize: tcgsize; bitnumber,dest: tregister);
+      var
+        tmpvalue: tregister;
+      begin
+        tmpvalue:=cg.getintregister(list,destsize);
+  
+        if (target_info.endian=endian_little) then
+          begin
+            a_load_const_reg(list,destsize,1,tmpvalue);
+            { rotate bit "bitnumber" bits to the left }
+            a_op_reg_reg(list,OP_SHL,destsize,bitnumber,tmpvalue);
+          end
+        else
+          begin
+            { highest (leftmost) bit = bit 0 -> "$80/$8000/$80000000/ ... }
+            { shr bitnumber" results in correct mask                      }
+            a_load_const_reg(list,destsize,1 shl (tcgsize2size[destsize]*8-1),tmpvalue);
+            a_op_reg_reg(list,OP_SHR,destsize,bitnumber,tmpvalue);
+          end;
+        { set/clear the bit we want }
+        if (doset) then
+          a_op_reg_reg(list,OP_OR,destsize,tmpvalue,dest)
+        else
+          begin
+            a_op_reg_reg(list,OP_NOT,destsize,tmpvalue,tmpvalue);
+            a_op_reg_reg(list,OP_AND,destsize,tmpvalue,dest)
+          end;
+      end;
+
+
+    procedure tcg.a_bit_set_const_ref(list: TAsmList; doset: boolean;destsize: tcgsize; bitnumber: aint; const ref: treference);
+      begin
+        a_load_const_subsetref(list,OS_8,ord(doset),get_bit_const_ref_sref(bitnumber,ref));
+      end;
+  
+    
+    procedure tcg.a_bit_set_const_reg(list: TAsmList; doset: boolean; destsize: tcgsize; bitnumber: aint; destreg: tregister);
+      begin
+        a_load_const_subsetreg(list,OS_8,ord(doset),get_bit_const_reg_sreg(destsize,bitnumber,destreg));
+      end;
+  
+  
+    procedure tcg.a_bit_set_const_subsetreg(list: TAsmList; doset: boolean; destsize: tcgsize; bitnumber: aint; const destreg: tsubsetregister);
+      var
+        tmpsreg: tsubsetregister;
+      begin
+        { the first parameter is used to calculate the bit offset in }
+        { case of big endian, and therefore must be the size of the  }
+        { set and not of the whole subsetreg                         }
+        tmpsreg:=get_bit_const_reg_sreg(destsize,bitnumber,destreg.subsetreg);
+        { now fix the size of the subsetreg }
+        tmpsreg.subsetregsize:=destreg.subsetregsize;
+        { correct offset of the set in the subsetreg }
+        inc(tmpsreg.startbit,destreg.startbit);
+        a_load_const_subsetreg(list,OS_8,ord(doset),tmpsreg);
+      end;
+
+
+    procedure tcg.a_bit_set_reg_ref(list: TAsmList; doset: boolean; bitnumbersize: tcgsize; bitnumber: tregister; const ref: treference);
+      begin
+        a_load_const_subsetref(list,OS_8,ord(doset),get_bit_reg_ref_sref(list,bitnumbersize,bitnumber,ref));
+      end;
+  
+  
+    procedure tcg.a_bit_set_reg_loc(list: TAsmList; doset: boolean; bitnumbersize: tcgsize; bitnumber: tregister; const loc: tlocation);
+      var
+        tmpreg: tregister;
+      begin
+        case loc.loc of
+          LOC_REFERENCE:
+            a_bit_set_reg_ref(list,doset,bitnumbersize,bitnumber,loc.reference);
+          LOC_CREGISTER:
+            a_bit_set_reg_reg(list,doset,bitnumbersize,loc.size,bitnumber,loc.register);
+          { e.g. a 2-byte set in a record regvar }
+          LOC_CSUBSETREG:
+            begin
+              { hard to do in-place in a generic way, so operate on a copy }
+              tmpreg:=cg.getintregister(list,loc.size);
+              a_load_subsetreg_reg(list,loc.size,loc.size,loc.sreg,tmpreg);
+              a_bit_set_reg_reg(list,doset,bitnumbersize,loc.size,bitnumber,tmpreg);
+              a_load_reg_subsetreg(list,loc.size,loc.size,tmpreg,loc.sreg);
+            end;
+          { LOC_SUBSETREF is not possible, because sets are not (yet) bitpacked }
+          else
+            internalerror(2007051703)
+        end;
+      end;
+  
+  
+    procedure tcg.a_bit_set_const_loc(list: TAsmList; doset: boolean; bitnumber: aint; const loc: tlocation);
+      begin
+        case loc.loc of
+          LOC_REFERENCE:
+            a_bit_set_const_ref(list,doset,loc.size,bitnumber,loc.reference);
+          LOC_CREGISTER:
+            a_bit_set_const_reg(list,doset,loc.size,bitnumber,loc.register);
+          LOC_CSUBSETREG:
+            a_bit_set_const_subsetreg(list,doset,loc.size,bitnumber,loc.sreg);
+          { LOC_SUBSETREF is not possible, because sets are not (yet) bitpacked }
+          else
+            internalerror(2007051704)
+        end;
+      end;
+
+
+    { memory/register loading }
+
     procedure tcg.a_load_reg_ref_unaligned(list : TAsmList;fromsize,tosize : tcgsize;register : tregister;const ref : treference);
       var
         tmpref : treference;
@@ -2188,7 +2499,7 @@ implementation
       var
         tmpreg: tregister;
       begin
-        tmpreg := cg.getintregister(list, size);
+        tmpreg := getintregister(list, size);
         a_load_subsetreg_reg(list,subsetsize,size,sreg,tmpreg);
         a_op_const_reg(list,op,size,a,tmpreg);
         a_load_reg_subsetreg(list,size,subsetsize,tmpreg,sreg);
@@ -2199,7 +2510,7 @@ implementation
       var
         tmpreg: tregister;
       begin
-        tmpreg := cg.getintregister(list, size);
+        tmpreg := getintregister(list, size);
         a_load_subsetref_reg(list,subsetsize,size,sref,tmpreg);
         a_op_const_reg(list,op,size,a,tmpreg);
         a_load_reg_subsetref(list,size,subsetsize,tmpreg,sref);
@@ -2261,7 +2572,7 @@ implementation
       var
         tmpreg: tregister;
       begin
-        tmpreg := cg.getintregister(list, opsize);
+        tmpreg := getintregister(list, opsize);
         a_load_subsetreg_reg(list,subsetsize,opsize,sreg,tmpreg);
         a_op_reg_reg(list,op,opsize,reg,tmpreg);
         a_load_reg_subsetreg(list,opsize,subsetsize,tmpreg,sreg);
@@ -2272,7 +2583,7 @@ implementation
       var
         tmpreg: tregister;
       begin
-        tmpreg := cg.getintregister(list, opsize);
+        tmpreg := getintregister(list, opsize);
         a_load_subsetref_reg(list,subsetsize,opsize,sref,tmpreg);
         a_op_reg_reg(list,op,opsize,reg,tmpreg);
         a_load_reg_subsetref(list,opsize,subsetsize,tmpreg,sref);
@@ -3330,13 +3641,13 @@ implementation
         paraloc:=tparavarsym(hsym).paraloc[callerside].location^;
         case paraloc.loc of
           LOC_REGISTER:
-            cg.a_op_const_reg(list,OP_SUB,paraloc.size,ioffset,paraloc.register);
+            a_op_const_reg(list,OP_SUB,paraloc.size,ioffset,paraloc.register);
           LOC_REFERENCE:
             begin
                { offset in the wrapper needs to be adjusted for the stored
                  return address }
                reference_reset_base(href,paraloc.reference.index,paraloc.reference.offset+sizeof(aint));
-               cg.a_op_const_ref(list,OP_SUB,paraloc.size,ioffset,href);
+               a_op_const_ref(list,OP_SUB,paraloc.size,ioffset,href);
             end
           else
             internalerror(200309189);
@@ -3374,11 +3685,11 @@ implementation
                   current_asmdata.asmlists[al_picdata].concat(tai_const.create_32bit(0));
 {$endif cpu64bit}
                 end;
-              result := cg.getaddressregister(list);
+              result := getaddressregister(list);
               reference_reset_symbol(ref,l,0);
 {              ref.base:=current_procinfo.got;
               ref.relsymbol:=current_procinfo.CurrGOTLabel;}
-              cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,result);
+              a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,result);
             end;
           end;
         end;
diff --git a/compiler/cutils.pas b/compiler/cutils.pas
index 476c223f7d..b654559049 100644
--- a/compiler/cutils.pas
+++ b/compiler/cutils.pas
@@ -54,6 +54,8 @@ interface
     Function SwapQWord(x : qword) : qword;{$ifdef USEINLINE}inline;{$endif}
     {# Return value @var(i) aligned on @var(a) boundary }
     function align(i,a:longint):longint;{$ifdef USEINLINE}inline;{$endif}
+    {# Return @var(b) with the bit order reversed }
+    function reverse_byte(b: byte): byte;
 
     function used_align(varalign,minalign,maxalign:shortint):shortint;
     function isbetteralignedthan(new, org, limit: cardinal): boolean;
@@ -235,6 +237,15 @@ implementation
       End;
 
 
+    function reverse_byte(b: byte): byte;
+      const
+        reverse_nible:array[0..15] of 0..15 =
+          (%0000,%1000,%0100,%1100,%0010,%1010,%0110,%1110,
+           %0001,%1001,%0101,%1101,%0011,%1011,%0111,%1111);
+      begin
+        reverse_byte:=(reverse_nible[b and $f] shl 4) or reverse_nible[b shr 4];
+      end;
+
     function align(i,a:longint):longint;{$ifdef USEINLINE}inline;{$endif}
     {
       return value <i> aligned <a> boundary
diff --git a/compiler/defutil.pas b/compiler/defutil.pas
index ae5bb3ab1a..fdd1aee59a 100644
--- a/compiler/defutil.pas
+++ b/compiler/defutil.pas
@@ -1024,19 +1024,13 @@ implementation
     {# returns true, if the type passed is a varset }
     function is_varset(p : tdef) : boolean;
       begin
-        if target_info.endian=endian_little then
-          result:=(p.typ=setdef) and not(p.size in [1,2,4])
-        else
-          result:=false;
+        result:=(p.typ=setdef) and not(p.size in [1,2,4])
       end;
 
 
     function is_normalset(p : tdef) : boolean;
       begin
-        if target_info.endian=endian_big then
-          result:=(p.typ=setdef) and (tsetdef(p).size=32)
-        else
-          result:=false;
+        result:=false;
       end;
 
 
diff --git a/compiler/ncgadd.pas b/compiler/ncgadd.pas
index 0e06d2a92a..9ecc9ba5bb 100644
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@@ -249,6 +249,7 @@ interface
       var
         cgop   : TOpCg;
         tmpreg : tregister;
+        mask   : aint;
         opdone : boolean;
       begin
         opdone := false;
@@ -279,14 +280,30 @@ interface
                   if assigned(tsetelementnode(right).right) then
                    internalerror(43244);
                   if (right.location.loc = LOC_CONSTANT) then
-                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_OR,location.size,
-                      aint(1 shl right.location.value),
-                      left.location.register,location.register)
+                    begin
+                      if (target_info.endian=endian_big) then
+                        mask:=aint((aword(1) shl (resultdef.size*8-1)) shr aword(right.location.value))
+                      else
+                        mask:=aint(1 shl right.location.value);
+                      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_OR,location.size,
+                        mask,left.location.register,location.register);
+                    end
                   else
                     begin
+                      if (target_info.endian=endian_big) then
+                        begin
+                          mask:=aint((aword(1) shl (resultdef.size*8-1)));
+                          cgop:=OP_SHR
+                        end
+                      else
+                        begin
+                          mask:=1;
+                          cgop:=OP_SHL
+                        end;
                       tmpreg := cg.getintregister(current_asmdata.CurrAsmList,location.size);
-                      cg.a_load_const_reg(current_asmdata.CurrAsmList,location.size,1,tmpreg);
-                      cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SHL,location.size,
+                      cg.a_load_const_reg(current_asmdata.CurrAsmList,location.size,mask,tmpreg);
+                      location_force_reg(current_asmdata.CurrAsmList,right.location,location.size,true);
+                      cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
                         right.location.register,tmpreg);
                       if left.location.loc <> LOC_CONSTANT then
                         cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_OR,location.size,tmpreg,
diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas
index 6ddb1384fa..f6fcedbaf4 100644
--- a/compiler/ncgcon.pas
+++ b/compiler/ncgcon.pas
@@ -67,7 +67,7 @@ implementation
 
     uses
       globtype,widestr,systems,
-      verbose,globals,
+      verbose,globals,cutils,
       symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil,
       cpuinfo,cpubase,
       cgbase,cgobj,cgutils,
@@ -511,23 +511,38 @@ implementation
          lastlabel   : tasmlabel;
          i           : longint;
          neededtyp   : taiconst_type;
-         indexadjust : longint;
       type
          setbytes=array[0..31] of byte;
          Psetbytes=^setbytes;
       begin
-        { xor indexadjust with indexes in a set typecasted to an array of   }
-        { bytes to get the correct locations, also when endianess of source }
-        { and destiantion differs (JM)                                      }
-        if (source_info.endian = target_info.endian) then
-          indexadjust := 0
-        else
-          indexadjust := 3;
         { small sets are loaded as constants }
         if not(is_varset(resultdef)) and not(is_normalset(resultdef)) then
          begin
            location_reset(location,LOC_CONSTANT,int_cgsize(resultdef.size));
-           location.value:=pLongint(value_set)^;
+           if (source_info.endian=target_info.endian) then
+             begin
+{$if defined(FPC_NEW_BIGENDIAN_SETS) or defined(FPC_LITTLE_ENDIAN)}
+               { not plongint, because that will "sign extend" the set on 64 bit platforms }
+               { if changed to "paword", please also modify "32-resultdef.size*8" and      }
+               { cross-endian code below                                                   }
+               location.value:=pCardinal(value_set)^
+{$else}
+               location.value:=reverse_byte(Psetbytes(value_set)^[0]);
+               location.value:=location.value or (reverse_byte(Psetbytes(value_set)^[1]) shl 8);
+               location.value:=location.value or (reverse_byte(Psetbytes(value_set)^[2]) shl 16);
+               location.value:=location.value or (reverse_byte(Psetbytes(value_set)^[3]) shl 24);
+{$endif}
+             end
+           else
+             begin
+               location.value:=cardinal(SwapLong(pLongint(value_set)^));
+               location.value:= reverse_byte (location.value         and $ff)         or
+                               (reverse_byte((location.value shr  8) and $ff) shl  8) or
+                               (reverse_byte((location.value shr 16) and $ff) shl 16) or
+                               (reverse_byte((location.value shr 24) and $ff) shl 24);
+             end;
+           if (target_info.endian=endian_big) then
+             location.value:=location.value shr (32-resultdef.size*8);
            exit;
          end;
         location_reset(location,LOC_CREFERENCE,OS_NO);
@@ -554,7 +569,16 @@ implementation
                              i:=0;
                              while assigned(hp1) and (i<32) do
                               begin
-                                if tai_const(hp1).value<>Psetbytes(value_set)^[i xor indexadjust] then
+                                if (source_info.endian=target_info.endian) then
+                                  begin
+{$if defined(FPC_NEW_BIGENDIAN_SETS) or defined(FPC_LITTLE_ENDIAN)}
+                                    if tai_const(hp1).value<>Psetbytes(value_set)^[i ] then
+{$else}
+                                    if tai_const(hp1).value<>reverse_byte(Psetbytes(value_set)^[i xor 3]) then
+{$endif}
+                                      break
+                                  end
+                                else if tai_const(hp1).value<>reverse_byte(Psetbytes(value_set)^[i]) then
                                   break;
                                 inc(i);
                                 hp1:=tai(hp1.next);
@@ -602,8 +626,17 @@ implementation
                  else
                  }
                   begin
-                    for i:=0 to 31 do
-                      current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_8bit(Psetbytes(value_set)^[i xor indexadjust]));
+                    if (source_info.endian=target_info.endian) then
+{$if defined(FPC_NEW_BIGENDIAN_SETS) or defined(FPC_LITTLE_ENDIAN)}
+                      for i:=0 to 31 do
+                        current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_8bit(Psetbytes(value_set)^[i]))
+{$else}
+                      for i:=0 to 31 do
+                        current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_8bit(reverse_byte(Psetbytes(value_set)^[i xor 3])))
+{$endif}
+                    else
+                      for i:=0 to 31 do
+                        current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_8bit(reverse_byte(Psetbytes(value_set)^[i])));
                   end;
                end;
           end;
diff --git a/compiler/ncginl.pas b/compiler/ncginl.pas
index 05d9ff02cc..8d61750eb4 100644
--- a/compiler/ncginl.pas
+++ b/compiler/ncginl.pas
@@ -508,126 +508,26 @@ implementation
 
       procedure tcginlinenode.second_IncludeExclude;
         var
-          bitsperop,l : longint;
-          opsize : tcgsize;
-          cgop : topcg;
-          addrreg2,addrreg,
-          hregister,hregister2: tregister;
-          use_small : boolean;
-          href : treference;
+          setpara, elepara: tnode;
         begin
-          if not(is_varset(tcallparanode(left).resultdef)) and
-             not(is_normalset(tcallparanode(left).resultdef)) then
-            opsize:=int_cgsize(tcallparanode(left).resultdef.size)
-          else
-            opsize:=OS_32;
-          bitsperop:=(8*tcgsize2size[opsize]);
+          { the set }
           secondpass(tcallparanode(left).left);
-          if tcallparanode(tcallparanode(left).right).left.nodetype=ordconstn then
-            begin
-              { calculate bit position }
-              l:=1 shl (tordconstnode(tcallparanode(tcallparanode(left).right).left).value mod bitsperop);
+          { the element to set }
+          secondpass(tcallparanode(tcallparanode(left).right).left);
 
-              { determine operator }
-              if inlinenumber=in_include_x_y then
-                cgop:=OP_OR
-              else
-                begin
-                  cgop:=OP_AND;
-                  l:=not(l);
-                end;
-              case tcallparanode(left).left.location.loc of
-                LOC_REFERENCE :
-                  begin
-                    inc(tcallparanode(left).left.location.reference.offset,
-                      (tordconstnode(tcallparanode(tcallparanode(left).right).left).value div bitsperop)*tcgsize2size[opsize]);
-                    cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,l,tcallparanode(left).left.location.reference);
-                  end;
-                LOC_CREGISTER :
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,tcallparanode(left).left.location.size,l,tcallparanode(left).left.location.register);
-                else
-                  internalerror(200405021);
-              end;
+          setpara:=tcallparanode(left).left;
+          elepara:=tcallparanode(tcallparanode(left).right).left;
+
+          if elepara.location.loc=LOC_CONSTANT then
+            begin
+              cg.a_bit_set_const_loc(current_asmdata.CurrAsmList,(inlinenumber=in_include_x_y),
+                elepara.location.value,setpara.location);
             end
           else
             begin
-              use_small:=
-                 { set type }
-                 (tsetdef(tcallparanode(left).left.resultdef).settype=smallset)
-                  and
-                   { elemenut number between 1 and 32 }
-                  ((tcallparanode(tcallparanode(left).right).left.resultdef.typ=orddef) and
-                   (torddef(tcallparanode(tcallparanode(left).right).left.resultdef).high<=32) or
-                   (tcallparanode(tcallparanode(left).right).left.resultdef.typ=enumdef) and
-                   (tenumdef(tcallparanode(tcallparanode(left).right).left.resultdef).max<=32));
-
-              { generate code for the element to set }
-              secondpass(tcallparanode(tcallparanode(left).right).left);
-
-              { bitnumber - which must be loaded into register }
-              hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-              hregister2:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-
-              cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,
-                  tcallparanode(tcallparanode(left).right).left.location,hregister);
-
-              if use_small then
-                begin
-                  { hregister contains the bitnumber to add }
-                  cg.a_load_const_reg(current_asmdata.CurrAsmList, opsize, 1, hregister2);
-                  cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_SHL, opsize, hregister, hregister2);
-
-                  { possiblities :
-                       bitnumber : LOC_REFERENCE, LOC_REGISTER, LOC_CREGISTER
-                       set value : LOC_REFERENCE, LOC_REGISTER
-                  }
-                  { location of set }
-                  if inlinenumber=in_include_x_y then
-                    begin
-                      cg.a_op_reg_loc(current_asmdata.CurrAsmList, OP_OR, hregister2,
-                      tcallparanode(left).left.location);
-                    end
-                  else
-                    begin
-                      cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_NOT, opsize, hregister2,hregister2);
-                      cg.a_op_reg_loc(current_asmdata.CurrAsmList, OP_AND, hregister2,
-                          tcallparanode(left).left.location);
-                    end;
-                end
-              else
-                begin
-                  { possiblities :
-                       bitnumber : LOC_REFERENCE, LOC_REGISTER, LOC_CREGISTER
-                       set value : LOC_REFERENCE
-                  }
-                  { hregister contains the bitnumber (div 32 to get the correct offset) }
-                  { hregister contains the bitnumber to add }
-
-                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, opsize, 5, hregister,hregister2);
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_SHL, opsize, 2, hregister2);
-                  addrreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                  { we need an extra address register to be able to do an ADD operation }
-                  addrreg2:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                  cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,OS_ADDR,hregister2,addrreg2);
-                  { calculate the correct address of the operand }
-                  cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, tcallparanode(left).left.location.reference,addrreg);
-                  cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_ADDR, addrreg2, addrreg);
-
-                  { hregister contains the bitnumber to add }
-                  cg.a_load_const_reg(current_asmdata.CurrAsmList, opsize, 1, hregister2);
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, opsize, 31, hregister);
-                  cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_SHL, opsize, hregister, hregister2);
-
-                  reference_reset_base(href,addrreg,0);
-
-                  if inlinenumber=in_include_x_y then
-                    cg.a_op_reg_ref(current_asmdata.CurrAsmList, OP_OR, opsize, hregister2, href)
-                  else
-                    begin
-                      cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_NOT, opsize, hregister2, hregister2);
-                      cg.a_op_reg_ref(current_asmdata.CurrAsmList, OP_AND, opsize, hregister2, href);
-                    end;
-                end;
+              location_force_reg(current_asmdata.CurrAsmList,elepara.location,OS_INT,true);
+              cg.a_bit_set_reg_loc(current_asmdata.CurrAsmList,(inlinenumber=in_include_x_y),
+                elepara.location.size,elepara.location.register,setpara.location);
             end;
         end;
 
diff --git a/compiler/ncgset.pas b/compiler/ncgset.pas
index d620e00785..d0838fe77c 100644
--- a/compiler/ncgset.pas
+++ b/compiler/ncgset.pas
@@ -27,7 +27,7 @@ interface
 
     uses
        globtype,globals,
-       node,nset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata;
+       node,nset,cpubase,cgbase,cgutils,cgobj,aasmbase,aasmtai,aasmdata;
 
     type
        tcgsetelementnode = class(tsetelementnode)
@@ -45,18 +45,6 @@ interface
           function pass_1: tnode;override;
           procedure pass_generate_code;override;
        protected
-          {# Routine to test bitnumber in bitnumber register on value
-             in value register. The __result register should be set
-             to one if the bit is set, otherwise __result register
-             should be set to zero.
-
-             Should be overriden on processors which have specific
-             instructions to do bit tests.
-          }
-
-          procedure emit_bit_test_reg_reg(list : TAsmList;
-                                          bitsize: tcgsize; bitnumber,value : tregister;
-                                          ressize: tcgsize; res :tregister);virtual;
           function checkgenjumps(out setparts: Tsetparts; out numparts: byte; out use_small: boolean): boolean; virtual;
           function analizeset(const Aset:Tconstset;out setparts: Tsetparts; out numparts: byte;is_small:boolean):boolean;virtual;
        end;
@@ -100,8 +88,7 @@ implementation
       paramgr,
       procinfo,pass_2,tgobj,
       nbas,ncon,nflw,
-      ncgutil,regvars,
-      cgutils;
+      ncgutil;
 
 
 {*****************************************************************************
@@ -133,45 +120,6 @@ implementation
 {*****************************************************************************
 *****************************************************************************}
 
-  {**********************************************************************}
-  {  Description: Emit operation to do a bit test, where the bitnumber   }
-  {  to test is in the bitnumber register. The value to test against is  }
-  {  located in the value register.                                      }
-  {   WARNING: Bitnumber register value is DESTROYED!                    }
-  {  __Result register is set to 1, if the bit is set otherwise, __Result}
-  {   is set to zero. __RESULT register is also used as scratch.         }
-  {**********************************************************************}
-  procedure tcginnode.emit_bit_test_reg_reg(list : TAsmList;
-                                            bitsize: tcgsize; bitnumber,value : tregister;
-                                            ressize: tcgsize; res :tregister);
-    begin
-      { first make sure that the bit number is modulo 32 }
-
-      { not necessary, since if it's > 31, we have a range error -> will }
-      { be caught when range checking is on! (JM)                        }
-      { cg.a_op_const_reg(list,OP_AND,31,bitnumber);                     }
-
-      if tcgsize2unsigned[bitsize]<>tcgsize2unsigned[ressize] then
-        begin
-          internalerror(2007020401);
-          { FIX ME! We're not allowed to modify the value register here! }
-
-          { shift value register "bitnumber" bits to the right }
-          cg.a_op_reg_reg(list,OP_SHR,bitsize,bitnumber,value);
-          { extract the bit we want }
-          cg.a_op_const_reg(list,OP_AND,bitsize,1,value);
-          cg.a_load_reg_reg(list,bitsize,ressize,value,res);
-        end
-      else
-        begin
-          { rotate value register "bitnumber" bits to the right }
-          cg.a_op_reg_reg_reg(list,OP_SHR,bitsize,bitnumber,value,res);
-          { extract the bit we want }
-          cg.a_op_const_reg(list,OP_AND,bitsize,1,res);
-        end;
-    end;
-
-
   function tcginnode.analizeset(const Aset:Tconstset; out setparts:tsetparts; out numparts: byte; is_small:boolean):boolean;
     var
       compares,maxcompares:word;
@@ -234,8 +182,8 @@ implementation
          { check if we can use smallset operation using btl which is limited
            to 32 bits, the left side may also not contain higher values !! }
          use_small:=(tsetdef(right.resultdef).settype=smallset) and not is_signed(left.resultdef) and
-                    ((left.resultdef.typ=orddef) and (torddef(left.resultdef).high<=32) or
-                     (left.resultdef.typ=enumdef) and (tenumdef(left.resultdef).max<=32));
+                    ((left.resultdef.typ=orddef) and (torddef(left.resultdef).high<32) or
+                     (left.resultdef.typ=enumdef) and (tenumdef(left.resultdef).max<32));
 
          { Can we generate jumps? Possible for all types of sets }
          checkgenjumps:=(right.nodetype=setconstn) and
@@ -258,6 +206,7 @@ implementation
     procedure tcginnode.pass_generate_code;
        var
          adjustment : aint;
+         l, l2      : tasmlabel;
          href : treference;
          hr,hr2,
          pleftreg   : tregister;
@@ -268,7 +217,6 @@ implementation
          genjumps,
          use_small  : boolean;
          i,numparts : byte;
-         l, l2      : tasmlabel;
          needslabel : Boolean;
        begin
          { We check first if we can generate jumps, this can be done
@@ -375,30 +323,25 @@ implementation
           begin
             { location is always LOC_REGISTER }
             location_reset(location, LOC_REGISTER, uopsize{def_cgsize(resultdef)});
+            { allocate a register for the result }
+            location.register := cg.getintregister(current_asmdata.CurrAsmList, uopsize);
 
             { We will now generated code to check the set itself, no jmps,
               handle smallsets separate, because it allows faster checks }
             if use_small then
              begin
                {****************************  SMALL SET **********************}
-               if left.nodetype=ordconstn then
+               if left.location.loc=LOC_CONSTANT then
                 begin
-                  location_force_reg(current_asmdata.CurrAsmList, right.location, uopsize, true);
-                  location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
-                  { first SHR the register }
-                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, uopsize, tordconstnode(left).value and 31, right.location.register, location.register);
-                  { then extract the lowest bit }
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, uopsize, 1, location.register);
+                  cg.a_bit_test_const_loc_reg(current_asmdata.CurrAsmList,location.size,
+                    left.location.value,right.location,
+                    location.register);
                 end
                else
                 begin
-                  location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
-                  location_force_reg(current_asmdata.CurrAsmList, right.location, uopsize, false);
-                  { allocate a register for the result }
-                  location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
-                  { emit bit test operation }
-                  emit_bit_test_reg_reg(current_asmdata.CurrAsmList,left.location.size,left.location.register,
-                      right.location.register,location.size,location.register);
+                  location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
+                  cg.a_bit_test_reg_loc_reg(current_asmdata.CurrAsmList,left.location.size,
+                    location.size,left.location.register,right.location,location.register);
                 end;
              end
             else
@@ -413,59 +356,39 @@ implementation
                   { assumption (other cases will be caught by range checking) (JM)  }
 
                   { load left in register }
-                  location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
-                  if left.location.loc = LOC_CREGISTER then
-                    hr := cg.getintregister(current_asmdata.CurrAsmList,opsize)
-                  else
-                    hr := left.location.register;
-                  { load right in register }
-                  hr2:=cg.getintregister(current_asmdata.CurrAsmList, uopsize);
-                  cg.a_load_const_reg(current_asmdata.CurrAsmList, uopsize, right.location.value, hr2);
-
+                  location_force_reg(current_asmdata.CurrAsmList,left.location,location.size,true);
+                  location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
                   { emit bit test operation }
-                  emit_bit_test_reg_reg(current_asmdata.CurrAsmList, left.location.size, left.location.register, hr2, uopsize, hr2);
+                  cg.a_bit_test_reg_reg_reg(current_asmdata.CurrAsmList,
+                    left.location.size,right.location.size,location.size,
+                    left.location.register, right.location.register,location.register);
 
-                  { if left > 31 then hr := 0 else hr := $ffffffff }
-                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SUB, uopsize, 32, left.location.register, hr);
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_SAR, uopsize, 31, hr);
+                  { now zero the result if left > nr_of_bits_in_right_register }
+                  hr := cg.getintregister(current_asmdata.CurrAsmList,location.size);
+                  { if left > tcgsize2size[opsize]*8 then hr := 0 else hr := $ffffffff }
+                  { (left.location.size = location.size at this point) }
+                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SUB, location.size, tcgsize2size[opsize]*8, left.location.register, hr);
+                  cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_SAR, location.size, (tcgsize2size[opsize]*8)-1, hr);
 
-                  { if left > 31, then result := 0 else result := result of bit test }
-                  cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_AND, uopsize, hr, hr2);
-                  { allocate a register for the result }
-                  location.register := cg.getintregister(current_asmdata.CurrAsmList,location.size);
-                  cg.a_load_reg_reg(current_asmdata.CurrAsmList, uopsize, location.size, hr2, location.register);
+                  { if left > tcgsize2size[opsize]*8-1, then result := 0 else result := result of bit test }
+                  cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_AND, location.size, hr, location.register);
                 end { of right.location.loc=LOC_CONSTANT }
                { do search in a normal set which could have >32 elementsm
                  but also used if the left side contains higher values > 32 }
-               else if left.nodetype=ordconstn then
+               else if (left.location.loc=LOC_CONSTANT) then
                 begin
-                  if (tordconstnode(left).value < 0) or ((tordconstnode(left).value shr 3) >= right.resultdef.size) then
+                  if (left.location.value < 0) or ((left.location.value shr 3) >= right.resultdef.size) then
                     {should be caught earlier }
                     internalerror(2007020402);
 
-                  { use location.register as scratch register here }
-                  if (target_info.endian = endian_little) then
-                    inc(right.location.reference.offset,tordconstnode(left).value shr 3)
-                  else
-                    { adjust for endianess differences }
-                    inc(right.location.reference.offset,(tordconstnode(left).value shr 3) xor 3);
-                  { allocate a register for the result }
-                  location.register := cg.getintregister(current_asmdata.CurrAsmList,location.size);
-                  cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_8,location.size,right.location.reference, location.register);
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,location.size,tordconstnode(left).value and 7,
-                    location.register);
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,location.size,1,location.register);
+                  cg.a_bit_test_const_loc_reg(current_asmdata.CurrAsmList,location.size,left.location.value,
+                    right.location,location.register);
                 end
                else
                 begin
                   location_force_reg(current_asmdata.CurrAsmList, left.location, opsize, true);
                   pleftreg := left.location.register;
 
-                  location_freetemp(current_asmdata.CurrAsmList,left.location);
-
-                  { allocate a register for the result }
-                  location.register := cg.getintregister(current_asmdata.CurrAsmList, uopsize);
-
                   if (opsize >= OS_S8) or { = if signed }
                      ((left.resultdef.typ=orddef)  and (torddef(left.resultdef).high > tsetdef(right.resultdef).setmax)) or
                      ((left.resultdef.typ=enumdef) and (tenumdef(left.resultdef).max > tsetdef(right.resultdef).setmax)) then
@@ -474,47 +397,16 @@ implementation
                       current_asmdata.getjumplabel(l2);
                       needslabel := True;
 
-                      cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize, OC_BE, tsetdef(right.resultdef).setmax, pleftreg, l);
+                      cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, left.location.size, OC_BE, tsetdef(right.resultdef).setmax, pleftreg, l);
 
-                      cg.a_load_const_reg(current_asmdata.CurrAsmList, opsize, 0, location.register);
+                      cg.a_load_const_reg(current_asmdata.CurrAsmList, location.size, 0, location.register);
                       cg.a_jmp_always(current_asmdata.CurrAsmList, l2);
 
                       cg.a_label(current_asmdata.CurrAsmList, l);
                     end;
 
-                  case right.location.loc of
-                    LOC_REGISTER, LOC_CREGISTER :
-                      begin
-                        cg.a_load_reg_reg(current_asmdata.CurrAsmList, uopsize, uopsize, right.location.register, location.register);
-                      end;
-                    LOC_CREFERENCE, LOC_REFERENCE :
-                      begin
-                        hr := cg.getaddressregister(current_asmdata.CurrAsmList);
-                        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, uopsize, 5, pleftreg, hr);
-                        cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_SHL, uopsize, 2, hr);
-
-                        href := right.location.reference;
-                        if (href.base = NR_NO) then
-                          href.base := hr
-                        else if (right.location.reference.index = NR_NO) then
-                          href.index := hr
-                        else
-                          begin
-                            hr2 := cg.getaddressregister(current_asmdata.CurrAsmList);
-                            cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, href, hr2);
-                            reference_reset_base(href, hr2, 0);
-                            href.index := hr;
-                          end;
-                        cg.a_load_ref_reg(current_asmdata.CurrAsmList, uopsize, uopsize, href, location.register);
-                      end
-                    else
-                      internalerror(2007020403);
-                  end;
-
-                  hr := cg.getintregister(current_asmdata.CurrAsmList, uopsize);
-                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_AND, uopsize, 31, pleftreg, hr);
-                  cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_SHR, uopsize, hr, location.register);
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, uopsize, 1, location.register);
+                  cg.a_bit_test_reg_loc_reg(current_asmdata.CurrAsmList,left.location.size,location.size,
+                    left.location.register,right.location,location.register);
 
                   if needslabel then
                     cg.a_label(current_asmdata.CurrAsmList, l2);
diff --git a/compiler/options.pas b/compiler/options.pas
index 485b494422..416160eb0e 100644
--- a/compiler/options.pas
+++ b/compiler/options.pas
@@ -1990,6 +1990,7 @@ begin
   def_system_macro('FPC_HAS_STR_CURRENCY');
   def_system_macro('FPC_REAL2REAL_FIXED');
   def_system_macro('FPC_STRTOCHARARRAYPROC');
+  def_system_macro('FPC_NEW_BIGENDIAN_SETS');
 
 {$if defined(x86) or defined(arm)}
   def_system_macro('INTERNAL_BACKTRACE');
diff --git a/compiler/ppcgen/ngppcadd.pas b/compiler/ppcgen/ngppcadd.pas
index 66d8471dff..00e929d898 100644
--- a/compiler/ppcgen/ngppcadd.pas
+++ b/compiler/ppcgen/ngppcadd.pas
@@ -416,13 +416,13 @@ implementation
                    internalerror(43244);
                   if (right.location.loc = LOC_CONSTANT) then
                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_OR,OS_INT,
-                      aint(aword(1) shl aword(right.location.value)),
+                      aint((aword(1) shl (resultdef.size*8-1)) shr aword(right.location.value)),
                       left.location.register,location.register)
                   else
                     begin
                       tmpreg := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-                      cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,1,tmpreg);
-                      cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_INT,
+                      cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,aint((aword(1) shl (resultdef.size*8-1))),tmpreg);
+                      cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,
                         right.location.register,tmpreg);
                       if left.location.loc <> LOC_CONSTANT then
                         cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_OR,OS_INT,tmpreg,
diff --git a/compiler/ppu.pas b/compiler/ppu.pas
index b8a5b987ee..ead1b27f6b 100644
--- a/compiler/ppu.pas
+++ b/compiler/ppu.pas
@@ -43,7 +43,7 @@ type
 {$endif Test_Double_checksum}
 
 const
-  CurrentPPUVersion=79;
+  CurrentPPUVersion=80;
 
 { buffer sizes }
   maxentrysize = 1024;
diff --git a/compiler/ptconst.pas b/compiler/ptconst.pas
index 1de1cac1ee..fb74d01178 100644
--- a/compiler/ptconst.pas
+++ b/compiler/ptconst.pas
@@ -546,7 +546,7 @@ implementation
            Psetbytes = ^setbytes;
         var
           p : tnode;
-          i,j : longint;
+          i : longint;
         begin
           p:=comp_expr(true);
           if p.nodetype=setconstn then
@@ -564,21 +564,18 @@ implementation
                   { arrays of 32-bit values CEC          }
                   if source_info.endian = target_info.endian then
                     begin
+{$if defined(FPC_NEW_BIGENDIAN_SETS) or defined(FPC_LITTLE_ENDIAN)}
                       for i:=0 to p.resultdef.size-1 do
                         list.concat(tai_const.create_8bit(Psetbytes(tsetconstnode(p).value_set)^[i]));
+{$else}
+                      for i:=0 to p.resultdef.size-1 do
+                        list.concat(tai_const.create_8bit(reverse_byte(Psetbytes(tsetconstnode(p).value_set)^[i xor 3])));
+{$endif}
                     end
                   else
                     begin
-                      { store as longint values in swaped format }
-                      j:=0;
-                      for i:=0 to ((p.resultdef.size-1) div 4) do
-                        begin
-                          list.concat(tai_const.create_8bit(Psetbytes(tsetconstnode(p).value_set)^[j+3]));
-                          list.concat(tai_const.create_8bit(Psetbytes(tsetconstnode(p).value_set)^[j+2]));
-                          list.concat(tai_const.create_8bit(Psetbytes(tsetconstnode(p).value_set)^[j+1]));
-                          list.concat(tai_const.create_8bit(Psetbytes(tsetconstnode(p).value_set)^[j]));
-                          Inc(j,4);
-                        end;
+                      for i:=0 to p.resultdef.size-1 do
+                        list.concat(tai_const.create_8bit(reverse_byte(Psetbytes(tsetconstnode(p).value_set)^[i])));
                     end;
                 end;
             end
diff --git a/compiler/x86/nx86set.pas b/compiler/x86/nx86set.pas
index c6f0b7f243..628192c19f 100644
--- a/compiler/x86/nx86set.pas
+++ b/compiler/x86/nx86set.pas
@@ -162,8 +162,8 @@ implementation
          { check if we can use smallset operation using btl which is limited
            to 32 bits, the left side may also not contain higher values or be signed !! }
          use_small:=(tsetdef(right.resultdef).settype=smallset) and not is_signed(left.resultdef) and
-                    ((left.resultdef.typ=orddef) and (torddef(left.resultdef).high<=32) or
-                     (left.resultdef.typ=enumdef) and (tenumdef(left.resultdef).max<=32));
+                    ((left.resultdef.typ=orddef) and (torddef(left.resultdef).high<32) or
+                     (left.resultdef.typ=enumdef) and (tenumdef(left.resultdef).max<32));
 
          { Can we generate jumps? Possible for all types of sets }
          genjumps:=(right.nodetype=setconstn) and
@@ -191,6 +191,8 @@ implementation
 
          if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
+         if (right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG]) then
+           location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
 
          if genjumps then
           begin
@@ -388,8 +390,19 @@ implementation
                     internalerror(2007020201);
 
                   location.resflags:=F_NE;
-                  inc(right.location.reference.offset,tordconstnode(left).value shr 3);
-                  emit_const_ref(A_TEST,S_B,1 shl (tordconstnode(left).value and 7),right.location.reference);
+                  case right.location.loc of
+                    LOC_REFERENCE,LOC_CREFERENCE:
+                      begin
+                        inc(right.location.reference.offset,tordconstnode(left).value shr 3);
+                        emit_const_ref(A_TEST,S_B,1 shl (tordconstnode(left).value and 7),right.location.reference);
+                      end;
+                    LOC_REGISTER,LOC_CREGISTER:
+                      begin
+                        emit_const_reg(A_TEST,TCGSize2OpSize[right.location.size],1 shl (tordconstnode(left).value),right.location.register);
+                      end;
+                    else
+                      internalerror(2007051901);
+                  end;
                 end
                else
                 begin
diff --git a/rtl/inc/compproc.inc b/rtl/inc/compproc.inc
index 015a4beb80..a3d3a8ea01 100644
--- a/rtl/inc/compproc.inc
+++ b/rtl/inc/compproc.inc
@@ -32,8 +32,15 @@ type
   fpc_big_chararray = array[0..0] of char;
   fpc_big_widechararray = array[0..0] of widechar;
 {$endif ndef FPC_STRTOCHARARRAYPROC}
+{$ifdef FPC_NEW_BIGENDIAN_SETS}
+  fpc_small_set = bitpacked array[0..31] of 0..1;
+  fpc_normal_set = bitpacked array[0..255] of 0..1;
+{$else}
   fpc_small_set = longint;
   fpc_normal_set = array[0..7] of longint;
+{$endif}
+  fpc_normal_set_byte = array[0..31] of byte;
+  fpc_normal_set_long = array[0..7] of longint;
 
 
 {$ifdef FPC_HAS_FEATURE_HEAP}
@@ -410,7 +417,6 @@ function fpc_set_create_element(b : byte): fpc_normal_set; compilerproc;
 function fpc_set_set_byte(const source: fpc_normal_set; b : byte): fpc_normal_set; compilerproc;
 function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_set; compilerproc;
 function fpc_set_set_range(const orgset: fpc_normal_set; l,h : byte): fpc_normal_set; compilerproc;
-function fpc_set_in_byte(const p: fpc_normal_set; b: byte): boolean; compilerproc;
 function fpc_set_add_sets(const set1,set2: fpc_normal_set): fpc_normal_set; compilerproc;
 function fpc_set_mul_sets(const set1,set2: fpc_normal_set): fpc_normal_set; compilerproc;
 function fpc_set_sub_sets(const set1,set2: fpc_normal_set): fpc_normal_set; compilerproc;
@@ -423,7 +429,6 @@ procedure fpc_varset_create_element(b,size : ptrint; var data); compilerproc;
 procedure fpc_varset_set(const source;var dest; b,size : ptrint); compilerproc;
 procedure fpc_varset_unset(const source;var dest; b,size : ptrint); compilerproc;
 procedure fpc_varset_set_range(const orgset; var dest;l,h,size : ptrint); compilerproc;
-function fpc_varset_in(const p; b : ptrint): boolean; compilerproc;
 procedure fpc_varset_add_sets(const set1,set2; var dest;size : ptrint); compilerproc;
 procedure fpc_varset_mul_sets(const set1,set2; var dest;size : ptrint); compilerproc;
 procedure fpc_varset_sub_sets(const set1,set2; var dest;size : ptrint); compilerproc;
diff --git a/rtl/inc/genset.inc b/rtl/inc/genset.inc
index 4d05f5904e..8224831ded 100644
--- a/rtl/inc/genset.inc
+++ b/rtl/inc/genset.inc
@@ -23,8 +23,8 @@ function fpc_set_load_small(l: fpc_small_set): fpc_normal_set; [public,alias:'FP
   load a normal set p from a smallset l
  }
  begin
-   fpc_set_load_small[0] := l;
-   FillDWord(fpc_set_load_small[1],7,0);
+   FillDWord(fpc_set_load_small,sizeof(fpc_set_load_small) div 4,0);
+   move(l,fpc_set_load_small,sizeof(l));
  end;
 {$endif FPC_SYSTEM_HAS_FPC_SET_LOAD_SMALL}
 
@@ -36,7 +36,11 @@ function fpc_set_create_element(b : byte): fpc_normal_set;[public,alias:'FPC_SET
  }
  begin
    FillDWord(fpc_set_create_element,SizeOf(fpc_set_create_element) div 4,0);
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
    fpc_set_create_element[b div 32] := 1 shl (b mod 32);
+{$else}
+   fpc_set_create_element[b] := 1;
+{$endif}
  end;
 {$endif FPC_SYSTEM_HAS_FPC_SET_CREATE_ELEMENT}
 
@@ -50,9 +54,13 @@ function fpc_set_create_element(b : byte): fpc_normal_set;[public,alias:'FPC_SET
    c: longint;
   begin
     move(source,fpc_set_set_byte,sizeof(source));
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     c := fpc_set_set_byte[b div 32];
     c := (1 shl (b mod 32)) or c;
     fpc_set_set_byte[b div 32] := c;
+{$else}
+    fpc_set_set_byte[b] := 1;
+{$endif}
   end;
 {$endif FPC_SYSTEM_HAS_FPC_SET_SET_BYTE}
 
@@ -68,9 +76,13 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
    c: longint;
   begin
     move(source,fpc_set_unset_byte,sizeof(source));
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     c := fpc_set_unset_byte[b div 32];
     c := c and not (1 shl (b mod 32));
     fpc_set_unset_byte[b div 32] := c;
+{$else}
+    fpc_set_unset_byte[b] := 0;
+{$endif}
   end;
 {$endif FPC_SYSTEM_HAS_FPC_SET_UNSET_BYTE}
 
@@ -87,30 +99,24 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
     move(orgset,fpc_set_set_range,sizeof(orgset));
     for i:=l to h do
       begin
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
         c := fpc_set_set_range[i div 32];
         c := (1 shl (i mod 32)) or c;
         fpc_set_set_range[i div 32] := c;
+{$else}
+        fpc_set_set_range[i] := 1;
+{$endif}
       end;
   end;
 {$endif ndef FPC_SYSTEM_HAS_FPC_SET_SET_RANGE}
 
 
-{$ifndef FPC_SYSTEM_HAS_FPC_SET_IN_BYTE}
-
- function fpc_set_in_byte(const p: fpc_normal_set; b: byte): boolean; [public,alias:'FPC_SET_IN_BYTE']; compilerproc;
- {
-   tests if the element b is in the set p the carryflag is set if it present
- }
-  begin
-    fpc_set_in_byte := (p[b div 32] and (1 shl (b mod 32))) <> 0;
-  end;
-{$endif}
-
-
 {$ifndef FPC_SYSTEM_HAS_FPC_SET_ADD_SETS}
  function fpc_set_add_sets(const set1,set2: fpc_normal_set): fpc_normal_set;[public,alias:'FPC_SET_ADD_SETS']; compilerproc;
  var
-   dest: fpc_normal_set absolute fpc_set_add_sets;
+   src1: fpc_normal_set_long absolute set1;
+   src2: fpc_normal_set_long absolute set2;
+   dest: fpc_normal_set_long absolute fpc_set_add_sets;
  {
    adds set1 and set2 into set dest
  }
@@ -118,7 +124,7 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
     i: integer;
    begin
      for i:=0 to 7 do
-       dest[i] := set1[i] or set2[i];
+       dest[i] := src1[i] or src2[i];
    end;
 {$endif}
 
@@ -126,6 +132,8 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
 {$ifndef FPC_SYSTEM_HAS_FPC_SET_MUL_SETS}
  function fpc_set_mul_sets(const set1,set2: fpc_normal_set): fpc_normal_set;[public,alias:'FPC_SET_MUL_SETS']; compilerproc;
  var
+   src1: fpc_normal_set_long absolute set1;
+   src2: fpc_normal_set_long absolute set2;
    dest: fpc_normal_set absolute fpc_set_mul_sets;
  {
    multiplies (takes common elements of) set1 and set2 result put in dest
@@ -134,7 +142,7 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
     i: integer;
    begin
      for i:=0 to 7 do
-       dest[i] := set1[i] and set2[i];
+       dest[i] := src1[i] and src2[i];
    end;
 {$endif}
 
@@ -142,6 +150,8 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
 {$ifndef FPC_SYSTEM_HAS_FPC_SET_SUB_SETS}
  function fpc_set_sub_sets(const set1,set2: fpc_normal_set): fpc_normal_set;[public,alias:'FPC_SET_SUB_SETS']; compilerproc;
  var
+   src1: fpc_normal_set_long absolute set1;
+   src2: fpc_normal_set_long absolute set2;
    dest: fpc_normal_set absolute fpc_set_sub_sets;
  {
   computes the diff from set1 to set2 result in dest
@@ -150,7 +160,7 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
     i: integer;
    begin
      for i:=0 to 7 do
-       dest[i] := set1[i] and not set2[i];
+       dest[i] := src1[i] and not src2[i];
    end;
 {$endif}
 
@@ -158,6 +168,8 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
 {$ifndef FPC_SYSTEM_HAS_FPC_SET_SYMDIF_SETS}
  function fpc_set_symdif_sets(const set1,set2: fpc_normal_set): fpc_normal_set;[public,alias:'FPC_SET_SYMDIF_SETS']; compilerproc;
  var
+   src1: fpc_normal_set_long absolute set1;
+   src2: fpc_normal_set_long absolute set2;
    dest: fpc_normal_set absolute fpc_set_symdif_sets;
  {
    computes the symetric diff from set1 to set2 result in dest
@@ -166,7 +178,7 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
     i: integer;
    begin
      for i:=0 to 7 do
-       dest[i] := set1[i] xor set2[i];
+       dest[i] := src1[i] xor src2[i];
    end;
 {$endif}
 
@@ -177,10 +189,12 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
  }
    var
     i: integer;
+    src1: fpc_normal_set_long absolute set1;
+    src2: fpc_normal_set_long absolute set2;
    begin
      fpc_set_comp_sets:= false;
      for i:=0 to 7 do
-       if set1[i] <> set2[i] then
+       if src1[i] <> src2[i] then
          exit;
      fpc_set_comp_sets:= true;
    end;
@@ -195,10 +209,12 @@ function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_
  }
  var
   i : integer;
+  src1: fpc_normal_set_long absolute set1;
+  src2: fpc_normal_set_long absolute set2;
  begin
    fpc_set_contains_sets:= false;
    for i:=0 to 7 do
-     if (set1[i] and not set2[i]) <> 0 then
+     if (src1[i] and not src2[i]) <> 0 then
        exit;
    fpc_set_contains_sets:= true;
  end;
@@ -229,10 +245,18 @@ procedure fpc_varset_load(const l;sourcesize : longint;var dest;size : ptrint);
 }
 procedure fpc_varset_create_element(b,size : ptrint; var data); compilerproc;
   type
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray = array[0..sizeof(sizeint)-1] of byte;
+{$else}
+    tbsetarray = bitpacked array[0..sizeof(sizeint)-1] of 0..1;
+{$endif}
   begin
     FillChar(data,size,0);
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray(data)[b div 8]:=1 shl (b mod 8);
+{$else}
+    tbsetarray(data)[b]:=1;
+{$endif}
   end;
 {$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_CREATE_ELEMENT}
 
@@ -243,10 +267,18 @@ procedure fpc_varset_create_element(b,size : ptrint; var data); compilerproc;
 }
 procedure fpc_varset_set(const source;var dest; b,size : ptrint); compilerproc;
   type
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray = array[0..sizeof(sizeint)-1] of byte;
+{$else}
+    tbsetarray = bitpacked array[0..sizeof(sizeint)-1] of 0..1;
+{$endif}
   begin
     move(source,dest,size);
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray(dest)[b div 8]:=tbytearray(dest)[b div 8] or (1 shl (b mod 8));
+{$else}
+    tbsetarray(dest)[b]:=1;
+{$endif}
   end;
 {$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SET_BYTE}
 
@@ -258,10 +290,18 @@ procedure fpc_varset_set(const source;var dest; b,size : ptrint); compilerproc;
 }
 procedure fpc_varset_unset(const source;var dest; b,size : ptrint); compilerproc;
   type
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray = array[0..sizeof(sizeint)-1] of byte;
+{$else}
+    tbsetarray = bitpacked array[0..sizeof(sizeint)-1] of 0..1;
+{$endif}
   begin
     move(source,dest,size);
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray(dest)[b div 8]:=tbytearray(dest)[b div 8] and not (1 shl (b mod 8));
+{$else}
+    tbsetarray(dest)[b]:=0;
+{$endif}
   end;
 {$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_UNSET_BYTE}
 
@@ -272,30 +312,25 @@ procedure fpc_varset_unset(const source;var dest; b,size : ptrint); compilerproc
 }
 procedure fpc_varset_set_range(const orgset; var dest;l,h,size : ptrint); compilerproc;
   type
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
     tbytearray = array[0..sizeof(sizeint)-1] of byte;
+{$else}
+    tbsetarray = bitpacked array[0..sizeof(sizeint)-1] of 0..1;
+{$endif}
   var
     i : ptrint;
   begin
     move(orgset,dest,size);
     for i:=l to h do
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
        tbytearray(dest)[i div 8]:=(1 shl (i mod 8)) or tbytearray(dest)[i div 8];
+{$else}
+       tbsetarray(dest)[i]:=1;
+{$endif}
   end;
 {$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SET_RANGE}
 
 
-{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_IN_BYTE}
-{
-  tests if the element b is in the set p the carryflag is set if it present
-}
-function fpc_varset_in(const p; b : ptrint): boolean; compilerproc;
-  type
-    tbytearray = array[0..sizeof(sizeint)-1] of byte;
-  begin
-    fpc_varset_in:=(tbytearray(p)[b div 8] and (1 shl (b mod 8)))<>0;
-  end;
-{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_IN_BYTE}
-
-
 {$ifndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
 {
   adds set1 and set2 into set dest
diff --git a/rtl/objpas/typinfo.pp b/rtl/objpas/typinfo.pp
index 9135725b50..42a5eaab94 100644
--- a/rtl/objpas/typinfo.pp
+++ b/rtl/objpas/typinfo.pp
@@ -392,23 +392,40 @@ end;
 
 Function SetToString(TypeInfo: PTypeInfo; Value: Integer; Brackets: Boolean) : String;
 
+{$ifdef FPC_NEW_BIGENDIAN_SETS}
+type
+  tsetarr = bitpacked array[0..31] of 0..1;
+{$endif}
 Var
   I : Integer;
   PTI : PTypeInfo;
 
 begin
+{$if defined(FPC_NEW_BIGENDIAN_SETS) and defined(FPC_BIG_ENDIAN)}
+  case GetTypeData(TypeInfo)^.OrdType of
+    otSByte,otUByte: Value:=Value shl 24;
+    otSWord,otUWord: Value:=Value shl 16;
+  end;
+{$endif}
+
   PTI:=GetTypeData(TypeInfo)^.CompType;
   Result:='';
   For I:=0 to SizeOf(Integer)*8-1 do
     begin
+{$ifdef FPC_NEW_BIGENDIAN_SETS}
+      if (tsetarr(Value)[i]<>0) then
+{$else}
       if ((Value and 1)<>0) then
+{$endif}
         begin
           If Result='' then
             Result:=GetEnumName(PTI,i)
           else
             Result:=Result+','+GetEnumName(PTI,I);
         end;
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
       Value:=Value shr 1;
+{$endif FPC_NEW_BIGENDIAN_SETS}
     end;
   if Brackets then
     Result:='['+Result+']';
diff --git a/rtl/powerpc/set.inc b/rtl/powerpc/set.inc
index 4fe6e5247c..d39e72dbb1 100644
--- a/rtl/powerpc/set.inc
+++ b/rtl/powerpc/set.inc
@@ -53,6 +53,7 @@ asm
         stw     r0,24(r3)
         stw     r0,28(r3)
 
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
         // r0 := 1 shl r4[27-31] -> bit index in dword (rotate instructions
         // with count in register only consider lower 5 bits of this register)
         li      r0,1
@@ -62,9 +63,17 @@ asm
         // (((b div 8) div 4)*4= (b div 8) and not(3))
         // r5 := (r4 rotl(32-3)) and (0x01ffffff8)
         rlwinm  r4,r4,31-3+1,3,31-2
-
         // store the result
         stwx    r0,r3,r4
+{$else}
+        { must be done byte- instead of dword-based }
+        rlwinm  r5,r4,0,31-3+1,31
+        li      r0,0x80
+        srw     r0,r0,r5
+        srwi    r4,r4,3
+        // store the result
+        stbx    r0,r3,r4
+{$endif}
 end;
 
 
@@ -86,6 +95,7 @@ asm
        stfd     f2,16(r3)
        stfd     f3,24(r3)
 
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
        // get the index of the correct *dword* in the set
        // r0 := (r5 rotl(32-3)) and (0x0fffffff8)
        rlwinm   r0,r5,31-3+1,3,31-2
@@ -99,6 +109,17 @@ asm
        or       r5,r4,r5
        // store result
        stw      r5,0(r3)
+{$else}
+        { must be done byte- instead of dword-based }
+        srwi    r6,r5,3
+        lbzx    r7,r6,r3
+        rlwinm  r5,r5,0,31-3+1,31
+        li      r0,0x80
+        srw     r0,r0,r5
+        or      r7,r7,r0
+        // store the result
+        stbx    r7,r6,r3
+{$endif}
 end;
 
 
@@ -120,6 +141,7 @@ asm
        stfd     f1,8(r3)
        stfd     f2,16(r3)
        stfd     f3,24(r3)
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
        // get the index of the correct *dword* in the set
        // r0 := (r4 rotl(32-3)) and (0x0fffffff8)
        rlwinm   r0,r5,31-3+1,3,31-2
@@ -132,9 +154,22 @@ asm
        andc     r5,r4,r5
        // store result
        stw      r4,0(r3)
+{$else}
+        { must be done byte- instead of dword-based }
+        srwi    r6,r5,3
+        lbzx    r7,r6,r3
+        rlwinm  r5,r5,0,31-3+1,31
+        li      r0,0x80
+        srw     r0,r0,r5
+        andc    r7,r7,r0
+        // store the result
+        stbx    r7,r6,r3
+{$endif}
 end;
 
 
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
+
 {$define FPC_SYSTEM_HAS_FPC_SET_SET_RANGE}
 function fpc_set_set_range(const orgset: fpc_normal_set; l,h : byte): fpc_normal_set;assembler; compilerproc;
 {
@@ -196,29 +231,7 @@ asm
   stw    r5,0(r3)             // store to set
 .Lset_range_exit:
 end;
-
-
-{$define FPC_SYSTEM_HAS_FPC_SET_IN_BYTE}
-function fpc_set_in_byte(const p: fpc_normal_set; b : byte): boolean;compilerproc;assembler;[public,alias:'FPC_SET_IN_BYTE'];
-{
-  tests if the element b is in the set p, the **zero** flag is cleared if it's present
-
-  on entry: p in r3, b in r4
-}
-asm
-       // get the index of the correct *dword* in the set
-       // r0 := (r4 rotl(32-3)) and (0x0fffffff8)
-       rlwinm   r0,r4,31-3+1,3,31-2
-
-       // load dword in which the bit has to be tested
-       lwzx     r3,r3,r0
-
-       // r4 := 32 - r4 (no problem if r4 > 32, the rlwnm next does a mod 32)
-       subfic   r4,r4,32
-       // r3 := (r3 shr (r4 mod 32)) and 1
-       rlwnm    r3,r3,r4,31,31
-end;
-
+{$endif}
 
 
 {$define FPC_SYSTEM_HAS_FPC_SET_ADD_SETS}
@@ -354,169 +367,3 @@ asm
        cntlzw   r3,r0
        srwi.    r3,r3,5
 end;
-
-
-
-{$ifdef LARGESETS}
-
-procedure do_set(p : pointer;b : word);assembler;[public,alias:'FPC_SET_SET_WORD'];
-{
-  sets the element b in set p works for sets larger than 256 elements
-  not yet use by the compiler so
-}
-asm
-       pushl %eax
-       movl p,%edi
-       movw b,%ax
-       andl $0xfff8,%eax
-       shrl $3,%eax
-       addl %eax,%edi
-       movb 12(%ebp),%al
-       andl $7,%eax
-       btsl %eax,(%edi)
-       popl %eax
-end;
-
-
-procedure do_in(p : pointer;b : word);assembler;[public,alias:'FPC_SET_IN_WORD'];
-{
-  tests if the element b is in the set p the carryflag is set if it present
-  works for sets larger than 256 elements
-}
-asm
-        pushl %eax
-        movl p,%edi
-        movw b,%ax
-        andl $0xfff8,%eax
-        shrl $3,%eax
-        addl %eax,%edi
-        movb 12(%ebp),%al
-        andl $7,%eax
-        btl %eax,(%edi)
-        popl %eax
-end;
-
-
-procedure add_sets(set1,set2,dest : pointer;size : longint);assembler;[public,alias:'FPC_SET_ADD_SETS_SIZE'];
-{
-  adds set1 and set2 into set dest size is the number of bytes in the set
-}
-asm
-      movl set1,%esi
-      movl set2,%ebx
-      movl dest,%edi
-      movl size,%ecx
-  .LMADDSETSIZES1:
-      lodsl
-      orl (%ebx),%eax
-      stosl
-      addl $4,%ebx
-      decl %ecx
-      jnz .LMADDSETSIZES1
-end;
-
-
-procedure mul_sets(set1,set2,dest : pointer;size : longint);assembler;[public,alias:'FPC_SET_MUL_SETS_SIZE'];
-{
-  multiplies (i.E. takes common elements of) set1 and set2 result put in
-  dest size is the number of bytes in the set
-}
-asm
-         movl set1,%esi
-         movl set2,%ebx
-         movl dest,%edi
-         movl size,%ecx
-     .LMMULSETSIZES1:
-         lodsl
-         andl (%ebx),%eax
-         stosl
-         addl $4,%ebx
-         decl %ecx
-         jnz .LMMULSETSIZES1
-end;
-
-
-procedure sub_sets(set1,set2,dest : pointer;size : longint);assembler;[public,alias:'FPC_SET_SUB_SETS_SIZE'];
-asm
-         movl set1,%esi
-         movl set2,%ebx
-         movl dest,%edi
-         movl size,%ecx
-     .LMSUBSETSIZES1:
-         lodsl
-         movl (%ebx),%edx
-         notl %edx
-         andl %edx,%eax
-         stosl
-         addl $4,%ebx
-         decl %ecx
-         jnz .LMSUBSETSIZES1
-end;
-
-
-procedure sym_sub_sets(set1,set2,dest : pointer;size : longint);assembler;[public,alias:'FPC_SET_SYMDIF_SETS_SIZE'];
-{
-   computes the symetric diff from set1 to set2 result in dest
-}
-asm
-      movl set1,%esi
-      movl set2,%ebx
-      movl dest,%edi
-      movl size,%ecx
-  .LMSYMDIFSETSIZE1:
-      lodsl
-      movl (%ebx),%edx
-      xorl %edx,%eax
-      stosl
-      addl $4,%ebx
-      decl %ecx
-      jnz LMSYMDIFSETSIZE1
-end;
-
-
-procedure comp_sets(set1,set2 : pointer;size : longint);assembler;[public,alias:'FPC_SET_COMP_SETS_SIZE'];
-asm
-      movl set1,%esi
-      movl set2,%edi
-      movl size,%ecx
-  LMCOMPSETSIZES1:
-      lodsl
-      movl (%edi),%edx
-      cmpl %edx,%eax
-      jne  LMCOMPSETSIZEEND
-      addl $4,%edi
-      decl %ecx
-      jnz LMCOMPSETSIZES1
-      { we are here only if the two sets are equal
-        we have zero flag set, and that what is expected }
-  LMCOMPSETSIZEEND:
-end;
-
-{$IfNDef NoSetInclusion}
-procedure contains_sets(set1,set2 : pointer; size: longint);assembler;[public,alias:'FPC_SET_CONTAINS_SETS'];
-{
-  on exit, zero flag is set if set1 <= set2 (set2 contains set1)
-}
-asm
-        movl set1,%esi
-        movl set2,%edi
-        movl size,%ecx
-    LMCONTAINSSETS2:
-        movl (%esi),%eax
-        movl (%edi),%edx
-        andl %eax,%edx
-        cmpl %edx,%eax  {set1 and set2 = set1?}
-        jne  LMCONTAINSSETEND2
-        addl $4,%esi
-        addl $4,%edi
-        decl %ecx
-        jnz LMCONTAINSSETS2
-        { we are here only if set2 contains set1
-          we have zero flag set, and that what is expected }
-    LMCONTAINSSETEND2:
-end;
-{$EndIf NoSetInclusion}
-
-
-{$endif LARGESET}
-
diff --git a/rtl/powerpc64/set.inc b/rtl/powerpc64/set.inc
index 4409d782ed..e12b12802f 100644
--- a/rtl/powerpc64/set.inc
+++ b/rtl/powerpc64/set.inc
@@ -50,6 +50,7 @@ asm
         stw     r0,24(r3)
         stw     r0,28(r3)
 
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
         // r0 := 1 shl r4[27-31] -> bit index in dword (rotate instructions
         // with count in register only consider lower 5 bits of this register)
         li      r0,1
@@ -59,9 +60,17 @@ asm
         // (((b div 8) div 4)*4= (b div 8) and not(3))
         // r5 := (r4 rotl(32-3)) and (0x01ffffff8)
         rlwinm  r4,r4,31-3+1,3,31-2
-
         // store the result
         stwx    r0,r3,r4
+{$else}
+        { must be done byte- instead of dword-based }
+        rlwinm  r5,r4,0,31-3+1,31
+        li      r0,0x80
+        srw     r0,r0,r5
+        srwi    r4,r4,3
+        // store the result
+        stbx    r0,r3,r4
+{$endif}
 end;
 
 
@@ -83,6 +92,7 @@ asm
        stfd     f2,16(r3)
        stfd     f3,24(r3)
 
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
        // get the index of the correct *dword* in the set
        // r0 := (r5 rotl(32-3)) and (0x0fffffff8)
        rlwinm   r0,r5,31-3+1,3,31-2
@@ -96,6 +106,17 @@ asm
        or       r5,r4,r5
        // store result
        stw      r5,0(r3)
+{$else}
+        { must be done byte- instead of dword-based }
+        srwi    r6,r5,3
+        lbzx    r7,r6,r3
+        rlwinm  r5,r5,0,31-3+1,31
+        li      r0,0x80
+        srw     r0,r0,r5
+        or      r7,r7,r0
+        // store the result
+        stbx    r7,r6,r3
+{$endif}
 end;
 
 
@@ -117,6 +138,7 @@ asm
        stfd     f1,8(r3)
        stfd     f2,16(r3)
        stfd     f3,24(r3)
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
        // get the index of the correct *dword* in the set
        // r0 := (r4 rotl(32-3)) and (0x0fffffff8)
        rlwinm   r0,r5,31-3+1,3,31-2
@@ -129,8 +151,20 @@ asm
        andc     r5,r4,r5
        // store result
        stw      r4,0(r3)
+{$else}
+        { must be done byte- instead of dword-based }
+        srwi    r6,r5,3
+        lbzx    r7,r6,r3
+        rlwinm  r5,r5,0,31-3+1,31
+        li      r0,0x80
+        srw     r0,r0,r5
+        andc    r7,r7,r0
+        // store the result
+        stbx    r7,r6,r3
+{$endif}
 end;
 
+{$ifndef FPC_NEW_BIGENDIAN_SETS}
 
 {$define FPC_SYSTEM_HAS_FPC_SET_SET_RANGE}
 function fpc_set_set_range(const orgset: fpc_normal_set; l,h : byte): fpc_normal_set;assembler; compilerproc;
@@ -193,29 +227,7 @@ asm
   stw    r5,0(r3)             // store to set
 .Lset_range_exit:
 end;
-
-
-{$define FPC_SYSTEM_HAS_FPC_SET_IN_BYTE}
-function fpc_set_in_byte(const p: fpc_normal_set; b : byte): boolean;compilerproc;assembler;[public,alias:'FPC_SET_IN_BYTE'];
-{
-  tests if the element b is in the set p, the **zero** flag is cleared if it's present
-
-  on entry: p in r3, b in r4
-}
-asm
-       // get the index of the correct *dword* in the set
-       // r0 := (r4 rotl(32-3)) and (0x0fffffff8)
-       rlwinm   r0,r4,31-3+1,3,31-2
-
-       // load dword in which the bit has to be tested
-       lwzx     r3,r3,r0
-
-       // r4 := 32 - r4 (no problem if r4 > 32, the rlwnm next does a mod 32)
-       subfic   r4,r4,32
-       // r3 := (r3 shr (r4 mod 32)) and 1
-       rlwnm    r3,r3,r4,31,31
-end;
-
+{$endif}
 
 
 {$define FPC_SYSTEM_HAS_FPC_SET_ADD_SETS}
diff --git a/tests/test/tset7.pp b/tests/test/tset7.pp
new file mode 100644
index 0000000000..b31c5dba4e
--- /dev/null
+++ b/tests/test/tset7.pp
@@ -0,0 +1,36 @@
+{ test for subsetreg sets }
+
+{$packset 1}
+
+type
+  ta = 0..7;
+  tr = record
+    b: byte;
+    a: set of ta;
+    w: word;
+  end;
+
+
+procedure test(r: tr);
+var
+  b: ta;
+begin
+  b := 6;
+  if (r.b<>101) or
+     (r.w<>$abcd) or
+     (5 in r.a) or
+     (b in r.a) or
+     not(7 in r.a) or
+     ([1..3] * r.a <> [2..3]) then
+    halt(1);
+end;
+
+var
+  r: tr;
+begin
+  r.b:=101;
+  r.w:=$abcd;
+  r.a:=[2..3];
+  include(r.a,7);
+  test(r);
+end.
diff --git a/tests/webtbs/tw8660.pp b/tests/webtbs/tw8660.pp
index bebfb73849..c2fe540c03 100644
--- a/tests/webtbs/tw8660.pp
+++ b/tests/webtbs/tw8660.pp
@@ -25,11 +25,15 @@ var
 begin
   C := TClient.Create;
   C.Num := 2;
-  C.St := [ckVip, ckNormal]; // the numeric representation is 5
+  C.St := [ckVip, ckNormal]; // the numeric representation is 5 (on little endian systems)
   V := C.St;
   writeln(sizeof(V), ' ', byte(V)); // It's OK
   writeln(sizeof(C.St), ' ', byte(C.St)); // It's OK
+{$ifdef FPC_LITTLE_ENDIAN}
   if GetOrdProp(C, 'St')<>5 then
+{$else}
+  if GetOrdProp(C, 'St')<>160 then
+{$endif}
     halt(1);
   if GetSetProp(C, 'St')<>'ckNormal,ckVip' then
     halt(1);