这可能有点矫枉过正,但我想玩 SIMD。它可能写得更好,但我不太了解 SIMD。
如果您想要反转生成的位顺序,只需从 SIMD 方法中删除改组部分并将 (7 - i) 更改为 i
对于那些不熟悉 SIMD 的人来说,这种方法比普通的 for 循环快大约 3 倍。
public static byte ByteFrom8Bools(ReadOnlySpan<bool> bools)
{
if (bools.Length < 8)
Throw();
static void Throw() // Throwing in a separate method helps JIT produce better code, or so I've heard
{
throw new ArgumentException("Not enough booleans provided");
}
// these are JIT compile time constants, only one of the branches will be compiled
// depending on the CPU running this code, eliminating the branch entirely
if(Sse2.IsSupported && Ssse3.IsSupported)
{
// copy out the 64 bits all at once
ref readonly bool b = ref bools[0];
ref bool refBool = ref Unsafe.AsRef(b);
ulong ulongBools = Unsafe.As<bool, ulong>(ref refBool);
// load our 64 bits into a vector register
Vector128<byte> vector = Vector128.CreateScalarUnsafe(ulongBools).AsByte();
// this is just to propagate the 1 set bit in true bools to the most significant bit
Vector128<byte> allTrue = Vector128.Create((byte)1);
Vector128<byte> compared = Sse2.CompareEqual(vector, allTrue);
// reverse the bytes we care about, leave the rest in their place
Vector128<byte> shuffleMask = Vector128.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15);
Vector128<byte> shuffled = Ssse3.Shuffle(compared, shuffleMask);
// move the most significant bit of each byte into a bit of int
int mask = Sse2.MoveMask(shuffled);
// returning byte = returning the least significant byte from int
return (byte)mask;
}
else
{
// fall back to a more generic algorithm if there aren't the correct instructions on the CPU
byte bits = 0;
for (int i = 0; i < 8; i++)
{
bool b = bools[i];
bits |= (byte)(Unsafe.As<bool, byte>(ref b) << (7 - i));
}
return bits;
}
}