Layout
0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0
8 9 10 11 12 13 14 15 0 0 0 0 0 0 0 0
16 17 18 19 20 21 22 23 0 0 0 0 0 0 0 0
24 25 26 27 28 29 30 31 0 0 0 0 0 0 0 0
32 33 34 35 36 37 38 39 0 0 0 0 0 0 0 0
40 41 42 43 44 45 46 47 0 0 0 0 0 0 0 0
48 49 50 51 52 53 54 55 0 0 0 0 0 0 0 0
56 57 58 59 60 61 62 63 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 8 16 24 32 40 48 56
0 0 0 0 0 0 0 0 1 9 17 25 33 41 49 57
0 0 0 0 0 0 0 0 2 10 18 26 34 42 50 58
0 0 0 0 0 0 0 0 3 11 19 27 35 43 51 59
0 0 0 0 0 0 0 0 4 12 20 28 36 44 52 60
0 0 0 0 0 0 0 0 5 13 21 29 37 45 53 61
0 0 0 0 0 0 0 0 6 14 22 30 38 46 54 62
0 0 0 0 0 0 0 0 7 15 23 31 39 47 55 63
Code on V100
// Populate this lane's slots of a_frag. Only lanes 0-3, 12-19 and 28-31
// write anything; every other lane leaves its fragment elements untouched.
int half_elements = a_frag.num_elements / 2;

// Slots [0, half_elements): lanes 0-3 and 16-19 store consecutive values
// counting up from a per-lane base.
if (lid < 4) {
    // Base is lid * 8, i.e. 0, 8, 16, 24 for lanes 0..3.
    for (int k = 0; k < half_elements; ++k) {
        a_frag.x[k] = lid * 8 + k;
    }
} else if (lid >= 16 && lid < 20) {
    // Base is lid * 8 - 96, i.e. 32, 40, 48, 56 for lanes 16..19.
    for (int k = 0; k < half_elements; ++k) {
        a_frag.x[k] = lid * 8 - 96 + k;
    }
}

// Slots [half_elements, num_elements): lanes 12-15 and 28-31 store values
// that advance by 8 per slot.
// NOTE(review): the offsets -76 and -88 only start the sequences at 0..3 and
// 4..7 when the first slot index here is 8 (num_elements == 16) -- confirm.
if (lid >= 12 && lid < 16) {
    for (int k = half_elements; k < a_frag.num_elements; ++k) {
        a_frag.x[k] = lid - 76 + k * 8;
    }
} else if (lid >= 28 && lid < 32) {
    for (int k = half_elements; k < a_frag.num_elements; ++k) {
        a_frag.x[k] = lid - 88 + k * 8;
    }
}