mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
L0 debug - fix windows bitmask decode
Keep threads created in EU range (0,7) Signed-off-by: Yates, Brandon <brandon.yates@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ac426f7fdc
commit
99ef6c499b
@@ -7,7 +7,8 @@
|
||||
|
||||
template <>
|
||||
void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
|
||||
const uint32_t numSubslicesPerSlice = (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
@@ -24,11 +25,20 @@ void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector
|
||||
|
||||
for (auto &thread : threads) {
|
||||
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
uint8_t *euData;
|
||||
|
||||
if (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) {
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * (thread.subslice / 2));
|
||||
auto eu = thread.eu % eusPerRow;
|
||||
auto dualEu = thread.subslice % 2;
|
||||
euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow));
|
||||
} else {
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
auto eu = thread.eu % eusPerRow;
|
||||
auto dualEu = thread.eu / (numberOfRows * eusPerRow);
|
||||
euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow));
|
||||
}
|
||||
|
||||
auto eu = thread.eu % eusPerRow;
|
||||
auto dualEu = thread.eu / (numberOfRows * eusPerRow);
|
||||
uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow));
|
||||
UNRECOVERABLE_IF(thread.thread > 7);
|
||||
*euData |= (1 << thread.thread);
|
||||
}
|
||||
@@ -36,7 +46,8 @@ void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector
|
||||
|
||||
template <>
|
||||
std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
|
||||
const uint32_t numSubslicesPerSlice = (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
@@ -66,8 +77,13 @@ std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBit
|
||||
std::bitset<8> bits(bitmask[offset]);
|
||||
for (uint32_t i = 0; i < 8; i++) {
|
||||
if (bits.test(i)) {
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + numEuPerSubslice * dualEu, i);
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + eusPerRow + numEuPerSubslice * dualEu, i);
|
||||
if (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) {
|
||||
threads.emplace_back(tile, slice, (subslice * 2) + dualEu, euIndex, i);
|
||||
threads.emplace_back(tile, slice, (subslice * 2) + dualEu, euIndex + eusPerRow, i);
|
||||
} else {
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + numEuPerSubslice * dualEu, i);
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + eusPerRow + numEuPerSubslice * dualEu, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user