Revert "fix: tbx page fault manager hang issue"

This reverts commit 7d4e70a25b.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-02-12 09:26:47 +01:00
committed by Compute-Runtime-Automation
parent 7d4e70a25b
commit 116f7270be
12 changed files with 26 additions and 30 deletions

View File

@@ -41,8 +41,9 @@ bool AubHelper::isOneTimeAubWritableAllocationType(const AllocationType &type) {
case AllocationType::assertBuffer:
case AllocationType::tagBuffer:
case AllocationType::syncDispatchToken:
return true;
case AllocationType::bufferHostMemory:
return (NEO::debugManager.flags.SetBufferHostMemoryAlwaysAubWritable.get() ? false : true) || (NEO::debugManager.flags.EnableTbxPageFaultManager.get() == 1);
return NEO::debugManager.flags.SetBufferHostMemoryAlwaysAubWritable.get() ? false : true;
default:
return false;
}

View File

@@ -89,7 +89,7 @@ bool TbxCommandStreamReceiverHw<GfxFamily>::isAllocTbxFaultable(GraphicsAllocati
return false;
}
auto allocType = gfxAlloc->getAllocationType();
return AubHelper::isOneTimeAubWritableAllocationType(allocType) && GraphicsAllocation::isLockable(allocType) && allocType != AllocationType::gpuTimestampDeviceBuffer;
return AubHelper::isOneTimeAubWritableAllocationType(allocType) && GraphicsAllocation::isLockable(allocType);
}
template <typename GfxFamily>

View File

@@ -41,7 +41,7 @@ DECLARE_DEBUG_VARIABLE(bool, AUBDumpAllocsOnEnqueueSVMMemcpyOnly, false, "Force
DECLARE_DEBUG_VARIABLE(bool, AUBDumpForceAllToLocalMemory, false, "Force placing every allocation in local memory address space")
DECLARE_DEBUG_VARIABLE(bool, GenerateAubFilePerProcessId, true, "Generate aub file with process id")
DECLARE_DEBUG_VARIABLE(bool, SetBufferHostMemoryAlwaysAubWritable, false, "Make buffer host memory allocation always uploaded to AUB/TBX")
DECLARE_DEBUG_VARIABLE(bool, EnableTbxPageFaultManager, false, "Enables experimental page fault manager for host buffers and some other alloc types, replaces SetBufferHostMemoryAlwaysAubWritable")
/* Boolean debug flag, off by default; read as debugManager.flags.EnableTbxPageFaultManager */
DECLARE_DEBUG_VARIABLE(bool, EnableTbxPageFaultManager, false, "Enables experimental page fault manager for host buffer types, improves upon SetBufferHostMemoryAlwaysAubWritable")
/*DEBUG FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, EnableSWTags, false, "Enable software tagging in batch buffer")

View File

@@ -21,7 +21,7 @@ void CpuPageFaultManager::insertAllocation(void *ptr, size_t size, SVMAllocsMana
auto initialPlacement = MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties);
const auto domain = (initialPlacement == GraphicsAllocation::UsmInitialPlacement::CPU) ? AllocationDomain::cpu : AllocationDomain::none;
std::unique_lock<RecursiveSpinLock> lock{mtx};
std::unique_lock<SpinLock> lock{mtx};
PageFaultData faultData{};
faultData.size = size;
faultData.unifiedMemoryManager = unifiedMemoryManager;
@@ -35,7 +35,7 @@ void CpuPageFaultManager::insertAllocation(void *ptr, size_t size, SVMAllocsMana
}
void CpuPageFaultManager::removeAllocation(void *ptr) {
std::unique_lock<RecursiveSpinLock> lock{mtx};
std::unique_lock<SpinLock> lock{mtx};
auto alloc = memoryData.find(ptr);
if (alloc != memoryData.end()) {
auto &pageFaultData = alloc->second;
@@ -52,7 +52,7 @@ void CpuPageFaultManager::removeAllocation(void *ptr) {
}
void CpuPageFaultManager::moveAllocationToGpuDomain(void *ptr) {
std::unique_lock<RecursiveSpinLock> lock{mtx};
std::unique_lock<SpinLock> lock{mtx};
auto alloc = memoryData.find(ptr);
if (alloc != memoryData.end()) {
auto &pageFaultData = alloc->second;
@@ -68,7 +68,7 @@ void CpuPageFaultManager::moveAllocationToGpuDomain(void *ptr) {
}
void CpuPageFaultManager::moveAllocationsWithinUMAllocsManagerToGpuDomain(SVMAllocsManager *unifiedMemoryManager) {
std::unique_lock<RecursiveSpinLock> lock{mtx};
std::unique_lock<SpinLock> lock{mtx};
for (auto allocPtr : unifiedMemoryManager->nonGpuDomainAllocs) {
auto &pageFaultData = this->memoryData[allocPtr];
this->migrateStorageToGpuDomain(allocPtr, pageFaultData);
@@ -108,7 +108,7 @@ void CpuPageFaultManager::handlePageFault(void *ptr, PageFaultData &faultData) {
}
bool CpuPageFaultManager::verifyAndHandlePageFault(void *ptr, bool handleFault) {
std::unique_lock<RecursiveSpinLock> lock{mtx};
std::unique_lock<SpinLock> lock{mtx};
auto allocPtr = getFaultData(memoryData, ptr, handleFault);
if (allocPtr == nullptr) {
return false;

View File

@@ -98,6 +98,6 @@ class CpuPageFaultManager : public NonCopyableClass {
gpuDomainHandlerType gpuDomainHandler = &transferAndUnprotectMemory;
std::unordered_map<void *, PageFaultData> memoryData;
RecursiveSpinLock mtx;
SpinLock mtx;
};
} // namespace NEO

View File

@@ -12,6 +12,9 @@
namespace NEO {
class TbxPageFaultManagerLinux final : public PageFaultManagerLinux, public TbxPageFaultManager {};
// Concrete page fault manager type that derives from both PageFaultManagerLinux
// and TbxPageFaultManager. Declared final, so it cannot be derived from further.
// It adds no data members or methods of its own.
class TbxPageFaultManagerLinux final : public PageFaultManagerLinux, public TbxPageFaultManager {
public:
// Explicitly names both direct bases in the initializer list; the body is empty.
// NOTE(review): TbxPageFaultManager inherits CpuPageFaultManager virtually, so the
// most-derived class is responsible for constructing that shared base — confirm
// whether spelling the constructor out (vs. an implicit one) is needed by any toolchain.
TbxPageFaultManagerLinux() : PageFaultManagerLinux(), TbxPageFaultManager() {}
};
} // namespace NEO

View File

@@ -14,7 +14,7 @@
namespace NEO {
bool TbxPageFaultManager::verifyAndHandlePageFault(void *ptr, bool handleFault) {
std::unique_lock<RecursiveSpinLock> lock{mtxTbx};
std::unique_lock<SpinLock> lock{mtxTbx};
auto allocPtr = getFaultData(memoryDataTbx, ptr, handleFault);
if (allocPtr == nullptr) {
return CpuPageFaultManager::verifyAndHandlePageFault(ptr, handleFault);
@@ -43,7 +43,7 @@ void TbxPageFaultManager::handlePageFault(void *ptr, PageFaultDataTbx &faultData
}
void TbxPageFaultManager::removeAllocation(GraphicsAllocation *alloc) {
std::unique_lock<RecursiveSpinLock> lock{mtxTbx};
std::unique_lock<SpinLock> lock{mtxTbx};
for (auto &data : memoryDataTbx) {
auto allocPtr = data.first;
auto faultData = data.second;
@@ -56,7 +56,7 @@ void TbxPageFaultManager::removeAllocation(GraphicsAllocation *alloc) {
}
void TbxPageFaultManager::insertAllocation(CommandStreamReceiver *csr, GraphicsAllocation *alloc, uint32_t bank, void *ptr, size_t size) {
std::unique_lock<RecursiveSpinLock> lock{mtxTbx};
std::unique_lock<SpinLock> lock{mtxTbx};
if (this->memoryDataTbx.find(ptr) == this->memoryDataTbx.end()) {
PageFaultDataTbx pageFaultData{};

View File

@@ -38,7 +38,7 @@ class TbxPageFaultManager : public virtual CpuPageFaultManager {
void handlePageFault(void *ptr, PageFaultDataTbx &faultData);
std::unordered_map<void *, PageFaultDataTbx> memoryDataTbx;
RecursiveSpinLock mtxTbx;
SpinLock mtxTbx;
};
} // namespace NEO

View File

@@ -15,6 +15,9 @@
namespace NEO {
class TbxPageFaultManagerWindows final : public PageFaultManagerWindows, public TbxPageFaultManager {};
// Concrete page fault manager type that derives from both PageFaultManagerWindows
// and TbxPageFaultManager. Declared final, so it cannot be derived from further.
// It adds no data members or methods of its own.
class TbxPageFaultManagerWindows final : public PageFaultManagerWindows, public TbxPageFaultManager {
public:
// Explicitly names both direct bases in the initializer list; the body is empty.
// NOTE(review): TbxPageFaultManager inherits CpuPageFaultManager virtually, so the
// most-derived class is responsible for constructing that shared base — confirm
// whether spelling the constructor out (vs. an implicit one) is needed by any toolchain.
TbxPageFaultManagerWindows() : PageFaultManagerWindows(), TbxPageFaultManager() {}
};
} // namespace NEO

View File

@@ -11,5 +11,4 @@
namespace NEO {
// NOTE: despite the name, SpinLock is a plain std::mutex. It is non-recursive:
// locking it again from the thread that already owns it is undefined behavior.
using SpinLock = std::mutex;
// Recursive variant: the owning thread may lock it repeatedly, and must unlock
// it the same number of times before another thread can acquire it.
using RecursiveSpinLock = std::recursive_mutex;
} // namespace NEO

View File

@@ -129,18 +129,12 @@ TEST(AubHelper, givenAllocationTypeWhenAskingIfOneTimeWritableThenReturnCorrectR
}
}
TEST(AubHelper, givenSetBufferHostMemoryAlwaysAubWritableAndDisabledTbxFaultMngrWhenAskingIfBufferHostMemoryAllocationIsOneTimeAubWritableThenReturnCorrectResult) {
TEST(AubHelper, givenSetBufferHostMemoryAlwaysAubWritableWhenAskingIfBufferHostMemoryAllocationIsOneTimeAubWritableThenReturnCorrectResult) {
DebugManagerStateRestore stateRestore;
NEO::debugManager.flags.EnableTbxPageFaultManager.set(0);
for (auto isAlwaysAubWritable : {false, true}) {
for (auto isTbxFaultManagerEnabled : {false, true}) {
NEO::debugManager.flags.SetBufferHostMemoryAlwaysAubWritable.set(isAlwaysAubWritable);
NEO::debugManager.flags.EnableTbxPageFaultManager.set(isTbxFaultManagerEnabled);
bool isOneTimeAubWritable = AubHelper::isOneTimeAubWritableAllocationType(AllocationType::bufferHostMemory);
EXPECT_EQ(!isAlwaysAubWritable || isTbxFaultManagerEnabled, isOneTimeAubWritable);
}
NEO::debugManager.flags.SetBufferHostMemoryAlwaysAubWritable.set(isAlwaysAubWritable);
EXPECT_NE(AubHelper::isOneTimeAubWritableAllocationType(AllocationType::bufferHostMemory), isAlwaysAubWritable);
}
}

View File

@@ -1589,12 +1589,8 @@ HWTEST_F(TbxCommandStreamTests, givenAubOneTimeWritableAllocWhenTbxFaultManagerI
for (const auto &allocType : onceWritableAllocTypesForTbx) {
gfxAlloc1->setAllocationType(allocType);
if (allocType == AllocationType::gpuTimestampDeviceBuffer) {
EXPECT_FALSE(tbxCsr->isAllocTbxFaultable(gfxAlloc1));
} else if (GraphicsAllocation::isLockable(allocType)) {
if (GraphicsAllocation::isLockable(allocType)) {
EXPECT_TRUE(tbxCsr->isAllocTbxFaultable(gfxAlloc1));
} else {
EXPECT_FALSE(tbxCsr->isAllocTbxFaultable(gfxAlloc1));
}
}