Mirror of https://github.com/intel/compute-runtime.git (synced 2025-12-22 01:48:50 +08:00)
Revert "[L0][XE_HPC]Perform memcpy on CPU by default"
This reverts commit 383f33b482.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: 06817090bf
Commit: cfd96980a0
@@ -329,6 +329,9 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
bool systolicModeSupport = false;
|
bool systolicModeSupport = false;
|
||||||
bool pipelineSelectStateTracking = false;
|
bool pipelineSelectStateTracking = false;
|
||||||
bool stateComputeModeTracking = false;
|
bool stateComputeModeTracking = false;
|
||||||
|
|
||||||
|
std::atomic<uint32_t> barrierCounter{0u};
|
||||||
|
uint32_t latestFlushedBarrierCounter = 0u;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||||
|
|||||||
@@ -2517,6 +2517,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
|||||||
}
|
}
|
||||||
|
|
||||||
appendSignalEventPostWalker(signalEvent, workloadPartition);
|
appendSignalEventPostWalker(signalEvent, workloadPartition);
|
||||||
|
this->barrierCounter++;
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -132,9 +132,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||||||
bool isAllocUSMDeviceMemory(NEO::SvmAllocationData *alloc, bool allocFound);
|
bool isAllocUSMDeviceMemory(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||||
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, bool isDstDeviceMemory, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, bool isDstDeviceMemory, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||||
void *obtainLockedPtrFromDevice(void *ptr, size_t size);
|
void *obtainLockedPtrFromDevice(void *ptr, size_t size);
|
||||||
|
|
||||||
protected:
|
|
||||||
std::atomic<bool> barrierCalled{false};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <PRODUCT_FAMILY gfxProductFamily>
|
template <PRODUCT_FAMILY gfxProductFamily>
|
||||||
|
|||||||
@@ -246,8 +246,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
|
|||||||
checkAvailableSpace();
|
checkAvailableSpace();
|
||||||
}
|
}
|
||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
|
||||||
this->barrierCalled = true;
|
|
||||||
return flushImmediate(ret, true, hSignalEvent);
|
return flushImmediate(ret, true, hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -546,7 +544,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
|
|||||||
this->appendBarrier(nullptr, numWaitEvents, phWaitEvents);
|
this->appendBarrier(nullptr, numWaitEvents, phWaitEvents);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->barrierCalled) {
|
bool needsFlushTagUpdate = this->latestFlushedBarrierCounter < this->barrierCounter;
|
||||||
|
if (needsFlushTagUpdate) {
|
||||||
this->csr->flushTagUpdate();
|
this->csr->flushTagUpdate();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -565,13 +564,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
|
|||||||
cpuMemcpyDstPtr = dstptr;
|
cpuMemcpyDstPtr = dstptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->barrierCalled) {
|
if (needsFlushTagUpdate) {
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||||
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
|
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
|
||||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||||
}
|
}
|
||||||
this->barrierCalled = false;
|
this->latestFlushedBarrierCounter = this->barrierCounter;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (signalEvent) {
|
if (signalEvent) {
|
||||||
|
|||||||
@@ -484,7 +484,6 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
|
|||||||
using BaseClass = WhiteBox<::L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>;
|
using BaseClass = WhiteBox<::L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>;
|
||||||
MockCommandListImmediateHw() : BaseClass() {}
|
MockCommandListImmediateHw() : BaseClass() {}
|
||||||
using BaseClass::applyMemoryRangesBarrier;
|
using BaseClass::applyMemoryRangesBarrier;
|
||||||
using BaseClass::barrierCalled;
|
|
||||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||||
using BaseClass::isSyncModeQueue;
|
using BaseClass::isSyncModeQueue;
|
||||||
|
|
||||||
|
|||||||
@@ -886,6 +886,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
|
|||||||
|
|
||||||
struct AppendMemoryLockedCopyFixture : public DeviceFixture {
|
struct AppendMemoryLockedCopyFixture : public DeviceFixture {
|
||||||
void setUp() {
|
void setUp() {
|
||||||
|
DebugManager.flags.ExperimentalCopyThroughLock.set(1);
|
||||||
DeviceFixture::setUp();
|
DeviceFixture::setUp();
|
||||||
|
|
||||||
nonUsmHostPtr = new char[sz];
|
nonUsmHostPtr = new char[sz];
|
||||||
@@ -1075,23 +1076,6 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWith
|
|||||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendBarrierThenSetBarrierCalled, IsXeHpcCore) {
|
|
||||||
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
|
|
||||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
|
||||||
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
|
|
||||||
|
|
||||||
EXPECT_FALSE(cmdList.barrierCalled);
|
|
||||||
|
|
||||||
cmdList.appendBarrier(nullptr, 0, nullptr);
|
|
||||||
|
|
||||||
EXPECT_TRUE(cmdList.barrierCalled);
|
|
||||||
|
|
||||||
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
|
|
||||||
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
|
|
||||||
|
|
||||||
EXPECT_FALSE(cmdList.barrierCalled);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImmediateHw<gfxCoreFamily> {
|
class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImmediateHw<gfxCoreFamily> {
|
||||||
public:
|
public:
|
||||||
|
|||||||
@@ -428,10 +428,10 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0,
|
|||||||
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable allocation cache.")
|
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable allocation cache.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default treshold (in bytes) for H2D CPU copy.")
|
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default treshold (in bytes) for H2D CPU copy.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default treshold (in bytes) for D2H CPU copy.")
|
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default treshold (in bytes) for D2H CPU copy.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. -1: default 0: disable 1: enable ")
|
|
||||||
DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableSourceLevelDebugger, false, "Experimentally enable source level debugger.")
|
DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableSourceLevelDebugger, false, "Experimentally enable source level debugger.")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableL0DebuggerForOpenCL, false, "Experimentally enable debugging OCL with L0 Debug API.")
|
DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableL0DebuggerForOpenCL, false, "Experimentally enable debugging OCL with L0 Debug API.")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableTileAttach, true, "Experimentally enable attaching to tiles (subdevices).")
|
DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableTileAttach, true, "Experimentally enable attaching to tiles (subdevices).")
|
||||||
|
DECLARE_DEBUG_VARIABLE(bool, ExperimentalCopyThroughLock, false, "Experimentally copy memory through locked ptr.")
|
||||||
|
|
||||||
/*DRIVER TOGGLES*/
|
/*DRIVER TOGGLES*/
|
||||||
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
||||||
|
|||||||
@@ -720,9 +720,6 @@ bool HwHelperHw<GfxFamily>::isPatIndexFallbackWaRequired() const {
|
|||||||
|
|
||||||
template <typename gfxProduct>
|
template <typename gfxProduct>
|
||||||
bool HwHelperHw<gfxProduct>::copyThroughLockedPtrEnabled() const {
|
bool HwHelperHw<gfxProduct>::copyThroughLockedPtrEnabled() const {
|
||||||
if (DebugManager.flags.ExperimentalCopyThroughLock.get() != -1) {
|
|
||||||
return DebugManager.flags.ExperimentalCopyThroughLock.get() == 1;
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -444,10 +444,7 @@ bool HwHelperHw<Family>::isPatIndexFallbackWaRequired() const {
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
bool HwHelperHw<Family>::copyThroughLockedPtrEnabled() const {
|
bool HwHelperHw<Family>::copyThroughLockedPtrEnabled() const {
|
||||||
if (DebugManager.flags.ExperimentalCopyThroughLock.get() != -1) {
|
return DebugManager.flags.ExperimentalCopyThroughLock.get();
|
||||||
return DebugManager.flags.ExperimentalCopyThroughLock.get() == 1;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -460,7 +460,7 @@ DirectSubmissionDisablePrefetcher = -1
|
|||||||
ForceDefaultGrfCompilationMode = 0
|
ForceDefaultGrfCompilationMode = 0
|
||||||
ForceLargeGrfCompilationMode = 0
|
ForceLargeGrfCompilationMode = 0
|
||||||
ForceStatelessMocsEncryptionBit = -1
|
ForceStatelessMocsEncryptionBit = -1
|
||||||
ExperimentalCopyThroughLock = -1
|
ExperimentalCopyThroughLock = 0
|
||||||
ExperimentalH2DCpuCopyThreshold = -1
|
ExperimentalH2DCpuCopyThreshold = -1
|
||||||
ExperimentalD2HCpuCopyThreshold = -1
|
ExperimentalD2HCpuCopyThreshold = -1
|
||||||
CopyHostPtrOnCpu = -1
|
CopyHostPtrOnCpu = -1
|
||||||
|
|||||||
@@ -198,17 +198,7 @@ HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenIsPlatformQueryNotSupportedThen
|
|||||||
EXPECT_FALSE(hwInfoConfig.isPlatformQuerySupported());
|
EXPECT_FALSE(hwInfoConfig.isPlatformQuerySupported());
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(HwInfoConfigTest, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnFalse, IsNotXeHpcCore) {
|
HWTEST_F(HwInfoConfigTest, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnFalse) {
|
||||||
HwHelper &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
HwHelper &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||||
EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled());
|
EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled());
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(HwInfoConfigTest, givenHwHelperWhenFlagSetAndCallCopyThroughLockedPtrEnabledThenReturnCorrectValue) {
|
|
||||||
DebugManagerStateRestore restorer;
|
|
||||||
HwHelper &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
|
||||||
DebugManager.flags.ExperimentalCopyThroughLock.set(0);
|
|
||||||
EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled());
|
|
||||||
|
|
||||||
DebugManager.flags.ExperimentalCopyThroughLock.set(1);
|
|
||||||
EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled());
|
|
||||||
}
|
|
||||||
@@ -6,6 +6,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "shared/source/helpers/hw_helper.h"
|
#include "shared/source/helpers/hw_helper.h"
|
||||||
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
#include "shared/test/common/helpers/default_hw_info.h"
|
#include "shared/test/common/helpers/default_hw_info.h"
|
||||||
#include "shared/test/common/helpers/hw_helper_tests.h"
|
#include "shared/test/common/helpers/hw_helper_tests.h"
|
||||||
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
|
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
|
||||||
@@ -74,7 +75,13 @@ XE_HPC_CORETEST_F(HwHelperXeHpcCoreTest, givenXeHPCPlatformWhenCheckAssignEngine
|
|||||||
EXPECT_EQ(hwHelper.isAssignEngineRoundRobinSupported(hwInfo), HwInfoConfig::get(hwInfo.platform.eProductFamily)->isAssignEngineRoundRobinSupported());
|
EXPECT_EQ(hwHelper.isAssignEngineRoundRobinSupported(hwInfo), HwInfoConfig::get(hwInfo.platform.eProductFamily)->isAssignEngineRoundRobinSupported());
|
||||||
}
|
}
|
||||||
|
|
||||||
XE_HPC_CORETEST_F(HwHelperTest, givenHwHelperWhenCallCopyThroughLockedPtrEnabledThenReturnTrue) {
|
XE_HPC_CORETEST_F(HwHelperTest, givenHwHelperWithFlagSetWhenCallCopyThroughLockedPtrEnabledThenReturnFalse) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
auto &hwHelper = HwHelperHw<FamilyType>::get();
|
auto &hwHelper = HwHelperHw<FamilyType>::get();
|
||||||
|
|
||||||
|
DebugManager.flags.ExperimentalCopyThroughLock.set(false);
|
||||||
|
EXPECT_FALSE(hwHelper.copyThroughLockedPtrEnabled());
|
||||||
|
|
||||||
|
DebugManager.flags.ExperimentalCopyThroughLock.set(true);
|
||||||
EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled());
|
EXPECT_TRUE(hwHelper.copyThroughLockedPtrEnabled());
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user