Revert optimization of gpgpu csr's mutex lock in the enqueue blit

The optimization remains available under the debug flag
ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission

Related-To: NEO-7011
Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
Cencelewska, Katarzyna
2022-08-03 22:41:23 +00:00
committed by Compute-Runtime-Automation
parent 19cac22760
commit 61510e9a92
6 changed files with 108 additions and 6 deletions

View File

@@ -390,6 +390,81 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionT
}
}
// Fixture alias for the tests below that exercise the
// ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission and ForceGpgpuSubmissionForBcsEnqueue debug flags.
using BlitEnqueueForceFlagsTests = BlitEnqueueTests<1>;
// Checks the gpgpu CSR's recursiveLockCounter after enqueueWriteBuffer on a queue forced to report
// "not blocked", for every combination of the two debug flags:
//   ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission | ForceGpgpuSubmissionForBcsEnqueue | expected locks
//                        -1                        |                -1                 |       1
//                        -1                        |                 1                 |       1
//                         1                        |                -1                 |       0
//                         1                        |                 1                 |       1
HWTEST_TEMPLATED_F(BlitEnqueueForceFlagsTests, givenFlagsToForceCsrLockAndNonBlockedQueueWhenEnqueueBlitThenLockAreSetCorrectly) {
    using UltCsr = UltCommandStreamReceiver<FamilyType>;

    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    auto gpgpuCsr = static_cast<UltCsr *>(&queue->getGpgpuCommandStreamReceiver());

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    queue->setQueueBlocked = false;

    int hostData = 0;

    // Applies the given flag values, clears the lock counter and performs a single write-buffer enqueue.
    auto enqueueWithFlags = [&](int csrLockOnlyForGpgpuSubmission, int gpgpuSubmissionForBcsEnqueue) {
        DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.set(csrLockOnlyForGpgpuSubmission);
        DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(gpgpuSubmissionForBcsEnqueue);
        gpgpuCsr->recursiveLockCounter = 0u;
        queue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
    };

    enqueueWithFlags(-1, -1);
    EXPECT_EQ(1u, gpgpuCsr->recursiveLockCounter);

    enqueueWithFlags(-1, 1);
    EXPECT_EQ(1u, gpgpuCsr->recursiveLockCounter);

    enqueueWithFlags(1, -1);
    EXPECT_EQ(0u, gpgpuCsr->recursiveLockCounter);

    enqueueWithFlags(1, 1);
    EXPECT_EQ(1u, gpgpuCsr->recursiveLockCounter);
}
// With ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission set to 1, the queue forced to report "blocked"
// and isGpgpuSubmissionForBcsRequired forced to return false, enqueueWriteBuffer is expected to leave
// the gpgpu CSR's recursiveLockCounter at zero.
HWTEST_TEMPLATED_F(BlitEnqueueForceFlagsTests, givenFlagToForceCsrLockAndBlockedQueueWhenGpgpuSubmissionForBcsNotRequiredAndCallEnqueueBlitThenLockAreSetCorrectly) {
    using UltCsr = UltCommandStreamReceiver<FamilyType>;

    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    auto gpgpuCsr = static_cast<UltCsr *>(&queue->getGpgpuCommandStreamReceiver());

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;

    // Mock overrides: blocked queue, no gpgpu submission required for the BCS enqueue.
    queue->setQueueBlocked = true;
    queue->forceGpgpuSubmissionForBcsRequired = 0;

    DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.set(1);
    gpgpuCsr->recursiveLockCounter = 0u;

    int hostData = 0;
    queue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(0u, gpgpuCsr->recursiveLockCounter);
}
// With ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission set to 1, the queue forced to report "blocked"
// and isGpgpuSubmissionForBcsRequired forced to return true, enqueueWriteBuffer is expected to bump
// the gpgpu CSR's recursiveLockCounter exactly once.
HWTEST_TEMPLATED_F(BlitEnqueueForceFlagsTests, givenFlagToForceCsrLockAndBlockedQueueWhenGpgpuSubmissionForBcsRequiredAndCallEnqueueBlitThenLockAreSetCorrectly) {
    using UltCsr = UltCommandStreamReceiver<FamilyType>;

    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    auto gpgpuCsr = static_cast<UltCsr *>(&queue->getGpgpuCommandStreamReceiver());

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;

    // Mock overrides: blocked queue, gpgpu submission required for the BCS enqueue.
    queue->setQueueBlocked = true;
    queue->forceGpgpuSubmissionForBcsRequired = 1;

    DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.set(1);
    gpgpuCsr->recursiveLockCounter = 0u;

    int hostData = 0;
    queue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(1u, gpgpuCsr->recursiveLockCounter);
}
// Alias of BlitEnqueueTests<1> used as the fixture of the blit copy test(s) that follow.
using BlitCopyTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {

View File

@@ -382,6 +382,18 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image);
return isBlitEnqueueImageAllowed;
}
// Test hook: when setQueueBlocked is left at -1 the base-class answer is returned;
// any other value is reported as the blocked state (non-zero means blocked).
bool isQueueBlocked() override {
    if (setQueueBlocked == -1) {
        return BaseClass::isQueueBlocked();
    }
    return setQueueBlocked != 0;
}
// Test hook: when forceGpgpuSubmissionForBcsRequired is left at -1 the call is forwarded to the
// base class; any other value is returned directly (non-zero means "required").
bool isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies) const override {
    if (forceGpgpuSubmissionForBcsRequired == -1) {
        return BaseClass::isGpgpuSubmissionForBcsRequired(queueBlocked, timestampPacketDependencies);
    }
    return forceGpgpuSubmissionForBcsRequired != 0;
}
unsigned int lastCommandType;
std::vector<Kernel *> lastEnqueuedKernels;
@@ -396,6 +408,8 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
bool notifyEnqueueSVMMemcpyCalled = false;
bool cpuDataTransferHandlerCalled = false;
bool useBcsCsrOnNotifyEnabled = false;
// Override for isQueueBlocked(): -1 defers to the base class, any other value is returned as the result.
int setQueueBlocked = -1;
// Override for isGpgpuSubmissionForBcsRequired(): -1 defers to the base class, any other value is returned as the result.
int forceGpgpuSubmissionForBcsRequired = -1;
mutable bool isBlitEnqueueImageAllowed = false;
struct OverrideReturnValue {
bool enabled = false;