mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 07:14:10 +08:00
performance: Bidirectional BCS split implementation for OCL
Related-To: NEO-7877
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7f24a4ba25
commit
9f755b54ad
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_in_ops_base.h"
|
||||
#include "shared/source/command_stream/transfer_direction.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
|
||||
#include "opencl/source/kernel/multi_device_kernel.h"
|
||||
@@ -50,6 +51,7 @@ struct BuiltinOpParams {
|
||||
uint32_t dstMipLevel = 0;
|
||||
void *userPtrForPostOperationCpuCopy = nullptr;
|
||||
bool bcsSplit = false;
|
||||
TransferDirection direction = TransferDirection::LocalToLocal;
|
||||
};
|
||||
|
||||
class BuiltinDispatchInfoBuilder {
|
||||
|
||||
@@ -371,6 +371,13 @@ void CommandQueue::constructBcsEnginesForSplit() {
|
||||
}
|
||||
}
|
||||
|
||||
if (DebugManager.flags.SplitBcsMaskD2H.get() > 0) {
|
||||
this->d2hEngines = DebugManager.flags.SplitBcsMaskD2H.get();
|
||||
}
|
||||
if (DebugManager.flags.SplitBcsMaskH2D.get() > 0) {
|
||||
this->h2dEngines = DebugManager.flags.SplitBcsMaskH2D.get();
|
||||
}
|
||||
|
||||
this->bcsSplitInitialized = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -437,6 +437,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
bool bcsSplitInitialized = false;
|
||||
BcsInfoMask splitEngines = EngineHelpers::oddLinkedCopyEnginesMask;
|
||||
BcsInfoMask h2dEngines = NEO::EngineHelpers::h2dCopyEngineMask;
|
||||
BcsInfoMask d2hEngines = NEO::EngineHelpers::d2hCopyEngineMask;
|
||||
|
||||
LinearStream *commandStream = nullptr;
|
||||
|
||||
|
||||
@@ -1245,11 +1245,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
|
||||
auto ret = CL_SUCCESS;
|
||||
this->releaseMainCopyEngine();
|
||||
|
||||
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 4u> locks;
|
||||
StackVec<CommandStreamReceiver *, 4u> copyEngines;
|
||||
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 2u> locks;
|
||||
StackVec<CommandStreamReceiver *, 2u> copyEngines;
|
||||
|
||||
auto splitEngines = this->splitEngines;
|
||||
if (dispatchInfo.peekBuiltinOpParams().direction == NEO::TransferDirection::HostToLocal) {
|
||||
splitEngines = this->h2dEngines;
|
||||
} else if (dispatchInfo.peekBuiltinOpParams().direction == NEO::TransferDirection::LocalToHost) {
|
||||
splitEngines = this->d2hEngines;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
||||
if (this->splitEngines.test(i)) {
|
||||
if (splitEngines.test(i)) {
|
||||
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
|
||||
auto bcs = getBcsCommandStreamReceiver(engineType);
|
||||
if (bcs) {
|
||||
|
||||
@@ -40,6 +40,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
dc.dstOffset = {dstOffset, 0, 0};
|
||||
dc.size = {size, 0, 0};
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
dc.dstRowPitch = dstRowPitch;
|
||||
dc.dstSlicePitch = dstSlicePitch;
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -53,6 +53,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin);
|
||||
}
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -110,6 +110,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
dc.size = {size, 0, 0};
|
||||
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
|
||||
dc.bcsSplit = bcsSplit;
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -100,6 +100,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
dc.dstRowPitch = hostRowPitch;
|
||||
dc.dstSlicePitch = hostSlicePitch;
|
||||
dc.bcsSplit = bcsSplit;
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
|
||||
@@ -114,6 +114,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
dc.userPtrForPostOperationCpuCopy = ptr;
|
||||
}
|
||||
dc.bcsSplit = bcsSplit;
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
@@ -125,6 +125,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
|
||||
dc.size = {size, 0, 0};
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
@@ -212,6 +213,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
|
||||
dc.size = {svmOperation->regionSize, 0, 0};
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, svmOperation->regionSize, csr);
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
@@ -385,6 +387,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
operationParams.bcsSplit = bcsSplit;
|
||||
operationParams.direction = csrSelectionArgs.direction;
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
} else if (copyType == HostToSvm) {
|
||||
@@ -409,6 +412,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &srcHostPtrSurf;
|
||||
|
||||
operationParams.bcsSplit = bcsSplit;
|
||||
operationParams.direction = csrSelectionArgs.direction;
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
} else if (copyType == SvmToSvm) {
|
||||
@@ -422,6 +426,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstSvmSurf;
|
||||
|
||||
operationParams.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
operationParams.direction = csrSelectionArgs.direction;
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
} else {
|
||||
@@ -449,6 +454,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
operationParams.bcsSplit = bcsSplit;
|
||||
operationParams.direction = csrSelectionArgs.direction;
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -99,6 +99,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
dc.size = {size, 0, 0};
|
||||
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
|
||||
dc.bcsSplit = bcsSplit;
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -105,6 +105,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
dc.dstRowPitch = bufferRowPitch;
|
||||
dc.dstSlicePitch = bufferSlicePitch;
|
||||
dc.bcsSplit = bcsSplit;
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
@@ -98,6 +98,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
}
|
||||
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
|
||||
dc.bcsSplit = bcsSplit;
|
||||
dc.direction = csrSelectionArgs.direction;
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
@@ -364,6 +364,7 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadThenEnqueueB
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
@@ -411,11 +412,218 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadThenEnqueueB
|
||||
const_cast<StackVec<TagNodeBase *, 32u> &>(cmdQHw->timestampPacketContainer->peekNodes()).clear();
|
||||
}
|
||||
|
||||
// Verifies that a non-blocking 16 MB read from a local-memory buffer is routed
// only to the copy engine selected by the SplitBcsMaskD2H debug flag.
// The mask 0b10 has only bit 1 set, and the test expects work solely on the
// CSR installed at bcsEngines[1]; the CSR at bcsEngines[3] must stay idle.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyAndD2HMaskWhenEnqueueReadThenEnqueueBlitSplit) {
|
||||
// Restores the debug flags changed below when the test scope ends.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
// Device-to-host split mask under test: a single bit (index 1).
DebugManager.flags.SplitBcsMaskD2H.set(0b10);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
|
||||
// First extra copy engine: OS context + CSR created for ENGINE_BCS1.
std::unique_ptr<OsContext> osContext1(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
|
||||
auto csr1 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr1->setupContext(*osContext1);
|
||||
|
||||
csr1->initializeTagAllocation();
|
||||
EngineControl control1(csr1.get(), osContext1.get());
|
||||
// Second extra copy engine: OS context + CSR created for ENGINE_BCS3.
std::unique_ptr<OsContext> osContext2(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS3, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
auto csr2 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr2->setupContext(*osContext2);
|
||||
csr2->initializeTagAllocation();
|
||||
EngineControl control2(csr2.get(), osContext2.get());
|
||||
|
||||
auto cmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
|
||||
|
||||
// Install the locally created engines into the queue's BCS slots 1 and 3.
cmdQHw->bcsEngines[1] = &control1;
|
||||
cmdQHw->bcsEngines[3] = &control2;
|
||||
|
||||
BcsSplitBufferTraits::context = context;
|
||||
auto buffer = clUniquePtr(BufferHelper<BcsSplitBufferTraits>::create());
|
||||
// Mark the buffer's allocation as local memory so the read is a device-to-host transfer
// (presumably what selects the D2H mask — confirm against the CSR selection logic).
static_cast<MockGraphicsAllocation *>(buffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::LocalMemory;
|
||||
char ptr[1] = {};
|
||||
|
||||
// Precondition: nothing has been submitted to any engine yet.
EXPECT_EQ(csr1->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, cmdQHw->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 16 * MemoryConstants::megaByte, ptr, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
// Only the engine at slot 1 received a submission; slot 3, GPGPU and main BCS stay untouched.
EXPECT_EQ(csr1->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
// With a single engine selected the recorded transfer size is the full 16 MB
// (kernelParams presumably holds the params of the last dispatched chunk — confirm in MockCommandQueueHw).
EXPECT_EQ(cmdQHw->kernelParams.size.x, 16 * MemoryConstants::megaByte);
|
||||
|
||||
// Drop the queue's timestamp packet nodes before the locally created CSRs go out of scope
// (NOTE(review): presumably avoids touching stale nodes at teardown — confirm).
const_cast<StackVec<TagNodeBase *, 32u> &>(cmdQHw->timestampPacketContainer->peekNodes()).clear();
|
||||
}
|
||||
|
||||
// Verifies that when SplitBcsMaskD2H names more engines than the queue has,
// a non-blocking 16 MB read is split only across the engines that exist.
// The mask 0b101010 sets bits 1, 3 and 5, but only bcsEngines[1] and
// bcsEngines[3] are installed; both are expected to receive one submission.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyAndD2HMaskGreaterThanAvailableEnginesWhenEnqueueReadThenEnqueueBlitSplitAcrossAvailableEngines) {
|
||||
// Restores the debug flags changed below when the test scope ends.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
// Device-to-host split mask under test: bits 1, 3 and 5.
DebugManager.flags.SplitBcsMaskD2H.set(0b101010);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
|
||||
// First extra copy engine: OS context + CSR created for ENGINE_BCS1.
std::unique_ptr<OsContext> osContext1(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
|
||||
auto csr1 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr1->setupContext(*osContext1);
|
||||
|
||||
csr1->initializeTagAllocation();
|
||||
EngineControl control1(csr1.get(), osContext1.get());
|
||||
// Second extra copy engine: OS context + CSR created for ENGINE_BCS3.
std::unique_ptr<OsContext> osContext2(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS3, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
auto csr2 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr2->setupContext(*osContext2);
|
||||
csr2->initializeTagAllocation();
|
||||
EngineControl control2(csr2.get(), osContext2.get());
|
||||
|
||||
auto cmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
|
||||
|
||||
// Install the locally created engines into BCS slots 1 and 3; no engine is installed at slot 5 here.
cmdQHw->bcsEngines[1] = &control1;
|
||||
cmdQHw->bcsEngines[3] = &control2;
|
||||
|
||||
BcsSplitBufferTraits::context = context;
|
||||
auto buffer = clUniquePtr(BufferHelper<BcsSplitBufferTraits>::create());
|
||||
// Mark the buffer's allocation as local memory so the read is a device-to-host transfer
// (presumably what selects the D2H mask — confirm against the CSR selection logic).
static_cast<MockGraphicsAllocation *>(buffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::LocalMemory;
|
||||
char ptr[1] = {};
|
||||
|
||||
// Precondition: nothing has been submitted to any engine yet.
EXPECT_EQ(csr1->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, cmdQHw->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 16 * MemoryConstants::megaByte, ptr, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
// Both installed engines received one submission; GPGPU and main BCS stay untouched.
EXPECT_EQ(csr1->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
// 16 MB over two engines: the recorded transfer size is 8 MB
// (kernelParams presumably holds the params of the last dispatched chunk — confirm in MockCommandQueueHw).
EXPECT_EQ(cmdQHw->kernelParams.size.x, 8 * MemoryConstants::megaByte);
|
||||
|
||||
// Drop the queue's timestamp packet nodes before the locally created CSRs go out of scope
// (NOTE(review): presumably avoids touching stale nodes at teardown — confirm).
const_cast<StackVec<TagNodeBase *, 32u> &>(cmdQHw->timestampPacketContainer->peekNodes()).clear();
|
||||
}
|
||||
|
||||
// Verifies that a non-blocking 16 MB write into a local-memory buffer is split
// across the copy engines installed at bcsEngines[1] and bcsEngines[3].
// No SplitBcsMask* flag is set here, so the queue's default engine masks apply.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueWriteThenEnqueueBlitSplit) {
|
||||
// Restores the debug flags changed below when the test scope ends.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
|
||||
// First extra copy engine: OS context + CSR created for ENGINE_BCS1.
std::unique_ptr<OsContext> osContext1(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
|
||||
auto csr1 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr1->setupContext(*osContext1);
|
||||
|
||||
csr1->initializeTagAllocation();
|
||||
EngineControl control1(csr1.get(), osContext1.get());
|
||||
// Second extra copy engine: OS context + CSR created for ENGINE_BCS3.
std::unique_ptr<OsContext> osContext2(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS3, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
auto csr2 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr2->setupContext(*osContext2);
|
||||
csr2->initializeTagAllocation();
|
||||
EngineControl control2(csr2.get(), osContext2.get());
|
||||
|
||||
auto cmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
|
||||
|
||||
// Install the locally created engines into the queue's BCS slots 1 and 3.
cmdQHw->bcsEngines[1] = &control1;
|
||||
cmdQHw->bcsEngines[3] = &control2;
|
||||
|
||||
BcsSplitBufferTraits::context = context;
|
||||
auto buffer = clUniquePtr(BufferHelper<BcsSplitBufferTraits>::create());
|
||||
// Destination allocation is marked as local memory, so the write is presumably a
// host-to-device transfer using the default H2D engine mask — TODO confirm.
static_cast<MockGraphicsAllocation *>(buffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::LocalMemory;
|
||||
char ptr[1] = {};
|
||||
|
||||
// Precondition: nothing has been submitted to any engine yet.
EXPECT_EQ(csr1->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, cmdQHw->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, 16 * MemoryConstants::megaByte, ptr, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
// Both installed engines received one submission; GPGPU and main BCS stay untouched.
EXPECT_EQ(csr1->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
// 16 MB over two engines: the recorded transfer size is 8 MB
// (kernelParams presumably holds the params of the last dispatched chunk — confirm in MockCommandQueueHw).
EXPECT_EQ(cmdQHw->kernelParams.size.x, 8 * MemoryConstants::megaByte);
|
||||
|
||||
// Drop the queue's timestamp packet nodes before the locally created CSRs go out of scope
// (NOTE(review): presumably avoids touching stale nodes at teardown — confirm).
const_cast<StackVec<TagNodeBase *, 32u> &>(cmdQHw->timestampPacketContainer->peekNodes()).clear();
|
||||
}
|
||||
|
||||
// Verifies that a non-blocking 16 MB write into a buffer backed by system memory
// (the "H2H" case in the test name) is still split across the copy engines
// installed at bcsEngines[1] and bcsEngines[3].
// No SplitBcsMask* flag is set here, so the queue's default engine masks apply.
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueWriteH2HThenEnqueueBlitSplit) {
|
||||
// Restores the debug flags changed below when the test scope ends.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
|
||||
// First extra copy engine: OS context + CSR created for ENGINE_BCS1.
std::unique_ptr<OsContext> osContext1(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
|
||||
auto csr1 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr1->setupContext(*osContext1);
|
||||
|
||||
csr1->initializeTagAllocation();
|
||||
EngineControl control1(csr1.get(), osContext1.get());
|
||||
// Second extra copy engine: OS context + CSR created for ENGINE_BCS3.
std::unique_ptr<OsContext> osContext2(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS3, EngineUsage::Regular},
|
||||
PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield())));
|
||||
auto csr2 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
csr2->setupContext(*osContext2);
|
||||
csr2->initializeTagAllocation();
|
||||
EngineControl control2(csr2.get(), osContext2.get());
|
||||
|
||||
auto cmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
|
||||
|
||||
// Install the locally created engines into the queue's BCS slots 1 and 3.
cmdQHw->bcsEngines[1] = &control1;
|
||||
cmdQHw->bcsEngines[3] = &control2;
|
||||
|
||||
BcsSplitBufferTraits::context = context;
|
||||
auto buffer = clUniquePtr(BufferHelper<BcsSplitBufferTraits>::create());
|
||||
// Destination allocation is marked as system memory (64KB pages) rather than local memory,
// which makes this a host-to-host transfer per the test name.
static_cast<MockGraphicsAllocation *>(buffer->getGraphicsAllocation(0u))->memoryPool = MemoryPool::System64KBPages;
|
||||
char ptr[1] = {};
|
||||
|
||||
// Precondition: nothing has been submitted to any engine yet.
EXPECT_EQ(csr1->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, cmdQHw->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, 16 * MemoryConstants::megaByte, ptr, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
// Both installed engines received one submission; GPGPU and main BCS stay untouched.
EXPECT_EQ(csr1->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(csr2->peekTaskCount(), 1u);
|
||||
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
|
||||
|
||||
// 16 MB over two engines: the recorded transfer size is 8 MB
// (kernelParams presumably holds the params of the last dispatched chunk — confirm in MockCommandQueueHw).
EXPECT_EQ(cmdQHw->kernelParams.size.x, 8 * MemoryConstants::megaByte);
|
||||
|
||||
// Drop the queue's timestamp packet nodes before the locally created CSRs go out of scope
// (NOTE(review): presumably avoids touching stale nodes at teardown — confirm).
const_cast<StackVec<TagNodeBase *, 32u> &>(cmdQHw->timestampPacketContainer->peekNodes()).clear();
|
||||
}
|
||||
|
||||
HWTEST_F(OoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadWithRequestedStallingCommandThenEnqueueMarkerBeforeBlitSplit) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
|
||||
@@ -466,6 +674,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadWithRequeste
|
||||
HWTEST_F(OoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueBarrierNonSplitCopyAndSplitCopyThenSplitWaitCorrectly) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
@@ -529,6 +738,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadThenDoNotEnq
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
|
||||
@@ -578,6 +788,7 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadWithRequeste
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
|
||||
@@ -630,6 +841,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadWithNoReques
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
|
||||
@@ -682,6 +894,7 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueBlockingReadThen
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->returnFakeAllocation = true;
|
||||
auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
|
||||
@@ -729,6 +942,7 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueBlockingReadThen
|
||||
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadWithEventThenEnqueueBlitSplitAndAddBothTimestampsToEvent) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.SplitBcsCopy.set(1);
|
||||
DebugManager.flags.SplitBcsMaskD2H.set(0b1010);
|
||||
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
|
||||
@@ -211,6 +211,32 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, whenConstructBcsEnginesForSplitThenConta
|
||||
EXPECT_EQ(4u, queue->countBcsEngines());
|
||||
}
|
||||
|
||||
// Verifies that constructBcsEnginesForSplit() copies the SplitBcsMaskD2H and
// SplitBcsMaskH2D debug-flag values into the queue's d2hEngines / h2dEngines masks.
// Runs only on platforms matching IsAtLeastXeHpcCore.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenBidirectionalMasksWhenConstructBcsEnginesForSplitThenMasksSet, IsAtLeastXeHpcCore) {
|
||||
// Restores the debug flags changed below when the test scope ends.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableCopyEngineSelector.set(1);
|
||||
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
|
||||
// Distinct per-direction masks so the two assertions at the end cannot pass by accident.
DebugManager.flags.SplitBcsMaskD2H.set(0b10100010);
|
||||
DebugManager.flags.SplitBcsMaskH2D.set(0b101010);
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
// Advertise nine copy engines (lowest 9 bits set) and enable blitter support.
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
// NOTE(review): raw pointer is handed to MockClDevice, which presumably takes ownership — confirm.
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
|
||||
MockClDevice clDevice{device};
|
||||
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
|
||||
ClDeviceVector clDevices{&clDeviceId, 1u};
|
||||
cl_int retVal{};
|
||||
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
auto queue = std::make_unique<MockCommandQueue>(*context);
|
||||
// No BCS engines exist on the queue before the split engines are constructed.
EXPECT_EQ(0u, queue->countBcsEngines());
|
||||
|
||||
queue->constructBcsEnginesForSplit();
|
||||
|
||||
// Four split engines were constructed, and both direction masks mirror the debug flags.
EXPECT_EQ(4u, queue->countBcsEngines());
|
||||
EXPECT_EQ(0b10100010u, queue->d2hEngines.to_ulong());
|
||||
EXPECT_EQ(0b101010u, queue->h2dEngines.to_ulong());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandQueuePvcAndLaterTests, givenSplitBcsMaskWhenConstructBcsEnginesForSplitThenContainsGivenBcsEngines, IsAtLeastXeHpcCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableCopyEngineSelector.set(1);
|
||||
|
||||
@@ -30,9 +30,11 @@ class MockCommandQueue : public CommandQueue {
|
||||
using CommandQueue::blitEnqueueAllowed;
|
||||
using CommandQueue::blitEnqueueImageAllowed;
|
||||
using CommandQueue::bufferCpuCopyAllowed;
|
||||
using CommandQueue::d2hEngines;
|
||||
using CommandQueue::deferredTimestampPackets;
|
||||
using CommandQueue::device;
|
||||
using CommandQueue::gpgpuEngine;
|
||||
using CommandQueue::h2dEngines;
|
||||
using CommandQueue::isCopyOnly;
|
||||
using CommandQueue::isTextureCacheFlushNeeded;
|
||||
using CommandQueue::migrateMultiGraphicsAllocationsIfRequired;
|
||||
|
||||
Reference in New Issue
Block a user