mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
performance: write image through staging chunks
Related-To: NEO-12968 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
40c9c46db9
commit
a25e973205
@@ -2964,19 +2964,22 @@ cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue,
|
||||
TRACING_EXIT(ClEnqueueWriteImage, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
retVal = pCommandQueue->enqueueWriteImage(
|
||||
pImage,
|
||||
blockingWrite,
|
||||
origin,
|
||||
region,
|
||||
inputRowPitch,
|
||||
inputSlicePitch,
|
||||
ptr,
|
||||
nullptr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (pCommandQueue->isValidForStagingWriteImage(pImage, ptr, numEventsInWaitList > 0)) {
|
||||
retVal = pCommandQueue->enqueueStagingWriteImage(pImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, event);
|
||||
} else {
|
||||
retVal = pCommandQueue->enqueueWriteImage(
|
||||
pImage,
|
||||
blockingWrite,
|
||||
origin,
|
||||
region,
|
||||
inputRowPitch,
|
||||
inputSlicePitch,
|
||||
ptr,
|
||||
nullptr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
}
|
||||
}
|
||||
DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast<const uintptr_t *>(event), 1u));
|
||||
TRACING_EXIT(ClEnqueueWriteImage, &retVal);
|
||||
|
||||
@@ -1569,7 +1569,7 @@ cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstP
|
||||
|
||||
// If there was only one chunk copy, no barrier for OOQ is needed
|
||||
bool isSingleTransfer = false;
|
||||
ChunkCopyFunction chunkCopy = [&](void *chunkDst, void *stagingBuffer, const void *chunkSrc, size_t chunkSize) -> int32_t {
|
||||
ChunkCopyFunction chunkCopy = [&](void *stagingBuffer, size_t chunkSize, void *chunkDst, const void *chunkSrc) -> int32_t {
|
||||
auto isFirstTransfer = (chunkDst == dstPtr);
|
||||
auto isLastTransfer = ptrOffset(chunkDst, chunkSize) == ptrOffset(dstPtr, size);
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
@@ -1599,19 +1599,71 @@ cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstP
|
||||
if (ret != CL_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
return postStagingTransferSync(event, profilingEvent, isSingleTransfer, blockingCopy);
|
||||
}
|
||||
|
||||
// Writes host memory at `ptr` into `dstImage` through staging buffers.
// The staging buffer manager's performImageWrite() is handed the `chunkWrite` callback below;
// each callback invocation copies one chunk of the source into a staging buffer with memcpy and
// submits it with a non-blocking enqueueWriteImage() for that chunk's origin/region.
// Parameters:
//   dstImage                  - destination image.
//   blockingCopy              - forwarded to postStagingTransferSync() as its blocking flag.
//   globalOrigin/globalRegion - origin and region of the whole write.
//   inputRowPitch             - source row pitch; 0 means region width * bytes per pixel is used for
//                               the pitch handed to the staging buffer manager.
//   inputSlicePitch           - forwarded unchanged to every per-chunk enqueueWriteImage().
//   ptr                       - source host memory.
//   event                     - optional output event for the whole operation.
// Returns the error from performImageWrite() if it is not CL_SUCCESS, otherwise the result of
// postStagingTransferSync().
// NOTE(review): the staging buffer manager pointer is dereferenced here without a null check;
// callers are presumably expected to gate on isValidForStagingWriteImage() first - confirm.
cl_int CommandQueue::enqueueStagingWriteImage(Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
|
||||
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event) {
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE;
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, nullptr, dstImage, this->getDevice().getRootDeviceIndex(), globalRegion, nullptr, globalOrigin};
|
||||
auto csr = &selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
// Local event used only to collect timestamps across all chunks; they are copied to the
// caller's event in postStagingTransferSync().
Event profilingEvent{this, CL_COMMAND_WRITE_IMAGE, CompletionStamp::notReady, CompletionStamp::notReady};
|
||||
if (isProfilingEnabled()) {
|
||||
profilingEvent.setQueueTimeStamp();
|
||||
}
|
||||
|
||||
// If there was only one chunk write, no barrier for OOQ is needed
|
||||
bool isSingleTransfer = false;
|
||||
// Per-chunk callback. Captures by reference, so it must not outlive this function call.
ChunkWriteImageFunc chunkWrite = [&](void *stagingBuffer, size_t bufferSize, const void *chunkPtr, const size_t *origin, const size_t *region) -> int32_t {
|
||||
// First/last chunk detection looks only at the Y coordinate (index 1) of the chunk's
// origin and region relative to the whole write.
auto isFirstTransfer = (globalOrigin[1] == origin[1]);
|
||||
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
|
||||
if (isFirstTransfer && isProfilingEnabled()) {
|
||||
profilingEvent.setSubmitTimeStamp();
|
||||
}
|
||||
memcpy(stagingBuffer, chunkPtr, bufferSize);
|
||||
// A write that fits in one chunk hands the caller's event straight to the enqueue.
if (isSingleTransfer) {
|
||||
return this->enqueueWriteImage(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, event);
|
||||
}
|
||||
|
||||
if (isFirstTransfer && isProfilingEnabled()) {
|
||||
profilingEvent.setStartTimeStamp();
|
||||
}
|
||||
|
||||
// Only the last chunk of a multi-chunk write on a queue that is not out-of-order gets the
// caller's event; otherwise no event is requested from the per-chunk enqueue.
cl_event *outEvent = nullptr;
|
||||
if (isLastTransfer && !this->isOOQEnabled()) {
|
||||
outEvent = event;
|
||||
}
|
||||
auto ret = this->enqueueWriteImage(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent);
|
||||
return ret;
|
||||
};
|
||||
auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
|
||||
// Fall back to a tightly packed row when the caller passed a row pitch of 0.
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
|
||||
auto stagingBufferManager = this->context->getStagingBufferManager();
|
||||
auto ret = stagingBufferManager->performImageWrite(ptr, globalOrigin, globalRegion, dstRowPitch, chunkWrite, csr);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
return postStagingTransferSync(event, profilingEvent, isSingleTransfer, blockingCopy);
|
||||
}
|
||||
|
||||
cl_int CommandQueue::postStagingTransferSync(cl_event *event, const Event &profilingEvent, bool isSingleTransfer, bool isBlocking) {
|
||||
cl_int ret = CL_SUCCESS;
|
||||
if (event != nullptr) {
|
||||
if (!isSingleTransfer && this->isOOQEnabled()) {
|
||||
ret = this->enqueueBarrierWithWaitList(0, nullptr, event);
|
||||
}
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
if (isProfilingEnabled()) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->copyTimestamps(profilingEvent, !isSingleTransfer);
|
||||
pEvent->setCPUProfilingPath(false);
|
||||
}
|
||||
pEvent->setCmdType(profilingEvent.getCommandType());
|
||||
}
|
||||
|
||||
if (blockingCopy) {
|
||||
if (isBlocking) {
|
||||
ret = this->finish();
|
||||
}
|
||||
return ret;
|
||||
@@ -1633,12 +1685,18 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
|
||||
return stagingBufferManager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
|
||||
}
|
||||
|
||||
// Decides whether a write to `image` from host pointer `ptr` may take the staging-buffer path.
// Returns false when the context has no staging buffer manager and for every image type other
// than CL_MEM_OBJECT_IMAGE1D and CL_MEM_OBJECT_IMAGE2D; for those two types the decision is
// delegated to the staging buffer manager together with `hasDependencies`.
// NOTE(review): this span is a rendered diff hunk without +/- markers. The `(size_t size)`
// signature and the `return ...(this->getDevice(), size);` line look like the removed
// pre-change lines - confirm against the repository before treating this as compilable code.
bool CommandQueue::isValidForStagingWriteImage(size_t size) {
|
||||
bool CommandQueue::isValidForStagingWriteImage(Image *image, const void *ptr, bool hasDependencies) {
|
||||
auto stagingBufferManager = context->getStagingBufferManager();
|
||||
if (!stagingBufferManager) {
|
||||
return false;
|
||||
}
|
||||
return stagingBufferManager->isValidForStagingWriteImage(this->getDevice(), size);
|
||||
switch (image->getImageDesc().image_type) {
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
return stagingBufferManager->isValidForStagingWriteImage(this->getDevice(), ptr, hasDependencies);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -389,8 +389,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
bool isBcs() const { return isCopyOnly; };
|
||||
|
||||
cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
|
||||
cl_int enqueueStagingWriteImage(Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
|
||||
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event);
|
||||
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
|
||||
bool isValidForStagingWriteImage(size_t size);
|
||||
bool isValidForStagingWriteImage(Image *image, const void *ptr, bool hasDependencies);
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
@@ -434,6 +436,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
void unregisterGpgpuAndBcsCsrClients();
|
||||
|
||||
cl_int postStagingTransferSync(cl_event *event, const Event &profilingEvent, bool isSingleTransfer, bool isBlocking);
|
||||
|
||||
Context *context = nullptr;
|
||||
ClDevice *device = nullptr;
|
||||
mutable EngineControl *gpgpuEngine = nullptr;
|
||||
|
||||
@@ -62,7 +62,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
StagingBufferTracker stagingBufferTracker{};
|
||||
if (!mapAllocation) {
|
||||
InternalMemoryType memoryType = InternalMemoryType::notSpecified;
|
||||
bool isCpuCopyAllowed = false;
|
||||
@@ -71,20 +70,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (!mapAllocation && this->isValidForStagingWriteImage(hostPtrSize)) {
|
||||
auto allocatedSize = hostPtrSize;
|
||||
auto [heapAllocator, stagingBuffer] = getContext().getStagingBufferManager()->requestStagingBuffer(allocatedSize, &csr);
|
||||
auto stagingBufferPtr = addrToPtr(stagingBuffer);
|
||||
if (stagingBufferPtr != nullptr) {
|
||||
stagingBufferTracker = StagingBufferTracker{heapAllocator, stagingBuffer, allocatedSize, 0};
|
||||
memcpy(stagingBufferPtr, srcPtr, hostPtrSize);
|
||||
srcPtr = stagingBufferPtr;
|
||||
|
||||
mapAllocation = getContext().getSVMAllocsManager()->getSVMAlloc(srcPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
UNRECOVERABLE_IF(mapAllocation == nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
if (mapAllocation) {
|
||||
mapAllocation->setAubWritable(true, GraphicsAllocation::defaultBank);
|
||||
mapAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank);
|
||||
@@ -134,10 +119,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
|
||||
if (stagingBufferTracker.chunkAddress != 0) {
|
||||
stagingBufferTracker.taskCountToWait = csr.peekTaskCount();
|
||||
getContext().getStagingBufferManager()->trackChunk(stagingBufferTracker);
|
||||
}
|
||||
|
||||
if (dispatchResult != CL_SUCCESS) {
|
||||
return dispatchResult;
|
||||
|
||||
@@ -243,7 +243,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
return cmdQueue;
|
||||
}
|
||||
|
||||
// Returns the command type stored in cmdType.
// NOTE(review): both the non-const and the const signature are shown because this is a rendered
// diff hunk without +/- markers; the const-qualified one is presumably the post-change line
// (postStagingTransferSync() calls getCommandType() on a `const Event &`) - confirm.
cl_command_type getCommandType() {
|
||||
cl_command_type getCommandType() const {
|
||||
return cmdType;
|
||||
}
|
||||
|
||||
|
||||
@@ -2431,8 +2431,6 @@ HWTEST_F(StagingBufferTest, givenInOrderCmdQueueWhenEnqueueStagingBufferMemcpyNo
|
||||
}
|
||||
|
||||
HWTEST_F(StagingBufferTest, givenOutOfOrderCmdQueueWhenEnqueueStagingBufferMemcpyNonBlockingThenCopySucessfull) {
|
||||
constexpr cl_command_type expectedLastCmd = CL_COMMAND_BARRIER;
|
||||
|
||||
cl_event event;
|
||||
MockCommandQueueHw<FamilyType> myCmdQ(context, pClDevice, 0);
|
||||
myCmdQ.setOoqEnabled();
|
||||
@@ -2452,8 +2450,8 @@ HWTEST_F(StagingBufferTest, givenOutOfOrderCmdQueueWhenEnqueueStagingBufferMemcp
|
||||
EXPECT_EQ(1u, numOfStagingBuffers);
|
||||
EXPECT_EQ(expectedNumOfCopies, myCmdQ.enqueueSVMMemcpyCalledCount);
|
||||
EXPECT_EQ(0u, myCmdQ.finishCalledCount);
|
||||
EXPECT_EQ(expectedLastCmd, myCmdQ.lastCommandType);
|
||||
EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());
|
||||
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_BARRIER), myCmdQ.lastCommandType);
|
||||
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_SVM_MEMCPY), pEvent->getCommandType());
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
@@ -234,17 +234,13 @@ HWTEST_F(EnqueueWriteImageTest, GivenImage1DarrayWhenReadWriteImageIsCalledThenH
|
||||
EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage2.get(), CL_FALSE, origin, region);
|
||||
|
||||
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||
if (!pCmdQ->isValidForStagingWriteImage(imageSize)) {
|
||||
auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead();
|
||||
ASSERT_NE(nullptr, temporaryAllocation1);
|
||||
EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize);
|
||||
}
|
||||
auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead();
|
||||
ASSERT_NE(nullptr, temporaryAllocation1);
|
||||
EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize);
|
||||
|
||||
EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, dstImage2.get(), CL_FALSE, origin, region);
|
||||
auto temporaryAllocation2 = csr.getTemporaryAllocations().peekHead();
|
||||
if (!pCmdQ->isValidForStagingWriteImage(imageSize)) {
|
||||
temporaryAllocation2 = temporaryAllocation2->next;
|
||||
}
|
||||
auto temporaryAllocation2 = temporaryAllocation1->next;
|
||||
|
||||
ASSERT_NE(nullptr, temporaryAllocation2);
|
||||
EXPECT_EQ(temporaryAllocation2->getUnderlyingBufferSize(), imageSize);
|
||||
}
|
||||
@@ -299,17 +295,12 @@ HWTEST_F(EnqueueWriteImageTest, GivenImage2DarrayWhenReadWriteImageIsCalledThenH
|
||||
|
||||
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||
|
||||
if (!pCmdQ->isValidForStagingWriteImage(imageSize)) {
|
||||
auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead();
|
||||
ASSERT_NE(nullptr, temporaryAllocation1);
|
||||
EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize);
|
||||
}
|
||||
auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead();
|
||||
ASSERT_NE(nullptr, temporaryAllocation1);
|
||||
EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize);
|
||||
|
||||
EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, dstImage.get(), CL_FALSE, origin, region);
|
||||
auto temporaryAllocation2 = csr.getTemporaryAllocations().peekHead();
|
||||
if (!pCmdQ->isValidForStagingWriteImage(imageSize)) {
|
||||
temporaryAllocation2 = temporaryAllocation2->next;
|
||||
}
|
||||
auto temporaryAllocation2 = temporaryAllocation1->next;
|
||||
ASSERT_NE(nullptr, temporaryAllocation2);
|
||||
EXPECT_EQ(temporaryAllocation2->getUnderlyingBufferSize(), imageSize);
|
||||
}
|
||||
@@ -810,52 +801,129 @@ HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageWithUsmPtrAndSizeLowerThanR
|
||||
svmManager->freeSVMAlloc(usmPtr);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageWithStagingCopyEnabledThenDontImportAllocation) {
|
||||
HWTEST_F(EnqueueWriteImageTest, whenIsValidForStagingWriteImageCalledThenReturnCorrectValue) {
|
||||
bool svmSupported = pDevice->getHardwareInfo().capabilityTable.ftrSvm;
|
||||
if (!svmSupported) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
DebugManagerStateRestore restorer{};
|
||||
debugManager.flags.EnableCopyWithStagingBuffers.set(1);
|
||||
auto res = EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_FALSE,
|
||||
EnqueueWriteImageTraits::origin,
|
||||
EnqueueWriteImageTraits::region,
|
||||
EnqueueWriteImageTraits::rowPitch,
|
||||
EnqueueWriteImageTraits::slicePitch,
|
||||
EnqueueWriteImageTraits::hostPtr,
|
||||
nullptr,
|
||||
0u,
|
||||
nullptr,
|
||||
nullptr);
|
||||
unsigned char ptr[16];
|
||||
|
||||
std::unique_ptr<Image> image(Image1dHelper<>::create(context));
|
||||
EXPECT_FALSE(pCmdQ->isValidForStagingWriteImage(image.get(), ptr, false));
|
||||
|
||||
image.reset(Image2dHelper<>::create(context));
|
||||
EXPECT_FALSE(pCmdQ->isValidForStagingWriteImage(image.get(), ptr, false));
|
||||
|
||||
image.reset(Image3dHelper<>::create(context));
|
||||
EXPECT_FALSE(pCmdQ->isValidForStagingWriteImage(image.get(), ptr, false));
|
||||
}
|
||||
|
||||
// Fixture for the enqueueStagingWriteImage tests: extends EnqueueWriteImageTest with a host
// source buffer of writeSize bytes and a separately created mock device on which the tests
// build their MockCommandQueueHw.
struct WriteImageStagingBufferTest : public EnqueueWriteImageTest {
|
||||
void SetUp() override {
|
||||
// presumably skips the test (before the base SetUp runs) when SVM is unsupported - see the
// matching early return in TearDown()
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
|
||||
EnqueueWriteImageTest::SetUp();
|
||||
// Source buffer for the writes; its contents are left uninitialized.
ptr = new unsigned char[writeSize];
|
||||
device.reset(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)});
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
// Nothing to release when SVM is unsupported: returns before touching `ptr` or the base
// fixture.
if (defaultHwInfo->capabilityTable.ftrSvm == false) {
|
||||
return;
|
||||
}
|
||||
delete[] ptr;
|
||||
EnqueueWriteImageTest::TearDown();
|
||||
}
|
||||
|
||||
// NOTE(review): presumably mirrors the staging buffer manager's chunk size - verify against
// the manager.
static constexpr size_t stagingBufferSize = MemoryConstants::megaByte * 2;
|
||||
// Size of the host source buffer: four times stagingBufferSize.
static constexpr size_t writeSize = stagingBufferSize * 4;
|
||||
// Owned raw buffer: allocated in SetUp(), freed in TearDown().
unsigned char *ptr;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
// Default write region; individual tests may modify it (e.g. set the height to 1).
size_t region[3] = {4, 8, 1};
|
||||
std::unique_ptr<ClDevice> device;
|
||||
cl_queue_properties props = {};
|
||||
};
|
||||
|
||||
// Non-blocking staged write without an event: expects success, exactly four per-chunk
// enqueueWriteImage calls for the fixture's 8-row region at a 1 MB row pitch, and no host
// surface allocation on the checked CSR.
// NOTE(review): finish() and the CSR check use pCmdQ/pDevice from the base fixture, while the
// write goes through mockCommandQueueHw created on `device` - confirm this is intended.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
pCmdQ->finish();
|
||||
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageWithStagingCopyEnabledAndStagingBufferFailedThenImportAllocation) {
|
||||
bool svmSupported = pDevice->getHardwareInfo().capabilityTable.ftrSvm;
|
||||
if (!svmSupported) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
DebugManagerStateRestore restorer{};
|
||||
debugManager.flags.EnableCopyWithStagingBuffers.set(1);
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->isMockHostMemoryManager = true;
|
||||
memoryManager->forceFailureInPrimaryAllocation = true;
|
||||
memoryManager->singleFailureInPrimaryAllocation = true;
|
||||
auto res = EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_FALSE,
|
||||
EnqueueWriteImageTraits::origin,
|
||||
EnqueueWriteImageTraits::region,
|
||||
EnqueueWriteImageTraits::rowPitch,
|
||||
EnqueueWriteImageTraits::slicePitch,
|
||||
EnqueueWriteImageTraits::hostPtr,
|
||||
nullptr,
|
||||
0u,
|
||||
nullptr,
|
||||
nullptr);
|
||||
HWTEST_F(WriteImageStagingBufferTest, whenBlockingEnqueueStagingWriteImageCalledThenFinishCalled) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, true, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
pCmdQ->finish();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||
EXPECT_EQ(1u, mockCommandQueueHw.finishCalledCount);
|
||||
}
|
||||
|
||||
// Staged write with an output event on a queue that has not been switched to out-of-order:
// both the queue's last command type and the returned event's command type are expected to be
// CL_COMMAND_WRITE_IMAGE.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithEventThenReturnValidEvent) {
|
||||
constexpr cl_command_type expectedLastCmd = CL_COMMAND_WRITE_IMAGE;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
cl_event event;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
|
||||
EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());
|
||||
|
||||
// The test owns the returned event and must release it.
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
// Multi-chunk staged write on an out-of-order queue: the last command seen by the queue is
// expected to be a barrier (postStagingTransferSync() enqueues one carrying the event), while
// the returned event is still expected to report CL_COMMAND_WRITE_IMAGE.
HWTEST_F(WriteImageStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingWriteImageCalledWithEventThenReturnValidEvent) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
mockCommandQueueHw.setOoqEnabled();
|
||||
cl_event event;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_BARRIER), mockCommandQueueHw.lastCommandType);
|
||||
EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_IMAGE), pEvent->getCommandType());
|
||||
|
||||
// The test owns the returned event and must release it.
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
// Out-of-order queue with the region height reduced to one row, which the test name describes
// as a single transfer: no barrier is expected, so both the queue's last command type and the
// event's command type should be CL_COMMAND_WRITE_IMAGE.
HWTEST_F(WriteImageStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingWriteImageCalledWithSingleTransferThenNoBarrierEnqueued) {
|
||||
constexpr cl_command_type expectedLastCmd = CL_COMMAND_WRITE_IMAGE;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
mockCommandQueueHw.setOoqEnabled();
|
||||
cl_event event;
|
||||
// Shrink the fixture's default 8-row region to a single row.
region[1] = 1;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
|
||||
EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());
|
||||
|
||||
// The test owns the returned event and must release it.
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
// Staged write on a profiling-enabled queue: the returned event is expected to have profiling
// enabled and to not be on the CPU profiling path.
// NOTE(review): despite the test name, no timestamp values are compared here.
HWTEST_F(WriteImageStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStagingWriteImageThenTimestampsSetCorrectly) {
|
||||
cl_event event;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
mockCommandQueueHw.setProfilingEnabled();
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
EXPECT_FALSE(pEvent->isCPUProfilingPath());
|
||||
EXPECT_TRUE(pEvent->isProfilingEnabled());
|
||||
|
||||
// The test owns the returned event and must release it.
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
// Failure propagation: with the mock's base enqueueWriteImage disabled, the staged write is
// expected to return CL_INVALID_OPERATION after exactly one per-chunk enqueue attempt.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageFailedThenPropagateErrorCode) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
// Skip the base implementation so that every mocked enqueueWriteImage call reports failure.
mockCommandQueueHw.enqueueWriteImageCallBase = false;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_INVALID_OPERATION);
|
||||
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteImageCounter);
|
||||
}
|
||||
@@ -357,17 +357,20 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override {
|
||||
enqueueWriteImageCounter++;
|
||||
return BaseClass::enqueueWriteImage(dstImage,
|
||||
blockingWrite,
|
||||
origin,
|
||||
region,
|
||||
inputRowPitch,
|
||||
inputSlicePitch,
|
||||
ptr,
|
||||
mapAllocation,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (enqueueWriteImageCallBase) {
|
||||
return BaseClass::enqueueWriteImage(dstImage,
|
||||
blockingWrite,
|
||||
origin,
|
||||
region,
|
||||
inputRowPitch,
|
||||
inputSlicePitch,
|
||||
ptr,
|
||||
mapAllocation,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
}
|
||||
return CL_INVALID_OPERATION;
|
||||
}
|
||||
void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) override {
|
||||
cpuDataTransferHandlerCalled = true;
|
||||
@@ -482,6 +485,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
std::vector<Kernel *> lastEnqueuedKernels;
|
||||
MultiDispatchInfo storedMultiDispatchInfo;
|
||||
size_t enqueueWriteImageCounter = 0;
|
||||
bool enqueueWriteImageCallBase = true;
|
||||
size_t enqueueWriteBufferCounter = 0;
|
||||
size_t requestedCmdStreamSize = 0;
|
||||
bool blockingWriteBuffer = false;
|
||||
|
||||
Reference in New Issue
Block a user