Add ClearColor allocation support for blitter

Related-To: NEO-5175

Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
Pawel Wilma 2020-11-18 23:58:42 +00:00 committed by Compute-Runtime-Automation
parent 17051459ea
commit 9bd0c69913
28 changed files with 171 additions and 33 deletions

View File

@ -7,6 +7,7 @@
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/device/device.h"
@ -690,10 +691,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
uint32_t size) {
dstOffset += ptrDiff<uintptr_t>(dstPtr, dstPtrAlloc->getGpuAddress());
srcOffset += ptrDiff<uintptr_t>(srcPtr, srcPtrAlloc->getGpuAddress());
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0);
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0, clearColorAllocation);
commandContainer.addToResidencyContainer(dstPtrAlloc);
commandContainer.addToResidencyContainer(srcPtrAlloc);
commandContainer.addToResidencyContainer(clearColorAllocation);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
@ -719,11 +724,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
Vec3<size_t> srcPtrOffset = {(copyOneCommand ? (srcRegion.originX / bytesPerPixel) : srcRegion.originX), srcRegion.originY, srcRegion.originZ};
Vec3<size_t> dstPtrOffset = {(copyOneCommand ? (dstRegion.originX / bytesPerPixel) : dstRegion.originX), dstRegion.originY, dstRegion.originZ};
copySize.x = copyOneCommand ? copySize.x / bytesPerPixel : copySize.x;
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc, srcAlloc,
dstPtrOffset, srcPtrOffset, copySize, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch);
dstRowPitch, dstSlicePitch, clearColorAllocation);
commandContainer.addToResidencyContainer(dstAlloc);
commandContainer.addToResidencyContainer(srcAlloc);
commandContainer.addToResidencyContainer(clearColorAllocation);
blitProperties.bytesPerPixel = bytesPerPixel;
blitProperties.srcSize = srcSize;
blitProperties.dstSize = dstSize;
@ -747,14 +756,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation();
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dst, src,
dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch);
dstRowPitch, dstSlicePitch, clearColorAllocation);
blitProperties.bytesPerPixel = bytesPerPixel;
blitProperties.srcSize = srcSize;
blitProperties.dstSize = dstSize;
commandContainer.addToResidencyContainer(dst);
commandContainer.addToResidencyContainer(src);
commandContainer.addToResidencyContainer(clearColorAllocation);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
appendSignalEventPostWalker(hSignalEvent);

View File

@ -486,7 +486,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const Multi
{
// Aux to NonAux
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
buffer->getGraphicsAllocation(rootDeviceIndex));
buffer->getGraphicsAllocation(rootDeviceIndex), getGpgpuCommandStreamReceiver().getClearColorAllocation());
auto auxToNonAuxNode = nodesAllocator->getTag();
timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
}
@ -494,7 +494,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const Multi
{
// NonAux to Aux
blitPropertiesContainer[bufferIndex + numBuffers] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux,
buffer->getGraphicsAllocation(rootDeviceIndex));
buffer->getGraphicsAllocation(rootDeviceIndex), getGpgpuCommandStreamReceiver().getClearColorAllocation());
auto nonAuxToAuxNode = nodesAllocator->getTag();
timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
}

View File

@ -21,6 +21,8 @@ struct ClBlitProperties {
const BuiltinOpParams &builtinOpParams) {
auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
auto clearColorAllocation = commandStreamReceiver.getClearColorAllocation();
if (BlitterConstants::BlitDirection::BufferToBuffer == blitDirection) {
auto dstOffset = builtinOpParams.dstOffset.x;
auto srcOffset = builtinOpParams.srcOffset.x;
@ -45,7 +47,7 @@ struct ClBlitProperties {
{srcOffset, builtinOpParams.srcOffset.y, builtinOpParams.srcOffset.z},
builtinOpParams.size,
builtinOpParams.srcRowPitch, builtinOpParams.srcSlicePitch,
builtinOpParams.dstRowPitch, builtinOpParams.dstSlicePitch);
builtinOpParams.dstRowPitch, builtinOpParams.dstSlicePitch, clearColorAllocation);
}
BlitProperties blitProperties{};

View File

@ -617,13 +617,14 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK
auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
//Two more surfaces from preemptionAllocation and SipKernel
//Three more surfaces from preemptionAllocation, SipKernel and clearColorAllocation
size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 1 : 0;
size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0;
size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0;
EXPECT_EQ(0, mockCsr->flushCalledCount);
EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount, cmdBuffer->surfaces.size());
EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize, cmdBuffer->surfaces.size());
}
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) {

View File

@ -1158,7 +1158,7 @@ HWTEST_F(AubCommandStreamReceiverTests, WhenBlitBufferIsCalledThenCounterIsCorre
EXPECT_EQ(0u, aubCsr->blitBufferCalled);
MockGraphicsAllocation allocation(reinterpret_cast<void *>(0x1000), 0);
BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0, 0, 0, 0, 0);
BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0, 0, 0, 0, 0, aubCsr->getClearColorAllocation());
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
aubCsr->blitBuffer(blitPropertiesContainer, true, false);

View File

@ -68,6 +68,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
//two more because of preemption allocation and sipKernel in Mid Thread preemption mode
size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
//we should have 3 heaps, tag allocation and csr command stream + cq
EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size());

View File

@ -820,18 +820,24 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
uint32_t newTaskCount = 19;
csr.taskCount = newTaskCount - 1;
EXPECT_EQ(0u, csr.recursiveLockCounter.load());
uint32_t expectedResursiveLockCount = 0u;
EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load());
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()), nullptr, hostPtr,
buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex())->getGpuAddress(), 0,
0, 0, {bltSize, 1, 1}, 0, 0, 0, 0);
if (csr.getClearColorAllocation()) {
expectedResursiveLockCount++;
}
EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load());
blitBuffer(&csr, blitProperties, true);
EXPECT_EQ(newTaskCount, csr.taskCount);
EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount);
EXPECT_EQ(newTaskCount, csr.latestSentTaskCount);
EXPECT_EQ(newTaskCount, csr.latestSentTaskCountValueDuringFlush);
EXPECT_EQ(1u, csr.recursiveLockCounter.load());
expectedResursiveLockCount++;
EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load());
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(csr.commandStream);
@ -1216,15 +1222,16 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP
size_t buffer2SlicePitch = std::get<0>(GetParam()).srcSlicePitch;
auto allocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex());
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(allocation, //dstAllocation
allocation, //srcAllocation
buffer1Offset, //dstOffset
buffer2Offset, //srcOffset
bltSize, //copySize
buffer1RowPitch, //srcRowPitch
buffer1SlicePitch, //srcSlicePitch
buffer2RowPitch, //dstRowPitch
buffer2SlicePitch //dstSlicePitch
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(allocation, //dstAllocation
allocation, //srcAllocation
buffer1Offset, //dstOffset
buffer2Offset, //srcOffset
bltSize, //copySize
buffer1RowPitch, //srcRowPitch
buffer1SlicePitch, //srcSlicePitch
buffer2RowPitch, //dstRowPitch
buffer2SlicePitch, //dstSlicePitch
csr.getClearColorAllocation() //clearColorAllocation
);
blitBuffer(&csr, blitProperties, true);

View File

@ -373,12 +373,21 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations
csr.blitBuffer(blitPropertiesContainer, false, false);
uint32_t residentAllocationsNum = 5u;
EXPECT_TRUE(csr.isMadeResident(graphicsAllocation1));
EXPECT_TRUE(csr.isMadeResident(graphicsAllocation2));
EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation()));
EXPECT_EQ(1u, csr.makeSurfacePackNonResidentCalled);
if (csr.clearColorAllocation) {
EXPECT_TRUE(csr.isMadeResident(csr.clearColorAllocation));
residentAllocationsNum++;
}
if (csr.globalFenceAllocation) {
EXPECT_TRUE(csr.isMadeResident(csr.globalFenceAllocation));
residentAllocationsNum++;
}
EXPECT_EQ(csr.globalFenceAllocation ? 6u : 5u, csr.makeResidentAllocations.size());
EXPECT_EQ(residentAllocationsNum, csr.makeResidentAllocations.size());
}
HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAllocationsResident) {
@ -418,13 +427,18 @@ HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAl
bcsCsr->blitBuffer(blitPropertiesContainer, false, false);
uint32_t residentAllocationsNum = 6u;
EXPECT_TRUE(bcsCsr->isMadeResident(graphicsAllocation1));
EXPECT_TRUE(bcsCsr->isMadeResident(graphicsAllocation2));
EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->getTagAllocation()));
EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->globalFenceAllocation));
if (bcsCsr->clearColorAllocation) {
EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->clearColorAllocation));
residentAllocationsNum++;
}
EXPECT_EQ(1u, bcsCsr->makeSurfacePackNonResidentCalled);
EXPECT_EQ(6u, bcsCsr->makeResidentAllocations.size());
EXPECT_EQ(residentAllocationsNum, bcsCsr->makeResidentAllocations.size());
}
HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
@ -620,7 +634,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1,
graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0);
graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0, csr.getClearColorAllocation());
blitBuffer(&csr, blitProperties, true);
@ -1021,7 +1035,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1,
graphicsAllocation2,
{buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, copySize, 0, 0, 0, 0);
{buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, copySize, 0, 0, 0, 0, csr.getClearColorAllocation());
blitBuffer(&csr, blitProperties, true);
@ -1162,7 +1176,7 @@ HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCor
for (int i = 0; i < 2; i++) {
auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(translationDirection[i],
graphicsAllocation);
graphicsAllocation, csr.getClearColorAllocation());
auto offset = csr.commandStream.getUsed();
blitBuffer(&csr, blitProperties, false);
@ -1499,6 +1513,24 @@ HWTEST_F(BcsTests, givenImageToHostPtrWhenBlitBufferIsCalledThenBlitCmdIsCorrect
EXPECT_EQ(blitProperties.dstGpuAddress, bltCmd->getDestinationBaseAddress());
}
// Verifies that a clear color allocation attached to BlitProperties shows up in the
// CSR's makeResidentAllocations map after a blit is dispatched.
HWTEST_F(BcsTests, givenBlitBufferCalledWhenClearColorAllocationIseSetThenItIsMadeResident) {
    MockGraphicsAllocation dstAllocation;
    MockGraphicsAllocation srcAllocation;
    MockGraphicsAllocation clearColorAllocation;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // Must be enabled before the blit: the assertions below inspect makeResidentAllocations.
    ultCsr.storeMakeResidentAllocations = true;

    Vec3<size_t> copySize = {1, 1, 1};
    auto properties = BlitProperties::constructPropertiesForCopyBuffer(&dstAllocation, &srcAllocation,
                                                                       0, 0, copySize,
                                                                       0, 0, 0, 0,
                                                                       &clearColorAllocation);
    blitBuffer(&ultCsr, properties, false);

    auto &residentAllocations = ultCsr.makeResidentAllocations;
    auto clearColorEntry = residentAllocations.find(&clearColorAllocation);
    // ASSERT (not EXPECT): the entry is dereferenced right after this check.
    ASSERT_NE(clearColorEntry, residentAllocations.end());
    EXPECT_EQ(&clearColorAllocation, clearColorEntry->first);
    // NOTE(review): the mapped value is presumably a per-allocation makeResident call count - confirm.
    EXPECT_EQ(1u, clearColorEntry->second);
}
struct MockScratchSpaceController : ScratchSpaceControllerBase {
using ScratchSpaceControllerBase::privateScratchAllocation;
using ScratchSpaceControllerBase::ScratchSpaceControllerBase;

View File

@ -324,6 +324,15 @@ HWTEST_F(CommandStreamReceiverTest, whenDirectSubmissionDisabledThenExpectNoFeat
EXPECT_FALSE(csr.isBlitterDirectSubmissionEnabled());
}
// Verifies that cleanupResources() resets the CSR's clearColorAllocation pointer to nullptr.
HWTEST_F(CommandStreamReceiverTest, whenClearColorAllocationIsCreatedThenItIsDestroyedInCleanupResources) {
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // The mock allocation is handed to the CSR as a raw pointer; the unique_ptr gives up ownership.
    ultCsr.clearColorAllocation = std::make_unique<MockGraphicsAllocation>().release();
    EXPECT_NE(nullptr, ultCsr.clearColorAllocation);

    ultCsr.cleanupResources();
    EXPECT_EQ(nullptr, ultCsr.clearColorAllocation);
}
struct InitDirectSubmissionFixture {
void SetUp() {
DebugManager.flags.EnableDirectSubmission.set(1);

View File

@ -937,7 +937,9 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) {
virtualEvent->submitCommand(false);
EXPECT_EQ(pDevice->getUltCommandStreamReceiver<FamilyType>().recursiveLockCounter, 2u);
uint32_t expectedLockCounter = pDevice->getDefaultEngine().commandStreamReceiver->getClearColorAllocation() ? 3u : 2u;
EXPECT_EQ(expectedLockCounter, pDevice->getUltCommandStreamReceiver<FamilyType>().recursiveLockCounter);
}
HWTEST_F(EventTest, givenVirtualEventWhenSubmitCommandEventNotReadyAndEventWithoutCommandThenOneLockCsrNeeded) {

View File

@ -40,3 +40,9 @@ GEN11TEST_F(CommandStreamReceiverHwTestGen11, whenProgrammingMiSemaphoreWaitThen
MI_SEMAPHORE_WAIT miSemaphoreWait = FamilyType::cmdInitMiSemaphoreWait;
EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode());
}
// Verifies that calling getClearColorAllocation() on a Gen11 CSR leaves the
// clearColorAllocation member as nullptr.
GEN11TEST_F(CommandStreamReceiverHwTestGen11, givenCommandStreamReceiverWhenGetClearColorAllocationIsCalledThenNothingHappens) {
    MockCsrHw<FamilyType> csr(*pDevice->executionEnvironment,
                              pDevice->getRootDeviceIndex(),
                              pDevice->getDeviceBitfield());

    // The return value is deliberately ignored; only the member state is checked.
    csr.getClearColorAllocation();

    EXPECT_EQ(nullptr, csr.clearColorAllocation);
}

View File

@ -524,6 +524,8 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
size_t getCmdsSizeForHardwareContext() const override {
return 0;
}
// Mock override of the pure virtual accessor: always reports that there is no clear color allocation.
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
std::map<const void *, size_t> residency;
bool passResidencyCallToBaseClass = true;
std::unique_ptr<ExecutionEnvironment> mockExecutionEnvironment;

View File

@ -53,6 +53,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::checkForNewResources;
using BaseClass::CommandStreamReceiver::checkImplicitFlushForGpuIdle;
using BaseClass::CommandStreamReceiver::cleanupResources;
using BaseClass::CommandStreamReceiver::clearColorAllocation;
using BaseClass::CommandStreamReceiver::commandStream;
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
using BaseClass::CommandStreamReceiver::debugPauseStateAddress;

View File

@ -13,6 +13,7 @@ using namespace NEO;
template <typename GfxFamily>
class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver<GfxFamily> {
public:
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::makeResident;

View File

@ -764,6 +764,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe
//preemption allocation
size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0;
auto recordedCmdBuffer = cmdBuffers.peekHead();
EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size());
@ -781,6 +782,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe
EXPECT_EQ(testedCsr->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation);
int ioctlUserPtrCnt = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 4 : 3;
ioctlUserPtrCnt += testedCsr->clearColorAllocation ? 1 : 0;
EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.total);
EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr);
@ -833,6 +835,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen
//preemption allocation
size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0;
csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0;
//validate that submitted command buffer has what we want
EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.buffer_count);
@ -856,6 +859,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen
int ioctlExecCnt = 1;
int ioctlUserPtrCnt = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 3 : 2;
ioctlUserPtrCnt += testedCsr->clearColorAllocation ? 1 : 0;
EXPECT_EQ(ioctlExecCnt, this->mock->ioctl_cnt.execbuffer2);
EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr);
EXPECT_EQ(ioctlExecCnt + ioctlUserPtrCnt, this->mock->ioctl_cnt.total);

View File

@ -1645,6 +1645,12 @@ HWTEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageIsBeingCreated
additionalDestroyDeviceIoctls.gemWait++;
}
if (device->getDefaultEngine().commandStreamReceiver->getClearColorAllocation() != nullptr) {
mock->ioctl_expected.gemUserptr++;
additionalDestroyDeviceIoctls.gemClose++;
additionalDestroyDeviceIoctls.gemWait++;
}
MockContext context(device);
cl_image_format imageFormat;

View File

@ -89,6 +89,7 @@ class WddmCommandStreamFixture {
template <typename GfxFamily>
struct MockWddmCsr : public WddmCommandStreamReceiver<GfxFamily> {
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::getCS;
@ -852,6 +853,7 @@ HWTEST_F(WddmCommandStreamMockGdiTest, givenRecordedCommandBufferWhenItIsSubmitt
csr->flushBatchedSubmissions();
csrSurfaceCount += csr->clearColorAllocation ? 1 : 0;
EXPECT_TRUE(cmdBuffers.peekIsEmpty());
EXPECT_EQ(1u, wddm->submitResult.called);

View File

@ -221,6 +221,11 @@ void CommandStreamReceiver::cleanupResources() {
getMemoryManager()->freeGraphicsMemory(perDssBackedBuffer);
perDssBackedBuffer = nullptr;
}
if (clearColorAllocation) {
getMemoryManager()->freeGraphicsMemory(clearColorAllocation);
clearColorAllocation = nullptr;
}
}
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {

View File

@ -222,6 +222,8 @@ class CommandStreamReceiver {
virtual void initializeDefaultsForInternalEngine(){};
// Returns the clear color allocation associated with this command stream receiver.
// Pure virtual: every concrete receiver must implement it. The result may be nullptr,
// so callers must null-check before dereferencing.
virtual GraphicsAllocation *getClearColorAllocation() = 0;
protected:
void cleanupResources();
void printDeviceIndex();
@ -262,6 +264,7 @@ class CommandStreamReceiver {
GraphicsAllocation *preemptionAllocation = nullptr;
GraphicsAllocation *debugSurface = nullptr;
GraphicsAllocation *perDssBackedBuffer = nullptr;
GraphicsAllocation *clearColorAllocation = nullptr;
IndirectHeap *indirectHeap[IndirectHeap::NUM_TYPES];
OsContext *osContext = nullptr;

View File

@ -107,6 +107,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
aub_stream::EngineType contextEngineType,
bool &startOnInit,
bool &startInContext);
GraphicsAllocation *getClearColorAllocation() override;
protected:
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);

View File

@ -999,6 +999,9 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
makeResident(*blitProperties.srcAllocation);
makeResident(*blitProperties.dstAllocation);
if (blitProperties.clearColorAllocation) {
makeResident(*blitProperties.clearColorAllocation);
}
}
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);

View File

@ -104,4 +104,9 @@ bool CommandStreamReceiverHw<GfxFamily>::checkPlatformSupportsGpuIdleImplicitFlu
return false;
}
// Implementation of getClearColorAllocation() for this translation unit: it always
// returns nullptr and creates nothing, i.e. no clear color allocation is provided here.
template <typename GfxFamily>
GraphicsAllocation *CommandStreamReceiverHw<GfxFamily>::getClearColorAllocation() {
return nullptr;
}
} // namespace NEO

View File

@ -26,6 +26,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons
size_t hostRowPitch, size_t hostSlicePitch,
size_t gpuRowPitch, size_t gpuSlicePitch) {
GraphicsAllocation *hostAllocation = nullptr;
auto clearColorAllocation = commandStreamReceiver.getClearColorAllocation();
if (preallocatedHostAllocation) {
hostAllocation = preallocatedHostAllocation;
@ -50,6 +51,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons
AuxTranslationDirection::None, // auxTranslationDirection
memObjAllocation, // dstAllocation
hostAllocation, // srcAllocation
clearColorAllocation, // clearColorAllocation
memObjGpuVa, // dstGpuAddress
hostAllocGpuVa, // srcGpuAddress
copySize, // copySize
@ -68,6 +70,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons
AuxTranslationDirection::None, // auxTranslationDirection
hostAllocation, // dstAllocation
memObjAllocation, // srcAllocation
clearColorAllocation, // clearColorAllocation
hostAllocGpuVa, // dstGpuAddress
memObjGpuVa, // srcGpuAddress
copySize, // copySize
@ -83,7 +86,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons
BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch) {
size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation) {
copySize.y = copySize.y ? copySize.y : 1;
copySize.z = copySize.z ? copySize.z : 1;
@ -94,6 +97,7 @@ BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocati
AuxTranslationDirection::None, // auxTranslationDirection
dstAllocation, // dstAllocation
srcAllocation, // srcAllocation
clearColorAllocation, // clearColorAllocation
dstAllocation->getGpuAddress(), // dstGpuAddress
srcAllocation->getGpuAddress(), // srcGpuAddress
copySize, // copySize
@ -106,7 +110,7 @@ BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocati
}
BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation) {
GraphicsAllocation *allocation, GraphicsAllocation *clearColorAllocation) {
auto allocationSize = allocation->getUnderlyingBufferSize();
return {
@ -116,6 +120,7 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati
auxTranslationDirection, // auxTranslationDirection
allocation, // dstAllocation
allocation, // srcAllocation
clearColorAllocation, // clearColorAllocation
allocation->getGpuAddress(), // dstGpuAddress
allocation->getGpuAddress(), // srcGpuAddress
{allocationSize, 1, 1}, // copySize

View File

@ -47,10 +47,10 @@ struct BlitProperties {
static BlitProperties constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch);
size_t dstRowPitch, size_t dstSlicePitch, GraphicsAllocation *clearColorAllocation);
static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation);
GraphicsAllocation *allocation, GraphicsAllocation *clearColorAllocation);
static void setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies &timestampPacketDependencies,
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
@ -63,6 +63,7 @@ struct BlitProperties {
GraphicsAllocation *dstAllocation = nullptr;
GraphicsAllocation *srcAllocation = nullptr;
GraphicsAllocation *clearColorAllocation = nullptr;
uint64_t dstGpuAddress = 0;
uint64_t srcGpuAddress = 0;
@ -147,5 +148,6 @@ struct BlitCommandsHelper {
static void programGlobalSequencerFlush(LinearStream &commandStream);
static size_t getSizeForGlobalSequencerFlush();
static bool miArbCheckWaRequired();
static void appendClearColor(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd);
};
} // namespace NEO

View File

@ -90,4 +90,8 @@ bool BlitCommandsHelper<GfxFamily>::miArbCheckWaRequired() {
return false;
}
// No-op in this implementation: the body is empty, so blitProperties is not read
// and blitCmd is left unmodified.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::appendClearColor(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd) {
}
} // namespace NEO

View File

@ -20,10 +20,14 @@ using namespace NEO;
TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBufferRegionsThenPropertiesCreatedCorrectly) {
uint32_t src[] = {1, 2, 3, 4};
uint32_t dst[] = {4, 3, 2, 1};
uint32_t clear[] = {5, 6, 7, 8};
uint64_t srcGpuAddr = 0x12345;
uint64_t dstGpuAddr = 0x54321;
uint64_t clearGpuAddr = 0x5678;
std::unique_ptr<MockGraphicsAllocation> srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src)));
std::unique_ptr<MockGraphicsAllocation> dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst)));
std::unique_ptr<GraphicsAllocation> clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear)));
Vec3<size_t> srcOffsets{1, 2, 3};
Vec3<size_t> dstOffsets{3, 2, 1};
Vec3<size_t> copySize{2, 2, 2};
@ -36,11 +40,12 @@ TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBuffe
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc.get(), srcAlloc.get(),
dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch);
dstRowPitch, dstSlicePitch, clearColorAllocation.get());
EXPECT_EQ(blitProperties.blitDirection, BlitterConstants::BlitDirection::BufferToBuffer);
EXPECT_EQ(blitProperties.dstAllocation, dstAlloc.get());
EXPECT_EQ(blitProperties.srcAllocation, srcAlloc.get());
EXPECT_EQ(blitProperties.clearColorAllocation, clearColorAllocation.get());
EXPECT_EQ(blitProperties.dstGpuAddress, dstGpuAddr);
EXPECT_EQ(blitProperties.srcGpuAddress, srcGpuAddr);
EXPECT_EQ(blitProperties.copySize, copySize);
@ -55,10 +60,13 @@ TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBuffe
TEST(BlitCommandsHelperTest, GivenCopySizeYAndZEqual0WhenConstructingPropertiesForBufferRegionsThenCopyZAndZEqual1) {
uint32_t src[] = {1, 2, 3, 4};
uint32_t dst[] = {4, 3, 2, 1};
uint32_t clear[] = {5, 6, 7, 8};
uint64_t srcGpuAddr = 0x12345;
uint64_t dstGpuAddr = 0x54321;
uint64_t clearGpuAddr = 0x5678;
std::unique_ptr<MockGraphicsAllocation> srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src)));
std::unique_ptr<MockGraphicsAllocation> dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst)));
std::unique_ptr<GraphicsAllocation> clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear)));
Vec3<size_t> srcOffsets{1, 2, 3};
Vec3<size_t> dstOffsets{3, 2, 1};
Vec3<size_t> copySize{2, 0, 0};
@ -71,7 +79,7 @@ TEST(BlitCommandsHelperTest, GivenCopySizeYAndZEqual0WhenConstructingPropertiesF
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc.get(), srcAlloc.get(),
dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch);
dstRowPitch, dstSlicePitch, clearColorAllocation.get());
Vec3<size_t> expectedSize{copySize.x, 1, 1};
EXPECT_EQ(blitProperties.copySize, expectedSize);
}
@ -361,6 +369,7 @@ HWTEST2_F(BlitTests, givenMemoryAndImageWhenDispatchCopyImageCallThenCommandAdde
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
MockGraphicsAllocation srcAlloc;
MockGraphicsAllocation dstAlloc;
MockGraphicsAllocation clearColorAllocation;
Vec3<size_t> dstOffsets = {0, 0, 0};
Vec3<size_t> srcOffsets = {0, 0, 0};
@ -376,7 +385,7 @@ HWTEST2_F(BlitTests, givenMemoryAndImageWhenDispatchCopyImageCallThenCommandAdde
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(&dstAlloc, &srcAlloc,
dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch);
dstRowPitch, dstSlicePitch, &clearColorAllocation);
uint32_t streamBuffer[100] = {};
LinearStream stream(streamBuffer, sizeof(streamBuffer));

View File

@ -15,6 +15,8 @@
#include "gtest/gtest.h"
#include <cstring>
using namespace NEO;
using BlitTests = Test<DeviceFixture>;
@ -230,3 +232,11 @@ HWTEST2_F(BlitTests, givenLinearSrcAndDestinationImagesWhenAppendImageCommandsTh
EXPECT_EQ(bltCmd.getDestinationPitch(), static_cast<uint32_t>(properties.dstRowPitch));
EXPECT_EQ(bltCmd.getSourcePitch(), static_cast<uint32_t>(properties.srcRowPitch));
}
// Verifies that appendClearColor() leaves the XY_COPY_BLT command byte-for-byte unchanged
// on Gen12LP when called with default-initialized BlitProperties.
HWTEST2_F(BlitTests, givenBlitCommandWhenAppendClearColorCalledThenNothingHappens, IsGen12LP) {
    // Both commands start from the same initial template; only one is passed to the helper.
    auto referenceCmd = FamilyType::cmdInitXyCopyBlt;
    auto cmdUnderTest = FamilyType::cmdInitXyCopyBlt;
    BlitProperties emptyProperties = {};

    BlitCommandsHelper<FamilyType>::appendClearColor(emptyProperties, cmdUnderTest);

    const auto difference = std::memcmp(&referenceCmd, &cmdUnderTest, sizeof(cmdUnderTest));
    EXPECT_EQ(0, difference);
}

View File

@ -91,6 +91,8 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
return const_cast<volatile uint32_t *>(&mockTagAddress);
}
// Mock override of the pure virtual accessor: always returns nullptr (no clear color allocation).
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
std::vector<char> instructionHeapReserveredData;
int *flushBatchedSubmissionsCallCounter = nullptr;
uint32_t waitForCompletionWithTimeoutCalled = 0;
@ -109,6 +111,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiverHw<GfxFamily>::flushStamp;
using CommandStreamReceiverHw<GfxFamily>::programL3;
using CommandStreamReceiverHw<GfxFamily>::programVFEState;
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::globalFenceAllocation;