HostPtr surface makeResident must be called once
Change-Id: I9cb04e3affdd8b8634466621b50326a088ecdcf9
This commit is contained in:
parent
f9254c8de6
commit
86bb715b95
|
@ -489,10 +489,16 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
}
|
||||
|
||||
auto mediaSamplerRequired = false;
|
||||
Kernel *kernel = nullptr;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.getKernel()->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= dispatchInfo.getKernel()->requiresCoherency();
|
||||
mediaSamplerRequired |= dispatchInfo.getKernel()->isVmeKernel();
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
kernel = dispatchInfo.getKernel();
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
kernel->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= kernel->requiresCoherency();
|
||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
|
@ -617,11 +623,17 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
|||
} else {
|
||||
//store task data in event
|
||||
std::vector<Surface *> allSurfaces;
|
||||
Kernel *kernel = nullptr;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.getKernel()->getResidency(allSurfaces);
|
||||
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
|
||||
allSurfaces.push_back(surface->duplicate());
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
kernel = dispatchInfo.getKernel();
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
kernel->getResidency(allSurfaces);
|
||||
}
|
||||
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
|
||||
allSurfaces.push_back(surface->duplicate());
|
||||
}
|
||||
|
||||
auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -274,9 +274,9 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
// Reports the isVmeWorkload flag stored in this kernel's kernelInfo.
bool isVmeKernel() { return kernelInfo.isVmeWorkload; }
|
||||
|
||||
//residency for kernel surfaces
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
void updateWithCompletionStamp(CommandStreamReceiver &commandStreamReceiver, CompletionStamp *completionStamp);
|
||||
void getResidency(std::vector<Surface *> &dst);
|
||||
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
|
||||
bool requiresCoherency();
|
||||
void resetSharedObjectsPatchAddresses();
|
||||
// Getter for the usingSharedObjArgs flag.
bool isUsingSharedObjArgs() {
    return usingSharedObjArgs;
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -30,6 +30,7 @@
|
|||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "unit_tests/command_queue/command_queue_fixture.h"
|
||||
#include "unit_tests/fixtures/buffer_fixture.h"
|
||||
#include "unit_tests/fixtures/context_fixture.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/fixtures/memory_management_fixture.h"
|
||||
|
@ -990,3 +991,85 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted
|
|||
EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel);
|
||||
EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask);
|
||||
}
|
||||
|
||||
// Enqueues an NDRange kernel with no event dependencies and checks that
// MockKernel::makeResident was invoked exactly once and that every allocation
// recorded by the CSR was made resident exactly once.
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) {
    MockKernelWithInternals mockKernelWithInternals(*pDevice);
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto mockProgram = mockKernelWithInternals.mockProgram;
    // NOTE(review): presumably needed so that gws = 63 with lws = 16 (not a
    // multiple) is accepted and results in a split dispatch - confirm.
    mockProgram->setAllowNonUniform(true);

    // Ask the CSR to count makeResident calls per allocation.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    size_t offset = 0;
    size_t gws = 63;
    size_t lws = 16;

    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, mockKernel->makeResidentCalls);

    // No allocation may have been made resident more than once.
    for (const auto &residentEntry : csr.makeResidentAllocations) {
        EXPECT_EQ(1u, residentEntry.second);
    }
}
|
||||
|
||||
// Enqueues an NDRange kernel that waits on a user event and checks that
// MockKernel::getResidency was invoked exactly once; after the event is set to
// CL_COMPLETE, every allocation recorded by the CSR must have been made
// resident exactly once.
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) {
    UserEvent userEvent(context);
    MockKernelWithInternals mockKernelWithInternals(*pDevice);
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto mockProgram = mockKernelWithInternals.mockProgram;
    // NOTE(review): presumably needed so that gws = 63 with lws = 16 (not a
    // multiple) is accepted and results in a split dispatch - confirm.
    mockProgram->setAllowNonUniform(true);

    // Ask the CSR to count makeResident calls per allocation.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    size_t offset = 0;
    size_t gws = 63;
    size_t lws = 16;

    // The enqueue depends on the user event created above.
    cl_event blockedEvent = &userEvent;

    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, mockKernel->getResidencyCalls);

    // NOTE(review): presumably completing the event submits the stored
    // command, which is when the CSR records makeResident calls - confirm.
    userEvent.setStatus(CL_COMPLETE);

    // No allocation may have been made resident more than once.
    for (const auto &residentEntry : csr.makeResidentAllocations) {
        EXPECT_EQ(1u, residentEntry.second);
    }
}
|
||||
|
||||
// Enqueues a non-blocking read into a host pointer located one byte below a
// cache-line boundary, gated by a user event. After the event completes, each
// allocation recorded by the CSR must have been made resident once, except the
// buffer's own allocation, which is expected three times.
HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
    UserEvent userEvent(context);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    BufferDefaults::context = context;
    std::unique_ptr<Buffer> buffer(BufferHelper<>::create());
    GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation();

    // Start one cache line into the array, round up to a cache-line boundary,
    // then step back one byte so the destination is deliberately misaligned.
    char array[3 * MemoryConstants::cacheLineSize];
    char *ptr = &array[MemoryConstants::cacheLineSize];
    ptr = alignUp(ptr, MemoryConstants::cacheLineSize) - 1;

    cl_event blockedEvent = &userEvent;

    cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, 1, &blockedEvent, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    userEvent.setStatus(CL_COMPLETE);

    for (const auto &residentEntry : csr.makeResidentAllocations) {
        // The buffer surface is added three times: once for each kernel from
        // the split and once as a base surface of the enqueueReadBuffer call.
        const uint32_t expected = (residentEntry.first == bufferAllocation) ? 3u : 1u;
        EXPECT_EQ(expected, residentEntry.second);
    }
}
|
||||
|
|
|
@ -43,14 +43,14 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
|||
using BaseClass::CommandStreamReceiver::dispatchMode;
|
||||
using BaseClass::CommandStreamReceiver::flushStamp;
|
||||
using BaseClass::CommandStreamReceiver::isPreambleSent;
|
||||
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
|
||||
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
|
||||
using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
|
||||
using BaseClass::CommandStreamReceiver::lastSentL3Config;
|
||||
using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
|
||||
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
|
||||
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
|
||||
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
|
||||
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
|
||||
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
|
||||
using BaseClass::CommandStreamReceiver::taskCount;
|
||||
using BaseClass::CommandStreamReceiver::taskLevel;
|
||||
|
||||
|
@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
|||
tempTagLocation = new GraphicsAllocation(nullptr, 0);
|
||||
this->tagAllocation = tempTagLocation;
|
||||
this->tagAddress = reinterpret_cast<uint32_t *>(tempTagLocation->getUnderlyingBuffer());
|
||||
this->storeMakeResidentAllocations = false;
|
||||
}
|
||||
|
||||
virtual MemoryManager *createMemoryManager(bool enable64kbPages) override {
|
||||
|
@ -82,6 +83,23 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
|||
using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;
|
||||
// Read-only accessor exposing the samplerCacheFlushRequired member.
SamplerCacheFlushState peekSamplerCacheFlushRequired() const {
    return this->samplerCacheFlushRequired;
}
|
||||
|
||||
// When storeMakeResidentAllocations is set, counts how many times each
// allocation has been passed to makeResident (keyed by allocation address in
// makeResidentAllocations). Always forwards the call to BaseClass afterwards.
void makeResident(GraphicsAllocation &gfxAllocation) override {
    if (storeMakeResidentAllocations) {
        // operator[] value-initializes a missing counter to 0, so a single
        // lookup handles both the first and any repeated call.
        ++makeResidentAllocations[&gfxAllocation];
    }
    BaseClass::makeResident(gfxAllocation);
}
|
||||
|
||||
std::map<GraphicsAllocation *, uint32_t> makeResidentAllocations;
|
||||
bool storeMakeResidentAllocations;
|
||||
|
||||
protected:
|
||||
using BaseClass::CommandStreamReceiver::memoryManager;
|
||||
using BaseClass::CommandStreamReceiver::tagAddress;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -53,4 +53,14 @@ bool MockKernel::isPatched() const {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Records the invocation in makeResidentCalls, then delegates to the real
// Kernel::makeResident.
void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
    ++makeResidentCalls;
    Kernel::makeResident(commandStreamReceiver);
}
|
||||
|
||||
// Records the invocation in getResidencyCalls, then delegates to the real
// Kernel::getResidency with the same destination vector.
void MockKernel::getResidency(std::vector<Surface *> &dst) {
    ++getResidencyCalls;
    Kernel::getResidency(dst);
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -182,6 +182,12 @@ class MockKernel : public Kernel {
|
|||
using Kernel::kernelArgHandlers;
|
||||
|
||||
// Test helper: overwrites the usingSharedObjArgs flag with the given value.
void setUsingSharedArgs(bool usingSharedArgValue) {
    this->usingSharedObjArgs = usingSharedArgValue;
}
|
||||
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver) override;
|
||||
void getResidency(std::vector<Surface *> &dst) override;
|
||||
|
||||
uint32_t makeResidentCalls = 0;
|
||||
uint32_t getResidencyCalls = 0;
|
||||
};
|
||||
|
||||
// The class below has enough internals to service an enqueue operation.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -99,6 +99,9 @@ class MockProgram : public Program {
|
|||
// Test helper: overwrites llvmBinarySize with the given size.
void SetLLVMBinarySize(size_t bsz) {
    llvmBinarySize = bsz;
}
|
||||
|
||||
uint64_t getHash();
|
||||
// Test helper: overwrites the allowNonUniform flag with the given value.
void setAllowNonUniform(bool allow) { allowNonUniform = allow; }
|
||||
|
||||
bool contextSet = false;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue