Pass private scratch size to scratch space controller

Related-To: NEO-3190

Change-Id: I6f1e71481679492516d898226de6a1e721896e81
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2019-06-28 09:37:04 +02:00
committed by sys_ocldev
parent 2f42f332d8
commit 27f3f8ea8f
10 changed files with 68 additions and 22 deletions

View File

@ -442,7 +442,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
}
}
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
getCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(), multiDispatchInfo.getRequiredPrivateScratchSize());
}
template <typename GfxFamily>

View File

@ -214,10 +214,13 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
this->tagAddress = allocation ? reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer()) : nullptr;
}
void CommandStreamReceiver::setRequiredScratchSize(uint32_t newRequiredScratchSize) {
// Updates the tracked scratch requirements. Each stored value is only ever raised here:
// a request that is not larger than the value already stored leaves that value untouched.
void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) {
    // regular scratch space
    if (requiredScratchSize < newRequiredScratchSize) {
        requiredScratchSize = newRequiredScratchSize;
    }

    // private scratch space, tracked independently of the regular one
    if (requiredPrivateScratchSize < newRequiredPrivateScratchSize) {
        requiredPrivateScratchSize = newRequiredPrivateScratchSize;
    }
}
GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() {

View File

@ -116,7 +116,7 @@ class CommandStreamReceiver {
void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
void setRequiredScratchSize(uint32_t newRequiredScratchSize);
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
GraphicsAllocation *getScratchAllocation();
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
GraphicsAllocation *allocateDebugSurface(size_t size);
@ -235,6 +235,7 @@ class CommandStreamReceiver {
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
uint32_t requiredScratchSize = 0;
uint32_t requiredPrivateScratchSize = 0;
int8_t lastSentCoherencyRequest = -1;
int8_t lastMediaSamplerConfig = -1;

View File

@ -224,7 +224,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (requiredScratchSize) {
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
requiredScratchSize,
0u,
requiredPrivateScratchSize,
this->taskCount,
this->osContext->getContextId(),
stateBaseAddressDirty,

View File

@ -22,6 +22,10 @@ uint32_t DispatchInfo::getRequiredScratchSize() const {
return (kernel == nullptr) ? 0 : kernel->getScratchSize();
}
// Private scratch size needed by this dispatch: the value reported by the attached kernel,
// or 0 when no kernel is set.
uint32_t DispatchInfo::getRequiredPrivateScratchSize() const {
    if (kernel == nullptr) {
        return 0;
    }
    return kernel->getPrivateScratchSize();
}
Kernel *MultiDispatchInfo::peekMainKernel() const {
if (dispatchInfos.size() == 0) {
return nullptr;

View File

@ -21,16 +21,17 @@ class Kernel;
class DispatchInfo {
public:
DispatchInfo() : gws(0, 0, 0), elws(0, 0, 0), offset(0, 0, 0), agws(0, 0, 0), lws(0, 0, 0), twgs(0, 0, 0), nwgs(0, 0, 0), swgs(0, 0, 0) {}
DispatchInfo(Kernel *k, uint32_t d, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(0, 0, 0), lws(0, 0, 0), twgs(0, 0, 0), nwgs(0, 0, 0), swgs(0, 0, 0) {}
DispatchInfo(Kernel *k, uint32_t d, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
DispatchInfo() = default;
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {}
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
bool isPipeControlRequired() const { return pipeControlRequired; }
void setPipeControlRequired(bool blocking) { this->pipeControlRequired = blocking; }
bool usesSlm() const;
bool usesStatelessPrintfSurface() const;
uint32_t getRequiredScratchSize() const;
uint32_t getRequiredPrivateScratchSize() const;
void setKernel(Kernel *kernel) { this->kernel = kernel; }
Kernel *getKernel() const { return kernel; }
uint32_t getDim() const { return dim; }
@ -60,14 +61,14 @@ class DispatchInfo {
Kernel *kernel = nullptr;
uint32_t dim = 0;
Vec3<size_t> gws; //global work size
Vec3<size_t> elws; //enqueued local work size
Vec3<size_t> offset; //global offset
Vec3<size_t> agws; //actual global work size
Vec3<size_t> lws; //local work size
Vec3<size_t> twgs; //total number of work groups
Vec3<size_t> nwgs; //number of work groups
Vec3<size_t> swgs; //start of work groups
Vec3<size_t> gws{0, 0, 0}; //global work size
Vec3<size_t> elws{0, 0, 0}; //enqueued local work size
Vec3<size_t> offset{0, 0, 0}; //global offset
Vec3<size_t> agws{0, 0, 0}; //actual global work size
Vec3<size_t> lws{0, 0, 0}; //local work size
Vec3<size_t> twgs{0, 0, 0}; //total number of work groups
Vec3<size_t> nwgs{0, 0, 0}; //number of work groups
Vec3<size_t> swgs{0, 0, 0}; //start of work groups
};
struct MultiDispatchInfo {
@ -113,6 +114,14 @@ struct MultiDispatchInfo {
return ret;
}
uint32_t getRequiredPrivateScratchSize() const {
uint32_t ret = 0;
for (const auto &dispatchInfo : dispatchInfos) {
ret = std::max(ret, dispatchInfo.getRequiredPrivateScratchSize());
}
return ret;
}
// Returns whatever dispatchInfos.begin() yields, exposed as a DispatchInfo pointer.
DispatchInfo *begin() {
return dispatchInfos.begin();
}

View File

@ -334,6 +334,26 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra
EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc);
}
// Enqueues a kernel whose mediaVfeStateSlot1 patch token carries a per-thread scratch size and
// checks that the same value is stored in the command stream receiver's requiredPrivateScratchSize.
HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) {
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.getMemoryManager()->setForce32BitAllocations(false);

    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    // Value-initialize the patch token: only PerThreadScratchSpace is assigned below, and the
    // remaining fields must not be left indeterminate while the kernel info points at this object.
    SPatchMediaVFEState mediaVFEstate = {};
    uint32_t privateScratchSize = 4096u;

    mediaVFEstate.PerThreadScratchSpace = privateScratchSize;

    MockKernelWithInternals mockKernel(*pDevice);
    // mediaVFEstate stays alive until the end of the test, so handing out its address is safe here.
    mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstate;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize);
}
HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) {
size_t gws[3] = {0, 0, 0};
MockKernelWithInternals mockKernel(*pDevice);

View File

@ -6,6 +6,7 @@
*/
#include "runtime/command_stream/csr_definitions.h"
#include "runtime/command_stream/scratch_space_controller.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/helpers/state_base_address.h"
@ -410,7 +411,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, handleTagAndScratchAllocationsResi
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment);
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
commandStreamReceiver->setRequiredScratchSize(1024); // whatever > 0
commandStreamReceiver->setRequiredScratchSizes(1024, 0); // whatever > 0
flushTask(*commandStreamReceiver);
@ -686,13 +687,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeDoNotS
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size)
commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size)
flushTask(*commandStreamReceiver);
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
ASSERT_NE(scratchAllocation, nullptr);
commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
flushTask(*commandStreamReceiver); // 2nd flush
@ -720,13 +721,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeStore3
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size)
commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size)
flushTask(*commandStreamReceiver);
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
ASSERT_NE(scratchAllocation, nullptr);
commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
flushTask(*commandStreamReceiver); // 2nd flush

View File

@ -337,3 +337,9 @@ TEST(DispatchInfoBasicTests, givenDispatchInfoWhenSetCanBePartitionIsCalledThenS
dispatchInfo.setCanBePartitioned(true);
EXPECT_TRUE(dispatchInfo.peekCanBePartitioned());
}
// A default-constructed DispatchInfo has no kernel, so its required private scratch size must be 0.
TEST(DispatchInfoBasicTests, givenDispatchInfoWithoutKernelWhenGettingSizeForPrivateScratchThenZeroIsReturned) {
    DispatchInfo kernellessDispatchInfo;

    // precondition: no kernel attached
    EXPECT_EQ(nullptr, kernellessDispatchInfo.getKernel());

    EXPECT_EQ(0u, kernellessDispatchInfo.getRequiredPrivateScratchSize());
}

View File

@ -56,9 +56,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::mediaVfeStateDirty;
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize;
using BaseClass::CommandStreamReceiver::requiredScratchSize;
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
using BaseClass::CommandStreamReceiver::scratchSpaceController;
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::taskCount;