mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Pass private scratch size to scratch space controller
Related-To: NEO-3190
Change-Id: I6f1e71481679492516d898226de6a1e721896e81
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
2f42f332d8
commit
27f3f8ea8f
@ -442,7 +442,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
}
|
||||
}
|
||||
|
||||
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
||||
getCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(), multiDispatchInfo.getRequiredPrivateScratchSize());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -214,10 +214,13 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
|
||||
this->tagAddress = allocation ? reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer()) : nullptr;
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::setRequiredScratchSize(uint32_t newRequiredScratchSize) {
|
||||
// Records the scratch-space requirements for upcoming work.
//
// Both stored values only ever grow: each member is overwritten solely when
// the incoming value is strictly larger than the one already stored, so a
// call with smaller (or zero) sizes leaves the current requirements intact.
//
// newRequiredScratchSize        - candidate value for requiredScratchSize
// newRequiredPrivateScratchSize - candidate value for requiredPrivateScratchSize
void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) {
|
||||
// Raise the regular scratch requirement if this request exceeds it.
if (newRequiredScratchSize > requiredScratchSize) {
|
||||
requiredScratchSize = newRequiredScratchSize;
|
||||
}
|
||||
// Same grow-only rule, tracked independently, for private scratch.
if (newRequiredPrivateScratchSize > requiredPrivateScratchSize) {
|
||||
requiredPrivateScratchSize = newRequiredPrivateScratchSize;
|
||||
}
|
||||
}
|
||||
|
||||
GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() {
|
||||
|
@ -116,7 +116,7 @@ class CommandStreamReceiver {
|
||||
|
||||
void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
|
||||
|
||||
void setRequiredScratchSize(uint32_t newRequiredScratchSize);
|
||||
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
|
||||
GraphicsAllocation *getScratchAllocation();
|
||||
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
|
||||
GraphicsAllocation *allocateDebugSurface(size_t size);
|
||||
@ -235,6 +235,7 @@ class CommandStreamReceiver {
|
||||
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
|
||||
|
||||
uint32_t requiredScratchSize = 0;
|
||||
uint32_t requiredPrivateScratchSize = 0;
|
||||
|
||||
int8_t lastSentCoherencyRequest = -1;
|
||||
int8_t lastMediaSamplerConfig = -1;
|
||||
|
@ -224,7 +224,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (requiredScratchSize) {
|
||||
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
|
||||
requiredScratchSize,
|
||||
0u,
|
||||
requiredPrivateScratchSize,
|
||||
this->taskCount,
|
||||
this->osContext->getContextId(),
|
||||
stateBaseAddressDirty,
|
||||
|
@ -22,6 +22,10 @@ uint32_t DispatchInfo::getRequiredScratchSize() const {
|
||||
return (kernel == nullptr) ? 0 : kernel->getScratchSize();
|
||||
}
|
||||
|
||||
// Returns the private scratch size required by this dispatch.
// A dispatch with no kernel attached reports 0; otherwise the value is
// whatever the attached kernel's getPrivateScratchSize() returns.
uint32_t DispatchInfo::getRequiredPrivateScratchSize() const {
|
||||
return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize();
|
||||
}
|
||||
|
||||
Kernel *MultiDispatchInfo::peekMainKernel() const {
|
||||
if (dispatchInfos.size() == 0) {
|
||||
return nullptr;
|
||||
|
@ -21,16 +21,17 @@ class Kernel;
|
||||
|
||||
class DispatchInfo {
|
||||
public:
|
||||
DispatchInfo() : gws(0, 0, 0), elws(0, 0, 0), offset(0, 0, 0), agws(0, 0, 0), lws(0, 0, 0), twgs(0, 0, 0), nwgs(0, 0, 0), swgs(0, 0, 0) {}
|
||||
DispatchInfo(Kernel *k, uint32_t d, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
|
||||
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(0, 0, 0), lws(0, 0, 0), twgs(0, 0, 0), nwgs(0, 0, 0), swgs(0, 0, 0) {}
|
||||
DispatchInfo(Kernel *k, uint32_t d, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
|
||||
: kernel(k), dim(d), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
|
||||
DispatchInfo() = default;
|
||||
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
|
||||
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {}
|
||||
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
|
||||
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
|
||||
bool isPipeControlRequired() const { return pipeControlRequired; }
|
||||
void setPipeControlRequired(bool blocking) { this->pipeControlRequired = blocking; }
|
||||
bool usesSlm() const;
|
||||
bool usesStatelessPrintfSurface() const;
|
||||
uint32_t getRequiredScratchSize() const;
|
||||
uint32_t getRequiredPrivateScratchSize() const;
|
||||
void setKernel(Kernel *kernel) { this->kernel = kernel; }
|
||||
Kernel *getKernel() const { return kernel; }
|
||||
uint32_t getDim() const { return dim; }
|
||||
@ -60,14 +61,14 @@ class DispatchInfo {
|
||||
Kernel *kernel = nullptr;
|
||||
uint32_t dim = 0;
|
||||
|
||||
Vec3<size_t> gws; //global work size
|
||||
Vec3<size_t> elws; //enqueued local work size
|
||||
Vec3<size_t> offset; //global offset
|
||||
Vec3<size_t> agws; //actual global work size
|
||||
Vec3<size_t> lws; //local work size
|
||||
Vec3<size_t> twgs; //total number of work groups
|
||||
Vec3<size_t> nwgs; //number of work groups
|
||||
Vec3<size_t> swgs; //start of work groups
|
||||
Vec3<size_t> gws{0, 0, 0}; //global work size
|
||||
Vec3<size_t> elws{0, 0, 0}; //enqueued local work size
|
||||
Vec3<size_t> offset{0, 0, 0}; //global offset
|
||||
Vec3<size_t> agws{0, 0, 0}; //actual global work size
|
||||
Vec3<size_t> lws{0, 0, 0}; //local work size
|
||||
Vec3<size_t> twgs{0, 0, 0}; //total number of work groups
|
||||
Vec3<size_t> nwgs{0, 0, 0}; //number of work groups
|
||||
Vec3<size_t> swgs{0, 0, 0}; //start of work groups
|
||||
};
|
||||
|
||||
struct MultiDispatchInfo {
|
||||
@ -113,6 +114,14 @@ struct MultiDispatchInfo {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns the largest private scratch size requested by any contained
// DispatchInfo, or 0 when dispatchInfos is empty.
uint32_t getRequiredPrivateScratchSize() const {
|
||||
uint32_t ret = 0;
|
||||
// Keep the running maximum across all dispatches.
for (const auto &dispatchInfo : dispatchInfos) {
|
||||
ret = std::max(ret, dispatchInfo.getRequiredPrivateScratchSize());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
DispatchInfo *begin() {
|
||||
return dispatchInfos.begin();
|
||||
}
|
||||
|
@ -334,6 +334,26 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra
|
||||
EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc);
|
||||
}
|
||||
|
||||
// Verifies that enqueueing a kernel whose slot-1 media VFE state requests
// per-thread scratch space results in the same value being stored in the
// command stream receiver's requiredPrivateScratchSize.
// NOTE(review): the test name spells "Receviver"; renaming would change the
// test identifier, so it is left untouched here.
HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) {
|
||||
pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.getMemoryManager()->setForce32BitAllocations(false);
|
||||
// Zero offset and a 1x1x1 global work size for the enqueue below.
size_t off[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
// NOTE(review): mediaVFEstate is declared without an initializer and only
// PerThreadScratchSpace is assigned below - confirm no other field is read.
SPatchMediaVFEState mediaVFEstate;
|
||||
uint32_t privateScratchSize = 4096u;
|
||||
|
||||
mediaVFEstate.PerThreadScratchSpace = privateScratchSize;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice);
|
||||
// Presumably slot 1 is what the kernel reports as its private scratch size;
// verify against Kernel::getPrivateScratchSize().
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstate;
|
||||
|
||||
// NOTE(review): the enqueue's return value is not checked.
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) {
|
||||
size_t gws[3] = {0, 0, 0};
|
||||
MockKernelWithInternals mockKernel(*pDevice);
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
#include "runtime/command_stream/scratch_space_controller.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/helpers/state_base_address.h"
|
||||
@ -410,7 +411,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, handleTagAndScratchAllocationsResi
|
||||
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment);
|
||||
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSize(1024); // whatever > 0
|
||||
commandStreamReceiver->setRequiredScratchSizes(1024, 0); // whatever > 0
|
||||
|
||||
flushTask(*commandStreamReceiver);
|
||||
|
||||
@ -686,13 +687,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeDoNotS
|
||||
|
||||
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size)
|
||||
commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size)
|
||||
flushTask(*commandStreamReceiver);
|
||||
|
||||
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
||||
ASSERT_NE(scratchAllocation, nullptr);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size
|
||||
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
|
||||
|
||||
flushTask(*commandStreamReceiver); // 2nd flush
|
||||
|
||||
@ -720,13 +721,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeStore3
|
||||
|
||||
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSize(4096); // whatever > 0 (in page size)
|
||||
commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size)
|
||||
flushTask(*commandStreamReceiver);
|
||||
|
||||
auto scratchAllocation = commandStreamReceiver->getScratchAllocation();
|
||||
ASSERT_NE(scratchAllocation, nullptr);
|
||||
|
||||
commandStreamReceiver->setRequiredScratchSize(8196); // whatever > first size
|
||||
commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size
|
||||
|
||||
flushTask(*commandStreamReceiver); // 2nd flush
|
||||
|
||||
|
@ -337,3 +337,9 @@ TEST(DispatchInfoBasicTests, givenDispatchInfoWhenSetCanBePartitionIsCalledThenS
|
||||
dispatchInfo.setCanBePartitioned(true);
|
||||
EXPECT_TRUE(dispatchInfo.peekCanBePartitioned());
|
||||
}
|
||||
|
||||
// A default-constructed DispatchInfo has no kernel attached, and in that
// state getRequiredPrivateScratchSize() must report 0.
TEST(DispatchInfoBasicTests, givenDispatchInfoWithoutKernelWhenGettingSizeForPrivateScratchThenZeroIsReturned) {
|
||||
DispatchInfo dispatchInfo;
|
||||
// Precondition: confirm there really is no kernel before checking the size.
EXPECT_EQ(nullptr, dispatchInfo.getKernel());
|
||||
EXPECT_EQ(0u, dispatchInfo.getRequiredPrivateScratchSize());
|
||||
}
|
||||
|
@ -56,9 +56,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::mediaVfeStateDirty;
|
||||
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
|
||||
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
|
||||
using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize;
|
||||
using BaseClass::CommandStreamReceiver::requiredScratchSize;
|
||||
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::scratchSpaceController;
|
||||
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
|
||||
using BaseClass::CommandStreamReceiver::submissionAggregator;
|
||||
using BaseClass::CommandStreamReceiver::taskCount;
|
||||
|
Reference in New Issue
Block a user