mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Store SLM sizes per root device in Kernel
reduce usages of getDefaultKernelInfo

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
09bdd2ad09
commit
be7ae13911
@@ -1845,7 +1845,7 @@ cl_int CL_API_CALL clSetKernelArg(cl_kernel kernel,
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
break;
|
||||
}
|
||||
if (pKernel->getDefaultKernelInfo().kernelArgInfo.size() <= argIndex) {
|
||||
if (pKernel->getKernelArguments().size() <= argIndex) {
|
||||
retVal = CL_INVALID_ARG_INDEX;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
namespace NEO {
|
||||
bool DispatchInfo::usesSlm() const {
|
||||
return (kernel == nullptr) ? false : kernel->slmTotalSize > 0;
|
||||
return (kernel == nullptr) ? false : kernel->getSlmTotalSize(pClDevice->getRootDeviceIndex()) > 0;
|
||||
}
|
||||
|
||||
bool DispatchInfo::usesStatelessPrintfSurface() const {
|
||||
|
||||
@@ -180,8 +180,10 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
|
||||
interfaceDescriptor.setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
|
||||
|
||||
auto slmTotalSize = kernel.getSlmTotalSize(rootDeviceIndex);
|
||||
|
||||
setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData, rootDeviceIndex);
|
||||
EncodeDispatchKernel<GfxFamily>::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, kernel.slmTotalSize, SlmPolicy::SlmPolicyNone);
|
||||
EncodeDispatchKernel<GfxFamily>::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, slmTotalSize, SlmPolicy::SlmPolicyNone);
|
||||
|
||||
interfaceDescriptor.setBindingTablePointer(static_cast<uint32_t>(bindingTablePointer));
|
||||
|
||||
@@ -190,7 +192,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
EncodeDispatchKernel<GfxFamily>::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize);
|
||||
|
||||
auto programmableIDSLMSize =
|
||||
static_cast<SHARED_LOCAL_MEMORY_SIZE>(HwHelperHw<GfxFamily>::get().computeSlmValues(hardwareInfo, kernel.slmTotalSize));
|
||||
static_cast<SHARED_LOCAL_MEMORY_SIZE>(HwHelperHw<GfxFamily>::get().computeSlmValues(hardwareInfo, slmTotalSize));
|
||||
|
||||
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
|
||||
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(interfaceDescriptor,
|
||||
|
||||
@@ -166,6 +166,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
printfHandler.get()->makeResident(commandStreamReceiver);
|
||||
}
|
||||
makeTimestampPacketsResident(commandStreamReceiver);
|
||||
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
|
||||
|
||||
if (executionModelKernel) {
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
@@ -195,7 +196,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
// Update SLM usage
|
||||
slmUsed |= scheduler.slmTotalSize > 0;
|
||||
slmUsed |= scheduler.getSlmTotalSize(rootDeviceIndex) > 0;
|
||||
|
||||
this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver);
|
||||
}
|
||||
@@ -210,7 +211,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr);
|
||||
}
|
||||
|
||||
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
|
||||
const auto &kernelDescriptor = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor;
|
||||
|
||||
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
|
||||
|
||||
@@ -65,8 +65,7 @@ class Surface;
|
||||
uint32_t Kernel::dummyPatchLocation = 0xbaddf00d;
|
||||
|
||||
Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, bool schedulerKernel)
|
||||
: slmTotalSize(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->workloadInfo.slmStaticSize),
|
||||
isParentKernel(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue),
|
||||
: isParentKernel(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue),
|
||||
isSchedulerKernel(schedulerKernel),
|
||||
executionEnvironment(programArg->getExecutionEnvironment()),
|
||||
program(programArg),
|
||||
@@ -78,7 +77,9 @@ Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, b
|
||||
program->retainForKernel();
|
||||
imageTransformer.reset(new ImageTransformer);
|
||||
for (const auto &pClDevice : deviceVector) {
|
||||
kernelDeviceInfos[pClDevice->getRootDeviceIndex()].maxKernelWorkGroupSize = static_cast<uint32_t>(pClDevice->getSharedDeviceInfo().maxWorkGroupSize);
|
||||
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
|
||||
kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize = static_cast<uint32_t>(pClDevice->getSharedDeviceInfo().maxWorkGroupSize);
|
||||
kernelDeviceInfos[rootDeviceIndex].slmTotalSize = kernelInfosArg[rootDeviceIndex]->workloadInfo.slmStaticSize;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,7 +101,7 @@ Kernel::~Kernel() {
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < patchedArgumentsNum; i++) {
|
||||
if (getDefaultKernelInfo().kernelArgInfo.at(i).isSampler) {
|
||||
if (SAMPLER_OBJ == getKernelArguments()[i].type) {
|
||||
auto sampler = castToObject<Sampler>(kernelArguments.at(i).object);
|
||||
if (sampler) {
|
||||
sampler->decRefInternal();
|
||||
@@ -372,6 +373,8 @@ cl_int Kernel::initialize() {
|
||||
if (program->isKernelDebugEnabled() && kernelInfo.patchInfo.pAllocateSystemThreadSurface) {
|
||||
debugEnabled = true;
|
||||
}
|
||||
auto numArgs = kernelInfo.kernelArgInfo.size();
|
||||
kernelDeviceInfo.slmSizes.resize(numArgs);
|
||||
isDeviceInitialized.set(rootDeviceIndex);
|
||||
}
|
||||
|
||||
@@ -384,13 +387,11 @@ cl_int Kernel::initialize() {
|
||||
auto &defaultKernelInfo = getDefaultKernelInfo();
|
||||
auto numArgs = defaultKernelInfo.kernelArgInfo.size();
|
||||
kernelArguments.resize(numArgs);
|
||||
slmSizes.resize(numArgs);
|
||||
kernelArgHandlers.resize(numArgs);
|
||||
kernelArgRequiresCacheFlush.resize(numArgs);
|
||||
|
||||
for (uint32_t i = 0; i < numArgs; ++i) {
|
||||
storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0);
|
||||
slmSizes[i] = 0;
|
||||
|
||||
// set the argument handler
|
||||
auto &argInfo = defaultKernelInfo.kernelArgInfo[i];
|
||||
@@ -483,7 +484,6 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
|
||||
const _cl_context *ctxt;
|
||||
cl_uint refCount = 0;
|
||||
uint64_t nonCannonizedGpuAddress = 0llu;
|
||||
auto defaultRootDeviceIndex = getDevices()[0]->getRootDeviceIndex();
|
||||
auto &defaultKernelInfo = getKernelInfo(defaultRootDeviceIndex);
|
||||
|
||||
switch (paramName) {
|
||||
@@ -1112,7 +1112,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
|
||||
availableThreadCount,
|
||||
dssCount,
|
||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||
hwHelper.alignSlmSize(slmTotalSize),
|
||||
hwHelper.alignSlmSize(kernelDeviceInfos[rootDeviceIndex].slmTotalSize),
|
||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||
workDim,
|
||||
@@ -1280,7 +1280,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
||||
|
||||
storeKernelArg(argIndex, SLM_OBJ, nullptr, argVal, argSize);
|
||||
|
||||
slmSizes[argIndex] = argSize;
|
||||
kernelDeviceInfos[rootDeviceIndex].slmSizes[argIndex] = argSize;
|
||||
|
||||
// Extract our current slmOffset
|
||||
auto slmOffset = *ptrOffset(crossThreadData,
|
||||
@@ -1291,7 +1291,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
||||
|
||||
// Update all slm offsets after this argIndex
|
||||
++argIndex;
|
||||
while (argIndex < slmSizes.size()) {
|
||||
while (argIndex < kernelDeviceInfos[rootDeviceIndex].slmSizes.size()) {
|
||||
const auto &kernelArgInfo = defaultKernelInfo.kernelArgInfo[argIndex];
|
||||
auto slmAlignment = kernelArgInfo.slmAlignment;
|
||||
|
||||
@@ -1306,11 +1306,11 @@ cl_int Kernel::setArgLocal(uint32_t argIndex,
|
||||
*patchLocation = slmOffset;
|
||||
}
|
||||
|
||||
slmOffset += static_cast<uint32_t>(slmSizes[argIndex]);
|
||||
slmOffset += static_cast<uint32_t>(kernelDeviceInfos[rootDeviceIndex].slmSizes[argIndex]);
|
||||
++argIndex;
|
||||
}
|
||||
|
||||
slmTotalSize = defaultKernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
|
||||
kernelDeviceInfos[rootDeviceIndex].slmTotalSize = defaultKernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
@@ -2679,4 +2679,7 @@ void Kernel::setWorkDim(uint32_t rootDeviceIndex, uint32_t workDim) {
|
||||
// Returns the maxKernelWorkGroupSize value stored in the kernelDeviceInfos
// entry selected by rootDeviceIndex.
// The index is used directly; no range check is performed in this function.
uint32_t Kernel::getMaxKernelWorkGroupSize(uint32_t rootDeviceIndex) const {
|
||||
return kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize;
|
||||
}
|
||||
// Returns the slmTotalSize value stored in the kernelDeviceInfos entry
// selected by rootDeviceIndex.
// In the code visible here, the Kernel constructor seeds that field from the
// kernel info's workloadInfo.slmStaticSize and setArgLocal rewrites it.
// The index is used directly; no range check is performed in this function.
uint32_t Kernel::getSlmTotalSize(uint32_t rootDeviceIndex) const {
|
||||
return kernelDeviceInfos[rootDeviceIndex].slmTotalSize;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -182,7 +182,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
}
|
||||
|
||||
size_t getKernelArgsNumber() const {
|
||||
return getDefaultKernelInfo().kernelArgInfo.size();
|
||||
return kernelArguments.size();
|
||||
}
|
||||
|
||||
bool requiresSshForBuffers(uint32_t rootDeviceIndex) const {
|
||||
@@ -308,11 +308,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
static uint32_t dummyPatchLocation;
|
||||
|
||||
std::vector<size_t> slmSizes;
|
||||
|
||||
uint32_t allBufferArgsStateful = CL_TRUE;
|
||||
|
||||
uint32_t slmTotalSize;
|
||||
bool isBuiltIn = false;
|
||||
const bool isParentKernel;
|
||||
const bool isSchedulerKernel;
|
||||
@@ -406,6 +403,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
void setNumWorkGroupsValues(uint32_t rootDeviceIndex, uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ);
|
||||
void setWorkDim(uint32_t rootDeviceIndex, uint32_t workDim);
|
||||
uint32_t getMaxKernelWorkGroupSize(uint32_t rootDeviceIndex) const;
|
||||
uint32_t getSlmTotalSize(uint32_t rootDeviceIndex) const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
@@ -574,6 +572,9 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
size_t numberOfBindingTableStates = 0u;
|
||||
size_t localBindingTableOffset = 0u;
|
||||
|
||||
std::vector<size_t> slmSizes;
|
||||
uint32_t slmTotalSize = 0u;
|
||||
|
||||
std::unique_ptr<char[]> pSshLocal;
|
||||
uint32_t sshLocalSize = 0u;
|
||||
char *crossThreadData = nullptr;
|
||||
|
||||
@@ -138,7 +138,7 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
|
||||
this->maxWorkGroupSize = dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(rootDeviceIndex);
|
||||
this->hasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers();
|
||||
this->simdSize = static_cast<uint32_t>(kernelInfo.getMaxSimdSize());
|
||||
this->slmTotalSize = static_cast<uint32_t>(dispatchInfo.getKernel()->slmTotalSize);
|
||||
this->slmTotalSize = static_cast<uint32_t>(dispatchInfo.getKernel()->getSlmTotalSize(rootDeviceIndex));
|
||||
this->coreFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
|
||||
this->numThreadsPerSubSlice = static_cast<uint32_t>(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) *
|
||||
device.getSharedDeviceInfo().numThreadsPerEU;
|
||||
|
||||
@@ -380,7 +380,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML
|
||||
commandStreamReceiver->lastSentL3Config = L3Config;
|
||||
commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy();
|
||||
|
||||
((MockKernel *)kernel)->setTotalSLMSize(1024);
|
||||
((MockKernel *)kernel)->setTotalSLMSize(rootDeviceIndex, 1024);
|
||||
|
||||
cmdList.clear();
|
||||
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
@@ -43,7 +43,7 @@ void CommandStreamReceiverHwTest<GfxFamily>::givenKernelWithSlmWhenPreviousNOSLM
|
||||
commandStreamReceiver->isPreambleSent = true;
|
||||
commandStreamReceiver->lastSentL3Config = 0;
|
||||
|
||||
static_cast<MockKernel *>(kernel)->setTotalSLMSize(1024);
|
||||
static_cast<MockKernel *>(kernel)->setTotalSLMSize(rootDeviceIndex, 1024);
|
||||
|
||||
cmdList.clear();
|
||||
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
|
||||
@@ -89,7 +89,7 @@ void CommandStreamReceiverHwTest<GfxFamily>::givenBlockedKernelWithSlmWhenPrevio
|
||||
commandStreamReceiver->isPreambleSent = true;
|
||||
commandStreamReceiver->lastSentL3Config = 0;
|
||||
|
||||
static_cast<MockKernel *>(kernel)->setTotalSLMSize(1024);
|
||||
static_cast<MockKernel *>(kernel)->setTotalSLMSize(rootDeviceIndex, 1024);
|
||||
|
||||
commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 1, &blockingEvent, nullptr);
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
|
||||
pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
|
||||
pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer);
|
||||
|
||||
pKernel->slmTotalSize = 128;
|
||||
pKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize = 128;
|
||||
pKernel->isBuiltIn = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture {
|
||||
pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice));
|
||||
|
||||
pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
|
||||
pKernel->slmTotalSize = 128;
|
||||
pKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize = 128;
|
||||
}
|
||||
void TearDown() override {
|
||||
delete pKernel;
|
||||
@@ -56,7 +56,7 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture {
|
||||
SPatchMediaVFEState *pMediaVFEstate = nullptr;
|
||||
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = nullptr;
|
||||
MockProgram *pProgram = nullptr;
|
||||
Kernel *pKernel = nullptr;
|
||||
MockKernel *pKernel = nullptr;
|
||||
};
|
||||
|
||||
typedef Test<DispatchInfoFixture> DispatchInfoTest;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/accelerators/intel_accelerator.h"
|
||||
#include "opencl/source/accelerators/intel_motion_estimation.h"
|
||||
@@ -162,7 +163,7 @@ TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched);
|
||||
|
||||
EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel->slmTotalSize);
|
||||
EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize);
|
||||
}
|
||||
|
||||
TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
@@ -336,10 +337,10 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr
|
||||
}
|
||||
|
||||
TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
std::unique_ptr<Sampler> sampler(new MockSampler(pContext,
|
||||
true,
|
||||
(cl_addressing_mode)CL_ADDRESS_MIRRORED_REPEAT,
|
||||
(cl_filter_mode)CL_FILTER_NEAREST));
|
||||
auto sampler = clUniquePtr<Sampler>(new MockSampler(pContext,
|
||||
true,
|
||||
(cl_addressing_mode)CL_ADDRESS_MIRRORED_REPEAT,
|
||||
(cl_filter_mode)CL_FILTER_NEAREST));
|
||||
|
||||
uint32_t objectId = SAMPLER_OBJECT_ID_SHIFT + pKernelInfo->kernelArgInfo[0].offsetHeap;
|
||||
|
||||
@@ -381,6 +382,8 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
|
||||
|
||||
auto pNormalizedCoords = ptrOffset(crossThreadData, argInfo.offsetSamplerNormalizedCoords);
|
||||
EXPECT_EQ(GetNormCoordsEnum(sampler->normalizedCoordinates), *pNormalizedCoords);
|
||||
|
||||
EXPECT_EQ(3, sampler->getRefInternalCount());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, GivenArgDeviceQueueWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
|
||||
@@ -83,7 +83,7 @@ TEST_F(KernelSlmArgTest, WhenSettingSizeThenAlignmentOfHigherSlmArgsIsUpdated) {
|
||||
slmOffset = ptrOffset(crossThreadData, 0x30);
|
||||
EXPECT_EQ(0x400u, *slmOffset);
|
||||
|
||||
EXPECT_EQ(5 * KB, pKernel->slmTotalSize);
|
||||
EXPECT_EQ(5 * KB, pKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize);
|
||||
}
|
||||
|
||||
TEST_F(KernelSlmArgTest, GivenReverseOrderWhenSettingSizeThenAlignmentOfHigherSlmArgsIsUpdated) {
|
||||
@@ -100,5 +100,5 @@ TEST_F(KernelSlmArgTest, GivenReverseOrderWhenSettingSizeThenAlignmentOfHigherSl
|
||||
slmOffset = ptrOffset(crossThreadData, 0x30);
|
||||
EXPECT_EQ(0x400u, *slmOffset);
|
||||
|
||||
EXPECT_EQ(5 * KB, pKernel->slmTotalSize);
|
||||
EXPECT_EQ(5 * KB, pKernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize);
|
||||
}
|
||||
|
||||
@@ -2288,7 +2288,7 @@ TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalS
|
||||
|
||||
MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
|
||||
|
||||
EXPECT_EQ(1024u, kernel->slmTotalSize);
|
||||
EXPECT_EQ(1024u, kernel->kernelDeviceInfos[rootDeviceIndex].slmTotalSize);
|
||||
|
||||
delete kernel;
|
||||
}
|
||||
|
||||
@@ -211,8 +211,8 @@ class MockKernel : public Kernel {
|
||||
}
|
||||
}
|
||||
|
||||
void setTotalSLMSize(uint32_t size) {
|
||||
slmTotalSize = size;
|
||||
void setTotalSLMSize(uint32_t rootDeviceIndex, uint32_t size) {
|
||||
kernelDeviceInfos[rootDeviceIndex].slmTotalSize = size;
|
||||
}
|
||||
|
||||
void setKernelArguments(std::vector<SimpleKernelArgInfo> kernelArguments) {
|
||||
|
||||
Reference in New Issue
Block a user