mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
refactor: move Kernel data members to KernelImp::sharedState
The class Kernel is abstract and as such it should not have any members. Move its members to sharedState as this is what they represent. Related-To: NEO-15374 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c1c1f1f0af
commit
91a4809a79
@@ -173,31 +173,13 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI, NEO::N
|
||||
|
||||
virtual ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) = 0;
|
||||
|
||||
virtual uint32_t getMaxWgCountPerTile(NEO::EngineGroupType engineGroupType) const = 0;
|
||||
|
||||
static Kernel *fromHandle(ze_kernel_handle_t handle) { return static_cast<Kernel *>(handle); }
|
||||
|
||||
inline ze_kernel_handle_t toHandle() { return this; }
|
||||
|
||||
uint32_t getMaxWgCountPerTile(NEO::EngineGroupType engineGroupType) const {
|
||||
auto value = maxWgCountPerTileCcs;
|
||||
if (engineGroupType == NEO::EngineGroupType::renderCompute) {
|
||||
value = maxWgCountPerTileRcs;
|
||||
} else if (engineGroupType == NEO::EngineGroupType::cooperativeCompute) {
|
||||
value = maxWgCountPerTileCooperative;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
virtual uint32_t getIndirectSize() const = 0;
|
||||
|
||||
protected:
|
||||
uint32_t maxWgCountPerTileCcs = 0;
|
||||
uint32_t maxWgCountPerTileRcs = 0;
|
||||
uint32_t maxWgCountPerTileCooperative = 0;
|
||||
bool heaplessEnabled = false;
|
||||
bool implicitScalingEnabled = false;
|
||||
bool localDispatchSupport = false;
|
||||
bool rcsAvailable = false;
|
||||
bool cooperativeSupport = false;
|
||||
};
|
||||
|
||||
using KernelAllocatorFn = Kernel *(*)(Module *module);
|
||||
|
||||
@@ -520,13 +520,13 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
this->privateState.perThreadDataSize = 0;
|
||||
}
|
||||
|
||||
if (this->heaplessEnabled && this->localDispatchSupport) {
|
||||
this->maxWgCountPerTileCcs = suggestMaxCooperativeGroupCount(NEO::EngineGroupType::compute, true);
|
||||
if (this->rcsAvailable) {
|
||||
this->maxWgCountPerTileRcs = suggestMaxCooperativeGroupCount(NEO::EngineGroupType::renderCompute, true);
|
||||
if (this->sharedState->heaplessEnabled && this->sharedState->localDispatchSupport) {
|
||||
this->sharedState->maxWgCountPerTileCcs = suggestMaxCooperativeGroupCount(NEO::EngineGroupType::compute, true);
|
||||
if (this->sharedState->rcsAvailable) {
|
||||
this->sharedState->maxWgCountPerTileRcs = suggestMaxCooperativeGroupCount(NEO::EngineGroupType::renderCompute, true);
|
||||
}
|
||||
if (this->cooperativeSupport) {
|
||||
this->maxWgCountPerTileCooperative = suggestMaxCooperativeGroupCount(NEO::EngineGroupType::cooperativeCompute, true);
|
||||
if (this->sharedState->cooperativeSupport) {
|
||||
this->sharedState->maxWgCountPerTileCooperative = suggestMaxCooperativeGroupCount(NEO::EngineGroupType::cooperativeCompute, true);
|
||||
}
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -611,7 +611,7 @@ uint32_t KernelImp::suggestMaxCooperativeGroupCount(NEO::EngineGroupType engineG
|
||||
workDim,
|
||||
localWorkSize,
|
||||
engineGroupType,
|
||||
this->implicitScalingEnabled,
|
||||
this->sharedState->implicitScalingEnabled,
|
||||
forceSingleTileQuery);
|
||||
}
|
||||
|
||||
@@ -714,8 +714,8 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
||||
// redescribed image's surface state is after image's implicit args and sampler
|
||||
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * bindlessSlot;
|
||||
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
|
||||
uint64_t patchValue = this->heaplessEnabled
|
||||
uint32_t patchSize = this->sharedState->heaplessEnabled ? 8u : 4u;
|
||||
uint64_t patchValue = this->sharedState->heaplessEnabled
|
||||
? bindlessSlotOffset
|
||||
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||
|
||||
@@ -917,7 +917,7 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
||||
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset;
|
||||
uint32_t patchSize = NEO::isUndefined(arg.size) ? 0 : arg.size;
|
||||
uint64_t patchValue = this->heaplessEnabled
|
||||
uint64_t patchValue = this->sharedState->heaplessEnabled
|
||||
? bindlessSlotOffset
|
||||
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||
|
||||
@@ -1122,11 +1122,12 @@ void KernelImp::setInlineSamplers() {
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
this->sharedState->kernelImmData = module->getKernelImmutableData(desc->pKernelName);
|
||||
if (this->sharedState->kernelImmData == nullptr) {
|
||||
auto &sharedState = *(this->sharedState);
|
||||
sharedState.kernelImmData = module->getKernelImmutableData(desc->pKernelName);
|
||||
if (sharedState.kernelImmData == nullptr) {
|
||||
return ZE_RESULT_ERROR_INVALID_KERNEL_NAME;
|
||||
}
|
||||
auto &kernelImmData = *(this->sharedState->kernelImmData);
|
||||
auto &kernelImmData = *(sharedState.kernelImmData);
|
||||
auto neoDevice = module->getDevice()->getNEODevice();
|
||||
auto &kernelDescriptor = kernelImmData.getDescriptor();
|
||||
|
||||
@@ -1156,16 +1157,16 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
auto deviceBitfield = neoDevice->getDeviceBitfield();
|
||||
const auto &gfxHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
|
||||
|
||||
this->heaplessEnabled = rootDeviceEnvironment.getHelper<NEO::CompilerProductHelper>().isHeaplessModeEnabled(hwInfo);
|
||||
sharedState.heaplessEnabled = rootDeviceEnvironment.getHelper<NEO::CompilerProductHelper>().isHeaplessModeEnabled(hwInfo);
|
||||
|
||||
bool platformImplicitScaling = gfxHelper.platformSupportsImplicitScaling(rootDeviceEnvironment);
|
||||
this->implicitScalingEnabled = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, platformImplicitScaling);
|
||||
sharedState.implicitScalingEnabled = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, platformImplicitScaling);
|
||||
|
||||
this->rcsAvailable = gfxHelper.isRcsAvailable(hwInfo);
|
||||
this->cooperativeSupport = productHelper.isCooperativeEngineSupported(hwInfo);
|
||||
this->sharedState->walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
|
||||
this->sharedState->surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
|
||||
this->sharedState->surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();
|
||||
sharedState.rcsAvailable = gfxHelper.isRcsAvailable(hwInfo);
|
||||
sharedState.cooperativeSupport = productHelper.isCooperativeEngineSupported(hwInfo);
|
||||
sharedState.walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
|
||||
sharedState.surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
|
||||
sharedState.surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();
|
||||
|
||||
if (isaAllocation->getAllocationType() == NEO::AllocationType::kernelIsaInternal && kernelImmData.getIsaParentAllocation() == nullptr) {
|
||||
isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
@@ -1238,7 +1239,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
|
||||
privateState.pImplicitArgs = std::make_unique<NEO::ImplicitArgs>();
|
||||
*privateState.pImplicitArgs = {};
|
||||
privateState.pImplicitArgs->initializeHeader(this->sharedState->implicitArgsVersion);
|
||||
privateState.pImplicitArgs->initializeHeader(sharedState.implicitArgsVersion);
|
||||
privateState.pImplicitArgs->setSimdWidth(kernelDescriptor.kernelAttributes.simdSize);
|
||||
}
|
||||
|
||||
@@ -1262,12 +1263,12 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
|
||||
auto &kernelAttributes = kernelDescriptor.kernelAttributes;
|
||||
if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) {
|
||||
this->sharedState->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation();
|
||||
if (this->sharedState->privateMemoryGraphicsAllocation == nullptr) {
|
||||
sharedState.privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation();
|
||||
if (sharedState.privateMemoryGraphicsAllocation == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
this->patchCrossthreadDataWithPrivateAllocation(this->sharedState->privateMemoryGraphicsAllocation);
|
||||
this->privateState.internalResidencyContainer.push_back(this->sharedState->privateMemoryGraphicsAllocation);
|
||||
this->patchCrossthreadDataWithPrivateAllocation(sharedState.privateMemoryGraphicsAllocation);
|
||||
this->privateState.internalResidencyContainer.push_back(sharedState.privateMemoryGraphicsAllocation);
|
||||
}
|
||||
|
||||
this->createPrintfBuffer();
|
||||
@@ -1342,15 +1343,6 @@ std::unique_ptr<KernelImp> KernelImp::cloneWithStateOverride(const KernelMutable
|
||||
clone->cloneOrigin = this;
|
||||
clone->sharedState = this->sharedState;
|
||||
|
||||
// Kernel-specific members dynamically set in `initialize()` but shareable with clones
|
||||
clone->maxWgCountPerTileCcs = this->maxWgCountPerTileCcs;
|
||||
clone->maxWgCountPerTileRcs = this->maxWgCountPerTileRcs;
|
||||
clone->maxWgCountPerTileCooperative = this->maxWgCountPerTileCooperative;
|
||||
clone->heaplessEnabled = this->heaplessEnabled;
|
||||
clone->implicitScalingEnabled = this->implicitScalingEnabled;
|
||||
clone->rcsAvailable = this->rcsAvailable;
|
||||
clone->cooperativeSupport = this->cooperativeSupport;
|
||||
|
||||
if (stateOverride) {
|
||||
clone->privateState = *stateOverride;
|
||||
}
|
||||
@@ -1359,18 +1351,19 @@ std::unique_ptr<KernelImp> KernelImp::cloneWithStateOverride(const KernelMutable
|
||||
}
|
||||
|
||||
void KernelImp::createPrintfBuffer() {
|
||||
auto &sharedState = *(this->sharedState);
|
||||
if (this->getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf || privateState.pImplicitArgs) {
|
||||
this->sharedState->printfBuffer = PrintfHandler::createPrintfBuffer(this->module->getDevice());
|
||||
this->privateState.internalResidencyContainer.push_back(this->sharedState->printfBuffer);
|
||||
sharedState.printfBuffer = PrintfHandler::createPrintfBuffer(this->module->getDevice());
|
||||
this->privateState.internalResidencyContainer.push_back(sharedState.printfBuffer);
|
||||
if (this->getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf) {
|
||||
NEO::patchPointer(getCrossThreadDataSpan(),
|
||||
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.printfSurfaceAddress,
|
||||
static_cast<uintptr_t>(this->sharedState->printfBuffer->getGpuAddressToPatch()));
|
||||
static_cast<uintptr_t>(sharedState.printfBuffer->getGpuAddressToPatch()));
|
||||
}
|
||||
if (privateState.pImplicitArgs) {
|
||||
privateState.pImplicitArgs->setPrintfBuffer(this->sharedState->printfBuffer->getGpuAddress());
|
||||
privateState.pImplicitArgs->setPrintfBuffer(sharedState.printfBuffer->getGpuAddress());
|
||||
}
|
||||
this->sharedState->devicePrintfKernelMutex = &(static_cast<DeviceImp *>(this->module->getDevice())->printfKernelMutex);
|
||||
sharedState.devicePrintfKernelMutex = &(static_cast<DeviceImp *>(this->module->getDevice())->printfKernelMutex);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -205,6 +205,16 @@ struct KernelImp : Kernel {
|
||||
|
||||
NEO::ImplicitArgs *getImplicitArgs() const override { return privateState.pImplicitArgs.get(); }
|
||||
|
||||
// Returns the per-tile work-group count stored in sharedState for the given
// engine group: renderCompute selects the RCS value, cooperativeCompute the
// cooperative value, and every other engine group the CCS value.
uint32_t getMaxWgCountPerTile(NEO::EngineGroupType engineGroupType) const override {
    const auto &state = *(this->sharedState);
    switch (engineGroupType) {
    case NEO::EngineGroupType::renderCompute:
        return state.maxWgCountPerTileRcs;
    case NEO::EngineGroupType::cooperativeCompute:
        return state.maxWgCountPerTileCooperative;
    default:
        // Any other engine group falls back to the CCS limit.
        return state.maxWgCountPerTileCcs;
    }
}
|
||||
|
||||
KernelExt *getExtension(uint32_t extensionType);
|
||||
|
||||
bool checkKernelContainsStatefulAccess();
|
||||
|
||||
@@ -31,6 +31,16 @@ struct KernelSharedState {
|
||||
|
||||
uint32_t implicitArgsVersion = 0;
|
||||
uint32_t walkerInlineDataSize = 0;
|
||||
|
||||
uint32_t maxWgCountPerTileCcs = 0;
|
||||
uint32_t maxWgCountPerTileRcs = 0;
|
||||
uint32_t maxWgCountPerTileCooperative = 0;
|
||||
|
||||
bool heaplessEnabled = false;
|
||||
bool implicitScalingEnabled = false;
|
||||
bool localDispatchSupport = false;
|
||||
bool rcsAvailable = false;
|
||||
bool cooperativeSupport = false;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -40,7 +40,7 @@ uint32_t KernelImpSuggestMaxCooperativeGroupCountFixture::getMaxWorkGroupCount()
|
||||
kernel.sharedState->kernelImmData = &kernelInfo;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::user);
|
||||
kernel.module = module.get();
|
||||
kernel.implicitScalingEnabled = device->getNEODevice()->getDeviceBitfield().count() > 1;
|
||||
kernel.sharedState->implicitScalingEnabled = device->getNEODevice()->getDeviceBitfield().count() > 1;
|
||||
kernel.privateState.groupSize[0] = lws[0];
|
||||
kernel.privateState.groupSize[1] = lws[1];
|
||||
kernel.privateState.groupSize[2] = lws[2];
|
||||
|
||||
@@ -41,23 +41,15 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
|
||||
using BaseClass = ::L0::KernelImp;
|
||||
using BaseClass::BaseClass;
|
||||
using ::L0::KernelImp::cloneOrigin;
|
||||
using ::L0::KernelImp::cooperativeSupport;
|
||||
using ::L0::KernelImp::createPrintfBuffer;
|
||||
using ::L0::KernelImp::getCrossThreadDataSpan;
|
||||
using ::L0::KernelImp::getDynamicStateHeapDataSpan;
|
||||
using ::L0::KernelImp::getSurfaceStateHeapDataSpan;
|
||||
using ::L0::KernelImp::heaplessEnabled;
|
||||
using ::L0::KernelImp::implicitScalingEnabled;
|
||||
using ::L0::KernelImp::localDispatchSupport;
|
||||
using ::L0::KernelImp::maxWgCountPerTileCcs;
|
||||
using ::L0::KernelImp::maxWgCountPerTileCooperative;
|
||||
using ::L0::KernelImp::maxWgCountPerTileRcs;
|
||||
using ::L0::KernelImp::module;
|
||||
using ::L0::KernelImp::patchBindlessOffsetsInCrossThreadData;
|
||||
using ::L0::KernelImp::patchBindlessSurfaceState;
|
||||
using ::L0::KernelImp::patchSamplerBindlessOffsetsInCrossThreadData;
|
||||
using ::L0::KernelImp::privateState;
|
||||
using ::L0::KernelImp::rcsAvailable;
|
||||
using ::L0::KernelImp::setAssertBuffer;
|
||||
using ::L0::KernelImp::sharedState;
|
||||
|
||||
|
||||
@@ -3168,7 +3168,7 @@ HWTEST2_F(SetKernelArg, givenHeaplessWhenPatchingImageWithBindlessEnabledCorrect
|
||||
for (auto heaplessEnabled : {false, true}) {
|
||||
|
||||
createKernel();
|
||||
kernel->heaplessEnabled = heaplessEnabled;
|
||||
kernel->sharedState->heaplessEnabled = heaplessEnabled;
|
||||
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice,
|
||||
neoDevice->getNumGenericSubDevices() > 1);
|
||||
@@ -3199,11 +3199,11 @@ HWTEST2_F(SetKernelArg, givenHeaplessWhenPatchingImageWithBindlessEnabledCorrect
|
||||
auto ssInHeap = imageHW->getBindlessSlot();
|
||||
auto patchLocation = ptrOffset(ctd, imageArg.bindless);
|
||||
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
|
||||
uint64_t expectedPatchValue = kernel->heaplessEnabled
|
||||
uint64_t expectedPatchValue = kernel->sharedState->heaplessEnabled
|
||||
? bindlessSlotOffset
|
||||
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||
|
||||
if (kernel->heaplessEnabled) {
|
||||
if (kernel->sharedState->heaplessEnabled) {
|
||||
uint64_t patchedValued = *(reinterpret_cast<uint64_t *>(patchLocation));
|
||||
EXPECT_EQ(expectedPatchValue, patchedValued);
|
||||
} else {
|
||||
|
||||
@@ -272,13 +272,13 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
|
||||
EXPECT_EQ(kernel2->sharedState->surfaceStateAlignment, kernel1.sharedState->surfaceStateAlignment);
|
||||
EXPECT_EQ(kernel2->sharedState->implicitArgsVersion, kernel1.sharedState->implicitArgsVersion);
|
||||
EXPECT_EQ(kernel2->sharedState->walkerInlineDataSize, kernel1.sharedState->walkerInlineDataSize);
|
||||
EXPECT_EQ(kernel2->maxWgCountPerTileCcs, kernel1.maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(kernel2->maxWgCountPerTileRcs, kernel1.maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(kernel2->maxWgCountPerTileCooperative, kernel1.maxWgCountPerTileCooperative);
|
||||
EXPECT_EQ(kernel2->heaplessEnabled, kernel1.heaplessEnabled);
|
||||
EXPECT_EQ(kernel2->implicitScalingEnabled, kernel1.implicitScalingEnabled);
|
||||
EXPECT_EQ(kernel2->rcsAvailable, kernel1.rcsAvailable);
|
||||
EXPECT_EQ(kernel2->cooperativeSupport, kernel1.cooperativeSupport);
|
||||
EXPECT_EQ(kernel2->sharedState->maxWgCountPerTileCcs, kernel1.sharedState->maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(kernel2->sharedState->maxWgCountPerTileRcs, kernel1.sharedState->maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(kernel2->sharedState->maxWgCountPerTileCooperative, kernel1.sharedState->maxWgCountPerTileCooperative);
|
||||
EXPECT_EQ(kernel2->sharedState->heaplessEnabled, kernel1.sharedState->heaplessEnabled);
|
||||
EXPECT_EQ(kernel2->sharedState->implicitScalingEnabled, kernel1.sharedState->implicitScalingEnabled);
|
||||
EXPECT_EQ(kernel2->sharedState->rcsAvailable, kernel1.sharedState->rcsAvailable);
|
||||
EXPECT_EQ(kernel2->sharedState->cooperativeSupport, kernel1.sharedState->cooperativeSupport);
|
||||
}
|
||||
|
||||
TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAndGroupCount) {
|
||||
@@ -1201,41 +1201,41 @@ TEST_F(KernelImpTest, givenHeaplessAndLocalDispatchEnabledWheSettingGroupSizeThe
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
|
||||
kernel.heaplessEnabled = false;
|
||||
kernel.localDispatchSupport = false;
|
||||
kernel.sharedState->heaplessEnabled = false;
|
||||
kernel.sharedState->localDispatchSupport = false;
|
||||
kernel.setGroupSize(128, 1, 1);
|
||||
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileCooperative);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileCooperative);
|
||||
|
||||
kernel.heaplessEnabled = true;
|
||||
kernel.sharedState->heaplessEnabled = true;
|
||||
kernel.setGroupSize(64, 2, 1);
|
||||
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileCooperative);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileCooperative);
|
||||
|
||||
kernel.localDispatchSupport = true;
|
||||
kernel.sharedState->localDispatchSupport = true;
|
||||
kernel.setGroupSize(32, 4, 1);
|
||||
|
||||
EXPECT_NE(0u, kernel.maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileCooperative);
|
||||
EXPECT_NE(0u, kernel.sharedState->maxWgCountPerTileCcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileCooperative);
|
||||
|
||||
kernel.rcsAvailable = true;
|
||||
kernel.sharedState->rcsAvailable = true;
|
||||
kernel.setGroupSize(16, 8, 1);
|
||||
|
||||
EXPECT_NE(0u, kernel.maxWgCountPerTileCcs);
|
||||
EXPECT_NE(0u, kernel.maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.maxWgCountPerTileCooperative);
|
||||
EXPECT_NE(0u, kernel.sharedState->maxWgCountPerTileCcs);
|
||||
EXPECT_NE(0u, kernel.sharedState->maxWgCountPerTileRcs);
|
||||
EXPECT_EQ(0u, kernel.sharedState->maxWgCountPerTileCooperative);
|
||||
|
||||
kernel.cooperativeSupport = true;
|
||||
kernel.sharedState->cooperativeSupport = true;
|
||||
kernel.setGroupSize(8, 8, 2);
|
||||
|
||||
EXPECT_NE(0u, kernel.maxWgCountPerTileCcs);
|
||||
EXPECT_NE(0u, kernel.maxWgCountPerTileRcs);
|
||||
EXPECT_NE(0u, kernel.maxWgCountPerTileCooperative);
|
||||
EXPECT_NE(0u, kernel.sharedState->maxWgCountPerTileCcs);
|
||||
EXPECT_NE(0u, kernel.sharedState->maxWgCountPerTileRcs);
|
||||
EXPECT_NE(0u, kernel.sharedState->maxWgCountPerTileCooperative);
|
||||
}
|
||||
|
||||
TEST_F(KernelImpTest, givenCorrectEngineTypeWhenGettingMaxWgCountPerTileThenReturnActualValue) {
|
||||
@@ -1243,9 +1243,9 @@ TEST_F(KernelImpTest, givenCorrectEngineTypeWhenGettingMaxWgCountPerTileThenRetu
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
|
||||
kernel.maxWgCountPerTileCcs = 4;
|
||||
kernel.maxWgCountPerTileRcs = 2;
|
||||
kernel.maxWgCountPerTileCooperative = 100;
|
||||
kernel.sharedState->maxWgCountPerTileCcs = 4;
|
||||
kernel.sharedState->maxWgCountPerTileRcs = 2;
|
||||
kernel.sharedState->maxWgCountPerTileCooperative = 100;
|
||||
|
||||
EXPECT_EQ(4u, kernel.getMaxWgCountPerTile(NEO::EngineGroupType::compute));
|
||||
EXPECT_EQ(2u, kernel.getMaxWgCountPerTile(NEO::EngineGroupType::renderCompute));
|
||||
|
||||
Reference in New Issue
Block a user