refactor: crossThreadData as std::vector

Related-To: NEO-15374
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2025-08-08 16:09:17 +00:00
committed by Compute-Runtime-Automation
parent 921db0e9e1
commit fe174328eb
16 changed files with 101 additions and 145 deletions

View File

@@ -249,11 +249,7 @@ KernelMutableState::KernelMutableState(const KernelMutableState &rhs) : Params{r
pImplicitArgs = (rhs.pImplicitArgs) ? std::make_unique<NEO::ImplicitArgs>(*rhs.pImplicitArgs) : nullptr;
pExtension = nullptr;
crossThreadDataSize = rhs.crossThreadDataSize;
if (crossThreadDataSize) {
crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
std::memcpy(crossThreadData.get(), rhs.crossThreadData.get(), crossThreadDataSize);
}
crossThreadData = rhs.crossThreadData;
surfaceStateHeapDataSize = rhs.surfaceStateHeapDataSize;
if (surfaceStateHeapDataSize) {
@@ -292,7 +288,6 @@ void KernelMutableState::swap(KernelMutableState &rhs) {
swap(this->pImplicitArgs, rhs.pImplicitArgs);
swap(this->pExtension, rhs.pExtension);
swap(this->crossThreadData, rhs.crossThreadData);
swap(this->crossThreadDataSize, rhs.crossThreadDataSize);
swap(this->surfaceStateHeapData, rhs.surfaceStateHeapData);
swap(this->surfaceStateHeapDataSize, rhs.surfaceStateHeapDataSize);
swap(this->dynamicStateHeapData, rhs.dynamicStateHeapData);
@@ -318,7 +313,6 @@ void KernelMutableState::moveMembersFrom(KernelMutableState &&orig) {
pImplicitArgs = std::move(orig.pImplicitArgs);
pExtension = std::move(orig.pExtension);
crossThreadDataSize = std::exchange(orig.crossThreadDataSize, 0U);
crossThreadData = std::move(orig.crossThreadData);
surfaceStateHeapDataSize = std::exchange(orig.surfaceStateHeapDataSize, 0U);
surfaceStateHeapData = std::move(orig.surfaceStateHeapData);
@@ -686,7 +680,7 @@ ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const
size_t maxBytesToCopy = argSize - element.sourceOffset;
size_t bytesToCopy = std::min(static_cast<size_t>(element.size), maxBytesToCopy);
auto pDst = ptrOffset(state.crossThreadData.get(), element.offset);
auto pDst = &getCrossThreadDataSpan()[element.offset];
if (argVal) {
auto pSrc = ptrOffset(argVal, element.sourceOffset);
memcpy_s(pDst, element.size, pSrc, bytesToCopy);
@@ -832,7 +826,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
state.slmArgSizes[argIndex] = static_cast<uint32_t>(argSize);
state.kernelArgInfos[argIndex] = KernelArgInfo{nullptr, 0, 0, false};
UNRECOVERABLE_IF(NEO::isUndefinedOffset(currArg.as<NEO::ArgDescPointer>().slmOffset));
auto slmOffset = *reinterpret_cast<uint32_t *>(state.crossThreadData.get() + currArg.as<NEO::ArgDescPointer>().slmOffset);
auto slmOffset = *reinterpret_cast<uint32_t *>(&getCrossThreadDataSpan()[currArg.as<NEO::ArgDescPointer>().slmOffset]);
state.slmArgOffsetValues[argIndex] = slmOffset;
slmOffset += static_cast<uint32_t>(argSize);
++argIndex;
@@ -1231,13 +1225,13 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->state.surfaceStateHeapDataSize = kernelImmData->getSurfaceStateHeapSize();
}
if (kernelDescriptor.kernelAttributes.crossThreadDataSize != 0) {
this->state.crossThreadData.reset(new uint8_t[kernelDescriptor.kernelAttributes.crossThreadDataSize]);
memcpy_s(this->state.crossThreadData.get(),
kernelDescriptor.kernelAttributes.crossThreadDataSize,
if (uint16_t crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize;
crossThreadDataSize != 0) {
this->state.crossThreadData.resize(crossThreadDataSize);
memcpy_s(this->state.crossThreadData.data(),
crossThreadDataSize,
kernelImmData->getCrossThreadDataTemplate(),
kernelDescriptor.kernelAttributes.crossThreadDataSize);
this->state.crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize;
crossThreadDataSize);
}
if (kernelImmData->getDynamicStateHeapDataSize() != 0) {

View File

@@ -81,8 +81,8 @@ struct KernelImp : Kernel {
uint32_t suggestMaxCooperativeGroupCount(NEO::EngineGroupType engineGroupType, uint32_t *groupSize, bool forceSingleTileQuery);
// Read-only accessors for the kernel's cross-thread data bytes and their size.
// NOTE(review): this listing shows two variants of each accessor back to back;
// presumably the first pair is the pre-refactor form and the second pair replaces it.
// Variant reading from smart-pointer storage with a separately tracked size field.
const uint8_t *getCrossThreadData() const override { return state.crossThreadData.get(); }
uint32_t getCrossThreadDataSize() const override { return state.crossThreadDataSize; }
// Variant reading from container storage: the size is taken from the container itself
// and cast to uint32_t to keep the return type unchanged.
const uint8_t *getCrossThreadData() const override { return state.crossThreadData.data(); }
uint32_t getCrossThreadDataSize() const override { return static_cast<uint32_t>(state.crossThreadData.size()); }
const std::vector<NEO::GraphicsAllocation *> &getArgumentsResidencyContainer() const override {
return state.argumentsResidencyContainer;
@@ -251,7 +251,7 @@ struct KernelImp : Kernel {
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
ze_result_t validateWorkgroupSize() const;
// Returns a mutable ArrayRef view (pointer + length) over the cross-thread data bytes.
// NOTE(review): two variants are listed; the first builds the view from get() and the
// separate crossThreadDataSize field, the second from data() and size() of the same member.
ArrayRef<uint8_t> getCrossThreadDataSpan() { return ArrayRef<uint8_t>(state.crossThreadData.get(), state.crossThreadDataSize); }
ArrayRef<uint8_t> getCrossThreadDataSpan() { return ArrayRef<uint8_t>(state.crossThreadData.data(), state.crossThreadData.size()); }
const KernelImmutableData *kernelImmData = nullptr;
Module *module = nullptr;

View File

@@ -91,7 +91,7 @@ struct KernelMutableState : public KernelMutableStateDefaultCopyableParams {
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;
std::unique_ptr<KernelExt> pExtension;
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
std::vector<uint8_t> crossThreadData{};
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;
std::unique_ptr<uint8_t[]> dynamicStateHeapData = nullptr;
@@ -99,7 +99,6 @@ struct KernelMutableState : public KernelMutableStateDefaultCopyableParams {
uint32_t perThreadDataSizeForWholeThreadGroup = 0U;
uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0U;
uint32_t crossThreadDataSize = 0U;
uint32_t surfaceStateHeapDataSize = 0U;
uint32_t dynamicStateHeapDataSize = 0U;
};

View File

@@ -72,9 +72,8 @@ void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNe
}
// Gives the mock kernel a zero-filled cross-thread data buffer of `dataSize` bytes.
// NOTE(review): the body below lists two variants back to back; presumably the first
// three statements are the pre-refactor form and the last two are their replacement.
void ModuleImmutableDataFixture::MockKernel::setCrossThreadData(uint32_t dataSize) {
// Variant 1: allocate a raw byte array, record its size, then zero it explicitly.
state.crossThreadData.reset(new uint8_t[dataSize]);
state.crossThreadDataSize = dataSize;
memset(state.crossThreadData.get(), 0x00, state.crossThreadDataSize);
// Variant 2: empty the container first so every one of the dataSize elements is
// freshly inserted with the value 0x0 (no stale bytes survive the resize).
state.crossThreadData.clear();
state.crossThreadData.resize(dataSize, 0x0);
}
void ModuleImmutableDataFixture::setUp() {

View File

@@ -41,7 +41,8 @@ Mock<::L0::KernelImp>::Mock() : BaseClass() {
NEO::populateKernelDescriptor(descriptor, kernelTokens, 8);
immutableData.kernelDescriptor = &descriptor;
immutableData.kernelInfo = &info;
state.crossThreadData.reset(new uint8_t[100]);
state.crossThreadData.clear();
state.crossThreadData.resize(100U, 0x0);
state.groupSize[0] = 1;
state.groupSize[1] = 1;

View File

@@ -44,6 +44,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
using ::L0::KernelImp::cooperativeSupport;
using ::L0::KernelImp::createPrintfBuffer;
using ::L0::KernelImp::devicePrintfKernelMutex;
using ::L0::KernelImp::getCrossThreadDataSpan;
using ::L0::KernelImp::heaplessEnabled;
using ::L0::KernelImp::implicitArgsVersion;
using ::L0::KernelImp::implicitScalingEnabled;

View File

@@ -94,14 +94,13 @@ TEST(KernelAssert, GivenKernelWithAssertWhenSettingAssertBufferThenAssertBufferI
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.stateless = 0;
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.pointerSize = sizeof(uintptr_t);
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(16);
kernel.state.crossThreadDataSize = sizeof(uint8_t[16]);
kernel.state.crossThreadData.resize(16, 0x0);
kernel.setAssertBuffer();
auto assertBufferAddress = assertHandler->getAssertBuffer()->getGpuAddressToPatch();
EXPECT_TRUE(memcmp(kernel.state.crossThreadData.get(), &assertBufferAddress, sizeof(assertBufferAddress)) == 0);
EXPECT_TRUE(memcmp(kernel.getCrossThreadDataSpan().begin(), &assertBufferAddress, sizeof(assertBufferAddress)) == 0);
EXPECT_TRUE(std::find(kernel.getInternalResidencyContainer().begin(), kernel.getInternalResidencyContainer().end(), assertHandler->getAssertBuffer()) != kernel.getInternalResidencyContainer().end());
}
@@ -121,8 +120,7 @@ TEST(KernelAssert, GivenKernelWithAssertAndImplicitArgsWhenInitializingKernelThe
kernel.descriptor.kernelAttributes.flags.requiresImplicitArgs = true;
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.stateless = 0;
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.pointerSize = sizeof(uintptr_t);
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(16);
kernel.state.crossThreadDataSize = sizeof(uint8_t[16]);
kernel.state.crossThreadData.resize(16, 0x0);
module.kernelImmData = &kernel.immutableData;
char heap[8];
@@ -157,8 +155,7 @@ TEST(KernelAssert, GivenNoAssertHandlerWhenKernelWithAssertSetsAssertBufferThenA
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.stateless = 0;
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.pointerSize = sizeof(uintptr_t);
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(16);
kernel.state.crossThreadDataSize = sizeof(uint8_t[16]);
kernel.state.crossThreadData.resize(16, 0x0);
kernel.setAssertBuffer();
EXPECT_NE(nullptr, neoDevice->getRootDeviceEnvironmentRef().assertHandler.get());

View File

@@ -1831,8 +1831,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
mockKernel.state.crossThreadData.resize(4 * sizeof(uint64_t), 0x0);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
@@ -1902,8 +1901,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
mockKernel.state.crossThreadData.resize(4 * sizeof(uint64_t), 0x0);
const auto surfStateSize = static_cast<uint32_t>(device->getNEODevice()->getGfxCoreHelper().getRenderSurfaceStateSize());
mockKernel.state.surfaceStateHeapData = std::make_unique<uint8_t[]>(surfStateSize);
mockKernel.state.surfaceStateHeapDataSize = surfStateSize;
@@ -1955,7 +1953,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
auto offsetInHeap = ptrDiff(sshHeap->getSpace(0), sshHeap->getCpuBase()) - surfStateSize;
uint64_t bindlessSshBaseOffset = ptrDiff(sshHeap->getGraphicsAllocation()->getGpuAddress(), sshHeap->getGraphicsAllocation()->getGpuBaseAddress()) + offsetInHeap;
auto patchValue = device->getNEODevice()->getGfxCoreHelper().getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSshBaseOffset));
auto patchLocation = reinterpret_cast<uint32_t *>(mockKernel.state.crossThreadData.get());
auto patchLocation = reinterpret_cast<const uint32_t *>(mockKernel.getCrossThreadData());
EXPECT_EQ(patchValue, *patchLocation);
}

View File

@@ -237,7 +237,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
std::unique_ptr<L0::ult::Module> mockModule = std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin);
Mock<::L0::KernelImp> kernel;
kernel.module = mockModule.get();
kernel.state.crossThreadDataSize = 0x60u;
kernel.state.crossThreadData.resize(0x60U, 0x0);
kernel.descriptor.kernelAttributes.flags.passInlineData = true;
uint32_t globalWorkSizeXOffset = 0x40u;

View File

@@ -629,9 +629,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
EXPECT_EQ(nullptr, kernel.getSyncBufferAllocation());
constexpr uint32_t crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
kernel.state.crossThreadDataSize = crossThreadDataSize;
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
@@ -665,7 +663,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, cooperativeParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), syncBufferAddress.stateless));
auto patchPtr = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[syncBufferAddress.stateless]);
EXPECT_EQ(0u, patchPtr);
EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel.getSyncBufferIndex());
@@ -694,9 +692,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenPatchPreambleQueueWhenAppendedSync
kernel.module = pMockModule.get();
constexpr uint32_t crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
kernel.state.crossThreadDataSize = crossThreadDataSize;
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
@@ -803,9 +799,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
EXPECT_EQ(nullptr, kernel.getRegionGroupBarrierAllocation());
constexpr uint32_t crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
kernel.state.crossThreadDataSize = crossThreadDataSize;
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
@@ -827,7 +821,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams));
EXPECT_EQ(std::numeric_limits<size_t>::max(), launchParams.regionBarrierPatchIndex);
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), regionGroupBarrier.stateless));
auto patchPtr = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[regionGroupBarrier.stateless]);
EXPECT_NE(0u, patchPtr);
auto allocIter = std::find_if(ultCsr->makeResidentAllocations.begin(), ultCsr->makeResidentAllocations.end(), [patchPtr](const std::pair<GraphicsAllocation *, uint32_t> &element) {
@@ -853,7 +847,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
auto notFoundIt = std::find(regionGroupBarrierAllocIt + 1, kernel.state.internalResidencyContainer.end(), regionGroupBarrierAllocation);
EXPECT_EQ(kernel.state.internalResidencyContainer.end(), notFoundIt);
auto patchPtr2 = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), regionGroupBarrier.stateless));
auto patchPtr2 = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[regionGroupBarrier.stateless]);
size_t requestedNumberOfWorkgroups = groupCount.groupCountX * groupCount.groupCountY * groupCount.groupCountZ;
@@ -886,9 +880,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
EXPECT_EQ(nullptr, kernel.getRegionGroupBarrierAllocation());
constexpr uint32_t crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
kernel.state.crossThreadDataSize = crossThreadDataSize;
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
@@ -916,7 +908,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams));
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), regionGroupBarrier.stateless));
auto patchPtr = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[regionGroupBarrier.stateless]);
EXPECT_EQ(0u, patchPtr);
EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel.getRegionGroupBarrierIndex());
@@ -945,9 +937,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenPatchPreambleQueueWhenAppendedRegi
kernel.module = pMockModule.get();
constexpr uint32_t crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
kernel.state.crossThreadDataSize = crossThreadDataSize;
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};

View File

@@ -2968,8 +2968,7 @@ HWTEST2_F(CommandListAppendLaunchKernel,
kernel.module = mockModule.get();
kernel.descriptor.kernelAttributes.flags.passInlineData = false;
kernel.state.perThreadDataSizeForWholeThreadGroup = 0;
kernel.state.crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(kernel.state.crossThreadDataSize);
kernel.state.crossThreadData.resize(64U, 0x0);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0);
@@ -3000,8 +2999,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughIohSpaceWhenLaunchingKern
kernel.module = mockModule.get();
kernel.descriptor.kernelAttributes.flags.passInlineData = false;
kernel.state.perThreadDataSizeForWholeThreadGroup = 0;
kernel.state.crossThreadDataSize = 64;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(kernel.state.crossThreadDataSize);
kernel.state.crossThreadData.resize(64U, 0x0);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0);

View File

@@ -1475,9 +1475,9 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
immDataVector->push_back(std::move(mockKernelImmutableData));
auto crossThreadData = std::make_unique<uint32_t[]>(4);
kernel->state.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
kernel->state.crossThreadDataSize = sizeof(uint32_t[4]);
constexpr size_t ctdDwords = 4U;
kernel->state.crossThreadData.clear();
kernel->state.crossThreadData.resize(sizeof(uint32_t[ctdDwords]));
auto result = kernel->initialize(&kernelDesc);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -1485,11 +1485,11 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
EXPECT_NE(nullptr, rtDispatchGlobals);
auto dispatchGlobalsAddressPatched = *reinterpret_cast<uint64_t *>(ptrOffset(crossThreadData.get(), rtGlobalPointerPatchOffset));
auto dispatchGlobalsAddressPatched = *reinterpret_cast<const uint64_t *>(ptrOffset(kernel->getCrossThreadData(), rtGlobalPointerPatchOffset));
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->rtDispatchGlobalsArray->getGpuAddressToPatch());
EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched);
kernel->state.crossThreadData.release();
kernel->state.crossThreadData.clear();
}
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
@@ -3188,7 +3188,7 @@ HWTEST2_F(SetKernelArg, givenHeaplessWhenPatchingImageWithBindlessEnabledCorrect
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
auto ctd = kernel->state.crossThreadData.get();
auto ctd = kernel->state.crossThreadData.data();
auto ssInHeap = imageHW->getBindlessSlot();
auto patchLocation = ptrOffset(ctd, imageArg.bindless);
@@ -3336,8 +3336,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCal
argDescriptor.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
argDescriptor.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescImage>().bindless = 0x0;
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[4]));
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
@@ -3848,23 +3847,20 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
mockKernel.module = &mockModule;
auto crossThreadData = std::make_unique<uint32_t[]>(4);
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
mockKernel.state.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
mockKernel.state.crossThreadDataSize = sizeof(uint32_t[4]);
mockKernel.state.crossThreadData.resize(sizeof(uint32_t[4]));
mockKernel.createPrintfBuffer();
auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
EXPECT_NE(nullptr, printfBufferAllocation);
auto printfBufferAddressPatched = *reinterpret_cast<uintptr_t *>(crossThreadData.get());
auto printfBufferAddressPatched = *reinterpret_cast<const uintptr_t *>(mockKernel.getCrossThreadData());
auto printfBufferGpuAddressOffset = static_cast<uintptr_t>(printfBufferAllocation->getGpuAddressToPatch());
EXPECT_EQ(printfBufferGpuAddressOffset, printfBufferAddressPatched);
mockKernel.state.crossThreadData.release();
mockKernel.state.crossThreadData.clear();
}
using PrintfHandlerTests = ::testing::Test;
@@ -4251,9 +4247,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 5 * sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
constexpr size_t ctdQwords = 5U;
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
const uint64_t baseAddress = 0x1000;
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
@@ -4266,8 +4261,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(patchValue1, crossThreadData[0]);
EXPECT_EQ(patchValue2, crossThreadData[1]);
@@ -4313,9 +4308,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatc
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
constexpr size_t ctdQwords = 4U;
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
const uint64_t baseAddress = 0x1000;
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
@@ -4325,8 +4319,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatc
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(0u, crossThreadData[0]);
EXPECT_EQ(patchValue2, crossThreadData[1]);
@@ -4348,15 +4342,14 @@ TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThre
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
constexpr size_t ctdQwords = 4U;
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
const uint64_t baseAddress = 0x1000;
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(0u, crossThreadData[0]);
}
@@ -4374,17 +4367,15 @@ TEST_F(BindlessKernelTest, givenNoStatefulArgsWhenPatchingBindlessOffsetsInCross
argDescriptor.as<NEO::ArgDescValue>().elements.push_back(NEO::ArgDescValue::Element{0, 8, 0, false});
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
mockKernel.state.crossThreadData.resize(sizeof(uint64_t), 0x0);
const uint64_t baseAddress = 0x1000;
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
uint64_t crossThreadData = 0U;
memcpy_s(&crossThreadData, sizeof(uint64_t), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(0u, crossThreadData[0]);
EXPECT_EQ(0u, crossThreadData);
}
TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImplicitArgsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
@@ -4413,15 +4404,14 @@ TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImpl
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
constexpr size_t ctdQwords = 4U;
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
const uint64_t baseAddress = 0x1000;
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(0u, crossThreadData[0]);
EXPECT_EQ(0u, crossThreadData[1]);
@@ -4510,9 +4500,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 5 * sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
constexpr size_t ctdQwords = 5U;
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
const uint64_t baseAddress = 0x1000;
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
@@ -4523,8 +4512,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(patchValue1, crossThreadData[1]);
EXPECT_EQ(0u, patchValue2);
@@ -4574,9 +4563,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamp
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(7 * sizeof(uint64_t));
mockKernel.state.crossThreadDataSize = 7 * sizeof(uint64_t);
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
constexpr size_t ctdQwords = 7U;
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
const uint64_t baseAddress = 0x1000;
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
@@ -4587,8 +4575,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamp
mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
EXPECT_EQ(patchValue1, crossThreadData[5]);
EXPECT_EQ(patchValue2, crossThreadData[6]);
@@ -4603,8 +4591,7 @@ TEST_F(KernelSyncBufferTest, GivenSyncBufferArgWhenPatchingSyncBufferThenPtrIsCo
Mock<Module> mockModule(device, nullptr);
kernel.module = &mockModule;
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(64);
kernel.state.crossThreadDataSize = 64;
kernel.state.crossThreadData.resize(64U);
auto &syncBuffer = kernel.immutableData.kernelDescriptor->payloadMappings.implicitArgs.syncBufferAddress;
syncBuffer.stateless = 0x8;
@@ -4618,7 +4605,7 @@ TEST_F(KernelSyncBufferTest, GivenSyncBufferArgWhenPatchingSyncBufferThenPtrIsCo
kernel.patchSyncBuffer(&alloc, bufferOffset);
auto patchValue = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), syncBuffer.stateless));
auto patchValue = *reinterpret_cast<const uint64_t *>(ptrOffset(kernel.getCrossThreadData(), syncBuffer.stateless));
auto expectedPatchValue = ptrOffset(alloc.getGpuAddressToPatch(), bufferOffset);
EXPECT_EQ(expectedPatchValue, patchValue);

View File

@@ -59,16 +59,17 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssigningToItselfThenTheCurrent
constexpr size_t mockSize{8U};
KernelMutableState state1{};
state1.crossThreadData.reset(new uint8_t[mockSize]);
auto addressBeforeAssignment{state1.crossThreadData.get()};
state1.crossThreadData.clear();
state1.crossThreadData.resize(mockSize, 0x0);
auto addressBeforeAssignment{state1.crossThreadData.data()};
auto &notReallyDifferentState{state1};
state1 = notReallyDifferentState;
auto addressAfterAssignment{state1.crossThreadData.get()};
auto addressAfterAssignment{state1.crossThreadData.data()};
auto &&notReallyDifferentState2{std::move(state1)};
state1 = std::move(notReallyDifferentState2);
auto addressAfterAssignment2{state1.crossThreadData.get()};
auto addressAfterAssignment2{state1.crossThreadData.data()};
EXPECT_EQ(addressBeforeAssignment, addressAfterAssignment);
EXPECT_EQ(addressBeforeAssignment, addressAfterAssignment2);
@@ -86,9 +87,9 @@ void fillKernelMutableStateWithMockData(KernelMutableState &state) {
state.pExtension = std::make_unique<KernelExt>();
constexpr size_t mockSize{8U};
state.crossThreadData.reset(new uint8_t[mockSize]);
std::memcpy(state.crossThreadData.get(), std::to_array<uint8_t>({11, 12, 13, 14, 15, 16, 17, 18}).data(), mockSize);
state.crossThreadDataSize = mockSize;
state.crossThreadData.clear();
state.crossThreadData.reserve(mockSize);
std::ranges::copy(std::to_array<uint8_t, mockSize>({11, 12, 13, 14, 15, 16, 17, 18}), std::back_inserter(state.crossThreadData));
state.surfaceStateHeapData.reset(new uint8_t[mockSize]);
std::memcpy(state.surfaceStateHeapData.get(), std::to_array<uint8_t>({21, 22, 23, 24, 25, 26, 27, 28}).data(), mockSize);
@@ -142,7 +143,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
KernelMutableState state2{};
state2 = state1; // assignment operator is being tested
EXPECT_EQ(0, std::memcmp(state1.crossThreadData.get(), state2.crossThreadData.get(), state1.crossThreadDataSize));
EXPECT_EQ(0, std::memcmp(state1.crossThreadData.data(), state2.crossThreadData.data(), state1.crossThreadData.size()));
EXPECT_EQ(0, std::memcmp(state1.surfaceStateHeapData.get(), state2.surfaceStateHeapData.get(), state1.surfaceStateHeapDataSize));
EXPECT_EQ(0, std::memcmp(state1.dynamicStateHeapData.get(), state2.dynamicStateHeapData.get(), state1.dynamicStateHeapDataSize));
@@ -164,7 +165,6 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
EXPECT_EQ(0, std::memcmp(state1.globalOffsets, state2.globalOffsets, KernelMutableState::dimMax * sizeof(uint32_t)));
EXPECT_EQ(0, std::memcmp(state1.groupSize, state2.groupSize, KernelMutableState::dimMax * sizeof(uint32_t)));
EXPECT_EQ(state1.crossThreadDataSize, state2.crossThreadDataSize);
EXPECT_EQ(state1.surfaceStateHeapDataSize, state2.surfaceStateHeapDataSize);
EXPECT_EQ(state1.dynamicStateHeapDataSize, state2.dynamicStateHeapDataSize);
EXPECT_EQ(state1.perThreadDataSize, state2.perThreadDataSize);
@@ -182,8 +182,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
KernelMutableState state3{};
state3 = std::move(state1);
EXPECT_EQ(nullptr, state1.crossThreadData.get());
EXPECT_EQ(0U, state1.crossThreadDataSize);
EXPECT_EQ(0U, state1.crossThreadData.size());
EXPECT_EQ(nullptr, state1.surfaceStateHeapData.get());
EXPECT_EQ(0U, state1.surfaceStateHeapDataSize);
EXPECT_EQ(nullptr, state1.dynamicStateHeapData.get());
@@ -194,7 +193,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
EXPECT_EQ(0U, state1.perThreadDataSizeForWholeThreadGroup);
EXPECT_EQ(0U, state1.perThreadDataSizeForWholeThreadGroupAllocated);
EXPECT_EQ(0, std::memcmp(state3.crossThreadData.get(), state2.crossThreadData.get(), state3.crossThreadDataSize));
EXPECT_EQ(0, std::memcmp(state3.crossThreadData.data(), state2.crossThreadData.data(), state3.crossThreadData.size()));
EXPECT_EQ(0, std::memcmp(state3.surfaceStateHeapData.get(), state2.surfaceStateHeapData.get(), state3.surfaceStateHeapDataSize));
EXPECT_EQ(0, std::memcmp(state3.dynamicStateHeapData.get(), state2.dynamicStateHeapData.get(), state3.dynamicStateHeapDataSize));
@@ -216,7 +215,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
EXPECT_EQ(0, std::memcmp(state3.globalOffsets, state2.globalOffsets, KernelMutableState::dimMax * sizeof(uint32_t)));
EXPECT_EQ(0, std::memcmp(state3.groupSize, state2.groupSize, KernelMutableState::dimMax * sizeof(uint32_t)));
EXPECT_EQ(state3.crossThreadDataSize, state2.crossThreadDataSize);
EXPECT_EQ(state3.crossThreadData.size(), state2.crossThreadData.size());
EXPECT_EQ(state3.surfaceStateHeapDataSize, state2.surfaceStateHeapDataSize);
EXPECT_EQ(state3.dynamicStateHeapDataSize, state2.dynamicStateHeapDataSize);
EXPECT_EQ(state3.perThreadDataSize, state2.perThreadDataSize);
@@ -243,9 +242,9 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
kernel1.module = &module;
constexpr size_t mockSize{8U};
kernel1.state.crossThreadData.reset(new uint8_t[mockSize]);
std::memcpy(kernel1.state.crossThreadData.get(), std::to_array<uint8_t>({91, 92, 93, 94, 95, 96, 97, 98}).data(), mockSize);
kernel1.state.crossThreadDataSize = mockSize;
kernel1.state.crossThreadData.clear();
kernel1.state.crossThreadData.reserve(mockSize);
std::ranges::copy(std::to_array<uint8_t, mockSize>({91, 92, 93, 94, 95, 96, 97, 98}), std::back_inserter(kernel1.state.crossThreadData));
kernel1.state.reservePerThreadDataForWholeThreadGroup(mockSize);
std::memcpy(kernel1.state.perThreadDataForWholeThreadGroup, std::to_array<uint8_t>({81, 82, 83, 84, 85, 86, 87, 88}).data(), mockSize);
@@ -254,14 +253,14 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
fillKernelMutableStateWithMockData(state);
// No need to check each and every member again
EXPECT_NE(0, std::memcmp(kernel1.state.crossThreadData.get(), state.crossThreadData.get(), mockSize));
EXPECT_NE(0, std::memcmp(kernel1.state.crossThreadData.data(), state.crossThreadData.data(), mockSize));
EXPECT_NE(0, std::memcmp(kernel1.state.perThreadDataForWholeThreadGroup, state.perThreadDataForWholeThreadGroup, mockSize));
auto clonedKernel = kernel1.cloneWithStateOverride(&state);
auto kernel2 = static_cast<WhiteBox<KernelImp> *>(clonedKernel.get());
// KernelMutableState part taken from `state`
EXPECT_EQ(0, std::memcmp(kernel2->state.crossThreadData.get(), state.crossThreadData.get(), mockSize));
EXPECT_EQ(0, std::memcmp(kernel2->state.crossThreadData.data(), state.crossThreadData.data(), mockSize));
EXPECT_EQ(0, std::memcmp(kernel2->state.perThreadDataForWholeThreadGroup, state.perThreadDataForWholeThreadGroup, mockSize));
// KernelImp part taken from `kernel1`
@@ -284,9 +283,6 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
}
TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAndGroupCount) {
uint32_t *crossThreadData =
reinterpret_cast<uint32_t *>(alignedMalloc(sizeof(uint32_t[6]), 32));
WhiteBox<::L0::KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor;
kernelInfo.kernelDescriptor = &descriptor;
@@ -299,8 +295,7 @@ TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSi
Mock<KernelImp> kernel;
kernel.kernelImmData = &kernelInfo;
kernel.state.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
kernel.state.crossThreadDataSize = sizeof(uint32_t[6]);
kernel.state.crossThreadData.resize(sizeof(uint32_t[6]));
kernel.state.groupSize[0] = 2;
kernel.state.groupSize[1] = 3;
kernel.state.groupSize[2] = 5;
@@ -318,8 +313,7 @@ TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSi
EXPECT_EQ(11U, numGroups[1]);
EXPECT_EQ(13U, numGroups[2]);
kernel.state.crossThreadData.release();
alignedFree(crossThreadData);
kernel.state.crossThreadData.clear();
}
TEST_F(KernelImpTest, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSetValidNumberOfBits) {

View File

@@ -315,11 +315,11 @@ void MutableCommandListFixtureInit::prepareKernelArg(uint16_t argIndex, L0::MCL:
argSlm.pointerSize = 8;
if (kernelMask & kernel1Bit) {
memset(ptrOffset(kernel->state.crossThreadData.get(), argSlm.slmOffset), 0, 8);
memset(&kernel->getCrossThreadDataSpan()[argSlm.slmOffset], 0, 8);
mockKernelImmData->kernelDescriptor->payloadMappings.explicitArgs[argIndex] = kernelArgSlm;
}
if (kernelMask & kernel2Bit) {
memset(ptrOffset(kernel2->state.crossThreadData.get(), argSlm.slmOffset), 0, 8);
memset(&kernel2->getCrossThreadDataSpan()[argSlm.slmOffset], 0, 8);
mockKernelImmData2->kernelDescriptor->payloadMappings.explicitArgs[argIndex] = kernelArgSlm;
}
}

View File

@@ -405,8 +405,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
mockKernelImmData2->kernelDescriptor->kernelAttributes.crossThreadDataSize = kernel2CrossThreadInitSize;
mockKernelImmData2->crossThreadDataSize = kernel2CrossThreadInitSize;
mockKernelImmData2->crossThreadDataTemplate.reset(new uint8_t[kernel2CrossThreadInitSize]);
kernel2->state.crossThreadDataSize = kernel2CrossThreadInitSize;
kernel2->state.crossThreadData.reset(new uint8_t[kernel2CrossThreadInitSize]);
kernel2->state.crossThreadData.resize(kernel2CrossThreadInitSize, 0x0);
mutableCommandIdDesc.flags = kernelIsaMutationFlags;

View File

@@ -87,8 +87,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
mockKernelImmData2->kernelDescriptor->kernelAttributes.crossThreadDataSize = kernel2CrossThreadInitSize;
mockKernelImmData2->crossThreadDataSize = kernel2CrossThreadInitSize;
mockKernelImmData2->crossThreadDataTemplate.reset(new uint8_t[kernel2CrossThreadInitSize]);
kernel2->state.crossThreadDataSize = kernel2CrossThreadInitSize;
kernel2->state.crossThreadData.reset(new uint8_t[kernel2CrossThreadInitSize]);
kernel2->state.crossThreadData.resize(kernel2CrossThreadInitSize, 0x0);
createMutableKernelGroup();
@@ -153,7 +152,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
kernel2->state.perThreadDataSizeForWholeThreadGroup = 0x40;
kernel2->state.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(kernel2->state.perThreadDataSizeForWholeThreadGroup, 32));
auto srcPtr = kernel2->state.crossThreadData.get();
auto srcPtr = kernel2->state.crossThreadData.data();
memset(srcPtr, 0xFF, mutableKernels[1]->inlineDataSize);
auto dstPtr = mutableKernels[1]->getMutableComputeWalker()->getHostMemoryInlineDataPointer();