mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-01 04:23:00 +08:00
refactor: crossThreadData as std::vector
Related-To: NEO-15374 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
921db0e9e1
commit
fe174328eb
@@ -249,11 +249,7 @@ KernelMutableState::KernelMutableState(const KernelMutableState &rhs) : Params{r
|
||||
pImplicitArgs = (rhs.pImplicitArgs) ? std::make_unique<NEO::ImplicitArgs>(*rhs.pImplicitArgs) : nullptr;
|
||||
pExtension = nullptr;
|
||||
|
||||
crossThreadDataSize = rhs.crossThreadDataSize;
|
||||
if (crossThreadDataSize) {
|
||||
crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
|
||||
std::memcpy(crossThreadData.get(), rhs.crossThreadData.get(), crossThreadDataSize);
|
||||
}
|
||||
crossThreadData = rhs.crossThreadData;
|
||||
|
||||
surfaceStateHeapDataSize = rhs.surfaceStateHeapDataSize;
|
||||
if (surfaceStateHeapDataSize) {
|
||||
@@ -292,7 +288,6 @@ void KernelMutableState::swap(KernelMutableState &rhs) {
|
||||
swap(this->pImplicitArgs, rhs.pImplicitArgs);
|
||||
swap(this->pExtension, rhs.pExtension);
|
||||
swap(this->crossThreadData, rhs.crossThreadData);
|
||||
swap(this->crossThreadDataSize, rhs.crossThreadDataSize);
|
||||
swap(this->surfaceStateHeapData, rhs.surfaceStateHeapData);
|
||||
swap(this->surfaceStateHeapDataSize, rhs.surfaceStateHeapDataSize);
|
||||
swap(this->dynamicStateHeapData, rhs.dynamicStateHeapData);
|
||||
@@ -318,7 +313,6 @@ void KernelMutableState::moveMembersFrom(KernelMutableState &&orig) {
|
||||
pImplicitArgs = std::move(orig.pImplicitArgs);
|
||||
pExtension = std::move(orig.pExtension);
|
||||
|
||||
crossThreadDataSize = std::exchange(orig.crossThreadDataSize, 0U);
|
||||
crossThreadData = std::move(orig.crossThreadData);
|
||||
surfaceStateHeapDataSize = std::exchange(orig.surfaceStateHeapDataSize, 0U);
|
||||
surfaceStateHeapData = std::move(orig.surfaceStateHeapData);
|
||||
@@ -686,7 +680,7 @@ ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const
|
||||
size_t maxBytesToCopy = argSize - element.sourceOffset;
|
||||
size_t bytesToCopy = std::min(static_cast<size_t>(element.size), maxBytesToCopy);
|
||||
|
||||
auto pDst = ptrOffset(state.crossThreadData.get(), element.offset);
|
||||
auto pDst = &getCrossThreadDataSpan()[element.offset];
|
||||
if (argVal) {
|
||||
auto pSrc = ptrOffset(argVal, element.sourceOffset);
|
||||
memcpy_s(pDst, element.size, pSrc, bytesToCopy);
|
||||
@@ -832,7 +826,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
|
||||
state.slmArgSizes[argIndex] = static_cast<uint32_t>(argSize);
|
||||
state.kernelArgInfos[argIndex] = KernelArgInfo{nullptr, 0, 0, false};
|
||||
UNRECOVERABLE_IF(NEO::isUndefinedOffset(currArg.as<NEO::ArgDescPointer>().slmOffset));
|
||||
auto slmOffset = *reinterpret_cast<uint32_t *>(state.crossThreadData.get() + currArg.as<NEO::ArgDescPointer>().slmOffset);
|
||||
auto slmOffset = *reinterpret_cast<uint32_t *>(&getCrossThreadDataSpan()[currArg.as<NEO::ArgDescPointer>().slmOffset]);
|
||||
state.slmArgOffsetValues[argIndex] = slmOffset;
|
||||
slmOffset += static_cast<uint32_t>(argSize);
|
||||
++argIndex;
|
||||
@@ -1231,13 +1225,13 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
this->state.surfaceStateHeapDataSize = kernelImmData->getSurfaceStateHeapSize();
|
||||
}
|
||||
|
||||
if (kernelDescriptor.kernelAttributes.crossThreadDataSize != 0) {
|
||||
this->state.crossThreadData.reset(new uint8_t[kernelDescriptor.kernelAttributes.crossThreadDataSize]);
|
||||
memcpy_s(this->state.crossThreadData.get(),
|
||||
kernelDescriptor.kernelAttributes.crossThreadDataSize,
|
||||
if (uint16_t crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize;
|
||||
crossThreadDataSize != 0) {
|
||||
this->state.crossThreadData.resize(crossThreadDataSize);
|
||||
memcpy_s(this->state.crossThreadData.data(),
|
||||
crossThreadDataSize,
|
||||
kernelImmData->getCrossThreadDataTemplate(),
|
||||
kernelDescriptor.kernelAttributes.crossThreadDataSize);
|
||||
this->state.crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize;
|
||||
crossThreadDataSize);
|
||||
}
|
||||
|
||||
if (kernelImmData->getDynamicStateHeapDataSize() != 0) {
|
||||
|
||||
@@ -81,8 +81,8 @@ struct KernelImp : Kernel {
|
||||
|
||||
uint32_t suggestMaxCooperativeGroupCount(NEO::EngineGroupType engineGroupType, uint32_t *groupSize, bool forceSingleTileQuery);
|
||||
|
||||
const uint8_t *getCrossThreadData() const override { return state.crossThreadData.get(); }
|
||||
uint32_t getCrossThreadDataSize() const override { return state.crossThreadDataSize; }
|
||||
const uint8_t *getCrossThreadData() const override { return state.crossThreadData.data(); }
|
||||
uint32_t getCrossThreadDataSize() const override { return static_cast<uint32_t>(state.crossThreadData.size()); }
|
||||
|
||||
const std::vector<NEO::GraphicsAllocation *> &getArgumentsResidencyContainer() const override {
|
||||
return state.argumentsResidencyContainer;
|
||||
@@ -251,7 +251,7 @@ struct KernelImp : Kernel {
|
||||
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
|
||||
uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
|
||||
ze_result_t validateWorkgroupSize() const;
|
||||
ArrayRef<uint8_t> getCrossThreadDataSpan() { return ArrayRef<uint8_t>(state.crossThreadData.get(), state.crossThreadDataSize); }
|
||||
ArrayRef<uint8_t> getCrossThreadDataSpan() { return ArrayRef<uint8_t>(state.crossThreadData.data(), state.crossThreadData.size()); }
|
||||
|
||||
const KernelImmutableData *kernelImmData = nullptr;
|
||||
Module *module = nullptr;
|
||||
|
||||
@@ -91,7 +91,7 @@ struct KernelMutableState : public KernelMutableStateDefaultCopyableParams {
|
||||
|
||||
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;
|
||||
std::unique_ptr<KernelExt> pExtension;
|
||||
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
|
||||
std::vector<uint8_t> crossThreadData{};
|
||||
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;
|
||||
std::unique_ptr<uint8_t[]> dynamicStateHeapData = nullptr;
|
||||
|
||||
@@ -99,7 +99,6 @@ struct KernelMutableState : public KernelMutableStateDefaultCopyableParams {
|
||||
uint32_t perThreadDataSizeForWholeThreadGroup = 0U;
|
||||
uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0U;
|
||||
|
||||
uint32_t crossThreadDataSize = 0U;
|
||||
uint32_t surfaceStateHeapDataSize = 0U;
|
||||
uint32_t dynamicStateHeapDataSize = 0U;
|
||||
};
|
||||
|
||||
@@ -72,9 +72,8 @@ void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNe
|
||||
}
|
||||
|
||||
void ModuleImmutableDataFixture::MockKernel::setCrossThreadData(uint32_t dataSize) {
|
||||
state.crossThreadData.reset(new uint8_t[dataSize]);
|
||||
state.crossThreadDataSize = dataSize;
|
||||
memset(state.crossThreadData.get(), 0x00, state.crossThreadDataSize);
|
||||
state.crossThreadData.clear();
|
||||
state.crossThreadData.resize(dataSize, 0x0);
|
||||
}
|
||||
|
||||
void ModuleImmutableDataFixture::setUp() {
|
||||
|
||||
@@ -41,7 +41,8 @@ Mock<::L0::KernelImp>::Mock() : BaseClass() {
|
||||
NEO::populateKernelDescriptor(descriptor, kernelTokens, 8);
|
||||
immutableData.kernelDescriptor = &descriptor;
|
||||
immutableData.kernelInfo = &info;
|
||||
state.crossThreadData.reset(new uint8_t[100]);
|
||||
state.crossThreadData.clear();
|
||||
state.crossThreadData.resize(100U, 0x0);
|
||||
|
||||
state.groupSize[0] = 1;
|
||||
state.groupSize[1] = 1;
|
||||
|
||||
@@ -44,6 +44,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
|
||||
using ::L0::KernelImp::cooperativeSupport;
|
||||
using ::L0::KernelImp::createPrintfBuffer;
|
||||
using ::L0::KernelImp::devicePrintfKernelMutex;
|
||||
using ::L0::KernelImp::getCrossThreadDataSpan;
|
||||
using ::L0::KernelImp::heaplessEnabled;
|
||||
using ::L0::KernelImp::implicitArgsVersion;
|
||||
using ::L0::KernelImp::implicitScalingEnabled;
|
||||
|
||||
@@ -94,14 +94,13 @@ TEST(KernelAssert, GivenKernelWithAssertWhenSettingAssertBufferThenAssertBufferI
|
||||
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
|
||||
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.stateless = 0;
|
||||
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.pointerSize = sizeof(uintptr_t);
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(16);
|
||||
kernel.state.crossThreadDataSize = sizeof(uint8_t[16]);
|
||||
kernel.state.crossThreadData.resize(16, 0x0);
|
||||
|
||||
kernel.setAssertBuffer();
|
||||
|
||||
auto assertBufferAddress = assertHandler->getAssertBuffer()->getGpuAddressToPatch();
|
||||
|
||||
EXPECT_TRUE(memcmp(kernel.state.crossThreadData.get(), &assertBufferAddress, sizeof(assertBufferAddress)) == 0);
|
||||
EXPECT_TRUE(memcmp(kernel.getCrossThreadDataSpan().begin(), &assertBufferAddress, sizeof(assertBufferAddress)) == 0);
|
||||
EXPECT_TRUE(std::find(kernel.getInternalResidencyContainer().begin(), kernel.getInternalResidencyContainer().end(), assertHandler->getAssertBuffer()) != kernel.getInternalResidencyContainer().end());
|
||||
}
|
||||
|
||||
@@ -121,8 +120,7 @@ TEST(KernelAssert, GivenKernelWithAssertAndImplicitArgsWhenInitializingKernelThe
|
||||
kernel.descriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.stateless = 0;
|
||||
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.pointerSize = sizeof(uintptr_t);
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(16);
|
||||
kernel.state.crossThreadDataSize = sizeof(uint8_t[16]);
|
||||
kernel.state.crossThreadData.resize(16, 0x0);
|
||||
|
||||
module.kernelImmData = &kernel.immutableData;
|
||||
char heap[8];
|
||||
@@ -157,8 +155,7 @@ TEST(KernelAssert, GivenNoAssertHandlerWhenKernelWithAssertSetsAssertBufferThenA
|
||||
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
|
||||
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.stateless = 0;
|
||||
kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress.pointerSize = sizeof(uintptr_t);
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(16);
|
||||
kernel.state.crossThreadDataSize = sizeof(uint8_t[16]);
|
||||
kernel.state.crossThreadData.resize(16, 0x0);
|
||||
|
||||
kernel.setAssertBuffer();
|
||||
EXPECT_NE(nullptr, neoDevice->getRootDeviceEnvironmentRef().assertHandler.get());
|
||||
|
||||
@@ -1831,8 +1831,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
mockKernel.state.crossThreadData.resize(4 * sizeof(uint64_t), 0x0);
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
@@ -1902,8 +1901,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
mockKernel.state.crossThreadData.resize(4 * sizeof(uint64_t), 0x0);
|
||||
const auto surfStateSize = static_cast<uint32_t>(device->getNEODevice()->getGfxCoreHelper().getRenderSurfaceStateSize());
|
||||
mockKernel.state.surfaceStateHeapData = std::make_unique<uint8_t[]>(surfStateSize);
|
||||
mockKernel.state.surfaceStateHeapDataSize = surfStateSize;
|
||||
@@ -1955,7 +1953,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
auto offsetInHeap = ptrDiff(sshHeap->getSpace(0), sshHeap->getCpuBase()) - surfStateSize;
|
||||
uint64_t bindlessSshBaseOffset = ptrDiff(sshHeap->getGraphicsAllocation()->getGpuAddress(), sshHeap->getGraphicsAllocation()->getGpuBaseAddress()) + offsetInHeap;
|
||||
auto patchValue = device->getNEODevice()->getGfxCoreHelper().getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSshBaseOffset));
|
||||
auto patchLocation = reinterpret_cast<uint32_t *>(mockKernel.state.crossThreadData.get());
|
||||
auto patchLocation = reinterpret_cast<const uint32_t *>(mockKernel.getCrossThreadData());
|
||||
EXPECT_EQ(patchValue, *patchLocation);
|
||||
}
|
||||
|
||||
|
||||
@@ -237,7 +237,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
std::unique_ptr<L0::ult::Module> mockModule = std::make_unique<L0::ult::Module>(device, nullptr, ModuleType::builtin);
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
kernel.module = mockModule.get();
|
||||
kernel.state.crossThreadDataSize = 0x60u;
|
||||
kernel.state.crossThreadData.resize(0x60U, 0x0);
|
||||
kernel.descriptor.kernelAttributes.flags.passInlineData = true;
|
||||
|
||||
uint32_t globalWorkSizeXOffset = 0x40u;
|
||||
|
||||
@@ -629,9 +629,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
|
||||
EXPECT_EQ(nullptr, kernel.getSyncBufferAllocation());
|
||||
|
||||
constexpr uint32_t crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
|
||||
kernel.state.crossThreadDataSize = crossThreadDataSize;
|
||||
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
@@ -665,7 +663,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
|
||||
auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, cooperativeParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), syncBufferAddress.stateless));
|
||||
auto patchPtr = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[syncBufferAddress.stateless]);
|
||||
EXPECT_EQ(0u, patchPtr);
|
||||
|
||||
EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel.getSyncBufferIndex());
|
||||
@@ -694,9 +692,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenPatchPreambleQueueWhenAppendedSync
|
||||
kernel.module = pMockModule.get();
|
||||
|
||||
constexpr uint32_t crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
|
||||
kernel.state.crossThreadDataSize = crossThreadDataSize;
|
||||
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
@@ -803,9 +799,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
||||
EXPECT_EQ(nullptr, kernel.getRegionGroupBarrierAllocation());
|
||||
|
||||
constexpr uint32_t crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
|
||||
kernel.state.crossThreadDataSize = crossThreadDataSize;
|
||||
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
@@ -827,7 +821,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams));
|
||||
EXPECT_EQ(std::numeric_limits<size_t>::max(), launchParams.regionBarrierPatchIndex);
|
||||
|
||||
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), regionGroupBarrier.stateless));
|
||||
auto patchPtr = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[regionGroupBarrier.stateless]);
|
||||
EXPECT_NE(0u, patchPtr);
|
||||
|
||||
auto allocIter = std::find_if(ultCsr->makeResidentAllocations.begin(), ultCsr->makeResidentAllocations.end(), [patchPtr](const std::pair<GraphicsAllocation *, uint32_t> &element) {
|
||||
@@ -853,7 +847,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
||||
auto notFoundIt = std::find(regionGroupBarrierAllocIt + 1, kernel.state.internalResidencyContainer.end(), regionGroupBarrierAllocation);
|
||||
EXPECT_EQ(kernel.state.internalResidencyContainer.end(), notFoundIt);
|
||||
|
||||
auto patchPtr2 = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), regionGroupBarrier.stateless));
|
||||
auto patchPtr2 = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[regionGroupBarrier.stateless]);
|
||||
|
||||
size_t requestedNumberOfWorkgroups = groupCount.groupCountX * groupCount.groupCountY * groupCount.groupCountZ;
|
||||
|
||||
@@ -886,9 +880,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
||||
EXPECT_EQ(nullptr, kernel.getRegionGroupBarrierAllocation());
|
||||
|
||||
constexpr uint32_t crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
|
||||
kernel.state.crossThreadDataSize = crossThreadDataSize;
|
||||
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
@@ -916,7 +908,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams));
|
||||
|
||||
auto patchPtr = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), regionGroupBarrier.stateless));
|
||||
auto patchPtr = *reinterpret_cast<uint64_t *>(&kernel.getCrossThreadDataSpan()[regionGroupBarrier.stateless]);
|
||||
EXPECT_EQ(0u, patchPtr);
|
||||
|
||||
EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel.getRegionGroupBarrierIndex());
|
||||
@@ -945,9 +937,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenPatchPreambleQueueWhenAppendedRegi
|
||||
kernel.module = pMockModule.get();
|
||||
|
||||
constexpr uint32_t crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(crossThreadDataSize);
|
||||
kernel.state.crossThreadDataSize = crossThreadDataSize;
|
||||
memset(kernel.state.crossThreadData.get(), 0, crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(crossThreadDataSize, 0x0);
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
|
||||
@@ -2968,8 +2968,7 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
kernel.module = mockModule.get();
|
||||
kernel.descriptor.kernelAttributes.flags.passInlineData = false;
|
||||
kernel.state.perThreadDataSizeForWholeThreadGroup = 0;
|
||||
kernel.state.crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(kernel.state.crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(64U, 0x0);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0);
|
||||
@@ -3000,8 +2999,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughIohSpaceWhenLaunchingKern
|
||||
kernel.module = mockModule.get();
|
||||
kernel.descriptor.kernelAttributes.flags.passInlineData = false;
|
||||
kernel.state.perThreadDataSizeForWholeThreadGroup = 0;
|
||||
kernel.state.crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(kernel.state.crossThreadDataSize);
|
||||
kernel.state.crossThreadData.resize(64U, 0x0);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0);
|
||||
|
||||
@@ -1475,9 +1475,9 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
|
||||
|
||||
immDataVector->push_back(std::move(mockKernelImmutableData));
|
||||
|
||||
auto crossThreadData = std::make_unique<uint32_t[]>(4);
|
||||
kernel->state.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
|
||||
kernel->state.crossThreadDataSize = sizeof(uint32_t[4]);
|
||||
constexpr size_t ctdDwords = 4U;
|
||||
kernel->state.crossThreadData.clear();
|
||||
kernel->state.crossThreadData.resize(sizeof(uint32_t[ctdDwords]));
|
||||
|
||||
auto result = kernel->initialize(&kernelDesc);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
@@ -1485,11 +1485,11 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
|
||||
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
|
||||
EXPECT_NE(nullptr, rtDispatchGlobals);
|
||||
|
||||
auto dispatchGlobalsAddressPatched = *reinterpret_cast<uint64_t *>(ptrOffset(crossThreadData.get(), rtGlobalPointerPatchOffset));
|
||||
auto dispatchGlobalsAddressPatched = *reinterpret_cast<const uint64_t *>(ptrOffset(kernel->getCrossThreadData(), rtGlobalPointerPatchOffset));
|
||||
auto dispatchGlobalsGpuAddressOffset = static_cast<uint64_t>(rtDispatchGlobals->rtDispatchGlobalsArray->getGpuAddressToPatch());
|
||||
EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched);
|
||||
|
||||
kernel->state.crossThreadData.release();
|
||||
kernel->state.crossThreadData.clear();
|
||||
}
|
||||
|
||||
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
|
||||
@@ -3188,7 +3188,7 @@ HWTEST2_F(SetKernelArg, givenHeaplessWhenPatchingImageWithBindlessEnabledCorrect
|
||||
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
auto ctd = kernel->state.crossThreadData.get();
|
||||
auto ctd = kernel->state.crossThreadData.data();
|
||||
|
||||
auto ssInHeap = imageHW->getBindlessSlot();
|
||||
auto patchLocation = ptrOffset(ctd, imageArg.bindless);
|
||||
@@ -3336,8 +3336,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCal
|
||||
argDescriptor.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
|
||||
argDescriptor.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<NEO::ArgDescImage>().bindless = 0x0;
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[4]));
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
@@ -3848,23 +3847,20 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
|
||||
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
mockKernel.module = &mockModule;
|
||||
|
||||
auto crossThreadData = std::make_unique<uint32_t[]>(4);
|
||||
|
||||
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
|
||||
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
|
||||
mockKernel.state.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
|
||||
mockKernel.state.crossThreadDataSize = sizeof(uint32_t[4]);
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint32_t[4]));
|
||||
|
||||
mockKernel.createPrintfBuffer();
|
||||
|
||||
auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
|
||||
EXPECT_NE(nullptr, printfBufferAllocation);
|
||||
|
||||
auto printfBufferAddressPatched = *reinterpret_cast<uintptr_t *>(crossThreadData.get());
|
||||
auto printfBufferAddressPatched = *reinterpret_cast<const uintptr_t *>(mockKernel.getCrossThreadData());
|
||||
auto printfBufferGpuAddressOffset = static_cast<uintptr_t>(printfBufferAllocation->getGpuAddressToPatch());
|
||||
EXPECT_EQ(printfBufferGpuAddressOffset, printfBufferAddressPatched);
|
||||
|
||||
mockKernel.state.crossThreadData.release();
|
||||
mockKernel.state.crossThreadData.clear();
|
||||
}
|
||||
|
||||
using PrintfHandlerTests = ::testing::Test;
|
||||
@@ -4251,9 +4247,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
|
||||
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 5 * sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
constexpr size_t ctdQwords = 5U;
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||
@@ -4266,8 +4261,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
|
||||
|
||||
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
|
||||
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(patchValue1, crossThreadData[0]);
|
||||
EXPECT_EQ(patchValue2, crossThreadData[1]);
|
||||
@@ -4313,9 +4308,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatc
|
||||
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
constexpr size_t ctdQwords = 4U;
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||
@@ -4325,8 +4319,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatc
|
||||
|
||||
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
|
||||
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(0u, crossThreadData[0]);
|
||||
EXPECT_EQ(patchValue2, crossThreadData[1]);
|
||||
@@ -4348,15 +4342,14 @@ TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThre
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
constexpr size_t ctdQwords = 4U;
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
|
||||
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(0u, crossThreadData[0]);
|
||||
}
|
||||
@@ -4374,17 +4367,15 @@ TEST_F(BindlessKernelTest, givenNoStatefulArgsWhenPatchingBindlessOffsetsInCross
|
||||
argDescriptor.as<NEO::ArgDescValue>().elements.push_back(NEO::ArgDescValue::Element{0, 8, 0, false});
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
uint64_t crossThreadData = 0U;
|
||||
memcpy_s(&crossThreadData, sizeof(uint64_t), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(0u, crossThreadData[0]);
|
||||
EXPECT_EQ(0u, crossThreadData);
|
||||
}
|
||||
|
||||
TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImplicitArgsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
|
||||
@@ -4413,15 +4404,14 @@ TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImpl
|
||||
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
constexpr size_t ctdQwords = 4U;
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
|
||||
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(0u, crossThreadData[0]);
|
||||
EXPECT_EQ(0u, crossThreadData[1]);
|
||||
@@ -4510,9 +4500,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
|
||||
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 5 * sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
constexpr size_t ctdQwords = 5U;
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||
@@ -4523,8 +4512,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
|
||||
|
||||
mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
|
||||
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(patchValue1, crossThreadData[1]);
|
||||
EXPECT_EQ(0u, patchValue2);
|
||||
@@ -4574,9 +4563,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamp
|
||||
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
mockKernel.state.crossThreadData = std::make_unique<uint8_t[]>(7 * sizeof(uint64_t));
|
||||
mockKernel.state.crossThreadDataSize = 7 * sizeof(uint64_t);
|
||||
memset(mockKernel.state.crossThreadData.get(), 0, mockKernel.state.crossThreadDataSize);
|
||||
constexpr size_t ctdQwords = 7U;
|
||||
mockKernel.state.crossThreadData.resize(sizeof(uint64_t[ctdQwords]), 0x0);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||
@@ -4587,8 +4575,8 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamp
|
||||
|
||||
mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.state.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.state.crossThreadData.get(), mockKernel.state.crossThreadDataSize);
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(ctdQwords);
|
||||
memcpy_s(crossThreadData.get(), sizeof(uint64_t[ctdQwords]), mockKernel.getCrossThreadData(), mockKernel.getCrossThreadDataSize());
|
||||
|
||||
EXPECT_EQ(patchValue1, crossThreadData[5]);
|
||||
EXPECT_EQ(patchValue2, crossThreadData[6]);
|
||||
@@ -4603,8 +4591,7 @@ TEST_F(KernelSyncBufferTest, GivenSyncBufferArgWhenPatchingSyncBufferThenPtrIsCo
|
||||
Mock<Module> mockModule(device, nullptr);
|
||||
kernel.module = &mockModule;
|
||||
|
||||
kernel.state.crossThreadData = std::make_unique<uint8_t[]>(64);
|
||||
kernel.state.crossThreadDataSize = 64;
|
||||
kernel.state.crossThreadData.resize(64U);
|
||||
|
||||
auto &syncBuffer = kernel.immutableData.kernelDescriptor->payloadMappings.implicitArgs.syncBufferAddress;
|
||||
syncBuffer.stateless = 0x8;
|
||||
@@ -4618,7 +4605,7 @@ TEST_F(KernelSyncBufferTest, GivenSyncBufferArgWhenPatchingSyncBufferThenPtrIsCo
|
||||
|
||||
kernel.patchSyncBuffer(&alloc, bufferOffset);
|
||||
|
||||
auto patchValue = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.state.crossThreadData.get(), syncBuffer.stateless));
|
||||
auto patchValue = *reinterpret_cast<const uint64_t *>(ptrOffset(kernel.getCrossThreadData(), syncBuffer.stateless));
|
||||
auto expectedPatchValue = ptrOffset(alloc.getGpuAddressToPatch(), bufferOffset);
|
||||
EXPECT_EQ(expectedPatchValue, patchValue);
|
||||
|
||||
|
||||
@@ -59,16 +59,17 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssigningToItselfThenTheCurrent
|
||||
constexpr size_t mockSize{8U};
|
||||
|
||||
KernelMutableState state1{};
|
||||
state1.crossThreadData.reset(new uint8_t[mockSize]);
|
||||
auto addressBeforeAssignment{state1.crossThreadData.get()};
|
||||
state1.crossThreadData.clear();
|
||||
state1.crossThreadData.resize(mockSize, 0x0);
|
||||
auto addressBeforeAssignment{state1.crossThreadData.data()};
|
||||
|
||||
auto &notReallyDifferentState{state1};
|
||||
state1 = notReallyDifferentState;
|
||||
auto addressAfterAssignment{state1.crossThreadData.get()};
|
||||
auto addressAfterAssignment{state1.crossThreadData.data()};
|
||||
|
||||
auto &¬ReallyDifferentState2{std::move(state1)};
|
||||
state1 = std::move(notReallyDifferentState2);
|
||||
auto addressAfterAssignment2{state1.crossThreadData.get()};
|
||||
auto addressAfterAssignment2{state1.crossThreadData.data()};
|
||||
|
||||
EXPECT_EQ(addressBeforeAssignment, addressAfterAssignment);
|
||||
EXPECT_EQ(addressBeforeAssignment, addressAfterAssignment2);
|
||||
@@ -86,9 +87,9 @@ void fillKernelMutableStateWithMockData(KernelMutableState &state) {
|
||||
state.pExtension = std::make_unique<KernelExt>();
|
||||
|
||||
constexpr size_t mockSize{8U};
|
||||
state.crossThreadData.reset(new uint8_t[mockSize]);
|
||||
std::memcpy(state.crossThreadData.get(), std::to_array<uint8_t>({11, 12, 13, 14, 15, 16, 17, 18}).data(), mockSize);
|
||||
state.crossThreadDataSize = mockSize;
|
||||
state.crossThreadData.clear();
|
||||
state.crossThreadData.reserve(mockSize);
|
||||
std::ranges::copy(std::to_array<uint8_t, mockSize>({11, 12, 13, 14, 15, 16, 17, 18}), std::back_inserter(state.crossThreadData));
|
||||
|
||||
state.surfaceStateHeapData.reset(new uint8_t[mockSize]);
|
||||
std::memcpy(state.surfaceStateHeapData.get(), std::to_array<uint8_t>({21, 22, 23, 24, 25, 26, 27, 28}).data(), mockSize);
|
||||
@@ -142,7 +143,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
|
||||
KernelMutableState state2{};
|
||||
state2 = state1; // assignment operator is being tested
|
||||
|
||||
EXPECT_EQ(0, std::memcmp(state1.crossThreadData.get(), state2.crossThreadData.get(), state1.crossThreadDataSize));
|
||||
EXPECT_EQ(0, std::memcmp(state1.crossThreadData.data(), state2.crossThreadData.data(), state1.crossThreadData.size()));
|
||||
EXPECT_EQ(0, std::memcmp(state1.surfaceStateHeapData.get(), state2.surfaceStateHeapData.get(), state1.surfaceStateHeapDataSize));
|
||||
EXPECT_EQ(0, std::memcmp(state1.dynamicStateHeapData.get(), state2.dynamicStateHeapData.get(), state1.dynamicStateHeapDataSize));
|
||||
|
||||
@@ -164,7 +165,6 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
|
||||
|
||||
EXPECT_EQ(0, std::memcmp(state1.globalOffsets, state2.globalOffsets, KernelMutableState::dimMax * sizeof(uint32_t)));
|
||||
EXPECT_EQ(0, std::memcmp(state1.groupSize, state2.groupSize, KernelMutableState::dimMax * sizeof(uint32_t)));
|
||||
EXPECT_EQ(state1.crossThreadDataSize, state2.crossThreadDataSize);
|
||||
EXPECT_EQ(state1.surfaceStateHeapDataSize, state2.surfaceStateHeapDataSize);
|
||||
EXPECT_EQ(state1.dynamicStateHeapDataSize, state2.dynamicStateHeapDataSize);
|
||||
EXPECT_EQ(state1.perThreadDataSize, state2.perThreadDataSize);
|
||||
@@ -182,8 +182,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
|
||||
KernelMutableState state3{};
|
||||
state3 = std::move(state1);
|
||||
|
||||
EXPECT_EQ(nullptr, state1.crossThreadData.get());
|
||||
EXPECT_EQ(0U, state1.crossThreadDataSize);
|
||||
EXPECT_EQ(0U, state1.crossThreadData.size());
|
||||
EXPECT_EQ(nullptr, state1.surfaceStateHeapData.get());
|
||||
EXPECT_EQ(0U, state1.surfaceStateHeapDataSize);
|
||||
EXPECT_EQ(nullptr, state1.dynamicStateHeapData.get());
|
||||
@@ -194,7 +193,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
|
||||
EXPECT_EQ(0U, state1.perThreadDataSizeForWholeThreadGroup);
|
||||
EXPECT_EQ(0U, state1.perThreadDataSizeForWholeThreadGroupAllocated);
|
||||
|
||||
EXPECT_EQ(0, std::memcmp(state3.crossThreadData.get(), state2.crossThreadData.get(), state3.crossThreadDataSize));
|
||||
EXPECT_EQ(0, std::memcmp(state3.crossThreadData.data(), state2.crossThreadData.data(), state3.crossThreadData.size()));
|
||||
EXPECT_EQ(0, std::memcmp(state3.surfaceStateHeapData.get(), state2.surfaceStateHeapData.get(), state3.surfaceStateHeapDataSize));
|
||||
EXPECT_EQ(0, std::memcmp(state3.dynamicStateHeapData.get(), state2.dynamicStateHeapData.get(), state3.dynamicStateHeapDataSize));
|
||||
|
||||
@@ -216,7 +215,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
|
||||
|
||||
EXPECT_EQ(0, std::memcmp(state3.globalOffsets, state2.globalOffsets, KernelMutableState::dimMax * sizeof(uint32_t)));
|
||||
EXPECT_EQ(0, std::memcmp(state3.groupSize, state2.groupSize, KernelMutableState::dimMax * sizeof(uint32_t)));
|
||||
EXPECT_EQ(state3.crossThreadDataSize, state2.crossThreadDataSize);
|
||||
EXPECT_EQ(state3.crossThreadData.size(), state2.crossThreadData.size());
|
||||
EXPECT_EQ(state3.surfaceStateHeapDataSize, state2.surfaceStateHeapDataSize);
|
||||
EXPECT_EQ(state3.dynamicStateHeapDataSize, state2.dynamicStateHeapDataSize);
|
||||
EXPECT_EQ(state3.perThreadDataSize, state2.perThreadDataSize);
|
||||
@@ -243,9 +242,9 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
|
||||
kernel1.module = &module;
|
||||
|
||||
constexpr size_t mockSize{8U};
|
||||
kernel1.state.crossThreadData.reset(new uint8_t[mockSize]);
|
||||
std::memcpy(kernel1.state.crossThreadData.get(), std::to_array<uint8_t>({91, 92, 93, 94, 95, 96, 97, 98}).data(), mockSize);
|
||||
kernel1.state.crossThreadDataSize = mockSize;
|
||||
kernel1.state.crossThreadData.clear();
|
||||
kernel1.state.crossThreadData.reserve(mockSize);
|
||||
std::ranges::copy(std::to_array<uint8_t, mockSize>({91, 92, 93, 94, 95, 96, 97, 98}), std::back_inserter(kernel1.state.crossThreadData));
|
||||
kernel1.state.reservePerThreadDataForWholeThreadGroup(mockSize);
|
||||
std::memcpy(kernel1.state.perThreadDataForWholeThreadGroup, std::to_array<uint8_t>({81, 82, 83, 84, 85, 86, 87, 88}).data(), mockSize);
|
||||
|
||||
@@ -254,14 +253,14 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
|
||||
fillKernelMutableStateWithMockData(state);
|
||||
|
||||
// No need to check each and every member again
|
||||
EXPECT_NE(0, std::memcmp(kernel1.state.crossThreadData.get(), state.crossThreadData.get(), mockSize));
|
||||
EXPECT_NE(0, std::memcmp(kernel1.state.crossThreadData.data(), state.crossThreadData.data(), mockSize));
|
||||
EXPECT_NE(0, std::memcmp(kernel1.state.perThreadDataForWholeThreadGroup, state.perThreadDataForWholeThreadGroup, mockSize));
|
||||
|
||||
auto clonedKernel = kernel1.cloneWithStateOverride(&state);
|
||||
auto kernel2 = static_cast<WhiteBox<KernelImp> *>(clonedKernel.get());
|
||||
|
||||
// KernelMutableState part taken from `state`
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->state.crossThreadData.get(), state.crossThreadData.get(), mockSize));
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->state.crossThreadData.data(), state.crossThreadData.data(), mockSize));
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->state.perThreadDataForWholeThreadGroup, state.perThreadDataForWholeThreadGroup, mockSize));
|
||||
|
||||
// KernelImp part taken from `kernel1`
|
||||
@@ -284,9 +283,6 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
|
||||
}
|
||||
|
||||
TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAndGroupCount) {
|
||||
uint32_t *crossThreadData =
|
||||
reinterpret_cast<uint32_t *>(alignedMalloc(sizeof(uint32_t[6]), 32));
|
||||
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo = {};
|
||||
NEO::KernelDescriptor descriptor;
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
@@ -299,8 +295,7 @@ TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSi
|
||||
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.state.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
|
||||
kernel.state.crossThreadDataSize = sizeof(uint32_t[6]);
|
||||
kernel.state.crossThreadData.resize(sizeof(uint32_t[6]));
|
||||
kernel.state.groupSize[0] = 2;
|
||||
kernel.state.groupSize[1] = 3;
|
||||
kernel.state.groupSize[2] = 5;
|
||||
@@ -318,8 +313,7 @@ TEST_F(KernelImpTest, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSi
|
||||
EXPECT_EQ(11U, numGroups[1]);
|
||||
EXPECT_EQ(13U, numGroups[2]);
|
||||
|
||||
kernel.state.crossThreadData.release();
|
||||
alignedFree(crossThreadData);
|
||||
kernel.state.crossThreadData.clear();
|
||||
}
|
||||
|
||||
TEST_F(KernelImpTest, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSetValidNumberOfBits) {
|
||||
|
||||
@@ -315,11 +315,11 @@ void MutableCommandListFixtureInit::prepareKernelArg(uint16_t argIndex, L0::MCL:
|
||||
argSlm.pointerSize = 8;
|
||||
|
||||
if (kernelMask & kernel1Bit) {
|
||||
memset(ptrOffset(kernel->state.crossThreadData.get(), argSlm.slmOffset), 0, 8);
|
||||
memset(&kernel->getCrossThreadDataSpan()[argSlm.slmOffset], 0, 8);
|
||||
mockKernelImmData->kernelDescriptor->payloadMappings.explicitArgs[argIndex] = kernelArgSlm;
|
||||
}
|
||||
if (kernelMask & kernel2Bit) {
|
||||
memset(ptrOffset(kernel2->state.crossThreadData.get(), argSlm.slmOffset), 0, 8);
|
||||
memset(&kernel2->getCrossThreadDataSpan()[argSlm.slmOffset], 0, 8);
|
||||
mockKernelImmData2->kernelDescriptor->payloadMappings.explicitArgs[argIndex] = kernelArgSlm;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -405,8 +405,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
|
||||
mockKernelImmData2->kernelDescriptor->kernelAttributes.crossThreadDataSize = kernel2CrossThreadInitSize;
|
||||
mockKernelImmData2->crossThreadDataSize = kernel2CrossThreadInitSize;
|
||||
mockKernelImmData2->crossThreadDataTemplate.reset(new uint8_t[kernel2CrossThreadInitSize]);
|
||||
kernel2->state.crossThreadDataSize = kernel2CrossThreadInitSize;
|
||||
kernel2->state.crossThreadData.reset(new uint8_t[kernel2CrossThreadInitSize]);
|
||||
kernel2->state.crossThreadData.resize(kernel2CrossThreadInitSize, 0x0);
|
||||
|
||||
mutableCommandIdDesc.flags = kernelIsaMutationFlags;
|
||||
|
||||
|
||||
@@ -87,8 +87,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
|
||||
mockKernelImmData2->kernelDescriptor->kernelAttributes.crossThreadDataSize = kernel2CrossThreadInitSize;
|
||||
mockKernelImmData2->crossThreadDataSize = kernel2CrossThreadInitSize;
|
||||
mockKernelImmData2->crossThreadDataTemplate.reset(new uint8_t[kernel2CrossThreadInitSize]);
|
||||
kernel2->state.crossThreadDataSize = kernel2CrossThreadInitSize;
|
||||
kernel2->state.crossThreadData.reset(new uint8_t[kernel2CrossThreadInitSize]);
|
||||
kernel2->state.crossThreadData.resize(kernel2CrossThreadInitSize, 0x0);
|
||||
|
||||
createMutableKernelGroup();
|
||||
|
||||
@@ -153,7 +152,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
|
||||
kernel2->state.perThreadDataSizeForWholeThreadGroup = 0x40;
|
||||
kernel2->state.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(kernel2->state.perThreadDataSizeForWholeThreadGroup, 32));
|
||||
|
||||
auto srcPtr = kernel2->state.crossThreadData.get();
|
||||
auto srcPtr = kernel2->state.crossThreadData.data();
|
||||
memset(srcPtr, 0xFF, mutableKernels[1]->inlineDataSize);
|
||||
|
||||
auto dstPtr = mutableKernels[1]->getMutableComputeWalker()->getHostMemoryInlineDataPointer();
|
||||
|
||||
Reference in New Issue
Block a user