refactor: pass outImplicitArgs to patchImplicitArgs function

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-08-05 16:40:51 +00:00
committed by Compute-Runtime-Automation
parent fdf7b98502
commit 2a9bcdeb83
15 changed files with 28 additions and 23 deletions

View File

@@ -203,6 +203,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
nullptr, // outWalkerPtr nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
&additionalCommands, // additionalCommands &additionalCommands, // additionalCommands
commandListPreemptionMode, // preemptionMode commandListPreemptionMode, // preemptionMode
launchParams.requiredPartitionDim, // requiredPartitionDim launchParams.requiredPartitionDim, // requiredPartitionDim

View File

@@ -348,6 +348,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
nullptr, // outWalkerPtr nullptr, // outWalkerPtr
launchParams.cmdWalkerBuffer, // cpuWalkerBuffer launchParams.cmdWalkerBuffer, // cpuWalkerBuffer
launchParams.hostPayloadBuffer, // cpuPayloadBuffer launchParams.hostPayloadBuffer, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
&additionalCommands, // additionalCommands &additionalCommands, // additionalCommands
kernelPreemptionMode, // preemptionMode kernelPreemptionMode, // preemptionMode
launchParams.requiredPartitionDim, // requiredPartitionDim launchParams.requiredPartitionDim, // requiredPartitionDim

View File

@@ -202,6 +202,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
nullptr, // outWalkerPtr nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
nullptr, // additionalCommands nullptr, // additionalCommands
PreemptionMode::MidBatch, // preemptionMode PreemptionMode::MidBatch, // preemptionMode
NEO::RequiredPartitionDim::none, // requiredPartitionDim NEO::RequiredPartitionDim::none, // requiredPartitionDim

View File

@@ -749,6 +749,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
nullptr, // outWalkerPtr nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
nullptr, // additionalCommands nullptr, // additionalCommands
PreemptionMode::MidBatch, // preemptionMode PreemptionMode::MidBatch, // preemptionMode
NEO::RequiredPartitionDim::none, // requiredPartitionDim NEO::RequiredPartitionDim::none, // requiredPartitionDim

View File

@@ -92,7 +92,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed(); auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed();
auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming);
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr);
auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast<uint64_t *>(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast<uint64_t *>(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
*implicitArgsCrossThreadPtr = implicitArgsGpuVA; *implicitArgsCrossThreadPtr = implicitArgsGpuVA;

View File

@@ -95,7 +95,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming);
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), rootDeviceEnvironment); ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), rootDeviceEnvironment, nullptr);
} }
uint32_t sizeToCopy = sizeCrossThreadData; uint32_t sizeToCopy = sizeCrossThreadData;

View File

@@ -58,6 +58,7 @@ struct EncodeDispatchKernelArgs {
void *outWalkerPtr = nullptr; void *outWalkerPtr = nullptr;
void *cpuWalkerBuffer = nullptr; void *cpuWalkerBuffer = nullptr;
void *cpuPayloadBuffer = nullptr; void *cpuPayloadBuffer = nullptr;
void *outImplicitArgsPtr = nullptr;
std::list<void *> *additionalCommands = nullptr; std::list<void *> *additionalCommands = nullptr;
PreemptionMode preemptionMode = PreemptionMode::Initial; PreemptionMode preemptionMode = PreemptionMode::Initial;
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
@@ -210,7 +211,6 @@ struct EncodeDispatchKernel {
static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args); static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args);
static void *getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor);
static size_t getScratchPtrOffsetOfImplicitArgs(); static size_t getScratchPtrOffsetOfImplicitArgs();
template <typename WalkerType> template <typename WalkerType>

View File

@@ -906,11 +906,6 @@ size_t EncodeDispatchKernel<Family>::getDefaultDshAlignment() {
return Family::cacheLineSize; return Family::cacheLineSize;
} }
template <typename GfxFamily>
void *EncodeDispatchKernel<GfxFamily>::getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor) {
return nullptr;
}
template <typename GfxFamily> template <typename GfxFamily>
size_t EncodeDispatchKernel<GfxFamily>::getScratchPtrOffsetOfImplicitArgs() { size_t EncodeDispatchKernel<GfxFamily>::getScratchPtrOffsetOfImplicitArgs() {
return 0; return 0;

View File

@@ -198,7 +198,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
auto implicitArgsCrossThreadPtr = ptrOffset(const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); auto implicitArgsCrossThreadPtr = ptrOffset(const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
*implicitArgsCrossThreadPtr = implicitArgsGpuVA; *implicitArgsCrossThreadPtr = implicitArgsGpuVA;
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr);
} }
memcpy_s(ptr, sizeCrossThreadData, memcpy_s(ptr, sizeCrossThreadData,

View File

@@ -267,7 +267,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize; pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<heaplessModeEnabled>(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching); EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<heaplessModeEnabled>(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching);
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment); ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment, &args.outImplicitArgsPtr);
} }
if (args.isIndirect) { if (args.isIndirect) {

View File

@@ -66,7 +66,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
} }
} }
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment) { void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment, void **outImplicitArgsAddress) {
auto localIdsGeneratedByHw = hwGenerationOfLocalIdsParams.has_value() ? hwGenerationOfLocalIdsParams.value().first : false; auto localIdsGeneratedByHw = hwGenerationOfLocalIdsParams.has_value() ? hwGenerationOfLocalIdsParams.value().first : false;
auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, localIdsGeneratedByHw, rootDeviceEnvironment); auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, localIdsGeneratedByHw, rootDeviceEnvironment);
auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram); auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram);
@@ -89,6 +89,11 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons
auto sizeForLocalIdsProgramming = totalSizeToProgram - ImplicitArgs::getSize(); auto sizeForLocalIdsProgramming = totalSizeToProgram - ImplicitArgs::getSize();
ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming); ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming);
} }
if (outImplicitArgsAddress) {
*outImplicitArgsAddress = ptrToPatch;
}
memcpy_s(ptrToPatch, ImplicitArgs::getSize(), &implicitArgs, ImplicitArgs::getSize()); memcpy_s(ptrToPatch, ImplicitArgs::getSize(), &implicitArgs, ImplicitArgs::getSize());
return retVal; return retVal;

View File

@@ -25,6 +25,6 @@ namespace ImplicitArgsHelper {
std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams); std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams);
uint32_t getGrfSize(uint32_t simd); uint32_t getGrfSize(uint32_t simd);
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool localIdsGeneratedByRuntime, const RootDeviceEnvironment &rootDeviceEnvironment); uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool localIdsGeneratedByRuntime, const RootDeviceEnvironment &rootDeviceEnvironment);
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment); void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment, void **outImplicitArgs);
} // namespace ImplicitArgsHelper } // namespace ImplicitArgsHelper
} // namespace NEO } // namespace NEO

View File

@@ -778,14 +778,6 @@ HWTEST_F(CommandEncoderTests, givenInterfaceDescriptorWhenEncodeEuSchedulingPoli
} }
} }
HWTEST_F(CommandEncoderTests, whenGetImplicitArgsAddressIsCalledThenNullptrIsReturned) {
KernelDescriptor kernelDescriptor{};
EncodeDispatchKernelArgs args{};
auto implicitArgsPtr = EncodeDispatchKernel<FamilyType>::getImplicitArgsAddress(args, kernelDescriptor);
EXPECT_EQ(nullptr, implicitArgsPtr);
}
HWTEST_F(CommandEncoderTests, whenGetScratchPtrOffsetOfImplicitArgsIsCalledThenZeroIsReturned) { HWTEST_F(CommandEncoderTests, whenGetScratchPtrOffsetOfImplicitArgsIsCalledThenZeroIsReturned) {
auto scratchOffset = EncodeDispatchKernel<FamilyType>::getScratchPtrOffsetOfImplicitArgs(); auto scratchOffset = EncodeDispatchKernel<FamilyType>::getScratchPtrOffsetOfImplicitArgs();

View File

@@ -51,6 +51,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
nullptr, // outWalkerPtr nullptr, // outWalkerPtr
nullptr, // cpuWalkerBuffer nullptr, // cpuWalkerBuffer
nullptr, // cpuPayloadBuffer nullptr, // cpuPayloadBuffer
nullptr, // outImplicitArgsPtr
nullptr, // additionalCommands nullptr, // additionalCommands
PreemptionMode::Disabled, // preemptionMode PreemptionMode::Disabled, // preemptionMode
NEO::RequiredPartitionDim::none, // requiredPartitionDim NEO::RequiredPartitionDim::none, // requiredPartitionDim

View File

@@ -102,6 +102,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl
TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) { TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) {
ImplicitArgs implicitArgs{ImplicitArgs::getSize()}; ImplicitArgs implicitArgs{ImplicitArgs::getSize()};
void *outImplicitArgs = nullptr;
KernelDescriptor kernelDescriptor{}; KernelDescriptor kernelDescriptor{};
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 0; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 0;
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
@@ -119,6 +120,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
auto totalWorkgroupSize = implicitArgs.localSizeX * implicitArgs.localSizeY * implicitArgs.localSizeZ; auto totalWorkgroupSize = implicitArgs.localSizeX * implicitArgs.localSizeY * implicitArgs.localSizeZ;
auto localIdsPatchingSize = totalWorkgroupSize * 3 * sizeof(uint16_t); auto localIdsPatchingSize = totalWorkgroupSize * 3 * sizeof(uint16_t);
auto localIdsOffset = alignUp(localIdsPatchingSize, MemoryConstants::cacheLineSize);
auto memoryToPatch = std::make_unique<uint8_t[]>(totalSizeForPatching); auto memoryToPatch = std::make_unique<uint8_t[]>(totalSizeForPatching);
@@ -126,10 +128,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
memset(memoryToPatch.get(), pattern, totalSizeForPatching); memset(memoryToPatch.get(), pattern, totalSizeForPatching);
auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs);
EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching));
void *expectedImplicitArgsPtr = ptrOffset(memoryToPatch.get(), localIdsOffset);
EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs);
uint32_t offset = 0; uint32_t offset = 0;
for (; offset < localIdsPatchingSize; offset++) { for (; offset < localIdsPatchingSize; offset++) {
@@ -147,7 +152,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) { TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) {
ImplicitArgs implicitArgs{ImplicitArgs::getSize()}; ImplicitArgs implicitArgs{ImplicitArgs::getSize()};
void *outImplicitArgs = nullptr;
KernelDescriptor kernelDescriptor{}; KernelDescriptor kernelDescriptor{};
kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer = 0x10; kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer = 0x10;
EXPECT_TRUE(isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer)); EXPECT_TRUE(isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer));
@@ -168,10 +173,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl
memset(memoryToPatch.get(), pattern, totalSizeForPatching); memset(memoryToPatch.get(), pattern, totalSizeForPatching);
auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs);
EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching));
void *expectedImplicitArgsPtr = memoryToPatch.get();
EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs);
uint32_t offset = 0; uint32_t offset = 0;
for (; offset < ImplicitArgs::getSize(); offset++) { for (; offset < ImplicitArgs::getSize(); offset++) {