Mirror of https://github.com/intel/compute-runtime.git (synced 2025-12-22 10:17:01 +08:00)
refactor: pass outImplicitArgs to patchImplicitArgs function
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: fdf7b98502
Commit: 2a9bcdeb83
@@ -203,6 +203,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
nullptr, // outWalkerPtr
|
||||
nullptr, // cpuWalkerBuffer
|
||||
nullptr, // cpuPayloadBuffer
|
||||
nullptr, // outImplicitArgsPtr
|
||||
&additionalCommands, // additionalCommands
|
||||
commandListPreemptionMode, // preemptionMode
|
||||
launchParams.requiredPartitionDim, // requiredPartitionDim
|
||||
|
||||
@@ -348,6 +348,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
nullptr, // outWalkerPtr
|
||||
launchParams.cmdWalkerBuffer, // cpuWalkerBuffer
|
||||
launchParams.hostPayloadBuffer, // cpuPayloadBuffer
|
||||
nullptr, // outImplicitArgsPtr
|
||||
&additionalCommands, // additionalCommands
|
||||
kernelPreemptionMode, // preemptionMode
|
||||
launchParams.requiredPartitionDim, // requiredPartitionDim
|
||||
|
||||
@@ -202,6 +202,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
|
||||
nullptr, // outWalkerPtr
|
||||
nullptr, // cpuWalkerBuffer
|
||||
nullptr, // cpuPayloadBuffer
|
||||
nullptr, // outImplicitArgsPtr
|
||||
nullptr, // additionalCommands
|
||||
PreemptionMode::MidBatch, // preemptionMode
|
||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||
|
||||
@@ -749,6 +749,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA
|
||||
nullptr, // outWalkerPtr
|
||||
nullptr, // cpuWalkerBuffer
|
||||
nullptr, // cpuPayloadBuffer
|
||||
nullptr, // outImplicitArgsPtr
|
||||
nullptr, // additionalCommands
|
||||
PreemptionMode::MidBatch, // preemptionMode
|
||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||
|
||||
@@ -92,7 +92,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed();
|
||||
auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming);
|
||||
|
||||
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment);
|
||||
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr);
|
||||
|
||||
auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast<uint64_t *>(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
|
||||
*implicitArgsCrossThreadPtr = implicitArgsGpuVA;
|
||||
|
||||
@@ -95,7 +95,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
|
||||
auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming);
|
||||
|
||||
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), rootDeviceEnvironment);
|
||||
ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), rootDeviceEnvironment, nullptr);
|
||||
}
|
||||
|
||||
uint32_t sizeToCopy = sizeCrossThreadData;
|
||||
|
||||
@@ -58,6 +58,7 @@ struct EncodeDispatchKernelArgs {
|
||||
void *outWalkerPtr = nullptr;
|
||||
void *cpuWalkerBuffer = nullptr;
|
||||
void *cpuPayloadBuffer = nullptr;
|
||||
void *outImplicitArgsPtr = nullptr;
|
||||
std::list<void *> *additionalCommands = nullptr;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Initial;
|
||||
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
|
||||
@@ -210,7 +211,6 @@ struct EncodeDispatchKernel {
|
||||
static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired);
|
||||
|
||||
static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args);
|
||||
static void *getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor);
|
||||
static size_t getScratchPtrOffsetOfImplicitArgs();
|
||||
|
||||
template <typename WalkerType>
|
||||
|
||||
@@ -906,11 +906,6 @@ size_t EncodeDispatchKernel<Family>::getDefaultDshAlignment() {
|
||||
return Family::cacheLineSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void *EncodeDispatchKernel<GfxFamily>::getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t EncodeDispatchKernel<GfxFamily>::getScratchPtrOffsetOfImplicitArgs() {
|
||||
return 0;
|
||||
|
||||
@@ -198,7 +198,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
auto implicitArgsCrossThreadPtr = ptrOffset(const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
|
||||
*implicitArgsCrossThreadPtr = implicitArgsGpuVA;
|
||||
|
||||
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment);
|
||||
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr);
|
||||
}
|
||||
|
||||
memcpy_s(ptr, sizeCrossThreadData,
|
||||
|
||||
@@ -267,7 +267,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
|
||||
EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<heaplessModeEnabled>(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching);
|
||||
|
||||
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment);
|
||||
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment, &args.outImplicitArgsPtr);
|
||||
}
|
||||
|
||||
if (args.isIndirect) {
|
||||
|
||||
@@ -66,7 +66,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
|
||||
}
|
||||
}
|
||||
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool, uint32_t>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment, void **outImplicitArgsAddress) {
|
||||
auto localIdsGeneratedByHw = hwGenerationOfLocalIdsParams.has_value() ? hwGenerationOfLocalIdsParams.value().first : false;
|
||||
auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, localIdsGeneratedByHw, rootDeviceEnvironment);
|
||||
auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram);
|
||||
@@ -89,6 +89,11 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons
|
||||
auto sizeForLocalIdsProgramming = totalSizeToProgram - ImplicitArgs::getSize();
|
||||
ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming);
|
||||
}
|
||||
|
||||
if (outImplicitArgsAddress) {
|
||||
*outImplicitArgsAddress = ptrToPatch;
|
||||
}
|
||||
|
||||
memcpy_s(ptrToPatch, ImplicitArgs::getSize(), &implicitArgs, ImplicitArgs::getSize());
|
||||
|
||||
return retVal;
|
||||
|
||||
@@ -25,6 +25,6 @@ namespace ImplicitArgsHelper {
|
||||
std::array<uint8_t, 3> getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams);
|
||||
uint32_t getGrfSize(uint32_t simd);
|
||||
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool localIdsGeneratedByRuntime, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional<std::pair<bool /* localIdsGeneratedByRuntime */, uint32_t /* walkOrderForHwGenerationOfLocalIds */>> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment, void **outImplicitArgs);
|
||||
} // namespace ImplicitArgsHelper
|
||||
} // namespace NEO
|
||||
|
||||
@@ -778,14 +778,6 @@ HWTEST_F(CommandEncoderTests, givenInterfaceDescriptorWhenEncodeEuSchedulingPoli
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncoderTests, whenGetImplicitArgsAddressIsCalledThenNullptrIsReturned) {
|
||||
|
||||
KernelDescriptor kernelDescriptor{};
|
||||
EncodeDispatchKernelArgs args{};
|
||||
auto implicitArgsPtr = EncodeDispatchKernel<FamilyType>::getImplicitArgsAddress(args, kernelDescriptor);
|
||||
EXPECT_EQ(nullptr, implicitArgsPtr);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncoderTests, whenGetScratchPtrOffsetOfImplicitArgsIsCalledThenZeroIsReturned) {
|
||||
|
||||
auto scratchOffset = EncodeDispatchKernel<FamilyType>::getScratchPtrOffsetOfImplicitArgs();
|
||||
|
||||
@@ -51,6 +51,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
|
||||
nullptr, // outWalkerPtr
|
||||
nullptr, // cpuWalkerBuffer
|
||||
nullptr, // cpuPayloadBuffer
|
||||
nullptr, // outImplicitArgsPtr
|
||||
nullptr, // additionalCommands
|
||||
PreemptionMode::Disabled, // preemptionMode
|
||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||
|
||||
@@ -102,6 +102,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl
|
||||
TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) {
|
||||
ImplicitArgs implicitArgs{ImplicitArgs::getSize()};
|
||||
|
||||
void *outImplicitArgs = nullptr;
|
||||
KernelDescriptor kernelDescriptor{};
|
||||
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 0;
|
||||
kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
|
||||
@@ -119,6 +120,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
|
||||
|
||||
auto totalWorkgroupSize = implicitArgs.localSizeX * implicitArgs.localSizeY * implicitArgs.localSizeZ;
|
||||
auto localIdsPatchingSize = totalWorkgroupSize * 3 * sizeof(uint16_t);
|
||||
auto localIdsOffset = alignUp(localIdsPatchingSize, MemoryConstants::cacheLineSize);
|
||||
|
||||
auto memoryToPatch = std::make_unique<uint8_t[]>(totalSizeForPatching);
|
||||
|
||||
@@ -126,10 +128,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
|
||||
|
||||
memset(memoryToPatch.get(), pattern, totalSizeForPatching);
|
||||
|
||||
auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment);
|
||||
auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs);
|
||||
|
||||
EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching));
|
||||
|
||||
void *expectedImplicitArgsPtr = ptrOffset(memoryToPatch.get(), localIdsOffset);
|
||||
EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs);
|
||||
|
||||
uint32_t offset = 0;
|
||||
|
||||
for (; offset < localIdsPatchingSize; offset++) {
|
||||
@@ -147,7 +152,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
|
||||
|
||||
TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) {
|
||||
ImplicitArgs implicitArgs{ImplicitArgs::getSize()};
|
||||
|
||||
void *outImplicitArgs = nullptr;
|
||||
KernelDescriptor kernelDescriptor{};
|
||||
kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer = 0x10;
|
||||
EXPECT_TRUE(isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer));
|
||||
@@ -168,10 +173,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl
|
||||
|
||||
memset(memoryToPatch.get(), pattern, totalSizeForPatching);
|
||||
|
||||
auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment);
|
||||
auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs);
|
||||
|
||||
EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching));
|
||||
|
||||
void *expectedImplicitArgsPtr = memoryToPatch.get();
|
||||
EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs);
|
||||
|
||||
uint32_t offset = 0;
|
||||
|
||||
for (; offset < ImplicitArgs::getSize(); offset++) {
|
||||
|
||||
Reference in New Issue
Block a user