Use uint64_t instead of void * in indirect dispatch programming
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
5560663b01
commit
5d2d81b2d1
|
@ -230,7 +230,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
||||
const void **pRanges);
|
||||
|
||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]);
|
||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions);
|
||||
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask);
|
||||
|
|
|
@ -2246,7 +2246,7 @@ void CommandListCoreFamily<gfxCoreFamily>::clearCommandsToPatch() {
|
|||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]) {
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
NEO::EncodeIndirectParams<GfxFamily>::setGlobalWorkSizeIndirect(commandContainer, offsets, crossThreadAddress, lws);
|
||||
|
|
|
@ -1043,7 +1043,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchT
|
|||
uint32_t groupSize[] = {1, 1, 1};
|
||||
auto estimate = EncodeIndirectParams<FamilyType>::getCmdsSizeForSetWorkDimIndirect(groupSize, false);
|
||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x4, nullptr, groupSize);
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x4, 0u, groupSize);
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
}
|
||||
|
@ -1051,7 +1051,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchT
|
|||
uint32_t groupSize[] = {1, 1, 2};
|
||||
auto estimate = EncodeIndirectParams<FamilyType>::getCmdsSizeForSetWorkDimIndirect(groupSize, false);
|
||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x4, nullptr, groupSize);
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x4, 0u, groupSize);
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
}
|
||||
|
@ -1059,7 +1059,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchT
|
|||
uint32_t groupSize[] = {1, 1, 1};
|
||||
auto estimate = EncodeIndirectParams<FamilyType>::getCmdsSizeForSetWorkDimIndirect(groupSize, true);
|
||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x2, nullptr, groupSize);
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x2, 0u, groupSize);
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
}
|
||||
|
@ -1067,7 +1067,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchT
|
|||
uint32_t groupSize[] = {1, 1, 2};
|
||||
auto estimate = EncodeIndirectParams<FamilyType>::getCmdsSizeForSetWorkDimIndirect(groupSize, true);
|
||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x2, nullptr, groupSize);
|
||||
EncodeIndirectParams<FamilyType>::setWorkDimIndirect(commandList->commandContainer, 0x2, 0u, groupSize);
|
||||
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
EXPECT_LE(sizeAfter - sizeBefore, estimate);
|
||||
}
|
||||
|
|
|
@ -170,10 +170,10 @@ struct EncodeIndirectParams {
|
|||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
|
||||
static void encode(CommandContainer &container, void *crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, void *implicitArgsGpuPtr);
|
||||
static void setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress);
|
||||
static void setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offset, void *crossThreadAddress, const uint32_t *groupSize);
|
||||
static void setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws);
|
||||
static void encode(CommandContainer &container, uint64_t crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, uint64_t implicitArgsGpuPtr);
|
||||
static void setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress);
|
||||
static void setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offset, uint64_t crossThreadAddress, const uint32_t *groupSize);
|
||||
static void setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws);
|
||||
|
||||
static size_t getCmdsSizeForIndirectParams();
|
||||
static size_t getCmdsSizeForSetGroupSizeIndirect();
|
||||
|
|
|
@ -514,7 +514,7 @@ template <typename Family>
|
|||
void EncodeDispatchKernel<Family>::adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::encode(CommandContainer &container, void *crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, void *implicitArgsGpuPtr) {
|
||||
void EncodeIndirectParams<Family>::encode(CommandContainer &container, uint64_t crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, uint64_t implicitArgsGpuPtr) {
|
||||
const auto &kernelDescriptor = dispatchInterface->getKernelDescriptor();
|
||||
setGroupCountIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups, crossThreadDataGpuVa);
|
||||
setGlobalWorkSizeIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize, crossThreadDataGpuVa, dispatchInterface->getGroupSize());
|
||||
|
@ -530,19 +530,19 @@ void EncodeIndirectParams<Family>::encode(CommandContainer &container, void *cro
|
|||
}
|
||||
|
||||
// NOTE(review): this span is a rendered diff hunk, so the pre-change and
// post-change lines appear back to back. Per the commit title ("Use uint64_t
// instead of void *"), the `void *crossThreadAddress` signature and the
// `reinterpret_cast<uint64_t>` call are the removed lines; the `uint64_t`
// signature and the cast-free call are the added lines.
//
// setGroupCountIndirect: iterates over the three entries of `offsets`; for
// every entry that is not an undefined offset it calls
// EncodeStoreMMIO<Family>::encode on the container's command stream with
// GPUGPU_DISPATCHDIM[i] and the address returned by
// ptrOffset(crossThreadAddress, offsets[i]).
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress) {
|
||||
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
// Entries reported as undefined by NEO::isUndefinedOffset are skipped.
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
// Removed line: the pointer had to be cast to an integer before ptrOffset.
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
|
||||
// Added line: the address is already a uint64_t, so no cast is needed.
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(crossThreadAddress, offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset workDimOffset, void *crossThreadAddress, const uint32_t *groupSize) {
|
||||
void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset workDimOffset, uint64_t crossThreadAddress, const uint32_t *groupSize) {
|
||||
if (NEO::isValidOffset(workDimOffset)) {
|
||||
auto dstPtr = ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), workDimOffset);
|
||||
auto dstPtr = ptrOffset(crossThreadAddress, workDimOffset);
|
||||
constexpr uint32_t RESULT_REGISTER = CS_GPR_R0;
|
||||
constexpr AluRegisters RESULT_ALU_REGISTER = AluRegisters::R_0;
|
||||
const uint32_t offset = static_cast<uint32_t>((1ull << 8 * (dstPtr & 0b11)) - 1);
|
||||
|
@ -650,12 +650,12 @@ template <typename Family>
|
|||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
|
||||
|
||||
// NOTE(review): this span is a rendered diff hunk, so the pre-change and
// post-change lines appear back to back. Per the commit title ("Use uint64_t
// instead of void *"), the `void *crossThreadAddress` signature and the
// `reinterpret_cast<uint64_t>` call are the removed lines; the `uint64_t`
// signature and the cast-free call are the added lines.
//
// setGlobalWorkSizeIndirect: iterates over the three entries of `offsets`;
// for every entry that is not an undefined offset it calls
// EncodeMathMMIO<Family>::encodeMulRegVal with the container,
// GPUGPU_DISPATCHDIM[i], lws[i], and the address returned by
// ptrOffset(crossThreadAddress, offsets[i]).
// NOTE(review): presumably this multiplies the dispatch-dimension register by
// the local work size and stores the product at that address — confirm
// against encodeMulRegVal, which is not visible here.
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
// Entries reported as undefined by NEO::isUndefinedOffset are skipped.
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
// Removed line: the pointer had to be cast to an integer before ptrOffset.
EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
|
||||
// Added line: the address is already a uint64_t, so no cast is needed.
EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(crossThreadAddress, offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -153,10 +153,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
|||
dispatchInterface->getCrossThreadData(), sizeCrossThreadData);
|
||||
|
||||
if (isIndirect) {
|
||||
void *gpuPtr = reinterpret_cast<void *>(heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - sizeThreadData);
|
||||
void *implicitArgsGpuPtr = nullptr;
|
||||
auto gpuPtr = heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - sizeThreadData;
|
||||
uint64_t implicitArgsGpuPtr = 0u;
|
||||
if (pImplicitArgs) {
|
||||
implicitArgsGpuPtr = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(gpuPtr) - sizeof(ImplicitArgs));
|
||||
implicitArgsGpuPtr = gpuPtr - sizeof(ImplicitArgs);
|
||||
}
|
||||
EncodeIndirectParams<Family>::encode(container, gpuPtr, dispatchInterface, implicitArgsGpuPtr);
|
||||
}
|
||||
|
|
|
@ -187,10 +187,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
|||
crossThreadData, sizeCrossThreadData);
|
||||
}
|
||||
if (isIndirect) {
|
||||
void *gpuPtr = reinterpret_cast<void *>(heap->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset));
|
||||
void *implicitArgsGpuPtr = nullptr;
|
||||
auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset);
|
||||
uint64_t implicitArgsGpuPtr = 0u;
|
||||
if (pImplicitArgs) {
|
||||
implicitArgsGpuPtr = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(gpuPtr) + inlineDataProgrammingOffset - sizeof(ImplicitArgs));
|
||||
implicitArgsGpuPtr = gpuPtr + inlineDataProgrammingOffset - sizeof(ImplicitArgs);
|
||||
}
|
||||
EncodeIndirectParams<Family>::encode(container, gpuPtr, dispatchInterface, implicitArgsGpuPtr);
|
||||
}
|
||||
|
|
|
@ -229,7 +229,7 @@ HWTEST_F(CommandEncoderMathTest, WhenSettingGroupSizeIndirectThenCommandsAreCorr
|
|||
uint32_t crossThreadAdress[3] = {};
|
||||
uint32_t lws[3] = {2, 1, 1};
|
||||
|
||||
EncodeIndirectParams<FamilyType>::setGlobalWorkSizeIndirect(cmdContainer, offsets, crossThreadAdress, lws);
|
||||
EncodeIndirectParams<FamilyType>::setGlobalWorkSizeIndirect(cmdContainer, offsets, reinterpret_cast<uint64_t>(crossThreadAdress), lws);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
@ -254,7 +254,7 @@ HWTEST_F(CommandEncoderMathTest, WhenSettingGroupCountIndirectThenCommandsAreCor
|
|||
CrossThreadDataOffset offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
|
||||
uint32_t crossThreadAdress[3] = {};
|
||||
|
||||
EncodeIndirectParams<FamilyType>::setGroupCountIndirect(cmdContainer, offsets, crossThreadAdress);
|
||||
EncodeIndirectParams<FamilyType>::setGroupCountIndirect(cmdContainer, offsets, reinterpret_cast<uint64_t>(crossThreadAdress));
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
|
||||
|
|
Loading…
Reference in New Issue