Add LogicalStateHelper getter for CommandQueue.

Refactor Kernel handling

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2022-06-27 17:20:50 +00:00
committed by Compute-Runtime-Automation
parent 5e5df0fe87
commit 4fb4a1d77b
10 changed files with 43 additions and 27 deletions

View File

@ -12,6 +12,7 @@
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/get_info.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/timestamp_packet.h"
@ -1229,4 +1230,8 @@ void CommandQueue::clearLastBcsPackets() {
}
}
// Hands out a non-owning pointer to the LogicalStateHelper held by this queue.
// Ownership stays with the queue; the result is whatever the member currently holds.
LogicalStateHelper *CommandQueue::getLogicalStateHelper() const {
    LogicalStateHelper *helper = logicalStateHelper.get();
    return helper;
}
} // namespace NEO

View File

@ -35,6 +35,7 @@ class IndirectHeap;
class Kernel;
class MemObj;
class PerformanceCounters;
class LogicalStateHelper;
struct CompletionStamp;
struct MultiDispatchInfo;
@ -358,6 +359,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool isTextureCacheFlushNeeded(uint32_t commandType) const;
// Returns a non-owning pointer to the LogicalStateHelper stored in this queue
// (the raw pointer obtained from the owning member). The queue keeps ownership;
// callers must not delete the returned pointer.
// NOTE(review): presumably nullptr when no helper has been created - confirm where the member is initialized.
LogicalStateHelper *getLogicalStateHelper() const;
protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
@ -425,6 +428,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
std::unique_ptr<LogicalStateHelper> logicalStateHelper;
struct BcsTimestampPacketContainers {
TimestampPacketContainer lastBarrierToWaitFor;
TimestampPacketContainer lastSignalledPacket;

View File

@ -505,7 +505,5 @@ class CommandQueueHw : public CommandQueue {
void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType);
bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);
std::unique_ptr<LogicalStateHelper> logicalStateHelper;
};
} // namespace NEO

View File

@ -88,7 +88,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
ioh,
ssh,
kernel,
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
kernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,

View File

@ -103,7 +103,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
ioh,
ssh,
kernel,
kernel.getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed),
kernel.getKernelStartAddress(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed, false),
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,

View File

@ -2020,15 +2020,13 @@ void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocati
}
}
uint64_t Kernel::getKernelStartOffset(
const bool localIdsGenerationByRuntime,
const bool kernelUsesLocalIds,
const bool isCssUsed) const {
uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const {
uint64_t kernelStartOffset = 0;
if (kernelInfo.getGraphicsAllocation()) {
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
kernelStartOffset = returnFullAddress ? kernelInfo.getGraphicsAllocation()->getGpuAddress()
: kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}

View File

@ -360,10 +360,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
size_t *localWorkSize);
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const;
uint64_t getKernelStartOffset(
const bool localIdsGenerationByRuntime,
const bool kernelUsesLocalIds,
const bool isCssUsed) const;
// Computes the kernel start address/offset used when programming a dispatch.
//  - returnFullAddress: when true the base is the kernel allocation's getGpuAddress(),
//    otherwise its getGpuAddressToPatch(). With no kernel allocation the base is 0.
//  - localIdsGenerationByRuntime / kernelUsesLocalIds: when local IDs are NOT generated
//    by the runtime and the kernel uses local IDs, entryPoints.skipPerThreadDataLoad is added.
//  - isCssUsed: NOTE(review): looks like a spelling of "isCcsUsed" (the name used by callers);
//    presumably gates an extra platform-specific entry-point offset - confirm in the definition.
uint64_t getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const;
bool isKernelDebugEnabled() const { return debugEnabled; }
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -32,7 +32,7 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS
mockKernelWithInternals.kernelInfo.kernelDescriptor.entryPoints.skipSetFFIDGP = additionalOffsetDueToFfid;
for (auto isCcsUsed : ::testing::Bool()) {
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed);
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartAddress(false, false, isCcsUsed, false);
if (stepping == REVISION_A0 && isCcsUsed) {
EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);

View File

@ -340,7 +340,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes
ioh,
ssh,
*kernel,
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
kernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
kernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
idToffset,
@ -395,7 +395,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@ -441,7 +441,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@ -520,7 +520,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
ioh,
ssh,
mockKernel,
mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
modifiedKernelInfo.getMaxSimdSize(),
localWorkSizes,
idToffset,
@ -610,7 +610,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi
ioh,
ssh,
*kernel,
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
kernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
kernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@ -719,7 +719,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
ioh,
ssh,
*pKernel,
pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
pKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
pKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@ -860,7 +860,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@ -884,7 +884,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
localWorkSizes,
0,
@ -954,7 +954,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
8,
localWorkSizes,
interfaceDescriptorTableOffset,

View File

@ -2845,11 +2845,24 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
mockKernel.mockKernel->setStartOffset(128);
auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false);
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, true, false, false);
EXPECT_EQ(allocationOffset + 256u, offset);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}
// Checks that asking for the full address yields the GPU address of the kernel allocation.
TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenReturnFullAddress) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
    MockKernelWithInternals mockKernel(*device);

    auto &kernelInfo = mockKernel.kernelInfo;
    kernelInfo.createKernelAllocation(device->getDevice(), false);

    // Named flags make the call site self-describing.
    constexpr bool localIdsGenerationByRuntime = false;
    constexpr bool kernelUsesLocalIds = true;
    constexpr bool isCcsUsed = false;
    constexpr bool returnFullAddress = true;

    const auto startAddress = mockKernel.mockKernel->getKernelStartAddress(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed, returnFullAddress);

    auto *kernelAllocation = kernelInfo.getGraphicsAllocation();
    EXPECT_EQ(kernelAllocation->getGpuAddress(), startAddress);

    // The allocation was created manually above, so it is released manually as well.
    device->getMemoryManager()->freeGraphicsMemory(kernelAllocation);
}
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
@ -2861,7 +2874,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
mockKernel.mockKernel->setStartOffset(128);
auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false);
auto offset = mockKernel.mockKernel->getKernelStartAddress(true, true, false, false);
EXPECT_EQ(allocationOffset + 128u, offset);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}
@ -2877,7 +2890,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
mockKernel.mockKernel->setStartOffset(128);
auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false);
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, false, false, false);
EXPECT_EQ(allocationOffset + 128u, offset);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}