mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Add LogicalStateHelper getter for CommandQueue.
Refactor Kernel handling

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
5e5df0fe87
commit
4fb4a1d77b
@ -12,6 +12,7 @@
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/get_info.h"
|
||||
#include "shared/source/helpers/logical_state_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
@ -1229,4 +1230,8 @@ void CommandQueue::clearLastBcsPackets() {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a non-owning pointer to the LogicalStateHelper held by this queue.
// Ownership stays with the std::unique_ptr member `logicalStateHelper`; the
// result is nullptr when that member is empty, so callers must not delete it.
LogicalStateHelper *CommandQueue::getLogicalStateHelper() const {
|
||||
return logicalStateHelper.get();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -35,6 +35,7 @@ class IndirectHeap;
|
||||
class Kernel;
|
||||
class MemObj;
|
||||
class PerformanceCounters;
|
||||
class LogicalStateHelper;
|
||||
struct CompletionStamp;
|
||||
struct MultiDispatchInfo;
|
||||
|
||||
@ -358,6 +359,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
bool isTextureCacheFlushNeeded(uint32_t commandType) const;
|
||||
|
||||
LogicalStateHelper *getLogicalStateHelper() const;
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||
@ -425,6 +428,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
|
||||
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
||||
std::unique_ptr<LogicalStateHelper> logicalStateHelper;
|
||||
|
||||
struct BcsTimestampPacketContainers {
|
||||
TimestampPacketContainer lastBarrierToWaitFor;
|
||||
TimestampPacketContainer lastSignalledPacket;
|
||||
|
@ -505,7 +505,5 @@ class CommandQueueHw : public CommandQueue {
|
||||
void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType);
|
||||
|
||||
bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);
|
||||
|
||||
std::unique_ptr<LogicalStateHelper> logicalStateHelper;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
@ -88,7 +88,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
ioh,
|
||||
ssh,
|
||||
kernel,
|
||||
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
kernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
|
@ -103,7 +103,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
ioh,
|
||||
ssh,
|
||||
kernel,
|
||||
kernel.getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed),
|
||||
kernel.getKernelStartAddress(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed, false),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
|
@ -2020,15 +2020,13 @@ void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocati
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t Kernel::getKernelStartOffset(
|
||||
const bool localIdsGenerationByRuntime,
|
||||
const bool kernelUsesLocalIds,
|
||||
const bool isCssUsed) const {
|
||||
uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const {
|
||||
|
||||
uint64_t kernelStartOffset = 0;
|
||||
|
||||
if (kernelInfo.getGraphicsAllocation()) {
|
||||
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
kernelStartOffset = returnFullAddress ? kernelInfo.getGraphicsAllocation()->getGpuAddress()
|
||||
: kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
|
||||
kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
}
|
||||
|
@ -360,10 +360,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
size_t *localWorkSize);
|
||||
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const;
|
||||
|
||||
uint64_t getKernelStartOffset(
|
||||
const bool localIdsGenerationByRuntime,
|
||||
const bool kernelUsesLocalIds,
|
||||
const bool isCssUsed) const;
|
||||
uint64_t getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const;
|
||||
|
||||
bool isKernelDebugEnabled() const { return debugEnabled; }
|
||||
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -32,7 +32,7 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS
|
||||
mockKernelWithInternals.kernelInfo.kernelDescriptor.entryPoints.skipSetFFIDGP = additionalOffsetDueToFfid;
|
||||
|
||||
for (auto isCcsUsed : ::testing::Bool()) {
|
||||
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed);
|
||||
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartAddress(false, false, isCcsUsed, false);
|
||||
|
||||
if (stepping == REVISION_A0 && isCcsUsed) {
|
||||
EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
|
||||
|
@ -340,7 +340,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes
|
||||
ioh,
|
||||
ssh,
|
||||
*kernel,
|
||||
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
kernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
kernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
idToffset,
|
||||
@ -395,7 +395,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@ -441,7 +441,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@ -520,7 +520,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
||||
ioh,
|
||||
ssh,
|
||||
mockKernel,
|
||||
mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
modifiedKernelInfo.getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
idToffset,
|
||||
@ -610,7 +610,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi
|
||||
ioh,
|
||||
ssh,
|
||||
*kernel,
|
||||
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
kernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
kernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@ -719,7 +719,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
|
||||
ioh,
|
||||
ssh,
|
||||
*pKernel,
|
||||
pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
pKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
pKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@ -860,7 +860,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@ -884,7 +884,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
|
||||
localWorkSizes,
|
||||
0,
|
||||
@ -954,7 +954,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
|
||||
ioh,
|
||||
ssh,
|
||||
*mockKernelWithInternal->mockKernel,
|
||||
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
|
||||
mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
8,
|
||||
localWorkSizes,
|
||||
interfaceDescriptorTableOffset,
|
||||
|
@ -2845,11 +2845,24 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
|
||||
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
mockKernel.mockKernel->setStartOffset(128);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, true, false, false);
|
||||
EXPECT_EQ(allocationOffset + 256u, offset);
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
// Covers the returnFullAddress path of Kernel::getKernelStartAddress: with the
// last argument set to true, the returned value is compared against the kernel
// allocation's getGpuAddress() instead of getGpuAddressToPatch().
TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenReturnFullAddress) {
|
||||
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
|
||||
|
||||
MockKernelWithInternals mockKernel(*device);
|
||||
|
||||
// presumably this provides the allocation later read via getGraphicsAllocation() - confirm
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
|
||||
|
||||
// Arguments: localIdsGenerationByRuntime=false, kernelUsesLocalIds=true, isCssUsed=false, returnFullAddress=true.
// NOTE(review): this flag combination makes getKernelStartAddress add
// entryPoints.skipPerThreadDataLoad; the exact-equality check below presumably
// relies on that field being zero for this mock - confirm.
auto address = mockKernel.mockKernel->getKernelStartAddress(false, true, false, true);
|
||||
EXPECT_EQ(mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddress(), address);
|
||||
|
||||
// The kernel allocation is released explicitly through the memory manager.
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
|
||||
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
|
||||
|
||||
@ -2861,7 +2874,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
|
||||
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
mockKernel.mockKernel->setStartOffset(128);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartAddress(true, true, false, false);
|
||||
EXPECT_EQ(allocationOffset + 128u, offset);
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
}
|
||||
@ -2877,7 +2890,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
|
||||
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
mockKernel.mockKernel->setStartOffset(128);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, false, false, false);
|
||||
EXPECT_EQ(allocationOffset + 128u, offset);
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
}
|
||||
|
Reference in New Issue
Block a user