mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Move Walker specific code to dedicated method.
- Move cache flushes after the Walker.

Change-Id: I58c5e76bad22ac42da2c466ef008ef5bf96df077
This commit is contained in:
committed by
sys_ocldev
parent
a24704fa18
commit
ef73bb8c11
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2018 Intel Corporation
|
* Copyright (C) 2018-2019 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -80,6 +80,21 @@ class HardwareInterface {
|
|||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue);
|
CommandQueue &commandQueue);
|
||||||
|
|
||||||
|
static void programWalker(
|
||||||
|
LinearStream &commandStream,
|
||||||
|
Kernel &kernel,
|
||||||
|
CommandQueue &commandQueue,
|
||||||
|
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||||
|
IndirectHeap &dsh,
|
||||||
|
IndirectHeap &ioh,
|
||||||
|
IndirectHeap &ssh,
|
||||||
|
size_t localWorkSizes[3],
|
||||||
|
PreemptionMode preemptionMode,
|
||||||
|
size_t currentDispatchIndex,
|
||||||
|
uint32_t &interfaceDescriptorIndex,
|
||||||
|
const DispatchInfo &dispatchInfo,
|
||||||
|
size_t offsetInterfaceDescriptorTable);
|
||||||
|
|
||||||
static WALKER_TYPE<GfxFamily> *allocateWalkerSpace(LinearStream &commandStream,
|
static WALKER_TYPE<GfxFamily> *allocateWalkerSpace(LinearStream &commandStream,
|
||||||
const Kernel &kernel);
|
const Kernel &kernel);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2018 Intel Corporation
|
* Copyright (C) 2018-2019 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -12,6 +12,14 @@
|
|||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
inline WALKER_TYPE<GfxFamily> *HardwareInterface<GfxFamily>::allocateWalkerSpace(LinearStream &commandStream,
|
||||||
|
const Kernel &kernel) {
|
||||||
|
auto walkerCmd = static_cast<WALKER_TYPE<GfxFamily> *>(commandStream.getSpace(sizeof(WALKER_TYPE<GfxFamily>)));
|
||||||
|
*walkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||||
|
return walkerCmd;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void HardwareInterface<GfxFamily>::dispatchWalker(
|
void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
@@ -126,9 +134,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3));
|
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3));
|
||||||
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2));
|
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2));
|
||||||
|
|
||||||
// Determine SIMD size
|
|
||||||
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
|
||||||
|
|
||||||
// If we don't have a required WGS, compute one opportunistically
|
// If we don't have a required WGS, compute one opportunistically
|
||||||
auto maxWorkGroupSize = static_cast<uint32_t>(commandQueue.getDevice().getDeviceInfo().maxWorkGroupSize);
|
auto maxWorkGroupSize = static_cast<uint32_t>(commandQueue.getDevice().getDeviceInfo().maxWorkGroupSize);
|
||||||
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
||||||
@@ -148,7 +153,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
// Compute number of work groups
|
// Compute number of work groups
|
||||||
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups()
|
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups()
|
||||||
: generateWorkgroupsNumber(gws, lws);
|
: generateWorkgroupsNumber(gws, lws);
|
||||||
Vec3<size_t> nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : twgs;
|
|
||||||
|
|
||||||
// Patch our kernel constants
|
// Patch our kernel constants
|
||||||
*kernel.globalWorkOffsetX = static_cast<uint32_t>(offset.x);
|
*kernel.globalWorkOffsetX = static_cast<uint32_t>(offset.x);
|
||||||
@@ -183,7 +187,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
|
|
||||||
// Send our indirect object data
|
// Send our indirect object data
|
||||||
size_t localWorkSizes[3] = {lws.x, lws.y, lws.z};
|
size_t localWorkSizes[3] = {lws.x, lws.y, lws.z};
|
||||||
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
|
|
||||||
|
|
||||||
dispatchProfilingPerfStartCommands(dispatchInfo, multiDispatchInfo, hwTimeStamps,
|
dispatchProfilingPerfStartCommands(dispatchInfo, multiDispatchInfo, hwTimeStamps,
|
||||||
hwPerfCounter, commandStream, commandQueue);
|
hwPerfCounter, commandStream, commandQueue);
|
||||||
@@ -195,47 +198,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
|
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Program the walker. Invokes execution so all state should already be programmed
|
programWalker(*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh,
|
||||||
auto walkerCmd = allocateWalkerSpace(*commandStream, kernel);
|
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable);
|
||||||
|
|
||||||
KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, &kernel);
|
|
||||||
|
|
||||||
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
|
||||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto idd = obtainInterfaceDescriptorData(walkerCmd);
|
|
||||||
|
|
||||||
bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes);
|
|
||||||
bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
|
||||||
bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
|
||||||
KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
|
||||||
*commandStream,
|
|
||||||
*dsh,
|
|
||||||
*ioh,
|
|
||||||
*ssh,
|
|
||||||
kernel,
|
|
||||||
simd,
|
|
||||||
localWorkSizes,
|
|
||||||
offsetInterfaceDescriptorTable,
|
|
||||||
interfaceDescriptorIndex,
|
|
||||||
preemptionMode,
|
|
||||||
walkerCmd,
|
|
||||||
idd,
|
|
||||||
localIdsGenerationByRuntime,
|
|
||||||
kernelUsesLocalIds,
|
|
||||||
inlineDataProgrammingRequired);
|
|
||||||
|
|
||||||
size_t globalOffsets[3] = {offset.x, offset.y, offset.z};
|
|
||||||
size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z};
|
|
||||||
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
|
|
||||||
numWorkGroups, localWorkSizes, simd, dim,
|
|
||||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired,
|
|
||||||
*kernel.getKernelInfo().patchInfo.threadPayload);
|
|
||||||
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::adjustWalkerData(commandStream, walkerCmd, kernel, dispatchInfo);
|
|
||||||
|
|
||||||
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
||||||
if (dispatchInfo.isPipeControlRequired()) {
|
if (dispatchInfo.isPipeControlRequired()) {
|
||||||
@@ -244,6 +208,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
}
|
}
|
||||||
|
KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, &kernel);
|
||||||
|
|
||||||
currentDispatchIndex++;
|
currentDispatchIndex++;
|
||||||
}
|
}
|
||||||
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2018 Intel Corporation
|
* Copyright (C) 2018-2019 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -92,11 +92,70 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline WALKER_TYPE<GfxFamily> *HardwareInterface<GfxFamily>::allocateWalkerSpace(LinearStream &commandStream,
|
inline void HardwareInterface<GfxFamily>::programWalker(
|
||||||
const Kernel &kernel) {
|
LinearStream &commandStream,
|
||||||
auto walkerCmd = static_cast<WALKER_TYPE<GfxFamily> *>(commandStream.getSpace(sizeof(WALKER_TYPE<GfxFamily>)));
|
Kernel &kernel,
|
||||||
*walkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
CommandQueue &commandQueue,
|
||||||
return walkerCmd;
|
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||||
|
IndirectHeap &dsh,
|
||||||
|
IndirectHeap &ioh,
|
||||||
|
IndirectHeap &ssh,
|
||||||
|
size_t localWorkSizes[3],
|
||||||
|
PreemptionMode preemptionMode,
|
||||||
|
size_t currentDispatchIndex,
|
||||||
|
uint32_t &interfaceDescriptorIndex,
|
||||||
|
const DispatchInfo &dispatchInfo,
|
||||||
|
size_t offsetInterfaceDescriptorTable) {
|
||||||
|
|
||||||
|
auto walkerCmd = allocateWalkerSpace(commandStream, kernel);
|
||||||
|
uint32_t dim = dispatchInfo.getDim();
|
||||||
|
Vec3<size_t> lws = dispatchInfo.getLocalWorkgroupSize();
|
||||||
|
Vec3<size_t> gws = dispatchInfo.getGWS();
|
||||||
|
Vec3<size_t> swgs = dispatchInfo.getStartOfWorkgroups();
|
||||||
|
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() : generateWorkgroupsNumber(gws, lws);
|
||||||
|
Vec3<size_t> nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : twgs;
|
||||||
|
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
|
||||||
|
|
||||||
|
if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
|
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
|
||||||
|
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto idd = obtainInterfaceDescriptorData(walkerCmd);
|
||||||
|
|
||||||
|
bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(dim, globalWorkSizes, localWorkSizes);
|
||||||
|
bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
||||||
|
bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds(kernel);
|
||||||
|
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
||||||
|
|
||||||
|
Vec3<size_t> offset = dispatchInfo.getOffset();
|
||||||
|
|
||||||
|
KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||||
|
commandStream,
|
||||||
|
dsh,
|
||||||
|
ioh,
|
||||||
|
ssh,
|
||||||
|
kernel,
|
||||||
|
simd,
|
||||||
|
localWorkSizes,
|
||||||
|
offsetInterfaceDescriptorTable,
|
||||||
|
interfaceDescriptorIndex,
|
||||||
|
preemptionMode,
|
||||||
|
walkerCmd,
|
||||||
|
idd,
|
||||||
|
localIdsGenerationByRuntime,
|
||||||
|
kernelUsesLocalIds,
|
||||||
|
inlineDataProgrammingRequired);
|
||||||
|
|
||||||
|
size_t globalOffsets[3] = {offset.x, offset.y, offset.z};
|
||||||
|
size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z};
|
||||||
|
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
|
||||||
|
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
|
||||||
|
numWorkGroups, localWorkSizes, simd, dim,
|
||||||
|
localIdsGenerationByRuntime, inlineDataProgrammingRequired,
|
||||||
|
*kernel.getKernelInfo().patchInfo.threadPayload);
|
||||||
|
|
||||||
|
GpgpuWalkerHelper<GfxFamily>::adjustWalkerData(&commandStream, walkerCmd, kernel, dispatchInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2018 Intel Corporation
|
* Copyright (C) 2018-2019 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -878,7 +878,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabled
|
|||||||
hwParse.parseCommands<FamilyType>(cmdQ.getCS(0), 0);
|
hwParse.parseCommands<FamilyType>(cmdQ.getCS(0), 0);
|
||||||
auto itorCmd = find<GPGPU_WALKER *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
|
auto itorCmd = find<GPGPU_WALKER *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
|
||||||
ASSERT_NE(hwParse.cmdList.end(), itorCmd);
|
ASSERT_NE(hwParse.cmdList.end(), itorCmd);
|
||||||
++itorCmd;
|
itorCmd = find<PIPE_CONTROL *>(itorCmd, hwParse.cmdList.end());
|
||||||
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*itorCmd);
|
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*itorCmd);
|
||||||
ASSERT_NE(nullptr, pipeControl);
|
ASSERT_NE(nullptr, pipeControl);
|
||||||
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
|
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
|
||||||
|
|||||||
Reference in New Issue
Block a user