mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Refactor interface to hardware interface
Related-To: NEO-6959 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
789dd1900e
commit
461a2eb8c7
@ -440,16 +440,19 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
hwPerfCounter = event->getHwPerfCounterNode();
|
||||
}
|
||||
|
||||
HardwareInterfaceWalkerArgs dispatchWalkerArgs = {};
|
||||
dispatchWalkerArgs.blockedCommandsData = blockedCommandsData;
|
||||
dispatchWalkerArgs.hwTimeStamps = hwTimeStamps;
|
||||
dispatchWalkerArgs.hwPerfCounter = hwPerfCounter;
|
||||
dispatchWalkerArgs.timestampPacketDependencies = &timestampPacketDependencies;
|
||||
dispatchWalkerArgs.currentTimestampPacketNodes = timestampPacketContainer.get();
|
||||
dispatchWalkerArgs.commandType = commandType;
|
||||
|
||||
HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
*this,
|
||||
multiDispatchInfo,
|
||||
csrDeps,
|
||||
blockedCommandsData,
|
||||
hwTimeStamps,
|
||||
hwPerfCounter,
|
||||
&timestampPacketDependencies,
|
||||
timestampPacketContainer.get(),
|
||||
commandType);
|
||||
dispatchWalkerArgs);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
|
@ -27,6 +27,24 @@ struct MultiDispatchInfo;
|
||||
template <class T>
|
||||
class TagNode;
|
||||
|
||||
// Argument bundle shared by HardwareInterface::dispatchWalker, dispatchKernelCommands and
// programWalker. It replaces the long positional parameter lists those functions used to take.
// The caller fills in the enqueue-level inputs; the remaining fields are scratch state that
// dispatchWalker / dispatchKernelCommands overwrite while iterating over the dispatch infos.
struct HardwareInterfaceWalkerArgs {
|
||||
// Global work size per dimension; written by dispatchKernelCommands from dispatchInfo.getGWS().
size_t globalWorkSizes[3] = {};
|
||||
// Local work size per dimension; written by dispatchKernelCommands from
// dispatchInfo.getLocalWorkgroupSize() and read by programWalker.
size_t localWorkSizes[3] = {};
|
||||
// Caller-provided node forwarded to dispatchProfilingPerfStartCommands / ...EndCommands.
TagNodeBase *hwTimeStamps = nullptr;
|
||||
// Caller-provided node forwarded to dispatchProfilingPerfStartCommands / ...EndCommands.
TagNodeBase *hwPerfCounter = nullptr;
|
||||
// Caller-provided; passed to dispatchInfo.dispatchInitCommands and dispatchEpilogueCommands.
TimestampPacketDependencies *timestampPacketDependencies = nullptr;
|
||||
// Caller-provided container; programWalker null-checks it before indexing its nodes with
// currentDispatchIndex.
TimestampPacketContainer *currentTimestampPacketNodes = nullptr;
|
||||
// Set by dispatchKernelCommands to &dispatchInfo.getNumberOfWorkgroups(); non-owning.
const Vec3<size_t> *numberOfWorkgroups = nullptr;
|
||||
// Set by dispatchKernelCommands to &dispatchInfo.getStartOfWorkgroups(); non-owning.
const Vec3<size_t> *startOfWorkgroups = nullptr;
|
||||
// Caller-provided; dispatchWalker treats a non-null value as a blocked queue and then uses
// this object's command stream and sets its heaps.
KernelOperation *blockedCommandsData = nullptr;
|
||||
// Reset to 0 by dispatchWalker and incremented after each dispatch info is processed.
size_t currentDispatchIndex = 0;
|
||||
// Set by dispatchWalker to dsh->getUsed() after the DSH has been aligned.
size_t offsetInterfaceDescriptorTable = 0;
|
||||
// Overwritten by dispatchWalker with ClPreemptionHelper::taskPreemptionMode(...).
PreemptionMode preemptionMode = PreemptionMode::Initial;
|
||||
// Caller-provided command type; dispatchKernelCommands compares it against
// CL_COMMAND_NDRANGE_KERNEL to decide whether to emit local work group size hints.
uint32_t commandType = 0;
|
||||
// Reset to 0 by dispatchWalker; forwarded by programWalker together with
// offsetInterfaceDescriptorTable.
uint32_t interfaceDescriptorIndex = 0;
|
||||
// Set per dispatch info by dispatchWalker: true when its kernel is
// multiDispatchInfo.peekMainKernel().
bool isMainKernel = false;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
class HardwareInterface {
|
||||
public:
|
||||
@ -37,12 +55,7 @@ class HardwareInterface {
|
||||
CommandQueue &commandQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
const CsrDependencies &csrDependencies,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TagNodeBase *hwTimeStamps,
|
||||
TagNodeBase *hwPerfCounter,
|
||||
TimestampPacketDependencies *timestampPacketDependencies,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
uint32_t commandType);
|
||||
HardwareInterfaceWalkerArgs &walkerArgs);
|
||||
|
||||
static void getDefaultDshSpace(
|
||||
const size_t &offsetInterfaceDescriptorTable,
|
||||
@ -81,19 +94,11 @@ class HardwareInterface {
|
||||
LinearStream &commandStream,
|
||||
Kernel &kernel,
|
||||
CommandQueue &commandQueue,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
IndirectHeap &dsh,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
size_t globalWorkSizes[3],
|
||||
size_t localWorkSizes[3],
|
||||
PreemptionMode preemptionMode,
|
||||
size_t currentDispatchIndex,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
const Vec3<size_t> &numberOfWorkgroups,
|
||||
const Vec3<size_t> &startOfWorkgroups);
|
||||
HardwareInterfaceWalkerArgs &walkerArgs);
|
||||
|
||||
static WALKER_TYPE *allocateWalkerSpace(LinearStream &commandStream,
|
||||
const Kernel &kernel);
|
||||
@ -101,11 +106,9 @@ class HardwareInterface {
|
||||
static void obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo,
|
||||
bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh);
|
||||
|
||||
static void dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, uint32_t commandType,
|
||||
LinearStream &commandStream, bool isMainKernel, size_t currentDispatchIndex,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes, PreemptionMode preemptionMode,
|
||||
uint32_t &interfaceDescriptorIndex, size_t offsetInterfaceDescriptorTable,
|
||||
IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh);
|
||||
static void dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, LinearStream &commandStream,
|
||||
IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh,
|
||||
HardwareInterfaceWalkerArgs &walkerArgs);
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -64,17 +64,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
CommandQueue &commandQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
const CsrDependencies &csrDependencies,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TagNodeBase *hwTimeStamps,
|
||||
TagNodeBase *hwPerfCounter,
|
||||
TimestampPacketDependencies *timestampPacketDependencies,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
uint32_t commandType) {
|
||||
HardwareInterfaceWalkerArgs &walkerArgs) {
|
||||
|
||||
LinearStream *commandStream = nullptr;
|
||||
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
auto mainKernel = multiDispatchInfo.peekMainKernel();
|
||||
auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo);
|
||||
walkerArgs.preemptionMode = ClPreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo);
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
// Compute local workgroup sizes
|
||||
@ -85,11 +80,11 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
}
|
||||
|
||||
// Allocate command stream and indirect heaps
|
||||
bool blockedQueue = (blockedCommandsData != nullptr);
|
||||
bool blockedQueue = (walkerArgs.blockedCommandsData != nullptr);
|
||||
obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockedQueue, dsh, ioh, ssh);
|
||||
if (blockedQueue) {
|
||||
blockedCommandsData->setHeaps(dsh, ioh, ssh);
|
||||
commandStream = blockedCommandsData->commandStream.get();
|
||||
walkerArgs.blockedCommandsData->setHeaps(dsh, ioh, ssh);
|
||||
commandStream = walkerArgs.blockedCommandsData->commandStream.get();
|
||||
} else {
|
||||
commandStream = &commandQueue.getCS(0);
|
||||
}
|
||||
@ -119,22 +114,22 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
|
||||
dsh->align(EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
|
||||
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
const size_t offsetInterfaceDescriptorTable = dsh->getUsed();
|
||||
walkerArgs.interfaceDescriptorIndex = 0;
|
||||
walkerArgs.offsetInterfaceDescriptorTable = dsh->getUsed();
|
||||
|
||||
size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
|
||||
getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, dsh, commandStream);
|
||||
getDefaultDshSpace(walkerArgs.offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, dsh, commandStream);
|
||||
|
||||
// Program media interface descriptor load
|
||||
HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
|
||||
*commandStream,
|
||||
offsetInterfaceDescriptorTable,
|
||||
walkerArgs.offsetInterfaceDescriptorTable,
|
||||
totalInterfaceDescriptorTableSize);
|
||||
|
||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||
DEBUG_BREAK_IF(walkerArgs.offsetInterfaceDescriptorTable % 64 != 0);
|
||||
|
||||
dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
dispatchProfilingPerfStartCommands(walkerArgs.hwTimeStamps, walkerArgs.hwPerfCounter, commandStream, commandQueue);
|
||||
|
||||
const auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
|
||||
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
@ -146,25 +141,23 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
multiDispatchInfo.begin()->getLocalWorkgroupSize(),
|
||||
multiDispatchInfo.begin()->getActualWorkgroupSize(),
|
||||
multiDispatchInfo.begin()->getOffset(),
|
||||
currentTimestampPacketNodes);
|
||||
walkerArgs.currentTimestampPacketNodes);
|
||||
|
||||
size_t currentDispatchIndex = 0;
|
||||
walkerArgs.currentDispatchIndex = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
bool isMainKernel = (dispatchInfo.getKernel() == mainKernel);
|
||||
dispatchInfo.dispatchInitCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
walkerArgs.isMainKernel = (dispatchInfo.getKernel() == mainKernel);
|
||||
|
||||
dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel,
|
||||
currentDispatchIndex, currentTimestampPacketNodes, preemptionMode, interfaceDescriptorIndex,
|
||||
offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh);
|
||||
dispatchKernelCommands(commandQueue, dispatchInfo, *commandStream, *dsh, *ioh, *ssh, walkerArgs);
|
||||
|
||||
currentDispatchIndex++;
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
walkerArgs.currentDispatchIndex++;
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
}
|
||||
|
||||
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
|
||||
uint64_t postSyncAddress = 0;
|
||||
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacketNodeForPostSync = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
|
||||
auto timestampPacketNodeForPostSync = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||
timestampPacketNodeForPostSync->setProfilingCapable(false);
|
||||
postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync);
|
||||
}
|
||||
@ -182,15 +175,13 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
DebugPauseState::hasUserEndConfirmation, hwInfo);
|
||||
}
|
||||
|
||||
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
dispatchProfilingPerfEndCommands(walkerArgs.hwTimeStamps, walkerArgs.hwPerfCounter, commandStream, commandQueue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, uint32_t commandType,
|
||||
LinearStream &commandStream, bool isMainKernel, size_t currentDispatchIndex,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes, PreemptionMode preemptionMode,
|
||||
uint32_t &interfaceDescriptorIndex, size_t offsetInterfaceDescriptorTable,
|
||||
IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh) {
|
||||
void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, LinearStream &commandStream,
|
||||
IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh,
|
||||
HardwareInterfaceWalkerArgs &walkerArgs) {
|
||||
auto &kernel = *dispatchInfo.getKernel();
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3));
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3));
|
||||
@ -199,7 +190,7 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2));
|
||||
|
||||
// If we don't have a required WGS, compute one opportunistically
|
||||
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
||||
if (walkerArgs.commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
||||
provideLocalWorkGroupSizeHints(commandQueue.getContextPtr(), dispatchInfo);
|
||||
}
|
||||
|
||||
@ -207,7 +198,7 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
|
||||
auto dim = dispatchInfo.getDim();
|
||||
const auto &gws = dispatchInfo.getGWS();
|
||||
const auto &offset = dispatchInfo.getOffset();
|
||||
const auto &startOfWorkgroups = dispatchInfo.getStartOfWorkgroups();
|
||||
walkerArgs.startOfWorkgroups = &dispatchInfo.getStartOfWorkgroups();
|
||||
|
||||
// Compute local workgroup sizes
|
||||
const auto &lws = dispatchInfo.getLocalWorkgroupSize();
|
||||
@ -215,37 +206,39 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
|
||||
|
||||
// Compute number of work groups
|
||||
const auto &totalNumberOfWorkgroups = dispatchInfo.getTotalNumberOfWorkgroups();
|
||||
const auto &numberOfWorkgroups = dispatchInfo.getNumberOfWorkgroups();
|
||||
walkerArgs.numberOfWorkgroups = &dispatchInfo.getNumberOfWorkgroups();
|
||||
UNRECOVERABLE_IF(totalNumberOfWorkgroups.x == 0);
|
||||
UNRECOVERABLE_IF(numberOfWorkgroups.x == 0);
|
||||
UNRECOVERABLE_IF(walkerArgs.numberOfWorkgroups->x == 0);
|
||||
|
||||
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
|
||||
walkerArgs.globalWorkSizes[0] = gws.x;
|
||||
walkerArgs.globalWorkSizes[1] = gws.y;
|
||||
walkerArgs.globalWorkSizes[2] = gws.z;
|
||||
|
||||
// Patch our kernel constants
|
||||
kernel.setGlobalWorkOffsetValues(static_cast<uint32_t>(offset.x), static_cast<uint32_t>(offset.y), static_cast<uint32_t>(offset.z));
|
||||
kernel.setGlobalWorkSizeValues(static_cast<uint32_t>(gws.x), static_cast<uint32_t>(gws.y), static_cast<uint32_t>(gws.z));
|
||||
|
||||
if (isMainKernel || (!kernel.isLocalWorkSize2Patchable())) {
|
||||
if (walkerArgs.isMainKernel || (!kernel.isLocalWorkSize2Patchable())) {
|
||||
kernel.setLocalWorkSizeValues(static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
|
||||
}
|
||||
|
||||
kernel.setLocalWorkSize2Values(static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
|
||||
kernel.setEnqueuedLocalWorkSizeValues(static_cast<uint32_t>(elws.x), static_cast<uint32_t>(elws.y), static_cast<uint32_t>(elws.z));
|
||||
|
||||
if (isMainKernel) {
|
||||
if (walkerArgs.isMainKernel) {
|
||||
kernel.setNumWorkGroupsValues(static_cast<uint32_t>(totalNumberOfWorkgroups.x), static_cast<uint32_t>(totalNumberOfWorkgroups.y), static_cast<uint32_t>(totalNumberOfWorkgroups.z));
|
||||
}
|
||||
|
||||
kernel.setWorkDim(dim);
|
||||
|
||||
// Send our indirect object data
|
||||
size_t localWorkSizes[3] = {lws.x, lws.y, lws.z};
|
||||
walkerArgs.localWorkSizes[0] = lws.x;
|
||||
walkerArgs.localWorkSizes[1] = lws.y;
|
||||
walkerArgs.localWorkSizes[2] = lws.z;
|
||||
|
||||
dispatchWorkarounds(&commandStream, commandQueue, kernel, true);
|
||||
|
||||
programWalker(commandStream, kernel, commandQueue, currentTimestampPacketNodes, dsh, ioh, ssh, globalWorkSizes,
|
||||
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo,
|
||||
offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups);
|
||||
programWalker(commandStream, kernel, commandQueue, dsh, ioh, ssh, dispatchInfo, walkerArgs);
|
||||
|
||||
dispatchWorkarounds(&commandStream, commandQueue, kernel, false);
|
||||
}
|
||||
|
@ -51,19 +51,11 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
LinearStream &commandStream,
|
||||
Kernel &kernel,
|
||||
CommandQueue &commandQueue,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
IndirectHeap &dsh,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
size_t globalWorkSizes[3],
|
||||
size_t localWorkSizes[3],
|
||||
PreemptionMode preemptionMode,
|
||||
size_t currentDispatchIndex,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
const Vec3<size_t> &numberOfWorkgroups,
|
||||
const Vec3<size_t> &startOfWorkgroups) {
|
||||
HardwareInterfaceWalkerArgs &walkerArgs) {
|
||||
|
||||
auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel);
|
||||
WALKER_TYPE walkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
@ -71,11 +63,11 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
||||
|
||||
size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z};
|
||||
size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z};
|
||||
size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z};
|
||||
size_t startWorkGroups[3] = {walkerArgs.startOfWorkgroups->x, walkerArgs.startOfWorkgroups->y, walkerArgs.startOfWorkgroups->z};
|
||||
size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z};
|
||||
|
||||
if (currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacketNode = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
|
||||
if (walkerArgs.currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
@ -90,10 +82,10 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
kernel,
|
||||
kernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex,
|
||||
preemptionMode,
|
||||
walkerArgs.localWorkSizes,
|
||||
walkerArgs.offsetInterfaceDescriptorTable,
|
||||
walkerArgs.interfaceDescriptorIndex,
|
||||
walkerArgs.preemptionMode,
|
||||
&walkerCmd,
|
||||
nullptr,
|
||||
true,
|
||||
@ -101,11 +93,11 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo().kernelDescriptor,
|
||||
globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
|
||||
false, false, 0u);
|
||||
|
||||
EncodeWalkerArgs walkerArgs{kernel.getExecutionType(), false};
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, walkerArgs);
|
||||
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), false};
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, encodeWalkerArgs);
|
||||
*walkerCmdBuf = walkerCmd;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
@ -41,19 +41,11 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
LinearStream &commandStream,
|
||||
Kernel &kernel,
|
||||
CommandQueue &commandQueue,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
IndirectHeap &dsh,
|
||||
IndirectHeap &ioh,
|
||||
IndirectHeap &ssh,
|
||||
size_t globalWorkSizes[3],
|
||||
size_t localWorkSizes[3],
|
||||
PreemptionMode preemptionMode,
|
||||
size_t currentDispatchIndex,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
const DispatchInfo &dispatchInfo,
|
||||
size_t offsetInterfaceDescriptorTable,
|
||||
const Vec3<size_t> &numberOfWorkgroups,
|
||||
const Vec3<size_t> &startOfWorkgroups) {
|
||||
HardwareInterfaceWalkerArgs &walkerArgs) {
|
||||
|
||||
using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
|
||||
|
||||
@ -66,13 +58,13 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
|
||||
|
||||
size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z};
|
||||
size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z};
|
||||
size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z};
|
||||
size_t startWorkGroups[3] = {walkerArgs.startOfWorkgroups->x, walkerArgs.startOfWorkgroups->y, walkerArgs.startOfWorkgroups->z};
|
||||
size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z};
|
||||
uint32_t requiredWalkOrder = 0u;
|
||||
|
||||
bool localIdsGenerationByRuntime = EncodeDispatchKernel<GfxFamily>::isRuntimeLocalIdsGenerationRequired(
|
||||
numChannels,
|
||||
localWorkSizes,
|
||||
walkerArgs.localWorkSizes,
|
||||
std::array<uint8_t, 3>{{kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[0],
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[1],
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]}},
|
||||
@ -84,8 +76,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
auto idd = &walkerCmd.getInterfaceDescriptor();
|
||||
auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver();
|
||||
|
||||
if (currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
|
||||
if (walkerArgs.currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacket = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
@ -105,21 +97,21 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
kernel,
|
||||
kernel.getKernelStartAddress(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed, false),
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex,
|
||||
preemptionMode,
|
||||
walkerArgs.localWorkSizes,
|
||||
walkerArgs.offsetInterfaceDescriptorTable,
|
||||
walkerArgs.interfaceDescriptorIndex,
|
||||
walkerArgs.preemptionMode,
|
||||
&walkerCmd,
|
||||
idd,
|
||||
localIdsGenerationByRuntime,
|
||||
commandQueue.getDevice());
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&walkerCmd, kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
|
||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
|
||||
|
||||
EncodeWalkerArgs walkerArgs{kernel.getExecutionType(), true};
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, walkerArgs);
|
||||
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), true};
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, encodeWalkerArgs);
|
||||
|
||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred());
|
||||
@ -139,7 +131,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
if (queueCsr.isStaticWorkPartitioningEnabled()) {
|
||||
queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), partitionCount));
|
||||
}
|
||||
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex);
|
||||
auto timestampPacket = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||
timestampPacket->setPacketsUsed(partitionCount);
|
||||
} else {
|
||||
auto computeWalkerOnStream = commandStream.getSpaceForCmd<typename GfxFamily::COMPUTE_WALKER>();
|
||||
|
@ -81,6 +81,7 @@ set(IGDRCL_SRCS_tests_command_queue
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_buffer_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_image_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ioq_task_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/multi_dispatch_info_tests.cpp
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/task_information.h"
|
||||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
@ -160,16 +161,13 @@ HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntCha
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
|
||||
EXPECT_LT(commandStreamStart, commandStream.getUsed());
|
||||
@ -207,16 +205,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDis
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
|
||||
EXPECT_LT(commandStreamStart, commandStream.getUsed());
|
||||
@ -238,16 +232,12 @@ HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDi
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
@ -270,16 +260,12 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
@ -300,16 +286,12 @@ HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensi
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
@ -331,16 +313,12 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
EXPECT_EQ(dimension, *kernel.getWorkDim());
|
||||
}
|
||||
}
|
||||
@ -362,16 +340,12 @@ HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkG
|
||||
dispatchInfo.setTotalNumberOfWorkgroups(workItems);
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto numWorkGroups = kernel.getNumWorkGroupsValues();
|
||||
EXPECT_EQ(2u, *numWorkGroups[0]);
|
||||
@ -396,16 +370,12 @@ HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGloba
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto gwo = kernel.getGlobalWorkOffsetValues();
|
||||
EXPECT_EQ(1u, *gwo[0]);
|
||||
@ -430,16 +400,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
@ -464,16 +430,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThe
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
@ -499,16 +461,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
@ -534,16 +492,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(2u, *localWorkSize[0]);
|
||||
@ -567,16 +521,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(1u, *localWorkSize[0]);
|
||||
@ -603,16 +553,12 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto localWorkSize = kernel.getLocalWorkSizeValues();
|
||||
EXPECT_EQ(1u, *localWorkSize[0]);
|
||||
@ -645,17 +591,12 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre
|
||||
di2.setTotalNumberOfWorkgroups({2, 2, 2});
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(std::vector<DispatchInfo *>({&di1, &di2}));
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto dispatchId = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
@ -702,16 +643,12 @@ HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorre
|
||||
multiDispatchInfo.push(di1);
|
||||
multiDispatchInfo.push(di2);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
|
||||
@ -758,16 +695,13 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenCommandSt
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto &commandStream = pCmdQ->getCS(1024);
|
||||
EXPECT_EQ(0u, commandStream.getUsed());
|
||||
@ -793,16 +727,13 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH
|
||||
dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfo(&kernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
Vec3<size_t> localWorkgroupSize(workGroupSize);
|
||||
|
||||
@ -845,17 +776,13 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel);
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
|
||||
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
|
||||
@ -872,16 +799,13 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel);
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
EXPECT_NE(nullptr, blockedCommandsData->commandStream->getGraphicsAllocation());
|
||||
EXPECT_NE(0ull, blockedCommandsData->commandStream->getGraphicsAllocation()->getGpuAddress());
|
||||
@ -899,16 +823,13 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI
|
||||
ASSERT_FALSE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
EXPECT_TRUE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());
|
||||
EXPECT_EQ(allocation, blockedCommandsData->commandStream->getGraphicsAllocation());
|
||||
@ -923,17 +844,12 @@ HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDi
|
||||
ASSERT_EQ(CL_SUCCESS, kernel2.initialize());
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector<Kernel *>({&kernel1, &kernel2}));
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = static_cast<MockKernel &>(*dispatchInfo.getKernel());
|
||||
@ -964,17 +880,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch
|
||||
|
||||
indirectHeap.align(EncodeStates<FamilyType>::alignInterfaceDescriptorData);
|
||||
auto dshBeforeMultiDisptach = indirectHeap.getUsed();
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto dshAfterMultiDisptach = indirectHeap.getUsed();
|
||||
|
||||
@ -1052,17 +964,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch
|
||||
|
||||
// create commandStream
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1097,17 +1004,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch
|
||||
|
||||
// create commandStream
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1147,17 +1049,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleDispatchInfoAndSame
|
||||
|
||||
// create commandStream
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1199,17 +1096,12 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector<Kernel *>({&kernel1}));
|
||||
// create commandStream
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(cmdStream);
|
||||
@ -1237,17 +1129,12 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK
|
||||
MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector<Kernel *>({&kernel1, &kernel2}));
|
||||
// create commandStream
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(cmdStream);
|
||||
@ -1277,27 +1164,19 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ
|
||||
// create commandStream
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo1,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs2 = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo2,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs2);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(cmdStream);
|
||||
@ -1346,17 +1225,12 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenAuxToNonAuxWhenTran
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
|
||||
|
||||
builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto sizeUsed = cmdStream.getUsed();
|
||||
GenCmdList cmdList;
|
||||
@ -1400,17 +1274,12 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenNonAuxToAuxWhenTran
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
|
||||
|
||||
builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
auto sizeUsed = cmdStream.getUsed();
|
||||
GenCmdList cmdList;
|
||||
@ -1575,16 +1444,13 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
|
||||
dispatchInfoWithoutImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfoWithoutImplicitArgs(&kernelWithoutImplicitArgs);
|
||||
multiDispatchInfoWithoutImplicitArgs.push(dispatchInfoWithoutImplicitArgs);
|
||||
HardwareInterfaceWalkerArgs walkerArgsWithoutImplicitArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgsWithoutImplicitArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfoWithoutImplicitArgs,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgsWithoutImplicitArgs);
|
||||
|
||||
auto iohSizeWithoutImplicitArgs = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(kernelWithoutImplicitArgs, Math::computeTotalElementsCount(localWorkgroupSize));
|
||||
|
||||
@ -1593,16 +1459,13 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
|
||||
dispatchInfoWithImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfoWithImplicitArgs(&kernelWithoutImplicitArgs);
|
||||
multiDispatchInfoWithImplicitArgs.push(dispatchInfoWithImplicitArgs);
|
||||
HardwareInterfaceWalkerArgs walkerArgsWithImplicitArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgsWithImplicitArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfoWithImplicitArgs,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgsWithImplicitArgs);
|
||||
|
||||
auto iohSizeWithImplicitArgs = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(kernelWithImplicitArgs, Math::computeTotalElementsCount(localWorkgroupSize));
|
||||
|
||||
|
@ -18,11 +18,11 @@
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
struct Dg2AndLaterDispatchWalkerBasicFixture : public LinearStreamFixture {
|
||||
@ -131,16 +131,13 @@ HWTEST2_F(Dg2AndLaterDispatchWalkerBasicTest, givenTimestampPacketWhenDispatchin
|
||||
MockCommandQueue cmdQ(context.get(), device.get(), nullptr, false);
|
||||
auto &cmdStream = cmdQ.getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestampPacketContainer;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
cmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestampPacketContainer,
|
||||
false);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/helpers/cl_hw_parse.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
@ -459,16 +460,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp
|
||||
MockCommandQueue cmdQ(context.get(), device.get(), nullptr, false);
|
||||
auto &cmdStream = cmdQ.getCS(0);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestampPacketContainer;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
cmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestampPacketContainer,
|
||||
false);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
|
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
|
||||
// Test helper: builds walker arguments for a dispatch whose global and local
// work sizes are identical.
//
// wkgSizeArray - three per-dimension sizes, copied into both
//                globalWorkSizes and localWorkSizes.
// wgInfo       - its address is stored in both numberOfWorkgroups and
//                startOfWorkgroups, so it must stay alive for as long as the
//                returned struct is used.
//
// Every other member keeps its value-initialized default.
inline NEO::HardwareInterfaceWalkerArgs createHardwareInterfaceWalkerArgs(size_t wkgSizeArray[3], Vec3<size_t> &wgInfo) {
    NEO::HardwareInterfaceWalkerArgs walkerArgs{};

    for (size_t dim = 0; dim < 3; ++dim) {
        walkerArgs.localWorkSizes[dim] = wkgSizeArray[dim];
        walkerArgs.globalWorkSizes[dim] = wkgSizeArray[dim];
    }

    // Both workgroup pointers intentionally alias the same caller-owned vector.
    walkerArgs.startOfWorkgroups = &wgInfo;
    walkerArgs.numberOfWorkgroups = &wgInfo;

    return walkerArgs;
}
|
||||
|
||||
// Test helper: same as the (wkgSizeArray, wgInfo) overload, with the
// preemptionMode member additionally set to `mode`.
//
// The returned struct holds the address of wgInfo (via the two-argument
// overload), so wgInfo must outlive it.
inline NEO::HardwareInterfaceWalkerArgs createHardwareInterfaceWalkerArgs(size_t wkgSizeArray[3], Vec3<size_t> &wgInfo, PreemptionMode mode) {
    auto walkerArgs = createHardwareInterfaceWalkerArgs(wkgSizeArray, wgInfo);
    walkerArgs.preemptionMode = mode;
    return walkerArgs;
}
|
||||
|
||||
// Test helper: builds walker arguments with only commandType set; every other
// member keeps its value-initialized default.
inline NEO::HardwareInterfaceWalkerArgs createHardwareInterfaceWalkerArgs(uint32_t commandType) {
    NEO::HardwareInterfaceWalkerArgs walkerArgs{};
    walkerArgs.commandType = commandType;
    return walkerArgs;
}
|
@ -21,6 +21,7 @@
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/helpers/timestamp_packet_tests.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||
@ -256,16 +257,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat
|
||||
auto &cmdStream = mockCmdQ->getCS(0);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestampPacket;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestampPacket,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -298,17 +296,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketDisabledWh
|
||||
auto &cmdStream = mockCmdQ->getCS(0);
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestampPacket;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestampPacket,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1115,16 +1109,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestamp7;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
multiDispatchInfo,
|
||||
csrDeps,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestamp7,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1192,16 +1183,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestamp7;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
multiDispatchInfo,
|
||||
csrDeps,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestamp7,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1255,16 +1243,12 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
multiDispatchInfo,
|
||||
csrDeps,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@ -1304,17 +1288,12 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesRe
|
||||
EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr);
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*mockCmdQ,
|
||||
multiDispatchInfo,
|
||||
csrDeps,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
@ -190,16 +191,13 @@ XE_HPC_CORETEST_F(SystemMemoryFenceViaComputeWalkerTest, givenSystemMemoryFenceG
|
||||
auto &cmdStream = commandQueue.getCS(0);
|
||||
MockTimestampPacketContainer timestampPacket(*pClDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
|
||||
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs.currentTimestampPacketNodes = &timestampPacket;
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
commandQueue,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&timestampPacket,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgs);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
@ -63,12 +64,11 @@ XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenKernelWhenWalkerIsProgramme
|
||||
size_t workSize[] = {1, 1, 1};
|
||||
Vec3<size_t> wgInfo = {1, 1, 1};
|
||||
|
||||
uint32_t iddIndex = 0;
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(workSize, wgInfo, PreemptionMode::Disabled);
|
||||
|
||||
mockKernel->kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
HardwareInterface<FamilyType>::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue,
|
||||
nullptr, heap, heap, heap, workSize, workSize,
|
||||
PreemptionMode::Disabled, 0, iddIndex,
|
||||
dispatchInfo, 0, wgInfo, wgInfo);
|
||||
heap, heap, heap, dispatchInfo, walkerArgs);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(commandStream, 0);
|
||||
@ -110,16 +110,15 @@ XE_HPC_CORETEST_F(ProgramWalkerTestsXeHpcCore, givenDebugVariableSetWhenProgramm
|
||||
size_t workSize[] = {1, 1, 1};
|
||||
Vec3<size_t> wgInfo = {1, 1, 1};
|
||||
|
||||
uint32_t iddIndex = 0;
|
||||
HardwareInterfaceWalkerArgs walkerArgs = createHardwareInterfaceWalkerArgs(workSize, wgInfo, PreemptionMode::Disabled);
|
||||
|
||||
size_t commandsOffset = 0;
|
||||
|
||||
{
|
||||
// default
|
||||
|
||||
HardwareInterface<FamilyType>::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue,
|
||||
nullptr, heap, heap, heap, workSize, workSize,
|
||||
PreemptionMode::Disabled, 0, iddIndex,
|
||||
dispatchInfo, 0, wgInfo, wgInfo);
|
||||
heap, heap, heap, dispatchInfo, walkerArgs);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(commandStream, 0);
|
||||
@ -138,9 +137,7 @@ XE_HPC_CORETEST_F(ProgramWalkerTestsXeHpcCore, givenDebugVariableSetWhenProgramm
|
||||
DebugManager.flags.ForceL3PrefetchForComputeWalker.set(1);
|
||||
|
||||
HardwareInterface<FamilyType>::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue,
|
||||
nullptr, heap, heap, heap, workSize, workSize,
|
||||
PreemptionMode::Disabled, 0, iddIndex,
|
||||
dispatchInfo, 0, wgInfo, wgInfo);
|
||||
heap, heap, heap, dispatchInfo, walkerArgs);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(commandStream, commandsOffset);
|
||||
@ -159,9 +156,7 @@ XE_HPC_CORETEST_F(ProgramWalkerTestsXeHpcCore, givenDebugVariableSetWhenProgramm
|
||||
DebugManager.flags.ForceL3PrefetchForComputeWalker.set(0);
|
||||
|
||||
HardwareInterface<FamilyType>::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue,
|
||||
nullptr, heap, heap, heap, workSize, workSize,
|
||||
PreemptionMode::Disabled, 0, iddIndex,
|
||||
dispatchInfo, 0, wgInfo, wgInfo);
|
||||
heap, heap, heap, dispatchInfo, walkerArgs);
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(commandStream, commandsOffset);
|
||||
|
Reference in New Issue
Block a user