Program preemption mode in Interface Descriptor Data

Change-Id: I7fce731d71dd0b6dc8505ebfe45d24c65898a08b
This commit is contained in:
Zdanowicz, Zbigniew
2018-03-01 22:43:04 +01:00
committed by sys_ocldev
parent bee295415f
commit 533afe472a
25 changed files with 226 additions and 90 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -433,6 +433,7 @@ void dispatchWalker(
KernelOperation **blockedCommandsData,
HwTimeStamps *hwTimeStamps,
OCLRT::HwPerfCounter *hwPerfCounter,
PreemptionMode preemptionMode,
bool blockQueue = false,
unsigned int commandType = 0) {
@@ -586,7 +587,8 @@ void dispatchWalker(
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
interfaceDescriptorIndex);
interfaceDescriptorIndex,
preemptionMode);
if (&dispatchInfo == &*multiDispatchInfo.begin()) {
// If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled
@@ -659,17 +661,19 @@ void dispatchWalker(
KernelOperation **blockedCommandsData,
HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter,
PreemptionMode preemptionMode,
bool blockQueue = false) {
DispatchInfo dispatchInfo(const_cast<Kernel *>(&kernel), workDim, workItems, localWorkSizesIn, globalOffsets);
dispatchWalker<GfxFamily>(commandQueue, dispatchInfo, numEventsInWaitList, eventWaitList,
blockedCommandsData, hwTimeStamps, hwPerfCounter, blockQueue);
blockedCommandsData, hwTimeStamps, hwPerfCounter, preemptionMode, blockQueue);
}
template <typename GfxFamily>
void dispatchScheduler(
CommandQueue &commandQueue,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler) {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
@@ -754,7 +758,8 @@ void dispatchScheduler(
simd,
localWorkSizes,
offsetInterfaceDescriptorTable,
interfaceDescriptorIndex);
interfaceDescriptorIndex,
preemptionMode);
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, scheduler, true);

View File

@@ -183,6 +183,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
std::unique_ptr<PrintfHandler> printfHandler;
bool slmUsed = false;
EngineType engineType = device->getEngineType();
auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
auto blockQueue = false;
@@ -244,6 +245,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
&blockedCommandsData,
hwTimeStamps,
hwPerfCounter,
preemption,
blockQueue,
commandType);
@@ -282,6 +284,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
dispatchScheduler<GfxFamily>(
*this,
*devQueueHw,
preemption,
scheduler);
scheduler.makeResident(commandStreamReceiver);
@@ -638,7 +641,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
allSurfaces.push_back(surface->duplicate());
}
PreemptionMode preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
auto cmd = std::unique_ptr<Command>(new CommandComputeKernel(
*this,
@@ -649,6 +652,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
slmUsed,
commandType == CL_COMMAND_NDRANGE_KERNEL,
std::move(printfHandler),
preemptionMode,
multiDispatchInfo.begin()->getKernel(),
(uint32_t)multiDispatchInfo.size()));
eventBuilder->getEvent()->setCommand(std::move(cmd));

View File

@@ -32,6 +32,9 @@ struct MultiDispatchInfo;
class PreemptionHelper {
public:
template <typename CmdFamily>
using INTERFACE_DESCRIPTOR_DATA = typename CmdFamily::INTERFACE_DESCRIPTOR_DATA;
static PreemptionMode taskPreemptionMode(Device &device, Kernel *kernel);
static PreemptionMode taskPreemptionMode(Device &device, const MultiDispatchInfo &multiDispatchInfo);
static bool allowThreadGroupPreemption(Kernel *kernel, const WorkaroundTable *waTable);
@@ -65,6 +68,9 @@ class PreemptionHelper {
static void applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device);
static PreemptionMode getDefaultPreemptionMode(const HardwareInfo &hwInfo);
template <typename GfxFamily>
static void programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
};
template <typename GfxFamily>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -130,4 +130,8 @@ size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) {
return sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS) + sizeof(typename GfxFamily::STATE_SIP);
}
// Generic definition of the interface-descriptor preemption hook.
// The body is intentionally empty here: neither `idd` nor `preemptionMode`
// is read or modified by this definition.
// NOTE(review): presumably GPU families that need per-descriptor preemption
// programming supply their own specialization - confirm.
template <typename GfxFamily>
void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode) {
}
} // namespace OCLRT

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -173,7 +173,7 @@ void DeviceQueue::resetDeviceQueue() {
return;
}
// Base-class implementation of the virtual dispatchScheduler: it returns
// immediately and uses none of its arguments. DeviceQueueHw declares an
// override of this method.
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler) {
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
return;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@
#pragma once
#include "runtime/api/cl_types.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/indirect_heap/indirect_heap.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/execution_model/device_enqueue.h"
@@ -92,7 +93,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
}
virtual void resetDeviceQueue();
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler);
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode);
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
void acquireEMCriticalSection() {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -76,7 +76,7 @@ class DeviceQueueHw : public DeviceQueue {
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
void resetDeviceQueue() override;
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler) override;
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override;
uint32_t getSchedulerReturnInstance() {
return igilQueue->m_controls.m_SchedulerEarlyReturn;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -385,9 +385,10 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
}
// Hardware-specific override of dispatchScheduler. It forwards to the free
// function OCLRT::dispatchScheduler<GfxFamily>, passing the command queue,
// this device queue, the preemption mode and the scheduler kernel, and
// produces no result of its own.
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler) {
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
OCLRT::dispatchScheduler<GfxFamily>(cmdQ,
*this,
// Same qualified name as this member, but resolves to the namespace-level template.
preemptionMode,
scheduler);
return;
}

View File

@@ -70,5 +70,6 @@ template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(Preemption
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
} // namespace OCLRT

View File

@@ -23,6 +23,7 @@
#include <cstring>
#include "runtime/built_ins/built_ins.h"
#include "runtime/command_stream/csr_definitions.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/command_stream/preemption.inl"
@@ -52,4 +53,5 @@ template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(Preemption
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
} // namespace OCLRT

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -60,7 +60,8 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
uint32_t numSamplers,
uint32_t threadsPerThreadGroup,
uint32_t sizeSlm,
bool barrierEnable);
bool barrierEnable,
PreemptionMode preemptionMode);
static void sendMediaStateFlush(
LinearStream &commandStream,
@@ -103,7 +104,8 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,
const uint32_t interfaceDescriptorIndex);
const uint32_t interfaceDescriptorIndex,
PreemptionMode preemptionMode);
static size_t getSizeRequiredCS();
static bool isPipeControlWArequired();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,6 +21,8 @@
*/
#include "runtime/command_queue/local_id_gen.h"
#include "runtime/command_stream/csr_definitions.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/basic_math.h"
#include "runtime/helpers/dispatch_info.h"
@@ -171,12 +173,13 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
uint32_t numSamplers,
uint32_t threadsPerThreadGroup,
uint32_t sizeSlm,
bool barrierEnable) {
bool barrierEnable,
PreemptionMode preemptionMode) {
typedef typename GfxFamily::SAMPLER_STATE SAMPLER_STATE;
typedef typename GfxFamily::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
// Allocate some memory for the interface descriptor
auto pInterfaceDescriptor = (INTERFACE_DESCRIPTOR_DATA *)ptrOffset(indirectHeap.getBase(), (size_t)offsetInterfaceDescriptor);
auto pInterfaceDescriptor = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(indirectHeap.getBase(), (size_t)offsetInterfaceDescriptor));
*pInterfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData;
// Program the kernel start pointer
@@ -211,6 +214,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
pInterfaceDescriptor->setSharedLocalMemorySize(programmableIDSLMSize);
pInterfaceDescriptor->setBarrierEnable(barrierEnable);
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(pInterfaceDescriptor, preemptionMode);
return (size_t)offsetInterfaceDescriptor;
}
@@ -326,7 +331,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,
const uint32_t interfaceDescriptorIndex) {
const uint32_t interfaceDescriptorIndex,
PreemptionMode preemptionMode) {
typedef typename GfxFamily::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
typedef typename GfxFamily::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
@@ -410,7 +416,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
samplerCount,
threadsPerThreadGroup,
kernel.slmTotalSize,
!!patchInfo.executionEnvironment->HasBarriers);
!!patchInfo.executionEnvironment->HasBarriers,
preemptionMode);
// Program media state flush to set interface descriptor offset
KernelCommandsHelper<GfxFamily>::sendMediaStateFlush(

View File

@@ -105,7 +105,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, CommandStreamReceiver &commandStreamReceiver,
std::unique_ptr<KernelOperation> kernelOperation, std::vector<Surface *> &surfaces,
bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler, Kernel *kernel, uint32_t kernelCount)
bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
: commandQueue(commandQueue),
commandStreamReceiver(commandStreamReceiver),
kernelOperation(std::move(kernelOperation)),
@@ -123,6 +124,7 @@ CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, CommandSt
kernel->incRefInternal();
}
this->kernelCount = kernelCount;
this->preemptionMode = preemptionMode;
}
CommandComputeKernel::~CommandComputeKernel() {
@@ -240,7 +242,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
devQueue->dispatchScheduler(
commandQueue,
scheduler);
scheduler,
preemptionMode);
scheduler.makeResident(commandStreamReceiver);
@@ -257,7 +260,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW;
dispatchFlags.throttle = commandQueue.getThrottle();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), kernel);
dispatchFlags.preemptionMode = preemptionMode;
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,7 +24,8 @@
#include "runtime/command_stream/linear_stream.h"
#include "runtime/indirect_heap/indirect_heap.h"
#include "runtime/utilities/iflist.h"
#include "runtime/helpers//completion_stamp.h"
#include "runtime/helpers/completion_stamp.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/properties_helper.h"
#include <memory>
@@ -99,7 +100,9 @@ struct KernelOperation {
class CommandComputeKernel : public Command {
public:
CommandComputeKernel(CommandQueue &commandQueue, CommandStreamReceiver &commandStreamReceiver,
std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces, bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler, Kernel *kernel = nullptr, uint32_t kernelCount = 0);
std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces,
bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
PreemptionMode preemptionMode, Kernel *kernel = nullptr, uint32_t kernelCount = 0);
~CommandComputeKernel() override;
@@ -120,6 +123,7 @@ class CommandComputeKernel : public Command {
std::unique_ptr<PrintfHandler> printfHandler;
Kernel *kernel;
uint32_t kernelCount;
PreemptionMode preemptionMode;
};
class CommandMarker : public Command {