Pass more information to programPipelineSelect.

Change-Id: Iaabe60742269b721f8defe71306dd6e87d60d546
This commit is contained in:
Piotr Fusik
2018-11-14 08:40:37 +01:00
committed by sys_ocldev
parent 9bdedc62dd
commit 76efeae9d8
13 changed files with 30 additions and 14 deletions

View File

@@ -526,6 +526,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
auto mediaSamplerRequired = false;
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
auto specialPipelineSelectMode = false;
Kernel *kernel = nullptr;
for (auto &dispatchInfo : multiDispatchInfo) {
if (kernel != dispatchInfo.getKernel()) {
@@ -538,6 +539,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
mediaSamplerRequired |= kernel->isVmeKernel();
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
}
if (mediaSamplerRequired) {
@@ -595,6 +597,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
}
dispatchFlags.numGrfRequired = numGrfRequired;
dispatchFlags.specialPipelineSelectMode = specialPipelineSelectMode;
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
if (gtpinIsGTPinInitialized()) {

View File

@@ -218,6 +218,7 @@ class CommandStreamReceiver {
bool stallingPipeControlOnNextFlushRequired = false;
bool timestampPacketWriteEnabled = false;
bool nTo1SubmissionModelEnabled = false;
bool lastSpecialPipelineSelectMode = false;
};
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump, ExecutionEnvironment &executionEnvironment);

View File

@@ -121,7 +121,7 @@ inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw<GfxFamily>::add
// Calls PreambleHelper<GfxFamily>::programPipelineSelect on commandStream when
// either the media sampler configuration changed or the preamble has not been
// sent yet, then records the media sampler setting in lastMediaSamplerConfig.
// NOTE(review): only mediaSamplerConfigChanged / isPreambleSent gate this path;
// the whole dispatchFlags object is forwarded, but a change in
// specialPipelineSelectMode alone does not trigger reprogramming here — confirm
// that is intended.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &commandStream, DispatchFlags &dispatchFlags) {
if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) {
// NOTE(review): the bool-argument call and the DispatchFlags call below look like
// the before/after lines of a single change — confirm only one is meant to remain.
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, dispatchFlags.mediaSamplerRequired);
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, dispatchFlags);
this->lastMediaSamplerConfig = dispatchFlags.mediaSamplerRequired;
}
}
@@ -239,6 +239,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.specialPipelineSelectMode;
size_t requiredScratchSizeInBytes = requiredScratchSize * device.getDeviceInfo().computeUnitsUsedForScratch;

View File

@@ -45,6 +45,7 @@ struct DispatchFlags {
PreemptionMode preemptionMode = PreemptionMode::Disabled;
EventsRequest *outOfDeviceDependencies = nullptr;
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
bool specialPipelineSelectMode = false;
};
struct CsrSizeRequestFlags {
@@ -54,5 +55,6 @@ struct CsrSizeRequestFlags {
bool mediaSamplerConfigChanged = false;
bool hasSharedHandles = false;
bool numGrfRequiredChanged = false;
bool specialPipelineSelectModeChanged = false;
};
} // namespace OCLRT

View File

@@ -5,8 +5,9 @@
*
*/
#include "runtime/helpers/preamble.inl"
#include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/command_stream/csr_definitions.h"
#include "runtime/helpers/preamble.inl"
namespace OCLRT {
@@ -67,7 +68,7 @@ void PreambleHelper<CNLFamily>::addPipeControlBeforeVfeCmd(LinearStream *pComman
}
template <>
void PreambleHelper<CNLFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
void PreambleHelper<CNLFamily>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
typedef typename CNLFamily::PIPELINE_SELECT PIPELINE_SELECT;
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
@@ -76,7 +77,7 @@ void PreambleHelper<CNLFamily>::programPipelineSelect(LinearStream *pCommandStre
auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
pCmd->setMaskBits(mask);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
pCmd->setMediaSamplerDopClockGateEnable(!mediaSamplerRequired);
pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired);
}
template struct PreambleHelper<CNLFamily>;

View File

@@ -32,7 +32,7 @@ uint32_t PreambleHelper<BDWFamily>::getL3Config(const HardwareInfo &hwInfo, bool
}
template <>
void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT;
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
*pCmd = PIPELINE_SELECT::sInit();

View File

@@ -5,6 +5,7 @@
*
*/
#include "runtime/command_stream/csr_definitions.h"
#include "runtime/helpers/preamble.inl"
namespace OCLRT {
@@ -27,7 +28,7 @@ uint32_t PreambleHelper<SKLFamily>::getL3Config(const HardwareInfo &hwInfo, bool
}
template <>
void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;
auto pCmd = (PIPELINE_SELECT *)pCommandStream->getSpace(sizeof(PIPELINE_SELECT));
@@ -36,7 +37,7 @@ void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStre
auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
pCmd->setMaskBits(mask);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
pCmd->setMediaSamplerDopClockGateEnable(!mediaSamplerRequired);
pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired);
}
template <>

View File

@@ -6,7 +6,7 @@
*/
#pragma once
#include "stdint.h"
#include <cstdint>
namespace OCLRT {
const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3;

View File

@@ -7,14 +7,15 @@
#pragma once
#include "igfxfmid.h"
#include "stdint.h"
#include "runtime/helpers/pipeline_select_helper.h"
#include <cstddef>
#include <cstdint>
namespace OCLRT {
struct HardwareInfo;
class Device;
struct DispatchFlags;
class GraphicsAllocation;
class LinearStream;
@@ -26,7 +27,7 @@ struct PreambleHelper {
static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; }
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired);
static void programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags);
static uint32_t getDefaultThreadArbitrationPolicy();
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr);

View File

@@ -206,6 +206,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
}
dispatchFlags.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@@ -276,15 +276,16 @@ class Kernel : public BaseObject<_cl_kernel> {
const void *getKernelArg(uint32_t argIndex) const;
const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const;
bool getAllowNonUniform() { return program->getAllowNonUniform(); }
bool isVmeKernel() { return kernelInfo.isVmeWorkload; };
bool getAllowNonUniform() const { return program->getAllowNonUniform(); }
bool isVmeKernel() const { return kernelInfo.isVmeWorkload; }
// Returns the kernel's specialPipelineSelectMode flag.
bool requiresSpecialPipelineSelectMode() const {
    return specialPipelineSelectMode;
}
//residency for kernel surfaces
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
bool requiresCoherency();
void resetSharedObjectsPatchAddresses();
bool isUsingSharedObjArgs() { return usingSharedObjArgs; }
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
bool hasPrintfOutput() const;
@@ -490,5 +491,7 @@ class Kernel : public BaseObject<_cl_kernel> {
std::vector<PatchInfoData> patchInfoDataList;
std::unique_ptr<ImageTransformer> imageTransformer;
bool specialPipelineSelectMode = false;
};
} // namespace OCLRT

View File

@@ -158,7 +158,7 @@ uint32_t PreambleHelper<GENX>::getL3Config(const HardwareInfo &hwInfo, bool useS
}
// GENX specialization: the body is empty, so neither pCommandStream nor the
// second argument is used.
// NOTE(review): the two signature lines below look like the before/after lines
// of a single change — confirm only the DispatchFlags overload is meant to remain.
template <>
void PreambleHelper<GENX>::programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired) {
void PreambleHelper<GENX>::programPipelineSelect(LinearStream *pCommandStream, const DispatchFlags &dispatchFlags) {
}
template <>

View File

@@ -225,6 +225,8 @@ class MockKernel : public Kernel {
Kernel::releaseOwnership();
}
// Overwrites the specialPipelineSelectMode member with the given value.
void setSpecialPipelineSelectMode(bool value) {
    specialPipelineSelectMode = value;
}
uint32_t makeResidentCalls = 0;
uint32_t getResidencyCalls = 0;
mutable uint32_t takeOwnershipCalls = 0;