add dispatch hints for more flixable control on workload dispatch

Change-Id: Iecfe3031172fd108a1ef0d77d2fff8ad3cef22b3
This commit is contained in:
Lindong Wu 2019-10-11 12:54:10 +08:00 committed by sys_ocldev
parent 539d688877
commit 9066192dc3
16 changed files with 96 additions and 1 deletions

View File

@ -423,6 +423,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
uint64_t getSliceCount() const { return sliceCount; } uint64_t getSliceCount() const { return sliceCount; }
// extend dispatch hints
uint64_t dispatchHints = 0;
protected: protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);

View File

@ -709,6 +709,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
} }
if (this->dispatchHints != 0) {
dispatchFlags.epilogueRequired = true;
}
if (gtpinIsGTPinInitialized()) { if (gtpinIsGTPinInitialized()) {
gtpinNotifyPreFlushTask(this); gtpinNotifyPreFlushTask(this);
} }

View File

@ -48,6 +48,7 @@ set(RUNTIME_SRCS_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp
${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_hw_ext.inl
) )
get_property(NEO_CORE_COMMAND_STREAM GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM) get_property(NEO_CORE_COMMAND_STREAM GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM)

View File

@ -53,6 +53,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForPipelineSelect() const; size_t getCmdSizeForPipelineSelect() const;
size_t getCmdSizeForComputeMode(); size_t getCmdSizeForComputeMode();
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const; size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags); void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override; void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
@ -88,6 +89,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads); void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags); void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){}; virtual void initPageTableManagerRegisters(LinearStream &csr){};
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd); void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd);
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream); PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);

View File

@ -31,6 +31,8 @@
#include "runtime/os_interface/os_context.h" #include "runtime/os_interface/os_context.h"
#include "runtime/utilities/tag_allocator.h" #include "runtime/utilities/tag_allocator.h"
#include "command_stream_receiver_hw_ext.inl"
namespace NEO { namespace NEO {
template <typename GfxFamily> template <typename GfxFamily>
@ -258,6 +260,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags); programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
} }
programEngineModeCommands(commandStreamCSR, dispatchFlags);
initPageTableManagerRegisters(commandStreamCSR); initPageTableManagerRegisters(commandStreamCSR);
programComputeMode(commandStreamCSR, dispatchFlags); programComputeMode(commandStreamCSR, dispatchFlags);
programL3(commandStreamCSR, dispatchFlags, newL3Config); programL3(commandStreamCSR, dispatchFlags, newL3Config);

View File

@ -57,12 +57,15 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() { void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() {
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(executionEnvironment, *internalAllocationStorage.get()); scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(executionEnvironment, *internalAllocationStorage.get());
} }
template <typename GfxFamily> template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { void CommandStreamReceiverHw<GfxFamily>::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
this->programEngineModeEpliogue(csr, dispatchFlags);
} }
template <typename GfxFamily> template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const { size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const {
return 0u; return this->getCmdSizeForEngineMode(dispatchFlags);
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@ -78,6 +78,7 @@ struct DispatchFlags {
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn; uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount; uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
uint64_t engineHints = 0;
bool blocking = false; bool blocking = false;
bool dcFlush = false; bool dcFlush = false;
bool useSLM = false; bool useSLM = false;

View File

@ -0,0 +1,28 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/command_stream/linear_stream.h"
#include "runtime/command_stream/command_stream_receiver_hw.h"
#include "runtime/command_stream/csr_definitions.h"
namespace NEO {
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags) {
}
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const {
return 0u;
}
} // namespace NEO

View File

@ -27,6 +27,8 @@ set(RUNTIME_SRCS_MEM_OBJ
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h ${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
) )
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ}) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})

View File

@ -182,6 +182,7 @@ class BufferHw : public Buffer {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override; void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument); void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
void appendSurfaceStateExt(void *memory);
static Buffer *create(Context *context, static Buffer *create(Context *context,
MemoryProperties properties, MemoryProperties properties,

View File

@ -14,6 +14,7 @@
#include "runtime/helpers/surface_formats.h" #include "runtime/helpers/surface_formats.h"
#include "runtime/mem_obj/buffer.h" #include "runtime/mem_obj/buffer.h"
#include "buffer_ext.inl"
#include "hw_cmds.h" #include "hw_cmds.h"
namespace NEO { namespace NEO {
@ -79,6 +80,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
} }
appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument); appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
appendSurfaceStateExt(memory);
} }
} // namespace NEO } // namespace NEO

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
template <typename GfxFamily>
void ImageHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

View File

@ -290,6 +290,7 @@ class ImageHw : public Image {
void setMediaSurfaceRotation(void *memory) override; void setMediaSurfaceRotation(void *memory) override;
void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override; void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState); void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
void appendSurfaceStateExt(void *memory);
void setFlagsForMediaCompression(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm); void setFlagsForMediaCompression(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
void transformImage2dArrayTo3d(void *memory) override; void transformImage2dArrayTo3d(void *memory) override;
void transformImage3dTo2dArray(void *memory) override; void transformImage3dTo2dArray(void *memory) override;

View File

@ -14,6 +14,7 @@
#include "runtime/mem_obj/image.h" #include "runtime/mem_obj/image.h"
#include "hw_cmds.h" #include "hw_cmds.h"
#include "image_ext.inl"
namespace NEO { namespace NEO {
@ -155,6 +156,7 @@ void ImageHw<GfxFamily>::setImageArg(void *memory, bool setAsMediaBlockImage, ui
setAuxParamsForCCS(surfaceState, gmm); setAuxParamsForCCS(surfaceState, gmm);
} }
appendSurfaceStateParams(surfaceState); appendSurfaceStateParams(surfaceState);
appendSurfaceStateExt(surfaceState);
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@ -1091,3 +1091,16 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
} }
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIsSet) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
size_t off[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
MockKernelWithInternals mockKernel(*pDevice);
pCmdQ->dispatchHints = 1;
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true);
}