add dispatch hints for more flixable control on workload dispatch
Change-Id: Iecfe3031172fd108a1ef0d77d2fff8ad3cef22b3
This commit is contained in:
parent
539d688877
commit
9066192dc3
|
@ -423,6 +423,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
|
|
||||||
uint64_t getSliceCount() const { return sliceCount; }
|
uint64_t getSliceCount() const { return sliceCount; }
|
||||||
|
|
||||||
|
// extend dispatch hints
|
||||||
|
uint64_t dispatchHints = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||||
|
|
|
@ -709,6 +709,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||||
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
|
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this->dispatchHints != 0) {
|
||||||
|
dispatchFlags.epilogueRequired = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (gtpinIsGTPinInitialized()) {
|
if (gtpinIsGTPinInitialized()) {
|
||||||
gtpinNotifyPreFlushTask(this);
|
gtpinNotifyPreFlushTask(this);
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,7 @@ set(RUNTIME_SRCS_COMMAND_STREAM
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl
|
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h
|
${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_hw_ext.inl
|
||||||
)
|
)
|
||||||
|
|
||||||
get_property(NEO_CORE_COMMAND_STREAM GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM)
|
get_property(NEO_CORE_COMMAND_STREAM GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM)
|
||||||
|
|
|
@ -53,6 +53,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||||
size_t getCmdSizeForPipelineSelect() const;
|
size_t getCmdSizeForPipelineSelect() const;
|
||||||
size_t getCmdSizeForComputeMode();
|
size_t getCmdSizeForComputeMode();
|
||||||
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
|
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
|
||||||
|
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
|
||||||
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
|
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||||
|
|
||||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||||
|
@ -88,6 +89,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||||
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
|
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
|
||||||
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
|
||||||
virtual void initPageTableManagerRegisters(LinearStream &csr){};
|
virtual void initPageTableManagerRegisters(LinearStream &csr){};
|
||||||
|
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||||
|
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
|
||||||
|
|
||||||
void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd);
|
void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd);
|
||||||
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);
|
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);
|
||||||
|
|
|
@ -31,6 +31,8 @@
|
||||||
#include "runtime/os_interface/os_context.h"
|
#include "runtime/os_interface/os_context.h"
|
||||||
#include "runtime/utilities/tag_allocator.h"
|
#include "runtime/utilities/tag_allocator.h"
|
||||||
|
|
||||||
|
#include "command_stream_receiver_hw_ext.inl"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
@ -258,6 +260,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||||
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
|
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
programEngineModeCommands(commandStreamCSR, dispatchFlags);
|
||||||
initPageTableManagerRegisters(commandStreamCSR);
|
initPageTableManagerRegisters(commandStreamCSR);
|
||||||
programComputeMode(commandStreamCSR, dispatchFlags);
|
programComputeMode(commandStreamCSR, dispatchFlags);
|
||||||
programL3(commandStreamCSR, dispatchFlags, newL3Config);
|
programL3(commandStreamCSR, dispatchFlags, newL3Config);
|
||||||
|
|
|
@ -57,12 +57,15 @@ template <typename GfxFamily>
|
||||||
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() {
|
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() {
|
||||||
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(executionEnvironment, *internalAllocationStorage.get());
|
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(executionEnvironment, *internalAllocationStorage.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void CommandStreamReceiverHw<GfxFamily>::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
void CommandStreamReceiverHw<GfxFamily>::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
||||||
|
this->programEngineModeEpliogue(csr, dispatchFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const {
|
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const {
|
||||||
return 0u;
|
return this->getCmdSizeForEngineMode(dispatchFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|
|
@ -78,6 +78,7 @@ struct DispatchFlags {
|
||||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||||
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
|
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
|
||||||
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
|
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
|
||||||
|
uint64_t engineHints = 0;
|
||||||
bool blocking = false;
|
bool blocking = false;
|
||||||
bool dcFlush = false;
|
bool dcFlush = false;
|
||||||
bool useSLM = false;
|
bool useSLM = false;
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "core/command_stream/linear_stream.h"
|
||||||
|
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
||||||
|
#include "runtime/command_stream/csr_definitions.h"
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void CommandStreamReceiverHw<GfxFamily>::programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void CommandStreamReceiverHw<GfxFamily>::programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags) {
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace NEO
|
|
@ -27,6 +27,8 @@ set(RUNTIME_SRCS_MEM_OBJ
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
|
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
|
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
|
||||||
)
|
)
|
||||||
|
|
||||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})
|
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})
|
||||||
|
|
|
@ -182,6 +182,7 @@ class BufferHw : public Buffer {
|
||||||
|
|
||||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
|
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
|
||||||
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
|
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
|
||||||
|
void appendSurfaceStateExt(void *memory);
|
||||||
|
|
||||||
static Buffer *create(Context *context,
|
static Buffer *create(Context *context,
|
||||||
MemoryProperties properties,
|
MemoryProperties properties,
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include "runtime/helpers/surface_formats.h"
|
#include "runtime/helpers/surface_formats.h"
|
||||||
#include "runtime/mem_obj/buffer.h"
|
#include "runtime/mem_obj/buffer.h"
|
||||||
|
|
||||||
|
#include "buffer_ext.inl"
|
||||||
#include "hw_cmds.h"
|
#include "hw_cmds.h"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
@ -79,6 +80,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
|
||||||
}
|
}
|
||||||
|
|
||||||
appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
|
appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
|
||||||
|
appendSurfaceStateExt(memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace NEO
|
|
@ -0,0 +1,14 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void ImageHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace NEO
|
|
@ -290,6 +290,7 @@ class ImageHw : public Image {
|
||||||
void setMediaSurfaceRotation(void *memory) override;
|
void setMediaSurfaceRotation(void *memory) override;
|
||||||
void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
|
void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
|
||||||
void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
|
void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
|
||||||
|
void appendSurfaceStateExt(void *memory);
|
||||||
void setFlagsForMediaCompression(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
|
void setFlagsForMediaCompression(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
|
||||||
void transformImage2dArrayTo3d(void *memory) override;
|
void transformImage2dArrayTo3d(void *memory) override;
|
||||||
void transformImage3dTo2dArray(void *memory) override;
|
void transformImage3dTo2dArray(void *memory) override;
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include "runtime/mem_obj/image.h"
|
#include "runtime/mem_obj/image.h"
|
||||||
|
|
||||||
#include "hw_cmds.h"
|
#include "hw_cmds.h"
|
||||||
|
#include "image_ext.inl"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
|
@ -155,6 +156,7 @@ void ImageHw<GfxFamily>::setImageArg(void *memory, bool setAsMediaBlockImage, ui
|
||||||
setAuxParamsForCCS(surfaceState, gmm);
|
setAuxParamsForCCS(surfaceState, gmm);
|
||||||
}
|
}
|
||||||
appendSurfaceStateParams(surfaceState);
|
appendSurfaceStateParams(surfaceState);
|
||||||
|
appendSurfaceStateExt(surfaceState);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|
|
@ -1091,3 +1091,16 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
|
||||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
|
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIsSet) {
|
||||||
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
size_t off[3] = {0, 0, 0};
|
||||||
|
size_t gws[3] = {1, 1, 1};
|
||||||
|
|
||||||
|
MockKernelWithInternals mockKernel(*pDevice);
|
||||||
|
pCmdQ->dispatchHints = 1;
|
||||||
|
|
||||||
|
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
|
||||||
|
|
||||||
|
EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue