add dispatch hints for more flixable control on workload dispatch

Change-Id: Iecfe3031172fd108a1ef0d77d2fff8ad3cef22b3
This commit is contained in:
Lindong Wu 2019-10-11 12:54:10 +08:00 committed by sys_ocldev
parent 539d688877
commit 9066192dc3
16 changed files with 96 additions and 1 deletions

View File

@ -423,6 +423,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
uint64_t getSliceCount() const { return sliceCount; }
// extend dispatch hints
uint64_t dispatchHints = 0;
protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);

View File

@ -709,6 +709,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
}
if (this->dispatchHints != 0) {
dispatchFlags.epilogueRequired = true;
}
if (gtpinIsGTPinInitialized()) {
gtpinNotifyPreFlushTask(this);
}

View File

@ -48,6 +48,7 @@ set(RUNTIME_SRCS_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp
${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_hw_ext.inl
)
get_property(NEO_CORE_COMMAND_STREAM GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM)

View File

@ -53,6 +53,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForPipelineSelect() const;
size_t getCmdSizeForComputeMode();
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags);
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
@ -88,6 +89,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){};
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd);
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);

View File

@ -31,6 +31,8 @@
#include "runtime/os_interface/os_context.h"
#include "runtime/utilities/tag_allocator.h"
#include "command_stream_receiver_hw_ext.inl"
namespace NEO {
template <typename GfxFamily>
@ -258,6 +260,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
}
programEngineModeCommands(commandStreamCSR, dispatchFlags);
initPageTableManagerRegisters(commandStreamCSR);
programComputeMode(commandStreamCSR, dispatchFlags);
programL3(commandStreamCSR, dispatchFlags, newL3Config);

View File

@ -57,12 +57,15 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::createScratchSpaceController() {
scratchSpaceController = std::make_unique<ScratchSpaceControllerBase>(executionEnvironment, *internalAllocationStorage.get());
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
this->programEngineModeEpliogue(csr, dispatchFlags);
}
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const {
return 0u;
return this->getCmdSizeForEngineMode(dispatchFlags);
}
template <typename GfxFamily>

View File

@ -78,6 +78,7 @@ struct DispatchFlags {
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
uint64_t engineHints = 0;
bool blocking = false;
bool dcFlush = false;
bool useSLM = false;

View File

@ -0,0 +1,28 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/command_stream/linear_stream.h"
#include "runtime/command_stream/command_stream_receiver_hw.h"
#include "runtime/command_stream/csr_definitions.h"
namespace NEO {
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) {
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags) {
}
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const {
return 0u;
}
} // namespace NEO

View File

@ -27,6 +27,8 @@ set(RUNTIME_SRCS_MEM_OBJ
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})

View File

@ -182,6 +182,7 @@ class BufferHw : public Buffer {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
void appendSurfaceStateExt(void *memory);
static Buffer *create(Context *context,
MemoryProperties properties,

View File

@ -14,6 +14,7 @@
#include "runtime/helpers/surface_formats.h"
#include "runtime/mem_obj/buffer.h"
#include "buffer_ext.inl"
#include "hw_cmds.h"
namespace NEO {
@ -79,6 +80,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
}
appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
appendSurfaceStateExt(memory);
}
} // namespace NEO

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
template <typename GfxFamily>
void ImageHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

View File

@ -290,6 +290,7 @@ class ImageHw : public Image {
void setMediaSurfaceRotation(void *memory) override;
void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
void appendSurfaceStateExt(void *memory);
void setFlagsForMediaCompression(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
void transformImage2dArrayTo3d(void *memory) override;
void transformImage3dTo2dArray(void *memory) override;

View File

@ -14,6 +14,7 @@
#include "runtime/mem_obj/image.h"
#include "hw_cmds.h"
#include "image_ext.inl"
namespace NEO {
@ -155,6 +156,7 @@ void ImageHw<GfxFamily>::setImageArg(void *memory, bool setAsMediaBlockImage, ui
setAuxParamsForCCS(surfaceState, gmm);
}
appendSurfaceStateParams(surfaceState);
appendSurfaceStateExt(surfaceState);
}
template <typename GfxFamily>

View File

@ -1091,3 +1091,16 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
}
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIsSet) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
size_t off[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
MockKernelWithInternals mockKernel(*pDevice);
pCmdQ->dispatchHints = 1;
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true);
}