Pass the command queue's flush stamp reference in DispatchFlags.

Summary of what the hunks below change:
  * enqueueCommandWithoutKernel, CommandMapUnmap::submit,
    CommandComputeKernel::submit and CommandWithoutKernel::submit pass
    flushStamp->getStampReference() instead of nullptr as flushStampReference.
  * CommandMapUnmap::submit passes useSLM = false (was true).
  * CommandMapUnmap::submit and CommandComputeKernel::submit take
    outOfOrderExecutionAllowed from isNTo1SubmissionModelEnabled() (was false).
  * CommandComputeKernel::submit sets pipelineSelectArgs in the DispatchFlags
    constructor call instead of assigning mediaSamplerRequired afterwards.
  * The DispatchFlagsTests fixture moves to its own header, and tests checking
    the flags passed by each submit() path are added.

NOTE(review): this patch was recovered from a copy in which line breaks,
indentation and C++ template argument lists ("<...>") had been stripped.
Line breaks are restored so that every hunk matches its @@ line counts.
Template arguments are reconstructed from how the names are used inside the
patch and must be confirmed against the repository. Indentation is restored
by convention and trailing-comment column alignment is NOT restored, so apply
with whitespace-insensitive matching (git apply --ignore-whitespace, patch -l).

diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h
index c48d71979e..3bf40de4b1 100644
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -854,7 +854,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
     DispatchFlags dispatchFlags(
         {}, //csrDependencies
         {}, //pipelineSelectArgs
-        nullptr, //flushStampReference
+        flushStamp->getStampReference(), //flushStampReference
         QueueThrottle::MEDIUM, //throttle
         device->getPreemptionMode(), //preemptionMode
         GrfConfig::DefaultGrfNumber, //numGrfRequired
diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp
index 8364207827..5e648a491b 100644
--- a/runtime/helpers/task_information.cpp
+++ b/runtime/helpers/task_information.cpp
@@ -45,25 +45,25 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
     size_t offset = queueCommandStream.getUsed();
 
     DispatchFlags dispatchFlags(
-        {}, //csrDependencies
-        {}, //pipelineSelectArgs
-        nullptr, //flushStampReference
-        commandQueue.getThrottle(), //throttle
-        PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), nullptr), //preemptionMode
-        GrfConfig::DefaultGrfNumber, //numGrfRequired
-        L3CachingSettings::l3CacheOn, //l3CacheSettings
-        commandQueue.getSliceCount(), //sliceCount
-        true, //blocking
-        true, //dcFlush
-        true, //useSLM
-        true, //guardCommandBufferWithPipeControl
-        false, //GSBA32BitRequired
-        false, //requiresCoherency
-        commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
-        false, //implicitFlush
-        false, //outOfOrderExecutionAllowed
-        commandQueue.isMultiEngineQueue(), //multiEngineQueue
-        false //epilogueRequired
+        {}, //csrDependencies
+        {}, //pipelineSelectArgs
+        commandQueue.flushStamp->getStampReference(), //flushStampReference
+        commandQueue.getThrottle(), //throttle
+        PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), nullptr), //preemptionMode
+        GrfConfig::DefaultGrfNumber, //numGrfRequired
+        L3CachingSettings::l3CacheOn, //l3CacheSettings
+        commandQueue.getSliceCount(), //sliceCount
+        true, //blocking
+        true, //dcFlush
+        false, //useSLM
+        true, //guardCommandBufferWithPipeControl
+        false, //GSBA32BitRequired
+        false, //requiresCoherency
+        commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
+        false, //implicitFlush
+        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
+        commandQueue.isMultiEngineQueue(), //multiEngineQueue
+        false //epilogueRequired
     );
 
     DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@@ -187,27 +187,26 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
     }
 
     DispatchFlags dispatchFlags(
-        {}, //csrDependencies
-        {}, //pipelineSelectArgs
-        nullptr, //flushStampReference
-        commandQueue.getThrottle(), //throttle
-        preemptionMode, //preemptionMode
-        kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired
-        L3CachingSettings::l3CacheOn, //l3CacheSettings
-        commandQueue.getSliceCount(), //sliceCount
-        true, //blocking
-        flushDC, //dcFlush
-        slmUsed, //useSLM
-        true, //guardCommandBufferWithPipeControl
-        NDRangeKernel, //GSBA32BitRequired
-        requiresCoherency, //requiresCoherency
-        commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
-        false, //implicitFlush
-        false, //outOfOrderExecutionAllowed
-        commandQueue.isMultiEngineQueue(), //multiEngineQueue
-        false //epilogueRequired
+        {}, //csrDependencies
+        {false, kernel->isVmeKernel()}, //pipelineSelectArgs
+        commandQueue.flushStamp->getStampReference(), //flushStampReference
+        commandQueue.getThrottle(), //throttle
+        preemptionMode, //preemptionMode
+        kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired
+        L3CachingSettings::l3CacheOn, //l3CacheSettings
+        commandQueue.getSliceCount(), //sliceCount
+        true, //blocking
+        flushDC, //dcFlush
+        slmUsed, //useSLM
+        true, //guardCommandBufferWithPipeControl
+        NDRangeKernel, //GSBA32BitRequired
+        requiresCoherency, //requiresCoherency
+        commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
+        false, //implicitFlush
+        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
+        commandQueue.isMultiEngineQueue(), //multiEngineQueue
+        false //epilogueRequired
     );
-    dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = kernel->isVmeKernel();
 
     if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
         dispatchFlags.csrDependencies.fillFromEventsRequest(eventsRequest, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
@@ -282,7 +281,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
     DispatchFlags dispatchFlags(
         {}, //csrDependencies
         {}, //pipelineSelectArgs
-        nullptr, //flushStampReference
+        commandQueue.flushStamp->getStampReference(), //flushStampReference
         commandQueue.getThrottle(), //throttle
         commandQueue.getDevice().getPreemptionMode(), //preemptionMode
         GrfConfig::DefaultGrfNumber, //numGrfRequired
diff --git a/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp b/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp
index 7c0c7c80f8..ba37e64118 100644
--- a/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp
+++ b/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp
@@ -5,13 +5,13 @@
  *
  */
 
-#include "core/unit_tests/helpers/debug_manager_state_restore.h"
 #include "runtime/event/event_builder.h"
 #include "runtime/event/user_event.h"
 #include "runtime/helpers/timestamp_packet.h"
 #include "runtime/memory_manager/surface.h"
 #include "runtime/os_interface/os_context.h"
 #include "test.h"
+#include "unit_tests/fixtures/dispatch_flags_fixture.h"
 #include "unit_tests/fixtures/enqueue_handler_fixture.h"
 #include "unit_tests/mocks/mock_command_queue.h"
 #include "unit_tests/mocks/mock_csr.h"
@@ -103,19 +103,6 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
     EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitProperties.dstAllocation);
 }
 
-struct DispatchFlagsTests : public ::testing::Test {
-    template <typename CsrType>
-    void SetUpImpl() {
-        auto executionEnvironment = new MockExecutionEnvironmentWithCsr<CsrType>(**platformDevices, 1u);
-        device.reset(MockDevice::createWithExecutionEnvironment<MockDevice>(*platformDevices, executionEnvironment, 0));
-        context = std::make_unique<MockContext>(device.get());
-    }
-
-    std::unique_ptr<MockDevice> device;
-    std::unique_ptr<MockContext> context;
-    DebugManagerStateRestore restore;
-};
-
 HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispatchFlags) {
     using CsrType = MockCsrHw2<FamilyType>;
     SetUpImpl<CsrType>();
@@ -136,6 +123,7 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
     EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
     EXPECT_EQ(mockCmdQ->isMultiEngineQueue(), mockCsr->passedDispatchFlags.multiEngineQueue);
     EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);
+    EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference);
 }
 
 HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
diff --git a/unit_tests/fixtures/CMakeLists.txt b/unit_tests/fixtures/CMakeLists.txt
index 675100aa10..b13b1dd098 100644
--- a/unit_tests/fixtures/CMakeLists.txt
+++ b/unit_tests/fixtures/CMakeLists.txt
@@ -11,6 +11,7 @@ set(IGDRCL_SRCS_tests_fixtures
   ${CMAKE_CURRENT_SOURCE_DIR}/context_fixture.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/context_fixture.h
   ${CMAKE_CURRENT_SOURCE_DIR}/d3d_test_fixture.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_fixture.h
   ${CMAKE_CURRENT_SOURCE_DIR}/device_host_queue_fixture.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/device_host_queue_fixture.h
   ${CMAKE_CURRENT_SOURCE_DIR}/device_info_fixture.h
diff --git a/unit_tests/fixtures/dispatch_flags_fixture.h b/unit_tests/fixtures/dispatch_flags_fixture.h
new file mode 100644
index 0000000000..b7d0ac2ada
--- /dev/null
+++ b/unit_tests/fixtures/dispatch_flags_fixture.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+#include "core/unit_tests/helpers/debug_manager_state_restore.h"
+#include "runtime/os_interface/os_context.h"
+#include "unit_tests/mocks/mock_context.h"
+#include "unit_tests/mocks/mock_csr.h"
+#include "unit_tests/mocks/mock_device.h"
+#include "unit_tests/mocks/mock_execution_environment.h"
+
+namespace NEO {
+struct DispatchFlagsTests : public ::testing::Test {
+    template <typename CsrType>
+    void SetUpImpl() {
+        auto executionEnvironment = new MockExecutionEnvironmentWithCsr<CsrType>(**platformDevices, 1u);
+        device.reset(MockDevice::createWithExecutionEnvironment<MockDevice>(*platformDevices, executionEnvironment, 0));
+        context = std::make_unique<MockContext>(device.get());
+    }
+
+    std::unique_ptr<MockDevice> device;
+    std::unique_ptr<MockContext> context;
+    DebugManagerStateRestore restore;
+};
+} // namespace NEO
diff --git a/unit_tests/helpers/task_information_tests.cpp b/unit_tests/helpers/task_information_tests.cpp
index 38078efa68..48b99593cb 100644
--- a/unit_tests/helpers/task_information_tests.cpp
+++ b/unit_tests/helpers/task_information_tests.cpp
@@ -10,6 +10,7 @@
 #include "runtime/memory_manager/internal_allocation_storage.h"
 #include "test.h"
 #include "unit_tests/fixtures/device_fixture.h"
+#include "unit_tests/fixtures/dispatch_flags_fixture.h"
 #include "unit_tests/mocks/mock_buffer.h"
 #include "unit_tests/mocks/mock_command_queue.h"
 #include "unit_tests/mocks/mock_csr.h"
@@ -153,3 +154,136 @@ TEST(KernelOperationDestruction, givenKernelOperationWhenItIsDestructedThenAllAl
     EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation2));
     EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation3));
 }
+
+template <typename GfxFamily>
+class MockCsr1 : public CommandStreamReceiverHw<GfxFamily> {
+  public:
+    CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
+                              const IndirectHeap &dsh, const IndirectHeap &ioh,
+                              const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+        passedDispatchFlags = dispatchFlags;
+        return CompletionStamp();
+    }
+    MockCsr1(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment) {}
+    DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
+    using CommandStreamReceiver::timestampPacketWriteEnabled;
+};
+
+HWTEST_F(DispatchFlagsTests, givenCommandMapUnmapWhenSubmitThenPassCorrectDispatchFlags) {
+    using CsrType = MockCsr1<FamilyType>;
+    SetUpImpl<CsrType>();
+
+    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
+    auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
+
+    MockBuffer buffer;
+
+    MemObjSizeArray size = {{1, 1, 1}};
+    MemObjOffsetArray offset = {{0, 0, 0}};
+    std::unique_ptr<Command> command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *mockCmdQ));
+    command->submit(20, false);
+
+    EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference);
+    EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle);
+    EXPECT_EQ(PreemptionHelper::taskPreemptionMode(mockCmdQ->getDevice(), nullptr), mockCsr->passedDispatchFlags.preemptionMode);
+    EXPECT_EQ(GrfConfig::DefaultGrfNumber, mockCsr->passedDispatchFlags.numGrfRequired);
+    EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.requiresCoherency);
+    EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
+    EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
+    EXPECT_EQ(mockCmdQ->isMultiEngineQueue(), mockCsr->passedDispatchFlags.multiEngineQueue);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired);
+}
+
+HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchFlags) {
+    using CsrType = MockCsr1<FamilyType>;
+    SetUpImpl<CsrType>();
+    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
+    auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
+
+    IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr;
+    mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1);
+    mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2);
+    mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3);
+
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
+
+    std::vector<Surface *> surfaces;
+    auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    MockKernelWithInternals kernel(*device);
+    kernelOperation->setHeaps(ih1, ih2, ih3);
+
+    bool flushDC = false;
+    bool slmUsed = false;
+    bool ndRangeKernel = false;
+    bool requiresCoherency = false;
+    for (auto &surface : surfaces) {
+        requiresCoherency |= surface->IsCoherent;
+    }
+    std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
+    command->submit(20, false);
+
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.specialPipelineSelectMode);
+    EXPECT_EQ(kernel.mockKernel->isVmeKernel(), mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
+    EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference);
+    EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle);
+    EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode);
+    EXPECT_EQ(kernel.mockKernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, mockCsr->passedDispatchFlags.numGrfRequired);
+    EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
+    EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush);
+    EXPECT_EQ(slmUsed, mockCsr->passedDispatchFlags.useSLM);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
+    EXPECT_EQ(ndRangeKernel, mockCsr->passedDispatchFlags.gsba32BitRequired);
+    EXPECT_EQ(requiresCoherency, mockCsr->passedDispatchFlags.requiresCoherency);
+    EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
+    EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
+    EXPECT_EQ(mockCmdQ->isMultiEngineQueue(), mockCsr->passedDispatchFlags.multiEngineQueue);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired);
+}
+
+HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectDispatchFlags) {
+    using CsrType = MockCsr1<FamilyType>;
+    SetUpImpl<CsrType>();
+
+    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
+    auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
+
+    mockCsr->timestampPacketWriteEnabled = true;
+    IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr;
+    mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1);
+    mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2);
+    mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3);
+
+    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
+    auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    kernelOperation->setHeaps(ih1, ih2, ih3);
+    std::unique_ptr<Command> command(new CommandWithoutKernel(*mockCmdQ, kernelOperation));
+
+    command->submit(20, false);
+
+    EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference);
+    EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle);
+    EXPECT_EQ(mockCmdQ->getDevice().getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);
+    EXPECT_EQ(GrfConfig::DefaultGrfNumber, mockCsr->passedDispatchFlags.numGrfRequired);
+    EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM);
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.requiresCoherency);
+    EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
+    EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
+    EXPECT_EQ(mockCmdQ->isMultiEngineQueue(), mockCsr->passedDispatchFlags.multiEngineQueue);
+    EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired);
+}