Add PipeControlHelper::obtainPipeControlAndProgramPostSyncOperation, which
reserves a PIPE_CONTROL in a LinearStream, enables CS stall, programs the
post-sync operation and split 64-bit address, and sets immediate data only for
POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA. Replace the hand-rolled copies of
that sequence with calls to the helper and cover both modes with unit tests.

diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl
index b48d9abb1f..edba83dc29 100644
--- a/runtime/command_queue/gpgpu_walker.inl
+++ b/runtime/command_queue/gpgpu_walker.inl
@@ -108,12 +108,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
     // PIPE_CONTROL for global timestamp
     uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
 
-    auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
-    *pPipeControlCmd = PIPE_CONTROL::sInit();
-    pPipeControlCmd->setCommandStreamerStallEnable(true);
-    pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
-    pPipeControlCmd->setAddress(static_cast<uint32_t>(TimeStampAddress & 0x0000FFFFFFFFULL));
-    pPipeControlCmd->setAddressHigh(static_cast<uint32_t>(TimeStampAddress >> 32));
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
 
     //MI_STORE_REGISTER_MEM for context local timestamp
     TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
@@ -311,14 +306,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
     address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportBegin.Oa));
     pReportPerfCount->setMemoryAddress(address);
 
-    //Timestamp: Global Start
-    pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
-    *pPipeControlCmd = PIPE_CONTROL::sInit();
-    pPipeControlCmd->setCommandStreamerStallEnable(true);
-    pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
     address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWTimeStamp.GlobalStartTS));
-    pPipeControlCmd->setAddress(static_cast<uint32_t>(address & ((uint64_t)UINT32_MAX)));
-    pPipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
+
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu);
 
     GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, hwPerfCounter, commandStream, true);
 
@@ -347,13 +337,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
     GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(commandQueue, hwPerfCounter, commandStream);
 
     //Timestamp: Global End
-    pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
-    *pPipeControlCmd = PIPE_CONTROL::sInit();
-    pPipeControlCmd->setCommandStreamerStallEnable(true);
-    pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
     address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWTimeStamp.GlobalEndTS));
-    pPipeControlCmd->setAddress(static_cast<uint32_t>(address & ((uint64_t)UINT32_MAX)));
-    pPipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu);
 
     auto pReportPerfCount = (MI_REPORT_PERF_COUNT *)commandStream->getSpace(sizeof(MI_REPORT_PERF_COUNT));
     *pReportPerfCount = MI_REPORT_PERF_COUNT::sInit();
diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl
index 46ae4cf07b..ac19648fd6 100644
--- a/runtime/command_stream/command_stream_receiver_hw.inl
+++ b/runtime/command_stream/command_stream_receiver_hw.inl
@@ -187,8 +187,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
         //Some architectures (SKL) requires to have pipe control prior to pipe control with tag write, add it here
         addPipeControlWA(commandStreamTask, dispatchFlags.dcFlush);
 
-        auto pCmd = addPipeControlCmd(commandStreamTask);
-        pCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
+        auto address = getTagAllocation()->getGpuAddress();
+        auto pCmd = PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, taskCount + 1);
 
         //Some architectures (BDW) requires to have at least one flush bit set
         addDcFlushToPipeControl(pCmd, dispatchFlags.dcFlush);
@@ -204,11 +204,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
             pCmd->setStateCacheInvalidationEnable(true);
         }
 
-        auto address = getTagAllocation()->getGpuAddress();
-        pCmd->setAddressHigh(address >> 32);
-        pCmd->setAddress(address & (0xffffffff));
-        pCmd->setImmediateData(taskCount + 1);
-
         this->latestSentTaskCount = taskCount + 1;
         DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", taskCount);
         if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
diff --git a/runtime/command_stream/experimental_command_buffer.inl b/runtime/command_stream/experimental_command_buffer.inl
index 6d82c9eb6f..41d19c2987 100644
--- a/runtime/command_stream/experimental_command_buffer.inl
+++ b/runtime/command_stream/experimental_command_buffer.inl
@@ -8,6 +8,7 @@
 #include "runtime/command_stream/command_stream_receiver_hw.h"
 #include "runtime/command_stream/experimental_command_buffer.h"
 #include "runtime/command_stream/linear_stream.h"
+#include "runtime/helpers/hw_helper.h"
 #include "runtime/memory_manager/graphics_allocation.h"
 
 namespace OCLRT {
@@ -75,12 +76,7 @@ void ExperimentalCommandBuffer::addTimeStampPipeControl() {
 
     uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset;
 
-    pCmd = static_cast<PIPE_CONTROL *>(currentStream->getSpace(sizeof(PIPE_CONTROL)));
-    *pCmd = GfxFamily::cmdInitPipeControl;
-    pCmd->setCommandStreamerStallEnable(true);
-    pCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
-    pCmd->setAddress(static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL));
-    pCmd->setAddressHigh(static_cast<uint32_t>(timeStampAddress >> 32));
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(currentStream.get(), PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu);
 
     //moving to next chunk
     timestampsOffset += sizeof(uint64_t);
diff --git a/runtime/device_queue/device_queue_hw.inl b/runtime/device_queue/device_queue_hw.inl
index 1d77bdae13..1ca1af94b2 100644
--- a/runtime/device_queue/device_queue_hw.inl
+++ b/runtime/device_queue/device_queue_hw.inl
@@ -228,25 +228,13 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
 
     addPipeControlCmdWa();
 
-    auto pipeControl = slbCS.getSpaceForCmd<PIPE_CONTROL>();
-    *pipeControl = PIPE_CONTROL::sInit();
-    pipeControl->setCommandStreamerStallEnable(true);
-    pipeControl->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
-    pipeControl->setAddressHigh(criticalSectionAddress >> 32);
-    pipeControl->setAddress(criticalSectionAddress & (0xffffffff));
-    pipeControl->setImmediateData(ExecutionModelCriticalSection::Free);
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, criticalSectionAddress, ExecutionModelCriticalSection::Free);
 
     uint64_t tagAddress = (uint64_t)device->getTagAddress();
 
     addPipeControlCmdWa();
 
-    auto pipeControl2 = slbCS.getSpaceForCmd<PIPE_CONTROL>();
-    *pipeControl2 = PIPE_CONTROL::sInit();
-    pipeControl2->setCommandStreamerStallEnable(true);
-    pipeControl2->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
-    pipeControl2->setAddressHigh(tagAddress >> 32);
-    pipeControl2->setAddress(tagAddress & (0xffffffff));
-    pipeControl2->setImmediateData(taskCount);
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, tagAddress, taskCount);
 
     addMediaStateClearCmds();
 
diff --git a/runtime/helpers/hw_helper.h b/runtime/helpers/hw_helper.h
index 75816cae34..da31e575ce 100644
--- a/runtime/helpers/hw_helper.h
+++ b/runtime/helpers/hw_helper.h
@@ -127,4 +127,26 @@ struct LriHelper {
         return lri;
     }
 };
+
+template <typename GfxFamily>
+struct PipeControlHelper {
+    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
+    using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
+    static PIPE_CONTROL *obtainPipeControlAndProgramPostSyncOperation(LinearStream *commandStream,
+                                                                      POST_SYNC_OPERATION operation,
+                                                                      uint64_t gpuAddress,
+                                                                      uint64_t immediateData) {
+        auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
+        *pipeControl = PIPE_CONTROL::sInit();
+        pipeControl->setCommandStreamerStallEnable(true);
+        pipeControl->setPostSyncOperation(operation);
+        pipeControl->setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
+        pipeControl->setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
+        if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
+            pipeControl->setImmediateData(immediateData);
+        }
+        return pipeControl;
+    }
+};
+
 } // namespace OCLRT
diff --git a/runtime/helpers/kernel_commands.inl b/runtime/helpers/kernel_commands.inl
index cd4eb54028..ef12c327f2 100644
--- a/runtime/helpers/kernel_commands.inl
+++ b/runtime/helpers/kernel_commands.inl
@@ -400,14 +400,7 @@ typename GfxFamily::MI_ATOMIC *KernelCommandsHelper<GfxFamily>::programMiAtomic(
 template <typename GfxFamily>
 void KernelCommandsHelper<GfxFamily>::programPipeControlDataWriteWithCsStall(LinearStream &commandStream, uint64_t writeAddress, uint64_t data) {
     using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
-
-    auto pipeControlCmd = commandStream.getSpaceForCmd<PIPE_CONTROL>();
-    *pipeControlCmd = PIPE_CONTROL::sInit();
-    pipeControlCmd->setCommandStreamerStallEnable(true);
-    pipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
-    pipeControlCmd->setAddress(static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL));
-    pipeControlCmd->setAddressHigh(static_cast<uint32_t>(writeAddress >> 32));
-    pipeControlCmd->setImmediateData(data);
+    PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(&commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, data);
 }
 
 template <typename GfxFamily>
diff --git a/unit_tests/helpers/hw_helper_tests.cpp b/unit_tests/helpers/hw_helper_tests.cpp
index 3fd284bf06..29468559e9 100644
--- a/unit_tests/helpers/hw_helper_tests.cpp
+++ b/unit_tests/helpers/hw_helper_tests.cpp
@@ -147,6 +147,51 @@ HWTEST_F(LriHelperTests, givenAddressAndOffsetWhenHelperIsUsedThenProgramCmdStre
     EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0);
 }
 
+using PipeControlHelperTests = ::testing::Test;
+
+HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsedThenProperFieldsAreProgrammed) {
+    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
+    std::unique_ptr<uint8_t[]> buffer(new uint8_t[128]);
+
+    LinearStream stream(buffer.get(), 128);
+    uint64_t address = 0x1234567887654321;
+    uint64_t immediateData = 0x1234;
+
+    auto expectedPipeControl = PIPE_CONTROL::sInit();
+    expectedPipeControl.setCommandStreamerStallEnable(true);
+    expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
+    expectedPipeControl.setAddress(static_cast<uint32_t>(address & 0x0000FFFFFFFFULL));
+    expectedPipeControl.setAddressHigh(static_cast<uint32_t>(address >> 32));
+
+    auto pipeControl = PipeControlHelper<FamilyType>::obtainPipeControlAndProgramPostSyncOperation(&stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData);
+
+    EXPECT_EQ(sizeof(PIPE_CONTROL), stream.getUsed());
+    EXPECT_EQ(pipeControl, stream.getCpuBase());
+    EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0);
+}
+
+HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIsUsedThenProperFieldsAreProgrammed) {
+    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
+    std::unique_ptr<uint8_t[]> buffer(new uint8_t[128]);
+
+    LinearStream stream(buffer.get(), 128);
+    uint64_t address = 0x1234567887654321;
+    uint64_t immediateData = 0x1234;
+
+    auto expectedPipeControl = PIPE_CONTROL::sInit();
+    expectedPipeControl.setCommandStreamerStallEnable(true);
+    expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
+    expectedPipeControl.setAddress(static_cast<uint32_t>(address & 0x0000FFFFFFFFULL));
+    expectedPipeControl.setAddressHigh(static_cast<uint32_t>(address >> 32));
+    expectedPipeControl.setImmediateData(immediateData);
+
+    auto pipeControl = PipeControlHelper<FamilyType>::obtainPipeControlAndProgramPostSyncOperation(&stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData);
+
+    EXPECT_EQ(sizeof(PIPE_CONTROL), stream.getUsed());
+    EXPECT_EQ(pipeControl, stream.getCpuBase());
+    EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0);
+}
+
 TEST(HwInfoTest, givenHwInfoWhenIsCoreThenPlatformTypeIsCore) {
     HardwareInfo hwInfo;
     hwInfo.capabilityTable.isCore = true;