diff --git a/runtime/command_queue/gpgpu_walker.h b/runtime/command_queue/gpgpu_walker.h index fe1f5fa867..0be094e0ad 100644 --- a/runtime/command_queue/gpgpu_walker.h +++ b/runtime/command_queue/gpgpu_walker.h @@ -181,16 +181,13 @@ class GpgpuWalkerHelper { uint32_t registerAddress); static void dispatchPerfCountersGeneralPurposeCounterCommands( - CommandQueue &commandQueue, - TagNode &hwPerfCounter, LinearStream *commandStream, - bool start); + uint64_t baseAddress); static void dispatchPerfCountersUserCounterCommands( CommandQueue &commandQueue, - TagNode &hwPerfCounter, LinearStream *commandStream, - bool start); + uint64_t baseAddress); static void dispatchPerfCountersOABufferStateCommands( TagNode &hwPerfCounter, diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl index 03291916e0..46940a75da 100644 --- a/runtime/command_queue/gpgpu_walker.inl +++ b/runtime/command_queue/gpgpu_walker.inl @@ -165,14 +165,8 @@ void GpgpuWalkerHelper::dispatchStoreRegisterCommand( template void GpgpuWalkerHelper::dispatchPerfCountersGeneralPurposeCounterCommands( - CommandQueue &commandQueue, - TagNode &hwPerfCounter, LinearStream *commandStream, - bool start) { - - uint64_t baseAddress = hwPerfCounter.getGpuAddress(); - baseAddress += start ? offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Gp) - : offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Gp); + uint64_t baseAddress) { // Read General Purpose counters for (auto i = 0u; i < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) { @@ -186,19 +180,15 @@ void GpgpuWalkerHelper::dispatchPerfCountersGeneralPurposeCounterComm template void GpgpuWalkerHelper::dispatchPerfCountersUserCounterCommands( CommandQueue &commandQueue, - TagNode &hwPerfCounter, LinearStream *commandStream, - bool start) { + uint64_t baseAddress) { - uint64_t baseAddr = hwPerfCounter.getGpuAddress(); - baseAddr += start ? offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User) - : offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User); auto userRegs = &commandQueue.getPerfCountersConfigData()->ReadRegs; for (uint32_t i = 0; i < userRegs->RegsCount; i++) { uint32_t regAddr = userRegs->Reg[i].Offset; //offset between base (low) registers is cl_ulong wide - uint64_t address = baseAddr + i * sizeof(cl_ulong); + uint64_t address = baseAddress + i * sizeof(cl_ulong); dispatchStoreRegisterCommand(commandStream, address, regAddr); if (userRegs->Reg[i].BitSize > 32) { @@ -240,7 +230,7 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( //Read Core Frequency GpgpuWalkerHelper::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqBegin), INSTR_MMIO_RPSTAT1); - GpgpuWalkerHelper::dispatchPerfCountersGeneralPurposeCounterCommands(commandQueue, hwPerfCounter, commandStream, true); + GpgpuWalkerHelper::dispatchPerfCountersGeneralPurposeCounterCommands(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Gp)); auto pReportPerfCount = commandStream->getSpaceForCmd(); *pReportPerfCount = GfxFamily::cmdInitReportPerfCount; @@ -252,7 +242,7 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( PipeControlHelper::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false); - GpgpuWalkerHelper::dispatchPerfCountersUserCounterCommands(commandQueue, hwPerfCounter, commandStream, true); + GpgpuWalkerHelper::dispatchPerfCountersUserCounterCommands(commandQueue, commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User)); commandQueue.sendPerfCountersConfig(); } @@ -286,7 +276,7 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Oa); pReportPerfCount->setMemoryAddress(address); - GpgpuWalkerHelper::dispatchPerfCountersGeneralPurposeCounterCommands(commandQueue, hwPerfCounter, commandStream, false); + GpgpuWalkerHelper::dispatchPerfCountersGeneralPurposeCounterCommands(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Gp)); //Store value of NOOPID register GpgpuWalkerHelper::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdEnd), INSTR_MMIO_NOOPID); @@ -294,7 +284,7 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( //Read Core Frequency GpgpuWalkerHelper::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqEnd), INSTR_MMIO_RPSTAT1); - GpgpuWalkerHelper::dispatchPerfCountersUserCounterCommands(commandQueue, hwPerfCounter, commandStream, false); + GpgpuWalkerHelper::dispatchPerfCountersUserCounterCommands(commandQueue, commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User)); perfCounters->setCpuTimestamp(); } diff --git a/runtime/command_stream/experimental_command_buffer.inl b/runtime/command_stream/experimental_command_buffer.inl index 0e61caaa04..84c5e33e0f 100644 --- a/runtime/command_stream/experimental_command_buffer.inl +++ b/runtime/command_stream/experimental_command_buffer.inl @@ -16,8 +16,8 @@ namespace NEO { template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset) { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - auto pCmd = static_cast(parentStream.getSpace(sizeof(MI_BATCH_BUFFER_START))); - auto commandStreamReceiverHw = reinterpret_cast *>(commandStreamReceiver); + auto pCmd = parentStream.getSpaceForCmd(); + auto commandStreamReceiverHw = static_cast *>(commandStreamReceiver); commandStreamReceiverHw->addBatchBufferStart(pCmd, currentStream->getGraphicsAllocation()->getGpuAddress() + cmdBufferOffset, true); } @@ -44,7 +44,7 @@ size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer() { addTimeStampPipeControl(); //end - auto pCmd = static_cast(currentStream->getSpace(sizeof(MI_BATCH_BUFFER_END))); + auto pCmd = currentStream->getSpaceForCmd(); *pCmd = GfxFamily::cmdInitBatchBufferEnd; return returnOffset; @@ -70,7 +70,7 @@ template void ExperimentalCommandBuffer::addTimeStampPipeControl() { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - auto pCmd = static_cast(currentStream->getSpace(sizeof(PIPE_CONTROL))); + auto pCmd = currentStream->getSpaceForCmd(); *pCmd = GfxFamily::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); @@ -88,12 +88,11 @@ template void ExperimentalCommandBuffer::addExperimentalCommands() { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; - uintptr_t semaphoreAddr = reinterpret_cast(experimentalAllocation->getUnderlyingBuffer()) + experimentalAllocationOffset; - uint32_t *semaphoreData = reinterpret_cast(semaphoreAddr); + uint32_t *semaphoreData = reinterpret_cast(ptrOffset(experimentalAllocation->getUnderlyingBuffer(), experimentalAllocationOffset)); *semaphoreData = 1; uint64_t gpuAddr = experimentalAllocation->getGpuAddress() + experimentalAllocationOffset; - auto semaphoreCmd = reinterpret_cast(currentStream->getSpace(sizeof(MI_SEMAPHORE_WAIT))); + auto semaphoreCmd = currentStream->getSpaceForCmd(); *semaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait; semaphoreCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD); semaphoreCmd->setSemaphoreDataDword(*semaphoreData); diff --git a/runtime/memory_manager/svm_memory_manager.h b/runtime/memory_manager/svm_memory_manager.h index 3331554227..78ed52ab12 100644 --- a/runtime/memory_manager/svm_memory_manager.h +++ b/runtime/memory_manager/svm_memory_manager.h @@ -11,9 +11,9 @@ #include namespace NEO { +class CommandStreamReceiver; class Device; class GraphicsAllocation; -class CommandStreamReceiver; class MemoryManager; struct SvmAllocationData { diff --git a/runtime/os_interface/linux/drm_neo.h b/runtime/os_interface/linux/drm_neo.h index 1c6dcc1e83..797e0bda6d 100644 --- a/runtime/os_interface/linux/drm_neo.h +++ b/runtime/os_interface/linux/drm_neo.h @@ -25,7 +25,6 @@ namespace NEO { class DeviceFactory; struct HardwareInfo; -struct FeatureTable; struct DeviceDescriptor { unsigned short deviceId; diff --git a/unit_tests/aub_tests/command_stream/aub_command_stream_fixture.h b/unit_tests/aub_tests/command_stream/aub_command_stream_fixture.h index a96803fc81..953c405271 100644 --- a/unit_tests/aub_tests/command_stream/aub_command_stream_fixture.h +++ b/unit_tests/aub_tests/command_stream/aub_command_stream_fixture.h @@ -41,12 +41,12 @@ class AUBCommandStreamFixture : public CommandStreamFixture { void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { - csr = reinterpret_cast> *>(pCommandStreamReceiver)->aubCSR.get(); + csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { // Write our pseudo-op to the AUB file - auto aubCsr = reinterpret_cast *>(csr); + auto aubCsr = static_cast *>(csr); aubCsr->expectMMIO(mmioRegister, expectedValue); } } @@ -55,14 +55,14 @@ class AUBCommandStreamFixture : public CommandStreamFixture { void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { - auto tbxCsr = reinterpret_cast *>(pCommandStreamReceiver); + auto tbxCsr = static_cast *>(pCommandStreamReceiver); tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length); - csr = reinterpret_cast> *>(pCommandStreamReceiver)->aubCSR.get(); + csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { - auto aubCsr = reinterpret_cast *>(csr); + auto aubCsr = static_cast *>(csr); aubCsr->expectMemoryEqual(gfxAddress, srcAddress, length); } } @@ -71,21 +71,21 @@ class AUBCommandStreamFixture : public CommandStreamFixture { void expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { - auto tbxCsr = reinterpret_cast *>(pCommandStreamReceiver); + auto tbxCsr = static_cast *>(pCommandStreamReceiver); tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length); - csr = reinterpret_cast> *>(pCommandStreamReceiver)->aubCSR.get(); + csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { - auto aubCsr = reinterpret_cast *>(csr); + auto aubCsr = static_cast *>(csr); aubCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } template - CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() { - return reinterpret_cast *>(pCommandStreamReceiver); + CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { + return static_cast *>(pCommandStreamReceiver); } template diff --git a/unit_tests/aub_tests/fixtures/aub_fixture.h b/unit_tests/aub_tests/fixtures/aub_fixture.h index 35e887cb23..96110f8d37 100644 --- a/unit_tests/aub_tests/fixtures/aub_fixture.h +++ b/unit_tests/aub_tests/fixtures/aub_fixture.h @@ -56,10 +56,8 @@ class AUBFixture : public CommandQueueHwFixture { GraphicsAllocation *createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size); template - CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() { - CommandStreamReceiverSimulatedCommonHw *simulatedCsr = nullptr; - simulatedCsr = reinterpret_cast *>(csr); - return simulatedCsr; + CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { + return static_cast *>(csr); } template diff --git a/unit_tests/event/event_tests.cpp b/unit_tests/event/event_tests.cpp index 58dd391fe3..07c24c21b4 100644 --- a/unit_tests/event/event_tests.cpp +++ b/unit_tests/event/event_tests.cpp @@ -1138,13 +1138,8 @@ TEST_F(EventTest, hwTimeStampsMemoryIsPlacedInGraphicsAllocation) { void *memoryStorage = allocation->getUnderlyingBuffer(); size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize(); - uintptr_t timeStampAddress = reinterpret_cast(timeStamps); - uintptr_t graphicsAllocationStart = reinterpret_cast(memoryStorage); - - if (!((timeStampAddress >= graphicsAllocationStart) && - ((timeStampAddress + sizeof(HwTimeStamps)) <= (graphicsAllocationStart + graphicsAllocationSize)))) { - EXPECT_TRUE(false); - } + EXPECT_GE(timeStamps, memoryStorage); + EXPECT_LE(timeStamps + 1, ptrOffset(memoryStorage, graphicsAllocationSize)); } TEST_F(EventTest, getHwPerfCounterReturnsValidPointer) { diff --git a/unit_tests/mem_obj/image_tests.cpp b/unit_tests/mem_obj/image_tests.cpp index 3fd0371016..e5162fde3a 100644 --- a/unit_tests/mem_obj/image_tests.cpp +++ b/unit_tests/mem_obj/image_tests.cpp @@ -371,10 +371,10 @@ TEST(TestCreateImageUseHostPtr, CheckMemoryAllocationForDifferenHostPtrAlignment imageDesc.image_slice_pitch = 0; void *pageAlignedPointer = alignedMalloc(imageDesc.image_row_pitch * height * 1 * 4 + 256, 4096); - void *hostPtr[] = {reinterpret_cast(reinterpret_cast(pageAlignedPointer) + 16), // 16 - byte alignment - reinterpret_cast(reinterpret_cast(pageAlignedPointer) + 32), // 32 - byte alignment - reinterpret_cast(reinterpret_cast(pageAlignedPointer) + 64), // 64 - byte alignment - reinterpret_cast(reinterpret_cast(pageAlignedPointer) + 128)}; // 128 - byte alignment + void *hostPtr[] = {ptrOffset(pageAlignedPointer, 16), // 16 - byte alignment + ptrOffset(pageAlignedPointer, 32), // 32 - byte alignment + ptrOffset(pageAlignedPointer, 64), // 64 - byte alignment + ptrOffset(pageAlignedPointer, 128)}; // 128 - byte alignment bool result[] = {false, false, diff --git a/unit_tests/memory_manager/memory_manager_tests.cpp b/unit_tests/memory_manager/memory_manager_tests.cpp index f8b96e73a7..33d7f36232 100644 --- a/unit_tests/memory_manager/memory_manager_tests.cpp +++ b/unit_tests/memory_manager/memory_manager_tests.cpp @@ -1753,6 +1753,7 @@ TEST_F(MemoryAllocatorTest, whenCommandStreamerIsNotRegisteredThenReturnNullEngi auto engineControl = memoryManager->getRegisteredEngineForCsr(dummyCsr); EXPECT_EQ(nullptr, engineControl); } + TEST(MemoryManagerCopyMemoryTest, givenAllocationWithNoStorageWhenCopyMemoryToAllocationThenReturnFalse) { MockExecutionEnvironment executionEnvironment(*platformDevices); MockMemoryManager memoryManager(false, false, executionEnvironment); @@ -1760,6 +1761,7 @@ TEST(MemoryManagerCopyMemoryTest, givenAllocationWithNoStorageWhenCopyMemoryToAl MockGraphicsAllocation invalidAllocation{nullptr, 0u}; EXPECT_FALSE(memoryManager.copyMemoryToAllocation(&invalidAllocation, &memory, sizeof(memory))); } + TEST(MemoryManagerCopyMemoryTest, givenValidAllocationAndMemoryWhenCopyMemoryToAllocationThenDataIsCopied) { MockExecutionEnvironment executionEnvironment(*platformDevices); MockMemoryManager memoryManager(false, false, executionEnvironment); diff --git a/unit_tests/os_interface/windows/wddm_memory_manager_tests.cpp b/unit_tests/os_interface/windows/wddm_memory_manager_tests.cpp index d90182f397..f2d690ac4d 100644 --- a/unit_tests/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/unit_tests/os_interface/windows/wddm_memory_manager_tests.cpp @@ -1198,7 +1198,7 @@ TEST_F(BufferWithWddmMemory, givenFragmentsThatAreNotInOrderWhenGraphicsAllocati memoryManager->getHostPtrManager()->storeFragment(fragment); auto offset = 80; - auto allocationPtr = reinterpret_cast(reinterpret_cast(ptr) + offset); + auto allocationPtr = ptrOffset(ptr, offset); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = allocationPtr;