diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 8bb49475e1..ccc71a77ed 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -489,10 +489,16 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( } auto mediaSamplerRequired = false; + Kernel *kernel = nullptr; for (auto &dispatchInfo : multiDispatchInfo) { - dispatchInfo.getKernel()->makeResident(commandStreamReceiver); - requiresCoherency |= dispatchInfo.getKernel()->requiresCoherency(); - mediaSamplerRequired |= dispatchInfo.getKernel()->isVmeKernel(); + if (kernel != dispatchInfo.getKernel()) { + kernel = dispatchInfo.getKernel(); + } else { + continue; + } + kernel->makeResident(commandStreamReceiver); + requiresCoherency |= kernel->requiresCoherency(); + mediaSamplerRequired |= kernel->isVmeKernel(); } if (mediaSamplerRequired) { @@ -617,11 +623,17 @@ void CommandQueueHw::enqueueBlocked( } else { //store task data in event std::vector allSurfaces; + Kernel *kernel = nullptr; for (auto &dispatchInfo : multiDispatchInfo) { - dispatchInfo.getKernel()->getResidency(allSurfaces); - for (auto &surface : CreateRange(surfaces, surfaceCount)) { - allSurfaces.push_back(surface->duplicate()); + if (kernel != dispatchInfo.getKernel()) { + kernel = dispatchInfo.getKernel(); + } else { + continue; } + kernel->getResidency(allSurfaces); + } + for (auto &surface : CreateRange(surfaces, surfaceCount)) { + allSurfaces.push_back(surface->duplicate()); } auto kernelOperation = std::unique_ptr(blockedCommandsData); // marking ownership diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index 114dbf776a..175ed00635 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -274,9 +274,9 @@ class Kernel : public BaseObject<_cl_kernel> { bool isVmeKernel() { return kernelInfo.isVmeWorkload; }; //residency for kernel surfaces - void makeResident(CommandStreamReceiver &commandStreamReceiver); + MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver); void updateWithCompletionStamp(CommandStreamReceiver &commandStreamReceiver, CompletionStamp *completionStamp); - void getResidency(std::vector &dst); + MOCKABLE_VIRTUAL void getResidency(std::vector &dst); bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() { return usingSharedObjArgs; } diff --git a/unit_tests/command_queue/command_queue_hw_tests.cpp b/unit_tests/command_queue/command_queue_hw_tests.cpp index 83dc00840f..7ea5282b3d 100644 --- a/unit_tests/command_queue/command_queue_hw_tests.cpp +++ b/unit_tests/command_queue/command_queue_hw_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,6 +30,7 @@ #include "runtime/memory_manager/memory_manager.h" #include "runtime/memory_manager/surface.h" #include "unit_tests/command_queue/command_queue_fixture.h" +#include "unit_tests/fixtures/buffer_fixture.h" #include "unit_tests/fixtures/context_fixture.h" #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" @@ -990,3 +991,85 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel); EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask); } + +HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) { + KernelInfo kernelInfo; + MockKernelWithInternals mockKernelWithInternals(*pDevice); + auto mockKernel = mockKernelWithInternals.mockKernel; + auto mockProgram = mockKernelWithInternals.mockProgram; + mockProgram->setAllowNonUniform(true); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + size_t offset = 0; + size_t gws = 63; + size_t lws = 16; + + cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, status); + EXPECT_EQ(1u, mockKernel->makeResidentCalls); + + std::map::iterator it = csr.makeResidentAllocations.begin(); + for (; it != csr.makeResidentAllocations.end(); it++) { + EXPECT_EQ(1u, it->second); + } +} + +HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) { + UserEvent userEvent(context); + KernelInfo kernelInfo; + MockKernelWithInternals mockKernelWithInternals(*pDevice); + auto mockKernel = mockKernelWithInternals.mockKernel; + auto mockProgram = mockKernelWithInternals.mockProgram; + mockProgram->setAllowNonUniform(true); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + size_t offset = 0; + size_t gws = 63; + size_t lws = 16; + + cl_event blockedEvent = &userEvent; + + cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr); + EXPECT_EQ(CL_SUCCESS, status); + EXPECT_EQ(1u, mockKernel->getResidencyCalls); + + userEvent.setStatus(CL_COMPLETE); + + std::map::iterator it = csr.makeResidentAllocations.begin(); + for (; it != csr.makeResidentAllocations.end(); it++) { + EXPECT_EQ(1u, it->second); + } +} + +HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) { + UserEvent userEvent(context); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + BufferDefaults::context = context; + std::unique_ptr buffer(BufferHelper<>::create()); + GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation(); + char array[3 * MemoryConstants::cacheLineSize]; + char *ptr = &array[MemoryConstants::cacheLineSize]; + ptr = alignUp(ptr, MemoryConstants::cacheLineSize); + ptr -= 1; + + cl_event blockedEvent = &userEvent; + + cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, 1, &blockedEvent, nullptr); + EXPECT_EQ(CL_SUCCESS, status); + + userEvent.setStatus(CL_COMPLETE); + + std::map::iterator it = csr.makeResidentAllocations.begin(); + for (; it != csr.makeResidentAllocations.end(); it++) { + uint32_t expected = 1u; + //Buffer surface will be added three times (for each kernel from split and as a base range of enqueueReadBuffer call) + if (it->first == bufferAllocation) { + expected = 3u; + } + EXPECT_EQ(expected, it->second); + } +} diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index e76ec7b9c7..691610c6ad 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -43,14 +43,14 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::CommandStreamReceiver::dispatchMode; using BaseClass::CommandStreamReceiver::flushStamp; using BaseClass::CommandStreamReceiver::isPreambleSent; + using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; + using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest; using BaseClass::CommandStreamReceiver::lastSentL3Config; using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy; + using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; using BaseClass::CommandStreamReceiver::latestFlushedTaskCount; using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig; - using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; - using BaseClass::CommandStreamReceiver::lastPreemptionMode; - using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel; @@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { tempTagLocation = new GraphicsAllocation(nullptr, 0); this->tagAllocation = tempTagLocation; this->tagAddress = reinterpret_cast(tempTagLocation->getUnderlyingBuffer()); + this->storeMakeResidentAllocations = false; } virtual MemoryManager *createMemoryManager(bool enable64kbPages) override { @@ -82,6 +83,23 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState; SamplerCacheFlushState peekSamplerCacheFlushRequired() const { return this->samplerCacheFlushRequired; } + void makeResident(GraphicsAllocation &gfxAllocation) override { + if (storeMakeResidentAllocations) { + std::map::iterator it = makeResidentAllocations.find(&gfxAllocation); + if (it == makeResidentAllocations.end()) { + std::pair::iterator, bool> result; + result = makeResidentAllocations.insert(std::pair(&gfxAllocation, 1)); + DEBUG_BREAK_IF(!result.second); + } else { + makeResidentAllocations[&gfxAllocation]++; + } + } + BaseClass::makeResident(gfxAllocation); + } + + std::map makeResidentAllocations; + bool storeMakeResidentAllocations; + protected: using BaseClass::CommandStreamReceiver::memoryManager; using BaseClass::CommandStreamReceiver::tagAddress; diff --git a/unit_tests/mocks/mock_kernel.cpp b/unit_tests/mocks/mock_kernel.cpp index 9efd2650e2..198c927bc9 100644 --- a/unit_tests/mocks/mock_kernel.cpp +++ b/unit_tests/mocks/mock_kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -53,4 +53,14 @@ bool MockKernel::isPatched() const { return true; } +void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { + makeResidentCalls++; + Kernel::makeResident(commandStreamReceiver); +} + +void MockKernel::getResidency(std::vector &dst) { + getResidencyCalls++; + Kernel::getResidency(dst); +} + } // namespace OCLRT diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 709b3639af..085eba0546 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -182,6 +182,12 @@ class MockKernel : public Kernel { using Kernel::kernelArgHandlers; void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; } + + void makeResident(CommandStreamReceiver &commandStreamReceiver) override; + void getResidency(std::vector &dst) override; + + uint32_t makeResidentCalls = 0; + uint32_t getResidencyCalls = 0; }; //class below have enough internals to service Enqueue operation. diff --git a/unit_tests/mocks/mock_program.h b/unit_tests/mocks/mock_program.h index 919d7f6605..e0a8d6b013 100644 --- a/unit_tests/mocks/mock_program.h +++ b/unit_tests/mocks/mock_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -99,6 +99,9 @@ class MockProgram : public Program { void SetLLVMBinarySize(size_t bsz) { llvmBinarySize = bsz; } uint64_t getHash(); + void setAllowNonUniform(bool allow) { + allowNonUniform = allow; + } bool contextSet = false; };