mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 21:27:04 +08:00
Revert "Disable EUFusion for odd work groups with DPAS on DG2"
This reverts commit 017d66a469.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
47486ca55a
commit
606a900080
@@ -17,7 +17,6 @@ if(TESTS_XE_HPG_CORE)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xe_hpg_core.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe_hpg_core.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_tests_xe_hpg_core.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size_tests_dg2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_cl_device_caps_xe_hpg_core.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_xe_hpg_core.cpp
|
||||
)
|
||||
|
||||
@@ -13,7 +13,6 @@ if(TESTS_DG2)
|
||||
set(IGDRCL_SRCS_tests_xe_hpg_core_dg2
|
||||
${IGDRCL_SRCS_tests_xe_hpg_core_dg2_excludes}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests_dg2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool_alloc_tests_dg2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_dg2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_dg2.cpp
|
||||
|
||||
@@ -1,178 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/scratch_space_controller_base.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/xe_hpg_core/hw_cmds_dg2.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/source/event/event_builder.h"
|
||||
#include "opencl/source/helpers/task_information.h"
|
||||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
class MyMockCommandStreamReceiver : public MockCommandStreamReceiver {
|
||||
public:
|
||||
using CommandStreamReceiver::scratchSpaceController;
|
||||
MyMockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
|
||||
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
|
||||
CompletionStamp flushTask(
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
const IndirectHeap *dsh,
|
||||
const IndirectHeap *ioh,
|
||||
const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel,
|
||||
DispatchFlags &dispatchFlags,
|
||||
Device &device) override {
|
||||
disableEuFusionPassed = dispatchFlags.disableEUFusion;
|
||||
return MockCommandStreamReceiver::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device);
|
||||
}
|
||||
bool disableEuFusionPassed = false;
|
||||
};
|
||||
template <typename GfxFamily>
|
||||
class MockCmdQueueOverrideCsr : public MockCommandQueueHw<GfxFamily> {
|
||||
public:
|
||||
MockCmdQueueOverrideCsr(Context *context,
|
||||
ClDevice *device,
|
||||
MyMockCommandStreamReceiver *csr) : MockCommandQueueHw<GfxFamily>(context, device, nullptr) {
|
||||
this->csr = csr;
|
||||
}
|
||||
CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; }
|
||||
MyMockCommandStreamReceiver *csr = nullptr;
|
||||
};
|
||||
|
||||
// Enqueues an NDRange kernel that is flagged as using the systolic pipeline with a
// 3x7x1 local work size, and expects flushTask to be called with disableEUFusion set.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndOddWorkGroupWhenenqueueNonBlockedCalledThenDisableEuFusionPassedToFlushTask) {
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    auto *executionEnvironment = clDevice->getExecutionEnvironment();

    std::unique_ptr<OsContext> osContext{OsContext::create(
        executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), clDevice->getRootDeviceIndex(), 0,
        EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                     PreemptionMode::ThreadGroup, clDevice->getDeviceBitfield()))};

    auto mockCsr = std::make_unique<MyMockCommandStreamReceiver>(*executionEnvironment, clDevice->getRootDeviceIndex(), clDevice->getDeviceBitfield());
    mockCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from
    // the locally created device - confirm this mix is intentional.
    mockCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *mockCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), mockCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);
    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream commandStream;

    // The descriptor is exposed as const; the test needs to patch it in place.
    auto &kernelDescriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    auto &dispatchTraits = kernelDescriptor.payloadMappings.dispatchTraits;
    // Give every dispatch-trait slot a distinct value (presumably payload offsets that the
    // setters below write through - verify against the kernel implementation).
    dispatchTraits.localWorkSize[0] = 0;
    dispatchTraits.localWorkSize[1] = 4;
    dispatchTraits.localWorkSize[2] = 8;
    dispatchTraits.numWorkGroups[0] = 12;
    dispatchTraits.numWorkGroups[1] = 16;
    dispatchTraits.numWorkGroups[2] = 20;

    kernel->setLocalWorkSizeValues(3, 7, 1);
    kernel->setNumWorkGroupsValues(5, 1, 1);

    bool isBlocking = false;
    kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, commandStream, commandStream.getUsed(), isBlocking, true,
                                                                multiDispatchInfo, enqueueProperties, timestampDependencies,
                                                                eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_TRUE(mockCsr->disableEuFusionPassed);
}
|
||||
|
||||
// Enqueues an NDRange kernel that is flagged as using the systolic pipeline with a
// 4x7x1 local work size, and expects flushTask to be called without disableEUFusion.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndNotOddWorkGroupWhenenqueueNonBlockedCalledThenDisableEuFusionNotPassedToFlushTask) {
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    auto *executionEnvironment = clDevice->getExecutionEnvironment();

    std::unique_ptr<OsContext> osContext{OsContext::create(
        executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), clDevice->getRootDeviceIndex(), 0,
        EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                     PreemptionMode::ThreadGroup, clDevice->getDeviceBitfield()))};

    auto mockCsr = std::make_unique<MyMockCommandStreamReceiver>(*executionEnvironment, clDevice->getRootDeviceIndex(), clDevice->getDeviceBitfield());
    mockCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from
    // the locally created device - confirm this mix is intentional.
    mockCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *mockCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), mockCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);
    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream commandStream;

    // The descriptor is exposed as const; the test needs to patch it in place.
    auto &kernelDescriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    auto &dispatchTraits = kernelDescriptor.payloadMappings.dispatchTraits;
    // Give every dispatch-trait slot a distinct value (presumably payload offsets that the
    // setters below write through - verify against the kernel implementation).
    dispatchTraits.localWorkSize[0] = 0;
    dispatchTraits.localWorkSize[1] = 4;
    dispatchTraits.localWorkSize[2] = 8;
    dispatchTraits.numWorkGroups[0] = 12;
    dispatchTraits.numWorkGroups[1] = 16;
    dispatchTraits.numWorkGroups[2] = 20;

    kernel->setLocalWorkSizeValues(4, 7, 1);
    kernel->setNumWorkGroupsValues(5, 1, 1);

    bool isBlocking = false;
    kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, commandStream, commandStream.getUsed(), isBlocking, true,
                                                                multiDispatchInfo, enqueueProperties, timestampDependencies,
                                                                eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_FALSE(mockCsr->disableEuFusionPassed);
}
|
||||
// Enqueues an NDRange kernel whose descriptor sets requiresDisabledEUFusion and expects
// flushTask to be called with disableEUFusion set.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithRequiredDisableEuFusionWhenenqueueNonBlockedCalledThenDisableEuFusionPassedToFlushTask) {
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    auto *executionEnvironment = clDevice->getExecutionEnvironment();

    std::unique_ptr<OsContext> osContext{OsContext::create(
        executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), clDevice->getRootDeviceIndex(), 0,
        EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                     PreemptionMode::ThreadGroup, clDevice->getDeviceBitfield()))};

    auto mockCsr = std::make_unique<MyMockCommandStreamReceiver>(*executionEnvironment, clDevice->getRootDeviceIndex(), clDevice->getDeviceBitfield());
    mockCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from
    // the locally created device - confirm this mix is intentional.
    mockCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *mockCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), mockCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);
    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream commandStream;

    bool isBlocking = false;
    // The descriptor is exposed as const; the test needs to patch the flag in place.
    auto &kernelDescriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, commandStream, commandStream.getUsed(), isBlocking, true,
                                                                multiDispatchInfo, enqueueProperties, timestampDependencies,
                                                                eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_TRUE(mockCsr->disableEuFusionPassed);
}
|
||||
// Enqueues an NDRange kernel whose descriptor clears requiresDisabledEUFusion and expects
// flushTask to be called without disableEUFusion.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithoutRequiredDisableEuFusionWhenenqueueNonBlockedCalledThenDisableEuFusionNotPassedToFlushTask) {
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    auto *executionEnvironment = clDevice->getExecutionEnvironment();

    std::unique_ptr<OsContext> osContext{OsContext::create(
        executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), clDevice->getRootDeviceIndex(), 0,
        EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                     PreemptionMode::ThreadGroup, clDevice->getDeviceBitfield()))};

    auto mockCsr = std::make_unique<MyMockCommandStreamReceiver>(*executionEnvironment, clDevice->getRootDeviceIndex(), clDevice->getDeviceBitfield());
    mockCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from
    // the locally created device - confirm this mix is intentional.
    mockCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *mockCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), mockCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);
    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream commandStream;

    bool isBlocking = false;
    // The descriptor is exposed as const; the test needs to patch the flag in place.
    auto &kernelDescriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, commandStream, commandStream.getUsed(), isBlocking, true,
                                                                multiDispatchInfo, enqueueProperties, timestampDependencies,
                                                                eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_FALSE(mockCsr->disableEuFusionPassed);
}
|
||||
@@ -18,7 +18,6 @@
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/local_work_size.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "opencl/source/command_queue/cl_local_work_size.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// Fixture alias for the DG2 local-work-size tests; it is the plain gtest base fixture,
// and each test below constructs its own mock device and kernel.
using LocalWorkSizeTestDG2 = ::testing::Test;
|
||||
|
||||
// Builds WorkSizeInfo twice for the same SLM-using kernel, once with the systolic
// pipeline flag set and once with it cleared, and checks that minWorkGroupSize differs.
DG2TEST_F(LocalWorkSizeTestDG2, givenKernelWithDpasAndSlmWhenWorkSizeInfoCalculatedThenMinWGSizeIsLessThanForKernelWithoutDpas) {
    MockClDevice clDevice{new MockDevice};
    MockKernelWithInternals kernelWithInternals(clDevice);
    DispatchInfo dispatchInfo;
    dispatchInfo.setClDevice(&clDevice);
    dispatchInfo.setKernel(kernelWithInternals.mockKernel);

    const auto &gtSystemInfo = defaultHwInfo->gtSystemInfo;
    auto threadsPerEu = gtSystemInfo.ThreadCount / gtSystemInfo.EUCount;
    // NOTE(review): this divides ThreadCount (not EUCount) by MaxEuPerSubSlice, which does
    // not obviously yield "EUs per sub-slice" - confirm the intended formula.
    auto euPerSubSlice = gtSystemInfo.ThreadCount / gtSystemInfo.MaxEuPerSubSlice;

    clDevice.sharedDeviceInfo.maxNumEUsPerSubSlice = euPerSubSlice;
    clDevice.sharedDeviceInfo.numThreadsPerEU = threadsPerEu;
    kernelWithInternals.mockKernel->slmTotalSize = 0x100;

    // The descriptor is exposed as const; the test toggles the flag in place.
    auto &kernelAttributes = const_cast<NEO::KernelDescriptor &>(kernelWithInternals.mockKernel->getDescriptor()).kernelAttributes;

    kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
    WorkSizeInfo infoWithDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    kernelAttributes.flags.usesSystolicPipelineSelectMode = false;
    WorkSizeInfo infoWithoutDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    EXPECT_NE(infoWithDpas.minWorkGroupSize, infoWithoutDpas.minWorkGroupSize);
}
|
||||
|
||||
// Builds WorkSizeInfo twice for the same SLM-using kernel, once with
// requiresDisabledEUFusion set and once with it cleared, and checks that
// minWorkGroupSize differs.
DG2TEST_F(LocalWorkSizeTestDG2, givenKernelWithFusedEuDisabledAndSlmWhenWorkSizeInfoCalculatedThenMinWGSizeIsLessThanForKernelWithoutDpas) {
    MockClDevice clDevice{new MockDevice};
    MockKernelWithInternals kernelWithInternals(clDevice);
    DispatchInfo dispatchInfo;
    dispatchInfo.setClDevice(&clDevice);
    dispatchInfo.setKernel(kernelWithInternals.mockKernel);

    const auto &gtSystemInfo = defaultHwInfo->gtSystemInfo;
    auto threadsPerEu = gtSystemInfo.ThreadCount / gtSystemInfo.EUCount;
    // NOTE(review): this divides ThreadCount (not EUCount) by MaxEuPerSubSlice, which does
    // not obviously yield "EUs per sub-slice" - confirm the intended formula.
    auto euPerSubSlice = gtSystemInfo.ThreadCount / gtSystemInfo.MaxEuPerSubSlice;

    clDevice.sharedDeviceInfo.maxNumEUsPerSubSlice = euPerSubSlice;
    clDevice.sharedDeviceInfo.numThreadsPerEU = threadsPerEu;
    kernelWithInternals.mockKernel->slmTotalSize = 0x100;

    // The descriptor is exposed as const; the test toggles the flag in place.
    auto &kernelAttributes = const_cast<NEO::KernelDescriptor &>(kernelWithInternals.mockKernel->getDescriptor()).kernelAttributes;

    kernelAttributes.flags.requiresDisabledEUFusion = true;
    WorkSizeInfo infoFusionDisabled = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    kernelAttributes.flags.requiresDisabledEUFusion = false;
    WorkSizeInfo infoFusionAllowed = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    EXPECT_NE(infoFusionDisabled.minWorkGroupSize, infoFusionAllowed.minWorkGroupSize);
}
|
||||
Reference in New Issue
Block a user