Revert "Disable EUFusion for odd work groups with DPAS on DG2"

This reverts commit 017d66a469.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2023-02-03 02:11:42 +01:00
committed by Compute-Runtime-Automation
parent 47486ca55a
commit 606a900080
28 changed files with 45 additions and 548 deletions

View File

@@ -17,7 +17,6 @@ if(TESTS_XE_HPG_CORE)
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xe_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_tests_xe_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size_tests_dg2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cl_device_caps_xe_hpg_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_xe_hpg_core.cpp
)

View File

@@ -13,7 +13,6 @@ if(TESTS_DG2)
set(IGDRCL_SRCS_tests_xe_hpg_core_dg2
${IGDRCL_SRCS_tests_xe_hpg_core_dg2_excludes}
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests_dg2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool_alloc_tests_dg2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_dg2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_dg2.cpp

View File

@@ -1,178 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/scratch_space_controller_base.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/xe_hpg_core/hw_cmds_dg2.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/event/event_builder.h"
#include "opencl/source/helpers/task_information.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_mdi.h"
using namespace NEO;
// Command stream receiver mock that remembers the disableEUFusion value carried
// by the dispatch flags of the most recent flushTask() call, so tests can check
// what the command queue requested.
class MyMockCommandStreamReceiver : public MockCommandStreamReceiver {
  public:
    // Made public so tests can install their own scratch space controller.
    using CommandStreamReceiver::scratchSpaceController;

    // Copy of DispatchFlags::disableEUFusion from the last flushTask(); stays false until flushTask() runs.
    bool disableEuFusionPassed = false;

    MyMockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
        : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}

    // Records the EU-fusion request, then hands every argument unchanged to the base mock
    // and returns whatever completion stamp the base produces.
    CompletionStamp flushTask(LinearStream &commandStream,
                              size_t commandStreamStart,
                              const IndirectHeap *dsh,
                              const IndirectHeap *ioh,
                              const IndirectHeap *ssh,
                              TaskCountType taskLevel,
                              DispatchFlags &dispatchFlags,
                              Device &device) override {
        this->disableEuFusionPassed = dispatchFlags.disableEUFusion;
        return MockCommandStreamReceiver::flushTask(commandStream,
                                                    commandStreamStart,
                                                    dsh,
                                                    ioh,
                                                    ssh,
                                                    taskLevel,
                                                    dispatchFlags,
                                                    device);
    }
};
// Command queue mock whose GPGPU command stream receiver is always the receiver
// supplied at construction. The queue does not own the receiver; the caller must
// keep it alive for as long as the queue is used.
template <typename GfxFamily>
class MockCmdQueueOverrideCsr : public MockCommandQueueHw<GfxFamily> {
  public:
    // Builds the underlying mock queue (third base argument is nullptr) and stores the receiver to return later.
    MockCmdQueueOverrideCsr(Context *context, ClDevice *device, MyMockCommandStreamReceiver *csr)
        : MockCommandQueueHw<GfxFamily>(context, device, nullptr), csr(csr) {}

    // Always returns the injected receiver; dereferences csr without a null check.
    CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override {
        return *csr;
    }

    // Non-owning pointer to the receiver returned by getGpgpuCommandStreamReceiver().
    MyMockCommandStreamReceiver *csr = nullptr;
};
// A kernel flagged with usesSystolicPipelineSelectMode and given a local work size of
// 3x7x1 (an odd number of work items) is enqueued through enqueueNonBlocked; the test
// expects the dispatch flags reaching flushTask() to have disableEUFusion set.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndOddWorkGroupWhenenqueueNonBlockedCalledThenDisableEuFusionPassedToFlushTask) {
    // Stand-alone device with its own execution environment, plus an OS context for the capturing CSR.
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    std::unique_ptr<OsContext> osContext(
        OsContext::create(clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                          clDevice->getRootDeviceIndex(),
                          0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup,
                                                                       clDevice->getDeviceBitfield())));

    auto capturingCsr = std::make_unique<MyMockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(),
                                                                      clDevice->getRootDeviceIndex(),
                                                                      clDevice->getDeviceBitfield());
    capturingCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from the
    // device created above - confirm this mix of devices is intended.
    capturingCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *capturingCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), capturingCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);

    // The descriptor is exposed as const; cast once and reuse the mutable reference.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    auto &traits = descriptor.payloadMappings.dispatchTraits;
    // Presumably payload offsets at which the work-size values set below get stored - TODO confirm.
    traits.localWorkSize[0] = 0;
    traits.localWorkSize[1] = 4;
    traits.localWorkSize[2] = 8;
    traits.numWorkGroups[0] = 12;
    traits.numWorkGroups[1] = 16;
    traits.numWorkGroups[2] = 20;
    kernel->setLocalWorkSizeValues(3, 7, 1); // 3 * 7 * 1 = 21 work items: odd
    kernel->setNumWorkGroupsValues(5, 1, 1);
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream stream;
    bool blocking = false;

    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, stream, stream.getUsed(), blocking, true, multiDispatchInfo,
                                                                enqueueProperties, timestampDependencies, eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_TRUE(capturingCsr->disableEuFusionPassed);
}
// A kernel flagged with usesSystolicPipelineSelectMode and given a local work size of
// 4x7x1 (an even number of work items) is enqueued through enqueueNonBlocked; the test
// expects the dispatch flags reaching flushTask() to leave disableEUFusion unset.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndNotOddWorkGroupWhenenqueueNonBlockedCalledThenDisableEuFusionNotPassedToFlushTask) {
    // Stand-alone device with its own execution environment, plus an OS context for the capturing CSR.
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    std::unique_ptr<OsContext> osContext(
        OsContext::create(clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                          clDevice->getRootDeviceIndex(),
                          0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup,
                                                                       clDevice->getDeviceBitfield())));

    auto capturingCsr = std::make_unique<MyMockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(),
                                                                      clDevice->getRootDeviceIndex(),
                                                                      clDevice->getDeviceBitfield());
    capturingCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from the
    // device created above - confirm this mix of devices is intended.
    capturingCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *capturingCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), capturingCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);

    // The descriptor is exposed as const; cast once and reuse the mutable reference.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    auto &traits = descriptor.payloadMappings.dispatchTraits;
    // Presumably payload offsets at which the work-size values set below get stored - TODO confirm.
    traits.localWorkSize[0] = 0;
    traits.localWorkSize[1] = 4;
    traits.localWorkSize[2] = 8;
    traits.numWorkGroups[0] = 12;
    traits.numWorkGroups[1] = 16;
    traits.numWorkGroups[2] = 20;
    kernel->setLocalWorkSizeValues(4, 7, 1); // 4 * 7 * 1 = 28 work items: even
    kernel->setNumWorkGroupsValues(5, 1, 1);
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream stream;
    bool blocking = false;

    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, stream, stream.getUsed(), blocking, true, multiDispatchInfo,
                                                                enqueueProperties, timestampDependencies, eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_FALSE(capturingCsr->disableEuFusionPassed);
}
// A kernel whose descriptor has requiresDisabledEUFusion set is enqueued through
// enqueueNonBlocked; the test expects the dispatch flags reaching flushTask() to have
// disableEUFusion set.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithRequiredDisableEuFusionWhenenqueueNonBlockedCalledThenDisableEuFusionPassedToFlushTask) {
    // Stand-alone device with its own execution environment, plus an OS context for the capturing CSR.
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    std::unique_ptr<OsContext> osContext(
        OsContext::create(clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                          clDevice->getRootDeviceIndex(),
                          0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup,
                                                                       clDevice->getDeviceBitfield())));

    auto capturingCsr = std::make_unique<MyMockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(),
                                                                      clDevice->getRootDeviceIndex(),
                                                                      clDevice->getDeviceBitfield());
    capturingCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from the
    // device created above - confirm this mix of devices is intended.
    capturingCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *capturingCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), capturingCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);

    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream stream;
    bool blocking = false;

    // The descriptor is exposed as const; cast it to flip the attribute under test.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;

    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, stream, stream.getUsed(), blocking, true, multiDispatchInfo,
                                                                enqueueProperties, timestampDependencies, eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_TRUE(capturingCsr->disableEuFusionPassed);
}
// A kernel whose descriptor has requiresDisabledEUFusion cleared is enqueued through
// enqueueNonBlocked; the test expects the dispatch flags reaching flushTask() to leave
// disableEUFusion unset.
DG2TEST_F(CommandQueueHwTest, GivenKernelWithoutRequiredDisableEuFusionWhenenqueueNonBlockedCalledThenDisableEuFusionNotPassedToFlushTask) {
    // Stand-alone device with its own execution environment, plus an OS context for the capturing CSR.
    auto hwInfo = *defaultHwInfo;
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0));
    std::unique_ptr<OsContext> osContext(
        OsContext::create(clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                          clDevice->getRootDeviceIndex(),
                          0,
                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular},
                                                                       PreemptionMode::ThreadGroup,
                                                                       clDevice->getDeviceBitfield())));

    auto capturingCsr = std::make_unique<MyMockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(),
                                                                      clDevice->getRootDeviceIndex(),
                                                                      clDevice->getDeviceBitfield());
    capturingCsr->setupContext(*osContext);
    // NOTE(review): the scratch controller is built from the fixture's pDevice, not from the
    // device created above - confirm this mix of devices is intended.
    capturingCsr->scratchSpaceController.reset(
        new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *capturingCsr->getInternalAllocationStorage()));

    MockCmdQueueOverrideCsr<FamilyType> queue(pContext, clDevice.get(), capturingCsr.get());
    MockKernelWithInternals kernelWithInternals(*clDevice);
    auto kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernel);

    BlitPropertiesContainer blitProperties;
    const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitProperties);
    TimestampPacketDependencies timestampDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    LinearStream stream;
    bool blocking = false;

    // The descriptor is exposed as const; cast it to clear the attribute under test explicitly.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel->getDescriptor());
    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;

    queue.template enqueueNonBlocked<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, stream, stream.getUsed(), blocking, true, multiDispatchInfo,
                                                                enqueueProperties, timestampDependencies, eventsRequest, eventBuilder, 0, nullptr);

    EXPECT_FALSE(capturingCsr->disableEuFusionPassed);
}

View File

@@ -18,7 +18,6 @@
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

View File

@@ -1,68 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/local_work_size.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "opencl/source/command_queue/cl_local_work_size.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
using namespace NEO;
// Fixture name for the DG2 local-work-size tests below; it is the plain gtest base
// fixture, so each test builds its own device and kernel.
using LocalWorkSizeTestDG2 = ::testing::Test;
// Builds WorkSizeInfo twice for the same SLM-using kernel - once with
// usesSystolicPipelineSelectMode set and once with it cleared - and expects the two
// minWorkGroupSize values to differ.
// NOTE(review): the test name says "LessThan" but only inequality is asserted.
DG2TEST_F(LocalWorkSizeTestDG2, givenKernelWithDpasAndSlmWhenWorkSizeInfoCalculatedThenMinWGSizeIsLessThanForKernelWithoutDpas) {
    MockClDevice clDevice{new MockDevice};
    MockKernelWithInternals kernelWithInternals(clDevice);
    auto *mockKernel = kernelWithInternals.mockKernel;

    DispatchInfo dispatchInfo;
    dispatchInfo.setClDevice(&clDevice);
    dispatchInfo.setKernel(mockKernel);

    // Override the device limits with values derived from the default hardware info.
    const auto &gtSystemInfo = defaultHwInfo->gtSystemInfo;
    auto &sharedInfo = clDevice.sharedDeviceInfo;
    // NOTE(review): this is ThreadCount / MaxEuPerSubSlice, not MaxEuPerSubSlice itself - confirm intended.
    sharedInfo.maxNumEUsPerSubSlice = gtSystemInfo.ThreadCount / gtSystemInfo.MaxEuPerSubSlice;
    sharedInfo.numThreadsPerEU = gtSystemInfo.ThreadCount / gtSystemInfo.EUCount;

    mockKernel->slmTotalSize = 0x100;

    // The descriptor is exposed as const; cast once and toggle the flag between the two calculations.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(mockKernel->getDescriptor());

    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
    WorkSizeInfo withDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = false;
    WorkSizeInfo withoutDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    EXPECT_NE(withDpas.minWorkGroupSize, withoutDpas.minWorkGroupSize);
}
// Builds WorkSizeInfo twice for the same SLM-using kernel - once with
// requiresDisabledEUFusion set and once with it cleared - and expects the two
// minWorkGroupSize values to differ.
// NOTE(review): the test name says "LessThan" but only inequality is asserted.
DG2TEST_F(LocalWorkSizeTestDG2, givenKernelWithFusedEuDisabledAndSlmWhenWorkSizeInfoCalculatedThenMinWGSizeIsLessThanForKernelWithoutDpas) {
    MockClDevice clDevice{new MockDevice};
    MockKernelWithInternals kernelWithInternals(clDevice);
    auto *mockKernel = kernelWithInternals.mockKernel;

    DispatchInfo dispatchInfo;
    dispatchInfo.setClDevice(&clDevice);
    dispatchInfo.setKernel(mockKernel);

    // Override the device limits with values derived from the default hardware info.
    const auto &gtSystemInfo = defaultHwInfo->gtSystemInfo;
    auto &sharedInfo = clDevice.sharedDeviceInfo;
    // NOTE(review): this is ThreadCount / MaxEuPerSubSlice, not MaxEuPerSubSlice itself - confirm intended.
    sharedInfo.maxNumEUsPerSubSlice = gtSystemInfo.ThreadCount / gtSystemInfo.MaxEuPerSubSlice;
    sharedInfo.numThreadsPerEU = gtSystemInfo.ThreadCount / gtSystemInfo.EUCount;

    mockKernel->slmTotalSize = 0x100;

    // The descriptor is exposed as const; cast once and toggle the flag between the two calculations.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(mockKernel->getDescriptor());

    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    WorkSizeInfo withFusionDisabled = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
    WorkSizeInfo withFusionAllowed = createWorkSizeInfoFromDispatchInfo(dispatchInfo);

    EXPECT_NE(withFusionDisabled.minWorkGroupSize, withFusionAllowed.minWorkGroupSize);
}