Add Non-Uniform AUB tests

Change-Id: Ie1944caa2ea9b7240dde9460bd817f8889fff3bb
This commit is contained in:
Zdanowicz, Zbigniew
2018-10-16 15:37:07 -07:00
committed by sys_ocldev
parent 02e1284aa8
commit 620708e510
8 changed files with 661 additions and 42 deletions

View File

@@ -183,33 +183,35 @@ function(neo_gen_kernels target platform_name suffix)
set_target_properties(${target} PROPERTIES FOLDER "kernels/${suffix}")
endfunction()
function(neo_gen_kernel_with_options target platform_name suffix filepath)
get_filename_component(filename ${filepath} NAME)
get_filename_component(basename ${filepath} NAME_WE)
get_filename_component(base_workdir ${filepath} DIRECTORY)
set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/")
function(neo_gen_kernels_with_options target platform_name suffix filepath)
set(results)
foreach(arg ${ARGN})
string(REPLACE " " "_" argwospaces ${arg})
foreach(filearg ${filepath})
get_filename_component(filename ${filearg} NAME)
get_filename_component(basename ${filearg} NAME_WE)
get_filename_component(base_workdir ${filearg} DIRECTORY)
set(outputpath_base "${outputdir}/${basename}_${suffix}")
set(output_files
${outputpath_base}${ir_extension}${argwospaces}
${outputpath_base}.bin${argwospaces}
${outputpath_base}.gen${argwospaces}
)
set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/")
add_custom_command(
OUTPUT ${output_files}
COMMAND ${cloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -options ${arg} -options_name
WORKING_DIRECTORY ${workdir}
DEPENDS ${filepath} cloc
)
foreach(arg ${ARGN})
string(REPLACE " " "_" argwospaces ${arg})
list(APPEND results ${output_files})
set(outputpath_base "${outputdir}/${basename}_${suffix}")
set(output_files
${outputpath_base}${ir_extension}${argwospaces}
${outputpath_base}.bin${argwospaces}
${outputpath_base}.gen${argwospaces}
)
add_custom_command(
OUTPUT ${output_files}
COMMAND ${cloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -options ${arg} -options_name
WORKING_DIRECTORY ${workdir}
DEPENDS ${filearg} cloc
)
list(APPEND results ${output_files})
endforeach()
endforeach()
add_custom_target(${target} DEPENDS ${results} copy_compiler_files)
set_target_properties(${target} PROPERTIES FOLDER "kernels/${suffix}")
@@ -293,6 +295,7 @@ set(TEST_KERNEL_2_0_options
set(TEST_KERNEL_2_0
test_files/simple_block_kernel.cl
test_files/simple_nonuniform.cl
)
set(TEST_KERNEL_SIP_DEBUG_options
@@ -306,6 +309,7 @@ set(TEST_KERNEL_SIP_DEBUG_LOCAL_options
file(GLOB_RECURSE TEST_KERNELS test_files/*.cl)
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/shouldfail.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/simple_block_kernel.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/simple_nonuniform.cl")
function(gen_run_tests_with_appverifier product slices subslices eu_per_ss)
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" AND "${IGDRCL_OPTION__BITS}" STREQUAL "64" AND APPVERIFIER_ALLOWED)
@@ -373,7 +377,7 @@ macro(macro_for_each_gen)
if(MSVC OR CMAKE_SIZEOF_VOID_P EQUAL 8)
neo_gen_kernels(test_kernels_${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNELS})
neo_gen_kernel_with_options(test_kernel_${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNEL} ${TEST_KERNEL_options})
neo_gen_kernels_with_options(test_kernel_${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNEL} ${TEST_KERNEL_options})
# Temporarily disabled debug kernel generation on gen8
if(NOT ("${GEN_TYPE_LOWER}" STREQUAL "gen8"))
@@ -399,7 +403,7 @@ macro(macro_for_each_gen)
# add_dependencies(unit_tests test_kernel_sip_debug_${family_name_with_type})
if(PLATFORM_2_0_LOWER)
neo_gen_kernel_with_options(test_kernel_2_0_${family_name_with_type} ${PLATFORM_2_0_LOWER} ${family_name_with_type} ${TEST_KERNEL_2_0} ${TEST_KERNEL_2_0_options})
neo_gen_kernels_with_options(test_kernel_2_0_${family_name_with_type} ${PLATFORM_2_0_LOWER} ${family_name_with_type} "${TEST_KERNEL_2_0}" ${TEST_KERNEL_2_0_options})
endif()
endif()
endif()

View File

@@ -8,6 +8,7 @@
#include "runtime/command_queue/command_queue.h"
#include "runtime/helpers/ptr_math.h"
#include "gen_cmd_parse.h"
#include "unit_tests/aub_tests/fixtures/aub_fixture.h"
#include "unit_tests/aub_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/fixtures/simple_arg_fixture.h"
@@ -17,6 +18,8 @@
using namespace OCLRT;
extern const HardwareInfo **platformDevices;
struct TestParam {
cl_uint globalWorkSizeX;
cl_uint globalWorkSizeY;
@@ -350,3 +353,485 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(TestSimdTable),
::testing::ValuesIn(TestParamTable)));
} // namespace ULT
struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUniformKernelFixture> {
void SetUp() override {
deviceClVersionSupport = OCLRT::platformDevices[0]->capabilityTable.clVersionSupport;
if (deviceClVersionSupport < 20) {
return;
}
KernelAUBFixture<SimpleArgNonUniformKernelFixture>::SetUp();
argVal = static_cast<int>(0x22222222);
sizeWrittenMemory = 0;
typeSize = sizeof(int);
typeItems = 40 * 40 * 40;
sizeUserMemory = alignUp(typeItems * typeSize, 64);
destMemory = alignedMalloc(sizeUserMemory, 4096);
ASSERT_NE(nullptr, destMemory);
for (uint32_t i = 0; i < typeItems; i++) {
*(static_cast<int *>(destMemory) + i) = 0xdeadbeef;
}
expectedMemory = alignedMalloc(sizeUserMemory, 4096);
ASSERT_NE(nullptr, expectedMemory);
memset(expectedMemory, 0x0, sizeUserMemory);
kernel->setArgSvm(1, sizeUserMemory, destMemory);
outBuffer = csr->createAllocationAndHandleResidency(destMemory, sizeUserMemory);
ASSERT_NE(nullptr, outBuffer);
outBuffer->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
outBuffer->setMemObjectsAllocationWithWritableFlags(true);
}
void initializeExpectedMemory(size_t globalX, size_t globalY, size_t globalZ) {
uint32_t id = 0;
size_t testGlobalMax = globalX * globalY * globalZ;
ASSERT_GT(typeItems, testGlobalMax);
int maxId = static_cast<int>(testGlobalMax);
argVal = maxId;
kernel->setArg(0, sizeof(int), &argVal);
int *expectedData = static_cast<int *>(expectedMemory);
for (size_t z = 0; z < globalZ; z++) {
for (size_t y = 0; y < globalY; y++) {
for (size_t x = 0; x < globalX; x++) {
*(expectedData + id) = id;
++id;
}
}
}
*(static_cast<int *>(destMemory) + maxId) = 0;
*(expectedData + maxId) = maxId;
sizeWrittenMemory = maxId * typeSize;
//add single int size for atomic sum of all work-items
sizeWrittenMemory += typeSize;
sizeRemainderMemory = sizeUserMemory - sizeWrittenMemory;
expectedRemainderMemory = alignedMalloc(sizeRemainderMemory, 4096);
ASSERT_NE(nullptr, expectedRemainderMemory);
int *expectedReminderData = static_cast<int *>(expectedRemainderMemory);
size_t reminderElements = sizeRemainderMemory / typeSize;
for (size_t i = 0; i < reminderElements; i++) {
*(expectedReminderData + i) = 0xdeadbeef;
}
remainderDestMemory = static_cast<char *>(destMemory) + sizeWrittenMemory;
}
void TearDown() override {
if (deviceClVersionSupport < 20) {
return;
}
if (destMemory) {
alignedFree(destMemory);
destMemory = nullptr;
}
if (expectedMemory) {
alignedFree(expectedMemory);
expectedMemory = nullptr;
}
if (expectedRemainderMemory) {
alignedFree(expectedRemainderMemory);
expectedRemainderMemory = nullptr;
}
KernelAUBFixture<SimpleArgNonUniformKernelFixture>::TearDown();
}
unsigned int deviceClVersionSupport;
size_t typeSize;
size_t typeItems;
size_t sizeWrittenMemory;
size_t sizeUserMemory;
size_t sizeRemainderMemory;
int argVal;
void *destMemory = nullptr;
void *expectedMemory = nullptr;
void *expectedRemainderMemory = nullptr;
char *remainderDestMemory = nullptr;
GraphicsAllocation *outBuffer;
HardwareParse hwParser;
};
using AUBSimpleArgNonUniformTest = Test<AUBSimpleArgNonUniformFixture>;
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork1DimNonUniformGroupThenExpectTwoWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 1;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 1, 1};
size_t localWorkSize[3] = {32, 1, 1};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXDimensionThenExpectTwoWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 2;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 32, 1};
size_t localWorkSize[3] = {16, 16, 1};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInYDimensionThenExpectTwoWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 2;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {32, 39, 1};
size_t localWorkSize[3] = {16, 16, 1};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 2;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 39, 1};
size_t localWorkSize[3] = {16, 16, 1};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(4u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXDimensionThenExpectTwoWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 32, 32};
size_t localWorkSize[3] = {8, 8, 2};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYDimensionThenExpectTwoWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {32, 39, 32};
size_t localWorkSize[3] = {8, 8, 2};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInZDimensionThenExpectTwoWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {32, 32, 39};
size_t localWorkSize[3] = {8, 2, 8};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 39, 32};
size_t localWorkSize[3] = {8, 8, 2};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(4u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandZDimensionThenExpectFourWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 32, 39};
size_t localWorkSize[3] = {8, 2, 8};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(4u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYandZDimensionThenExpectFourWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {32, 39, 39};
size_t localWorkSize[3] = {2, 8, 8};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(4u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}
HWTEST_F(AUBSimpleArgNonUniformTest, DISABLED_givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYandZDimensionThenExpectEightWalkers) {
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
if (deviceClVersionSupport >= 20) {
cl_uint workDim = 3;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {39, 39, 39};
size_t localWorkSize[3] = {8, 8, 2};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
auto retVal = this->pCmdQ->enqueueKernel(
this->kernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
hwParser.parseCommands<FamilyType>(*pCmdQ);
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(8u, walkerCount);
pCmdQ->flush();
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
}
}

View File

@@ -45,9 +45,16 @@ void setupAUB(const OCLRT::Device *pDevice, OCLRT::EngineType engineType) {
aubFile.writeMMIO(mmioBase + 0x229c, 0xffff8280);
const size_t sizeHWSP = 0x1000;
const size_t alignHWSP = 0x1000;
auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP);
const size_t sizeRing = 0x4 * 0x1000;
const size_t sizeTotal = alignUp((sizeHWSP + sizeRing + csTraits.sizeLRCA), 0x1000);
const size_t alignTotal = sizeTotal;
auto totalBuffer = alignedMalloc(sizeTotal, alignTotal);
size_t totalBufferOffset = 0;
auto pGlobalHWStatusPage = totalBuffer;
totalBufferOffset += sizeHWSP;
uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage);
AubGTTData data = {true, false};
AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data);
@@ -55,10 +62,9 @@ void setupAUB(const OCLRT::Device *pDevice, OCLRT::EngineType engineType) {
aubFile.writeMMIO(mmioBase + 0x2080, ggttGlobalHardwareStatusPage);
const size_t sizeRing = 0x4 * 0x1000;
const size_t alignRing = 0x1000;
size_t sizeCommands = 0;
auto pRing = alignedMalloc(sizeRing, alignRing);
auto pRing = ptrOffset<void *>(totalBuffer, totalBufferOffset);
totalBufferOffset += sizeRing;
auto ggttRing = (uint32_t)(uintptr_t)pRing;
auto physRing = physAddress;
@@ -84,7 +90,8 @@ void setupAUB(const OCLRT::Device *pDevice, OCLRT::EngineType engineType) {
AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer);
auto sizeLRCA = csTraits.sizeLRCA;
auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA);
auto pLRCABase = ptrOffset<void *>(totalBuffer, totalBufferOffset);
totalBufferOffset += csTraits.sizeLRCA;
csTraits.initialize(pLRCABase);
csTraits.setRingHead(pLRCABase, 0x0000);
@@ -117,9 +124,7 @@ void setupAUB(const OCLRT::Device *pDevice, OCLRT::EngineType engineType) {
aubFile.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]);
aubFile.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]);
alignedFree(pRing);
alignedFree(pLRCABase);
alignedFree(pGlobalHWStatusPage);
alignedFree(totalBuffer);
aubFile.fileHandle.close();
}

View File

@@ -49,7 +49,7 @@ class AUBFixture : public CommandQueueHwFixture {
CommandQueueHwFixture::SetUp(AUBFixture::device.get(), cl_command_queue_properties(0));
}
void TearDown() {
void TearDown() override {
CommandQueueHwFixture::TearDown();
}
@@ -65,6 +65,22 @@ class AUBFixture : public CommandQueueHwFixture {
return aubCsr;
}
template <typename FamilyType>
void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) {
auto aubCsr = getAubCsr<FamilyType>();
PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) {
if (offset > length)
abort();
aubCsr->stream->expectMemory(physAddress,
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(srcAddress) + offset),
size,
aubCsr->getAddressSpaceFromPTEBits(entryBits));
};
aubCsr->ppgtt->pageWalk(reinterpret_cast<uintptr_t>(gfxAddress), length, 0, PageTableEntry::nonValidBits, walker, MemoryBanks::BankNotSpecified);
}
static void *getGpuPointer(GraphicsAllocation *allocation) {
return reinterpret_cast<void *>(allocation->getGpuAddress());
}
@@ -78,4 +94,18 @@ class AUBFixture : public CommandQueueHwFixture {
private:
using CommandQueueHwFixture::SetUp;
};
template <typename KernelFixture>
struct KernelAUBFixture : public AUBFixture,
public KernelFixture {
void SetUp() {
AUBFixture::SetUp(nullptr);
KernelFixture::SetUp(device.get(), context);
}
void TearDown() {
KernelFixture::TearDown();
AUBFixture::TearDown();
}
};
} // namespace OCLRT

View File

@@ -46,7 +46,7 @@ struct SimpleArgFixture : public FixtureFactory::IndirectHeapFixture,
using CommandStreamFixture::pCS;
using IndirectHeapFixture::SetUp;
using KernelFixture::pKernel;
using SimpleArgKernelFixture::SetUp;
using KernelFixture::SetUp;
SimpleArgFixture()
: pDestMemory(nullptr), sizeUserMemory(128 * sizeof(float)) {
@@ -64,7 +64,7 @@ struct SimpleArgFixture : public FixtureFactory::IndirectHeapFixture,
KernelFixture::SetUp(pDevice);
ASSERT_NE(nullptr, pKernel);
int argVal = (int)0x22222222;
argVal = static_cast<int>(0x22222222);
pDestMemory = alignedMalloc(sizeUserMemory, 4096);
ASSERT_NE(nullptr, pDestMemory);
@@ -79,12 +79,21 @@ struct SimpleArgFixture : public FixtureFactory::IndirectHeapFixture,
pKernel->setArgSvm(1, sizeUserMemory, pDestMemory);
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
commandStreamReceiver.createAllocationAndHandleResidency(pDestMemory, sizeUserMemory);
outBuffer = commandStreamReceiver.createAllocationAndHandleResidency(pDestMemory, sizeUserMemory);
ASSERT_NE(nullptr, outBuffer);
outBuffer->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
outBuffer->setMemObjectsAllocationWithWritableFlags(true);
}
virtual void TearDown() {
alignedFree(pExpectedMemory);
alignedFree(pDestMemory);
if (pExpectedMemory) {
alignedFree(pExpectedMemory);
pExpectedMemory = nullptr;
}
if (pDestMemory) {
alignedFree(pDestMemory);
pDestMemory = nullptr;
}
KernelFixture::TearDown();
IndirectHeapFixture::TearDown();
@@ -93,8 +102,10 @@ struct SimpleArgFixture : public FixtureFactory::IndirectHeapFixture,
DeviceFixture::TearDown();
}
int argVal;
void *pDestMemory;
void *pExpectedMemory;
size_t sizeUserMemory;
GraphicsAllocation *outBuffer;
};
} // namespace OCLRT

View File

@@ -117,8 +117,10 @@ class SimpleArgKernelFixture : public ProgramFixture {
}
virtual void TearDown() {
delete pKernel;
pKernel = nullptr;
if (pKernel) {
delete pKernel;
pKernel = nullptr;
}
pContext->release();
@@ -129,4 +131,56 @@ class SimpleArgKernelFixture : public ProgramFixture {
Kernel *pKernel;
MockContext *pContext;
};
class SimpleArgNonUniformKernelFixture : public ProgramFixture {
public:
using ProgramFixture::SetUp;
SimpleArgNonUniformKernelFixture()
: retVal(CL_SUCCESS), kernel(nullptr) {
}
protected:
void SetUp(Device *device, Context *context) {
ProgramFixture::SetUp();
cl_device_id deviceId = device;
cl_context clContext = context;
CreateProgramFromBinary<Program>(
clContext,
&deviceId,
"simple_nonuniform",
"-cl-std=CL2.0");
ASSERT_NE(nullptr, pProgram);
retVal = pProgram->build(
1,
&deviceId,
"-cl-std=CL2.0",
nullptr,
nullptr,
false);
ASSERT_EQ(CL_SUCCESS, retVal);
kernel = Kernel::create<MockKernel>(
pProgram,
*pProgram->getKernelInfo("simpleNonUniform"),
&retVal);
ASSERT_NE(nullptr, kernel);
ASSERT_EQ(CL_SUCCESS, retVal);
}
virtual void TearDown() {
if (kernel) {
delete kernel;
kernel = nullptr;
}
ProgramFixture::TearDown();
}
cl_int retVal;
Kernel *kernel;
};
} // namespace OCLRT

View File

@@ -163,6 +163,22 @@ struct HardwareParse {
return numCommands;
}
template <typename CmdType>
uint32_t getCommandCount() {
GenCmdList::iterator cmdItor = cmdList.begin();
uint32_t cmdCount = 0;
do {
cmdItor = find<CmdType *>(cmdItor, cmdList.end());
if (cmdItor != cmdList.end()) {
++cmdCount;
++cmdItor;
}
} while (cmdItor != cmdList.end());
return cmdCount;
}
// The starting point of parsing commandBuffers. This is important
// because as buffers get reused, we only want to parse the deltas.
LinearStream *previousCS;

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
__kernel void simpleNonUniform(int atomicOffset, __global volatile int *dst) {
int id = (int)(get_global_id(2) * (get_global_size(1) * get_global_size(0)) + get_global_id(1) * get_global_size(0) + get_global_id(0));
dst[id] = id;
__global volatile atomic_int *atomic_dst = ( __global volatile atomic_int * )dst;
atomic_fetch_add_explicit( &atomic_dst[atomicOffset], 1 , memory_order_relaxed, memory_scope_all_svm_devices );
}