refactor: remove unneeded VME builtin-related code
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
commit d5812f49d7
parent 8170599b17
@@ -982,7 +982,6 @@ set(BUILTINS_BINARIES_STATELESS_HEAPLESS_LIB_NAME "builtins_binaries_stateless_h
 set(BUILTINS_BINARIES_BINDFUL_LIB_NAME "builtins_binaries_bindful")
 set(BUILTINS_BINARIES_BINDLESS_LIB_NAME "builtins_binaries_bindless")
 set(BUILTINS_SPIRV_LIB_NAME "builtins_spirv")
-set(BUILTINS_VME_LIB_NAME "builtins_vme")
 
 if(WIN32)
   set(NEO_EXTRA_LIBS Ws2_32)
@@ -14,12 +14,6 @@
 #include "level_zero/core/source/device/device.h"
 #include "level_zero/core/source/kernel/kernel.h"
 
-namespace NEO {
-const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) {
-    return nullptr;
-}
-} // namespace NEO
-
 namespace L0 {
 
 BuiltinFunctionsLibImpl::BuiltinData::~BuiltinData() {
@@ -42,7 +42,7 @@ components:
     dest_dir: kernels_bin
     type: git
     branch: kernels_bin
-    revision: 3043-2372
+    revision: 3043-2375
   kmdaf:
     branch: kmdaf
     dest_dir: kmdaf
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2021-2023 Intel Corporation
+# Copyright (C) 2021-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -20,7 +20,6 @@ macro(generate_runtime_lib LIB_NAME MOCKABLE GENERATE_EXEC)
     add_subdirectory(source "${NEO_BUILD_DIR}/${LIB_NAME}" EXCLUDE_FROM_ALL)
   endif()
   target_compile_definitions(${BUILTINS_SOURCES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
-  target_compile_definitions(${BUILTINS_VME_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
 
   if(${MOCKABLE})
     target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)
@@ -106,7 +106,6 @@ if(${GENERATE_EXECUTABLE})
   list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS
        $<TARGET_OBJECTS:${SHARINGS_ENABLE_LIB_NAME}>
        $<TARGET_OBJECTS:${BUILTINS_SOURCES_LIB_NAME}>
-       $<TARGET_OBJECTS:${BUILTINS_VME_LIB_NAME}>
        $<TARGET_OBJECTS:${BUILTINS_BINARIES_STATELESS_LIB_NAME}>
        $<TARGET_OBJECTS:${BUILTINS_BINARIES_STATELESS_HEAPLESS_LIB_NAME}>
        $<TARGET_OBJECTS:${BUILTINS_BINARIES_BINDFUL_LIB_NAME}>
@@ -25,7 +25,6 @@
 #include "opencl/source/accelerators/intel_motion_estimation.h"
 #include "opencl/source/api/additional_extensions.h"
 #include "opencl/source/api/api_enter.h"
-#include "opencl/source/built_ins/vme_builtin.h"
 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/command_queue/command_queue.h"
 #include "opencl/source/context/context.h"
@@ -1506,36 +1505,12 @@ cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(cl_context context,
     TRACING_ENTER(ClCreateProgramWithBuiltInKernels, &context, &numDevices, &deviceList, &kernelNames, &errcodeRet);
     cl_int retVal = CL_SUCCESS;
     API_ENTER(&retVal);
+    cl_program program = nullptr;
     DBG_LOG_INPUTS("context", context,
                    "numDevices", numDevices,
                    "deviceList", deviceList,
                    "kernelNames", kernelNames);
-    cl_program program = nullptr;
-    Context *pContext = nullptr;
-
-    retVal = validateObjects(withCastToInternal(context, &pContext), numDevices,
-                             deviceList, kernelNames, errcodeRet);
-
-    if (retVal == CL_SUCCESS) {
-        ClDeviceVector deviceVector;
-        for (auto i = 0u; i < numDevices; i++) {
-            auto device = castToObject<ClDevice>(deviceList[i]);
-            if (!device || !pContext->isDeviceAssociated(*device)) {
-                retVal = CL_INVALID_DEVICE;
-                break;
-            }
-            deviceVector.push_back(device);
-        }
-        if (retVal == CL_SUCCESS) {
-
-            program = Vme::createBuiltInProgram(
-                *pContext,
-                deviceVector,
-                kernelNames,
-                retVal);
-        }
-    }
-
+    retVal = CL_INVALID_VALUE;
     if (errcodeRet) {
         *errcodeRet = retVal;
     }
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2022 Intel Corporation
+# Copyright (C) 2018-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -9,32 +9,11 @@ set(RUNTIME_SRCS_BUILT_INS
     ${CMAKE_CURRENT_SOURCE_DIR}/aux_translation_builtin.h
     ${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_vme.h
     ${CMAKE_CURRENT_SOURCE_DIR}/built_ins.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/vme_dispatch_builder.h
 )
 
 target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_INS})
 set_property(GLOBAL PROPERTY RUNTIME_SRCS_BUILT_INS ${RUNTIME_SRCS_BUILT_INS})
 
-set(RUNTIME_SRCS_BUILT_IN_KERNELS
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.builtin_kernel
-)
-
-target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_IN_KERNELS})
-
-hide_subdir(registry)
-hide_subdir(kernels)
 
 add_subdirectories()
-
-if(NOT (TARGET ${BUILTINS_VME_LIB_NAME}))
-  add_subdirectory(registry)
-  if(COMPILE_BUILT_INS)
-    add_subdirectory(kernels)
-  endif()
-endif()
@@ -1,20 +0,0 @@
-/*
- * Copyright (C) 2020-2023 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-#include "shared/source/built_ins/builtinops/built_in_ops.h"
-
-namespace NEO {
-namespace EBuiltInOps {
-
-using Type = uint32_t;
-
-inline constexpr Type vmeBlockMotionEstimateIntel{maxCoreValue + 1};
-inline constexpr Type vmeBlockAdvancedMotionEstimateCheckIntel{maxCoreValue + 2};
-inline constexpr Type vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{maxCoreValue + 3};
-} // namespace EBuiltInOps
-} // namespace NEO
@@ -14,7 +14,6 @@
 
 #include "opencl/source/built_ins/aux_translation_builtin.h"
 #include "opencl/source/built_ins/built_ins.inl"
-#include "opencl/source/built_ins/vme_dispatch_builder.h"
 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/execution_environment/cl_execution_environment.h"
 #include "opencl/source/helpers/convert_color.h"
@@ -1,11 +0,0 @@
-#
-# Copyright (C) 2018-2023 Intel Corporation
-#
-# SPDX-License-Identifier: MIT
-#
-
-add_custom_target(builtins_vme_sources)
-set_target_properties(builtins_vme_sources PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_BUILTINS_PROJECTS_FOLDER}")
-set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
-add_dependencies(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} builtins_vme_sources)
-add_subdirectories()
@@ -1,460 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-R"===(
-__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
-block_advanced_motion_estimate_bidirectional_check_intel(
-    sampler_t accelerator,
-    __read_only image2d_t srcImg,
-    __read_only image2d_t refImg,
-    __read_only image2d_t src_check_image,
-    __read_only image2d_t ref0_check_image,
-    __read_only image2d_t ref1_check_image,
-    uint flags,
-    uint search_cost_penalty,
-    uint search_cost_precision,
-    short2 count_global,
-    uchar bidir_weight,
-    __global short2 *count_motion_vector_buffer,
-    __global short2 *prediction_motion_vector_buffer,
-    __global char *skip_input_mode_buffer,
-    __global short2 *skip_motion_vector_buffer,
-    __global short2 *search_motion_vector_buffer,
-    __global char *intra_search_predictor_modes,
-    __global ushort *search_residuals,
-    __global ushort *skip_residuals,
-    __global ushort *intra_residuals,
-    __read_only image2d_t intraSrcImg,
-    int height,
-    int width,
-    int stride) {
-  __local uint dstSearch[64]; // 8 GRFs
-  __local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra)
-
-  // distortion in the 6th GRF
-  __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5];
-
-  // Initialize the MV cost table:
-  // MV Cost in U4U4 format:
-  // No cost : 0, 0, 0, 0, 0, 0, 0, 0
-  // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15
-  // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57
-  // High Cost : 29, 61, 72, 78, 88, 89, 91, 92
-
-  uint2 MVCostTable;
-  if (search_cost_penalty == 1) {
-    MVCostTable.s0 = 0x09050401;
-    MVCostTable.s1 = 0x0F0E0C0A;
-  } else if (search_cost_penalty == 2) {
-    MVCostTable.s0 = 0x2B1D1A05;
-    MVCostTable.s1 = 0x39392F2D;
-  } else if (search_cost_penalty == 3) {
-    MVCostTable.s0 = 0x4E483D1D;
-    MVCostTable.s1 = 0x5C5B5958;
-  } else {
-    MVCostTable.s0 = 0;
-    MVCostTable.s1 = 0;
-  }
-
-  uint MVCostPrecision = ((uint)search_cost_precision) << 16;
-
-  // Frame is divided into rows * columns of MBs.
-  // One h/w thread per WG.
-  // One WG processes "row" MBs - one row per iteration and one MB per row.
-  // Number of WGs (or h/w threads) is number of columns MBs.Each iteration
-  // processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the
-  // row offset.
-
-  int sid_0 = stride * get_group_id(0);
-  int gid_0 = sid_0 / height;
-  int gid_1 = sid_0 % height;
-  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
-       sid++, gid_0 = sid / height, gid_1 = sid % height) {
-    int2 srcCoord;
-
-    srcCoord.x = gid_0 * 16 +
-                 get_global_offset(0); // 16 pixels wide MBs (globally scalar)
-    srcCoord.y = gid_1 * 16 +
-                 get_global_offset(1); // 16 pixels tall MBs (globally scalar)
-    uint curMB = gid_0 + gid_1 * width; // current MB id
-    short2 count;
-
-    // If either the search or skip vector counts are per-MB, then we need to
-    // read in
-    // the count motion vector buffer.
-
-    if ((count_global.s0 == -1) | (count_global.s1 == -1)) {
-      count = count_motion_vector_buffer[curMB];
-    }
-
-    // If either the search or skip vector counts are per-frame, we need to use
-    // those.
-
-    if (count_global.s0 >= 0) {
-      count.s0 = count_global.s0;
-    }
-
-    if (count_global.s1 >= 0) {
-      count.s1 = count_global.s1;
-    }
-
-    int countPredMVs = count.x;
-    if (countPredMVs != 0) {
-      uint offset = curMB * 4; // 4 predictors per MB
-      offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB
-                                     // one predictor for MB per SIMD channel
-
-      // Reduce predictors from Q-pixel to integer precision.
-      int2 predMV = 0;
-
-      if (get_local_id(0) < countPredMVs) {
-        // one MV per work-item
-        if(prediction_motion_vector_buffer != NULL)
-        {
-          predMV = convert_int2(prediction_motion_vector_buffer[offset]);
-        }
-        // Predictors are input in QP resolution. Convert that to integer
-        // resolution.
-        predMV.x /= 4;
-        predMV.y /= 4;
-        predMV.y &= 0xFFFFFFFE;
-      }
-
-      // Do up to 4 IMEs, get the best MVs and their distortions, and optionally
-      // a FBR of
-      // the best MVs. Finally the results are written out to SLM.
-
-      intel_work_group_vme_mb_multi_query_4(
-          dstSearch, // best search MV and its distortions into SLM
-          countPredMVs, // count of predictor MVs (globally scalar - value range
-                        // 1 to 4)
-          MVCostPrecision, // MV cost precision
-          MVCostTable, // MV cost table
-          srcCoord, // MB 2-D offset (globally scalar)
-          predMV, // predictor MVs (up to 4 distinct MVs for SIMD16 thread)
-          srcImg, // source
-          refImg, // reference
-          accelerator); // vme object
-    }
-
-    int doIntra = ((flags & 0x2) != 0);
-    int intraEdges = 0;
-    if (doIntra) {
-      // Enable all edges by default.
-      intraEdges = 0x3C;
-      // If this is a left-edge MB, then disable left edges.
-      if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
-        intraEdges &= 0x18;
-      }
-
-      // If this is a right edge MB then disable right edges.
-      if (gid_0 == width - 1) {
-        intraEdges &= 0x34;
-      }
-
-      // If this is a top-edge MB, then disable top edges.
-      if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
-        intraEdges &= 0x20;
-      }
-
-      // Set bit6=bit5.
-      intraEdges |= ((intraEdges & 0x20) << 1);
-
-      intraEdges <<= 8;
-    }
-
-    int skip_block_type_8x8 = flags & 0x4;
-
-    int countSkipMVs = count.y;
-    if (countSkipMVs != 0 || doIntra == true) {
-      // one set of skip MV per SIMD channel
-
-      // Do up to 4 skip checks and get the distortions for each of them.
-      // Finally the results are written out to SLM.
-
-      if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) {
-        // 16x16:
-
-        uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB
-        int skipMV = 0;
-        if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV
-        {
-          offset +=
-              (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
-          if(skip_motion_vector_buffer != NULL){
-            __global int *skip1_motion_vector_buffer =
-                (__global int *)skip_motion_vector_buffer;
-            skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item
-          }
-        }
-
-        uchar skipMode = 0;
-        if (get_local_id(0) < countSkipMVs) {
-          if(skip_input_mode_buffer != NULL)
-            skipMode = skip_input_mode_buffer[curMB];
-
-          if (skipMode == 0) {
-            skipMode = 1;
-          }
-          if (skipMode > 3) {
-            skipMode = 3;
-          }
-        }
-
-        intel_work_group_vme_mb_multi_bidir_check_16x16(
-            dstSkipIntra, // distortions into SLM
-            countSkipMVs, // count of skip check MVs (globally scalar - value
-                          // range 1 to 4)
-            doIntra, // compute intra modes
-            intraEdges, // intra edges to use
-            srcCoord, // MB 2-D offset (globally scalar)
-            bidir_weight, // bidirectional weight
-            skipMode, // skip modes
-            skipMV, // skip check MVs (up to 4 distinct sets of skip check MVs
-                    // for SIMD16 thread)
-            src_check_image, // source
-            ref0_check_image, // reference fwd
-            ref1_check_image, // reference bwd
-            intraSrcImg, // intra source
-            accelerator); // vme object
-      } else {
-        // 8x8:
-
-        uint offset =
-            curMB * 4 *
-            8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB
-        int2 skipMVs = 0;
-        if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV
-        {
-          offset +=
-              (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
-          if(skip_motion_vector_buffer != NULL){
-            __global int *skip1_motion_vector_buffer =
-                (__global int *)(skip_motion_vector_buffer);
-            skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs
-                                                            // per work-item
-            skipMVs.y = skip1_motion_vector_buffer[offset + 16];}
-        }
-
-        uchar skipModes = 0;
-        if (get_local_id(0) < countSkipMVs) {
-          if(skip_input_mode_buffer != NULL)
-            skipModes = skip_input_mode_buffer[curMB];
-        }
-
-        intel_work_group_vme_mb_multi_bidir_check_8x8(
-            dstSkipIntra, // distortions into SLM
-            countSkipMVs, // count of skip check MVs per MB (globally scalar -
-                          // value range 1 to 4)
-            doIntra, // compute intra modes
-            intraEdges, // intra edges to use
-            srcCoord, // MB 2-D offset (globally scalar)
-            bidir_weight, // bidirectional weight
-            skipModes, // skip modes
-            skipMVs, // skip check MVs (up to 4 distinct sets of skip check MVs
-                     // for SIMD16 thread)
-            src_check_image, // source
-            ref0_check_image, // reference fwd
-            ref1_check_image, // reference bwd
-            intraSrcImg, // intra source
-            accelerator); // vme object
-      }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // Write Out motion estimation result:
-    // Result format
-    // Hierarchical row-major layout
-    // i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of
-    // MVs/distortion in blocks
-    if (countPredMVs != 0) {
-      // 4x4
-      if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
-        int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);
-
-        // 1. 16 work-items enabled.
-        // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
-        // (interleaved
-        // fwd/bdw MVs) with constant offset 8 (control data size) from SLM
-        // into contiguous
-        // short2 locations 0, 1, .., 15 of global buffer
-        // search_motion_vector_buffer with
-        // offset index.
-        // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
-        // distSearch into
-        // contiguous ushort locations 0, 1, .., 15 of search_residuals with
-        // offset index.
-
-        short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
-        if(search_motion_vector_buffer != NULL)
-          search_motion_vector_buffer[index] = val;
-        if (search_residuals != NULL)
-        {
-          search_residuals[index] = distSearch[get_local_id(0)];
-        }
-      }
-
-      // 8x8
-      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
-        // Only 1st 4 work-item are needed.
-        if (get_local_id(0) < 4) {
-          int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);
-
-          // 1. 4 work-items enabled.
-          // 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24
-          // (interleaved
-          // fwd/bdw MVs) with constant offset 8 from SLM into contiguous
-          // short2 locations
-          // 0, 1, .., 15 of global buffer search_motion_vector_buffer with
-          // offset index.
-          // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
-          // distSearch into
-          // contiguous ushort locations 0, 1, .., 15 of search_residuals
-          // with offset index.
-
-          short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
-          if(search_motion_vector_buffer != NULL)
-            search_motion_vector_buffer[index] = val;
-
-          if (search_residuals != NULL)
-          {
-            search_residuals[index] = distSearch[get_local_id(0) * 4];
-          }
-        }
-      }
-
-      // 16x16
-      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
-        // One 1st work is needed.
-        if (get_local_id(0) == 0) {
-          int index = gid_0 + gid_1 * width;
-
-          // 1. 1 work-item enabled.
-          // 2. Work-item gathers fwd MV in dword location 0 with constant
-          // offset 8 from
-          // SLM into short2 locations 0 of global buffer
-          // search_motion_vector_buffer.
-          // 3. Work-item gathers ushort location 0 from distSearch into ushort
-          // location 0 of search_residuals with offset index.
-
-          short2 val = as_short2(dstSearch[8]);
-          if(search_motion_vector_buffer != NULL)
-            search_motion_vector_buffer[index] = val;
-          if (search_residuals != NULL)
-          {
-            search_residuals[index] = distSearch[0];
-          }
-        }
-      }
-    }
-
-    // Write out motion skip check result:
-    // Result format
-    // Hierarchical row-major layout
-    // i.e. row-major of blocks in MBs, and row-major of 8 sets of
-    // distortions in blocks
-    if (countSkipMVs != 0) {
-      if (skip_block_type_8x8 == false) {
-        // Copy out 4 (1 component) sets of distortion values.
-
-        int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width);
-
-        if (get_local_id(0) < countSkipMVs) {
-          // 1. Up to 4 work-items are enabled.
-          // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
-          // copies them to contiguous skip_residual locations 0, 1, 2, ..,
-          // 7.
-          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
-          if(skip_residuals != NULL)
-            skip_residuals[index] = distSkip[get_local_id(0) * 16];
-        }
-      } else {
-        // Copy out 4 (4 component) sets of distortion values.
-        int index =
-            (gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width);
-
-        if (get_local_id(0) < countSkipMVs * 4) {
-          // 1. Up to 16 work-items are enabled.
-          // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*15 and
-          // copies them to contiguous skip_residual locations 0, 1, 2, ..,
-          // 15.
-
-          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
-          if(skip_residuals != NULL)
-            skip_residuals[index] = distSkip[get_local_id(0) * 4];
-        }
-      }
-    }
-
-    // Write out intra search result:
-    if (doIntra) {
-      // Write out the 4x4 intra modes
-      if (get_local_id(0) < 8) {
-        __local char *dstIntra_4x4 =
-            (__local char *)(&dstSkipIntra[32 + 16 + 4]);
-        char value = dstIntra_4x4[get_local_id(0)];
-        char value_low = (value)&0xf;
-        char value_high = (value >> 4) & 0xf;
-
-        int index_low =
-            (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);
-
-        int index_high =
-            (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);
-
-        if(intra_search_predictor_modes != NULL) {
-          intra_search_predictor_modes[index_low + 5] = value_low;
-          intra_search_predictor_modes[index_high + 5] = value_high;
-        }
-      }
-
-      // Write out the 8x8 intra modes
-      if (get_local_id(0) < 4) {
-        __local char *dstIntra_8x8 =
-            (__local char *)(&dstSkipIntra[32 + 8 + 4]);
-        char value = dstIntra_8x8[get_local_id(0) * 2];
-        char value_low = (value)&0xf;
-        int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
-        if(intra_search_predictor_modes != NULL)
-          intra_search_predictor_modes[index + 1] = value_low;
-      }
-
-      // Write out the 16x16 intra modes
-      if (get_local_id(0) < 1) {
-        __local char *dstIntra_16x16 =
-            (__local char *)(&dstSkipIntra[32 + 0 + 4]);
-        char value = dstIntra_16x16[0];
-        char value_low = (value)&0xf;
-        int index = (gid_0 * 22) + (gid_1 * 22 * width);
-        if(intra_search_predictor_modes != NULL)
-          intra_search_predictor_modes[index] = value_low;
-      }
-
-      // Get the intra residuals.
-      if (intra_residuals != NULL)
-      {
-        int index = (gid_0 * 4) + (gid_1 * 4 * width);
-
-        if (get_local_id(0) < 1) {
-          __local ushort *distIntra_4x4 =
-              (__local ushort *)(&dstSkipIntra[32 + 16 + 3]);
-          __local ushort *distIntra_8x8 =
-              (__local ushort *)(&dstSkipIntra[32 + 8 + 3]);
-          __local ushort *distIntra_16x16 =
-              (__local ushort *)(&dstSkipIntra[32 + 0 + 3]);
-
-          intra_residuals[index + 2] = distIntra_4x4[0];
-          intra_residuals[index + 1] = distIntra_8x8[0];
-          intra_residuals[index + 0] = distIntra_16x16[0];
-        }
-      }
-    }
-  }
-}
-
-)==="
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-R"===(
-__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
-block_advanced_motion_estimate_bidirectional_check_intel(
-    sampler_t accelerator, __read_only image2d_t srcImg,
-    __read_only image2d_t refImg, __read_only image2d_t src_check_image,
-    __read_only image2d_t ref0_check_image,
-    __read_only image2d_t ref1_check_image, uint flags,
-    uint search_cost_penalty, uint search_cost_precision, short2 count_global,
-    uchar bidir_weight, __global short2 *count_motion_vector_buffer,
-    __global short2 *prediction_motion_vector_buffer,
-    __global char *skip_input_mode_buffer,
-    __global short2 *skip_motion_vector_buffer,
-    __global short2 *search_motion_vector_buffer,
-    __global char *intra_search_predictor_modes,
-    __global ushort *search_residuals, __global ushort *skip_residuals,
-    __global ushort *intra_residuals) {
-}
-
-)==="
@@ -1,379 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-R"===(
-__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
-block_advanced_motion_estimate_check_intel(
-    sampler_t accelerator, __read_only image2d_t srcImg,
-    __read_only image2d_t refImg, uint flags, uint skip_block_type,
-    uint search_cost_penalty, uint search_cost_precision,
-    __global short2 *count_motion_vector_buffer,
-    __global short2 *predictors_buffer,
-    __global short2 *skip_motion_vector_buffer,
-    __global short2 *motion_vector_buffer,
-    __global char *intra_search_predictor_modes, __global ushort *residuals,
-    __global ushort *skip_residuals, __global ushort *intra_residuals,
-    __read_only image2d_t intraSrcImg, int height, int width, int stride) {
-  __local uint dstSearch[64]; // 8 GRFs
-  __local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra)
-
-  __local ushort *distSearch =
-      (__local ushort *)&dstSearch[8 * 5]; // distortion in the 6th GRF
-
-  // Initialize the MV cost table:
-  // MV Cost in U4U4 format:
-  // No cost : 0, 0, 0, 0, 0, 0, 0, 0
-  // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15
-  // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57
-  // High Cost : 29, 61, 72, 78, 88, 89, 91, 92
-
-  uint2 MVCostTable;
-  if (search_cost_penalty == 1) {
-    MVCostTable.s0 = 0x09050401;
-    MVCostTable.s1 = 0x0F0E0C0A;
-  } else if (search_cost_penalty == 2) {
-    MVCostTable.s0 = 0x2B1D1A05;
-    MVCostTable.s1 = 0x39392F2D;
-  } else if (search_cost_penalty == 3) {
-    MVCostTable.s0 = 0x4E483D1D;
-    MVCostTable.s1 = 0x5C5B5958;
-  } else {
-    MVCostTable.s0 = 0;
-    MVCostTable.s1 = 0;
-  }
-
-  uint MVCostPrecision = ((uint)search_cost_precision) << 16;
-  // Frame is divided into rows * columns of MBs.
-  // One h/w thread per WG.
-  // One WG processes 'row' MBs - one row per iteration and one MB per row.
-  // Number of WGs (or h/w threads) is number of columns MBs
-  // Each iteration processes the MB in a row - gid_0 is the MB id in a row and
-  // gid_1 is the row offset.
-
-  int sid_0 = stride * get_group_id(0);
-  int gid_0 = sid_0 / height;
-  int gid_1 = sid_0 % height;
-  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
-       sid++, gid_0 = sid / height, gid_1 = sid % height) {
-    int2 srcCoord;
-
-    srcCoord.x = gid_0 * 16 +
-                 get_global_offset(0); // 16 pixels wide MBs (globally scalar)
-    srcCoord.y = gid_1 * 16 +
-                 get_global_offset(1); // 16 pixels tall MBs (globally scalar)
-
-    uint curMB = gid_0 + gid_1 * width; // current MB id
-    short2 count = 0;
-    if(count_motion_vector_buffer != NULL)
-      count = count_motion_vector_buffer[curMB];
-
-    int countPredMVs = count.x;
-    if (countPredMVs != 0) {
-      uint offset = curMB * 8; // 8 predictors per MB
-      offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB
-                                     // one predictor for MB per SIMD channel
-
-      // Reduce predictors from Q-pixel to integer precision.
-
-      int2 predMV = 0;
-      if (get_local_id(0) < countPredMVs) {
-        if(predictors_buffer != NULL){
-          predMV =
-              convert_int2(predictors_buffer[offset]); // one MV per work-item
-          predMV.x /= 4;
-          predMV.y /= 4;
-          predMV.y &= 0xFFFE;}
-      }
-
-      // Do up to 8 IMEs, get the best MVs and their distortions, and optionally
-      // a FBR of the best MVs.
-      // Finally the results are written out to SLM.
-
-      intel_work_group_vme_mb_multi_query_8(
-          dstSearch, // best search MV and its distortions into SLM
-          countPredMVs, // count of predictor MVs (globally scalar - value range
-                        // 1 to 8)
-          MVCostPrecision, // MV cost precision
-          MVCostTable, // MV cost table
-          srcCoord, // MB 2-D offset (globally scalar)
-          predMV, // predictor MVs (up to 8 distinct MVs for SIMD16 thread)
-          srcImg, // source
-          refImg, // reference
-          accelerator); // vme object
-    }
-
-    int doIntra = (flags & 0x2) != 0;
-    int intraEdges = 0;
-    if (doIntra) {
-      // Enable all edges by default.
-      intraEdges = 0x3C;
-      // If this is a left-edge MB, then disable left edges.
-      if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
-        intraEdges &= 0x18;
-      }
-      // If this is a right edge MB then disable right edges.
-      if (gid_0 == width - 1) {
-        intraEdges &= 0x34;
-      }
-      // If this is a top-edge MB, then disable top edges.
-      if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
-        intraEdges &= 0x20;
-      }
-      // Set bit6=bit5.
-      intraEdges |= ((intraEdges & 0x20) << 1);
-      intraEdges <<= 8;
-    }
-    int countSkipMVs = count.y;
-    if (countSkipMVs != 0 || doIntra == true) {
-      uint offset = curMB * 8; // 8 sets of skip check MVs per MB
-      offset +=
-          (get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB
-                                 // one set of skip MV per SIMD channel
-
-      // Do up to 8 skip checks and get the distortions for each of them.
-      // Finally the results are written out to SLM.
-
-      if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) {
-        int skipMVs = 0;
-        if (get_local_id(0) < countSkipMVs) {
-          if(skip_motion_vector_buffer != NULL ) {
-            __global int *skip1_motion_vector_buffer =
-                (__global int *)skip_motion_vector_buffer;
-            skipMVs = skip1_motion_vector_buffer[offset]; } // one packed MV for one
-                                                            // work-item
-        }
-        intel_work_group_vme_mb_multi_check_16x16(
-            dstSkipIntra, // distortions into SLM
-            countSkipMVs, // count of skip check MVs (value range 0 to 8)
-            doIntra, // compute intra modes
-            intraEdges, // intra edges to use
-            srcCoord, // MB 2-D offset (globally scalar)
-            skipMVs, // skip check MVs (up to 8 sets of skip check MVs for
-                     // SIMD16 thread)
-            srcImg, // source
-            refImg, // reference
-            intraSrcImg, // intra source
-            accelerator);
-      }
-
-      if ((skip_block_type == 0x1) & (countSkipMVs > 0)) {
-        int4 skipMVs = 0;
-        if (get_local_id(0) < countSkipMVs) {
-          if(skip_motion_vector_buffer != NULL){
-            __global int4 *skip4_motion_vector_buffer =
-                (__global int4 *)(skip_motion_vector_buffer);
-            skipMVs = skip4_motion_vector_buffer[offset]; } // four component MVs
-                                                            // per work-item
-        }
-        intel_work_group_vme_mb_multi_check_8x8(
-            dstSkipIntra, // distortions into SLM
-            countSkipMVs, // count of skip check MVs per MB (value range 0 to 8)
-            doIntra, // compute intra modes
-            intraEdges, // intra edges to use
-            srcCoord, // MB 2-D offset (globally scalar)
-            skipMVs, // skip check MVs (up to 8 ets of skip check MVs for SIMD16
-                     // thread)
-            srcImg, // source
-            refImg, // reference
-            intraSrcImg, // intra source
-            accelerator);
-      }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // Write Out motion estimation result:
-    // Result format
-    // Hierarchical row-major layout
-    // i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of
-    // MVs/distortion in blocks
-
-    if (countPredMVs != 0) {
-      // 4x4
-      if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
-        int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);
-
-        // 1. 16 work-items enabled.
-        // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
-        // (interleaved
-        // fwd/bdw MVs) with constant offset 8 (control data size) from SLM
-        // into contiguous
-        // short2 locations 0, 1, .., 15 of global buffer
-        // search_motion_vector_buffer with
-        // offset index.
-        // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
-        // distSearch into
-        // contiguous ushort locations 0, 1, .., 15 of search_residuals with
-        // offset index.
-
-        short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
-        if(motion_vector_buffer != NULL)
-          motion_vector_buffer[index] = val;
-
-        if (residuals != NULL)
-        {
-          residuals[index] = distSearch[get_local_id(0)];
-        }
-      }
-
-      // 8x8
-      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
-        // Only 1st 4 work-item are needed.
-        if (get_local_id(0) < 4) {
-          int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);
-
-          // 1. 4 work-items enabled.
-          // 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24
-          // (interleaved
-          // fwd/bdw MVs) with constant offset 8 from SLM into contiguous
-          // short2 locations
-          // 0, 1, .., 15 of global buffer search_motion_vector_buffer with
-          // offset index.
-          // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
-          // distSearch into
-          // contiguous ushort locations 0, 1, .., 15 of search_residuals
-          // with offset index.
-
-          short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
-          if(motion_vector_buffer != NULL)
-            motion_vector_buffer[index] = val;
-
-          if (residuals != NULL)
-          {
-            residuals[index] = distSearch[get_local_id(0) * 4];
-          }
-        }
-      }
-
-      // 16x16
-      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
-        // One 1st work is needed.
-        if (get_local_id(0) == 0) {
-          int index = gid_0 + gid_1 * width;
-
-          // 1. 1 work-item enabled.
-          // 2. Work-item gathers fwd MV in dword location 0 with constant
-          // offset 8 from
-          // SLM into short2 locations 0 of global buffer
-          // search_motion_vector_buffer.
-          // 3. Work-item gathers ushort location 0 from distSearch into ushort
-          // location 0 of search_residuals with offset index.
-
-          short2 val = as_short2(dstSearch[8]);
-          if(motion_vector_buffer != NULL)
-            motion_vector_buffer[index] = val;
-
-          if (residuals != NULL)
-          {
-            residuals[index] = distSearch[0];
-          }
-        }
-      }
-    }
-
-    // Write out motion skip check result:
-    // Result format
-    // Hierarchical row-major layout
-    // i.e. row-major of blocks in MBs, and row-major of 8 sets of
-    // distortions in blocks
-
-    if (countSkipMVs != 0) {
-      if (skip_block_type == 0x0) {
-        // Copy out 8 (1 component) sets of distortion values.
-
-        int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width);
-
-        if (get_local_id(0) < countSkipMVs) {
-          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
-
-          // 1. Up to 8 work-items are enabled.
-          // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
-          // copies them to contiguous skip_residual locations 0, 1, 2, ..,
-          // 7.
-          if(skip_residuals != NULL)
-            skip_residuals[index] = distSkip[get_local_id(0) * 16];
-        }
-      } else {
-        // Copy out 8 (4 component) sets of distortion values.
-
-        int index =
-            (gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width);
-
-        __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
-
-        if (get_local_id(0) < countSkipMVs * 4) {
-          // 1. Up to 16 work-items are enabled.
-          // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*31 and
-          // copies them to contiguous skip_residual locations 0, 1, 2, ..,
-          // 31.
-          if(skip_residuals != NULL){
-            skip_residuals[index] = distSkip[get_local_id(0) * 4];
-            skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4];}
-        }
-      }
-    }
-
-    // Write out intra search result:
-
-    if (doIntra) {
-
-      int index_low =
-          (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);
-      int index_high =
-          (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);
-
-      // Write out the 4x4 intra modes
-      if (get_local_id(0) < 8) {
-        __local char *dstIntra_4x4 =
-            (__local char *)(&dstSkipIntra[64 + 16 + 4]);
-        char value = dstIntra_4x4[get_local_id(0)];
-        char value_low = (value)&0xf;
-        char value_high = (value >> 4) & 0xf;
-        if(intra_search_predictor_modes != NULL){
-          intra_search_predictor_modes[index_low + 5] = value_low;
-          intra_search_predictor_modes[index_high + 5] = value_high;}
-      }
-
-      // Write out the 8x8 intra modes
-      if (get_local_id(0) < 4) {
-        __local char *dstIntra_8x8 =
-            (__local char *)(&dstSkipIntra[64 + 8 + 4]);
-        char value = dstIntra_8x8[get_local_id(0) * 2];
-        char value_low = (value)&0xf;
-        int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
-        if(intra_search_predictor_modes != NULL)
-          intra_search_predictor_modes[index + 1] = value_low;
-      }
-
-      // Write out the 16x16 intra modes
-      if (get_local_id(0) < 1) {
-        __local char *dstIntra_16x16 =
-            (__local char *)(&dstSkipIntra[64 + 0 + 4]);
-        char value = dstIntra_16x16[get_local_id(0)];
-        char value_low = (value)&0xf;
-        if(intra_search_predictor_modes != NULL)
-          intra_search_predictor_modes[index_low] = value_low;
-      }
-
-      // Get the intra residuals.
-      if (intra_residuals != NULL)
-      {
-        int index = (gid_0 * 4) + (gid_1 * 4 * width);
-
-        if (get_local_id(0) < 1) {
-          __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[64 + 16 + 3]);
-          __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[64 + 8 + 3]);
-          __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[64 + 0 + 3]);
-          intra_residuals[index + 2] = distIntra_4x4[0];
-          intra_residuals[index + 1] = distIntra_8x8[0];
-          intra_residuals[index + 0] = distIntra_16x16[0];
-        }
-      }
-    }
-  }
-}
-)==="
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-R"===(
-__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
-block_advanced_motion_estimate_check_intel(
-    sampler_t accelerator, __read_only image2d_t srcImg,
-    __read_only image2d_t refImg, uint flags, uint skip_block_type,
-    uint search_cost_penalty, uint search_cost_precision,
-    __global short2 *count_motion_vector_buffer,
-    __global short2 *predictors_buffer,
-    __global short2 *skip_motion_vector_buffer,
-    __global short2 *motion_vector_buffer,
-    __global char *intra_search_predictor_modes, __global ushort *residuals,
-    __global ushort *skip_residuals, __global ushort *intra_residuals) {
-}
-)==="
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-R"===(
-__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
-block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
-                            __read_only image2d_t refImg,
-                            __global short2 *prediction_motion_vector_buffer,
-                            __global short2 *motion_vector_buffer,
-                            __global ushort *residuals, int height, int width,
-                            int stride) {
-  __local uint dst[64];
-  __local ushort *dist = (__local ushort *)&dst[8 * 5];
-
-  int sid_0 = stride * get_group_id(0);
-  int gid_0 = sid_0 / height;
-  int gid_1 = sid_0 % height;
-  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
-       sid++, gid_0 = sid / height, gid_1 = sid % height) {
-    int2 srcCoord = 0;
-    int2 refCoord = 0;
-
-    srcCoord.x = gid_0 * 16 + get_global_offset(0);
-    srcCoord.y = gid_1 * 16 + get_global_offset(1);
-
-    short2 predMV = 0;
-
-#ifndef HW_NULL_CHECK
-    if (prediction_motion_vector_buffer != NULL)
-#endif
-    {
-      predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width];
-      refCoord.x = predMV.x / 4;
-      refCoord.y = predMV.y / 4;
-      refCoord.y = refCoord.y & 0xFFFE;
-    }
-
-    {
-      intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg,
-                                    accelerator);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // Write Out Result
-
-    // 4x4
-    if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
-      int x = get_local_id(0) % 4;
-      int y = get_local_id(0) / 4;
-      int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4;
-
-      short2 val = as_short2(dst[8 + (y * 4 + x) * 2]);
-      motion_vector_buffer[index] = val;
-
-#ifndef HW_NULL_CHECK
-      if (residuals != NULL)
-#endif
-      {
-        residuals[index] = dist[y * 4 + x];
-      }
-    }
-
-    // 8x8
-    if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
-      if (get_local_id(0) < 4) {
-        int x = get_local_id(0) % 2;
-        int y = get_local_id(0) / 2;
-        int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2;
-        short2 val = as_short2(dst[8 + (y * 2 + x) * 8]);
-        motion_vector_buffer[index] = val;
-
-#ifndef HW_NULL_CHECK
-        if (residuals != NULL)
-#endif
-        {
-          residuals[index] = dist[(y * 2 + x) * 4];
-        }
-      }
-    }
-
-    // 16x16
-    if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
-      if (get_local_id(0) == 0) {
-        int index = gid_0 + gid_1 * width;
-
-        short2 val = as_short2(dst[8]);
-        motion_vector_buffer[index] = val;
-
-#ifndef HW_NULL_CHECK
-        if (residuals != NULL)
-#endif
-        {
-          residuals[index] = dist[0];
-        }
-      }
-    }
-  }
-}
-)==="
@@ -1,16 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-R"===(
-__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
-block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
-                            __read_only image2d_t refImg,
-                            __global short2 *prediction_motion_vector_buffer,
-                            __global short2 *motion_vector_buffer,
-                            __global ushort *residuals) {
-}
-)==="
@@ -1,16 +0,0 @@
-#
-# Copyright (C) 2018-2021 Intel Corporation
-#
-# SPDX-License-Identifier: MIT
-#
-
-add_library(${BUILTINS_VME_LIB_NAME} OBJECT EXCLUDE_FROM_ALL
-            CMakeLists.txt
-            register_ext_vme_source.cpp
-)
-set_target_properties(${BUILTINS_VME_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set_target_properties(${BUILTINS_VME_LIB_NAME} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_BUILTINS_PROJECTS_FOLDER}")
-
-target_include_directories(${BUILTINS_VME_LIB_NAME} PRIVATE
-                           ${KHRONOS_HEADERS_DIR}
-)
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2018-2023 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "shared/source/built_ins/built_ins.h"
-#include "shared/source/built_ins/registry/built_ins_registry.h"
-
-#include "opencl/source/built_ins/built_in_ops_vme.h"
-
-#include <string>
-
-namespace NEO {
-
-static RegisterEmbeddedResource registerVmeSrc(
-    createBuiltinResourceName(
-        EBuiltInOps::vmeBlockMotionEstimateIntel,
-        BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
-        .c_str(),
-    std::string(
-#include "opencl/source/built_ins/kernels/vme_block_motion_estimate_intel.builtin_kernel"
-        ));
-
-static RegisterEmbeddedResource registerVmeAdvancedSrc(
-    createBuiltinResourceName(
-        EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel,
-        BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
-        .c_str(),
-    std::string(
-#include "opencl/source/built_ins/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel"
-        ));
-
-static RegisterEmbeddedResource registerVmeAdvancedBidirectionalSrc(
-    createBuiltinResourceName(
-        EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel,
-        BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
-        .c_str(),
-    std::string(
-#include "opencl/source/built_ins/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel"
-        ));
-
-} // namespace NEO
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2020-2023 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "opencl/source/built_ins/vme_builtin.h"
-
-#include "shared/source/built_ins/built_ins.h"
-#include "shared/source/device/device.h"
-
-#include "opencl/source/built_ins/built_in_ops_vme.h"
-#include "opencl/source/built_ins/builtins_dispatch_builder.h"
-#include "opencl/source/built_ins/populate_built_ins.inl"
-#include "opencl/source/built_ins/vme_dispatch_builder.h"
-#include "opencl/source/execution_environment/cl_execution_environment.h"
-#include "opencl/source/program/program.h"
-
-#include <sstream>
-
-namespace NEO {
-
-static const char *blockMotionEstimateIntelSrc = {
-#include "kernels/vme_block_motion_estimate_intel_frontend.builtin_kernel"
-};
-
-static const char *blockAdvancedMotionEstimateCheckIntelSrc = {
-#include "kernels/vme_block_advanced_motion_estimate_check_intel_frontend.builtin_kernel"
-};
-
-static const char *blockAdvancedMotionEstimateBidirectionalCheckIntelSrc = {
-#include "kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel_frontend.builtin_kernel"
-};
-
-static const std::tuple<const char *, const char *> mediaBuiltIns[] = {
-    {"block_motion_estimate_intel", blockMotionEstimateIntelSrc},
-    {"block_advanced_motion_estimate_check_intel", blockAdvancedMotionEstimateCheckIntelSrc},
-    {"block_advanced_motion_estimate_bidirectional_check_intel", blockAdvancedMotionEstimateBidirectionalCheckIntelSrc}};
-
-// Unlike other built-ins media kernels are not stored in BuiltIns object.
-// Pointer to program with built in kernels is returned to the user through API
-// call and user is responsible for releasing it by calling clReleaseProgram.
-Program *Vme::createBuiltInProgram(
-    Context &context,
-    const ClDeviceVector &deviceVector,
-    const char *kernelNames,
-    int &errcodeRet) {
-    std::string programSourceStr = "";
-    std::istringstream ss(kernelNames);
-    std::string currentKernelName;
-
-    while (std::getline(ss, currentKernelName, ';')) {
-        bool found = false;
-        for (auto &builtInTuple : mediaBuiltIns) {
-            if (currentKernelName == std::get<0>(builtInTuple)) {
-                programSourceStr += std::get<1>(builtInTuple);
-                found = true;
-                break;
-            }
-        }
-        if (!found) {
-            errcodeRet = CL_INVALID_VALUE;
-            return nullptr;
-        }
-    }
-    if (programSourceStr.empty() == true) {
-        errcodeRet = CL_INVALID_VALUE;
-        return nullptr;
-    }
-
-    Program *pBuiltInProgram = nullptr;
-    pBuiltInProgram = Program::createBuiltInFromSource(programSourceStr.c_str(), &context, deviceVector, nullptr);
-
-    auto &device = *deviceVector[0];
-
-    if (pBuiltInProgram) {
-        std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> builtinsBuilders;
-        builtinsBuilders["block_motion_estimate_intel"] =
-            &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockMotionEstimateIntel, device);
-        builtinsBuilders["block_advanced_motion_estimate_check_intel"] =
-            &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, device);
-        builtinsBuilders["block_advanced_motion_estimate_bidirectional_check_intel"] =
-            &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, device);
-
-        errcodeRet = pBuiltInProgram->build(deviceVector, mediaKernelsBuildOptions, builtinsBuilders);
-    } else {
-        errcodeRet = CL_INVALID_VALUE;
-    }
-    return pBuiltInProgram;
-}
-
-const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) {
-    switch (builtin) {
-    default:
-        return nullptr;
-    case EBuiltInOps::vmeBlockMotionEstimateIntel:
-        return "vme_block_motion_estimate_intel.builtin_kernel";
-    case EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel:
-        return "vme_block_advanced_motion_estimate_check_intel.builtin_kernel";
-    case EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
-        return "vme_block_advanced_motion_estimate_bidirectional_check_intel";
-    }
-}
-
-BuiltinDispatchInfoBuilder &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
-    auto &builtins = *device.getDevice().getBuiltIns();
-    uint32_t operationId = static_cast<uint32_t>(operation);
-    auto clExecutionEnvironment = static_cast<ClExecutionEnvironment *>(device.getExecutionEnvironment());
-    auto &operationBuilder = clExecutionEnvironment->peekBuilders(device.getRootDeviceIndex())[operationId];
-    switch (operation) {
-    default:
-        return BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(operation, device);
-    case EBuiltInOps::vmeBlockMotionEstimateIntel:
-        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::vmeBlockMotionEstimateIntel>>(builtins, device); });
-        break;
-    case EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel:
-        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel>>(builtins, device); });
-        break;
-    case EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
-        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel>>(builtins, device); });
-        break;
-    }
-    return *operationBuilder.first;
-}
-} // namespace NEO
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2020 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-#include "opencl/source/built_ins/built_in_ops_vme.h"
-
-namespace NEO {
-class Program;
-class ClDevice;
-class ClDeviceVector;
-class Context;
-class BuiltIns;
-class BuiltinDispatchInfoBuilder;
-namespace Vme {
-
-Program *createBuiltInProgram(
-    Context &context,
-    const ClDeviceVector &deviceVector,
-    const char *kernelNames,
-    int &errcodeRet);
-
-BuiltinDispatchInfoBuilder &getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device);
-
-} // namespace Vme
-} // namespace NEO
@ -1,483 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
|
||||
#include "opencl/source/accelerators/intel_accelerator.h"
|
||||
#include "opencl/source/built_ins/built_in_ops_vme.h"
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/helpers/dispatch_info_builder.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
namespace NEO {
|
||||
class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
VmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type builtinOp,
|
||||
const char *kernelName)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
populate(builtinOp,
|
||||
mediaKernelsBuildOptions,
|
||||
kernelName, multiDeviceVmeKernel);
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
vmeKernel = multiDeviceVmeKernel->getKernel(rootDeviceIndex);
|
||||
widthArgNum = vmeKernel->getKernelInfo().getArgNumByName("width");
|
||||
heightArgNum = vmeKernel->getKernelInfo().getArgNumByName("height");
|
||||
strideArgNum = vmeKernel->getKernelInfo().getArgNumByName("stride");
|
||||
acceleratorArgNum = vmeKernel->getKernelInfo().getArgNumByName("accelerator");
|
||||
srcImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("srcImg");
|
||||
refImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("refImg");
|
||||
motionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("motion_vector_buffer");
|
||||
predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("prediction_motion_vector_buffer");
|
||||
residualsArgNum = vmeKernel->getKernelInfo().getArgNumByName("residuals");
|
||||
}
|
||||
|
||||
void getBlkTraits(const Vec3<size_t> &inGws, size_t &gwWidthInBlk, size_t &gwHeightInBlk) const {
|
||||
const size_t vmeMacroBlockWidth = 16;
|
||||
const size_t vmeMacroBlockHeight = 16;
|
||||
gwWidthInBlk = Math::divideAndRoundUp(inGws.x, vmeMacroBlockWidth);
|
||||
gwHeightInBlk = Math::divideAndRoundUp(inGws.y, vmeMacroBlockHeight);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kern,
|
||||
const uint32_t inDim, const Vec3<size_t> &inGws, const Vec3<size_t> &inLws, const Vec3<size_t> &inOffset) const override {
|
||||
if (kern == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t gwWidthInBlk = 0;
|
||||
size_t gwHeightInBlk = 0;
|
||||
getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk);
|
||||
|
||||
cl_int height = (cl_int)gwHeightInBlk;
|
||||
cl_int width = (cl_int)gwWidthInBlk;
|
||||
cl_int stride = height;
|
||||
size_t numThreadsX = gwWidthInBlk;
|
||||
const size_t simdWidth = vmeKernel->getKernelInfo().getMaxSimdSize();
|
||||
stride = static_cast<cl_int>(Math::divideAndRoundUp(height * width, numThreadsX));
|
||||
|
||||
// update implicit args
|
||||
vmeKernel->setArg(heightArgNum, sizeof(height), &height);
|
||||
vmeKernel->setArg(widthArgNum, sizeof(width), &width);
|
||||
vmeKernel->setArg(strideArgNum, sizeof(stride), &stride);
|
||||
|
||||
// Update global work size to force macro-block to HW thread execution model
|
||||
Vec3<size_t> gws = {numThreadsX * simdWidth, 1, 1};
|
||||
Vec3<size_t> lws = {vmeKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1};
|
||||
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d2D, SplitDispatch::SplitMode::noSplit> builder(clDevice);
|
||||
builder.setDispatchGeometry(gws, lws, inOffset, gws, lws);
|
||||
builder.setKernel(vmeKernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
return true;
|
||||
}

    bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override {
        DEBUG_BREAK_IF(!((argIndex != widthArgNum) && (argIndex != heightArgNum) && (argIndex != strideArgNum)));
        if ((argIndex == acceleratorArgNum) && (argVal == nullptr)) {
            err = CL_INVALID_ACCELERATOR_INTEL;
            return false;
        }
        err = vmeKernel->setArg(argIndex, argSize, argVal);
        return false;
    }

    cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3<size_t> &inGws, const Vec3<size_t> &inLws, const Vec3<size_t> &inOffset) const override {
        if (inworkDim != 2) {
            return CL_INVALID_WORK_DIMENSION;
        }

        size_t gwWidthInBlk = 0;
        size_t gwHeightInBlk = 0;
        getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk);

        size_t blkNum = gwWidthInBlk * gwHeightInBlk;
        size_t blkMul = 1;
        IntelAccelerator *accelerator = castToObject<IntelAccelerator>((cl_accelerator_intel)vmeKernel->getKernelArg(acceleratorArgNum));
        if (accelerator == nullptr) {
            return CL_INVALID_KERNEL_ARGS; // accelerator was not set
        }
        DEBUG_BREAK_IF(accelerator->getDescriptorSize() != sizeof(cl_motion_estimation_desc_intel));
        const cl_motion_estimation_desc_intel *acceleratorDesc = reinterpret_cast<const cl_motion_estimation_desc_intel *>(accelerator->getDescriptor());
        switch (acceleratorDesc->mb_block_type) {
        case CL_ME_MB_TYPE_8x8_INTEL:
            blkMul = 4;
            break;
        case CL_ME_MB_TYPE_4x4_INTEL:
            blkMul = 16;
            break;
        default:
            break;
        }

        return validateVmeDispatch(inGws, inOffset, blkNum, blkMul);
    }

    // notes on corner cases:
    // * if the arg is not available in the kernel - returns true
    // * if the arg is set to nullptr - returns true
    bool validateBufferSize(int32_t bufferArgNum, size_t minimumSizeExpected) const {
        if (bufferArgNum == -1) {
            return true;
        }

        auto buff = castToObject<Buffer>((cl_mem)vmeKernel->getKernelArg(bufferArgNum));
        if (buff == nullptr) {
            return true;
        }

        size_t bufferSize = buff->getSize();
        if (bufferSize < minimumSizeExpected) {
            return false;
        }

        return true;
    }

    template <typename EnumBaseType>
    bool validateEnumVal(EnumBaseType val) const {
        return false;
    }

    template <typename EnumBaseType, typename ExpectedValType, typename... ExpectedValsTypes>
    bool validateEnumVal(EnumBaseType val, ExpectedValType expectedVal, ExpectedValsTypes... expVals) const {
        return (val == static_cast<EnumBaseType>(expectedVal)) || validateEnumVal<EnumBaseType, ExpectedValsTypes...>(val, expVals...);
    }

    // notes on corner cases:
    // * if the arg is not available in the kernel - returns true
    template <typename EnumBaseType, typename... ExpectedValsTypes>
    bool validateEnumArg(int32_t argNum, ExpectedValsTypes... expVals) const {
        if (argNum == -1) {
            return true;
        }

        EnumBaseType val = this->getKernelArgByValValue<EnumBaseType>(static_cast<uint32_t>(argNum));
        return validateEnumVal<EnumBaseType, ExpectedValsTypes...>(val, expVals...);
    }

    template <typename RetType>
    RetType getKernelArgByValValue(uint32_t argNum) const {
        const auto &argAsVal = vmeKernel->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[argNum].as<ArgDescValue>();
        DEBUG_BREAK_IF(argAsVal.elements.size() != 1);

        const auto &element = argAsVal.elements[0];
        DEBUG_BREAK_IF(sizeof(RetType) > element.size);
        return *(RetType *)(vmeKernel->getCrossThreadData() + element.offset);
    }
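
    // validateEnumVal recurses over the expected values: each step peels off one
    // candidate and ORs in the comparison, and the single-argument overload above
    // is the base case, reached only when no candidate matched (hence false).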

    cl_int validateImages(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset) const {
        Image *srcImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(srcImgArgNum));
        Image *refImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(refImgArgNum));

        if ((srcImg == nullptr) || (refImg == nullptr)) {
            return CL_INVALID_KERNEL_ARGS;
        }

        for (Image *img : {srcImg, refImg}) {
            const cl_image_format &imgFormat = img->getImageFormat();
            if ((imgFormat.image_channel_order != CL_R) || (imgFormat.image_channel_data_type != CL_UNORM_INT8)) {
                return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
            }

            if (false == img->isTiledAllocation()) {
                // VME only works with tiled images.
                return CL_OUT_OF_RESOURCES;
            }
        }

        {
            const cl_image_desc &srcImgDesc = srcImg->getImageDesc();

            size_t srcImageWidth = srcImgDesc.image_width;
            size_t srcImageHeight = srcImgDesc.image_height;
            if (((inputRegion.x + offset.x) > srcImageWidth) ||
                ((inputRegion.y + offset.y) > srcImageHeight)) {
                return CL_INVALID_IMAGE_SIZE;
            }
        }

        return CL_SUCCESS;
    }

    virtual cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const {
        {
            cl_int imageValidationStatus = validateImages(inputRegion, offset);
            if (imageValidationStatus != CL_SUCCESS) {
                return imageValidationStatus;
            }
        }

        size_t numPredictors = 1;
        std::pair<int32_t, size_t> bufferRequirements[] = {
            std::make_pair(motionVectorBufferArgNum, (blkNum * blkMul * 2 * sizeof(cl_short))),
            std::make_pair(predictionMotionVectorBufferArgNum, (blkNum * numPredictors * 2 * sizeof(cl_short))),
            std::make_pair(residualsArgNum, (blkNum * blkMul * sizeof(cl_ushort)))};
        for (const auto &req : bufferRequirements) {
            if (false == validateBufferSize(req.first, req.second)) {
                return CL_INVALID_BUFFER_SIZE;
            }
        }

        return CL_SUCCESS;
    }

  protected:
    uint32_t heightArgNum;
    uint32_t widthArgNum;
    uint32_t strideArgNum;
    uint32_t acceleratorArgNum;
    uint32_t srcImgArgNum;
    uint32_t refImgArgNum;
    int32_t motionVectorBufferArgNum;
    int32_t predictionMotionVectorBufferArgNum;
    int32_t residualsArgNum;
    MultiDeviceKernel *multiDeviceVmeKernel;
    Kernel *vmeKernel;
};

template <>
class BuiltInOp<EBuiltInOps::vmeBlockMotionEstimateIntel> : public VmeBuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : VmeBuiltinDispatchInfoBuilder(kernelsLib, device,
                                        EBuiltInOps::vmeBlockMotionEstimateIntel, "block_motion_estimate_intel") {
    }
};

class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuilder {
  public:
    AdvancedVmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type builtinOp,
                                          const char *kernelName)
        : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, builtinOp,
                                        kernelName) {
        flagsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("flags");
        intraSrcImgArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intraSrcImg");
        skipBlockTypeArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_block_type");
        searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_penalty");
        searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_precision");
        bidirWeightArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("bidir_weight");
        predictorsBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("predictors_buffer");
        countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("count_motion_vector_buffer");
        skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_motion_vector_buffer");
        intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_search_predictor_modes");
        skipResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_residuals");
        intraResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_residuals");
    }

    bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override {
        DEBUG_BREAK_IF(argIndex == intraSrcImgArgNum);
        if (argIndex == this->srcImgArgNum) {
            // rebind also as media block image
            this->vmeKernel->setArg(intraSrcImgArgNum, argSize, argVal);
        }
        return VmeBuiltinDispatchInfoBuilder::setExplicitArg(argIndex, argSize, argVal, err);
    }

    virtual bool isBidirKernel() const {
        return false;
    }

    bool validateFlags(uint32_t &outSkipBlockType) const {
        uint32_t flagsVal = VmeBuiltinDispatchInfoBuilder::template getKernelArgByValValue<uint32_t>(flagsArgNum);

        if ((flagsVal & CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) == CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) {
            return false;
        }

        if (flagsVal == CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL) {
            outSkipBlockType = CL_ME_MB_TYPE_16x16_INTEL;
        } else if ((flagsVal & CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) == CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) {
            outSkipBlockType = CL_ME_MB_TYPE_8x8_INTEL;
        }

        return true;
    }

    bool validateSkipBlockTypeArg(uint32_t &outSkipBlockType) const {
        if (skipBlockTypeArgNum == -1) {
            return true;
        }

        outSkipBlockType = VmeBuiltinDispatchInfoBuilder::template getKernelArgByValValue<uint32_t>(static_cast<uint32_t>(skipBlockTypeArgNum));

        switch (outSkipBlockType) {
        case CL_ME_MB_TYPE_16x16_INTEL:
            break;
        case CL_ME_MB_TYPE_8x8_INTEL:
            break;
        default:
            return false;
        }
        return true;
    }

    size_t getIntraSearchPredictorModesBuffExpSize(size_t blkNum) const {
        // vector size is 22 = 1 (16x16 luma block) + 4 (8x8 luma block) + 16 (4x4 luma block) + 1 (8x8 chroma block)
        int vectorSize = 22;
        size_t intraSearchPredictorModesBuffExpSize = blkNum * vectorSize;
        return intraSearchPredictorModesBuffExpSize;
    }

    size_t getSkipMotionVectorBufferExpSize(uint32_t skipBlockType, size_t blkNum) const {
        // vector size is either 1 (16x16 block) or 4 (8x8 block)
        // 0 to 8 skip MVs per MB
        // may be null if all MBs in the frame have 0 skip check MVs, in which case VME skip checks are not performed
        // layout assumes 4 (for bidir) or 8 (otherwise) skip check MVs per MB
        // row-major block layout; all MVs for a block are contiguous
        // buffer size depends on the block and frame size
        int vectorSize = (skipBlockType == CL_ME_MB_TYPE_16x16_INTEL) ? 1 : 4;
        int numChecks = (isBidirKernel() ? 4 : 8);
        size_t skipMotionVectorBufferExpSize = blkNum * numChecks * vectorSize * 2 * sizeof(cl_short);
        return skipMotionVectorBufferExpSize;
    }
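
    // Example: a 352x288 frame gives 22 * 18 = 396 blocks; with 16x16 skip blocks
    // and a non-bidirectional kernel this is 396 * 8 * 1 * 2 * sizeof(cl_short)
    // = 12672 bytes.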

    size_t getSkipResidualsBuffExpSize(uint32_t skipBlockType, size_t blkNum) const {
        /* output buffer of vectors of unsigned short SAD adjusted values corresponding to the input skip check MVs
           may be null if skip_motion_vector_buffer is null
           vector size is either 1 (16x16 block) or 4 (8x8 block)
           0 to 8 skip check residuals per MB
           layout always assumes 8 skip check residuals per MB
           row-major block layout; all MVs for a block are contiguous
           buffer size depends on the block and frame size */
        int vectorSize = 1;
        switch (skipBlockType) {
        case CL_ME_MB_TYPE_16x16_INTEL:
            vectorSize = 1;
            break;
        case CL_ME_MB_TYPE_8x8_INTEL:
            vectorSize = 4;
            break;
        default:
            break;
        }

        int numChecks = (isBidirKernel() ? 4 : 8);
        size_t skipResidualsBuffExpSize = blkNum * vectorSize * numChecks * sizeof(cl_ushort);
        return skipResidualsBuffExpSize;
    }

    size_t getIntraResidualsBuffExpSize(size_t blkNum) const {
        /* output buffer of vectors of unsigned short SAD adjusted values
           may be null, in which case the corresponding intra residuals are not returned
           vector size is 4 = 1 (16x16 luma block) + 1 (8x8 luma block) + 1 (4x4 luma block) + 1 (8x8 chroma block)
           1 vector per MB
           buffer size depends on the frame size */
        int vectorSize = 4;
        size_t intraResidualsBuffExpSize = (blkNum * sizeof(cl_ushort) * vectorSize);
        return intraResidualsBuffExpSize;
    }

    size_t getPredictorsBufferExpSize(size_t blkNum) const {
        size_t numPredictors = 8;
        size_t predictorsBufferExpSize = (blkNum * numPredictors * 2 * sizeof(cl_short));
        return predictorsBufferExpSize;
    }

    cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const override {
        cl_int basicVmeValidationStatus = VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, blkNum, blkMul);
        if (basicVmeValidationStatus != CL_SUCCESS) {
            return basicVmeValidationStatus;
        }

        uint32_t skipBlockType = CL_ME_MB_TYPE_16x16_INTEL;
        if (false == validateFlags(skipBlockType)) {
            return CL_INVALID_KERNEL_ARGS;
        }

        if (false == validateSkipBlockTypeArg(skipBlockType)) {
            return CL_OUT_OF_RESOURCES;
        }

        if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg<uint32_t>(searchCostPenaltyArgNum, CL_ME_COST_PENALTY_NONE_INTEL, CL_ME_COST_PENALTY_LOW_INTEL, CL_ME_COST_PENALTY_NORMAL_INTEL,
                                                                                       CL_ME_COST_PENALTY_HIGH_INTEL)) {
            return CL_OUT_OF_RESOURCES;
        }

        if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg<uint32_t>(searchCostPrecisionArgNum, CL_ME_COST_PRECISION_QPEL_INTEL, CL_ME_COST_PRECISION_HPEL_INTEL, CL_ME_COST_PRECISION_PEL_INTEL,
                                                                                       CL_ME_COST_PRECISION_DPEL_INTEL)) {
            return CL_OUT_OF_RESOURCES;
        }

        if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg<uint8_t>(bidirWeightArgNum, 0, CL_ME_BIDIR_WEIGHT_QUARTER_INTEL, CL_ME_BIDIR_WEIGHT_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_HALF_INTEL,
                                                                                      CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL)) {
            return CL_INVALID_KERNEL_ARGS;
        }

        std::pair<int32_t, size_t> bufferRequirements[] = {
            std::make_pair(countMotionVectorBufferArgNum, (blkNum * 2 * sizeof(cl_short))),
            std::make_pair(skipMotionVectorBufferArgNum, getSkipMotionVectorBufferExpSize(skipBlockType, blkNum)),
            std::make_pair(intraSearchPredictorModesArgNum, getIntraSearchPredictorModesBuffExpSize(blkNum)),
            std::make_pair(skipResidualsArgNum, getSkipResidualsBuffExpSize(skipBlockType, blkNum)),
            std::make_pair(intraResidualsArgNum, getIntraResidualsBuffExpSize(blkNum)),
            std::make_pair(predictorsBufferArgNum, getPredictorsBufferExpSize(blkNum))};
        for (const auto &req : bufferRequirements) {
            if (false == this->validateBufferSize(req.first, req.second)) {
                return CL_INVALID_BUFFER_SIZE;
            }
        }

        return CL_SUCCESS;
    }

  protected:
    uint32_t flagsArgNum;
    int32_t skipBlockTypeArgNum;
    uint32_t searchCostPenaltyArgNum;
    uint32_t searchCostPrecisionArgNum;
    int32_t bidirWeightArgNum;
    int32_t predictorsBufferArgNum;
    uint32_t countMotionVectorBufferArgNum;
    uint32_t skipMotionVectorBufferArgNum;
    uint32_t intraSearchPredictorModesArgNum;
    uint32_t skipResidualsArgNum;
    uint32_t intraResidualsArgNum;
    uint32_t intraSrcImgArgNum;
};

template <>
class BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> : public AdvancedVmeBuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : AdvancedVmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel,
                                                "block_advanced_motion_estimate_check_intel") {
    }

    cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset,
                               size_t gwWidthInBlk, size_t gwHeightInBlk) const override {
        cl_int basicAdvVmeValidationStatus = AdvancedVmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, gwWidthInBlk, gwHeightInBlk);
        if (basicAdvVmeValidationStatus != CL_SUCCESS) {
            return basicAdvVmeValidationStatus;
        }

        auto countMotionVectorBuff = castToObject<Buffer>((cl_mem)this->vmeKernel->getKernelArg(this->countMotionVectorBufferArgNum));
        if (countMotionVectorBuff == nullptr) {
            return CL_INVALID_BUFFER_SIZE;
        }

        return CL_SUCCESS;
    }
};

template <>
class BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel> : public AdvancedVmeBuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : AdvancedVmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel,
                                                "block_advanced_motion_estimate_bidirectional_check_intel") {
    }

    bool isBidirKernel() const override {
        return true;
    }
};
} // namespace NEO
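
For orientation, the buffer checks above reduce to a simple sizing rule. The following standalone sketch mirrors it under stated assumptions: the helper name is hypothetical, while the 16-pixel macro-block edge and the motion_vector_buffer formula come from the deleted code above.

#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring VmeBuiltinDispatchInfoBuilder::validateVmeDispatch:
// the motion vector buffer must hold blkNum * blkMul vectors of two cl_short each.
static size_t expectedMotionVectorBufferSize(size_t frameWidth, size_t frameHeight, size_t blkMul) {
    const size_t mb = 16; // VME macro-block edge, as in getBlkTraits
    const size_t blkNum = ((frameWidth + mb - 1) / mb) * ((frameHeight + mb - 1) / mb);
    return blkNum * blkMul * 2 * sizeof(int16_t); // cl_short is 16 bits wide
}

int main() {
    // 352x288 frame, 16x16 blocks (blkMul = 1): 396 blocks -> 1584 bytes minimum.
    return expectedMotionVectorBufferSize(352, 288, 1) == 1584 ? 0 : 1;
}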

@ -59,7 +59,6 @@ set(NEO_IGDRCL_TESTS__TARGET_OBJECTS
    $<TARGET_OBJECTS:mock_gmm>
    $<TARGET_OBJECTS:${SHARINGS_ENABLE_LIB_NAME}>
    $<TARGET_OBJECTS:${BUILTINS_SOURCES_LIB_NAME}>
    $<TARGET_OBJECTS:${BUILTINS_VME_LIB_NAME}>
    $<TARGET_OBJECTS:${BUILTINS_BINARIES_STATELESS_LIB_NAME}>
    $<TARGET_OBJECTS:${BUILTINS_BINARIES_STATELESS_HEAPLESS_LIB_NAME}>
    $<TARGET_OBJECTS:${BUILTINS_BINARIES_BINDFUL_LIB_NAME}>

@ -311,12 +310,6 @@ set(TEST_KERNEL_STATELESS
    test_files/stateless_kernel.cl
)

set(TEST_KERNEL_VME
    ${CMAKE_CURRENT_SOURCE_DIR}/test_files/vme_kernels.cl
    ${CMAKE_CURRENT_SOURCE_DIR}/test_files/media_kernels_backend.cl
    ${CMAKE_CURRENT_SOURCE_DIR}/test_files/media_kernels_frontend.cl
)

set(TEST_KERNEL_BINDLESS_internal_options
    "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
)

@ -336,12 +329,10 @@ set(TEST_KERNEL_PRINTF_internal_options_gen9lp
file(GLOB_RECURSE TEST_KERNELS test_files/*.cl)
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/simple_nonuniform.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/stateless_kernel.cl")
list(REMOVE_ITEM TEST_KERNELS ${TEST_KERNEL_VME})
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/${TEST_KERNEL_PRINTF}")

macro(macro_for_each_platform)
  set(PLATFORM_2_0_LOWER ${DEFAULT_SUPPORTED_2_0_${CORE_TYPE}_${PLATFORM_IT}_PLATFORM})
  set(PLATFORM_VME_LOWER ${DEFAULT_SUPPORTED_VME_${CORE_TYPE}_${PLATFORM_IT}_PLATFORM})

  set(PLATFORM_TEST_KERNELS ${TEST_KERNELS})
  set(IMAGE_SUPPORT FALSE)

@ -398,12 +389,6 @@ macro(macro_for_each_platform)
        neo_gen_kernels_with_options(${PLATFORM_IT_LOWER} ${DEVICE_ID} ${REVISION_ID} "${TEST_KERNEL_2_0}" ${TEST_KERNEL_2_0_options})
      endforeach()
    endif()
    if(PLATFORM_VME_LOWER)
      foreach(REVISION_CONFIG ${${PLATFORM_IT}_${CORE_TYPE}_REVISIONS})
        parse_revision_config(${REVISION_CONFIG} ${PLATFORM_IT_LOWER} DEVICE_ID REVISION_ID)
        neo_gen_kernels(${PLATFORM_IT_LOWER} ${DEVICE_ID} ${REVISION_ID} TRUE ${TEST_KERNEL_VME})
      endforeach()
    endif()
  endif()

  set(PREVIOUS_TARGET)

@ -1,140 +1,26 @@
/*
 * Copyright (C) 2018-2023 Intel Corporation
 * Copyright (C) 2018-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/device/device.h"

#include "opencl/source/built_ins/built_in_ops_vme.h"
#include "opencl/source/built_ins/vme_builtin.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/program/program.h"
#include "opencl/test/unit_test/fixtures/run_kernel_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"

#include "cl_api_tests.h"

using namespace NEO;

using ClCreateProgramWithBuiltInKernelsTests = ApiTests;

struct ClCreateProgramWithBuiltInVmeKernelsTests : ClCreateProgramWithBuiltInKernelsTests {
    void SetUp() override {
        ClCreateProgramWithBuiltInKernelsTests::SetUp();
        if (!castToObject<ClDevice>(testedClDevice)->getHardwareInfo().capabilityTable.supportsVme) {
            GTEST_SKIP();
        }

        pClDevice = pContext->getDevice(0);
    }

    ClDevice *pClDevice;
};

namespace ULT {

TEST_F(ClCreateProgramWithBuiltInKernelsTests, GivenInvalidContextWhenCreatingProgramWithBuiltInKernelsThenInvalidContextErrorIsReturned) {
TEST_F(ClCreateProgramWithBuiltInKernelsTests, GivenMediaKernelsWhenCreatingProgramWithBuiltInKernelsThenProgramIsNotCreated) {
    cl_int retVal = CL_SUCCESS;
    auto program = clCreateProgramWithBuiltInKernels(
        nullptr, // context
        1,       // num_devices
        nullptr, // device_list
        nullptr, // kernel_names
        &retVal);
    EXPECT_EQ(nullptr, program);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

TEST_F(ClCreateProgramWithBuiltInKernelsTests, GivenNoKernelsWhenCreatingProgramWithBuiltInKernelsThenInvalidValueErrorIsReturned) {
    cl_int retVal = CL_SUCCESS;
    auto program = clCreateProgramWithBuiltInKernels(
        pContext,        // context
        1,               // num_devices
        &testedClDevice, // device_list
        "",              // kernel_names
        &retVal);
    EXPECT_EQ(nullptr, program);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

TEST_F(ClCreateProgramWithBuiltInKernelsTests, GivenNoDeviceWhenCreatingProgramWithBuiltInKernelsThenInvalidValueErrorIsReturned) {
    cl_int retVal = CL_SUCCESS;
    auto program = clCreateProgramWithBuiltInKernels(
        pContext,        // context
        0,               // num_devices
        &testedClDevice, // device_list
        "",              // kernel_names
        &retVal);
    EXPECT_EQ(nullptr, program);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

TEST_F(ClCreateProgramWithBuiltInKernelsTests, GivenNoKernelsAndNoReturnWhenCreatingProgramWithBuiltInKernelsThenProgramIsNotCreated) {
    auto program = clCreateProgramWithBuiltInKernels(
        pContext,        // context
        1,               // num_devices
        &testedClDevice, // device_list
        "",              // kernel_names
        nullptr);
    EXPECT_EQ(nullptr, program);
}

TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenDeviceNotAssociatedWithContextWhenCreatingProgramWithBuiltInThenInvalidDeviceErrorIsReturned) {
    cl_program pProgram = nullptr;

    const char *kernelNamesString = {
        "block_advanced_motion_estimate_bidirectional_check_intel;"
        "block_motion_estimate_intel;"
        "block_advanced_motion_estimate_check_intel;"};

    MockClDevice invalidDevice(new MockDevice());

    cl_device_id devicesForProgram[] = {&invalidDevice};

    pProgram = clCreateProgramWithBuiltInKernels(
        pContext,
        1,
        devicesForProgram,
        kernelNamesString,
        &retVal);
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
    EXPECT_EQ(nullptr, pProgram);

    retVal = CL_INVALID_PROGRAM;
    devicesForProgram[0] = nullptr;

    pProgram = clCreateProgramWithBuiltInKernels(
        pContext,
        1,
        devicesForProgram,
        kernelNamesString,
        &retVal);
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
    EXPECT_EQ(nullptr, pProgram);
}

TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWhenCreatingProgramWithBuiltInKernelsThenProgramIsSuccessfullyCreated) {
    cl_int retVal = CL_SUCCESS;

    overwriteBuiltInBinaryName("media_kernels_frontend");

    const char *kernelNamesString = {
        "block_advanced_motion_estimate_bidirectional_check_intel;"
        "block_motion_estimate_intel;"
        "block_advanced_motion_estimate_check_intel;"};

    const char *kernelNames[] = {
        "block_motion_estimate_intel",
        "block_advanced_motion_estimate_check_intel",
        "block_advanced_motion_estimate_bidirectional_check_intel",
    };

    cl_program program = clCreateProgramWithBuiltInKernels(
        pContext, // context
        1,        // num_devices

@ -142,160 +28,7 @@ TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWhenCrea
        kernelNamesString, // kernel_names
        &retVal);

    restoreBuiltInBinaryName();
    EXPECT_NE(nullptr, program);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &kernelName : kernelNames) {
        cl_kernel kernel = clCreateKernel(
            program,
            kernelName,
            &retVal);
        ASSERT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, kernel);

        retVal = clReleaseKernel(kernel);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }

    retVal = clReleaseProgram(program);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWithOptionsWhenCreatingProgramWithBuiltInKernelsThenProgramIsSuccessfullyCreatedWithThoseOptions) {
    cl_int retVal = CL_SUCCESS;

    overwriteBuiltInBinaryName("media_kernels_frontend");

    const char *kernelNamesString = {
        "block_motion_estimate_intel;"};

    cl_program program = clCreateProgramWithBuiltInKernels(
        pContext,          // context
        1,                 // num_devices
        &testedClDevice,   // device_list
        kernelNamesString, // kernel_names
        &retVal);

    restoreBuiltInBinaryName();

    auto neoProgram = castToObject<Program>(program);
    auto builtinOptions = neoProgram->getOptions();
    auto it = builtinOptions.find("HW_NULL_CHECK");
    EXPECT_EQ(std::string::npos, it);

    clReleaseProgram(program);
}

TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockMotionEstimateKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) {
    cl_int retVal = CL_SUCCESS;

    overwriteBuiltInBinaryName("media_kernels_backend");
    Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockMotionEstimateIntel, *pClDevice);
    restoreBuiltInBinaryName();

    overwriteBuiltInBinaryName("media_kernels_frontend");

    const char *kernelNamesString = {
        "block_motion_estimate_intel;"};

    cl_program program = clCreateProgramWithBuiltInKernels(
        pContext,          // context
        1,                 // num_devices
        &testedClDevice,   // device_list
        kernelNamesString, // kernel_names
        &retVal);
    restoreBuiltInBinaryName();

    cl_kernel kernel = clCreateKernel(
        program,
        "block_motion_estimate_intel",
        &retVal);

    auto pMultiDeviceKernel = castToObject<MultiDeviceKernel>(kernel);
    auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex);
    EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder);
    EXPECT_EQ(6U, kernNeo->getKernelArgsNumber());

    auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockMotionEstimateIntel, *pClDevice);
    EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder);

    clReleaseKernel(kernel);
    clReleaseProgram(program);
}

TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEstimateKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) {
    cl_int retVal = CL_SUCCESS;

    overwriteBuiltInBinaryName("media_kernels_backend");
    Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, *pClDevice);
    restoreBuiltInBinaryName();

    overwriteBuiltInBinaryName("media_kernels_frontend");

    const char *kernelNamesString = {
        "block_advanced_motion_estimate_check_intel;"};

    cl_program program = clCreateProgramWithBuiltInKernels(
        pContext,          // context
        1,                 // num_devices
        &testedClDevice,   // device_list
        kernelNamesString, // kernel_names
        &retVal);
    restoreBuiltInBinaryName();

    cl_kernel kernel = clCreateKernel(
        program,
        "block_advanced_motion_estimate_check_intel",
        &retVal);

    auto pMultiDeviceKernel = castToObject<MultiDeviceKernel>(kernel);
    auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex);
    EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder);
    EXPECT_EQ(15U, kernNeo->getKernelArgsNumber());

    auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, *pClDevice);
    EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder);

    clReleaseKernel(kernel);
    clReleaseProgram(program);
}

TEST_F(ClCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEstimateBidirectionalCheckKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) {
    cl_int retVal = CL_SUCCESS;

    overwriteBuiltInBinaryName("media_kernels_backend");
    Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *pClDevice);
    restoreBuiltInBinaryName();

    overwriteBuiltInBinaryName("media_kernels_frontend");

    const char *kernelNamesString = {
        "block_advanced_motion_estimate_bidirectional_check_intel;"};

    cl_program program = clCreateProgramWithBuiltInKernels(
        pContext,          // context
        1,                 // num_devices
        &testedClDevice,   // device_list
        kernelNamesString, // kernel_names
        &retVal);
    restoreBuiltInBinaryName();

    cl_kernel kernel = clCreateKernel(
        program,
        "block_advanced_motion_estimate_bidirectional_check_intel",
        &retVal);

    auto pMultiDeviceKernel = castToObject<MultiDeviceKernel>(kernel);
    auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex);
    EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder);
    EXPECT_EQ(20U, kernNeo->getKernelArgsNumber());

    auto ctxNeo = castToObject<Context>(pContext);
    auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *ctxNeo->getDevice(0));
    EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder);

    clReleaseKernel(kernel);
    clReleaseProgram(program);
    EXPECT_EQ(nullptr, program);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
} // namespace ULT
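
For reference, a minimal host-side sketch of the call pattern these tests exercise, using only standard OpenCL entry points. The context and device are assumed to be created elsewhere; on drivers without VME support (including NEO after this commit) the call is expected to fail with an error such as CL_INVALID_VALUE.

#include <CL/cl.h>
#include <cstdio>

cl_program createMotionEstimateProgram(cl_context context, cl_device_id device) {
    cl_int retVal = CL_SUCCESS;
    cl_program program = clCreateProgramWithBuiltInKernels(
        context, 1, &device, "block_motion_estimate_intel", &retVal);
    if (retVal != CL_SUCCESS) {
        std::fprintf(stderr, "clCreateProgramWithBuiltInKernels failed: %d\n", retVal);
        return nullptr;
    }
    return program; // release with clReleaseProgram when done
}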

@ -30,8 +30,6 @@
#include "opencl/source/accelerators/intel_motion_estimation.h"
#include "opencl/source/built_ins/aux_translation_builtin.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/built_ins/vme_builtin.h"
#include "opencl/source/built_ins/vme_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/test/unit_test/built_ins/built_ins_file_names.h"

@ -122,15 +120,6 @@ class BuiltInTests
    std::string allBuiltIns;
};

struct VmeBuiltInTests : BuiltInTests {
    void SetUp() override {
        BuiltInTests::SetUp();
        if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) {
            GTEST_SKIP();
        }
    }
};

struct AuxBuiltInTests : BuiltInTests, public ::testing::WithParamInterface<KernelObjForAuxTranslation::Type> {
    void SetUp() override {
        BuiltInTests::SetUp();

@ -1350,167 +1339,6 @@ TEST_F(BuiltInTests, WhenSettingExplictArgThenTrueIsReturned) {
    EXPECT_TRUE(ret);
}

TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenValidPointerIsReturned) {
    overwriteBuiltInBinaryName("media_kernels_backend");

    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::vmeBlockMotionEstimateIntel, EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pClDevice);
        EXPECT_NE(nullptr, &builder);
    }

    restoreBuiltInBinaryName();
}

TEST_F(VmeBuiltInTests, givenInvalidBuiltInOpWhenGetVmeBuilderInfoThenExceptionIsThrown) {
    EXPECT_THROW(Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::count, *pClDevice), std::exception);
}

TEST_F(VmeBuiltInTests, GivenVmeBuilderAndInvalidParamsWhenGettingDispatchInfoThenEmptyKernelIsReturned) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::vmeBlockMotionEstimateIntel, EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pClDevice);

        MultiDispatchInfo outMdi;
        Vec3<size_t> gws{352, 288, 0};
        Vec3<size_t> elws{0, 0, 0};
        Vec3<size_t> offset{0, 0, 0};
        auto ret = builder.buildDispatchInfos(outMdi, nullptr, 0, gws, elws, offset);
        EXPECT_FALSE(ret);
        EXPECT_EQ(0U, outMdi.size());
    }
    restoreBuiltInBinaryName();
}

TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
    MockKernelWithInternals mockKernel{*pClDevice};
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;

    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::vmeBlockMotionEstimateIntel, *pClDevice);
    restoreBuiltInBinaryName();

    MultiDispatchInfo outMdi;
    Vec3<size_t> gws{352, 288, 0};
    Vec3<size_t> elws{0, 0, 0};
    Vec3<size_t> offset{16, 0, 0};

    MockBuffer mb;
    cl_mem bufferArg = static_cast<cl_mem>(&mb);

    cl_int err;
    constexpr uint32_t bufferArgNum = 3;
    bool ret = builder.setExplicitArg(bufferArgNum, sizeof(cl_mem), &bufferArg, err);
    EXPECT_FALSE(ret);
    EXPECT_EQ(CL_SUCCESS, err);

    ret = builder.buildDispatchInfos(outMdi, mockKernel.mockKernel, 0, gws, elws, offset);
    EXPECT_TRUE(ret);
    EXPECT_EQ(1U, outMdi.size());

    auto outDi = outMdi.begin();
    EXPECT_EQ(Vec3<size_t>(352, 1, 1), outDi->getGWS());
    EXPECT_EQ(Vec3<size_t>(16, 1, 1), outDi->getEnqueuedWorkgroupSize());
    EXPECT_EQ(Vec3<size_t>(16, 0, 0), outDi->getOffset());
    EXPECT_NE(mockKernel.mockKernel, outDi->getKernel());

    EXPECT_EQ(bufferArg, outDi->getKernel()->getKernelArg(bufferArgNum));

    constexpr uint32_t vmeImplicitArgsBase = 6;
    constexpr uint32_t vmeImplicitArgs = 3;
    ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs.size());
    uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride
    for (uint32_t i = 0; i < vmeImplicitArgs; ++i) {
        auto &argAsVal = outDi->getKernel()->getKernelInfo().getArgDescriptorAt(vmeImplicitArgsBase + i).as<ArgDescValue>();
        EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + argAsVal.elements[0].offset)));
    }
}

TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
    MockKernelWithInternals mockKernel{*pClDevice};
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;

    Vec3<size_t> gws{352, 288, 0};
    Vec3<size_t> elws{0, 0, 0};
    Vec3<size_t> offset{0, 0, 0};

    cl_int err;

    constexpr uint32_t bufferArgNum = 7;
    MockBuffer mb;
    cl_mem bufferArg = static_cast<cl_mem>(&mb);

    constexpr uint32_t srcImageArgNum = 1;
    auto image = std::unique_ptr<Image>(Image2dHelper<>::create(pContext));
    cl_mem srcImageArg = static_cast<cl_mem>(image.get());

    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        MultiDispatchInfo outMdi;
        overwriteBuiltInBinaryName("media_kernels_backend");
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pClDevice);
        restoreBuiltInBinaryName();

        bool ret = builder.setExplicitArg(srcImageArgNum, sizeof(cl_mem), &srcImageArg, err);
        EXPECT_FALSE(ret);
        EXPECT_EQ(CL_SUCCESS, err);

        ret = builder.setExplicitArg(bufferArgNum, sizeof(cl_mem), &bufferArg, err);
        EXPECT_FALSE(ret);
        EXPECT_EQ(CL_SUCCESS, err);

        ret = builder.buildDispatchInfos(outMdi, mockKernel.mockKernel, 0, gws, elws, offset);
        EXPECT_TRUE(ret);
        EXPECT_EQ(1U, outMdi.size());

        auto outDi = outMdi.begin();
        EXPECT_EQ(Vec3<size_t>(352, 1, 1), outDi->getGWS());
        EXPECT_EQ(Vec3<size_t>(16, 1, 1), outDi->getEnqueuedWorkgroupSize());
        EXPECT_NE(mockKernel.mockKernel, outDi->getKernel());

        EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(srcImageArgNum));

        uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo().getArgNumByName("intraSrcImg");
        uint32_t vmeImplicitArgs = 4;
        ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().getExplicitArgs().size());
        EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(vmeImplicitArgsBase));
        ++vmeImplicitArgsBase;
        --vmeImplicitArgs;
        uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride
        for (uint32_t i = 0; i < vmeImplicitArgs; ++i) {
            auto &argAsVal = outDi->getKernel()->getKernelInfo().getArgDescriptorAt(vmeImplicitArgsBase + i).as<ArgDescValue>();
            EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + argAsVal.elements[0].offset)));
        }
    }
}

TEST_F(VmeBuiltInTests, WhenGettingBuiltinAsStringThenCorrectStringIsReturned) {
    EXPECT_EQ(0, strcmp("aux_translation.builtin_kernel", getBuiltinAsString(EBuiltInOps::auxTranslation)));
    EXPECT_EQ(0, strcmp("copy_buffer_to_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyBufferToBuffer)));
    EXPECT_EQ(0, strcmp("copy_buffer_rect.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyBufferRect)));
    EXPECT_EQ(0, strcmp("fill_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::fillBuffer)));
    EXPECT_EQ(0, strcmp("copy_buffer_to_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyBufferToImage3d)));
    EXPECT_EQ(0, strcmp("copy_image3d_to_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyImage3dToBuffer)));
    EXPECT_EQ(0, strcmp("copy_image_to_image1d.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyImageToImage1d)));
    EXPECT_EQ(0, strcmp("copy_image_to_image2d.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyImageToImage2d)));
    EXPECT_EQ(0, strcmp("copy_image_to_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::copyImageToImage3d)));
    EXPECT_EQ(0, strcmp("copy_kernel_timestamps.builtin_kernel", getBuiltinAsString(EBuiltInOps::queryKernelTimestamps)));
    EXPECT_EQ(0, strcmp("fill_image1d.builtin_kernel", getBuiltinAsString(EBuiltInOps::fillImage1d)));
    EXPECT_EQ(0, strcmp("fill_image2d.builtin_kernel", getBuiltinAsString(EBuiltInOps::fillImage2d)));
    EXPECT_EQ(0, strcmp("fill_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::fillImage3d)));
    EXPECT_EQ(0, strcmp("vme_block_motion_estimate_intel.builtin_kernel", getBuiltinAsString(EBuiltInOps::vmeBlockMotionEstimateIntel)));
    EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_check_intel.builtin_kernel", getBuiltinAsString(EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel)));
    EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_bidirectional_check_intel", getBuiltinAsString(EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel)));
    EXPECT_EQ(0, strcmp("unknown", getBuiltinAsString(EBuiltInOps::count)));
}

TEST_F(BuiltInTests, GivenEncodeTypeWhenGettingExtensionThenCorrectStringIsReturned) {
    EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::any)));
    EXPECT_EQ(0, strcmp(".bin", BuiltinCode::getExtension(BuiltinCode::ECodeType::binary)));

@ -1670,9 +1498,6 @@ TEST_F(BuiltInTests, GivenBuiltinTypeSourceWhenGettingBuiltinResourceThenResourc
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::fillImage1d, BuiltinCode::ECodeType::source, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::fillImage2d, BuiltinCode::ECodeType::source, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::fillImage3d, BuiltinCode::ECodeType::source, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::vmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::source, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::source, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::source, *pDevice).size());
    EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::count, BuiltinCode::ECodeType::source, *pDevice).size());
}

@ -1690,9 +1515,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, GivenBuiltinTypeBinaryWhenGettingBuilt
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::fillImage1d, BuiltinCode::ECodeType::binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::fillImage2d, BuiltinCode::ECodeType::binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::fillImage3d, BuiltinCode::ECodeType::binary, *pDevice).size());
    EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::vmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::binary, *pDevice).size());
    EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::binary, *pDevice).size());
    EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::binary, *pDevice).size());
    EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::count, BuiltinCode::ECodeType::binary, *pDevice).size());
}

@ -1804,554 +1626,6 @@ TEST_F(BuiltInTests, GivenForce32bitWhenCreatingProgramThenCorrectKernelIsCreate
    const_cast<DeviceInfo *>(&pDevice->getDeviceInfo())->force32BitAddressess = force32BitAddressess;
}

TEST_F(BuiltInTests, GivenVmeKernelWhenGettingDeviceInfoThenCorrectVmeVersionIsReturned) {
    if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) {
        GTEST_SKIP();
    }
    cl_uint param;
    auto ret = pClDevice->getDeviceInfo(CL_DEVICE_ME_VERSION_INTEL, sizeof(param), &param, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_EQ(static_cast<cl_uint>(CL_ME_VERSION_ADVANCED_VER_2_INTEL), param);
}

TEST_F(VmeBuiltInTests, WhenVmeKernelIsCreatedThenParamsAreCorrect) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> vmeBuilder(*this->pBuiltIns, *pClDevice);
    restoreBuiltInBinaryName();

    cl_int err;

    {
        int32_t bufArgNum = 7;

        cl_mem mem = 0;
        vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err);
        EXPECT_TRUE(vmeBuilder.validateBufferSize(-1, 16));
        EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, 16));

        MockBuffer mb;
        mem = &mb;
        vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err);
        EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize()));
        EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize() / 2));
        EXPECT_FALSE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize() * 2));

        mem = 0;
        vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err);
    }

    {
        EXPECT_TRUE(vmeBuilder.validateEnumVal(1, 1, 2, 3, 4));
        EXPECT_TRUE(vmeBuilder.validateEnumVal(1, 1));
        EXPECT_TRUE(vmeBuilder.validateEnumVal(3, 1, 2, 3));

        EXPECT_FALSE(vmeBuilder.validateEnumVal(1, 3, 4));
        EXPECT_FALSE(vmeBuilder.validateEnumVal(1));
        EXPECT_FALSE(vmeBuilder.validateEnumVal(1, 2));

        int32_t valArgNum = 3;
        uint32_t val = 7;
        vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err);
        EXPECT_FALSE(vmeBuilder.validateEnumArg<uint32_t>(valArgNum, 3));
        EXPECT_TRUE(vmeBuilder.validateEnumArg<uint32_t>(valArgNum, 7));

        val = 0;
        vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err);
    }

    {
        int32_t valArgNum = 3;
        uint32_t val = 7;
        vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err);
        EXPECT_EQ(val, vmeBuilder.getKernelArgByValValue<uint32_t>(valArgNum));
        val = 11;
        vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err);
        EXPECT_EQ(val, vmeBuilder.getKernelArgByValValue<uint32_t>(valArgNum));

        val = 0;
        vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err);
    }
}

TEST_F(VmeBuiltInTests, WhenVmeKernelIsCreatedThenDispatchIsBidirectional) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> avmeBuilder(*this->pBuiltIns, *pClDevice);
    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel> avmeBidirBuilder(*this->pBuiltIns, *pClDevice);
    restoreBuiltInBinaryName();

    EXPECT_FALSE(avmeBuilder.isBidirKernel());
    EXPECT_TRUE(avmeBidirBuilder.isBidirKernel());
}

struct ImageVmeValidFormat : Image2dDefaults {
    static const cl_image_format imageFormat;
    static const cl_image_desc iamgeDesc;
};

const cl_image_format ImageVmeValidFormat::imageFormat = {
    CL_R,
    CL_UNORM_INT8};

const cl_image_desc ImageVmeValidFormat::iamgeDesc = {
    CL_MEM_OBJECT_IMAGE1D,
    8192,
    16,
    1,
    1,
    0,
    0,
    0,
    0,
    {nullptr}};

struct ImageVmeInvalidDataType : Image2dDefaults {
    static const cl_image_format imageFormat;
};

const cl_image_format ImageVmeInvalidDataType::imageFormat = {
    CL_R,
    CL_FLOAT};

struct ImageVmeInvalidChannelOrder : Image2dDefaults {
    static const cl_image_format imageFormat;
};

const cl_image_format ImageVmeInvalidChannelOrder::imageFormat = {
    CL_RGBA,
    CL_UNORM_INT8};

TEST_F(VmeBuiltInTests, WhenValidatingImagesThenCorrectResponses) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltInOp<EBuiltInOps::vmeBlockMotionEstimateIntel> vmeBuilder(*this->pBuiltIns, *pClDevice);
    restoreBuiltInBinaryName();

    uint32_t srcImgArgNum = 1;
    uint32_t refImgArgNum = 2;

    cl_int err;
    { // validate images are not null
        std::unique_ptr<Image> image1(ImageHelper<ImageVmeValidFormat>::create(pContext));

        cl_mem srcImgMem = 0;
        EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateImages(Vec3<size_t>{3, 3, 0}, Vec3<size_t>{0, 0, 0}));

        srcImgMem = image1.get();
        vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
        EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateImages(Vec3<size_t>{3, 3, 0}, Vec3<size_t>{0, 0, 0}));
    }

    { // validate image formats
        std::unique_ptr<Image> imageValid(ImageHelper<ImageVmeValidFormat>::create(pContext));
        std::unique_ptr<Image> imageInvalidDataType(ImageHelper<ImageVmeInvalidDataType>::create(pContext));
        std::unique_ptr<Image> imageChannelOrder(ImageHelper<ImageVmeInvalidChannelOrder>::create(pContext));

        Image *images[] = {imageValid.get(), imageInvalidDataType.get(), imageChannelOrder.get()};
        for (Image *srcImg : images) {
            for (Image *dstImg : images) {
                cl_mem srcImgMem = srcImg;
                cl_mem refImgMem = dstImg;
                vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
                vmeBuilder.setExplicitArg(refImgArgNum, sizeof(refImgMem), &refImgMem, err);
                bool shouldSucceed = (srcImg == imageValid.get()) && (dstImg == imageValid.get());
                if (shouldSucceed) {
                    EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateImages(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}));
                } else {
                    EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, vmeBuilder.validateImages(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}));
                }
            }
        }
    }

    { // validate image tiling
        std::unique_ptr<Image> imageValid(ImageHelper<ImageVmeValidFormat>::create(pContext));
        DebugManagerStateRestore restorer;
        debugManager.flags.ForceLinearImages.set(true);
        std::unique_ptr<Image> imageLinear(ImageHelper<ImageVmeValidFormat>::create(pContext));
        Image *images[] = {imageValid.get(), imageLinear.get()};
        for (Image *srcImg : images) {
            for (Image *dstImg : images) {
                cl_mem srcImgMem = srcImg;
                cl_mem refImgMem = dstImg;
                vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
                vmeBuilder.setExplicitArg(refImgArgNum, sizeof(refImgMem), &refImgMem, err);
                bool shouldSucceed = (srcImg == imageValid.get()) && (dstImg == imageValid.get());
                if (shouldSucceed) {
                    EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateImages(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}));
                } else {
                    EXPECT_EQ(CL_OUT_OF_RESOURCES, vmeBuilder.validateImages(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}));
                }
            }
        }
    }

    { // validate region size
        std::unique_ptr<Image> imageValid(ImageHelper<ImageVmeValidFormat>::create(pContext));
        cl_mem imgValidMem = imageValid.get();
        vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(imgValidMem), &imgValidMem, err);
        vmeBuilder.setExplicitArg(refImgArgNum, sizeof(imgValidMem), &imgValidMem, err);

        EXPECT_EQ(CL_INVALID_IMAGE_SIZE, vmeBuilder.validateImages(Vec3<size_t>{imageValid->getImageDesc().image_width + 1, 1, 0}, Vec3<size_t>{0, 0, 0}));
        EXPECT_EQ(CL_INVALID_IMAGE_SIZE, vmeBuilder.validateImages(Vec3<size_t>{1, imageValid->getImageDesc().image_height + 1, 0}, Vec3<size_t>{0, 0, 0}));
    }
}

TEST_F(VmeBuiltInTests, WhenValidatingFlagsThenValidFlagCombinationsReturnTrue) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> vmeBuilder(*this->pBuiltIns, *pClDevice);
    restoreBuiltInBinaryName();

    uint32_t defaultSkipBlockVal = 8192;
    uint32_t flagsArgNum = 3;

    std::tuple<uint32_t, bool, uint32_t> flagsToTest[] = {
        std::make_tuple(CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL, false, defaultSkipBlockVal),
        std::make_tuple(CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL, true, CL_ME_MB_TYPE_16x16_INTEL),
        std::make_tuple(CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL, true, CL_ME_MB_TYPE_8x8_INTEL),
        std::make_tuple(defaultSkipBlockVal, true, defaultSkipBlockVal),
    };

    cl_int err;
    for (auto &conf : flagsToTest) {
        uint32_t skipBlock = defaultSkipBlockVal;
        vmeBuilder.setExplicitArg(flagsArgNum, sizeof(uint32_t), &std::get<0>(conf), err);
        bool validationResult = vmeBuilder.validateFlags(skipBlock);
        if (std::get<1>(conf)) {
            EXPECT_TRUE(validationResult);
        } else {
            EXPECT_FALSE(validationResult);
        }
        EXPECT_EQ(std::get<2>(conf), skipBlock);
    }
}

TEST_F(VmeBuiltInTests, WhenValidatingSkipBlockTypeThenCorrectResponses) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel> avmeBidirectionalBuilder(*this->pBuiltIns, *pClDevice);
    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> avmeBuilder(*this->pBuiltIns, *pClDevice);
    restoreBuiltInBinaryName();

    cl_int err;
    uint32_t skipBlockTypeArgNum = 4;

    uint32_t skipBlockType = 8192;
    bool ret = avmeBidirectionalBuilder.validateSkipBlockTypeArg(skipBlockType);
    EXPECT_TRUE(ret);
    EXPECT_EQ(8192U, skipBlockType);

    skipBlockType = 8192U;
    avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err);
    ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType);
    EXPECT_FALSE(ret);

    skipBlockType = CL_ME_MB_TYPE_16x16_INTEL;
    avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err);
    skipBlockType = 8192U;
    ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType);
    EXPECT_TRUE(ret);
    EXPECT_EQ(static_cast<uint32_t>(CL_ME_MB_TYPE_16x16_INTEL), skipBlockType);

    skipBlockType = CL_ME_MB_TYPE_8x8_INTEL;
    avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err);
    skipBlockType = 8192U;
    ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType);
    EXPECT_TRUE(ret);
    EXPECT_EQ(static_cast<uint32_t>(CL_ME_MB_TYPE_8x8_INTEL), skipBlockType);
}

TEST_F(VmeBuiltInTests, GivenAcceleratorWhenExplicitlySettingArgThenFalseIsReturned) {
    overwriteBuiltInBinaryName("media_kernels_backend");

    BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> vmeBuilder(*this->pBuiltIns, *pClDevice);
    restoreBuiltInBinaryName();

    cl_int err;
    uint32_t aceleratorArgNum = 0;
    bool ret = vmeBuilder.setExplicitArg(aceleratorArgNum, sizeof(cl_accelerator_intel), nullptr, err);
    EXPECT_FALSE(ret);
    EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, err);

    cl_motion_estimation_desc_intel acceleratorDesc;
    acceleratorDesc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL;
    acceleratorDesc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL;
    acceleratorDesc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL;
    acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL;
    auto neoAccelerator = std::unique_ptr<VmeAccelerator>(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err));
    ASSERT_NE(nullptr, neoAccelerator.get());
    cl_accelerator_intel clAccel = neoAccelerator.get();
    ret = vmeBuilder.setExplicitArg(aceleratorArgNum, sizeof(cl_accelerator_intel), &clAccel, err);
    EXPECT_FALSE(ret);
    EXPECT_EQ(CL_SUCCESS, err);
}

TEST_F(VmeBuiltInTests, WhenValidatingDispatchThenCorrectReturns) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    struct MockVmeBuilder : BuiltInOp<EBuiltInOps::vmeBlockMotionEstimateIntel> {
        using BuiltInOp<EBuiltInOps::vmeBlockMotionEstimateIntel>::BuiltInOp;

        cl_int validateVmeDispatch(const Vec3<size_t> &inputRegion, const Vec3<size_t> &offset, size_t blkNum, size_t blkMul) const override {
            receivedInputRegion = inputRegion;
            receivedOffset = offset;
            receivedBlkNum = blkNum;
            receivedBlkMul = blkMul;
            wasValidateVmeDispatchCalled = true;
            return valueToReturn;
        }

        mutable bool wasValidateVmeDispatchCalled = false;
        mutable Vec3<size_t> receivedInputRegion = {0, 0, 0};
        mutable Vec3<size_t> receivedOffset = {0, 0, 0};
        mutable size_t receivedBlkNum = 0;
        mutable size_t receivedBlkMul = 0;
        mutable cl_int valueToReturn = CL_SUCCESS;
    };
|
||||
|
||||
uint32_t aaceleratorArgNum = 0;
|
||||
MockVmeBuilder vmeBuilder(*this->pBuiltIns, *pClDevice);
|
||||
restoreBuiltInBinaryName();
|
||||
|
||||
cl_int ret = vmeBuilder.validateDispatch(nullptr, 1, Vec3<size_t>{16, 16, 0}, Vec3<size_t>{16, 1, 0}, Vec3<size_t>{0, 0, 0});
|
||||
EXPECT_EQ(CL_INVALID_WORK_DIMENSION, ret);
|
||||
|
||||
ret = vmeBuilder.validateDispatch(nullptr, 3, Vec3<size_t>{16, 16, 0}, Vec3<size_t>{16, 1, 0}, Vec3<size_t>{0, 0, 0});
|
||||
EXPECT_EQ(CL_INVALID_WORK_DIMENSION, ret);
|
||||
|
||||
ret = vmeBuilder.validateDispatch(nullptr, 2, Vec3<size_t>{16, 16, 0}, Vec3<size_t>{16, 1, 0}, Vec3<size_t>{0, 0, 0});
|
||||
EXPECT_EQ(CL_INVALID_KERNEL_ARGS, ret); // accelerator not set
|
||||
EXPECT_FALSE(vmeBuilder.wasValidateVmeDispatchCalled);
|
||||
|
||||
cl_int err;
|
||||
cl_motion_estimation_desc_intel acceleratorDesc;
|
||||
acceleratorDesc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL;
|
||||
acceleratorDesc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL;
|
||||
acceleratorDesc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL;
|
||||
|
||||
Vec3<size_t> gws{16, 16, 0};
|
||||
Vec3<size_t> lws{16, 1, 0};
|
||||
Vec3<size_t> off{0, 0, 0};
|
||||
size_t gwWidthInBlk = 0;
|
||||
size_t gwHeightInBlk = 0;
|
||||
vmeBuilder.getBlkTraits(gws, gwWidthInBlk, gwHeightInBlk);
|
||||
|
||||
{
|
||||
acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL;
|
||||
auto neoAccelerator = std::unique_ptr<VmeAccelerator>(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err));
|
||||
ASSERT_NE(nullptr, neoAccelerator.get());
|
||||
cl_accelerator_intel clAccel = neoAccelerator.get();
|
||||
vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err);
|
||||
vmeBuilder.wasValidateVmeDispatchCalled = false;
|
||||
auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off);
|
||||
EXPECT_EQ(CL_SUCCESS, ret);
|
||||
EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled);
|
||||
EXPECT_EQ(gws, vmeBuilder.receivedInputRegion);
|
||||
EXPECT_EQ(off, vmeBuilder.receivedOffset);
|
||||
|
||||
EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum);
|
||||
EXPECT_EQ(1U, vmeBuilder.receivedBlkMul);
|
||||
}
|
||||
|
||||
{
|
||||
acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_4x4_INTEL;
|
||||
auto neoAccelerator = std::unique_ptr<VmeAccelerator>(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err));
|
||||
ASSERT_NE(nullptr, neoAccelerator.get());
|
||||
cl_accelerator_intel clAccel = neoAccelerator.get();
|
||||
vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err);
|
||||
vmeBuilder.wasValidateVmeDispatchCalled = false;
|
||||
auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off);
|
||||
EXPECT_EQ(CL_SUCCESS, ret);
|
||||
EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled);
|
||||
EXPECT_EQ(gws, vmeBuilder.receivedInputRegion);
|
||||
EXPECT_EQ(off, vmeBuilder.receivedOffset);
|
||||
|
||||
EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum);
|
||||
EXPECT_EQ(16U, vmeBuilder.receivedBlkMul);
|
||||
}
|
||||
|
||||
{
|
||||
acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_8x8_INTEL;
|
||||
auto neoAccelerator = std::unique_ptr<VmeAccelerator>(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err));
|
||||
ASSERT_NE(nullptr, neoAccelerator.get());
|
||||
cl_accelerator_intel clAccel = neoAccelerator.get();
|
||||
vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err);
|
||||
vmeBuilder.wasValidateVmeDispatchCalled = false;
|
||||
vmeBuilder.valueToReturn = 37;
|
||||
auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off);
|
||||
EXPECT_EQ(37, ret);
|
||||
EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled);
|
||||
EXPECT_EQ(gws, vmeBuilder.receivedInputRegion);
|
||||
EXPECT_EQ(off, vmeBuilder.receivedOffset);
|
||||
|
||||
EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum);
|
||||
EXPECT_EQ(4U, vmeBuilder.receivedBlkMul);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VmeBuiltInTests, WhenValidatingVmeDispatchThenCorrectReturns) {
|
||||
overwriteBuiltInBinaryName("media_kernels_backend");
|
||||
BuiltInOp<EBuiltInOps::vmeBlockMotionEstimateIntel> vmeBuilder(*this->pBuiltIns, *pClDevice);
|
||||
restoreBuiltInBinaryName();
|
||||
|
||||
cl_int err;
|
||||
|
||||
// images not set
|
||||
EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
uint32_t srcImgArgNum = 1;
|
||||
uint32_t refImgArgNum = 2;
|
||||
|
||||
std::unique_ptr<Image> imageValid(ImageHelper<ImageVmeValidFormat>::create(pContext));
|
||||
cl_mem srcImgMem = imageValid.get();
|
||||
|
||||
vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
|
||||
vmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
|
||||
|
||||
// null buffers are valid
|
||||
EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
// too small buffers should fail
|
||||
MockBuffer mb;
|
||||
cl_mem mem = &mb;
|
||||
|
||||
uint32_t predictionMotionVectorBufferArgNum = 3;
|
||||
uint32_t motionVectorBufferArgNum = 4;
|
||||
uint32_t residualsBufferArgNum = 5;
|
||||
for (uint32_t argNum : {predictionMotionVectorBufferArgNum, motionVectorBufferArgNum, residualsBufferArgNum}) {
|
||||
EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, mb.getSize() * 2, 1));
|
||||
vmeBuilder.setExplicitArg(argNum, sizeof(cl_mem), &mem, err);
|
||||
EXPECT_EQ(CL_INVALID_BUFFER_SIZE, vmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, mb.getSize() * 2, 1));
|
||||
vmeBuilder.setExplicitArg(argNum, sizeof(cl_mem), nullptr, err);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VmeBuiltInTests, GivenAdvancedVmeWhenValidatingVmeDispatchThenCorrectReturns) {
|
||||
overwriteBuiltInBinaryName("media_kernels_backend");
|
||||
BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> avmeBuilder(*this->pBuiltIns, *pClDevice);
|
||||
restoreBuiltInBinaryName();
|
||||
|
||||
cl_int err;
|
||||
// images not set
|
||||
ASSERT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
uint32_t srcImgArgNum = 1;
|
||||
uint32_t refImgArgNum = 2;
|
||||
|
||||
std::unique_ptr<Image> imageValid(ImageHelper<ImageVmeValidFormat>::create(pContext));
|
||||
cl_mem srcImgMem = imageValid.get();
|
||||
|
||||
avmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
|
||||
avmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
uint32_t flagsArgNum = 3;
|
||||
uint32_t val = CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL;
|
||||
avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err);
|
||||
EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
val = CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL;
|
||||
avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err);
|
||||
|
||||
uint32_t skipBlockTypeArgNum = 4;
|
||||
val = 8192;
|
||||
avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &val, err);
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
val = CL_ME_MB_TYPE_16x16_INTEL;
|
||||
avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &val, err);
|
||||
|
||||
uint32_t searchCostPenaltyArgNum = 5;
|
||||
val = 8192;
|
||||
avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err);
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
val = CL_ME_COST_PENALTY_NONE_INTEL;
|
||||
avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err);
|
||||
|
||||
uint32_t searchCostPrecisionArgNum = 6;
|
||||
val = 8192;
|
||||
avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err);
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
val = CL_ME_COST_PRECISION_QPEL_INTEL;
|
||||
avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err);
|
||||
|
||||
// for non-bidirectional avme kernel, countMotionVectorBuffer must be set
|
||||
uint32_t countMotionVectorBufferArgNum = 7;
|
||||
EXPECT_EQ(CL_INVALID_BUFFER_SIZE, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
MockBuffer mb;
|
||||
cl_mem mem = &mb;
|
||||
avmeBuilder.setExplicitArg(countMotionVectorBufferArgNum, sizeof(cl_mem), &mem, err);
|
||||
EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 1, 1));
|
||||
}
|
||||
|
||||
TEST_F(VmeBuiltInTests, GivenAdvancedBidirectionalVmeWhenValidatingVmeDispatchThenCorrectReturns) {
|
||||
overwriteBuiltInBinaryName("media_kernels_backend");
|
||||
BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateBidirectionalCheckIntel> avmeBuilder(*this->pBuiltIns, *pClDevice);
|
||||
restoreBuiltInBinaryName();
|
||||
|
||||
cl_int err;
|
||||
uint32_t srcImgArgNum = 1;
|
||||
uint32_t refImgArgNum = 2;
|
||||
|
||||
std::unique_ptr<Image> imageValid(ImageHelper<ImageVmeValidFormat>::create(pContext));
|
||||
cl_mem srcImgMem = imageValid.get();
|
||||
|
||||
avmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
|
||||
avmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
uint32_t flagsArgNum = 6;
|
||||
uint32_t val = CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL;
|
||||
avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err);
|
||||
|
||||
uint32_t searchCostPenaltyArgNum = 7;
|
||||
val = CL_ME_COST_PENALTY_NONE_INTEL;
|
||||
avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err);
|
||||
|
||||
uint32_t searchCostPrecisionArgNum = 8;
|
||||
val = CL_ME_COST_PRECISION_QPEL_INTEL;
|
||||
avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err);
|
||||
|
||||
uint32_t bidirWeightArgNum = 10;
|
||||
val = 255;
|
||||
avmeBuilder.setExplicitArg(bidirWeightArgNum, sizeof(uint8_t), &val, err);
|
||||
EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
val = CL_ME_BIDIR_WEIGHT_QUARTER_INTEL;
|
||||
avmeBuilder.setExplicitArg(bidirWeightArgNum, sizeof(uint8_t), &val, err);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 64, 1));
|
||||
|
||||
// test bufferSize checking
|
||||
uint32_t countMotionVectorBufferArgNum = 11;
|
||||
MockBuffer mb;
|
||||
cl_mem mem = &mb;
|
||||
avmeBuilder.setExplicitArg(countMotionVectorBufferArgNum, sizeof(cl_mem), &mem, err);
|
||||
EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, 1, 1));
|
||||
EXPECT_EQ(CL_INVALID_BUFFER_SIZE, avmeBuilder.validateVmeDispatch(Vec3<size_t>{1, 1, 0}, Vec3<size_t>{0, 0, 0}, mb.getSize() * 2, 1));
|
||||
}
|
||||
|
||||
TEST_F(VmeBuiltInTests, GivenAdvancedVmeWhenGettingSkipResidualsBuffExpSizeThenDefaultSizeIsReturned) {
|
||||
overwriteBuiltInBinaryName("media_kernels_backend");
|
||||
BuiltInOp<EBuiltInOps::vmeBlockAdvancedMotionEstimateCheckIntel> vmeBuilder(*this->pBuiltIns, *pClDevice);
|
||||
restoreBuiltInBinaryName();
|
||||
|
||||
auto size16x16 = vmeBuilder.getSkipResidualsBuffExpSize(CL_ME_MB_TYPE_16x16_INTEL, 4);
|
||||
auto sizeDefault = vmeBuilder.getSkipResidualsBuffExpSize(8192, 4);
|
||||
EXPECT_EQ(size16x16, sizeDefault);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, GivenInvalidBuiltinKernelNameWhenCreatingBuiltInProgramThenInvalidValueErrorIsReturned) {
|
||||
const char *kernelNames = "invalid_kernel";
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
||||
cl_program program = Vme::createBuiltInProgram(
|
||||
*pContext,
|
||||
pContext->getDevices(),
|
||||
kernelNames,
|
||||
retVal);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
EXPECT_EQ(nullptr, program);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, WhenGettingSipKernelThenReturnProgramCreatedFromIsaAcquiredThroughCompilerInterface) {
|
||||
auto mockCompilerInterface = new MockCompilerInterface();
|
||||
pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->compilerInterface.reset(mockCompilerInterface);
|
||||
|
|
|
@ -1,962 +0,0 @@
/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// VME KERNELS
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
                            __read_only image2d_t refImg,
                            __global short2 *prediction_motion_vector_buffer,
                            __global short2 *motion_vector_buffer,
                            __global ushort *residuals, int height, int width,
                            int stride) {
  __local uint dst[64];
  __local ushort *dist = (__local ushort *)&dst[8 * 5];

  int sid_0 = stride * get_group_id(0);
  int gid_0 = sid_0 / height;
  int gid_1 = sid_0 % height;
  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
       sid++, gid_0 = sid / height, gid_1 = sid % height) {
    int2 srcCoord = 0;
    int2 refCoord = 0;

    srcCoord.x = gid_0 * 16 + get_global_offset(0);
    srcCoord.y = gid_1 * 16 + get_global_offset(1);

    short2 predMV = 0;

#ifndef HW_NULL_CHECK
    if (prediction_motion_vector_buffer != NULL)
#endif
    {
      predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width];
      refCoord.x = predMV.x / 4;
      refCoord.y = predMV.y / 4;
      refCoord.y = refCoord.y & 0xFFFE;
    }

    {
      intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg,
                                    accelerator);
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Write Out Result

    // 4x4
    if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
      int x = get_local_id(0) % 4;
      int y = get_local_id(0) / 4;
      int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4;

      short2 val = as_short2(dst[8 + (y * 4 + x) * 2]);
      motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
      if (residuals != NULL)
#endif
      {
        residuals[index] = dist[y * 4 + x];
      }
    }

    // 8x8
    if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
      if (get_local_id(0) < 4) {
        int x = get_local_id(0) % 2;
        int y = get_local_id(0) / 2;
        int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2;
        short2 val = as_short2(dst[8 + (y * 2 + x) * 8]);
        motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
        if (residuals != NULL)
#endif
        {
          residuals[index] = dist[(y * 2 + x) * 4];
        }
      }
    }

    // 16x16
    if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
      if (get_local_id(0) == 0) {
        int index = gid_0 + gid_1 * width;

        short2 val = as_short2(dst[8]);
        motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
        if (residuals != NULL)
#endif
        {
          residuals[index] = dist[0];
        }
      }
    }
  }
}

__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, uint flags, uint skip_block_type,
    uint search_cost_penalty, uint search_cost_precision,
    __global short2 *count_motion_vector_buffer,
    __global short2 *predictors_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *motion_vector_buffer,
    __global char *intra_search_predictor_modes, __global ushort *residuals,
    __global ushort *skip_residuals, __global ushort *intra_residuals,
    __read_only image2d_t intraSrcImg, int height, int width, int stride) {
  __local uint dstSearch[64];         // 8 GRFs
  __local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra)

  __local ushort *distSearch =
      (__local ushort *)&dstSearch[8 * 5]; // distortion in the 6th GRF

  // Initialize the MV cost table:
  // MV Cost in U4U4 format:
  // No cost : 0, 0, 0, 0, 0, 0, 0, 0
  // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15
  // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57
  // High Cost : 29, 61, 72, 78, 88, 89, 91, 92

  uint2 MVCostTable;
  if (search_cost_penalty == 1) {
    MVCostTable.s0 = 0x09050401;
    MVCostTable.s1 = 0x0F0E0C0A;
  } else if (search_cost_penalty == 2) {
    MVCostTable.s0 = 0x2B1D1A05;
    MVCostTable.s1 = 0x39392F2D;
  } else if (search_cost_penalty == 3) {
    MVCostTable.s0 = 0x4E483D1D;
    MVCostTable.s1 = 0x5C5B5958;
  } else {
    MVCostTable.s0 = 0;
    MVCostTable.s1 = 0;
  }

  uint MVCostPrecision = ((uint)search_cost_precision) << 16;
  // Frame is divided into rows * columns of MBs.
  // One h/w thread per WG.
  // One WG processes 'row' MBs - one row per iteration and one MB per row.
  // Number of WGs (or h/w threads) is number of columns MBs
  // Each iteration processes the MB in a row - gid_0 is the MB id in a row and
  // gid_1 is the row offset.

  int sid_0 = stride * get_group_id(0);
  int gid_0 = sid_0 / height;
  int gid_1 = sid_0 % height;
  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
       sid++, gid_0 = sid / height, gid_1 = sid % height) {
    int2 srcCoord;

    srcCoord.x = gid_0 * 16 +
                 get_global_offset(0); // 16 pixels wide MBs (globally scalar)
    srcCoord.y = gid_1 * 16 +
                 get_global_offset(1); // 16 pixels tall MBs (globally scalar)

    uint curMB = gid_0 + gid_1 * width; // current MB id
    short2 count = count_motion_vector_buffer[curMB];

    int countPredMVs = count.x;
    if (countPredMVs != 0) {
      uint offset = curMB * 8;       // 8 predictors per MB
      offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB
                                     // one predictor for MB per SIMD channel

      // Reduce predictors from Q-pixel to integer precision.

      int2 predMV = 0;
      if (get_local_id(0) < countPredMVs) {
        predMV =
            convert_int2(predictors_buffer[offset]); // one MV per work-item
        predMV.x /= 4;
        predMV.y /= 4;
        predMV.y &= 0xFFFE;
      }

      // Do up to 8 IMEs, get the best MVs and their distortions, and optionally
      // a FBR of the best MVs.
      // Finally the results are written out to SLM.

      intel_work_group_vme_mb_multi_query_8(
          dstSearch,       // best search MV and its distortions into SLM
          countPredMVs,    // count of predictor MVs (globally scalar - value range
                           // 1 to 8)
          MVCostPrecision, // MV cost precision
          MVCostTable,     // MV cost table
          srcCoord,        // MB 2-D offset (globally scalar)
          predMV,          // predictor MVs (up to 8 distinct MVs for SIMD16 thread)
          srcImg,          // source
          refImg,          // reference
          accelerator);    // vme object
    }

    int doIntra = (flags & 0x2) != 0;
    int intraEdges = 0;
    if (doIntra) {
      // Enable all edges by default.
      intraEdges = 0x3C;
      // If this is a left-edge MB, then disable left edges.
      if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
        intraEdges &= 0x14;
      }
      // If this is a right edge MB then disable right edges.
      if (gid_0 == width - 1) {
        intraEdges &= 0x38;
      }
      // If this is a top-edge MB, then disable top edges.
      if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
        intraEdges &= 0x20;
      }
      // Set bit6=bit5.
      intraEdges |= ((intraEdges & 0x20) << 1);
      intraEdges <<= 8;
    }
    int countSkipMVs = count.y;
    if (countSkipMVs != 0 || doIntra == true) {
      uint offset = curMB * 8; // 8 sets of skip check MVs per MB
      offset +=
          (get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB
                                 // one set of skip MV per SIMD channel

      // Do up to 8 skip checks and get the distortions for each of them.
      // Finally the results are written out to SLM.

      if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) {
        int skipMVs = 0;
        if (get_local_id(0) < countSkipMVs) {
          __global int *skip1_motion_vector_buffer =
              (__global int *)skip_motion_vector_buffer;
          skipMVs = skip1_motion_vector_buffer[offset]; // one packed MV for one
                                                        // work-item
        }
        intel_work_group_vme_mb_multi_check_16x16(
            dstSkipIntra, // distortions into SLM
            countSkipMVs, // count of skip check MVs (value range 0 to 8)
            doIntra,      // compute intra modes
            intraEdges,   // intra edges to use
            srcCoord,     // MB 2-D offset (globally scalar)
            skipMVs,      // skip check MVs (up to 8 sets of skip check MVs for
                          // SIMD16 thread)
            srcImg,       // source
            refImg,       // reference
            intraSrcImg,  // intra source
            accelerator);
      }

      if ((skip_block_type == 0x1) & (countSkipMVs > 0)) {
        int4 skipMVs = 0;
        if (get_local_id(0) < countSkipMVs) {
          __global int4 *skip4_motion_vector_buffer =
              (__global int4 *)(skip_motion_vector_buffer);
          skipMVs = skip4_motion_vector_buffer[offset]; // four component MVs
                                                        // per work-item
        }
        intel_work_group_vme_mb_multi_check_8x8(
            dstSkipIntra, // distortions into SLM
            countSkipMVs, // count of skip check MVs per MB (value range 0 to 8)
            doIntra,      // compute intra modes
            intraEdges,   // intra edges to use
            srcCoord,     // MB 2-D offset (globally scalar)
            skipMVs,      // skip check MVs (up to 8 sets of skip check MVs for
                          // SIMD16 thread)
            srcImg,       // source
            refImg,       // reference
            intraSrcImg,  // intra source
            accelerator);
      }
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // Write Out motion estimation result:
    // Result format
    // Hierarchical row-major layout
    // i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of
    // MVs/distortion in blocks

    if (countPredMVs != 0) {
      // 4x4
      if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
        int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);

        // 1. 16 work-items enabled.
        // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
        //    (interleaved fwd/bdw MVs) with constant offset 8 (control data
        //    size) from SLM into contiguous short2 locations 0, 1, .., 15 of
        //    global buffer search_motion_vector_buffer with offset index.
        // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
        //    distSearch into contiguous ushort locations 0, 1, .., 15 of
        //    search_residuals with offset index.

        short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
        motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
        if (residuals != NULL)
#endif
        {
          residuals[index] = distSearch[get_local_id(0)];
        }
      }

      // 8x8
      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
        // Only the 1st 4 work-items are needed.
        if (get_local_id(0) < 4) {
          int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);

          // 1. 4 work-items enabled.
          // 2. Work-items gather fwd MVs in strided dword locations 0, 8, 16, 24
          //    (interleaved fwd/bdw MVs) with constant offset 8 from SLM into
          //    contiguous short2 locations 0, 1, .., 15 of global buffer
          //    search_motion_vector_buffer with offset index.
          // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
          //    distSearch into contiguous ushort locations 0, 1, .., 15 of
          //    search_residuals with offset index.

          short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
          motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
          if (residuals != NULL)
#endif
          {
            residuals[index] = distSearch[get_local_id(0) * 4];
          }
        }
      }

      // 16x16
      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
        // Only the 1st work-item is needed.
        if (get_local_id(0) == 0) {
          int index = gid_0 + gid_1 * width;

          // 1. 1 work-item enabled.
          // 2. Work-item gathers fwd MV in dword location 0 with constant
          //    offset 8 from SLM into short2 location 0 of global buffer
          //    search_motion_vector_buffer.
          // 3. Work-item gathers ushort location 0 from distSearch into ushort
          //    location 0 of search_residuals with offset index.

          short2 val = as_short2(dstSearch[8]);
          motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
          if (residuals != NULL)
#endif
          {
            residuals[index] = distSearch[0];
          }
        }
      }
    }

    // Write out motion skip check result:
    // Result format
    // Hierarchical row-major layout
    // i.e. row-major of blocks in MBs, and row-major of 8 sets of
    // distortions in blocks

    if (countSkipMVs != 0) {
      if (skip_block_type == 0x0) {
        // Copy out 8 (1 component) sets of distortion values.

        int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width);

        if (get_local_id(0) < countSkipMVs) {
          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];

          // 1. Up to 8 work-items are enabled.
          // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
          //    copies them to contiguous skip_residual locations 0, 1, 2, .., 7.
          skip_residuals[index] = distSkip[get_local_id(0) * 16];
        }
      } else {
        // Copy out 8 (4 component) sets of distortion values.

        int index =
            (gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width);

        __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];

        if (get_local_id(0) < countSkipMVs * 4) {
          // 1. Up to 16 work-items are enabled.
          // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*31 and
          //    copies them to contiguous skip_residual locations 0, 1, 2, .., 31.

          skip_residuals[index] = distSkip[get_local_id(0) * 4];
          skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4];
        }
      }
    }

    // Write out intra search result:

    if (doIntra) {

      int index_low =
          (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);
      int index_high =
          (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);

      // Write out the 4x4 intra modes
      if (get_local_id(0) < 8) {
        __local char *dstIntra_4x4 =
            (__local char *)(&dstSkipIntra[64 + 16 + 4]);
        char value = dstIntra_4x4[get_local_id(0)];
        char value_low = (value)&0xf;
        char value_high = (value >> 4) & 0xf;
        intra_search_predictor_modes[index_low + 5] = value_low;
        intra_search_predictor_modes[index_high + 5] = value_high;
      }

      // Write out the 8x8 intra modes
      if (get_local_id(0) < 4) {
        __local char *dstIntra_8x8 =
            (__local char *)(&dstSkipIntra[64 + 8 + 4]);
        char value = dstIntra_8x8[get_local_id(0) * 2];
        char value_low = (value)&0xf;
        int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
        intra_search_predictor_modes[index + 1] = value_low;
      }

      // Write out the 16x16 intra modes
      if (get_local_id(0) < 1) {
        __local char *dstIntra_16x16 =
            (__local char *)(&dstSkipIntra[64 + 0 + 4]);
        char value = dstIntra_16x16[get_local_id(0)];
        char value_low = (value)&0xf;
        intra_search_predictor_modes[index_low] = value_low;
      }

      // Get the intra residuals.
#ifndef HW_NULL_CHECK
      if (intra_residuals != NULL)
#endif
      {
        int index = (gid_0 * 4) + (gid_1 * 4 * width);

        if (get_local_id(0) < 1) {
          __local ushort *distIntra_4x4 =
              (__local ushort *)(&dstSkipIntra[64 + 16 + 3]);
          __local ushort *distIntra_8x8 =
              (__local ushort *)(&dstSkipIntra[64 + 8 + 3]);
          __local ushort *distIntra_16x16 =
              (__local ushort *)(&dstSkipIntra[64 + 0 + 3]);
          intra_residuals[index + 2] = distIntra_4x4[0];
          intra_residuals[index + 1] = distIntra_8x8[0];
          intra_residuals[index + 0] = distIntra_16x16[0];
        }
      }
    }
  }
}
/*************************************************************************************************
Built-in kernel:
block_advanced_motion_estimate_bidirectional_check_intel

Description:

1. Do motion estimation with 0 to 4 predictor MVs using 0 to 4 (integer motion estimation) IMEs
per macro-block, calculating the best search MVs per specified (16x16, 8x8, 4x4) luma block with
lowest distortion from amongst the 0 to 4 IME results, and optionally do (fractional
bi-directional refinement) FBR on the best IME search results to refine the best search results.
The best search (FBR if done, or IME) MVs and their distortions are returned.

2. Do unidirectional or bidirectional skip (zero search) checks with 0 to 4 sets of skip check
MVs for (16x16, 8x8) luma blocks using 0 to 4 (skip and intra check) SICs and return the
distortions associated with the input sets of skip check MVs per specified luma block. 4x4
blocks are not supported by h/w for skip checks.

3. Do intra-prediction for (16x16, 8x8, 4x4) luma blocks and (8x8) chroma blocks using 3 SICs
and returning the predictor modes and their associated distortions. Intra-prediction is done
for all block sizes. Support for 8x8 chroma blocks cannot be enabled until NV image formats
are supported in OCL.

**************************************************************************************************/
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_bidirectional_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, __read_only image2d_t src_check_image,
    __read_only image2d_t ref0_check_image,
    __read_only image2d_t ref1_check_image, uint flags,
    uint search_cost_penalty, uint search_cost_precision, short2 count_global,
    uchar bidir_weight, __global short2 *count_motion_vector_buffer,
    __global short2 *prediction_motion_vector_buffer,
    __global char *skip_input_mode_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *search_motion_vector_buffer,
    __global char *intra_search_predictor_modes,
    __global ushort *search_residuals, __global ushort *skip_residuals,
    __global ushort *intra_residuals, __read_only image2d_t intraSrcImg,
    int height, int width, int stride) {
  __local uint dstSearch[64];         // 8 GRFs
  __local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra)

  // distortion in the 6th GRF
  __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5];

  // Initialize the MV cost table:
  // MV Cost in U4U4 format:
  // No cost : 0, 0, 0, 0, 0, 0, 0, 0
  // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15
  // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57
  // High Cost : 29, 61, 72, 78, 88, 89, 91, 92

  uint2 MVCostTable;
  if (search_cost_penalty == 1) {
    MVCostTable.s0 = 0x09050401;
    MVCostTable.s1 = 0x0F0E0C0A;
  } else if (search_cost_penalty == 2) {
    MVCostTable.s0 = 0x2B1D1A05;
    MVCostTable.s1 = 0x39392F2D;
  } else if (search_cost_penalty == 3) {
    MVCostTable.s0 = 0x4E483D1D;
    MVCostTable.s1 = 0x5C5B5958;
  } else {
    MVCostTable.s0 = 0;
    MVCostTable.s1 = 0;
  }

  uint MVCostPrecision = ((uint)search_cost_precision) << 16;

  // Frame is divided into rows * columns of MBs.
  // One h/w thread per WG.
  // One WG processes "row" MBs - one row per iteration and one MB per row.
  // Number of WGs (or h/w threads) is number of columns MBs. Each iteration
  // processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the
  // row offset.

  int sid_0 = stride * get_group_id(0);
  int gid_0 = sid_0 / height;
  int gid_1 = sid_0 % height;
  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
       sid++, gid_0 = sid / height, gid_1 = sid % height) {
    int2 srcCoord;

    srcCoord.x = gid_0 * 16 +
                 get_global_offset(0); // 16 pixels wide MBs (globally scalar)
    srcCoord.y = gid_1 * 16 +
                 get_global_offset(1); // 16 pixels tall MBs (globally scalar)
    uint curMB = gid_0 + gid_1 * width; // current MB id
    short2 count;

    // If either the search or skip vector counts are per-MB, then we need to
    // read in the count motion vector buffer.

    if ((count_global.s0 == -1) | (count_global.s1 == -1)) {
      count = count_motion_vector_buffer[curMB];
    }

    // If either the search or skip vector counts are per-frame, we need to use
    // those.

    if (count_global.s0 >= 0) {
      count.s0 = count_global.s0;
    }

    if (count_global.s1 >= 0) {
      count.s1 = count_global.s1;
    }

    int countPredMVs = count.x;
    if (countPredMVs != 0) {
      uint offset = curMB * 4;       // 4 predictors per MB
      offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB
                                     // one predictor for MB per SIMD channel

      // Reduce predictors from Q-pixel to integer precision.
      int2 predMV = 0;

      if (get_local_id(0) < countPredMVs) {
        // one MV per work-item
        predMV = convert_int2(prediction_motion_vector_buffer[offset]);
        // Predictors are input in QP resolution. Convert that to integer
        // resolution.
        predMV.x /= 4;
        predMV.y /= 4;
        predMV.y &= 0xFFFFFFFE;
      }

      // Do up to 4 IMEs, get the best MVs and their distortions, and optionally
      // a FBR of the best MVs. Finally the results are written out to SLM.

      intel_work_group_vme_mb_multi_query_4(
          dstSearch,       // best search MV and its distortions into SLM
          countPredMVs,    // count of predictor MVs (globally scalar - value range
                           // 1 to 4)
          MVCostPrecision, // MV cost precision
          MVCostTable,     // MV cost table
          srcCoord,        // MB 2-D offset (globally scalar)
          predMV,          // predictor MVs (up to 4 distinct MVs for SIMD16 thread)
          srcImg,          // source
          refImg,          // reference
          accelerator);    // vme object
    }

    int doIntra = ((flags & 0x2) != 0);
    int intraEdges = 0;
    if (doIntra) {
      // Enable all edges by default.
      intraEdges = 0x3C;
      // If this is a left-edge MB, then disable left edges.
      if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
        intraEdges &= 0x14;
      }

      // If this is a right edge MB then disable right edges.
      if (gid_0 == width - 1) {
        intraEdges &= 0x38;
      }

      // If this is a top-edge MB, then disable top edges.
      if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
        intraEdges &= 0x20;
      }

      // Set bit6=bit5.
      intraEdges |= ((intraEdges & 0x20) << 1);

      intraEdges <<= 8;
    }

    int skip_block_type_8x8 = flags & 0x4;

    int countSkipMVs = count.y;
    if (countSkipMVs != 0 || doIntra == true) {
      // one set of skip MV per SIMD channel

      // Do up to 4 skip checks and get the distortions for each of them.
      // Finally the results are written out to SLM.

      if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) {
        // 16x16:

        uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB
        int skipMV = 0;
        if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV
        {
          offset +=
              (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
          __global int *skip1_motion_vector_buffer =
              (__global int *)skip_motion_vector_buffer;
          skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item
        }

        uchar skipMode = 0;
        if (get_local_id(0) < countSkipMVs) {
          skipMode = skip_input_mode_buffer[curMB];

          if (skipMode == 0) {
            skipMode = 1;
          }
          if (skipMode > 3) {
            skipMode = 3;
          }
        }

        intel_work_group_vme_mb_multi_bidir_check_16x16(
            dstSkipIntra,     // distortions into SLM
            countSkipMVs,     // count of skip check MVs (globally scalar - value
                              // range 1 to 4)
            doIntra,          // compute intra modes
            intraEdges,       // intra edges to use
            srcCoord,         // MB 2-D offset (globally scalar)
            bidir_weight,     // bidirectional weight
            skipMode,         // skip modes
            skipMV,           // skip check MVs (up to 4 distinct sets of skip check MVs
                              // for SIMD16 thread)
            src_check_image,  // source
            ref0_check_image, // reference fwd
            ref1_check_image, // reference bwd
            intraSrcImg,      // intra source
            accelerator);     // vme object
      } else {
        // 8x8:

        uint offset =
            curMB * 4 *
            8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB
        int2 skipMVs = 0;
        if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV
        {
          offset +=
              (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
          __global int *skip1_motion_vector_buffer =
              (__global int *)(skip_motion_vector_buffer);
          skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs
                                                          // per work-item
          skipMVs.y = skip1_motion_vector_buffer[offset + 16];
        }

        uchar skipModes = 0;
        if (get_local_id(0) < countSkipMVs) {
          skipModes = skip_input_mode_buffer[curMB];
        }

        intel_work_group_vme_mb_multi_bidir_check_8x8(
            dstSkipIntra,     // distortions into SLM
            countSkipMVs,     // count of skip check MVs per MB (globally scalar -
                              // value range 1 to 4)
            doIntra,          // compute intra modes
            intraEdges,       // intra edges to use
            srcCoord,         // MB 2-D offset (globally scalar)
            bidir_weight,     // bidirectional weight
            skipModes,        // skip modes
            skipMVs,          // skip check MVs (up to 4 distinct sets of skip check MVs
                              // for SIMD16 thread)
            src_check_image,  // source
            ref0_check_image, // reference fwd
            ref1_check_image, // reference bwd
            intraSrcImg,      // intra source
            accelerator);     // vme object
      }
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // Write Out motion estimation result:
    // Result format
    // Hierarchical row-major layout
    // i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of
    // MVs/distortion in blocks
    if (countPredMVs != 0) {
      // 4x4
      if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
        int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);

        // 1. 16 work-items enabled.
        // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
        //    (interleaved fwd/bdw MVs) with constant offset 8 (control data
        //    size) from SLM into contiguous short2 locations 0, 1, .., 15 of
        //    global buffer search_motion_vector_buffer with offset index.
        // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
        //    distSearch into contiguous ushort locations 0, 1, .., 15 of
        //    search_residuals with offset index.

        short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
        search_motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
        if (search_residuals != NULL)
#endif
        {
          search_residuals[index] = distSearch[get_local_id(0)];
        }
      }

      // 8x8
      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
        // Only the 1st 4 work-items are needed.
        if (get_local_id(0) < 4) {
          int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);

          // 1. 4 work-items enabled.
          // 2. Work-items gather fwd MVs in strided dword locations 0, 8, 16, 24
          //    (interleaved fwd/bdw MVs) with constant offset 8 from SLM into
          //    contiguous short2 locations 0, 1, .., 15 of global buffer
          //    search_motion_vector_buffer with offset index.
          // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
          //    distSearch into contiguous ushort locations 0, 1, .., 15 of
          //    search_residuals with offset index.

          short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
          search_motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
          if (search_residuals != NULL)
#endif
          {
            search_residuals[index] = distSearch[get_local_id(0) * 4];
          }
        }
      }

      // 16x16
      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
        // Only the 1st work-item is needed.
        if (get_local_id(0) == 0) {
          int index = gid_0 + gid_1 * width;

          // 1. 1 work-item enabled.
          // 2. Work-item gathers fwd MV in dword location 0 with constant
          //    offset 8 from SLM into short2 location 0 of global buffer
          //    search_motion_vector_buffer.
          // 3. Work-item gathers ushort location 0 from distSearch into ushort
          //    location 0 of search_residuals with offset index.

          short2 val = as_short2(dstSearch[8]);
          search_motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
          if (search_residuals != NULL)
#endif
          {
            search_residuals[index] = distSearch[0];
          }
        }
      }
    }

    // Write out motion skip check result:
    // Result format
    // Hierarchical row-major layout
    // i.e. row-major of blocks in MBs, and row-major of 8 sets of
    // distortions in blocks
    if (countSkipMVs != 0) {
      if (skip_block_type_8x8 == false) {
        // Copy out 4 (1 component) sets of distortion values.

        int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width);

        if (get_local_id(0) < countSkipMVs) {
          // 1. Up to 4 work-items are enabled.
          // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
          //    copies them to contiguous skip_residual locations 0, 1, 2, .., 7.
          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
          skip_residuals[index] = distSkip[get_local_id(0) * 16];
        }
      } else {
        // Copy out 4 (4 component) sets of distortion values.
        int index =
            (gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width);

        if (get_local_id(0) < countSkipMVs * 4) {
          // 1. Up to 16 work-items are enabled.
          // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*15 and
          //    copies them to contiguous skip_residual locations 0, 1, 2, .., 15.

          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
          skip_residuals[index] = distSkip[get_local_id(0) * 4];
        }
      }
    }

    // Write out intra search result:
    if (doIntra) {
      // Write out the 4x4 intra modes
      if (get_local_id(0) < 8) {
        __local char *dstIntra_4x4 =
            (__local char *)(&dstSkipIntra[32 + 16 + 4]);
        char value = dstIntra_4x4[get_local_id(0)];
        char value_low = (value)&0xf;
        char value_high = (value >> 4) & 0xf;

        int index_low =
            (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);

        int index_high =
            (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);

        intra_search_predictor_modes[index_low + 5] = value_low;
        intra_search_predictor_modes[index_high + 5] = value_high;
      }

      // Write out the 8x8 intra modes
      if (get_local_id(0) < 4) {
        __local char *dstIntra_8x8 =
            (__local char *)(&dstSkipIntra[32 + 8 + 4]);
        char value = dstIntra_8x8[get_local_id(0) * 2];
        char value_low = (value)&0xf;
        int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
        intra_search_predictor_modes[index + 1] = value_low;
      }

      // Write out the 16x16 intra modes
      if (get_local_id(0) < 1) {
        __local char *dstIntra_16x16 =
            (__local char *)(&dstSkipIntra[32 + 0 + 4]);
        char value = dstIntra_16x16[0];
        char value_low = (value)&0xf;
        int index = (gid_0 * 22) + (gid_1 * 22 * width);
        intra_search_predictor_modes[index] = value_low;
      }

      // Get the intra residuals.
#ifndef HW_NULL_CHECK
      if (intra_residuals != NULL)
#endif
      {
        int index = (gid_0 * 4) + (gid_1 * 4 * width);

        if (get_local_id(0) < 1) {
          __local ushort *distIntra_4x4 =
              (__local ushort *)(&dstSkipIntra[32 + 16 + 3]);
          __local ushort *distIntra_8x8 =
              (__local ushort *)(&dstSkipIntra[32 + 8 + 3]);
          __local ushort *distIntra_16x16 =
              (__local ushort *)(&dstSkipIntra[32 + 0 + 3]);

          intra_residuals[index + 2] = distIntra_4x4[0];
          intra_residuals[index + 1] = distIntra_8x8[0];
          intra_residuals[index + 0] = distIntra_16x16[0];
        }
      }
    }
  }
}

// VEBOX KERNELS:
__kernel void ve_enhance_intel(sampler_t accelerator,
                               int flags,
                               __read_only image2d_t current_input,
                               __write_only image2d_t current_output) {
}

__kernel void ve_dn_enhance_intel(sampler_t accelerator,
                                  int flags,
                                  __read_only image2d_t ref_input,
                                  __read_only image2d_t current_input,
                                  __write_only image2d_t current_output) {
}

__kernel void ve_dn_di_enhance_intel(sampler_t accelerator,
                                     int flags,
                                     __read_only image2d_t current_input,
                                     __read_only image2d_t ref_input,
                                     __write_only image2d_t current_output,
                                     __write_only image2d_t ref_output,
                                     __write_only image2d_t dndi_output) {
}
@ -1,8 +0,0 @@
/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

-D cl_intel_device_side_advanced_vme_enable -D cl_intel_device_side_avc_vme_enable -D cl_intel_device_side_vme_enable -D cl_intel_media_block_io -cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math
@ -1,68 +0,0 @@
/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// VME KERNELS
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
                            __read_only image2d_t refImg,
                            __global short2 *prediction_motion_vector_buffer,
                            __global short2 *motion_vector_buffer,
                            __global ushort *residuals) {
}

__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, uint flags, uint skip_block_type,
    uint search_cost_penalty, uint search_cost_precision,
    __global short2 *count_motion_vector_buffer,
    __global short2 *predictors_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *motion_vector_buffer,
    __global char *intra_search_predictor_modes, __global ushort *residuals,
    __global ushort *skip_residuals, __global ushort *intra_residuals) {
}

__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_bidirectional_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, __read_only image2d_t src_check_image,
    __read_only image2d_t ref0_check_image,
    __read_only image2d_t ref1_check_image, uint flags,
    uint search_cost_penalty, uint search_cost_precision, short2 count_global,
    uchar bidir_weight, __global short2 *count_motion_vector_buffer,
    __global short2 *prediction_motion_vector_buffer,
    __global char *skip_input_mode_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *search_motion_vector_buffer,
    __global char *intra_search_predictor_modes,
    __global ushort *search_residuals, __global ushort *skip_residuals,
    __global ushort *intra_residuals) {
}

// VEBOX KERNELS:
__kernel void ve_enhance_intel(sampler_t accelerator,
                               int flags,
                               __read_only image2d_t current_input,
                               __write_only image2d_t current_output) {
}

__kernel void ve_dn_enhance_intel(sampler_t accelerator,
                                  int flags,
                                  __read_only image2d_t ref_input,
                                  __read_only image2d_t current_input,
                                  __write_only image2d_t current_output) {
}

__kernel void ve_dn_di_enhance_intel(sampler_t accelerator,
                                     int flags,
                                     __read_only image2d_t current_input,
                                     __read_only image2d_t ref_input,
                                     __write_only image2d_t current_output,
                                     __write_only image2d_t ref_output,
                                     __write_only image2d_t dndi_output) {
}
@ -1,8 +0,0 @@
/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

-D cl_intel_device_side_advanced_vme_enable -D cl_intel_device_side_avc_vme_enable -D cl_intel_device_side_vme_enable -D cl_intel_media_block_io -cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math
@ -1,106 +0,0 @@
/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

/*************************************************************************************************\
Device-side basic vme kernel:
device_side_block_motion_estimate_intel

Description:
Computes motion vectors by comparing a 2d image source with a 2d reference image, producing a
vector field of motion vectors. The algorithm searches for the best match of each macroblock pixel
block in the source image by searching an image region in the reference image, centered on the
coordinates of that pixel macroblock in the source image (optionally offset by the prediction
motion vectors).

This kernel optionally takes a vector field of motion vector predictors via the
prediction_motion_vector_image kernel argument. The kernel also optionally returns a vector
field of per-macroblock pixel-block information records. Each record contains the best-match
distortion (SAD) value and additional search result information.

This kernel needs to be compiled with the following compiler option:
" -D cl_intel_device_side_avc_vme_enable "

\*************************************************************************************************/

__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
void device_side_block_motion_estimate_intel(__read_only image2d_t srcImg,
                                             __read_only image2d_t refImg,
                                             __global short2 *prediction_motion_vector_buffer,
                                             __global short2 *motion_vector_buffer,
                                             __global ushort *residuals_buffer,
                                             __global uchar2 *shapes_buffer,
                                             int iterations,
                                             int partitionMask) {
  int gid_0 = get_group_id(0);
  int gid_1 = 0;
  sampler_t vme_samp = 0;

  for (int i = 0; i < iterations; i++, gid_1++) {
    ushort2 srcCoord = 0;
    short2 refCoord = 0;
    short2 predMV = 0;

    srcCoord.x = gid_0 * 16 + get_global_offset(0);
    srcCoord.y = gid_1 * 16 + get_global_offset(1);

    if (prediction_motion_vector_buffer != NULL) {
      predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * get_num_groups(0)];
      refCoord.x = predMV.x / 4;
      refCoord.y = predMV.y / 4;
      refCoord.y = refCoord.y & 0xFFFE;
    }

    uchar partition_mask = (uchar)partitionMask;
    uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;
    uchar pixel_mode = CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL;

    intel_sub_group_avc_ime_payload_t payload = intel_sub_group_avc_ime_initialize(srcCoord, partition_mask, sad_adjustment);
    payload = intel_sub_group_avc_ime_set_single_reference(refCoord, CLK_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL, payload);

    intel_sub_group_avc_ime_result_t result = intel_sub_group_avc_ime_evaluate_with_single_reference(srcImg, refImg, vme_samp, payload);

    // Process Results
    long mvs = intel_sub_group_avc_ime_get_motion_vectors(result);
    ushort sads = intel_sub_group_avc_ime_get_inter_distortions(result);
    uchar major_shape = intel_sub_group_avc_ime_get_inter_major_shape(result);
    uchar minor_shapes = intel_sub_group_avc_ime_get_inter_minor_shapes(result);
    uchar2 shapes = {major_shape, minor_shapes};
    uchar directions = intel_sub_group_avc_ime_get_inter_directions(result);

    // Perform FME for non-Integer Pixel mode
    if (pixel_mode != CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL) {
      intel_sub_group_avc_ref_payload_t payload = intel_sub_group_avc_fme_initialize(srcCoord, mvs, major_shape, minor_shapes, directions, pixel_mode, sad_adjustment);
      intel_sub_group_avc_ref_result_t result = intel_sub_group_avc_ref_evaluate_with_single_reference(srcImg, refImg, vme_samp, payload);
      mvs = intel_sub_group_avc_ref_get_motion_vectors(result);
      sads = intel_sub_group_avc_ref_get_inter_distortions(result);
    }

    // Write Out Result

    if ((get_local_id(0) % 4) == 0) {
      int x = get_local_id(0) % 4;
      int y = get_local_id(0) / 4;
      int width = get_image_width(srcImg);

      int index = (gid_0 * 4 + x) + (gid_1 * width / 4 + y);
      int2 bi_mvs = as_int2(mvs);

      motion_vector_buffer[index] = as_short2(bi_mvs.s0);

      if (residuals_buffer != NULL) {
        residuals_buffer[index] = sads;
      }
      shapes_buffer[gid_0 + gid_1 * get_num_groups(0)] = shapes;
    }
  }
}

__kernel void non_vme_kernel(__global unsigned int *src, __global unsigned int *dst) {
  int id = (int)get_global_id(0);
  dst[id] = lgamma((float)src[id]);
  dst[id] = src[id];
}
@ -1,8 +0,0 @@
/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

-D cl_intel_device_side_vme_enable -D HW_NULL_CHECK
@ -69,7 +69,6 @@ BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r);
std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension);
StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, BuiltinCode::ECodeType type, const Device &device);
const char *getBuiltinAsString(EBuiltInOps::Type builtin);
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin);

class Storage {
  public:
@ -23,10 +23,6 @@
namespace NEO {

const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
    const char *builtinString = getAdditionalBuiltinAsString(builtin);
    if (builtinString) {
        return builtinString;
    }
    switch (builtin) {
    default:
        return "unknown";
@ -35,7 +35,6 @@ void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) {
}
void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) {
}
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) { return nullptr; }

void RootDeviceEnvironment::initApiGfxCoreHelper() {
}