metal : add POOL2D and fix IM2COL (#9943)
* add pool_2d Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * fix im2col and add unittest for N>=1024 Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * add tests for N % 1024 != 0 Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * remove trailing whitespaces Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * apply suggestions Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * apply more optimization - original IM2COL kernel + _ext with MIN() Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * apply review: change kernel name of pool_2d Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * apply review Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> * fix more formatting and enhance readability Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com> --------- Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>
This commit is contained in:
parent
873279b159
commit
4c9388fb96
3 changed files with 298 additions and 18 deletions
|
@ -3316,6 +3316,16 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
||||
|
||||
// test cases for 2D im2col
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 32}, {3, 3, 1, 32}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 32}, {3, 3, 2, 32}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 1024}, {3, 3, 1, 1024}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 1024}, {3, 3, 2, 1024}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 2048}, {3, 3, 1, 2048}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 2048}, {3, 3, 2, 2048}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 2560}, {3, 3, 1, 2560}, 1, 1, 1, 1, 1, 1, true));
|
||||
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 2560}, {3, 3, 2, 2560}, 1, 1, 1, 1, 1, 1, true));
|
||||
|
||||
// sycl backend will limit task global_range < MAX_INT
|
||||
// test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
|
||||
// however these cases need to alloc more memory which may fail in some devices (Intel Arc770, etc.)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue