Vulkan Phi Fix for AMD Proprietary Drivers (#5260)
* Replace tanh to avoid NaN in gelu shader on AMD proprietary driver * Fix another Vulkan CPY buffer size bug
This commit is contained in:
parent
8ca511cade
commit
4d0924a890
3 changed files with 83 additions and 69 deletions
|
@ -14670,14 +14670,14 @@ const uint64_t f32_to_f16_fp32_len = 1596;
|
|||
|
||||
unsigned char gelu_f32_data[] = {
|
||||
0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00,
|
||||
0x45,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00,
|
||||
0x4b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00,
|
||||
0x01,0x00,0x00,0x00,0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,
|
||||
0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,
|
||||
0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,
|
||||
0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,
|
||||
0x0b,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x24,0x00,0x00,0x00,
|
||||
0x2c,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,
|
||||
0x38,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,
|
||||
0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,
|
||||
0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,
|
||||
|
@ -14696,15 +14696,15 @@ unsigned char gelu_f32_data[] = {
|
|||
0x22,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,
|
||||
0x24,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x47,0x00,0x04,0x00,0x24,0x00,0x00,0x00,0x21,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x29,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x35,0x00,0x00,0x00,
|
||||
0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,
|
||||
0x2a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,
|
||||
0x48,0x00,0x05,0x00,0x2a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,
|
||||
0x48,0x00,0x05,0x00,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,
|
||||
0x2a,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,
|
||||
0x2c,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x47,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x21,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x42,0x00,0x00,0x00,
|
||||
0x36,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,
|
||||
0x38,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x47,0x00,0x04,0x00,0x38,0x00,0x00,0x00,0x21,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x48,0x00,0x00,0x00,
|
||||
0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,
|
||||
0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,
|
||||
0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,
|
||||
|
@ -14731,64 +14731,70 @@ unsigned char gelu_f32_data[] = {
|
|||
0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x22,0x00,0x00,0x00,
|
||||
0x3b,0x00,0x04,0x00,0x23,0x00,0x00,0x00,0x24,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x26,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,
|
||||
0x29,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,
|
||||
0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x20,0x00,0x04,0x00,
|
||||
0x2b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,
|
||||
0x3b,0x00,0x04,0x00,0x2b,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,
|
||||
0x11,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x2a,0x42,0x4c,0x3f,
|
||||
0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,
|
||||
0x00,0x00,0x80,0x3f,0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,
|
||||
0x2e,0x00,0x00,0x00,0x13,0x27,0x37,0x3d,0x1d,0x00,0x03,0x00,
|
||||
0x35,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,
|
||||
0x36,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x20,0x00,0x04,0x00,
|
||||
0x37,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x36,0x00,0x00,0x00,
|
||||
0x3b,0x00,0x04,0x00,0x37,0x00,0x00,0x00,0x38,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,
|
||||
0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2b,0x00,0x04,0x00,
|
||||
0x11,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x00,0x00,0x80,0x3f,
|
||||
0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,0x32,0x00,0x00,0x00,
|
||||
0x2a,0x42,0x4c,0x3f,0x2b,0x00,0x04,0x00,0x11,0x00,0x00,0x00,
|
||||
0x35,0x00,0x00,0x00,0x13,0x27,0x37,0x3d,0x2b,0x00,0x04,0x00,
|
||||
0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x02,0x00,0x00,
|
||||
0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,
|
||||
0x42,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x41,0x00,0x00,0x00,
|
||||
0x41,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,
|
||||
0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,
|
||||
0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,
|
||||
0x43,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,
|
||||
0x0c,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,
|
||||
0x44,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,
|
||||
0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,
|
||||
0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,
|
||||
0x0e,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x17,0x00,0x00,0x00,
|
||||
0x18,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x16,0x00,0x00,0x00,
|
||||
0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x19,0x00,0x00,0x00,
|
||||
0x18,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x1a,0x00,0x00,0x00,
|
||||
0x1b,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x19,0x00,0x00,0x00,
|
||||
0xf7,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfa,0x00,0x04,0x00,0x1b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,
|
||||
0x1d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x1c,0x00,0x00,0x00,
|
||||
0xf9,0x00,0x02,0x00,0x43,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,
|
||||
0x1d,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x26,0x00,0x00,0x00,
|
||||
0x27,0x00,0x00,0x00,0x24,0x00,0x00,0x00,0x16,0x00,0x00,0x00,
|
||||
0x0f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x11,0x00,0x00,0x00,
|
||||
0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x85,0x00,0x05,0x00,
|
||||
0x3a,0x00,0x00,0x00,0x00,0x00,0x00,0x3f,0x2b,0x00,0x04,0x00,
|
||||
0x11,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,0x00,0x00,0x00,0x40,
|
||||
0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x46,0x00,0x00,0x00,
|
||||
0x00,0x02,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,
|
||||
0x47,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2c,0x00,0x06,0x00,
|
||||
0x09,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x46,0x00,0x00,0x00,
|
||||
0x47,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x36,0x00,0x05,0x00,
|
||||
0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,
|
||||
0xf7,0x00,0x03,0x00,0x49,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,
|
||||
0xf8,0x00,0x02,0x00,0x4a,0x00,0x00,0x00,0x41,0x00,0x05,0x00,
|
||||
0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,
|
||||
0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x41,0x00,0x05,0x00,
|
||||
0x17,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x14,0x00,0x00,0x00,
|
||||
0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,
|
||||
0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0xae,0x00,0x05,0x00,
|
||||
0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,
|
||||
0x19,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x1b,0x00,0x00,0x00,
|
||||
0x1c,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,
|
||||
0x1c,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x49,0x00,0x00,0x00,
|
||||
0xf8,0x00,0x02,0x00,0x1d,0x00,0x00,0x00,0x41,0x00,0x06,0x00,
|
||||
0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x24,0x00,0x00,0x00,
|
||||
0x16,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,
|
||||
0x11,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x27,0x00,0x00,0x00,
|
||||
0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,
|
||||
0x2a,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x85,0x00,0x05,0x00,
|
||||
0x11,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,
|
||||
0x28,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x11,0x00,0x00,0x00,
|
||||
0x33,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,
|
||||
0x30,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,
|
||||
0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x34,0x00,0x00,0x00,
|
||||
0x2c,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x85,0x00,0x05,0x00,
|
||||
0x11,0x00,0x00,0x00,0x3c,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,
|
||||
0x28,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,
|
||||
0x34,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x28,0x00,0x00,0x00,
|
||||
0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x37,0x00,0x00,0x00,
|
||||
0x35,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,
|
||||
0x11,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x32,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x28,0x00,0x00,0x00,
|
||||
0x31,0x00,0x00,0x00,0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,
|
||||
0x3b,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x06,0x00,0x11,0x00,0x00,0x00,0x3c,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x15,0x00,0x00,0x00,0x3b,0x00,0x00,0x00,
|
||||
0x81,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,
|
||||
0x31,0x00,0x00,0x00,0x3c,0x00,0x00,0x00,0x85,0x00,0x05,0x00,
|
||||
0x11,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x30,0x00,0x00,0x00,
|
||||
0x3d,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x26,0x00,0x00,0x00,
|
||||
0x3f,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x16,0x00,0x00,0x00,
|
||||
0x0f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x3f,0x00,0x00,0x00,
|
||||
0x3e,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x43,0x00,0x00,0x00,
|
||||
0xf8,0x00,0x02,0x00,0x43,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,
|
||||
0x38,0x00,0x01,0x00,
|
||||
0x3f,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,0x34,0x00,0x00,0x00,
|
||||
0x0c,0x00,0x06,0x00,0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,
|
||||
0x81,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x41,0x00,0x00,0x00,
|
||||
0x40,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x88,0x00,0x05,0x00,
|
||||
0x11,0x00,0x00,0x00,0x42,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,
|
||||
0x41,0x00,0x00,0x00,0x83,0x00,0x05,0x00,0x11,0x00,0x00,0x00,
|
||||
0x43,0x00,0x00,0x00,0x3d,0x00,0x00,0x00,0x42,0x00,0x00,0x00,
|
||||
0x85,0x00,0x05,0x00,0x11,0x00,0x00,0x00,0x44,0x00,0x00,0x00,
|
||||
0x3c,0x00,0x00,0x00,0x43,0x00,0x00,0x00,0x41,0x00,0x06,0x00,
|
||||
0x26,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x38,0x00,0x00,0x00,
|
||||
0x16,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,
|
||||
0x45,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,
|
||||
0x49,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x49,0x00,0x00,0x00,
|
||||
0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00,
|
||||
};
|
||||
const uint64_t gelu_f32_len = 1408;
|
||||
const uint64_t gelu_f32_len = 1484;
|
||||
|
||||
unsigned char get_rows_f16_data[] = {
|
||||
0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue