package com.huawei.videoengine.compute;

/* loaded from: classes4.dex */
public class ShaderMadi {
    public static final String SHADER = "#version 310 es \n    layout(local_size_x = 8, local_size_y = 8)  in;  \n    layout(std430, binding = 0) buffer Result_buffer_out {  \n        int result_out[];  \n    };  \n    layout(std430, binding = 1) buffer DeltaQp_buffer_out {  \n        int DeltaQp_out[];  \n    };  \n    layout(std430, binding = 2) buffer curFrame {  \n        uint YPixOut[];  \n    };  \n    layout(std430, binding = 6) buffer MadiBuffer {  \n        int Madi[];  \n    };  \n    layout(std430, binding = 7) buffer SkinBuffer {  \n        int SkinCnt[];  \n    };  \n    layout(binding = 0)uniform mediump sampler2D input_texture ;\n    int cuYThread[2][2];  \n    int cuUThread; \n    int cuVThread; \n     shared int cuPixSum;  \n    shared int MadiSum;  \n    shared int numSkin32;  \n    shared float YPix[8][8];  \n    shared int sumAry[64];  \n    uniform int orientation;  \n    struct RCStruct  \n    {  \n        int frameQp;  \n        int QpMax;  \n        int QpMin;  \n        int SkinMaxQp;  \n        int SkinMinQp;  \n        int SkinQpDelta;  \n        int iCuQpDeltaSwitchThr;  \n        int cu_qp_delta_thresh[16];  \n    };  \n    layout(binding = 0) uniform RCBlock  \n    {  \n        RCStruct stRC;  \n    };   \n  \n    #define SKIN_U_TH_LOW   100  \n    #define SKIN_U_TH_HIGH  127  \n    #define SKIN_V_TH_LOW   135  \n    #define SKIN_V_TH_HIGH  160  \n    #define SKIN_COUNT_TH   32  \n    #define CU_WIDTH_BITS   4  \n    void main()  \n    {  \n        int LcuWidth = int(gl_NumWorkGroups.x); \n        int LcuHeight = int(gl_NumWorkGroups.y); \n        int LcuX = int(gl_WorkGroupID.x);  \n        int LcuY = int(gl_WorkGroupID.y);  \n        int xlocal = int(gl_LocalInvocationID.x);  \n        int ylocal = int(gl_LocalInvocationID.y);  \n        int i, j;  \n        int cuPixIdx = 0;  \n        int pixIdx = 0;  \n        if (xlocal == 0 && ylocal == 0) {  \n            cuPixSum = 0;  \n            numSkin32 = 0;  \n            MadiSum = 0;  \n        }  \n        memoryBarrierShared();  \n        barrier();  \n        int previous, temp = 0;  \n        float sumPixel2x2 = 0.0;  \n        int cuMean, cuMadi;  \n        ivec2 pos; \n        pos.x = (LcuX << CU_WIDTH_BITS) + 2*xlocal;    \n        pos.y = (LcuY << CU_WIDTH_BITS) + 2*ylocal;   \n        ivec2 tSize = textureSize(input_texture, 0); \n        int picWidth = tSize.x;  \n        int picHeight = tSize.y;  \n        for (i = 0; i < 2; i++) {  \n            for (j = 0; j < 2; j++) {  \n                pos.x = clamp(pos.x, 0, picWidth-1); \n                pos.y = clamp(pos.y, 0, picHeight-1); \n                vec4 t = texelFetch(input_texture, pos, 0); \n                 float y = (0.257*t.r + 0.504*t.g + 0.098*t.b + 0.0625); // \n                if(i==0 && j==0) { \n                float u = (-0.148*t.r - 0.291*t.g + 0.439*t.b)*255.0f+128.0f; \n                float v = (0.439*t.r - 0.368*t.g - 0.071*t.b)*255.0f+128.0f; \n                    cuUThread = int(u); \n                    cuVThread = int(v); \n                    pixIdx++; \n                } \n                cuYThread[i][j] = int(y*256.0f);\n                sumPixel2x2 += y;\n                temp += cuYThread[i][j];  \n                pos.x += 1; //pixIdx++;  \n            }  \n            pos.x -= 2;  //pixIdx += (picWidth - 4);  \n            pos.y += 1;  //pixIdx += (picWidth - 4);  \n        }  \n        YPix[ylocal][xlocal] = sumPixel2x2 / 4.0;\n        sumAry[gl_LocalInvocationIndex] = temp;  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 32u)   sumAry[gl_LocalInvocationIndex] += sumAry[63u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 16u)   sumAry[gl_LocalInvocationIndex] += sumAry[31u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 8u)    sumAry[gl_LocalInvocationIndex] += sumAry[15u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 4u)    sumAry[gl_LocalInvocationIndex] += sumAry[7u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (xlocal == 0 && ylocal == 0) {  \n            cuPixSum = sumAry[0] + sumAry[1] + sumAry[2] + sumAry[3];  \n        }\n        if (xlocal == 0 ) {  \n            vec4 pix4 = vec4(YPix[ylocal][0], YPix[ylocal][1], YPix[ylocal][2], YPix[ylocal][3]); \n            YPixOut[(LcuY*8+ylocal)*picWidth/8 + LcuX*2] = packUnorm4x8(pix4);  \n            pix4 = vec4(YPix[ylocal][4], YPix[ylocal][5], YPix[ylocal][6], YPix[ylocal][7]); \n            YPixOut[(LcuY*8+ylocal)*picWidth/8 + LcuX*2 + 1] = packUnorm4x8(pix4);  \n        }\n        memoryBarrierShared();  \n        barrier();  \n        cuMean = (cuPixSum + 128) >> 8;  \n        int sum = 0;  \n        for (i = 0; i < 2; i++) {  \n            for (j = 0; j < 2; j++) {  \n                temp = abs(cuYThread[i][j] - cuMean);  \n                sum += temp;  \n            }  \n        }  \n        sumAry[gl_LocalInvocationIndex] = sum;  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 32u)   sumAry[gl_LocalInvocationIndex] += sumAry[63u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 16u)   sumAry[gl_LocalInvocationIndex] += sumAry[31u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 8u)    sumAry[gl_LocalInvocationIndex] += sumAry[15u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 4u)    sumAry[gl_LocalInvocationIndex] += sumAry[7u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (xlocal == 0 && ylocal == 0) {  \n            MadiSum = sumAry[0] + sumAry[1] + sumAry[2] + sumAry[3];  \n        }  \n        memoryBarrierShared();  \n        barrier();  \n        cuMadi = (MadiSum + 128) >> 8;  \n        int Utemp, Vtemp;  \n        sum = 0;  \n        {  \n            Utemp = cuUThread;  \n            Vtemp = cuVThread;  \n            if (Utemp >= SKIN_U_TH_LOW && Utemp <= SKIN_U_TH_HIGH && Vtemp >= SKIN_V_TH_LOW && Vtemp <= SKIN_V_TH_HIGH)  \n            {  \n                sum++;  \n            }  \n        }  \n        sumAry[gl_LocalInvocationIndex] = sum;  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 32u)   sumAry[gl_LocalInvocationIndex] += sumAry[63u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 16u)   sumAry[gl_LocalInvocationIndex] += sumAry[31u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 8u)    sumAry[gl_LocalInvocationIndex] += sumAry[15u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (gl_LocalInvocationIndex < 4u)    sumAry[gl_LocalInvocationIndex] += sumAry[7u - gl_LocalInvocationIndex];  \n        memoryBarrierShared();  \n        barrier();  \n        if (xlocal == 0 && ylocal == 0) {  \n            numSkin32 = sumAry[0] + sumAry[1] + sumAry[2] + sumAry[3];  \n        }  \n        memoryBarrierShared();  \n        barrier();  \n        int isSkinCU = 0;  \n        if (numSkin32 >= SKIN_COUNT_TH) {  \n            isSkinCU = 1;  \n        }  \n        else {  \n            isSkinCU = 0;  \n        }  \n        if (xlocal == 0 && ylocal == 0) {  \n            int cuQP = 0;  \n            int cuDeltaQP = 0;  \n            int iMadiThr;  \n            int dq = 0;  \n                int cuIdx = 0;    \n            if(orientation == 270 || orientation == 90) {      \n                cuIdx = (LcuWidth - LcuX -1) * LcuHeight + LcuY;  } \n            else if(orientation == 0) {      \n                cuIdx = LcuY * LcuWidth + LcuX;  } \n            else if(orientation == 180) {      \n                cuIdx = (LcuHeight - LcuY -1) * LcuWidth + (LcuWidth - LcuX -1);  } \n            int iCuQpDeltaSwitchThr = stRC.iCuQpDeltaSwitchThr;  \n            iMadiThr = stRC.cu_qp_delta_thresh[iCuQpDeltaSwitchThr - 1];  \n            if (cuMadi <= iMadiThr)  \n            {  \n                for (j = 0; j < iCuQpDeltaSwitchThr; j++)  \n                {  \n                    if (cuMadi < stRC.cu_qp_delta_thresh[j]) {  \n                        dq = dq - 1;  \n                    }  \n                    else {  \n                        dq = dq; //break;  \n                    }  \n                }  \n            }  \n            else  \n            {  \n                for (j = iCuQpDeltaSwitchThr; j < 16; j++)  \n                {  \n                    if (cuMadi > stRC.cu_qp_delta_thresh[j]) {  \n                        dq = dq + 1;  \n                    }  \n                    else {  \n                        dq = dq;//break;  \n                    }  \n                }  \n            }  \n            cuQP = stRC.frameQp + dq;  \n            cuQP = min(cuQP, stRC.QpMax);  \n            cuQP = max(cuQP, stRC.QpMin);  \n            if (isSkinCU == 1)  \n            {  \n                cuQP = cuQP - stRC.SkinQpDelta;  \n                cuQP = min(cuQP, stRC.SkinMaxQp);  \n                cuQP = max(cuQP, stRC.SkinMinQp);  \n            }  \n            cuDeltaQP = cuQP - stRC.frameQp;    \n            DeltaQp_out[cuIdx] = cuDeltaQP;  \n            Madi[cuIdx] = cuMadi;  \n            SkinCnt[cuIdx] = isSkinCU;  \n            int Outidx = cuIdx >> 2;  \n            int OutidxLeft = cuIdx & 3;  \n            int a = ~(0xff << (8 * OutidxLeft));  \n            int t = (cuDeltaQP << (8 * OutidxLeft)) & (~a); \n            previous = atomicAnd(result_out[Outidx], a);  \n            previous = atomicOr(result_out[Outidx], t);  \n        }  \n    }  \n";
}
