Search code examples
computer-visionface-detectiondlib

Which architecture does dlib's CNN face detector use?


I tried googling a lot but could not find it. Is it a implementation of some paper published on CNN face detection?

Is there any details on the theoretical part of dlib's convolutional face detector?


Solution

  • It uses a custom architecture. You can check it in the source code

        ...    
    
        template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
        using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
    
        template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
        using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
    
        template <int N, template <typename> class BN, int stride, typename SUBNET> 
        using block  = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
    
        template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
        template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
    
        template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
        template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
        template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
        template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
        template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;
    
        using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                                    alevel0<
                                    alevel1<
                                    alevel2<
                                    alevel3<
                                    alevel4<
                                    max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                                    input_rgb_image_sized<150>
                                    >>>>>>>>>>>>;
        anet_type net;
    
        ...