Search code examples
metadataphp-7

PHP - Get Meta Information by EmbedURL


How can I get the meta information that belongs to each embedURL? I have tried get_meta_tags('MY_VIDEO_URL'), but it returns nothing.

MY HTML:

<!--Video 1-->
        <div class="embed-responsive-item" itemprop="video" itemscope="" itemtype="http://schema.org/VideoObject">
                                      <video class="embed-responsive-item" controls="" loop="">
                            <source src="INTERNAL_VIDEO_URL">
                          </video>
                         <meta itemprop="name" content="">
                         <meta itemprop="duration" content="">
                         <meta itemprop="thumbnailUrl" content="THUMBNAIL_URL">
                         <meta itemprop="embedURL" content="INTERNAL_VIDEO_URL">
                          <meta itemprop="uploadDate" content="2021-03-27T04:10:1600Z">
            </div>
<!--Video 2-->
            <div class="embed-responsive embed-responsive-16by9" onclick="loadYoutubeVideoThroughYTApi(this)" data-src-id="player-1" data-yut-var="YOUTUBE_VIDEO_ID">
                <meta itemprop="description" content="META_DESCRIPTION_2">
                <meta itemprop="duration" content="PT1M13S">
                <meta itemprop="name" content="META_TITLE_2">
                <meta itemprop="thumbnailUrl" content="https://i.ytimg.com/vi/YOUTUBE_VIDEO_ID/maxresdefault.jpg">
               <meta itemprop="embedURL" content="https://www.youtube.com/embed/YOUTUBE_VIDEO_ID">
              <meta itemprop="uploadDate" content="2019-02-04T11:00:43.000Z">
            </div>
<!--Video 3-->
    <div class="embed-responsive-item" itemprop="video" itemscope="" itemtype="http://schema.org/VideoObject">
                                      <video class="embed-responsive-item" controls="" loop="">
                            <source src="INTERNAL_VIDEO_URL_2">
                          </video>
                         <meta itemprop="name" content="">
                         <meta itemprop="duration" content="">
                         <meta itemprop="thumbnailUrl" content="THUMBNAIL_URL_2">
                         <meta itemprop="embedURL" content="INTERNAL_VIDEO_URL_2">
                          <meta itemprop="uploadDate" content="2021-03-27T04:10:1600Z">
            </div>

As you can see, the HTML contains three different videos, each with its own set of meta properties. So the resulting array should look like this:

Desired Output:

    Array
    (
        [0] => Array
            (
                [url] => INTERNAL_VIDEO_URL
                [meta_name] => NULL
                [meta_description] => NULL // as you can see no meta tags for description
                [meta_duration] => NULL
                [meta_thumbnail] => THUMBNAIL_URL
                [upload_date] => 2021-03-27T04:10:1600Z
            )
        [1] => Array
            (
                [url] => https://www.youtube.com/embed/YOUTUBE_VIDEO_ID
                [meta_name] => META_TITLE_2
                [meta_description] => META_DESCRIPTION_2
                [meta_duration] => PT1M13S
                [meta_thumbnail] => https://i.ytimg.com/vi/YOUTUBE_VIDEO_ID/maxresdefault.jpg
                [upload_date] => 2019-02-04T11:00:43.000Z
            )
        [2] => Array
            (
                [url] => INTERNAL_VIDEO_URL_2
                [meta_name] => NULL
                [meta_description] => NULL // as you can see no meta tags for description
                [meta_duration] => NULL
                [meta_thumbnail] => THUMBNAIL_URL_2
                [upload_date] => 2021-03-27T04:10:1600Z
            )
    )

How can I build an array like this?


Solution

  • After a lot of research, I came up with a solution that gives my desired output.

    <?php
    /**
     * Extract video metadata from the <meta itemprop="..." content="..."> tags
     * found in an HTML fragment.
     *
     * Recognised itemprop values (matched case-insensitively, so both
     * "thumbnailUrl" and "thumbnailURL" work) are mapped to result keys:
     *   embedURL     -> url
     *   name         -> meta_name
     *   description  -> meta_description
     *   thumbnailUrl -> meta_thumbnail
     *   duration     -> meta_duration
     *   uploadDate   -> upload_date
     *
     * If the same itemprop occurs more than once, the last occurrence wins.
     *
     * @param string $string HTML markup containing the <meta> tags of one video block.
     *
     * @return array Always contains all six keys listed above; a value is NULL
     *               when the tag is missing or its content attribute is empty.
     */
    function load_my_custom_attributes_from_meta_tags( $string ): array
    {
        $result = array(
            "url" => NULL,
            "meta_name" => NULL,
            "meta_description" => NULL,
            "meta_duration" => NULL,
            "meta_thumbnail" => NULL,
            "upload_date" => NULL
        );

        // loadHTML() does not accept an empty document, so there is nothing to parse.
        $string = (string) $string;
        if ( trim( $string ) === "" ) {
            return $result;
        }

        // Keys are lower-cased because itemprop spelling varies between pages
        // (e.g. "thumbnailUrl" vs "thumbnailURL"); the lookup below lower-cases too.
        $arrayKeyMap = array(
            "embedurl" => "url",
            "name" => "meta_name",
            "description" => "meta_description",
            "thumbnailurl" => "meta_thumbnail",
            "duration" => "meta_duration",
            "uploaddate" => "upload_date"
        );

        // Collect parser complaints (HTML5 tags, fragments without <html>) internally
        // instead of emitting warnings, then restore the caller's libxml setting.
        $htmlDom = new DOMDocument();
        $errorBuffer = libxml_use_internal_errors( true );
        $htmlDom->loadHTML( $string );
        libxml_clear_errors();
        libxml_use_internal_errors( $errorBuffer );

        foreach ( $htmlDom->getElementsByTagName( "meta" ) as $tag ) {
            // Only microdata-style tags carrying both attributes are of interest.
            if ( !$tag->hasAttribute( "itemprop" ) || !$tag->hasAttribute( "content" ) ) {
                continue;
            }

            $property = strtolower( trim( $tag->getAttribute( "itemprop" ) ) );
            if ( !isset( $arrayKeyMap[ $property ] ) ) {
                continue;
            }

            // An empty content attribute is reported as NULL, same as a missing tag.
            $content = $tag->getAttribute( "content" );
            $result[ $arrayKeyMap[ $property ] ] = $content === "" ? NULL : $content;
        }

        return $result;
    }
    
    
    // Download the page. file_get_contents() returns false on failure, in which
    // case the result simply stays an empty list.
    $file_contents = file_get_contents('PAGE_URL');
    $meta_array = array();

    if ( is_string( $file_contents ) && trim( $file_contents ) !== '' ) {
        // Parse the whole page once; markup problems are collected internally
        // rather than emitted as warnings, and the previous setting is restored.
        $pageDom = new DOMDocument();
        $previousLibxmlSetting = libxml_use_internal_errors( true );
        $pageDom->loadHTML( $file_contents );
        libxml_clear_errors();
        libxml_use_internal_errors( $previousLibxmlSetting );

        // A video block is a <div> whose class mentions "embed-responsive"
        // (this covers "embed-responsive", "embed-responsive-item",
        // "embed-responsive embed-responsive-16by9", ...) and that contains
        // itemprop <meta> tags. Only the innermost such <div> is taken so a
        // wrapper and its nested item are not reported twice.
        $isVideoBlock = 'contains(@class, "embed-responsive") and .//meta[@itemprop]';
        $videoBlockQuery = '//div[' . $isVideoBlock . '][not(.//div[' . $isVideoBlock . '])]';

        $pageXPath = new DOMXPath( $pageDom );
        foreach ( $pageXPath->query( $videoBlockQuery ) as $videoBlock ) {
            // Serialise just this block and let the helper read its <meta> tags.
            $meta_array[] = load_my_custom_attributes_from_meta_tags( $pageDom->saveHTML( $videoBlock ) );
        }
    }
    print_r($meta_array);
    ?>