Search code examples

Are there other options besides Yahoo's YQL for extracting HTML from other websites?

In my application I used Yahoo's YQL API to extract HTML from other websites, but Yahoo has discontinued that feature, so extracting HTML through YQL no longer works. The API now responds with:

{
 "query": {
  "count": 0,
  "created": "2017-06-26T12:57:49Z",
  "lang": "en-US",
  "meta": {
   "message": "html table is no longer supported. See for YQL Terms of Use"
  },
  "results": null
 }
}

It can be read here.

This is how I did it so far:

// Once the DOM is ready, watch every `.data-from-url` field and, on each keyup,
// ask YQL for the HTML of the typed URL so its Open Graph meta tags can be
// copied into the matching title/description/image fields.
$(function () {
    var fileFieldId;
    var fileFieldClass;
    var query;
    var apiUrl;
    $(".data-from-url").keyup(function () {
        // The id of the field being typed into is used below to locate the
        // target fields through their data-title/data-description/data-img attributes.
        fileFieldId = $(this).attr('id');
        fileFieldClass = $(this).attr('class');
        // NOTE(review): fileFieldVal is not declared with `var` in this snippet,
        // so this assignment creates an implicit global (and throws in strict mode).
        fileFieldVal = $(this).val();
        // YQL statement selecting the whole document at the URL typed by the user.
        query = 'select * from html where url="' + $(this).val() + '" and xpath="*"';
        // NOTE(review): the endpoint prefix is an empty string here, so the request
        // URL is only the encoded query — presumably the YQL base URL was lost; confirm.
        apiUrl = '' + encodeURIComponent(query);

        $.get(apiUrl, function(data) {
          // Treat the response as markup and look for the wrapped <html> element.
          var html = $(data).find('html');
          // Each field falls back to a placeholder when the meta tag is missing.
          $("[data-title='" + fileFieldId + "']" ).val(html.find("meta[property='og:title']").attr('content') || 'no title found');
          // NOTE(review): the description fallback also reads 'no title found' — looks like a copy-paste; confirm intended text.
          $("[data-description='" + fileFieldId + "']" ).val(html.find("meta[property='og:description']").attr('content') || 'no title found');
          $("[data-img='" + fileFieldId + "']" ).val(html.find("meta[property='og:image']").attr('content') || '');



Here is a jsfiddle for the call I am making:

  // Once the DOM is ready, register a click handler that requests a page through
  // YQL's `htmlstring` table and copies its Open Graph meta tags into the form fields.
  $(function () {
      var query;
      var apiUrl;
      // NOTE(review): the selector is an empty string, which matches no elements in
      // jQuery — presumably this was meant to target the demo button; confirm.
      $("").click(function () {
          //query = 'select * from htmlstring where url="' + $(this).val() + '" and xpath="//a"&format=json&env=store://';
          // NOTE(review): this string has no endpoint in front of the query and the
          // `url=''` target is empty — the original URLs appear to be missing; confirm.
          apiUrl = " * from htmlstring where url=''&format=json&diagnostics=true&env=store://";
          $.get(apiUrl, function(data) {
            // NOTE(review): the request asks for format=json, yet the response is
            // wrapped with $() and searched as markup — verify against the real response.
            var html = $(data).find('html');
            // Each field falls back to a placeholder when the meta tag is missing.
            $("input.title" ).val(html.find("meta[property='og:title']").attr('content') || 'no title found');
           	 $("textarea.description").val(html.find("meta[property='og:description']").attr('content') || 'no title found');
            $("input.image").val(html.find("meta[property='og:image']").attr('content') || '');


/* Stretch every input to the full width of its container, with inner padding and space below. */
input {
    width: 100%;
    margin-bottom: 20px;
    padding: 10px;

<!-- Demo markup: a trigger button, a hidden status message, and the title,
     description and image fields that the script fills in. -->
<!-- NOTE(review): the script src is empty; presumably it pointed at jQuery — confirm. -->
<script src=""></script>
<button class="click">Click Me</button>
<p class="extract" style="display:none;">Extracting html</p>
<input type="text" class="title">
<textarea name="" id="" cols="30" rows="5" class="description"></textarea>
<input type="text" class="image">

Is there any other alternative for extracting the HTML meta tags from another site's head?


  • You might be able to read the meta tags using `querySelector`. I use `fetch` to grab Google Docs pages, which helpfully have all the document properties in the HTML meta tags. I then put the HTML into a temporary document, which I can query with `querySelector` as I see fit. Something like:

    var url = ""
    var id = url.split("://")[1].split("/")[3];
    var source = "" + id + "/edit?usp=sharing";
    fetch(source).then(function(response) {
            return response.text();
        }).then(function(html) {
            var doc = document.implementation.createHTMLDocument("foo");
            doc.documentElement.innerHTML = html;
            return doc.querySelector("meta[property='og:description']").getAttribute("content");
        }).then(function(title) {
           console.log("document title", title);