Search code examples
google-apps-scriptmetadatameta-tags

An AppScript which fetch metadata from a list of URLs


Thank you for this great forum. I am a very very beginner in coding.

My issue: I have a column of hundreds URLs, and I want to automatically query their metatags to get their: "title", "description", "keywords" and "logo".

I don't manage to write a good script to do it (knowing that I will copy this script in Google Sheet): it is difficult to deal with the various x-path that exist within websites.

Many thanks in advance for your help Best

I tried some scripts, and more basically also tested a Google Sheet function (importxml). But both only work 1 out 5 times...


Solution

  • Here's what I tried:

    function removeOldMetaDataFromSheet() {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      sh.createDeveloperMetadataFinder().find().forEach(e => e.remove());//removes meta data in the range
      SpreadsheetApp.getUi().alert('Complete');
    }
    
    function addRangeMetaData() {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      const rg = sh.getDataRange();
      const vs = rg.getValues();
      vs.forEach((r, i) => {
        sh.getRange(String(i + 1) + ':' + String(i + 1)).addDeveloperMetadata('row' + String(i));
        if (i == 0) {
          r.forEach((c, j) => {
            let l = getColumnLetters(j+1);
            sh.getRange(l + ':' + l).addDeveloperMetadata('col' + String(j));
          });
        }
      });
      SpreadsheetApp.getUi().alert('Complete');
    }
    
    function displayMetaDataForARange() {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      const rg = sh.getDataRange();
      var v = rg.createDeveloperMetadataFinder().onIntersectingLocations().find();
      let md = [];
      v.forEach(e => {
        let obj = {};
        obj['key'] = e.getKey();
        obj['value'] = e.getValue();
        obj['visibility'] = e.getVisibility();
        if (e.getKey().toString().slice(0, 3) == 'row') {
          obj['A1'] = e.getLocation().getRow().getA1Notation();
          let w = getRowWidth(e.getLocation().getRow().getRow(), sh, ss);
          obj['rowWidth'] = w;
          obj['rowvalues'] = e.getLocation().getRow().getValues().flat().filter((r, i) => i < w).join(',');
        } else {
          obj['A1'] = e.getLocation().getColumn().getA1Notation();
          let h = getColumnHeight(e.getLocation().getColumn().getColumn(), sh, ss);
          obj['colheight'] = h;
          obj['colvalues'] = e.getLocation().getColumn().getValues().flat().filter((c, i) => i < h).join(',');
        }
        obj['id'] = e.getId();
        md.push(obj);
        //e.remove();
      });
      SpreadsheetApp.getUi().showModelessDialog(HtmlService.createHtmlOutput('<textarea rows="25" cols="150">' + JSON.stringify(md) + '</textarea>').setWidth(1200).setHeight(600), 'Meta Data');
    }
    
    function getMetaDataForARangeById() {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      const rg = sh.getDataRange();
      let resp = SpreadsheetApp.getUi().prompt('Search for Meta Data by Id', 'Enter Id', SpreadsheetApp.getUi().ButtonSet.OK_CANCEL);
      let o;
      if (resp.getSelectedButton() == SpreadsheetApp.getUi().Button.OK) {
        let id = resp.getResponseText();
        let r = rg.createDeveloperMetadataFinder().onIntersectingLocations().withId(id).find();
        r.forEach(e => {
          if (e.getLocation().getLocationType() == SpreadsheetApp.DeveloperMetadataLocationType.ROW) {
            o = e.getLocation().getRow().getValues().flat().filter(e => e).join();
          } else if (e.getLocation().getLocationType() == SpreadsheetApp.DeveloperMetadataLocationType.COLUMN) {
            o = e.getLocation().getColumn().getValues().flat().filter(e => e).join(',');
          }
        });
        SpreadsheetApp.getUi().showModelessDialog(HtmlService.createHtmlOutput('<textarea row="30" cols="120">' + o + '</textarea>').setWidth(800).setHeight(400), 'Results');
      }
    }
    
    function getMetaDataForARangeByKey() {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      const rg = sh.getDataRange();
      let resp = SpreadsheetApp.getUi().prompt('Search for Meta Data by Key', 'Enter Key', SpreadsheetApp.getUi().ButtonSet.OK_CANCEL);
      let o;
      let s;
      if (resp.getSelectedButton() == SpreadsheetApp.getUi().Button.OK) {
        let key = resp.getResponseText();
        let r = rg.createDeveloperMetadataFinder().onIntersectingLocations().withKey(key).find();
        r.forEach(e => {
          if (e.getLocation().getLocationType() == SpreadsheetApp.DeveloperMetadataLocationType.ROW) {
            let row = e.getLocation().getRow().getRow();
            let w = getRowWidth(row, sh, ss);
            o = e.getLocation().getRow().getValues().flat().filter((e, i) => i < w).join(', ');
            s = `row: ${row} values: ${o}`;
          } else if (e.getLocation().getLocationType() == SpreadsheetApp.DeveloperMetadataLocationType.COLUMN) {
            let col = e.getLocation().getColumn().getColumn();
            let h = getColumnHeight(col, sh, ss);
            o = e.getLocation().getColumn().getValues().flat().filter((e, i) => i < h).join(', ');
            s = `col: ${col} values: ${o}`;
          }
        });
        s = (s)?s:'No Such Key Found: ' + key;
        SpreadsheetApp.getUi().showModelessDialog(HtmlService.createHtmlOutput(s).setWidth(300).setHeight(150), 'Results');
      }
    }
    
    function getColumnLetters(colnum) {
      const a1 = SpreadsheetApp.getActiveSheet().getRange(1, colnum).getA1Notation();
      return a1.slice(0, a1.indexOf(/\d+/));
    }
    
    function UsedToBeAnonEdit(e) {
      e.source.toast('entry');
      const sh = e.range.getSheet();
      let col = sh.createDeveloperMetadataFinder().withKey('cb1').find()[0].getLocation().getColumn().getColumn();
      if (sh.getName() == 'Sheet1' && e.range.columnStart == col && e.range.rowStart > 1 && e.value == "TRUE" ) {
        e.range.setValue('FALSE');
        e.range.offset(0,1).setValue(e.range.offset(0,1).getValue() + 1);
      }
    }
    
    function setupOnEditMetaData() {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      const l = getColumnLetters(sh.getLastColumn(), sh, ss);
      const rg = sh.getRange(l + ':' + l);
      rg.addDeveloperMetadata('cb1');
      SpreadsheetApp.getUi().alert('Complete');
    }
    
    function getCheckBoxColumn(key='cb1') {
      const ss = SpreadsheetApp.getActive();
      const sh = ss.getSheetByName('Sheet1');
      let col = sh.createDeveloperMetadataFinder().withKey('cb1').find()[0].getLocation().getColumn().getColumn();
      let s = getColumnLetters(col);
      SpreadsheetApp.getUi().showModelessDialog(HtmlService.createHtmlOutput(s),'Checkbox Column');
    }
    
    function addingmetadatatosomesheet() {
      const ss = SpreadsheetApp.getActive();
      ss.getSheets().forEach((sh, i) => {
        if (sh.getName().match(/^Sheet\d{1,}/)) {
          sh.addDeveloperMetadata('shidx', i + 1);
        }
      });
    }
    
    function findSheetMetaData() {
      const ss = SpreadsheetApp.getActive();
      ss.getSheets().forEach(sh => {
        let v = sh.createDeveloperMetadataFinder().withKey('shidx').find();
        if (v && v.length > 0) {
          v.forEach(e => {
            Logger.log(`SheetName: ${sh.getName()} Metadata: ${e.getValue()}`);
          });
        }
      });
    }