Search code examples
javascriptgraphqlgatsbygithub-api

Get results from different arrays in one Promise.all with the GitHub GraphQL API


I'm making a custom source plugin for Gatsby that will get markdown files from a GitHub repository. The repository has individual files (blobs) and folders (trees), which in turn also contain files. I need to get all files (including files inside folders) in one Promise.all, but I can't figure out how to do that. I've managed to get individual files from the repository, and I have a function which returns an array of files from the trees, but I don't know how to combine them.

Here is my code. GraphQL queries to get repository, trees and files information:

// GraphQL query: resolves the object for the expression "master:" in the viewer's
// repository and, when that object is a Tree, selects name, oid and type of its entries.
const repositoryQuery = `
{
  viewer {
    repository(name: "repository-name") {
      object(expression: "master:") {
        ... on Tree {
          entries {
            name
            oid
            type
          }
        }
      }
    }
  }
}
`

// GraphQL query: looks up an object by its oid ($id) and, when that object is a Tree,
// selects name, oid and type of its entries.
const treeQuery = `
  query getTree($id: GitObjectID!) {
    viewer {
      repository(name: "repository-name") {
        object(oid: $id) {
          ... on Tree {
            entries {
              name
              oid
              type
            }
          }
        }
      }
    }
  }
`

// GraphQL query: looks up an object by its oid ($id) and, when that object is a Blob,
// selects its text.
const fileQuery = `
  query getFile($id: GitObjectID!) {
    viewer {
      repository(name: "repository-name") {
        object(oid: $id) {
          ... on Blob {
            text
          }
        }
      }
    }
  }
` 

And the functions themselves:

// Run the repository query; the entry list is read from data.viewer.repository.object.entries below.
const data = await client.request(repositoryQuery)

/**
 * Fetch the entries of a tree.
 *
 * @param {{oid: string}} entry - entry whose `oid` is sent as the query's `$id` variable.
 * @returns {Promise<Array>} the `entries` array found at
 *   `viewer.repository.object.entries` in the response.
 */
const getTree = async entry => {
  // Only the request itself is asynchronous; reading properties off the
  // resolved response needs no further await.
  const { viewer } = await client.request(treeQuery, { id: entry.oid })
  return viewer.repository.object.entries
}

/**
 * Fetch the object behind a blob entry.
 *
 * @param {{oid: string}} entry - entry whose `oid` is sent as the query's `$id` variable.
 * @returns {Promise<Object>} the value found at `viewer.repository.object`
 *   in the response (the query selects `text` on it when it is a Blob).
 */
const getFile = async entry => {
  // Only the request itself is asynchronous; reading properties off the
  // resolved response needs no further await.
  const { viewer } = await client.request(fileQuery, { id: entry.oid })
  return viewer.repository.object
}

// Download every non-tree entry of the root listing in parallel,
// keeping only each file's text under the `data` key.
const files = await Promise.all(
  data.viewer.repository.object.entries
    .filter(({ type }) => type !== "tree")
    .map(async blobEntry => {
      const { text } = await getFile(blobEntry)
      return { data: text }
    })
)

// Call createNode once per downloaded file; `{...}` is a placeholder for the
// node fields, which are not shown here.
files.forEach(file =>
  createNode({...})
)

How can I update const files so that it will:

  1. Run getFile(), if entry.type !== "tree"
  2. If entry.type is tree, get an array of files inside the tree with getTree() and then run getFile() for each file.
  3. Combine all results in one array, so that I can apply to them createNode.

I would really appreciate your help.


Solution

  • You could take inspiration from the walk function commonly used to walk through directories recursively (see the linked example). It would look like this:

    /**
     * Collect the files reachable from an entry.
     *
     * @param {Object} entry - a tree entry (`oid` is read for non-root calls).
     * @param {boolean} isRoot - when truthy the entry is handed straight to
     *   processEntry; otherwise it is treated as a tree and its children are listed.
     * @returns {Promise<Array>} the concatenated results of processEntry.
     */
    async function walk(entry, isRoot) {
      if (isRoot) {
        return processEntry(entry);
      }
      const listing = await getTreeEntryFromTree(repository, entry.oid);
      const children = listing.data.viewer.repository.object.entries;
      // Process all children concurrently, then merge their arrays in order.
      const perChild = await Promise.all(children.map(async child => processEntry(child)));
      let collected = [];
      for (const contents of perChild) {
        collected = collected.concat(contents);
      }
      return collected;
    }
    
    /**
     * Turn one entry into an array of file records.
     *
     * @param {{name: string, oid: string, type: string}} entry
     * @returns {Promise<Array>} for a "tree" entry, whatever walk(entry, false)
     *   yields; otherwise a one-element array holding name, oid and the blob text.
     */
    async function processEntry(entry) {
      const { name, oid, type } = entry;
      if (type !== "tree") {
        const blob = await getBlob(repository, oid);
        return [{ name, oid, data: blob.data.viewer.repository.object.text }];
      }
      // Trees are expanded recursively through walk.
      return walk(entry, false);
    }
    

    So, it's just replacing directories with trees, and requesting the data content for each file when you return the file.

    The following is the gatsby-node.js code for a source plugin (without createSchemaCustomization):

    const { ApolloClient } = require("apollo-client")
    const { InMemoryCache } = require("apollo-cache-inmemory")
    const { HttpLink } = require("apollo-link-http")
    const fetch = require("node-fetch")
    const gql = require("graphql-tag")
    const { setContext } = require('apollo-link-context');
    
    // Placeholders: replace with your GitHub token and repository name.
    const token = "YOUR_TOKEN";
    const repository = "YOUR_REPO";

    // Adds a bearer authorization header (null when no token is set) to the request headers.
    const authLink = setContext((_, { headers }) => ({
      headers: {
        ...headers,
        authorization: token ? `Bearer ${token}` : null,
      },
    }));

    // Both operation kinds bypass the cache; they differ only in error policy.
    const defaultOptions = {
      watchQuery: { fetchPolicy: 'no-cache', errorPolicy: 'ignore' },
      query: { fetchPolicy: 'no-cache', errorPolicy: 'all' },
    };

    const client = new ApolloClient({
      link: authLink.concat(new HttpLink({ uri: 'https://api.github.com/graphql', fetch })),
      cache: new InMemoryCache(),
      defaultOptions,
    });
    
    exports.sourceNodes = async function sourceNodes(
      {
        actions,
        cache,
        createContentDigest,
        createNodeId,
        getNodesByType,
        getNode,
      },
      pluginOptions
    ) {
      const { createNode, touchNode, deleteNode } = actions
      const { data } = await getTreeFromRepo(repository)
    
      let sourceData = data;
    
      fileArr = []
      sourceData.viewer.repository.object.entries.map(it => {
        fileArr.push(walk(it, true))
      });
      let res = await Promise.all(fileArr)
      let result = res.flat();
      console.log(result);
      console.log(`got ${result.length} results`);
      return
    }
    
    /**
     * Collect the files reachable from an entry.
     *
     * @param {Object} entry - a tree entry (`oid` is read for non-root calls).
     * @param {boolean} isRoot - when truthy the entry is handed straight to
     *   processEntry; otherwise it is treated as a tree and its children are listed.
     * @returns {Promise<Array>} the concatenated results of processEntry.
     */
    async function walk(entry, isRoot) {
      if (isRoot) {
        return processEntry(entry);
      }
      const listing = await getTreeEntryFromTree(repository, entry.oid);
      const children = listing.data.viewer.repository.object.entries;
      // Process all children concurrently, then merge their arrays in order.
      const perChild = await Promise.all(children.map(async child => processEntry(child)));
      let collected = [];
      for (const contents of perChild) {
        collected = collected.concat(contents);
      }
      return collected;
    }
    
    /**
     * Turn one entry into an array of file records.
     *
     * @param {{name: string, oid: string, type: string}} entry
     * @returns {Promise<Array>} for a "tree" entry, whatever walk(entry, false)
     *   yields; otherwise a one-element array holding name, oid and the blob text.
     */
    async function processEntry(entry) {
      const { name, oid, type } = entry;
      if (type !== "tree") {
        const blob = await getBlob(repository, oid);
        return [{ name, oid, data: blob.data.viewer.repository.object.text }];
      }
      // Trees are expanded recursively through walk.
      return walk(entry, false);
    }
    
    /**
     * Query the entries of the tree addressed by the expression "master:".
     *
     * The repository name is sent as a GraphQL variable rather than spliced
     * into the query text, so names containing quotes cannot alter the query
     * and the document stays constant across calls.
     *
     * @param {string} repo - repository name, looked up under `viewer`.
     * @returns {Promise<Object>} the result of `client.query`.
     */
    async function getTreeFromRepo(repo) {
      return client.query({
        query: gql`
          query getRootTree($name: String!) {
            viewer {
              repository(name: $name) {
                object(expression: "master:") {
                  ... on Tree {
                    entries {
                      name
                      oid
                      type
                    }
                  }
                }
              }
            }
          }
        `,
        variables: {
          name: repo,
        },
      });
    }
    
    /**
     * Query the entries of the tree identified by `oid`.
     *
     * Both the repository name and the oid are sent as GraphQL variables, so
     * the query text is constant and cannot be altered by the repository name.
     *
     * @param {string} repo - repository name, looked up under `viewer`.
     * @param {string} oid - object id of the tree.
     * @returns {Promise<Object>} the result of `client.query`.
     */
    async function getTreeEntryFromTree(repo, oid) {
      return client.query({
        query: gql`
          query getTree($name: String!, $id: GitObjectID!) {
            viewer {
              repository(name: $name) {
                object(oid: $id) {
                  ... on Tree {
                    entries {
                      name
                      oid
                      type
                    }
                  }
                }
              }
            }
          }
        `,
        variables: {
          name: repo,
          id: oid,
        },
      });
    }
    
    /**
     * Query the text of the blob identified by `oid`.
     *
     * Both the repository name and the oid are sent as GraphQL variables, so
     * the query text is constant and cannot be altered by the repository name.
     *
     * @param {string} repo - repository name, looked up under `viewer`.
     * @param {string} oid - object id of the blob.
     * @returns {Promise<Object>} the result of `client.query`.
     */
    async function getBlob(repo, oid){
      return client.query({
        query: gql`
          query getFile($name: String!, $id: GitObjectID!) {
            viewer {
              repository(name: $name) {
                object(oid: $id) {
                  ... on Blob {
                    text
                  }
                }
              }
            }
          }
        `,
        variables: {
          name: repo,
          id: oid,
        },
      });
    }
    

    You would need to replace the Github token and the repo name in the above code.

    It returns an array of objects, each with the file content, name and oid.

    Note that using ... on Blob { text } returns null for binary files:

    text (String) UTF8 text data or null if the Blob is binary


    Also, it's possible to use Github API v3 to walk through the tree recursively in a single call, which drastically reduces the number of requests. You would have something like this:

    /**
     * Fetch the recursive tree listing of the master branch through the REST API.
     *
     * @param {string} repo - repository name.
     * @param {string} owner - login of the repository owner.
     * @returns {Promise<Object>} the parsed JSON body of the response.
     * @throws {Error} when the response status is not in the 2xx range, so a
     *   failed request is reported as such instead of surfacing later as a
     *   missing `tree` property.
     */
    async function getAllEntries(repo, owner){
      const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/git/trees/master?recursive=1`,{
        headers: {
          'Authorization': `Bearer ${token}`,
        }
      });
      if (!response.ok) {
        throw new Error(`GitHub tree request failed: ${response.status} ${response.statusText}`);
      }
      return response.json();
    }
    

    Full example (for a Gatsby source plugin) :

    const { ApolloClient } = require("apollo-client")
    const { InMemoryCache } = require("apollo-cache-inmemory")
    const { HttpLink } = require("apollo-link-http")
    const fetch = require("node-fetch")
    const gql = require("graphql-tag")
    const { setContext } = require('apollo-link-context');
    
    // Placeholders: replace with your GitHub token, repository name and owner login.
    const token = "YOUR_TOKEN";
    const repository = "YOUR_REPO";
    const owner = "YOUR_LOGIN";

    // Adds a bearer authorization header (null when no token is set) to the request headers.
    const authLink = setContext((_, { headers }) => ({
      headers: {
        ...headers,
        authorization: token ? `Bearer ${token}` : null,
      },
    }));

    // Both operation kinds bypass the cache; they differ only in error policy.
    const defaultOptions = {
      watchQuery: { fetchPolicy: 'no-cache', errorPolicy: 'ignore' },
      query: { fetchPolicy: 'no-cache', errorPolicy: 'all' },
    };

    const client = new ApolloClient({
      link: authLink.concat(new HttpLink({ uri: 'https://api.github.com/graphql', fetch })),
      cache: new InMemoryCache(),
      defaultOptions,
    });
    
    exports.sourceNodes = async function sourceNodes(
      {
        actions,
        cache,
        createContentDigest,
        createNodeId,
        getNodesByType,
        getNode,
      },
      pluginOptions
    ) {
      const { createNode, touchNode, deleteNode } = actions
      const { tree } = await getAllEntries(repository, owner)
      fileArr = []
      tree.map(it => {
        fileArr.push(walk(it, true))
      });
      let res = await Promise.all(fileArr)
      let result = res.filter(value => Object.keys(value).length !== 0);
      console.log(result);
      console.log(`got ${result.length} results`);
      return
    }
    
    /**
     * Build the file record for one entry of the recursive tree listing.
     *
     * @param {{type: string, path: string, sha: string}} entry
     * @returns {Promise<Object>} `{ name, oid, data }` for a "blob" entry,
     *   or an empty object for any other entry type.
     */
    async function walk(entry){
      if (entry.type !== "blob") {
        return {};
      }
      const { path, sha } = entry;
      const blob = await getBlob(repository, sha);
      return {
        name: path,
        oid: sha,
        data: blob.data.viewer.repository.object.text,
      };
    }
    
    /**
     * Fetch the recursive tree listing of the master branch through the REST API.
     *
     * @param {string} repo - repository name.
     * @param {string} owner - login of the repository owner.
     * @returns {Promise<Object>} the parsed JSON body of the response.
     * @throws {Error} when the response status is not in the 2xx range, so a
     *   failed request is reported as such instead of surfacing later as a
     *   missing `tree` property.
     */
    async function getAllEntries(repo, owner){
      const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/git/trees/master?recursive=1`,{
        headers: {
          'Authorization': `Bearer ${token}`,
        }
      });
      if (!response.ok) {
        throw new Error(`GitHub tree request failed: ${response.status} ${response.statusText}`);
      }
      return response.json();
    }
    
    /**
     * Query the text of the blob identified by `oid`.
     *
     * Both the repository name and the oid are sent as GraphQL variables, so
     * the query text is constant and cannot be altered by the repository name.
     *
     * @param {string} repo - repository name, looked up under `viewer`.
     * @param {string} oid - object id of the blob.
     * @returns {Promise<Object>} the result of `client.query`.
     */
    async function getBlob(repo, oid){
      return client.query({
        query: gql`
          query getFile($name: String!, $id: GitObjectID!) {
            viewer {
              repository(name: $name) {
                object(oid: $id) {
                  ... on Blob {
                    text
                  }
                }
              }
            }
          }
        `,
        variables: {
          name: repo,
          id: oid,
        },
      });
    }
    

    If you need to get the binary content at any cost, you would need to use Github API v3 which gives the content url directly in the get tree result. The content URL returns the content encoded in base64, see this file.

    So, if you want base64 encoded content (binary + text), you would have the following gatsby-node.js (for source plugin) :

    const fetch = require("node-fetch")
    
    // Placeholders: replace with your GitHub token, repository name and owner login.
    const token = "YOUR_TOKEN";
    const repository = "YOUR_REPO";
    const owner = "YOUR_LOGIN";
    
    exports.sourceNodes = async function sourceNodes(
      {
        actions,
        cache,
        createContentDigest,
        createNodeId,
        getNodesByType,
        getNode,
      },
      pluginOptions
    ) {
      const { createNode, touchNode, deleteNode } = actions
      const { tree } = await getAllEntries(repository, owner)
      fileArr = []
      tree.map(it => {
        fileArr.push(walk(it, true))
      });
      let res = await Promise.all(fileArr)
      console.log(res);
      console.log(`got ${res.length} results`);
      return
    }
    
    /**
     * Build the file record for one entry of the recursive tree listing.
     *
     * @param {{type: string, path: string, sha: string, url: string}} entry
     * @returns {Promise<Object>} `{ name, oid, data }` for a "blob" entry, where
     *   `data` is the `content` field of the fetched blob; an empty object for
     *   any other entry type.
     */
    async function walk(entry){
      if (entry.type !== "blob") {
        return {};
      }
      const { path, sha, url } = entry;
      const blob = await getBlob(url);
      return {
        name: path,
        oid: sha,
        data: blob.content,
      };
    }
    
    /**
     * Fetch the recursive tree listing of the master branch through the REST API.
     *
     * @param {string} repo - repository name.
     * @param {string} owner - login of the repository owner.
     * @returns {Promise<Object>} the parsed JSON body of the response.
     * @throws {Error} when the response status is not in the 2xx range, so a
     *   failed request is reported as such instead of surfacing later as a
     *   missing `tree` property.
     */
    async function getAllEntries(repo, owner){
      const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/git/trees/master?recursive=1`, {
        headers: {
          'Authorization': `Bearer ${token}`,
        }
      });
      if (!response.ok) {
        throw new Error(`GitHub tree request failed: ${response.status} ${response.statusText}`);
      }
      return response.json();
    }
    
    /**
     * Fetch a blob through the REST API.
     *
     * @param {string} url - blob URL, as given in a tree entry's `url` field.
     * @returns {Promise<Object>} the parsed JSON body of the response.
     * @throws {Error} when the response status is not in the 2xx range, so an
     *   error body is not mistaken for a blob.
     */
    async function getBlob(url){
      const response = await fetch(url, {
        headers: {
          'Authorization': `Bearer ${token}`,
        }
      });
      if (!response.ok) {
        throw new Error(`GitHub blob request failed: ${response.status} ${response.statusText}`);
      }
      return response.json();
    }