Search code examples
python pygithub

pygithub search and read specific files


I am using PyGithub to go through the files in a GitHub repository. The problem is that, starting from my_code.get_contents(""), this code walks through every file in every folder and subfolder of the repo. Is there a way to make it more efficient? I am only interested in parsing the .csproj files and the packages.config files, wherever they are found, but these files are scattered across multiple places.

from github import Github
import pathlib
import xml.etree.ElementTree as ET

def processFilesInGitRepo():
    """Walk the pending repository entries and parse the project files found.

    Works on the module-level ``contents`` list as a FIFO queue: entries are
    taken from the front, directories are expanded through the module-level
    ``my_code`` repository object and appended to the back, and the list is
    empty once the function returns.

    Files named ``packages.config`` are handed to
    ``parseXMLInPackagesConfig`` and files with a ``.csproj`` suffix to
    ``parseXMLInCsProj`` (both defined outside this block), each receiving
    the decoded file text. Every non-directory entry is printed.
    """
    while len(contents) > 0:
        entry = contents.pop(0)

        # Directories are not parsed; their children are queued for later.
        if entry.type == 'dir':
            contents.extend(my_code.get_contents(entry.path))
            continue

        entry_path = pathlib.Path(entry.path)

        if entry_path.name == 'packages.config':
            parseXMLInPackagesConfig(entry.decoded_content.decode())

        if entry_path.suffix == '.csproj':
            parseXMLInCsProj(entry.decoded_content.decode())

        print(entry)


# Build the PyGithub client. "MyToken" is a placeholder for a real access token.
my_git=Github("MyToken")


# Repository handle; processFilesInGitRepo() reads it as a module-level name
# when it expands directories.
my_code=my_git.get_repo("BeclsAutomation/Echo65XPlus")
# Initial work queue for processFilesInGitRepo(), which pops entries from this
# list and appends the children of every directory it meets.
contents=my_code.get_contents("") #empty string i.e. ("") gives all the items in the Repository. But can I specify some kind of a search term here saying I need only .csproj and packages.config files.

# Run the traversal; it drains `contents` as it goes.
processFilesInGitRepo()

Solution

  • PyGithub can also fetch the content of a single, specific file on a specific branch:

    from github import Github
    
    def connect_to_github():
        """Create a PyGithub client authenticated with a personal access token.

        Returns:
            The ``Github`` instance constructed from the token.
        """
        # Placeholder: substitute a real personal access token here.
        # NOTE(review): the original also assigned an unused ``host`` URL; it
        # was never passed to ``Github``, so it had no effect and is dropped.
        token = "PAT TOKEN"
        github_instance = Github(token)
        return github_instance
    
    if __name__ == "__main__":
        # Fetch one file from a given branch and print its text.
        client = connect_to_github()
        repo = client.get_repo("<organisation>/<project-name>")
        file_entry = repo.get_contents(path="<path of the file>", ref="<branch name>")
        # decoded_content is bytes; decode it to text before printing.
        print(file_entry.decoded_content.decode('utf-8'))