Search code examples
python python-3.x git git-commit

Getting the latest git commit id from URL on internal git using Python


With the following function I extract the latest git commit id in its short form and write it to a text file.

from os.path import exists
from subprocess import Popen, PIPE

def get_git_commit_id(txt_file: str) -> str:
    """
    Gets the latest git commit id and writes it to the txt file

    The file is always (over)written. When git cannot be run, or the current
    directory is not a git repository, the placeholder "latest" is used.
    Parameters:
    ==========
    :param txt_file: name of the txt file
    Returns:
    ==========
    :return: the latest git commit id in short form, or "latest"
    """
    if not exists(txt_file):
        print(f"'{txt_file}' did not exist before")  # for logging

    commit_id = ""
    try:
        # An argument list is required here: without shell=True a single
        # command string is treated as the executable name on POSIX and
        # raises FileNotFoundError.
        with Popen(["git", "rev-parse", "--short", "HEAD"], stdout=PIPE) as process:
            stdout = process.communicate()[0]
    except OSError as error:
        # git is not installed or could not be started
        print("It could not read the .git", error)  # for logging
    else:
        if process.returncode == 0:
            commit_id = stdout.decode("utf-8").strip()
        else:
            # e.g. not inside a git repository, or no commit yet
            print("It could not read the .git", f"exit code {process.returncode}")  # for logging

    if not commit_id:
        commit_id = "latest"

    # Write one normalised, newline-terminated line; the value is returned
    # directly instead of re-opening the file (which used to leak a handle).
    with open(txt_file, "w", encoding="utf8") as file:
        file.write(f"{commit_id}\n")
    return commit_id

# Example call: writes the commit id to "git_commit_id.txt" (a path relative to
# the current working directory); the returned id is not used here.
get_git_commit_id("git_commit_id.txt")

However, this code only works when the .git directory is inside my project.

How can I extract the latest git commit id in its short form from the URL where my project is hosted on the internal git server?


References:


Solution

  • With the following code I was able to get the desired output:

    # imports
    from os import devnull
    from os.path import exists
    from logging import getLogger
    from urllib.parse import parse_qsl
    from urllib.request import urlopen
    from ssl import create_default_context
    from xml.dom.minidom import parseString
    from subprocess import call, Popen, STDOUT, PIPE
    
    # Module-level logger named after this module; used by get_git_commit_id().
    logger = getLogger(__name__)
    
    # Placeholder URL of the project's summary page on the internal git web
    # interface; get_git_commit_id() downloads and parses this page when it
    # does not use the local git checkout. Replace it with the real address.
    project_git_url = "https://path/to/your/git.com/?p=your-project;a=summary"
    # Passed as `cafile` to ssl.create_default_context() when opening the URL;
    # None means no explicit CA bundle file is supplied.
    cert = None
    
    def _read_local_commit_id() -> str:
        """
        Asks the local git checkout for the short id of HEAD
        Returns:
        ==========
        :return: the short commit id, or an empty string when git cannot be
                 started or exits with an error (e.g. not inside a repository)
        """
        try:
            # An argument list is required: without shell=True a command string
            # is treated as the executable name on POSIX. git's error output is
            # discarded; only the exit code matters here.
            with open(devnull, "w") as null, Popen(
                ["git", "rev-parse", "--short", "HEAD"], stdout=PIPE, stderr=null
            ) as process:
                stdout = process.communicate()[0]
        except OSError:
            # git executable missing or not runnable
            return ""
        if process.returncode != 0:
            return ""
        return stdout.decode("utf-8").strip()

    def _fetch_remote_commit_id(summary_url: str, cafile=None, length: int = 6) -> str:
        """
        Extracts the latest commit id from the summary page of the git web interface

        The page is parsed as XML; inside every element carrying the value
        "page_nav" the links containing 'a=commit;' are inspected and the
        commit hash is taken from their query string.
        Parameters:
        ==========
        :param summary_url: URL of the project's summary page
        :param cafile: optional CA bundle file for the TLS context
        :param length: number of leading hash characters to keep
        Returns:
        ==========
        :return: the shortened commit id, or an empty string if none was found
        """
        with urlopen(summary_url, context=create_default_context(cafile=cafile)) as response:
            page = response.read().decode("utf-8")

        commit_id = ""
        for div in parseString(page).getElementsByTagName("div"):
            # Look at every attribute value instead of indexing the first
            # attribute, which raised IndexError on attribute-less <div>s.
            if not any("page_nav" in value.split() for _, value in div.attributes.items()):
                continue
            for anchor in div.getElementsByTagName("a"):
                href = anchor.getAttribute("href")
                if "a=commit;" not in href:
                    continue
                query = href.partition("?")[2] or href
                params = parse_qsl(query, encoding="utf-8", separator=";")
                # Prefer the explicit "h" (hash) parameter; fall back to the
                # third parameter, which is what was read positionally before.
                commit_hash = dict(params).get("h") or (params[2][1] if len(params) > 2 else "")
                if commit_hash:
                    # As before, a later matching link overrides an earlier one.
                    commit_id = commit_hash[:length]
        return commit_id

    def get_git_commit_id(txt_file: str) -> str:
        """
        Gets the latest git commit id and writes it to the txt file

        The id is read from the local git checkout when possible, otherwise from
        the summary page at ``project_git_url``. If both fail, the placeholder
        "latest" is used. The file is always (over)written.
        Parameters:
        ==========
        :param txt_file: name of the txt file
        Returns:
        ==========
        :return: the latest git commit id in short form, or "latest"
        """
        if not exists(txt_file):
            logger.info(
                "The following text file does not exist : '%s'. Now it was created.",
                txt_file,
            )

        commit_id = _read_local_commit_id()
        if not commit_id:
            logger.debug(
                "It could not read the .git repository to extract the latest git commit id."
            )
            try:
                commit_id = _fetch_remote_commit_id(project_git_url, cafile=cert)
            except Exception as error:
                # Deliberate best-effort boundary: network, TLS, decoding and
                # XML parsing errors must all end in the "latest" fallback.
                logger.debug(
                    "It could not get the latest git commit id from the git : %s.\n"
                    "The following error occurred : %s",
                    project_git_url,
                    error,
                )

        if not commit_id:
            commit_id = "latest"

        # Write one normalised, newline-terminated line; the value is returned
        # directly instead of re-opening the file (which used to leak a handle).
        with open(txt_file, "w", encoding="utf8") as file:
            file.write(f"{commit_id}\n")
        return commit_id
    

    It also parses the page at the URL where the internal git server hosts the project.


    References: