Search code examples
c++cdoxygendocumentation-generation

Create Documentation using Doxygen and non-Doxygen commented source code


We have some roughly 10-year-old C and C++ code here, without any documentation or manual. The source is documented quite well in the header files, but going through all the files to look for information is a lot of work. It looks like this:

// Description description ....
//
// @param parameter 1 name:
// description of parameter 1
//
// @param parameter 2 name:
// description of parameter 2
//
Returntype Functionname(parameter1, parameter2);

Using the Doxygen wizard, documentation can be created, but all the comments are lost because they are not formatted in a way the parser understands.

So is that a format I don't know? Can I teach the parser what to do? Or is it a special format that is used by other software?


Solution

  • I wrote a python script, to convert the comments to a format the parser understands. It's not pretty, it's not safe, but it works for us.

    import os
    import re
    import shutil
    import textwrap
    import time
    
    # One-line C/C++ function declaration: "<return type> <name>(<params>) ... ;".
    # The function name is the identifier directly in front of the parameter
    # list; everything before it is the (possibly qualified) return type.
    # Statements such as "return foo(x);" are excluded up front.
    _DECLARATION_RE = re.compile(
        r'^(?!(?:return|throw|delete)\b)'
        r'(?P<rtype>\w[\w\s:<>,*&]*?)\s*\b(?P<name>\w+)\s*'
        r'\((?P<params>[^()]*)\)[^()]*;$'
    )

    # Declaration specifiers that are not part of the return type itself.
    _SPECIFIERS = frozenset(
        {"static", "extern", "inline", "virtual", "friend", "constexpr"}
    )

    # Width available for text after the " * @brief " prefix on an 80 column line.
    _BRIEF_WIDTH = 80 - len(" * @brief ")


    def _commentText(line):
        """Return the text of a '//' comment line without slashes and padding."""
        return line.strip().lstrip("/").strip()


    def _declaredReturnType(declaration):
        """Return the return type of a one-line function declaration.

        Returns None when `declaration` (already stripped) does not look like a
        function declaration with a return type.
        """
        match = _DECLARATION_RE.match(declaration)
        if match is None:
            return None
        tokens = [tok for tok in match.group("rtype").split()
                  if tok not in _SPECIFIERS]
        return " ".join(tokens) or None


    def _parseCommentBlock(commentLines):
        """Split a '//' comment block into (description, list of '@' tags).

        The description is every line up to the first empty comment line or the
        first '@' line. Each '@' line starts a new tag; the following non-empty
        lines continue it until an empty comment line or the next '@' line.
        """
        texts = [_commentText(line) for line in commentLines]

        descriptionParts = []
        for text in texts:
            if not text or text.startswith("@"):
                break
            descriptionParts.append(text)

        tags = []
        current = None
        for text in texts:
            if text.startswith("@"):
                current = [text]
                tags.append(current)
            elif not text:
                current = None
            elif current is not None:
                current.append(text)

        return " ".join(descriptionParts), [" ".join(tag) for tag in tags]


    def _renderDoxygenBlock(declaration, returnType, description, tags):
        """Return the lines of a Doxygen comment block for one declaration."""
        block = ["/**", " * @fn    " + declaration[:-1], " *"]

        # Wrap on word boundaries so that no word is split across two lines.
        wrapped = textwrap.wrap(description, width=_BRIEF_WIDTH) or [""]
        block.append((" * @brief " + wrapped[0]).rstrip())
        block.extend(" *        " + part for part in wrapped[1:])
        block.append(" *")

        for tag in tags:
            block.append(" * " + tag)
            block.append(" *")

        block.append(" * @return " + returnType)
        block.append(" *")
        block.append(" * @date  " + time.strftime("%d.%m.%Y")
                     + "<br> parsed using python")
        block.append(" */")
        return block


    def convertHeaderDocumentation(file):
        """Convert '//' comment blocks in front of declarations to Doxygen blocks.

        A run of '//' comment lines directly followed by a one-line function
        declaration is replaced by a '/** ... */' block with @fn, @brief, the
        '@' tags found in the comment, @return and @date. Every other line
        (blank lines, preprocessor lines, other code and comments that do not
        precede a declaration) is copied to the output unchanged.

        Parameters:
            file: path of the header file to read.

        Returns:
            The converted file content as one string, each line terminated
            by a newline.
        """
        with open(file) as f:
            lines = [line.rstrip('\n') for line in f]

        output = []
        pending = []    # '//' lines collected since the last non-comment line

        for line in lines:
            stripped = line.strip()

            if stripped.startswith("//"):
                pending.append(line)
                continue

            # Only a code line that directly follows a comment block can be a
            # documented declaration; blank and preprocessor lines never are.
            returnType = None
            if pending and stripped and not stripped.startswith("#"):
                returnType = _declaredReturnType(stripped)

            if returnType is None:
                # Keep the collected comments and the line itself untouched.
                output.extend(pending)
                pending.clear()
                output.append(line)
                continue

            description, tags = _parseCommentBlock(pending)
            output.extend(
                _renderDoxygenBlock(stripped, returnType, description, tags))
            output.append(stripped)
            output.append("")
            pending.clear()

        # Comments at the very end of the file belong to no declaration.
        output.extend(pending)

        return "".join(entry + "\n" for entry in output)
    
    # Root of the source tree to process; the result is written to a sibling
    # directory named "converted".
    projectDir = "path to source files goes here"
    # abspath lives in os.path; the os module itself has no abspath attribute.
    projectDir = os.path.abspath(projectDir)
    parentProjectDir, projectDirName = os.path.split(projectDir)
    convertedDir = os.path.join(parentProjectDir, "converted")
    print(convertedDir)


    for root, dirs, files in os.walk(projectDir):

        # mirror the directory structure below convertedDir
        tmpConvertedDir = os.path.normpath(
            os.path.join(convertedDir, os.path.relpath(root, projectDir)))
        os.makedirs(tmpConvertedDir, exist_ok=True)

        for file in files:

            filename, fileextension = os.path.splitext(file)
            # only copy/convert c++/c source files
            if fileextension not in {'.h', '.c', '.cpp'}:
                continue

            sourcePath = os.path.join(root, file)
            newPath = os.path.join(tmpConvertedDir, file)
            print(newPath)

            if fileextension == '.h':
                # header files carry the documentation and get converted
                converted = convertHeaderDocumentation(sourcePath)

                with open(newPath, 'w') as f:
                    f.write(converted)
            else:
                # implementation files are copied unchanged
                print("copy ", sourcePath, " to ", newPath)
                shutil.copyfile(sourcePath, newPath)
    

    The function declaration was a bit tricky to catch with a regex. For my case \s*\w*\s*(\w+)\s+(\w+).*\((.*)[^)].*; works just fine, but when there are no extra keywords before the return type, a lazy quantifier is more accurate: \s*\w*?\s*(\w+)\s+(\w+).*\((.*)[^)].*;

    All C and C++ files in the given projectDir directory and its subfolders are processed: header files are converted, and source files are simply copied. To hold the results, a directory ..\converted is created, which contains the copied/converted files.

    With the resulting files doxygen wizard created a sufficient documentation. Maybe this is going to help someone :-)