Search code examples
pythonpandasxlsxwriteropenxlsx

Transform lines from .txt file into .xls columns pandas


i have a text file that contains datas on lines like his:

Total usage 2022-09-17T06:02:50+02:00 for vob "/Tmobile_Cro/TMobileCro" is 7868.0 Mb
Total usage 2022-09-17T06:04:18+02:00 for vob "/XpressCM/SX-BASE" is 25265.7 Mb
Total usage 2022-09-17T06:02:56+02:00 for vob "/vobs/LI" is 5916.9 Mb

I want to process this data and export into an excel file as:

TAG                              Size
/Tmobile_Cro/TmobileCro         7868.0 Mb
/XpressCM/SX-BASE               25265.7 Mb

This is my code:

import xlrd, xlwt, re
from svnscripts.timestampdirectory import createdir, path_dir
import os

def clearcasesize():
    pathdest = path_dir()
    dest = createdir()
    txtName = rf"{pathdest}\vobs_size.txt"
    workBook = xlwt.Workbook(encoding='ascii')
    workSheet = workBook.add_sheet('sheet1')

    fp = open(txtName, 'r+b')

    workSheet.write(0, 0, "TAG")
    workSheet.write(0, 1, "Size")

    row = 0
    entries = 0
    fullentry = []
    for linea in fp.readlines():
        str_linea = linea.decode('gb2312', 'ignore')
        str_linea = str_linea[:-2]  # str  string

        txt = str_linea
        arr = str_linea

        if arr[:9] == "Total":
            txt = arr
            entries += 1
            s = txt.index("/")
            e = txt.index('"', s)
            txt = txt[s:e]
            fullentry.append(txt)
        elif arr.find("is") >= 0:
            entries += 1
            txt = arr
            s = txt.index("is")
            txt1 = txt[s + 7:20]
            fullentry.append(txt1)
        if (row == 65536):
            break;

    finalarr = []
    finalarr1 = []
    temp = 0
    row = 1

    for r in fullentry:
        finalarr.append(r)
        temp += 1

        if temp == 12:
            finalarr1.append(finalarr)

            temp = 0

            col = 0
            for arr in finalarr:
                workSheet.write(row, col, arr)
                col += 1
            row += 1
            finalarr.clear()
            if (row == 65536):
                break;

    workBook.save(os.path.join(dest, "ClearcaseSize.xls"))
    fp.close()
clearcasesize()

The code should work, but its only creating the name of columns "Tag and Size" The idea of my script is that he locate the first argg of the line "Total", and after he puts into 1 column the args that we find into " " , and after using the other arr.fin "is" to add the size into the second line, but its not working...


Solution

  • Based on your short sample:

    Total usage 2022-09-17T06:02:50+02:00 for vob "/Tmobile_Cro/TMobileCro" is 7868.0 Mb
    Total usage 2022-09-17T06:04:18+02:00 for vob "/XpressCM/SX-BASE" is 25265.7 Mb
    Total usage 2022-09-17T06:02:56+02:00 for vob "/vobs/LI" is 5916.9 Mb
    

    Why don't you try pandas like this:

    import pandas as pd
    
    with open("sample_file.txt") as f:
        lines = [
            line.strip().split(" vob ")[-1].replace('"', "").split(" is ")
            for line in f.readlines()
        ]
    
    df = pd.DataFrame(
        [l for l in lines if not l[0].startswith("/vobs")],
        columns=["TAG", "Size"],
    )
    df.to_excel("sample_file.xlsx", index=False)
    

    I don't have Excel but you should get an xlsx file that looks like this:

    enter image description here