Search code examples
python-3.xtext-parsing

writing a parsed file in the same format it was read


Some overview of what I'm doing: I'm reading and parsing a .cube file, which has a very specific format. I then want to do some manipulation on the parsed data and write out a new file in the same format.

the file I'm parsing looks like:

 OT-RSH
 Total Density
   12   -9.448633   -9.448633   -3.779453
  101    0.188973    0.000000    0.000000
  101    0.000000    0.188973    0.000000
   41    0.000000    0.000000    0.188973
    6    6.000000   -1.869343    1.869343    0.000000
    6    6.000000    0.684227    2.553571    0.000000
    6    6.000000    2.553571    0.684227    0.000000
    6    6.000000    1.869343   -1.869343    0.000000
    6    6.000000   -0.684227   -2.553571    0.000000
    6    6.000000   -2.553571   -0.684227    0.000000
    1    1.000000   -3.340623    3.340623    0.000000
    1    1.000000    1.222753    4.563376    0.000000
    1    1.000000    4.563376    1.222753    0.000000
    1    1.000000    3.340623   -3.340623    0.000000
    1    1.000000   -1.222753   -4.563376    0.000000
    1    1.000000   -4.563376   -1.222753    0.000000
  0.43578E-08  0.48992E-08  0.54452E-08  0.59816E-08  0.64918E-08  0.69577E-08
  0.73600E-08  0.76792E-08  0.78964E-08  0.79941E-08  0.79570E-08  0.77736E-08
  0.74361E-08  0.69419E-08  0.62937E-08  0.54998E-08  0.45742E-08  0.35359E-08

the content isn't really important for the sake of the question.

After reading, I'm trying to write the file back out unchanged so that I can compare it with "diff" and make sure I'm keeping the format exactly as it is. I'm having some trouble doing so. The piece of code that writes the file is below (I've played with the format values but didn't have any luck):

        # Write the cube file: a (6 + num_atoms)-line header followed by the
        # volumetric data, one value per (x, y, z) grid point.
        with open(file_name, 'w') as output_file:
            for line_num in range(6 + self.num_atoms):
                if line_num == 0 or line_num == 1:
                    # comment line
                    # (no newline is appended here; the stored comment is written as-is)
                    output_file.write("{:s}".format(self.comments[line_num]))
                if line_num == 2:
                    # number of total atoms, and the origin coordinates
                    # NOTE: fields are joined by single spaces, and the minimum
                    # width of 5 in "{:5.6f}" is always exceeded by a 6-decimal
                    # value, so no column padding is produced.
                    output_file.write("{:4d} {:5.6f} {:5.6f} {:5.6f}\n".format(self.num_atoms, *self.origin))
                if line_num == 3:
                    # number of x grid points and step size in x,y,z
                    output_file.write("{:4d} {:.6f} {:.6f} {:.6f}\n".format(self.num_x, *self.x))
                if line_num == 4:
                    # number of y grid points and step size in x,y,z
                    output_file.write("{:4d} {:.6f} {:.6f} {:.6f}\n".format(self.num_y, *self.y))
                if line_num == 5:
                    # number of z grid points and step size in x,y,z
                    output_file.write("{:4d} {:.6f} {:.6f} {:.6f}\n".format(self.num_z, *self.z))
                if line_num in range(6, 6 + self.num_atoms):
                    # atomic number, charge and coordinates of the atom
                    # NOTE: a tab separates the atomic number from the charge;
                    # the remaining fields are separated by single spaces.
                    output_file.write("{:4d}\t{:.6f} {:.6f} {:.6f} {:.6f}\n".format(self.atoms[line_num - 6],
                                                                                   self.atoms_charge[line_num - 6],
                                                                                   *self.atoms_xyz[line_num - 6]))
            # the calculated quantity
            for idx_x in range(self.num_x):
                for idx_y in range(self.num_y):
                    for idx_z in range(self.num_z):
                        # NOTE: in "{:4e}" the 4 is a minimum field width, not a
                        # precision; the default precision of 6 yields
                        # d.dddddde-XX rather than 0.dddddE-XX.
                        output_file.write("{:4e} ".format(self.calc_data[idx_x, idx_y, idx_z]))
                        # break the line after every sixth value of the z run
                        if (np.mod(idx_z, 6) == 5): output_file.write("\n")
                    # terminate the z run for this (x, y) pair
                    output_file.write("\n")
        # NOTE: the with-block above has already closed the file at this point.
        output_file.close()

which produce the following output:

 OT-RSH
 Total Density
  12 -9.448633 -9.448633 -3.779453
 101 0.188973 0.000000 0.000000
 101 0.000000 0.188973 0.000000
  41 0.000000 0.000000 0.188973
   6    6.000000 -1.869343 1.869343 0.000000
   6    6.000000 0.684227 2.553571 0.000000
   6    6.000000 2.553571 0.684227 0.000000
   6    6.000000 1.869343 -1.869343 0.000000
   6    6.000000 -0.684227 -2.553571 0.000000
   6    6.000000 -2.553571 -0.684227 0.000000
   1    1.000000 -3.340623 3.340623 0.000000
   1    1.000000 1.222753 4.563376 0.000000
   1    1.000000 4.563376 1.222753 0.000000
   1    1.000000 3.340623 -3.340623 0.000000
   1    1.000000 -1.222753 -4.563376 0.000000
   1    1.000000 -4.563376 -1.222753 0.000000
4.357800e-09 4.899200e-09 5.445200e-09 5.981600e-09 6.491800e-09 6.957700e-09 
7.360000e-09 7.679200e-09 7.896400e-09 7.994100e-09 7.957000e-09 7.773600e-09 
7.436100e-09 6.941900e-09 6.293700e-09 5.499800e-09 4.574200e-09 3.535900e-09 
2.408700e-09 1.220100e-09 0.000000e+00 -1.220100e-09 -2.408700e-09 -3.535900e-09 
-4.574200e-09 -5.499800e-09 -6.293700e-09 -6.941900e-09 -7.436100e-09 -7.773600e-09 

It can be seen that the data is almost identical, but the columns are shifted, which I'm trying to avoid. Also, the values in the last lines are printed as X.XXXE-09 instead of 0.XXXE-08.

I would appreciate some help formatting it properly.

Thanks


Solution

  • I haven't been able to find a one-line way of doing the formatted printing you want. However, the most general way I could come up with is as follows:

    First, a function that formats a float so that it is represented as +-0.XXXE-Y:

    def formatFloat(float_num):
        """
            Format a float as 0.XXXXXE(Y+1) instead of X.XXXXE(Y).

            The result has a leading zero before the decimal point, five
            significant digits after it, an upper-case "E", an explicit
            exponent sign and at least two exponent digits, e.g.
            4.3578e-09 -> "0.43578E-08" and -1.2201e-09 -> "-0.12201E-08".
            Zero is returned as "0.00000E+00".

            float_num: any finite real number (Python or NumPy scalar).
            Returns the formatted string (11 characters for non-negative
            values and 12 for negative ones when the exponent has two digits).
            Raises ValueError if float_num is NaN or infinite.
        """
        import math

        value = float(float_num)
        if not math.isfinite(value):
            raise ValueError("cannot format non-finite value: {!r}".format(float_num))
        if value == 0.0:
            return "0.00000E+00"

        # Let the standard formatter do the rounding to five significant
        # digits: "d.dddde+XX".  This is locale-independent and copes with
        # any magnitude, including results that round up to the next decade.
        mantissa, exponent = "{:.4e}".format(abs(value)).split("e")
        digits = mantissa.replace(".", "")

        # Moving the decimal point one place to the left raises the exponent by one.
        shifted_exp = int(exponent) + 1

        sign = "-" if value < 0 else ""
        exp_sign = "-" if shifted_exp < 0 else "+"
        return "{:s}0.{:s}E{:s}{:02d}".format(sign, digits, exp_sign, abs(shifted_exp))
    

    Once you have the function above, you can change your code to:

        # Write the cube file using fixed-width columns that match the layout
        # of the sample input: integers are 5 characters wide, header floats
        # are 12 wide with 6 decimals, and data values are 13 wide, six per line.
        # Fixed widths keep the columns aligned for any sign and magnitude,
        # instead of choosing the padding by hand from the sign of each value.
        with open(file_name, 'w') as output_file:
            # the two comment lines are written as stored (no newline is added)
            for line_num in range(2):
                output_file.write("{:s}".format(self.comments[line_num]))

            # number of atoms + origin, then number of grid points + step
            # vector for each of the x, y and z axes
            header_rows = (
                (self.num_atoms, self.origin),
                (self.num_x, self.x),
                (self.num_y, self.y),
                (self.num_z, self.z),
            )
            for count, vector in header_rows:
                output_file.write("{:5d}".format(count))
                for i in range(3):
                    output_file.write("{:12.6f}".format(vector[i]))
                output_file.write("\n")

            # atomic number, charge and coordinates of each atom
            for atom_idx in range(self.num_atoms):
                output_file.write("{:5d}".format(self.atoms[atom_idx]))
                # the second column is the charge, not the atomic number again
                output_file.write("{:12.6f}".format(self.atoms_charge[atom_idx]))
                xyz = self.atoms_xyz[atom_idx]
                for i in range(3):
                    output_file.write("{:12.6f}".format(xyz[i]))
                output_file.write("\n")

            # the calculated quantity: six values per line, and every run
            # over z starts on a fresh line
            for idx_x in range(self.num_x):
                for idx_y in range(self.num_y):
                    for idx_z in range(self.num_z):
                        output_file.write(
                            "{:>13s}".format(formatFloat(self.calc_data[idx_x, idx_y, idx_z])))
                        if idx_z % 6 == 5:
                            output_file.write("\n")
                    # close a partially filled last line; when num_z is a
                    # multiple of 6 the line was already terminated above, and
                    # a second newline would insert a blank line
                    if self.num_z % 6 != 0:
                        output_file.write("\n")
    

    This will produce the desired output.