Search code examples
python regex file regex-group writefile

How do I read the correct lines from this text file with a Python program, and then create a .py file by filling in the data extracted from the .txt file?


Text file to be read (the real one contains more numbers), called number_info.txt

veinti tres
23

veinti dos
22

veinti uno
21

veinte
20

tres
3

dos
2

uno
1

This is the code (I need help with this)

import re

def auto_coding_text_to_number(source_path='number_info.txt',
                               target_path='number_to_text.py'):
    """Generate a Python module that turns spelled-out numbers into digits.

    The source file lists each number as two non-blank lines: the spelled-out
    name followed by its digits. Blank lines between entries are optional and
    ignored. One ``re.sub`` statement is written per entry, in file order, so
    longer names (e.g. "veinti tres") must appear before the shorter names
    they contain (e.g. "tres").

    Args:
        source_path: Text file holding the name/digit pairs.
        target_path: Python file to (over)write with the generated function.

    Raises:
        ValueError: If the source file has an odd number of non-blank lines,
            i.e. a name without digits.
    """
    with open(source_path, 'r') as f:
        entries = [line.strip() for line in f if line.strip()]

    if len(entries) % 2:
        raise ValueError(
            'expected name/digit pairs, got an odd number of lines in '
            + repr(source_path)
        )

    # Even positions hold the spelled-out names, odd positions the digits.
    coloquial_numbers = entries[::2]
    symbolic_numbers = entries[1::2]

    statements = []
    for words, digits in zip(coloquial_numbers, symbolic_numbers):
        if ' ' in words:
            # Accept any run of whitespace/hyphens (or nothing) between the
            # parts of a compound name. A class written as "[\s|-|]" would
            # not do: "|-|" is parsed as a range and never matches "-".
            pattern = r'[\s-]*'.join(words.split())
            statements.append(
                '    input_text = re.sub(r"{}", "{}", input_text)\n'.format(
                    pattern, digits)
            )
        else:
            statements.append(
                '    input_text = re.sub("{}", "{}", input_text)\n'.format(
                    words, digits)
            )

    with open(target_path, 'w') as f:
        f.write('import re\n\ndef number_to_text_func(input_text):\n')
        f.writelines(statements)
        f.write('\n    return(input_text)\n\n'
                'input_text = str(input())\n'
                'print(number_to_text_func(input_text))\n')

auto_coding_text_to_number()

And this is the correct file, called number_to_text.py, that should be written by the other script

import re

def number_to_text_func(input_text):
    """Replace spelled-out Spanish numbers in input_text with their digits.

    Args:
        input_text: Text that may contain number names such as "veinti tres".

    Returns:
        The text with every known number name replaced by digits.
    """
    # Compound names come first so that "veinti tres" becomes "23" before the
    # bare "tres" rule can turn it into "veinti 3". The separator class
    # accepts whitespace or hyphens (or nothing at all between the parts).
    input_text = re.sub(r"veinti[\s-]*tres", "23", input_text)
    input_text = re.sub(r"veinti[\s-]*dos", "22", input_text)
    input_text = re.sub(r"veinti[\s-]*uno", "21", input_text)
    input_text = re.sub("veinte", "20", input_text)
    input_text = re.sub("tres", "3", input_text)
    input_text = re.sub("dos", "2", input_text)
    input_text = re.sub("uno", "1", input_text)

    return(input_text)

input_text = str(input())
print(number_to_text_func(input_text))

EDIT:

The lines inside the .txt file are structured like this

"veinti tres"  <---- line 0
"23"           <---- line 1
"veinti dos"   <---- line 2
"22"           <---- line 3
"veinti uno"   <---- line 4
"21"           <---- line 5
"veinte"       <---- line 6
"20"           <---- line 7
"tres"         <---- line 8
"3"            <---- line 9

Then I suggested separating them into 2 groups and storing them in 2 lists

# Spelled-out names: lines 0, 3, 6, 9, ... of number_info.txt
# (each entry is name, digits, blank line).
coloquial_numbers = ["veinti tres", "veinti dos", "veinti uno", "veinte", "tres"]

# Matching digits: lines 1, 4, 7, 10, ... (the line right after each name).
symbolic_numbers = ["23", "22", "21", "20", "3"]


# One generated statement per number; filled with (pattern, digits).
body_template = """    input_text = re.sub(r"{}", "{}", input_text)\n"""

And then the body of the function should be structured like this

# Each space in the name becomes a class accepting any run of whitespace or
# hyphens (or nothing), so "veinti tres", "veinti-tres" and "veintitres" all match.
input_text = re.sub(coloquial_numbers[n].replace(' ', r'[\s-]*'), symbolic_numbers[n], input_text)

Getting something like this in the function body of the output file

def number_to_text(input_text):
    """Replace spelled-out Spanish numbers in input_text with their digits.

    Args:
        input_text: Text that may contain number names such as "veinti tres".

    Returns:
        The text with every known number name replaced by digits.
    """
    # Compound names are handled before "tres" so their last word is not
    # replaced on its own. The separator accepts whitespace or hyphens.
    input_text = re.sub(r"veinti[\s-]*tres", "23", input_text)
    input_text = re.sub(r"veinti[\s-]*dos", "22", input_text)
    input_text = re.sub(r"veinti[\s-]*uno", "21", input_text)
    input_text = re.sub("veinte", "20", input_text)
    input_text = re.sub("tres", "3", input_text)

    return(input_text)

Solution

  • I omitted the reading/writing steps for the sake of simplicity. No rules specifying the body of the meta-function were given, so I made a guess.

    import re

    # Body component of the meta-code: one statement per number,
    # filled with (pattern, digits).
    body_template = '    input_text = re.sub(r"{}", "{}", input_text)\n'

    # Every space inside a number name is replaced by this class so that
    # whitespace, hyphens, or nothing at all may separate the parts.
    # (Writing it as "[\s|-|]" would not match "-": "|-|" parses as a range.)
    separator = r'[\s-]*'

    # Read the name/digit pairs from the file.
    with open('number_info.txt', 'r') as fd:
        text = fd.read()

    # Each match is (spelled-out name, digits); build the body in file order.
    body = ''.join(
        body_template.format(n_text.replace(' ', separator), n)
        for n_text, n in re.findall(r'\n*([a-z\s]+)\n(\d+)', text)
    )

    # Other components of the meta-code. They are spelled with explicit "\n"
    # so the indentation of this script cannot leak into the generated file.
    header = 'import re\n\ndef number_to_text_func(input_text):\n'

    tail = ('\n    return(input_text)\n\n'
            'input_text = str(input())\n'
            'print(number_to_text_func(input_text))')

    # Merge the pieces into the text to be saved to file.
    meta_code = header + body + tail
    print(meta_code)
    

    Output (content of number_to_text.py)

    import re
    
    def number_to_text_func(input_text):
        """Replace spelled-out Spanish numbers in input_text with their digits.

        Longer names are substituted first so that their last word is not
        replaced on its own by one of the single-word rules further down.
        """
        input_text = re.sub(r"treinta[\s-]*y[\s-]*uno", "31", input_text) # <- every space of a multi-word name becomes the separator class
        input_text = re.sub(r"veinti[\s-]*tres", "23", input_text)
        input_text = re.sub(r"veinti[\s-]*dos", "22", input_text)
        input_text = re.sub(r"veinti[\s-]*uno", "21", input_text)
        input_text = re.sub(r"veinte", "20", input_text)
        input_text = re.sub(r"tres", "3", input_text)
        input_text = re.sub(r"dos", "2", input_text)
        input_text = re.sub(r"uno", "1", input_text)

        return(input_text)
    
    input_text = str(input())
    print(number_to_text_func(input_text))
    

    From the comments:

    read file per line, no regex

    # Keep only the non-blank lines, so the result does not depend on how many
    # blank lines (if any) separate the entries or trail the file.
    with open('number_info.txt', 'r') as fd:
        lines = [line.strip() for line in fd if line.strip()]

    # The remaining lines alternate: spelled-out name, then its digits.
    symbolic_numbers, coloquial_numbers = [], []
    for i, line in enumerate(lines):
        if i % 2 == 0:
            coloquial_numbers.append(line)
        else:
            symbolic_numbers.append(line)
    

    or read file with slices

    # Keep only the non-blank lines, so the slices below work whether or not
    # the entries are separated by blank lines.
    with open('number_info.txt', 'r') as fd:
        lines = [line.strip() for line in fd if line.strip()]

    # Names sit at even positions, their digits at the following odd ones.
    coloquial_numbers = lines[::2]
    symbolic_numbers = lines[1::2]