Search code examples
python text-extraction

How do I get a particular text using Python?


I need to extract the text that follows a keyword.

My text file looks like this. I am trying to extract the car info (car name, car model, color) and add it to a Python dict.

# car name 
BMW
suzuki

# car model 
X1 
TT

# color
red 
blue

My code:

# Keywords are searched for verbatim in the text, so they must be strings.
keywords = ["car_name", "car_model", "color"]
# Module-level accumulator: car_info() writes into it and returns it.
parsed_content = {}


def car_info(text):
    """Split *text* into one segment per keyword found in it.

    Every entry of the module-level ``keywords`` list that occurs in
    *text* becomes a key of ``parsed_content``. Its value is the slice of
    *text* that starts at the keyword itself and runs up to the next
    keyword found (or to the end of the text for the last one), so each
    value still begins with its keyword.

    Keywords that do not occur in *text* are skipped. When none occur,
    ``parsed_content`` is returned unchanged.

    Args:
        text: The string to search.

    Returns:
        The module-level ``parsed_content`` dict, updated in place.
    """
    # (position, keyword) pairs; str.find returns -1 instead of raising,
    # so no exception handling is needed for missing keywords.
    found = []
    for key in keywords:
        position = text.find(key)
        if position != -1:
            found.append((position, key))

    if not found:
        return parsed_content

    # Order the keywords by where they appear in the text.
    found.sort()
    keys = [key for _, key in found]
    print(keys)

    # Each segment ends where the next keyword starts; the last one ends
    # at the end of the text.
    segment_ends = [position for position, _ in found[1:]] + [len(text)]
    for (start, key), end in zip(found, segment_ends):
        parsed_content[key] = text[start:end]
    return parsed_content

My output is:

parsed_content = {car_name : car_name BMW SUZUKI,
car_model : car_model x1 tt,
color : color red blue

 }

Expected output:

{'car_name': ['bmw', 'suzuki'],
 'car_model': ['x1', 'TT'],
 'color': ['red', 'blue']
}

Solution

  • Using the index of each header line:

    txt = '''# car name 
    BMW
    suzuki
    
    # car model 
    X1 
    TT
    
    # color
    red 
    blue'''
    
    # Normalise the input: strip every line and drop the blank ones.
    txt_split = [line.strip() for line in txt.split('\n') if line.strip()]
    header_list = ['# car name', '# car model', '# color']
    # Position of every header line. enumerate() avoids rescanning the list
    # with list.index() for each line and stays correct if a header repeats.
    headers = [pos for pos, line in enumerate(txt_split) if line in header_list]
    # Each section runs up to the next header; the last one runs to the end.
    section_ends = headers[1:] + [len(txt_split)]
    # All sections collected into a single dict, keyed by section name.
    parsed = {}
    for start, end in zip(headers, section_ends):
        # Turn a header such as '# car name' into the key 'car_name'.
        name = txt_split[start].partition('#')[2].strip().replace(' ', '_')
        items = txt_split[start + 1:end]
        parsed[name] = items
        print({name: items})
    
        >>>> {'car_name': ['BMW', 'suzuki']}
        >>>> {'car_model': ['X1', 'TT']}
        >>>> {'color': ['red', 'blue']}