Search code examples
pythonhtmlpandashtml-tableheader

In Python, what is the panda syntax to select a subheader from a table being pulled from a url?


Headers:          Volume       Energy        Enthalpy      
Subheaders:       vf  vg       uf  ug      hf  hfg  hg

For example: If I wanted to select [Enthalpy, hfg] I would expect something like this:

df = pd.read_html('url', header=[2,1])

I used this post as a reference, but this didn't work.

import pandas as pd

def get_data_from_webpage(url):# This returns a list of dataframes
    
    t_table = pd.read_html(url, header=1)
   
    return t_table[0]

result = get_data_from_webpage('http://ouopentextbooks.org/thermodynamics/saturation-properties-temperature-table/') # Url with table being passed through function.

print(result) 

From here I would like to select the temp column to use as a key and then put the rest of the corresponding row into a lookup table. Then when the user inputs the key, it outputs the corresponding row of values.

Final result would work like:

User inputs: 20

Output: vf, uf, hf, and sf values


Solution

  • Okay I did a solution. But, you might have to do pip install Unidecode first to install the the necessary libraries.

    I used alphabets for the index so as to not confuse the users since the columns are numerical.

    Here is the output:

    List of available temperatures:
    
    AA) 000.01
    AB) 005.00
    AC) 010.00
    AD) 015.00
    AE) 020.00
    AF) 025.00
    AG) 030.00
    AH) 035.00
    AI) 040.00
    AJ) 045.00
    AK) 050.00
    AL) 055.00
    AM) 060.00
    AN) 065.00
    AO) 070.00
    AP) 075.00
    AQ) 080.00
    AR) 085.00
    AS) 090.00
    AT) 095.00
    AU) 100.00
    AV) 110.00
    AW) 120.00
    AX) 130.00
    AY) 140.00
    AZ) 150.00
    BA) 160.00
    BB) 170.00
    BC) 180.00
    BD) 190.00
    BE) 200.00
    BF) 210.00
    BG) 220.00
    BH) 230.00
    BI) 240.00
    BJ) 250.00
    BK) 260.00
    BL) 270.00
    BM) 280.00
    BN) 290.00
    BO) 300.00
    BP) 310.00
    BQ) 320.00
    BR) 330.00
    BS) 340.00
    BT) 350.00
    BU) 360.00
    BV) 370.00
    BW) 373.95
    
    Enter a temperature (in degrees celsius) from the available options above: 250
    
    vf is: 0.0012519999999999999
    uf is: 1080.8
    hf is: 1085.8
    sf is: 2.7935
    

    Here is the code:

    import pandas as pd
    import os
    from unidecode import unidecode
    import itertools
    import string
    import sys
    
    def file_len(fname):
        with open(fname) as fp:
            for i, line in enumerate(fp):
                pass
        return i + 1
    
    def excel_cols():
        n = 1
        while True:
            yield from (''.join(group) for group in itertools.product(string.ascii_uppercase, repeat=n))
            n += 1
    
    def remove_and_exit(temp13,temp12=0):
        try:
            os.remove("test.csv")
        except:
            pass
        if temp13:
            sys.exit(temp12)
    
    def get_data_from_webpage(url):# This returns a list of dataframes
    
        t_table = pd.read_html(url, header=1)
    
        return t_table[0]
    
    result = get_data_from_webpage('http://ouopentextbooks.org/thermodynamics/saturation-properties-temperature-table/') # Url with table being passed through function.
    
    result.index.rename('index', inplace=True)
    
    
    new_columns = result.columns.values
    for temp1 in range(len(new_columns)):
        new_columns[temp1] = unidecode(new_columns[temp1])
    result.columns  = new_columns
    
    remove_and_exit(False)
    
    result.to_csv("test.csv", sep=',', encoding='utf-8')
    
    temp2 = []
    
    with open("test.csv") as file_in:
        for line in file_in:
            temp2.append(line.strip())
    
    temp5 = list(itertools.islice(excel_cols(), file_len("test.csv")+26))
    temp5 = temp5[26:-1]
    
    temp6 = -1 #before decimal
    temp7 = -1 #after decimal
    
    print("List of available temperatures:\n")
    for temp3 in range(len(temp2)): #temp5[temp3-1]
        if temp3 == 0:
            continue
        if (len(temp2[temp3].split(",")[1].split(".")[0])) > temp6:
            temp6 = len(temp2[temp3].split(",")[1].split(".")[0])
        if (len(temp2[temp3].split(",")[1].split(".")[1])) > temp7:
            temp7 = len(temp2[temp3].split(",")[1].split(".")[1])
    
    for temp8 in range(len(temp2)): #temp5[temp8-1]
        if temp8 == 0:
            continue
        print(temp5[temp8-1]+") "+temp2[temp8].split(",")[1].split(".")[0].zfill(temp6)+"."+temp2[temp8].split(",")[1].split(".")[1].ljust(temp7, '0'))
    
    
    temp4 = input("\nEnter a temperature (in degrees celsius) from the available options above: ")
    
    try:
        temp9 = float(temp4)
    except:
        print("\nYou did not enter a temperature (in degrees celsius)")
        remove_and_exit(True,11)
    temp11 = False
    for temp10 in range(len(temp2)):
        if temp10 == 0:
            continue
        if temp9 == float(temp2[temp10].split(",")[1]):
            print("")
            print("vf is: "+temp2[temp10].split(",")[3])
            print("uf is: "+temp2[temp10].split(",")[5])
            print("hf is: "+temp2[temp10].split(",")[7])
            print("sf is: "+temp2[temp10].split(",")[10])
            temp11 = True
            break
    if not temp11:
        print("\nYou did not enter a value from the available options above")
        remove_and_exit(True,1761)
    remove_and_exit(True)