Search code examples
python pandas spacy streamlit yfinance

Python Streamlit, and yfinance issues


Below are the two bugs I know of as of now. If you also have any recommendations for refactoring my code, please let me know.

  1. yfinance is not appending the dividendYield to my dict, even though I made sure that there is an actual dividend yield for those symbols.

enter image description here

  2. TypeError: can only concatenate str (not "Tag") to str. I assume this has something to do with how the XML is being parsed: the loop runs into a Tag, so I am not able to create the expander. I thought I could solve it with this if statement, but instead I just don't get any expander at all.
# Render each headline as a markdown bullet inside a collapsible section.
with st.expander("Expand for stocks news"):
    for heading in fin_headings:
        # NOTE(review): this compares the heading with the type object `str`
        # itself rather than checking the heading's type, so the condition is
        # never true for the parsed feed items and nothing gets rendered.
        if heading == str:
            st.markdown("* " + heading)
        else:
            pass

Full code for main.py:

import requests
import spacy
import pandas as pd
import yfinance as yf
import streamlit as st
from bs4 import BeautifulSoup


# Page title shown at the top of the Streamlit app.
st.title("Fire stocks :fire:")
# Load the `en_core_web_sm` spaCy pipeline at module level; stock_info() calls
# `nlp` on every headline to extract entities.
nlp = spacy.load("en_core_web_sm")


def extract_rss(rss_link):
    """Return the <title> elements of the MarketWatch pulse feed and *rss_link*.

    Both feeds are downloaded first and then parsed as XML. The result is a
    plain list holding the MarketWatch titles followed by the titles found in
    the feed at ``rss_link``; the items are the parser's element objects, not
    strings.
    """
    feed_urls = (
        "http://feeds.marketwatch.com/marketwatch/marketpulse/",
        rss_link,
    )
    # Fetch everything before parsing anything, so a failing request aborts
    # the call before any parsing work is done.
    replies = [requests.get(url) for url in feed_urls]

    titles = []
    for reply in replies:
        soup = BeautifulSoup(reply.content, features="xml")
        titles = titles + soup.findAll('title')
    return titles


def stock_info(headings):
    """Look up market data for companies mentioned in the given headings.

    For every heading, the spaCy pipeline ``nlp`` extracts entities; each
    entity whose text is found in the ``Name`` column of the NASDAQ screener
    CSV is mapped to the first matching row's symbol, and a handful of fields
    from that symbol's yfinance ``info`` dict are collected.

    Args:
        headings: Iterable of objects exposing a ``.text`` attribute.

    Returns:
        A DataFrame with the columns Org, Symbol, currentPrice, dayHigh,
        dayLow, forwardPE and dividendYield.
    """
    # Column-oriented accumulator: one list per output column.
    stock_dict = {
        'Org': [],
        'Symbol': [],
        'currentPrice': [],
        'dayHigh': [],
        'dayLow': [],
        'forwardPE': [],
        'dividendYield': []
    }
    stocks_df = pd.read_csv("./data/nasdaq_screener_1658383327100.csv")
    for title in headings:
        doc = nlp(title.text)
        for ent in doc.ents:
            try:
                # A non-zero count means at least one listed company name
                # contains the entity text.
                if stocks_df['Name'].str.contains(ent.text).sum():
                    # Take the first matching row for both symbol and name.
                    symbol = stocks_df[stocks_df['Name'].str.contains(
                        ent.text)]['Symbol'].values[0]
                    org_name = stocks_df[stocks_df['Name'].str.contains(
                        ent.text)]['Name'].values[0]

                    # Receive info from yfinance
                    # (this local name shadows the enclosing function's name).
                    stock_info = yf.Ticker(symbol).info
                    print(symbol)
                    # NOTE(review): Org and Symbol are appended before the
                    # info lookups below. If any of those lookups raises
                    # (e.g. a key missing from the info dict), the bare
                    # `except` swallows it and the column lists end up with
                    # different lengths, so later values no longer line up
                    # with their symbol.
                    stock_dict['Org'].append(org_name)
                    stock_dict['Symbol'].append(symbol)

                    stock_dict['currentPrice'].append(
                        stock_info['currentPrice'])
                    stock_dict['dayHigh'].append(stock_info['dayHigh'])
                    stock_dict['dayLow'].append(stock_info['dayLow'])
                    stock_dict['forwardPE'].append(stock_info['forwardPE'])
                    stock_dict['dividendYield'].append(
                        stock_info['dividendYield'])
                else:
                    # If name can't be found pass.
                    pass
            except:
                # Don't raise an error.
                pass

    # orient='index' followed by transpose tolerates lists of unequal length:
    # shorter columns are padded instead of raising.
    output_df = pd.DataFrame.from_dict(stock_dict, orient='index')
    output_df = output_df.transpose()
    return output_df


# Add the RSS link input field, pre-filled with a default feed URL.
user_input = st.text_input(
    "Add rss link here", "https://www.investing.com/rss/news.rss")

# Get financial headlines
fin_headings = extract_rss(user_input)

# Debug output of the raw parsed headings.
print(fin_headings)
# Output financial info
output_df = stock_info(fin_headings)
# Keep only the first row for each symbol.
output_df.drop_duplicates(inplace=True, subset='Symbol')
st.dataframe(output_df)

# Render each headline as a markdown bullet inside a collapsible section.
with st.expander("Expand for stocks news"):
    for heading in fin_headings:
        # NOTE(review): this compares the heading with the type object `str`
        # itself rather than checking the heading's type, so the condition is
        # never true for the parsed feed items and nothing gets rendered.
        if heading == str:
            st.markdown("* " + heading)
        else:
            pass

Solution

  • There is an issue in the logic of your stock_info function: the same symbol ends up with different values, and when you remove the duplicates, the row with the first occurrence of each symbol is the one that is retained.

    The below code will solve both of your issues.

    import requests
    import spacy
    import pandas as pd
    import yfinance as yf
    import streamlit as st
    from bs4 import BeautifulSoup
    
    # Page title shown at the top of the Streamlit app.
    st.title("Fire stocks :fire:")
    # Load the `en_core_web_sm` spaCy pipeline at module level; stock_info()
    # calls `nlp` on every headline to extract entities.
    # NOTE(review): Streamlit presumably re-executes this script on every
    # interaction, so this load may repeat -- consider caching it; confirm
    # against the Streamlit version in use.
    nlp = spacy.load("en_core_web_sm")
    
    def extract_rss(rss_link, timeout=10):
        """Fetch RSS feeds and return their ``<title>`` elements.

        The MarketWatch "market pulse" feed is always fetched; the feed at
        ``rss_link`` is fetched in addition to it unless the link is blank.

        Args:
            rss_link: URL of an additional RSS feed. A blank or ``None`` value
                is skipped instead of being passed to ``requests``.
            timeout: Seconds to wait on each HTTP request before giving up.

        Returns:
            A list of BeautifulSoup elements, MarketWatch titles first. Use
            ``.text`` on an element to get the headline string.

        Raises:
            requests.RequestException: If a feed cannot be fetched (invalid
                URL, connection error, or timeout).
        """
        feed_urls = ["http://feeds.marketwatch.com/marketwatch/marketpulse/"]
        # The link comes from a free-text input, so it may be empty.
        if rss_link and rss_link.strip():
            feed_urls.append(rss_link.strip())

        headings = []
        for url in feed_urls:
            # Without a timeout, requests waits indefinitely on a stalled
            # server, which would hang the whole app.
            response = requests.get(url, timeout=timeout)
            soup = BeautifulSoup(response.content, features="xml")
            headings.extend(soup.find_all('title'))
        return headings
    
    def stock_info(headings):
        """Build a table of market data for companies named in the headings.

        Each heading's text is run through the spaCy pipeline ``nlp``. Every
        entity whose text occurs in the ``Name`` column of the NASDAQ screener
        CSV is mapped to the first matching listing, and that listing's
        yfinance ``info`` dict becomes one row of the result.

        Args:
            headings: Iterable of objects exposing a ``.text`` attribute.

        Returns:
            A ``pandas.DataFrame`` with one row per matched entity, holding
            the yfinance info fields plus ``Org`` and ``Symbol``. The frame
            has no rows and no columns when nothing matched.
        """
        stocks_df = pd.read_csv("./data/nasdaq_screener_1658383327100.csv")
        names = stocks_df['Name']
        # symbol -> info dict, so each ticker is downloaded only once even
        # when several headings mention the same company.
        info_by_symbol = {}
        rows = []

        for title in headings:
            for ent in nlp(title.text).ents:
                # Literal substring match: entity text may contain regex
                # metacharacters, and listings with a missing name must count
                # as "no match" rather than poison the boolean mask with NaN.
                matches = stocks_df[
                    names.str.contains(ent.text, regex=False, na=False)]
                if matches.empty:
                    continue

                first_match = matches.iloc[0]
                symbol = first_match['Symbol']
                org_name = first_match['Name']

                if symbol not in info_by_symbol:
                    print(symbol)
                    try:
                        info_by_symbol[symbol] = yf.Ticker(symbol).info
                    except Exception as exc:
                        # Best effort: one failing ticker lookup must not
                        # abort the whole table, but it should be visible.
                        print(f"Skipping {symbol}: {exc}")
                        continue

                # Copy the cached dict so every row owns its own data.
                rows.append(
                    {**info_by_symbol[symbol],
                     'Org': org_name,
                     'Symbol': symbol})

        return pd.DataFrame(rows)
    
    # Add the RSS link input field, pre-filled with a default feed URL.
    user_input = st.text_input(
        "Add rss link here", "https://www.investing.com/rss/news.rss")

    # Get financial headlines
    fin_headings = extract_rss(user_input)

    # Output financial info. reindex() is used instead of [[...]] selection
    # because it fills a missing column with NaN rather than raising KeyError,
    # e.g. when no headline matched or a field was not reported for any symbol.
    display_columns = ['Org', 'Symbol', 'currentPrice', 'dayHigh', 'dayLow',
                       'forwardPE', 'dividendYield']
    output_df = stock_info(fin_headings).reindex(columns=display_columns)
    # Keep the first row per symbol; assign the result instead of mutating a
    # derived frame in place.
    output_df = output_df.drop_duplicates(subset='Symbol')
    st.dataframe(output_df)

    with st.expander("Expand for stocks news"):
        for heading in fin_headings:
            # The headings are parsed XML elements, not strings; .text yields
            # the plain headline that can be concatenated into markdown.
            st.markdown("* " + heading.text)