Search code examples
python, web-scraping, terminal, macos-sierra

python script - grouping words into a If-Not statement


Trying to figure out how to use an if not statement in which I can group three to four words to omit from a CSV file. Towards the bottom of the code, you'll see that I'm stuck at: if ('reddit', 'passwords') not in x:

Any help would be great.

# import libraries
import bs4
from urllib2 import urlopen as uReq
from bs4 import BeautifulSoup as soup

my_url = 'https://www.reddit.com/r/NHLStreams/comments/71uhwi/game_thread_sabres_at_maple_leafs_730_pm_et/'

# Open the connection and download the raw page. The try/finally guarantees
# the connection is closed even if read() raises part-way through.
uClient = uReq(my_url)
try:
    page_html = uClient.read()
finally:
    uClient.close()

# Parse the downloaded HTML into a searchable tree.
page_soup = soup(page_html, "html.parser")


filename = "sportstreams.csv"
headers = "Sport Links " + "\n"

# Every <a> element that has an href attribute and sits inside a <form>.
links = page_soup.select("form a[href]")

# The with-block closes (and therefore flushes) the file as soon as the links
# are written, so the file is complete on disk before anything later in the
# script re-opens it for reading.
with open(filename, "w") as f:
    f.write(headers)
    for link in links:
        href = link["href"]
        print(href)
        f.write(href + "\n")



# Copy sportstreams.csv to sstream.csv line by line, intending to leave out
# any line that mentions one of the unwanted words.
with open('sportstreams.csv') as f,open('sstream.csv', "w") as f2:
    for x in f:
        # NOTE: this asks whether the tuple itself is "in" the line, not whether
        # either word is. x is a str, and a str membership test only accepts a
        # str on the left, so a tuple operand raises TypeError.
        if ('reddit', 'passwords') not in x: # trying to find multi words to omit
            f2.write(x.strip()+'\n')

Solution

  • Use the builtin function all:

    if all(t not in x for t in ('reddit', 'passwords')):
    

    Or any:

    if not any(t in x for t in ('reddit', 'passwords')):
    

    Here it is in your context manager:

    # Words that disqualify a line from being copied to the output file.
    blocked = ('reddit', 'passwords')

    with open('sportstreams.csv') as src, open('sstream.csv', "w") as dst:
        for row in src:
            # Keep the row only when none of the blocked words occurs in it.
            if all(word not in row for word in blocked):
                dst.write(row.strip() + '\n')