Search code examples
pythonfunctionpraw

Why is one of my functions running twice?


The function search_for_song(pbody) is running twice, i can't figure out why. would like some help, just started learning python a few days ago.

Here's the full code:

#a bot that replies with youtube songs that were mentioned in the comments
import traceback
import praw
import time
import sqlite3
import requests
from lxml import html
import socket
import errno
import re
import urllib
from bs4 import BeautifulSoup
import sys
import urllib2


'''USER CONFIGURATION'''

APP_ID = ""
APP_SECRET = ""
APP_URI = ""
APP_REFRESH = ""
    # https://www.reddit.com/comments/3cm1p8/how_to_make_your_bot_use_oauth2/
USERAGENT = "Python automatic youtube linkerbot"
# This is a short description of what the bot does.
# For example "Python automatic replybot v2.0 (by /u/GoldenSights)"
SUBREDDIT = "kqly"
# This is the sub or list of subs to scan for new posts. For a single sub, use "sub1". For multiple subreddits, use "sub1+sub2+sub3+..."
DO_SUBMISSIONS = False
DO_COMMENTS = True
# Look for submissions, comments, or both.
KEYWORDS = ["linksong"]
# These are the words you are looking for
KEYAUTHORS = []
# These are the names of the authors you are looking for
# The bot will only reply to authors on this list
# Keep it empty to allow anybody.
#REPLYSTRING = "**Hi, I'm a bot.**"
# This is the word you want to put in reply
MAXPOSTS = 100
# This is how many posts you want to retrieve all at once. PRAW can download 100 at a time.
WAIT = 30
# This is how many seconds you will wait between cycles. The bot is completely inactive during this time.

CLEANCYCLES = 10
# After this many cycles, the bot will clean its database
# Keeping only the latest (2*MAXPOSTS) items

'''All done!'''

try:
    import bot
    USERAGENT = bot.aG
except ImportError:
    pass

print('Opening SQL Database')
sql = sqlite3.connect('sql.db')
cur = sql.cursor()
cur.execute('CREATE TABLE IF NOT EXISTS oldposts(id TEXT)')

print('Logging in...')
r = praw.Reddit(USERAGENT)
r.set_oauth_app_info(APP_ID, APP_SECRET, APP_URI)
r.refresh_access_information(APP_REFRESH)

def replybot():
    print('Searching %s.' % SUBREDDIT)
    subreddit = r.get_subreddit(SUBREDDIT)
    posts = []
    if DO_SUBMISSIONS:
        posts += list(subreddit.get_new(limit=MAXPOSTS))
    if DO_COMMENTS:
        posts += list(subreddit.get_comments(limit=MAXPOSTS))
    posts.reverse()

    for post in posts:
        #print ("Searching for another the next comment")
        # Anything that needs to happen every loop goes here.
        pid = post.id

        try:
            pauthor = post.author.name
        except AttributeError:
            # Author is deleted. We don't care about this post.
            continue

        if pauthor.lower() == r.user.name.lower():
            # Don't reply to yourself, robot!
            print('Will not reply to myself.')
            continue

        if KEYAUTHORS != [] and all(auth.lower() != pauthor for auth in KEYAUTHORS):
            # This post was not made by a keyauthor
            continue

        cur.execute('SELECT * FROM oldposts WHERE ID=?', [pid])
        if cur.fetchone():
            # Post is already in the database
            continue

        if isinstance(post, praw.objects.Comment):
            pbody = post.body
        else:
            pbody = '%s %s' % (post.title, post.selftext)
        pbody = pbody.lower()

        if not any(key.lower() in pbody for key in KEYWORDS):
            # Does not contain our keyword
            continue

        cur.execute('INSERT INTO oldposts VALUES(?)', [pid])
        sql.commit()
        print('Replying to %s by %s' % (pid, pauthor))
        try:
            if search_for_song(pbody):
                # pbody=pbody[8:]
                # pbody=pbody.replace("\n", "")
                temp=pbody[8:].lstrip()
                post.reply("[**"+temp+"**]("+search_for_song(pbody)+") \n ---- \n ^^This ^^is ^^an ^^automated ^^message ^^by ^^a ^^bot, ^^if ^^you ^^found ^^any ^^bug ^^and/or ^^willing ^^to ^^contact ^^me. [**^^Press ^^here**](https://www.reddit.com/message/compose?to=itailitai)")
        except praw.errors.Forbidden:
            print('403 FORBIDDEN - is the bot banned from %s?' % post.subreddit.display_name)

def search_for_song(pbody):
    #print("in search_for_song")
    song=pbody
    if len(song)>8:
        song=song[8:]
        if song.isspace()==True or song=='':
            return False
        else:
            print("Search if %s exists in the database" % song  )
            #HEADERS = {'User-Agent': 'Song checker - check if songs exists by searching this website, part of a bot for reddit'}
            author, song_name = song_string_generator(song)
            url = 'http://www.songlyrics.com/'+author+'/'+song_name+'-lyrics/'
            print url
            #page = requests.get(url, HEADERS)
            check=1
            while check==1:
                try:
                    headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; rv:40.0) Gecko/20100101 Firefox/40.0' }
                    req = urllib2.Request(url, None, headers)
                    page= urllib2.urlopen(req)
                    check=2
                except socket.error as error:
                    pass
                except Exception:
                    print('An error occured while tryinc to verify song existence')
                    return False

            soup = BeautifulSoup(page.read(), "lxml")
            if "Please check the spelling and try again" not in soup.get_text():
                print ("Song was found in the database!")
                result=first_youtube(song)
                return result
            else:
                print ("Song was not found in the database!")
                return False

def song_string_generator(song):
    #print("in song_string_generator")
    song=song
    author,song_name= '',''
    try:
        if "-" in song:
            l=song.split('-', 1 )
            print ("2 ",l)
            author=l[0]
            song_name=l[1]
        elif "by" in song:
            l=song.split('by', 1 )
            print ("2 ",l)
            author=l[1]
            song_name=l[0]
        song_name=" ".join(song_name.split())
        author=" ".join(author.split())
        print (author,song_name)
        if author == 'guns and roses':
            author="guns n' roses"
        song_name=song_name.replace("\n", "")
        author=author.replace("\n", "")
        author=author.replace(" ", "-")
        song_name=song_name.replace(" ", "-")
        author=author.replace("'", "-")
        song_name=song_name.replace("'", "-")
        song_name=song_name.rstrip()
        song_name=" ".join(song_name.split())
        return author, song_name
    except:
        print ("No song was mentioned in the comment!")
        return False


def first_youtube(textToSearch):
    reload(sys)
    sys.setdefaultencoding('UTF-8')
    query_string = textToSearch
    try:
        html_content = urllib.urlopen("http://www.youtube.com/results?search_query=" + query_string)
        search_results = re.findall(r'href=\"\/watch\?v=(.{11})', html_content.read().decode())
        result="http://www.youtube.com/watch?v=" + search_results[0]
        return result
    except IOError:
        print ("IOError Occured while contacting Youtube!")

    except Exception:
        print ("A non IOError Occured while contacting Youtube!")
        return False



cycles = 0
while True:
    try:
        replybot()
        cycles += 1
    except Exception as e:
        traceback.print_exc()
    if cycles >= CLEANCYCLES:
        print('Cleaning database')
        cur.execute('DELETE FROM oldposts WHERE id NOT IN (SELECT id FROM oldposts ORDER BY id DESC LIMIT ?)', [MAXPOSTS * 2])
        sql.commit()
        cycles = 0
    print('Running again in %d seconds \n' % WAIT)
    time.sleep(WAIT)

This is the output I'm getting:

    Opening SQL Database
    Logging in...
    Searching kqly.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Will not reply to myself.
    Replying to d0kwcrs by itailitai
    Search if  guns                 and             roses - paradise                      city exists in the database
    ('2 ', [u' guns                 and             roses ', u' paradise                      city'])
    (u'guns and roses', u'paradise city')
    http://www.songlyrics.com/guns-n--roses/paradise-city-lyrics/
    Song was found in the database!
    Search if  guns                 and             roses - paradise                      city exists in the database
    ('2 ', [u' guns                 and             roses ', u' paradise                      city'])
    (u'guns and roses', u'paradise city')
    http://www.songlyrics.com/guns-n--roses/paradise-city-lyrics/
    Song was found in the database!
    Running again in 30 seconds 

it's a bot for reddit that replies with the youtube video of a song that was mentioned in the comments, if anyone wants to know.


Solution

  • With a cursory reading of your code you have

    if search_for_song(pbody):
        # do stuff..
         post.reply("[**"+temp+"**]("+search_for_song(pbody)+") \n ---- \n ^^This ^^is ^^an ^^automated ^^message ^^by ^^a ^^bot, ^^if ^^you ^^found ^^any ^^bug ^^and/or ^^willing ^^to ^^contact ^^me. [**^^Press ^^here**](https://www.reddit.com/message/compose?to=itailitai)")
    

    You call the function in the start of the if and in your post.reply line

    RESPONDING TO COMMENTS If you need to check the results but don't want to call twice simply save the output

    res = search_for_song(pbody):
    if res:
       #...
       post.reply(... + res + ...)