Search code examples
python · text · wikipedia · keyerror · data-extraction

Parse wikitext myself with python - KeyError: 'revisions'


I'm a Python newbie. I took this code from here, but I get the KeyError shown below. I should mention that it used to work, so I do not know what happened.

wikicode = page['revisions'][0]['*']
KeyError: 'revisions'

This is the code. Thank you for any help!

# Fetches a page's wikitext from the English Wikipedia API, strips the markup
# with mwparserfromhell, and appends the resulting text to article.txt.
def request_wiki_value(title=None, sentence=''):
# Prompt for a title when the caller did not supply one.
if title is None:
    title = input("No title entered.\nPlease enter a title: ")
import requests
import mwparserfromhell
# Query the API for the page's revision content, decoded from JSON.
response = requests.get(
    'https://en.wikipedia.org/w/api.php',
    params={
        'action': 'query',
        'format': 'json',
        'titles': title,
        'prop': 'revisions',
        'rvprop': 'content',
    }
).json()
# Take the first entry of the 'pages' mapping in the response.
page = next(iter(response['query']['pages'].values()))
# This is the line reported in the traceback: it raises KeyError: 'revisions'
# when the page entry has no 'revisions' key.
wikicode = page['revisions'][0]['*']
parsed_wikicode = mwparserfromhell.parse(wikicode)
# open("article.txt", "a", encoding='utf-8').write(parsed_wikicode.strip_code())
# Walk the stripped text one character at a time, collecting it in `sentence`.
for ch in parsed_wikicode.strip_code():
    sentence = sentence + ch
    if ch == '\n':
        # End of a line: drop the newline, append the collected text to
        # article.txt, and start collecting afresh.
        sentence = sentence.removesuffix(ch)
        open("article.txt", "a", encoding='utf-8').write(sentence)
        sentence = ''

Solution

  • Try this :

    import requests
    import mwparserfromhell
    def request_wiki_value(title=None, sentence=''):
        """Fetch a Wikipedia page's wikitext and append its plain text to ``article.txt``.

        The page content is requested from the English Wikipedia API, the wiki
        markup is removed with ``mwparserfromhell``, and every newline-terminated
        line of the result is appended to ``article.txt`` with its newline
        removed (lines are written back-to-back).

        Args:
            title: Title of the page to fetch. When ``None``, the user is
                prompted for one on standard input.
            sentence: Text written in front of the first line.

        Raises:
            KeyError: If the API response contains no page entry, or the page
                entry has no ``'revisions'`` key (presumably because the title
                does not match an existing page -- verify against the API
                response).
        """
        if title is None:
            title = input("No title entered.\nPlease enter a title: ")

        response = requests.get(
            'https://en.wikipedia.org/w/api.php',
            params={
                'action': 'query',
                'format': 'json',
                'titles': title,
                'prop': 'revisions',
                'rvprop': 'content',
            },
        ).json()

        # Fail with an explanatory KeyError instead of a bare
        # "KeyError: 'revisions'" (or a StopIteration on an empty mapping).
        pages = response.get('query', {}).get('pages', {})
        page = next(iter(pages.values()), None)
        if page is None or 'revisions' not in page:
            raise KeyError(
                f"no revisions returned for title {title!r}; "
                "check that the page exists and the title is spelled correctly"
            )

        wikicode = page['revisions'][0]['*']
        plain_text = mwparserfromhell.parse(wikicode).strip_code()

        # Only newline-terminated lines are written; whatever follows the last
        # newline is left out, exactly as in the character-by-character loop
        # this replaces.
        *finished_lines, _unterminated = plain_text.split('\n')
        if finished_lines:
            # Open the file once and close it deterministically rather than
            # re-opening it for every line and never closing it.
            with open("article.txt", "a", encoding='utf-8') as article:
                article.write(sentence + ''.join(finished_lines))
    

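    The problem is that you did not indent the function body. Note also that a KeyError: 'revisions' means the API returned a page entry without a 'revisions' key, which typically happens when the requested title does not exist, so double-check the title you pass in.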