Search code examples
python, python-3.x, nlp, nltk

NLP-POS challenge


Please find the original question here on hackerrank

Although my solution is incomplete, can someone please help me understand where I'm going wrong? (In the second function the tagger returns a 2-letter tag, although the question asks for a 3-letter tag.) Thanks!

import re
import nltk
import string
final_tagged = ""
# Read the sentence to tag from standard input into the module-level
# ``strs``, which the tagging functions below rebind as they fill in tags.
# The original call passed the (still undefined) ``strs`` as the prompt and
# discarded the line that was read.
try:
    _read_line = raw_input  # Python 2 name
except NameError:
    _read_line = input  # Python 3 renamed raw_input to input
strs = _read_line()
def tokenize_two(i):
    """Tag one ``word/??`` token and patch the result into the global ``strs``.

    Args:
        i: A whitespace-delimited token carrying the ``/??`` placeholder.

    Returns:
        The token with every ``??`` replaced by the tag that
        ``nltk.pos_tag`` assigns to the bare word.

    Side effects:
        Rebinds the module-level ``strs`` so that every occurrence of the
        original ``word/??`` token is replaced by the tagged token.
    """
    global strs
    # Strip the placeholder so only the bare word is handed to the tagger.
    word = i.replace("/??", "")
    # NOTE(review): the word is tagged on its own, so the tagger sees no
    # sentence context; the tag may differ from tagging the full sentence.
    tag = nltk.pos_tag([word])[0][1]
    tagged = i.replace("??", tag)
    # Use the str method: the module-level string.replace() function was
    # removed in Python 3.
    strs = strs.replace(word + "/??", tagged)
    return tagged

def tokenize_three(i):
    """Tag one ``word/???`` token and patch the result into the global ``strs``.

    Args:
        i: A whitespace-delimited token carrying the ``/???`` placeholder.

    Returns:
        The token with every ``???`` replaced by the tag that
        ``nltk.pos_tag`` assigns to the bare word.

    Side effects:
        Rebinds the module-level ``strs`` so that every occurrence of the
        original ``word/???`` token is replaced by the tagged token.
    """
    global strs
    # Strip the placeholder so only the bare word is handed to the tagger.
    word = i.replace("/???", "")
    # NOTE(review): the word is tagged on its own, so the tagger sees no
    # sentence context; the tag may differ from tagging the full sentence.
    tag = nltk.pos_tag([word])[0][1]
    tagged = i.replace("???", tag)
    # Use the str method: the module-level string.replace() function was
    # removed in Python 3.
    strs = strs.replace(word + "/???", tagged)
    return tagged

# Split the input on whitespace and tag every token that ends in a
# placeholder; the tagging functions write their results back into ``strs``.
for token in re.split(r'\s+', strs):
    if token.endswith("/??"):
        tagged = tokenize_two(token)
    if token.endswith("/???"):
        final_tagged = tokenize_three(token)
# Parenthesised so the statement is valid on Python 3 and still prints the
# same text on Python 2.
print(strs)

Solution

  • tag = nltk.pos_tag([i])
    

    POS tagging is context-dependent. You need to pass the entire tokenized sentence as an argument to pos_tag, rather than calling pos_tag one time for each unknown word.