Please find below the program that I'm using. It runs without errors but does not produce any output. I would appreciate help finding the problem.
import gzip
import warc
import os
from mrjob.job import MRJob
class DocumentCounter(MRJob):
    """MRJob that emits one count per entry of the WARC dataset directory."""

    def mapper(self, _, line):
        """Yield ``(1, 1)`` once for every entry in the dataset directory.

        Both the input key and ``line`` are ignored; only the directory
        listing drives the output.

        NOTE(review): the directory is listed again on every call, and mrjob
        presumably calls ``mapper`` once per input line, so the total would
        scale with the number of input lines (and nothing is emitted when
        there is no input) -- confirm this is intended.
        """
        for _entry in os.listdir("C://Users//HP//WARCDataset"):
            yield 1, 1

    def reducer(self, key, values):
        """Yield ``key`` paired with the sum of all ``values`` for it."""
        total = sum(values)
        yield key, total
if __name__ == '__main__':
    # Start the job only when this file is executed as a script.
    DocumentCounter.run()
See the screenshot of the IDE and the output window: the result is not displayed even though the program runs successfully.
class DocumentCounter(MRJob):
    """MRJob that counts the entries found under ``WARC_PATH``.

    NOTE(review): ``WARC_PATH`` is not defined in this snippet; it is assumed
    to be a module-level constant holding the dataset directory -- confirm.
    """

    def mapper_raw(self, _, line):
        """Yield ``("total_documents", 1)`` for each entry in ``WARC_PATH``.

        Neither argument is used.

        NOTE(review): mrjob documents ``mapper_raw`` as receiving
        ``(input_path, input_uri)``, so the parameter name ``line`` looks
        misleading -- confirm against the mrjob documentation.
        """
        for _name in os.listdir(WARC_PATH):
            yield "total_documents", 1

    def combiner(self, key, values):
        """Yield ``key`` with the sum of the ``values`` passed to this call."""
        partial_total = sum(values)
        yield key, partial_total

    def reducer(self, key, values):
        """Yield ``key`` with the sum of all ``values`` received for it."""
        yield key, sum(values)
if __name__ == '__main__':
    # Start the job only when this file is executed as a script.
    DocumentCounter.run()
The above code uses the os.listdir function to iterate over all files at the given path.