New to Python and I'm trying to count the words in a directory of text files and write the output to a separate text file. However, I want to specify conditions. So if word count is > 0 is would like to write the count and file path to one file and if the count is == 0. I would like to write the count and file path to a separate file. Below is my code so far. I think I'm close, but I'm hung up on how to do the conditions and separate files. Thanks.
import sys
import os
from collections import Counter
import glob
stdoutOrigin=sys.stdout
sys.stdout = open("log.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
for filepath in glob.iglob(os.path.join("path", '*.txt')):
with open(filepath) as f:
data = f.read()
for key,val in words.items():
#print("key is " + key + "\n")
ct = data.count(key)
words[key] = ct
if action:
action(filepath, words)
def print_summary(filepath, words):
for key,val in sorted(words.items()):
print(filepath)
if val > 0:
print('{0}:\t{1}'.format(
key,
val))
filepath = sys.argv[1]
keys = ["x", "y"]
words = dict.fromkeys(keys,0)
count_words_in_dir(filepath, words, action=print_summary)
sys.stdout.close()
sys.stdout=stdoutOrigin
I would strongly urge you to not repurpose stdout
for writing data to a file as part of the normal course of your program. I also wonder how you can ever have a word "count < 0". I assume you meant "count == 0".
The main problem that your code has is in this line:
for filepath in glob.iglob(os.path.join("path", '*.txt')):
The string constant "path"
I'm pretty sure doesn't belong there. I think you want filepath
there instead. I would think that this problem would prevent your code from working at all.
Here's a version of your code where I fixed these issues and added the logic to write to two different output files based on the count:
import sys
import os
import glob
out1 = open("/tmp/so/seen.txt", "w")
out2 = open("/tmp/so/missing.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
with open(filepath) as f:
data = f.read()
for key, val in words.items():
# print("key is " + key + "\n")
ct = data.count(key)
words[key] = ct
if action:
action(filepath, words)
def print_summary(filepath, words):
for key, val in sorted(words.items()):
whichout = out1 if val > 0 else out2
print(filepath, file=whichout)
print('{0}: {1}'.format(key, val), file=whichout)
filepath = sys.argv[1]
keys = ["country", "friend", "turnip"]
words = dict.fromkeys(keys, 0)
count_words_in_dir(filepath, words, action=print_summary)
out1.close()
out2.close()
Result:
file seen.txt:
/Users/steve/tmp/so/dir/data2.txt
friend: 1
/Users/steve/tmp/so/dir/data.txt
country: 2
/Users/steve/tmp/so/dir/data.txt
friend: 1
file missing.txt:
/Users/steve/tmp/so/dir/data2.txt
country: 0
/Users/steve/tmp/so/dir/data2.txt
turnip: 0
/Users/steve/tmp/so/dir/data.txt
turnip: 0
(excuse me for using some search words that were a bit more interesting than yours)