Search code examples
lisp, common-lisp, nested-lists, clisp

Read words from a file into a nested list in Common Lisp


I have a file named test.txt; it contains:

"hello this is a test file"

I want to read the file so that every word is represented as a list of characters and every paragraph as a list of words; that is, I want to store the contents in a nested list like:

(list (list (h e l l o)) (list (t h i s)) (list (i s)) (list (a)) (list (t e s t)) (list (f i l e)))

I am totally new to Lisp and am quite confused about this problem.


Solution

  • Solution without any dependencies

    (defun split (l &key (separators '(#\Space #\Tab #\Newline)) (acc '()) (tmp '()))
      "Split the list L into sublists at every element found in SEPARATORS.

    Returns a list of the maximal runs of non-separator elements, in their
    original order. Consecutive, leading and trailing separators never
    produce empty sublists.

    ACC and TMP seed the accumulators and exist for backward compatibility:
    ACC is the list of already completed runs in reverse order, TMP is the
    run currently being built, also in reverse order. Both are consumed
    destructively (via NREVERSE), so pass fresh lists if you supply them.

    The implementation iterates instead of recursing once per element:
    Common Lisp does not guarantee tail-call elimination, so the recursive
    form could exhaust the stack on long inputs."
      (flet ((finish-run ()
               ;; Move the run collected so far (if any) onto ACC.
               (when tmp
                 (push (nreverse tmp) acc)
                 (setf tmp '()))))
        (dolist (item l)
          (if (member item separators)
              (finish-run)
              (push item tmp)))
        ;; The input may end without a trailing separator.
        (finish-run)
        (nreverse acc)))
    
    (defun read-file-lines (file-path)
      "Return the lines of the file at FILE-PATH as a list of strings, in file order.
    Line terminators are not included; an empty file yields NIL."
      (with-open-file (stream file-path :direction :input)
        ;; READ-LINE returns NIL at end of file because EOF-ERROR-P is NIL.
        (do ((lines '() (cons current lines))
             (current (read-line stream nil) (read-line stream nil)))
            ((null current) (nreverse lines)))))
    
    (defun read-file-to-word-characters (file-path)
      "Read the file at FILE-PATH and return one list of characters per word.
    The words of all lines are concatenated into a single flat list of words."
      ;; SPLIT returns a fresh list for each line, so destructive NCONC is safe.
      (loop for line in (read-file-lines file-path)
            nconc (split (coerce line 'list))))
    
    ;; Example call; the result below is for the two-line sample file whose
    ;; content is listed further down in this answer.
    (read-file-to-word-characters "~/test.lisp.txt")
    ;; ((#\h #\e #\l #\l #\o) (#\t #\h #\i #\s) (#\i #\s) (#\a) (#\t #\e #\s #\t)
    ;;  (#\f #\i #\l #\e))
    

    Convert the characters to one-letter strings:

    ;; Apply a conversion function (e.g. `string`) to every leaf of a nested
    ;; list (= a tree), keeping the nesting structure intact.
    (defun map-tree (fn tree)
      "Return a copy of TREE in which every non-NIL atom X is replaced by (FUNCALL FN X).
    NIL is treated as the empty list and is returned unchanged."
      (if (listp tree)
          ;; A list (possibly empty): rebuild it from its converted branches.
          (loop for branch in tree
                collect (map-tree fn branch))
          ;; A leaf: convert it.
          (funcall fn tree)))
    
    ;; Example: turn every character of the word lists into a one-letter string.
    (map-tree #'string (read-file-to-word-characters "~/test.lisp.txt"))
    ;; (("h" "e" "l" "l" "o") ("t" "h" "i" "s") ("i" "s") ("a") ("t" "e" "s" "t")
    ;;  ("f" "i" "l" "e"))
    

    Content of "~/test.lisp.txt":

    hello this
    is a test file
    

    Solution using cl-ppcre (Edi Weitz's congenial regex package)

    ;; See this answer for how to use cl-ppcre:split:
    ;; https://stackoverflow.com/questions/15393797/lisp-splitting-input-into-separate-strings
    (ql:quickload :cl-ppcre)
    
    (defun read-file-lines (file-path)
      "Read the file at FILE-PATH and return its lines, in order, as a list of strings.
    Returns NIL for an empty file."
      (with-open-file (in file-path :direction :input)
        (let ((lines '()))
          (loop
            ;; EOF-ERROR-P is NIL, so READ-LINE yields NIL once the file is exhausted.
            (let ((line (read-line in nil)))
              (unless line
                (return (nreverse lines)))
              (push line lines))))))
    
    (defun string-to-words (s)
      "Return the whitespace-separated words of the string S as a list of strings.

    Words are collected as maximal runs of non-whitespace characters rather
    than by splitting on whitespace: splitting on \"\\\\s+\" returns an empty
    string as the first element when S starts with whitespace, which would
    then show up as an empty word. Blank or empty input yields NIL."
      (cl-ppcre:all-matches-as-strings "\\S+" s))
    (defun to-single-characters (s)
      "Return the elements of the sequence S (normally a string) as a list of characters."
      (map 'list #'identity s))
    
    (defun read-file-to-character-lists (file-path)
      "Read the file at FILE-PATH and return one list of characters per word.
    The words of all lines end up in a single flat list, in file order."
      ;; The inner LOOP conses a fresh list per line, so NCONC is safe here.
      (loop for line in (read-file-lines file-path)
            nconc (loop for word in (string-to-words line)
                        collect (to-single-characters word))))
    
    ;; Example call on the same sample file as in the dependency-free solution.
    (read-file-to-character-lists "~/test.lisp.txt")
    ;; ((#\h #\e #\l #\l #\o) (#\t #\h #\i #\s) (#\i #\s) (#\a) (#\t #\e #\s #\t)
    ;;  (#\f #\i #\l #\e))
    
    ;; Or reuse map-tree (defined in the dependency-free solution above)
    ;; to convert the characters to one-letter strings:
    (map-tree #'string (read-file-to-character-lists "~/test.lisp.txt"))
    ;; (("h" "e" "l" "l" "o") ("t" "h" "i" "s") ("i" "s") ("a") ("t" "e" "s" "t")
    ;;  ("f" "i" "l" "e"))
    
    
    ;; or:
    (defun to-single-letter-strings (s)
      "Split the string S into a list of one-character strings.
    The pattern also matches the empty string, so a split happens between
    every pair of adjacent characters; whitespace itself is dropped."
      (let ((between-characters "\\s*"))
        (cl-ppcre:split between-characters s)))
    
    (defun read-file-to-letter-lists (file-path)
      "Read the file at FILE-PATH and return one list of one-letter strings per word.
    The words of all lines end up in a single flat list, in file order."
      (let ((words '()))
        ;; Collect in reverse with PUSH, then restore file order at the end.
        (dolist (line (read-file-lines file-path) (nreverse words))
          (dolist (word (string-to-words line))
            (push (to-single-letter-strings word) words)))))
    
    ;; Example call on the same sample file; yields one-letter strings directly.
    (read-file-to-letter-lists "~/test.lisp.txt")
    ;; (("h" "e" "l" "l" "o") ("t" "h" "i" "s") ("i" "s") ("a") ("t" "e" "s" "t")
    ;;  ("f" "i" "l" "e"))