Search code examples
clojureiteratorcombinatorics

How to identify sub sets of an object?


How would I best iterate over the below object in Clojure?

{
  :item-set-1 ["a" "b" "c"]
  :item-set-2 ["d" "e" "f"]
}

I want to try and identify all sub sets of the object and produce a result like this:

{
 [:item-set-1 ["a"]]
 [:item-set-1 ["a" "b"]]
 [:item-set-1 ["a" "b" "c"]]
 [:item-set-1 ["b"]]
 [:item-set-1 ["b" "c"]]
 [:item-set-1 ["c"]]
 [:item-set-2 ["d"]]
 [:item-set-2 ["d" "e"]]
 [:item-set-2 ["d" "e" "f"]]
 [:item-set-1 ["e"]]
 [:item-set-1 ["e" "f"]]
 [:item-set-1 ["f"]]

 [:item-set-1 ["a"] [:item-set-2 ["d"]]]
 [:item-set-1 ["b"] [:item-set-2 ["e"]]]
 [:item-set-1 ["c"] [:item-set-2 ["f"]]]

 [:item-set-1 ["a" "b"] [:item-set-2 ["d" "e"]]]
 [:item-set-1 ["a" "b"] [:item-set-2 ["e" "f"]]]
 [:item-set-1 ["a" "b"] [:item-set-2 ["d" "f"]]]
 [:item-set-1 ["b" "c"] [:item-set-2 ["d" "e"]]]
 [:item-set-1 ["b" "c"] [:item-set-2 ["e" "f"]]]
 [:item-set-1 ["b" "c"] [:item-set-2 ["d" "f"]]]
 [:item-set-1 ["a" "c"] [:item-set-2 ["d" "e"]]]
 [:item-set-1 ["a" "c"] [:item-set-2 ["e" "f"]]]
 [:item-set-1 ["a" "c"] [:item-set-2 ["d" "f"]]]

 [:item-set-1 ["a" "b" "c"] [:item-set-2 ["d" "e" "f"]]]
}

I belive I can use clojure.math.combinatorics to identify the subsets in each key but not the whole object.

Update: I attempted to produce the sub sets with this code:

(defn generate-freq-item-set []
  (let [result [{:data (generate-string {:item-set-1 ["a" "b" "c"] :item-set-2 ["d" "e" "f"]})}]
        items (as-> () items
                    (->> (for [row result]
                           (for [data (parse-string (:data row))]
                              (for [subset (combo/subsets (second data))]
                               (conj items {(first data) subset}))))))
        frequencies (sort-by last >
                             (->> (apply concat (apply concat (apply concat items)))
                                      (frequencies)))]
      (prn frequencies)))

But this produces the following output which is not exactly what I'm after:

([{"item-set-1" ()} 1] 
 [{"item-set-2" ("d")} 1] 
 [{"item-set-1" ("a" "b" "c")} 1] 
 [{"item-set-2" ("d" "e")} 1] 
 [{"item-set-1" ("b" "c")} 1] 
 [{"item-set-2" ("d" "e" "f")} 1] 
 [{"item-set-2" ()} 1] 
 [{"item-set-1" ("a" "b")} 1] 
 [{"item-set-1" ("c")} 1] 
 [{"item-set-2" ("e")} 1] 
 [{"item-set-2" ("d" "f")} 1] 
 [{"item-set-2" ("f")} 1] 
 [{"item-set-2" ("e" "f")} 1] 
 [{"item-set-1" ("b")} 1] 
 [{"item-set-1" ("a")} 1] 
 [{"item-set-1" ("a" "c")} 1])

Solution

  • I'd approach this problem as follows.

    At first, I'd splice initial map you have into a list, saving in the metadata the info about the set where the item belonged to.

    Since it is not possible to attach metadata to raw strings, we need to create a wrapper type:

    (defrecord ItemSetElement [x])
    
    (defn make-item-set-element [x]
      (->ItemSetElement x))
    
    (defn unwrap-item-set-element [elem]
      (:x elem))
    

    Then go the functions that convert an initial map to sequence, saving needed info:

    (defn wrap-element-and-save-owner [owner s]
      (with-meta (make-item-set-element s) {::owner owner}))
    
    (defn prepare-data [data]
      (mapcat
       (fn [[key ss]]
         (map (partial wrap-element-and-save-owner key) ss))
       data))
    
    > (prepare-data {:item-set-1 ["a" "b"], :item-set-2 ["c"]})
    ({:x "a"} {:x "b"} {:x "c"})
    

    As you see, the result of prepare-data is just a sequence, but every element of the sequence has information about the "owner" set in its meta, e.g.:

    > (meta (first (prepare-data {:item-set-1 ["a" "b"], :item-set-2 ["c"]})))
    {:user/owner :item-set-1}
    

    Having a sequence, we can use clojure.math.combinatorics/subsets to generate all its subsets:

    > (require '[clojure.math.combinatorics :as combo])
    nil
    > (combo/subsets (prepare-data {:item-set-1 ["a" "b"], :item-set-2 ["c"]}))
    (()
     ({:x "a"})
     ({:x "b"})
     ({:x "c"})
     ({:x "a"} {:x "b"})
     ({:x "a"} {:x "c"})
     ({:x "b"} {:x "c"})
     ({:x "a"} {:x "b"} {:x "c"}))
    

    Each element of the subset still has information about its "owner", so we can easily convert it to an initial-like structure. Here's a function for that:

    (defn reconstruct-item-sets [subset]
      (->> subset
           (group-by #(::owner (meta %)))
           (map (fn [[key elements]]
                  [key (map unwrap-item-set-element elements)]))
           (into {})))
    

    To sum up here's all the code including function, that glues everything together:

    (require '[clojure.math.combinatorics :as combo])
    
    (defrecord ItemSetElement [x])
    
    (defn make-item-set-element [x]
      (->ItemSetElement x))
    
    (defn unwrap-item-set-element [elem]
      (:x elem))
    
    (defn wrap-element-and-save-owner [owner s]
      (with-meta (make-item-set-element s) {::owner owner}))
    
    (defn prepare-data [data]
      (mapcat
       (fn [[key ss]]
         (map (partial wrap-element-and-save-owner key) ss))
       data))
    
    (defn reconstruct-item-sets [subset]
      (->> subset
           (group-by #(::owner (meta %)))
           (map (fn [[key elements]]
                  [key (map unwrap-item-set-element elements)]))
           (into {})))
    
     (defn my-subsets [data]
       (->> data
            prepare-data
            combo/subsets
            (map reconstruct-item-sets)))
    
    (def data {:item-set-1 ["a" "b"]
               :item-set-2 ["c" "d" "e"]})
    
    > (my-subsets data)
    ({}
     {:item-set-1 ("a")}
     {:item-set-1 ("b")}
     {:item-set-2 ("c")}
     {:item-set-2 ("d")}
     {:item-set-2 ("e")}
     {:item-set-1 ("a" "b")}
     {:item-set-1 ("a"), :item-set-2 ("c")}
     {:item-set-1 ("a"), :item-set-2 ("d")}
     {:item-set-1 ("a"), :item-set-2 ("e")}
     {:item-set-1 ("b"), :item-set-2 ("c")}
     {:item-set-1 ("b"), :item-set-2 ("d")}
     {:item-set-1 ("b"), :item-set-2 ("e")}
     {:item-set-2 ("c" "d")}
     {:item-set-2 ("c" "e")}
     {:item-set-2 ("d" "e")}
     {:item-set-1 ("a" "b"), :item-set-2 ("c")}
     {:item-set-1 ("a" "b"), :item-set-2 ("d")}
     {:item-set-1 ("a" "b"), :item-set-2 ("e")}
     {:item-set-1 ("a"), :item-set-2 ("c" "d")}
     {:item-set-1 ("a"), :item-set-2 ("c" "e")}
     {:item-set-1 ("a"), :item-set-2 ("d" "e")}
     {:item-set-1 ("b"), :item-set-2 ("c" "d")}
     {:item-set-1 ("b"), :item-set-2 ("c" "e")}
     {:item-set-1 ("b"), :item-set-2 ("d" "e")}
     {:item-set-2 ("c" "d" "e")}
     {:item-set-1 ("a" "b"), :item-set-2 ("c" "d")}
     {:item-set-1 ("a" "b"), :item-set-2 ("c" "e")}
     {:item-set-1 ("a" "b"), :item-set-2 ("d" "e")}
     {:item-set-1 ("a"), :item-set-2 ("c" "d" "e")}
     {:item-set-1 ("b"), :item-set-2 ("c" "d" "e")}
     {:item-set-1 ("a" "b"), :item-set-2 ("c" "d" "e")})