Search code examples
pythonrethinkdbrethinkdb-python

How to delete a set of data based on group in RethinkDB with Python


I have a table of musical instruments. I want to group them by category, count the instruments in each group, and, if a group's count is larger than a given number, delete all the instruments in that group. My current code is:

from rethinkdb import RethinkDB
from faker import Faker
from faker_music import MusicProvider
from random import random
from time import sleep

# Faker instance extended with the music provider; used to generate sample rows.
fake = Faker()
fake.add_provider(MusicProvider)
r = RethinkDB()

# repl() registers this connection as the default one, so the run() calls
# below do not need an explicit connection argument.
r.connect("localhost", 28015).repl()

# Start from a clean table on every run. A missing table is reported by the
# driver as ReqlOpFailedError and is safe to ignore; any other failure
# (connection problems, KeyboardInterrupt, ...) is allowed to propagate.
try:
  r.db("test").table_drop("instruments").run()
except r.errors.ReqlOpFailedError:
  pass

r.db("test").table_create("instruments").run()

def instrument()->dict:
  """Build one random instrument document with "name" and "category" keys."""
  return {
    "name": fake.music_instrument(),
    "category": fake.music_instrument_category(),
  }

# Seed the table with three random instruments.
initial = [instrument() for _ in range(3)]
r.table("instruments").insert(initial).run()

# Runs forever, one iteration per second: each pass draws a random number and
# either inserts a new instrument, attempts the cleanup, or does nothing.
while True:
  check = random()
  # Draws strictly between 0.25 and 0.5 insert one new instrument.
  if check < 0.5 and check >0.25:
    r.table("instruments").insert(instrument()).run()

  # Draws below 0.25 attempt the cleanup.
  if  check < 0.25:
    # Intended: delete every instrument whose category holds more than 3
    # entries. This line is only a sketch of that intent -- `filter` is
    # referenced without being called -- and the author reports that it does
    # not work.
    cursor = r.table("instruments").group("category").count().gt(3).filter.delete().run()

  sleep(1)

The line r.table("instruments").group("category").count().gt(3).filter.delete().run() does not work, but it indicates what I am trying to achieve.


Solution

  • Ok this worked:

    from rethinkdb import RethinkDB
    from faker import Faker
    from faker_music import MusicProvider
    from random import random
    from time import sleep
    
    # Faker instance extended with the music provider; used to generate sample rows.
    fake = Faker()
    fake.add_provider(MusicProvider)
    r = RethinkDB()

    # Explicit connection handle, passed to every run() call below.
    conn = r.connect("localhost", 28015)

    # Start from a clean table on every run. A missing table is reported by the
    # driver as ReqlOpFailedError and is safe to ignore; any other failure
    # (connection problems, KeyboardInterrupt, ...) is allowed to propagate.
    try:
      r.db("test").table_drop("instruments").run(conn)
    except r.errors.ReqlOpFailedError:
      pass

    r.db("test").table_create("instruments").run(conn)
    
    def instrument()->dict:
      """Build one random instrument document with "name" and "category" keys."""
      return {
        "name": fake.music_instrument(),
        "category": fake.music_instrument_category(),
      }
    
    # Seed the table with three random instruments.
    initial = [instrument() for _ in range(3)]
    r.table("instruments").insert(initial).run(conn)


    # Runs forever, one iteration per second: occasionally insert a new
    # instrument, then purge every category that has grown too large.
    while True:
      check = random()
      # Draws strictly between 0.25 and 0.5 insert one new instrument.
      if 0.25 < check < 0.5:
        r.table("instruments").insert(instrument()).run(conn)

      # Grouping by category and counting gives a category -> row-count mapping.
      counts = dict(r.table("instruments").group("category").count().run(conn))

      # Delete all instruments of every category holding more than two rows.
      for category, count in counts.items():
        if count > 2:
          r.table("instruments").filter({"category": category}).delete().run(conn)

      sleep(1)
    

    However, I am sure there is a more streamlined, functional solution. If someone knows one, please comment.