Search code examples
python django mongodb rest pymongo

Storing large data in a MongoDB collection using Python


I am working on a Django project where I have to generate 10 years of data, store it in MongoDB, retrieve it, and display it on an HTML page. I am trying to split the 10 years of data into 1-year chunks and store each chunk in a MongoDB collection, but whenever I try to do so only two documents get stored and this error is raised:

pymongo.errors.DocumentTooLarge: BSON document too large (29948865 bytes) - the connected server supports BSON document sizes up to 16793598 bytes.

My Python code is:


# Simulation clock and its derived date/time parts. `start` and `end` are
# defined outside this snippet.
now=start
workdate=now.date()
nowtime=now.time()

endt=end
# End of the period currently being generated; advanced at the top of the
# outer loop below.
ktime=start

# One entry per simulated minute: timestamp, pump state and tank level.
times=[]
states=[]
level=[]



#generating random level of water in the tank 
# NOTE(review): the loop only stops if `now` lands exactly on `endt`. `now`
# stops at each `ktime` (start + k * 5 months), so `endt` presumably has to be
# one of those boundaries -- confirm, otherwise this never terminates.
while (now!=endt): # loop for creating data for given time
    # Each pass of the outer loop generates one 5-month period.
    ktime=ktime+relativedelta(months=5)
    print(current_level)
    # Fill phase: while the state is 'on' and the level is at most 450, record
    # one sample per simulated minute and raise the level by `filling`.
    # Stops early, setting flag to 'red', when the clock reaches `ktime`.
    # `x` is only read here, so it cannot change while this loop runs.
    def fill():
        global df 
        global now
        global workdate
        global nowtime
        global ktime
        global current_level
        global flag
       
        global times
        global states 
        global level
        while x=='on' and current_level<=450:
            times.append(now)
            states.append(x)
            level.append(current_level)
            
           
              
            current_level+=filling
            current_level=round(current_level,2)
            now=now+timedelta(minutes=1)
            nowtime=now.time()
            # NOTE(review): missing call parentheses -- this binds the bound
            # method `now.date`, not a date (compare `now.date()` in drain()).
            workdate=now.date
            if now==ktime:
              # Period boundary reached: record the boundary sample and tell
              # the caller to stop via flag.
              times.append(now)
              states.append(x)
              level.append(current_level)
                
              print("true")
              flag='red'
              break
            
          
    
    # Drain phase: while the state is 'off' and the level is above 50, record
    # one sample per simulated minute and lower the level by `emptyrate`.
    # Stops early, setting flag to 'red', when the clock reaches `ktime`.
    def drain():
        global df
        global now 
        global workdate
        global nowtime 
        global ktime
        global current_level
        global flag 
     
        global times
        global states 
        global level
       
    
    
        while x=='off' and  current_level>50:
            times.append(now)
            states.append(x)
            level.append(current_level)
            
            
           
            print(current_level)
            current_level-=emptyrate
            # Rounded to 4 places here, whereas fill() rounds to 2.
            current_level=round(current_level,4)
            now=now+timedelta(minutes=1)
            nowtime=now.time()
            workdate=now.date()
            if now==ktime:
              # Period boundary reached: record the boundary sample and tell
              # the caller to stop via flag.
              times.append(now)
              states.append(x)
              level.append(current_level)
                
              print("true")
              flag='red'
              break
               
         
           
    
           
          
    
       
    # 'green' = keep simulating; fill()/drain() switch it to 'red' at `ktime`.
    flag='green'
    k=True
    # Alternate drain and fill phases until one of them reaches the period
    # boundary. `k` is never changed, so the loop is left only through `break`.
    # NOTE(review): if neither branch condition holds (e.g. x == 'off' with
    # current_level <= 50) nothing changes and this would spin forever --
    # confirm the initial x/current_level rule that out.
    while k:       
        if  x=='off' and current_level>50:
            drain()
            x='on'
            
    
    
        if flag =='red':
         break
    
    
    
    
        if x=='on' and  current_level<450: 
            fill()
            x='off'
            
       
            
          
            
        if flag=='red':
            break
    
    
    
    # Turn the samples into one record per row; reset_index adds an 'index'
    # column holding the row number.
    data = {'time': times, 'status': states, 'level': level}
    df = pd.DataFrame(data)
    
    
    df.reset_index(inplace=True)
    data = df.to_dict('records')
    # All records of this pass are stored as a single document under "data".
    # NOTE(review): assuming `colle` is a PyMongo Collection, `insert` is the
    # legacy method (removed in PyMongo 4); `insert_one` is the replacement.
    colle.insert({"data":data}) #transfering data to collection 
    # NOTE(review): only the DataFrame and the records list are discarded here.
    # times/states/level are never cleared, so every pass re-inserts all samples
    # generated since `start` and the document keeps growing.
    del df
    data.clear()
    

Solution

  • The problem was with my logic: instead of clearing `data`, I should have cleared the samples already accumulated in the `times`, `states` and `level` lists at the end of each pass of the outer loop. The last part of the loop body should be changed as follows:

    # Alternate drain and fill phases until one of them reaches the end of the
    # current period; fill()/drain() signal that by setting flag to 'red'.
    while k:
        if x == 'off' and current_level > 50:
            drain()
            x = 'on'

        if flag == 'red':
            # Period finished during the drain phase: snapshot the samples
            # and re-arm the flag for the next period.
            data = {'time': times, 'status': states, 'level': level}
            flag = 'green'
            break

        if x == 'on' and current_level < 450:
            fill()
            x = 'off'

        if flag == 'red':
            # Period finished during the fill phase.
            data = {'time': times, 'status': states, 'level': level}
            flag = 'green'
            break

    # Store this period's samples as one document: one record per sample,
    # with an extra 'index' column added by reset_index.
    df = pd.DataFrame(data)
    print(df)
    df.reset_index(inplace=True)
    data_dict = df.to_dict("records")
    colle.insert_one({"DATA": data_dict})
    df = df.iloc[0:0]
    # Empty ALL three sample lists so the next period starts from scratch.
    # Leaving `level` uncleared would give the lists different lengths on the
    # next pass, and pd.DataFrame() rejects columns of unequal length.
    times.clear()
    states.clear()
    level.clear()
    

    The outer (main) loop, which controls the period for which this code runs, remains the same.