Search code examples
orbeonxformsexist-db

Growing eXist database


I'm using Orbeon Forms, and eXist to store filled forms there. And I have next problem, eXist is growing more than 500 MB per day. If I will made reserve copy of whole database, then will delete all DB files, and restore reserved copy, then DB is only 1 GB large, but after few days it becomes 4 GB, and grows, grows and grows...

First I was using embeded eXist 1.2.6, and I thought that it's unstable version, so I moved all data to external eXist 1.4.1, but still it continue to grow in new version.

Here's conf.xml from exist/WEB-INF

<?xml version="1.0" encoding="UTF-8"?>
<exist xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="schema/conf.xsd">
    <cluster dbaPassword="" dbaUser="admin" exclude="/db/system,/db/system/config" 
    journalDir="data/journal" 
    protocol="UDP(mcast_addr=228.1.2.3;mcast_port=45566;ip_ttl=8;ip_mcast=true;mcast_send_buf_size=800000;mcast_recv_buf_size=150000;ucast_send_buf_size=800000;ucast_recv_buf_size=150000;loopback=true):PING(timeout=2000;num_initial_members=3;up_thread=true;down_thread=true):MERGE2(min_interval=10000;max_interval=20000):FD(shun=true;up_thread=true;down_thread=true;timeout=2500;max_tries=5):VERIFY_SUSPECT(timeout=3000;num_msgs=3;up_thread=true;down_thread=true):pbcast.NAKACK(gc_lag=50;retransmit_timeout=300,600,1200,2400,4800;max_xmit_size=8192;up_thread=true;down_thread=true):UNICAST(timeout=300,600,1200,2400,4800;window_size=100;min_threshold=10;down_thread=true):pbcast.STABLE(desired_avg_gossip=20000;up_thread=true;down_thread=true):FRAG(frag_size=8192;down_thread=true;up_thread=true):pbcast.GMS(join_timeout=5000;join_retry_timeout=2000;shun=true;print_local_addr=true)"/>


    <db-connection cacheSize="200M" collectionCache="200M" database="native" files="/opt/alfresco-3.4.c/tomcat/work/exist/data/" pageSize="4096">

        <recovery enabled="yes" group-commit="no" journal-dir="/opt/alfresco-3.4.c/tomcat/work/exist/recovery-data/" size="100M" sync-on-commit="yes" force-restart="yes" consistency-check="yes"/>

        <watchdog output-size-limit="10000" query-timeout="180000"/>

    </db-connection>

    <indexer caseSensitive="yes" index-depth="5" preserve-whitespace-mixed-content="yes" stemming="no" suppress-whitespace="none" tokenizer="org.exist.storage.analysis.SimpleTokenizer" track-term-freq="yes">

            <modules>
            <module id="ngram-index" file="ngram.dbx" n="3" class="org.exist.indexing.ngram.NGramIndex"/>

            <module id="lucene-index" buffer="32" class="org.exist.indexing.lucene.LuceneIndex"/>
        </modules>

        <stopwords file="stopword"/>

        <index>
            <fulltext attributes="false" default="none">
                <exclude path="/auth"/>
            </fulltext>
        </index>
    </indexer>

    <scheduler>

    </scheduler>
    <serializer add-exist-id="none" compress-output="no" enable-xinclude="yes" enable-xsl="no" indent="yes" match-tagging-attributes="no" match-tagging-elements="no">
    </serializer>

    <transformer class="org.apache.xalan.processor.TransformerFactoryImpl" caching="yes"/>

    <validation mode="no">
        <entity-resolver>
            <catalog uri="${WEBAPP_HOME}/WEB-INF/catalog.xml"/>
        </entity-resolver>
    </validation>

    <xquery enable-java-binding="no" disable-deprecated-functions="no" enable-query-rewriting="yes" backwardCompatible="no" raise-error-on-failed-retrieval="no">
        <builtin-modules>
            <module uri="http://exist-db.org/xquery/lucene" class="org.exist.xquery.modules.lucene.LuceneModule"/>          
            <module uri="http://exist-db.org/xquery/ngram" class="org.exist.xquery.modules.ngram.NGramModule"/>

            <module uri="http://exist-db.org/xquery/examples" class="org.exist.xquery.modules.example.ExampleModule"/>           
            <module uri="http://exist-db.org/xquery/request" class="org.exist.xquery.functions.request.RequestModule"/>            
            <module uri="http://exist-db.org/xquery/response" class="org.exist.xquery.functions.response.ResponseModule"/>            
            <module uri="http://exist-db.org/xquery/session" class="org.exist.xquery.functions.session.SessionModule"/>            
            <module uri="http://exist-db.org/xquery/system" class="org.exist.xquery.functions.system.SystemModule"/>
            <module uri="http://exist-db.org/xquery/text" class="org.exist.xquery.functions.text.TextModule"/>           
            <module uri="http://exist-db.org/xquery/transform" class="org.exist.xquery.functions.transform.TransformModule"/>            
            <module uri="http://exist-db.org/xquery/util" class="org.exist.xquery.functions.util.UtilModule"/>            
            <module uri="http://exist-db.org/xquery/validation" class="org.exist.xquery.functions.validation.ValidationModule"/>           
            <module uri="http://exist-db.org/xquery/xmldb" class="org.exist.xquery.functions.xmldb.XMLDBModule"/>            

            <module uri="http://expath.org/ns/http-client" class="org.expath.exist.HttpClientModule"/>        
            <module uri="http://www.expath.org/mod/http-client" src="resource:org/expath/www/mod/http-client/http-client.xqm"/>      

            <module uri="http://exist-db.org/xquery/httpclient" class="org.exist.xquery.modules.httpclient.HTTPClientModule"/>

            <module uri="http://exist-db.org/xquery/kwic" src="resource:org/exist/xquery/lib/kwic.xql"/>

            <module uri="http://www.json.org" src="resource:org/exist/xquery/lib/json.xq"/>

            <module uri="http://exist-db.org/xquery/sequences" src="resource:org/exist/xquery/lib/sequences.xq"/>

            <module uri="http://exist-db.org/versioning" src="resource:org/exist/versioning/xquery/versioning.xqm"/>

            <module uri="http://exist-db.org/xquery/testing" src="resource:org/exist/xquery/lib/test.xq"/>   

            <module uri="http://xproc.net/xproc" src="resource:net/xproc/xprocxq/src/xquery/xproc.xqm"/>         
            <module uri="http://xproc.net/xproc/const" src="resource:net/xproc/xprocxq/src/xquery/const.xqm"/>          
            <module uri="http://xproc.net/xproc/ext" src="resource:net/xproc/xprocxq/src/xquery/ext.xqm"/>    
            <module uri="http://xproc.net/xproc/functions" src="resource:net/xproc/xprocxq/src/xquery/functions.xqm"/>        
            <module uri="http://xproc.net/xproc/naming" src="resource:net/xproc/xprocxq/src/xquery/naming.xqm"/>
            <module uri="http://xproc.net/xproc/opt" src="resource:net/xproc/xprocxq/src/xquery/opt.xqm"/> 
            <module uri="http://xproc.net/xproc/std" src="resource:net/xproc/xprocxq/src/xquery/std.xqm"/>     
            <module uri="http://xproc.net/xproc/util" src="resource:net/xproc/xprocxq/src/xquery/util.xqm"/>       

            <module uri="http://exist-db.org/xquery/file" class="org.exist.xquery.modules.file.FileModule"/>

            <module uri="http://exist-db.org/xquery/xqdoc" class="org.exist.xqdoc.xquery.XQDocModule"/>

        </builtin-modules>
    </xquery>
    <xupdate allowed-fragmentation="5" enable-consistency-checks="no"/>

</exist>

Is it a common problem? Or may be I configured eXist wrong?

Thanks for any help.


Solution

  • Thanks to @joewiz and @avernet

    Now I can answer my own question. The main problem is that I'm using orbeon 3.8, which use eXist DB to store cache there. Updating to 3.9 version will help, because 3.9 use Ehcahe instead.

    As temporary workaround, I configured orbeon 3.8 to store data to external eXist for that in properties-local.xml I add

    <property as="xs:anyURI"  name="oxf.fr.persistence.service.exist.uri" value="http://127.0.0.1:8080/exist/rest/db"/>
    

    property as told here, and embeded eXist to store cache for that next properties should be set

    <property as="xs:anyURI"  name="oxf.xforms.store.application.uri"                   value="xmldb:exist:///"/>                      
    <property as="xs:string"  name="oxf.xforms.store.application.collection"            value="/db/orbeon/xforms/cache/"/>                                        
    

    So when servlet container stops I can safely delete embeded eXist data files.