Search code examples
amazon-web-services neo4j haproxy high-availability

Neo4j HA with Haproxy (Neo.ClientError.Transaction.TransactionNotFound)


I am running neo4j-enterprise-3.0.4 as an HA cluster on AWS, using the Bolt protocol. I use HAProxy to determine which instance is the master and which instances are the slaves in the HA cluster.

These are the settings in my HAProxy haproxy.cfg:

# Process-wide settings: syslog targets, chroot jail, admin socket,
# unprivileged user/group, daemon mode and the global connection cap.
global
   log 127.0.0.1 local0
   log 127.0.0.1 local1 notice
   chroot /var/lib/haproxy
   stats socket /run/haproxy/admin.sock mode 660 level admin
   stats timeout 30s
   user haproxy
   group haproxy
   daemon
   maxconn 256
# Settings inherited by every proxy section below unless overridden there.
defaults
    log global
    # Sections that do not set their own mode run as plain TCP proxies.
    mode tcp
    option tcplog
    option dontlognull
    timeout connect 30s
    timeout client 2h
    timeout server 2h
# Client-facing listener on port 81; selects a backend per request.
# NOTE(review): this section has no `mode http`, so it inherits `mode tcp`
# from defaults, yet the ACLs below use HTTP-level fetches (method, hdr_val,
# path_beg) -- confirm they can actually match in this mode.
frontend http-in
    bind *:81
    # Request-classification ACLs.
    acl write_method method POST DELETE PUT
    acl write_hdr hdr_val(X-Write) eq 1
    # Case-insensitive regex over the request payload looking for Cypher write keywords.
    acl write_payload payload(0,0) -m reg -i CREATE|MERGE|SET|DELETE|REMOVE
    acl tx_cypher_endpoint path_beg /db/data/transaction
    # Remember the endpoint match in a transaction-scoped variable so the
    # backend section can test it again.
    http-request set-var(txn.tx_cypher_endpoint) bool(true) if tx_cypher_endpoint
    # Routing rules: the order below is significant, do not reorder.
    use_backend neo4j-master if write_hdr
    use_backend neo4j-master if tx_cypher_endpoint write_payload
    use_backend neo4j-all if tx_cypher_endpoint
    use_backend neo4j-master if write_method
    default_backend neo4j-all
# Backend over all three servers, health-checked against the
# /db/manage/server/ha/available endpoint.
backend neo4j-all
    option httpchk GET /db/manage/server/ha/available HTTP/1.0\r\nAuthorization:\ Basic\ [code]
    # Re-evaluate the flag stored by the frontend.
    acl tx_cypher_endpoint var(txn.tx_cypher_endpoint),bool
    # Stickiness table keyed on an integer (presumably the transaction id -- TODO confirm).
    stick-table type integer size 1k expire 70s  # slightly higher than org.neo4j.server.transaction.timeout
    # Look up the 4th '/'-separated word of the request path in the table.
    stick match path,word(4,/) if tx_cypher_endpoint
    # Learn the key from the 6th '/'-separated word of the response Location header.
    stick store-response hdr(Location),word(6,/) if tx_cypher_endpoint
    # Traffic is forwarded to port 7687, while health checks are sent to port 7474.
    server neo4j-1 192.0.0.250:7687 check port 7474
    server neo4j-2 192.0.0.251:7687 check port 7474
    server neo4j-3 192.0.0.252:7687 check port 7474
# Backend over the same three servers, health-checked against the
# /db/manage/server/ha/master endpoint (presumably only the current master
# passes -- verify against the Neo4j HA endpoint documentation).
backend neo4j-master
    option httpchk GET /db/manage/server/ha/master HTTP/1.0\r\nAuthorization:\ Basic\ [code]
    server neo4j-1 192.0.0.250:7687 check port 7474
    server neo4j-2 192.0.0.251:7687 check port 7474
    server neo4j-3 192.0.0.252:7687 check port 7474
# HAProxy statistics page on port 82 at /haproxy?stats, behind basic auth.
# NOTE(review): the credentials are hard-coded as admin:admin -- confirm this is intended.
listen admin
    bind *:82
    mode http
    stats enable
    stats uri /haproxy?stats
    stats realm Haproxy\ Statistics
    stats auth admin:admin

Sometimes I get this error when I run Cypher queries in the browser:

"errors": [
    {
      "code": "Neo.ClientError.Transaction.TransactionNotFound",
      "message": "Unrecognized transaction id. Transaction may have timed out and been rolled back."
    }
  ]

I also tried the following HAProxy configuration, but I still have the same problem. These are the settings in my second haproxy.cfg:

# Process-wide settings: syslog targets, chroot jail, admin socket,
# unprivileged user/group, daemon mode and the global connection cap.
global
   log 127.0.0.1 local0
   log 127.0.0.1 local1 notice
   chroot /var/lib/haproxy
   stats socket /run/haproxy/admin.sock mode 660 level admin
   stats timeout 30s
   user haproxy
   group haproxy
   daemon
   maxconn 256
# Settings inherited by every proxy section below unless overridden there.
defaults
    log global
    # Sections that do not set their own mode run as plain TCP proxies.
    mode tcp
    option tcplog
    option dontlognull
    timeout connect 30s
    timeout client 2h
    timeout server 2h
# Client-facing listener on port 81; selects a backend per request.
# NOTE(review): inherits `mode tcp` from defaults while the ACLs below use
# HTTP-level fetches (method, hdr_val, path_beg) -- confirm they can match.
frontend http-in
    bind *:81
    # Request-classification ACLs.
    acl write_method method POST DELETE PUT
    acl write_hdr hdr_val(X-Write) eq 1
    # Case-insensitive regex over the request payload looking for Cypher write keywords.
    acl write_payload payload(0,0) -m reg -i CREATE|MERGE|SET|DELETE|REMOVE
    acl tx_cypher_endpoint path_beg /db/data/transaction
    # Remember the endpoint match in a transaction-scoped variable for the backend.
    http-request set-var(txn.tx_cypher_endpoint) bool(true) if tx_cypher_endpoint
    # Routing rules: the order below is significant, do not reorder.
    use_backend neo4j-master if write_hdr
    use_backend neo4j-master if tx_cypher_endpoint write_payload
    use_backend neo4j-all if tx_cypher_endpoint
    use_backend neo4j-master if write_method
    default_backend neo4j-all
# Backend over all three servers.
# NOTE(review): in this variant the health check targets /ha/master rather
# than /ha/available, so presumably only the master is considered up in the
# "all" pool -- confirm this is what was intended.
backend neo4j-all
    option httpchk GET /db/manage/server/ha/master HTTP/1.0\r\nAuthorization:\ Basic\ [code]
    # Re-evaluate the flag stored by the frontend.
    acl tx_cypher_endpoint var(txn.tx_cypher_endpoint),bool
    # Stickiness table keyed on an integer (presumably the transaction id -- TODO confirm).
    stick-table type integer size 1k expire 70s  # slightly higher than org.neo4j.server.transaction.timeout
    # Look up the 4th '/'-separated word of the request path in the table.
    stick match path,word(4,/) if tx_cypher_endpoint
    # Learn the key from the 6th '/'-separated word of the response Location header.
    stick store-response hdr(Location),word(6,/) if tx_cypher_endpoint
    # Traffic is forwarded to port 7687, while health checks are sent to port 7474.
    server neo4j-1 192.0.0.250:7687 check port 7474
    server neo4j-2 192.0.0.251:7687 check port 7474
    server neo4j-3 192.0.0.252:7687 check port 7474
# Backend named "master" over the same three servers.
# NOTE(review): the health check targets /ha/slave, which looks inverted
# relative to the backend's name -- confirm against the Neo4j HA endpoints.
backend neo4j-master
    option httpchk GET /db/manage/server/ha/slave HTTP/1.0\r\nAuthorization:\ Basic\ [code]
    server neo4j-1 192.0.0.250:7687 check port 7474
    server neo4j-2 192.0.0.251:7687 check port 7474
    server neo4j-3 192.0.0.252:7687 check port 7474
# HAProxy statistics page on port 82 at /haproxy?stats, behind basic auth.
# NOTE(review): the credentials are hard-coded as admin:admin -- confirm this is intended.
listen admin
    bind *:82
    mode http
    stats enable
    stats uri /haproxy?stats
    stats realm Haproxy\ Statistics
    stats auth admin:admin

So I am not sure why this is happening. Is it caused by HAProxy, AWS, or Bolt? When I switch the protocol to HTTP, everything works well and I do not get the error.


Solution

  • I fixed this problem by adding the following to the HAProxy .cfg file:

    a backend neo4j-browser with mode http and option prefer-last-server (together with a frontend http-browse that routes to it). Now HAProxy works like a charm and I no longer get the error.

    # Process-wide settings: syslog targets, chroot jail, admin socket,
    # unprivileged user/group, daemon mode and the global connection cap.
    global
       log 127.0.0.1 local0
       log 127.0.0.1 local1 notice
       chroot /var/lib/haproxy
       stats socket /run/haproxy/admin.sock mode 660 level admin
       stats timeout 30s
       user haproxy
       group haproxy
       daemon
       maxconn 256
    # Settings inherited by every proxy section below unless overridden there.
    defaults
        log global
        # Sections that do not set their own mode run as plain TCP proxies.
        mode tcp
        option tcplog
        option dontlognull
        timeout connect 30s
        timeout client 2h
        timeout server 2h
    # Client-facing listener on port 81; selects a backend per request.
    # NOTE(review): inherits `mode tcp` from defaults while the ACLs below use
    # HTTP-level fetches (method, hdr_val, path_beg) -- confirm they can match.
    frontend http-in
        bind *:81
        # Request-classification ACLs.
        acl write_method method POST DELETE PUT
        acl write_hdr hdr_val(X-Write) eq 1
        # Case-insensitive regex over the request payload looking for Cypher write keywords.
        acl write_payload payload(0,0) -m reg -i CREATE|MERGE|SET|DELETE|REMOVE
        acl tx_cypher_endpoint path_beg /db/data/transaction
        # Remember the endpoint match in a transaction-scoped variable for the backend.
        http-request set-var(txn.tx_cypher_endpoint) bool(true) if tx_cypher_endpoint
        # Routing rules: the order below is significant, do not reorder.
        use_backend neo4j-master if write_hdr
        use_backend neo4j-master if tx_cypher_endpoint write_payload
        use_backend neo4j-all if tx_cypher_endpoint
        use_backend neo4j-master if write_method
        default_backend neo4j-all
    # Separate HTTP-mode listener on port 83; everything it receives goes to
    # the neo4j-browser backend.
    frontend http-browse
        bind *:83
        mode http
        default_backend neo4j-browser
    # Backend over all three servers, health-checked against the
    # /db/manage/server/ha/available endpoint.
    backend neo4j-all
        # The space between "Basic" and the credentials must be escaped ("\ "),
        # otherwise the Authorization header sent with the check is malformed.
        option httpchk GET /db/manage/server/ha/available HTTP/1.0\r\nAuthorization:\ Basic\ [code]
        # Re-evaluate the flag stored by the frontend.
        acl tx_cypher_endpoint var(txn.tx_cypher_endpoint),bool
        # Stickiness table keyed on an integer (presumably the transaction id -- TODO confirm).
        stick-table type integer size 1k expire 70s  # slightly higher than org.neo4j.server.transaction.timeout
        # Look up the 4th '/'-separated word of the request path in the table.
        stick match path,word(4,/) if tx_cypher_endpoint
        # Learn the key from the 6th '/'-separated word of the response Location header.
        stick store-response hdr(Location),word(6,/) if tx_cypher_endpoint
        # Traffic is forwarded to port 7687, while health checks are sent to port 7474.
        server neo4j-1 192.0.0.250:7687 check port 7474
        server neo4j-2 192.0.0.251:7687 check port 7474
        server neo4j-3 192.0.0.252:7687 check port 7474
    # Backend over the three servers, health-checked against the
    # /db/manage/server/ha/master endpoint (presumably only the current master
    # passes -- verify against the Neo4j HA endpoint documentation).
    backend neo4j-master
        # The space between "Basic" and the credentials must be escaped ("\ "),
        # otherwise the Authorization header sent with the check is malformed.
        option httpchk GET /db/manage/server/ha/master HTTP/1.0\r\nAuthorization:\ Basic\ [code]
        # Traffic is forwarded to port 7687, while health checks are sent to port 7474.
        server neo4j-1 192.0.0.250:7687 check port 7474
        server neo4j-2 192.0.0.251:7687 check port 7474
        server neo4j-3 192.0.0.252:7687 check port 7474
    # HTTP-mode backend for browser traffic: requests are sent to port 7474 on
    # whichever server passes the /db/manage/server/ha/master check.
    backend neo4j-browser
        mode http
        # Keep successive requests of a client connection on the server used last.
        option prefer-last-server
        option httpchk GET /db/manage/server/ha/master HTTP/1.0\r\nAuthorization:\ Basic\ [code]
        # Traffic and health checks both use port 7474 here (no separate check port).
        server neo4j-1 192.0.0.250:7474 check
        server neo4j-2 192.0.0.251:7474 check
        # Address uses a single ':' between host and port, like its siblings.
        server neo4j-3 192.0.0.252:7474 check
    # HAProxy statistics page on port 82 at /haproxy?stats, behind basic auth.
    # NOTE(review): the credentials are hard-coded as admin:admin -- confirm this is intended.
    listen admin
        bind *:82
        mode http
        stats enable
        stats uri /haproxy?stats
        stats realm Haproxy\ Statistics
        stats auth admin:admin