Search code examples
jclouds

JClouds: Executing a long script


I am using JClouds to execute the scripts in the servers that I am creating in the cloud.

The problem that I am having is that the script that I run will take around 40 minutes to finish.

My questions are:

1) How can I avoid the timeout ?

2) JClouds's SshClient will try 5 times. Is there any way to limit the number of retries ?

Although JClouds thinks that the script failed, the script was successfully executed in the server.

So far the code that I use to execute scripts looks like this:

    org.jclouds.ssh.SshClient sshClient = null;

    Properties overrides = new Properties();
    overrides.setProperty(ComputeServiceProperties.POLL_INITIAL_PERIOD, POLL_PERIOD_TWENTY_SECONDS);
    overrides.setProperty(ComputeServiceProperties.POLL_MAX_PERIOD, POLL_PERIOD_TWENTY_SECONDS);
    overrides.setProperty(ComputeServiceProperties.TIMEOUT_SCRIPT_COMPLETE,  String.valueOf(18000000));


    ComputeServiceContext context;
    try {
        context = ContextBuilder.newBuilder(provider.getProvider_cs())
                .credentials(providerCredentials.getClientId(), providerCredentials.getKey())
                .modules(modules)
                .overrides(overrides)
                .buildView(ComputeServiceContext.class);
    } catch (Exception e) {
        context = null;
        log.error("Error while creating ComputeServiceContext ", e);
    }

    if (context != null ) {
        try {
            String host = cloudServer.getDaemon().getHost();
            int port = cloudServer.getDaemon().getPort();
            String username = cloudServer.getCredential().getUsername();
            String password = cloudServer.getCredential().getPassword();
            String privKey = cloudServer.getCredential().getPrivKey();

            if (password == null || password.isEmpty()) {
                sshClient = context.utils().sshFactory().create(HostAndPort.fromParts(host, port),
                        LoginCredentials.builder().user(username).privateKey(privKey).build());
            } else {
                sshClient = context.utils().sshFactory().create(HostAndPort.fromParts(host, port),
                        LoginCredentials.builder().user(username).password(password).build());
            }

        } catch (Exception e) {
            log.error("Error while building sshClient nodeMetaData", e);
        }
    }

    // put the script in the server
    if ( sshClient != null ) {
        log.info("Putting file");
        try {
            sshClient.put(scriptFile.getScriptPath(), scriptFile.getContent());
        } catch (Exception e) {
            log.error("Error while uploading script", e);
        }
    }

    // run the script
    if ( sshClient != null ) {
        log.info("Running script");
        try {
            String script = getScriptContent(); // the script will take around 40 minutes to be executed
            log.info(script);
            ExecResponse response = sshClient.exec(script);
            if (response.getExitStatus() != 0) {
                log.error("Error while executing script: " + response.getError());
            }
        } catch (Exception e) {
            log.error("Error while executing script", e);
        }
    }

Logs:

11:48:39.648 INFO  com.r3systems.manageacloud.service.CloudService:257 - Putting file
11:48:40.087 INFO  n.s.sshj.connection.channel.direct.SessionChannel:207 - Will request `sftp` subsystem
11:48:41.739 INFO  com.r3systems.manageacloud.service.CloudService:267 - Running script
11:48:41.740 INFO  com.r3systems.manageacloud.service.CloudService:270 - #!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?

11:48:42.149 INFO  n.s.sshj.connection.channel.direct.SessionChannel:120 - Will request to exec `#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
`
11:49:00.957 ERROR net.schmizz.sshj.transport.TransportImpl:570 - Dying because - java.net.SocketTimeoutException: Read timed out
11:49:00.958 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - UNKNOWN
11:54:06.421 ERROR net.schmizz.sshj.transport.TransportImpl:570 - Dying because - java.net.SocketTimeoutException: Read timed out
11:54:06.421 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - UNKNOWN
11:54:06.423 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - BY_APPLICATION
11:54:06.425 INFO  jclouds.ssh:68 - << (root:rsa[fingerprint(1d:7c:f8:7e:b0:f4:23:a2:bc:2e:22:69:8a:4a:5b:a4),sha1(14:14:f4:d3:71:72:8a:c1:d3:b3:a2:c5:71:b4:e0:98:7b:03:c3:6c)]@198.199.97.55:22) error acquiring ExecResponse(command=[#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
]) (attempt 1 of 5): Read timed out
11:54:06.634 WARN  net.schmizz.sshj.DefaultConfig:159 - Disabling high-strength ciphers: cipher strengths apparently limited by JCE policy
11:54:06.806 INFO  net.schmizz.sshj.transport.TransportImpl:152 - Client identity string: SSH-2.0-SSHJ_0_8_1_SNAPSHOT
11:54:07.000 INFO  net.schmizz.sshj.transport.TransportImpl:161 - Server identity string: SSH-2.0-OpenSSH_6.0p1 Debian-4+deb7u1
11:54:08.623 INFO  n.s.sshj.connection.channel.direct.SessionChannel:120 - Will request to exec `#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
`
11:55:10.680 ERROR net.schmizz.sshj.transport.TransportImpl:570 - Dying because - java.net.SocketTimeoutException: Read timed out
11:55:10.680 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - UNKNOWN
11:55:10.682 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - BY_APPLICATION
11:55:10.683 INFO  jclouds.ssh:68 - << (root:rsa[fingerprint(1d:7c:f8:7e:b0:f4:23:a2:bc:2e:22:69:8a:4a:5b:a4),sha1(14:14:f4:d3:71:72:8a:c1:d3:b3:a2:c5:71:b4:e0:98:7b:03:c3:6c)]@198.199.97.55:22) error acquiring ExecResponse(command=[#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
]) (attempt 2 of 5): Read timed out
11:55:11.547 WARN  net.schmizz.sshj.DefaultConfig:159 - Disabling high-strength ciphers: cipher strengths apparently limited by JCE policy
11:55:11.724 INFO  net.schmizz.sshj.transport.TransportImpl:152 - Client identity string: SSH-2.0-SSHJ_0_8_1_SNAPSHOT
11:55:11.961 INFO  net.schmizz.sshj.transport.TransportImpl:161 - Server identity string: SSH-2.0-OpenSSH_6.0p1 Debian-4+deb7u1
11:55:13.614 INFO  n.s.sshj.connection.channel.direct.SessionChannel:120 - Will request to exec `#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
`
11:56:15.360 ERROR net.schmizz.sshj.transport.TransportImpl:570 - Dying because - java.net.SocketTimeoutException: Read timed out
11:56:15.360 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - UNKNOWN
11:56:15.361 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - BY_APPLICATION
11:56:15.363 INFO  jclouds.ssh:68 - << (root:rsa[fingerprint(1d:7c:f8:7e:b0:f4:23:a2:bc:2e:22:69:8a:4a:5b:a4),sha1(14:14:f4:d3:71:72:8a:c1:d3:b3:a2:c5:71:b4:e0:98:7b:03:c3:6c)]@198.199.97.55:22) error acquiring ExecResponse(command=[#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
]) (attempt 3 of 5): Read timed out
11:56:17.336 WARN  net.schmizz.sshj.DefaultConfig:159 - Disabling high-strength ciphers: cipher strengths apparently limited by JCE policy
11:56:17.537 INFO  net.schmizz.sshj.transport.TransportImpl:152 - Client identity string: SSH-2.0-SSHJ_0_8_1_SNAPSHOT
11:56:17.759 INFO  net.schmizz.sshj.transport.TransportImpl:161 - Server identity string: SSH-2.0-OpenSSH_6.0p1 Debian-4+deb7u1
11:56:19.789 INFO  n.s.sshj.connection.channel.direct.SessionChannel:120 - Will request to exec `#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
`
11:57:22.530 ERROR net.schmizz.sshj.transport.TransportImpl:570 - Dying because - java.net.SocketTimeoutException: Read timed out
11:57:22.531 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - UNKNOWN
11:57:22.532 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - BY_APPLICATION
11:57:22.534 INFO  jclouds.ssh:68 - << (root:rsa[fingerprint(1d:7c:f8:7e:b0:f4:23:a2:bc:2e:22:69:8a:4a:5b:a4),sha1(14:14:f4:d3:71:72:8a:c1:d3:b3:a2:c5:71:b4:e0:98:7b:03:c3:6c)]@198.199.97.55:22) error acquiring ExecResponse(command=[#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
]) (attempt 4 of 5): Read timed out
11:57:24.537 WARN  net.schmizz.sshj.DefaultConfig:159 - Disabling high-strength ciphers: cipher strengths apparently limited by JCE policy
11:57:24.714 INFO  net.schmizz.sshj.transport.TransportImpl:152 - Client identity string: SSH-2.0-SSHJ_0_8_1_SNAPSHOT
11:57:24.930 INFO  net.schmizz.sshj.transport.TransportImpl:161 - Server identity string: SSH-2.0-OpenSSH_6.0p1 Debian-4+deb7u1
11:57:26.592 INFO  n.s.sshj.connection.channel.direct.SessionChannel:120 - Will request to exec `#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
bash very_long_script.sh
exit $?
`
11:58:27.844 ERROR net.schmizz.sshj.transport.TransportImpl:570 - Dying because - java.net.SocketTimeoutException: Read timed out
11:58:27.845 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - UNKNOWN
11:58:27.847 INFO  net.schmizz.sshj.transport.TransportImpl:93 - Disconnected - BY_APPLICATION
11:58:27.850 ERROR jclouds.ssh:96 - << (root:rsa[fingerprint(1d:7c:f8:7e:b0:f4:23:a2:bc:2e:22:69:8a:4a:5b:a4),sha1(14:14:f4:d3:71:72:8a:c1:d3:b3:a2:c5:71:b4:e0:98:7b:03:c3:6c)]@198.199.97.55:22) error acquiring ExecResponse(command=[#!/bin/bash
set +u
shopt -s xpg_echo
shopt -s expand_aliases
unset PATH JAVA_HOME LD_LIBRARY_PATH
function abort {
   echo "aborting: $@" 1>&2
   exit 1
}
export PATH=/usr/ucb/bin:/bin:/sbin:/usr/bin:/usr/sbin
DATE=`date -u +"%Y%m%d%H%M"`
bash very_long_script.sh
exit $?
]) (out of retries - max 5): Read timed out
net.schmizz.sshj.common.SSHException: Read timed out
    at net.schmizz.sshj.common.SSHException$1.chain(SSHException.java:56) ~[sshj-0.8.1.jar:na]
    at net.schmizz.sshj.common.SSHException$1.chain(SSHException.java:49) ~[sshj-0.8.1.jar:na]
    at net.schmizz.sshj.transport.TransportImpl.die(TransportImpl.java:572) ~[sshj-0.8.1.jar:na]
    at net.schmizz.sshj.transport.Reader.run(Reader.java:79) ~[sshj-0.8.1.jar:na]
java.net.SocketTimeoutException: Read timed out
    at java.net.SocketInputStream.socketRead0(Native Method) ~[na:1.7.0_21]
    at java.net.SocketInputStream.read(SocketInputStream.java:150) ~[na:1.7.0_21]
    at java.net.SocketInputStream.read(SocketInputStream.java:121) ~[na:1.7.0_21]
    at net.schmizz.sshj.transport.Reader.run(Reader.java:68) ~[sshj-0.8.1.jar:na]

Solution

  • This is an old question, but I'm hoping this will help someone in this situation in the future.

    I'm building the SSH client directly but through a guice injector and am able to set timeout through dependency injection:

    protected SshClient getSshClient(String nodeHostName, String username,
            String password) {
    
        final long timeout = 300000;
    
        Injector i = Guice.createInjector(new SshjSshClientModule() {
            @Override
            protected void configure() {
                super.configure();
    
                bindConstant().annotatedWith(
                        Names.named(Constants.PROPERTY_CONNECTION_TIMEOUT)).to(
                        timeout);
            }
        });
        SshClient.Factory factory = i.getInstance(SshClient.Factory.class);
        SshClient connection = null;
    
        connection = factory.create(
                HostAndPort.fromParts(nodeHostName, SSH_PORT), LoginCredentials
                        .builder().user(username).password(password)
                        .authenticateSudo(false).build());
    
        return connection;
    
    }