Search code examples
spring, hadoop, kerberos, webhdfs

Spring support for WebHDFS


Is there any Spring support for WebHDFS? I didn't find any useful links on Google.

I want to connect to Hadoop via WebHDFS, using both normal (simple) authentication and Kerberos authentication. Is this supported in Spring?

Any useful links will be helpful.

Thanks


Solution

  • Yes, Spring Data supports this. According to this documentation, it's possible to configure any supported Hadoop file system:

    http://docs.spring.io/spring-hadoop/docs/current/reference/html/fs.html

    SHDP does not enforce any specific protocol to be used - in fact, as described in this section any FileSystem implementation can be used, allowing even other implementations than HDFS to be used.

    See below for a code sample that demonstrates auto-wiring a WebHDFS FileSystem instance into a command-line application. To run this, pass file paths as command line arguments, and it will list every file present at that path by calling FileSystem.listStatus.

    The code sample is configured to connect to an unsecured WebHDFS instance with "simple" authentication. To connect to a WebHDFS instance secured with Kerberos, you'd set up the relevant configuration properties in the <hdp:configuration id="hadoopConfiguration" /> bean. Hadoop security configuration is a very large topic. Rather than repeat the information, I'll just point to the documentation in Apache:

    http://hadoop.apache.org/docs/r2.4.1/hadoop-project-dist/hadoop-common/SecureMode.html

    pom.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      Maven build for a small Spring Boot command-line application that uses
      Spring for Apache Hadoop together with the Hadoop client libraries.
    -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>test-spring-hadoop</groupId>
        <artifactId>test-webhdfs</artifactId>
        <packaging>jar</packaging>
        <version>0.0.1-SNAPSHOT</version>
        <name>Test Spring Hadoop with WebHDFS</name>
        <description>Test Spring Hadoop with WebHDFS</description>

        <!-- The Spring Boot parent manages the versions of the Boot starter and plugin below. -->
        <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>1.1.0.RELEASE</version>
        </parent>

        <repositories>
            <!--
              Declared over HTTPS: Maven 3.8.1 and later block plain-HTTP
              repositories by default. The id names the release repository
              that the URL actually points to.
            -->
            <repository>
                <id>spring-releases</id>
                <url>https://repo.spring.io/libs-release</url>
            </repository>
        </repositories>

        <properties>
            <!-- Main class of the application (see src/main/java/testwebhdfs/Main.java). -->
            <start-class>testwebhdfs.Main</start-class>
            <java.version>1.6</java.version>
            <!-- Single place to set the version shared by both Hadoop artifacts below. -->
            <hadoop.version>2.4.1</hadoop.version>
        </properties>

        <build>
            <plugins>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                </plugin>
            </plugins>
        </build>

        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.data</groupId>
                <artifactId>spring-data-hadoop</artifactId>
                <version>2.0.2.RELEASE</version>
            </dependency>
            <!-- Hadoop client libraries; both use ${hadoop.version} so they stay in step. -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>${hadoop.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>${hadoop.version}</version>
            </dependency>
        </dependencies>
    </project>
    

    src/main/resources/hadoop-context.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      Spring application context for Hadoop access, using the Spring for
      Apache Hadoop ("hdp") XML namespace. It declares a Hadoop configuration
      and a file system addressed through a webhdfs:// URI.
    -->
    <beans xmlns="http://www.springframework.org/schema/beans"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xmlns:hdp="http://www.springframework.org/schema/hadoop"
           xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
                       http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd">
    
        <!-- Hadoop configuration bean; no properties are set here, so defaults apply.
             Security-related (e.g. Kerberos) properties would be added inside this element. -->
        <hdp:configuration id="hadoopConfiguration" />
        <!-- File system reached over WebHDFS. NOTE(review): localhost:50070 is presumably
             the NameNode HTTP address of a local test cluster; adjust for your environment. -->
        <hdp:file-system uri="webhdfs://localhost:50070" />
    </beans>
    

    src/main/java/testwebhdfs/Main.java

    package testwebhdfs;
    
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.boot.CommandLineRunner;
    import org.springframework.boot.SpringApplication;
    import org.springframework.context.annotation.Configuration;
    import org.springframework.context.annotation.ImportResource;
    
    /**
     * Command-line application that lists the contents of Hadoop paths.
     *
     * <p>The class imports {@code hadoop-context.xml} and has a {@link FileSystem}
     * injected. Every command-line argument is treated as a path; the status
     * entries returned by {@link FileSystem#listStatus(Path[])} for those paths
     * are printed to standard output, one path per line.
     */
    @Configuration
    @ImportResource("hadoop-context.xml")
    public class Main implements CommandLineRunner {
    
        /** File system injected by Spring. */
        @Autowired
        private FileSystem fs;
    
        /**
         * Lists the given paths and prints the path of every returned status entry.
         *
         * @param strings the paths to list, as passed on the command line
         * @throws Exception if listing the paths fails
         */
        @Override
        public void run(String... strings) throws Exception {
            final FileStatus[] listing = fs.listStatus(toPaths(strings));
            for (FileStatus entry : listing) {
                System.out.println(entry.getPath());
            }
        }
    
        /** Wraps each raw path string in a Hadoop {@link Path}, preserving order. */
        private static Path[] toPaths(String[] rawPaths) {
            final Path[] converted = new Path[rawPaths.length];
            int cursor = 0;
            for (String raw : rawPaths) {
                converted[cursor++] = new Path(raw);
            }
            return converted;
        }
    
        /**
         * Starts the Spring Boot application with this class as its configuration source.
         *
         * @param args command-line arguments, passed to {@link SpringApplication#run}
         */
        public static void main(String[] args) {
            SpringApplication.run(Main.class, args);
        }
    }