Search code examples
json perl text string-conversion

JSON to text, special sequence


I want to create a text document from the following JSON document:

[
    {
        "id": 12345,
        "url": "https://www.w3schools.com",
        "person": {
            "firstname": "John",
            "lastname": "Doe"
        },
        "department": "IT"
    },
    {
        "id": 12346,
        "url": "https://www.w3schools.com",
        "person": {
            "firstname": "Anna",
            "lastname": "Jackson"
        },
        "department": "LOG"
    }
]

My JSON schema looks like the following:

{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "id": { "type": "integer" },
            "url": { "type": "string", "format": "uri" },
            "person": {
                "type": "object",
                "properties": {
                    "firstname": { "type": "string" },
                    "lastname": { "type": "string" }
                }
            },
            "department": { "enum": [ "IT", "LOG"] }
        }
    }
}

The text document should be structured as follows:

pid:        12345
dep-abb:    IT
surname:    Doe
name:       John

pid:        12346
dep-abb:    LOG
surname:    Jackson
name:       Anna

I'm a Perl and JSON newbie and was searching for a Perl lib that can handle this approach by extending the schema (e.g. by txt_seq_no and txt_label). The labels in the text file should be sorted by txt_seq_no ASC and renamed by txt_label. Is it possible to solve this that simply? Then the schema could look like:

{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "id": { "type": "integer", "txt_seq_no": 10, "txt_label": "pid" },
            "url": { "type": "string", "format": "uri" },
            "person": {
                "type": "object",
                "properties": {
                    "firstname": { "type": "string", "txt_seq_no": 40, "txt_label": "name" },
                    "lastname": { "type": "string", "txt_seq_no": 30, "txt_label": "surname" }
                }
            },
            "department": { "enum": [ "IT", "LOG", "PROD" ], "txt_seq_no": 20, "txt_label": "dep-abb" }
        }
    }
}

Solution

  • Thanks all for the tips and support! I'm a total beginner in Perl, so feedback is welcome! (I've omitted the function parameter checks to save space.)

    used packages:

    use Data::Traverse;
    use Data::Path;
    

    sub transform

    # Schema annotation keys, in the order the code indexes them:
    #   [0] marks a JSON key that is written to the text file
    #   [1] optional in the JSON schema: the new label used in the text file
    my @KEY_IDENTIFIER = qw( txt_seq_no txt_label );
    # Path segments that occur in the JSON schema but not in the JSON document
    my @DELETE_FROM_PATH = qw( items properties );
    
    # Turn a JSON document into text output.
    #
    # The JSON schema carries identifier keys that select which document keys
    # are emitted; see get_transformator4schema, get_data4transformator and
    # print_text for the three steps chained here.
    #
    # Parameters:
    #   $doc            - the JSON document, passed on to get_data4transformator
    #   $schema         - the JSON schema, passed on to get_transformator4schema
    #   $key_identifier - optional array ref of identifier keys; falls back to
    #                     \@KEY_IDENTIFIER when not defined
    #
    # NOTE(review): $log is not declared inside this sub; presumably it is a
    # file-level variable defined elsewhere - confirm.
    sub transform {
        my ( $doc, $schema, $key_identifier) = @_;
        $key_identifier = \@KEY_IDENTIFIER unless defined $key_identifier;

        # Step 1: derive the path => annotation lookup from the schema.
        my $transformator = get_transformator4schema( $schema, $key_identifier, $log );

        # Step 2: pull the annotated values out of the document.
        my $data = get_data4transformator( $doc, $transformator, $log );

        # Step 3: print them.
        print_text( $data, $transformator, $key_identifier, $log );
    }
    

    sub get_transformator4schema

    # Build the "transformator" lookup from an annotated JSON schema.
    #
    # Walks $schema with Data::Traverse. For every node whose key equals one of
    # the first two entries of @{$key_identifier}, the node's value is stored
    # under the document path of the annotated property, e.g.
    #
    #     { "/person/firstname" => { txt_seq_no => 40, txt_label => "name" } }
    #
    # Parameters:
    #   $schema         - the JSON schema structure to traverse
    #   $key_identifier - array ref; entries [0] and [1] are the identifier keys
    # Returns: hash ref mapping path => { identifier key => value }.
    #
    # NOTE(review): assumes $t->path yields "/"-separated paths such as
    # "/items/properties/id/txt_seq_no" and that $_ holds the current item's
    # value inside the callback - confirm against Data::Traverse.
    #
    # Known limitation: a document property literally named like an entry of
    # @DELETE_FROM_PATH ("items", "properties") is still removed from the path.
    sub get_transformator4schema {
        my ( $schema, $key_identifier ) = @_;
        my $t = Data::Traverse->new($schema);

        my %transformator;

        $t->traverse(
            sub {
                my $key   = $t->item_key;
                my $value = $_;    # capture before grep re-aliases $_

                # Guard against an undefined key before comparing with eq.
                if ( defined $key
                    and grep { defined $_ and $_ eq $key } @{$key_identifier}[ 0, 1 ] )
                {
                    my $path = $t->path;

                    # Remove schema-only segments. Match whole segments only
                    # (followed by "/" or end of string) so that a property
                    # such as "itemsize" is not truncated, and quote the
                    # segment so it is never treated as a regex.
                    foreach my $del_path (@DELETE_FROM_PATH) {
                        $path =~ s{/\Q$del_path\E(?=/|\z)}{}g;
                    }

                    # Drop the trailing identifier segment, leaving the path
                    # of the annotated property itself.
                    $path =~ s{/\Q$key\E\z}{};

                    # Autovivification creates the inner hash on first use.
                    $transformator{$path}{$key} = $value;
                }
            }
        );
        return \%transformator;
    }
    

    sub get_data4transformator

    # Collect the values the transformator asks for from every document item.
    #
    # Parameters:
    #   $document      - array ref of document items
    #   $transformator - hash ref whose keys are the paths to look up
    #   $log           - accepted but not used in this sub
    # Returns: array ref holding one hash ref (path => value) per document
    # item, in document order.
    sub get_data4transformator {
        my ( $document, $transformator, $log ) = @_;

        my @template;

        for my $item ( @{$document} ) {
            my $lookup = Data::Path->new($item);

            # scalar keeps the lookup in scalar context, as a plain
            # assignment to a scalar variable would.
            my %value_for =
                map { ( $_ => scalar $lookup->get($_) ) } keys %{$transformator};

            push @template, \%value_for;
        }

        return \@template;
    }
    

    sub print_text:

    # Print every document item as " name => value" lines, ordered by the
    # sequence number stored in the transformator.
    #
    # Parameters:
    #   $data          - array ref of hash refs (path => value), one per item
    #   $transformator - hash ref: path => { identifier key => value }
    #   $key_ids       - array ref: [0] key to sort by, [1] key of the optional alias
    #   $log           - accepted but not used in this sub
    # All output is written with print.
    sub print_text {
        my ( $data, $transformator, $key_ids, $log ) = @_;

        my ( $sort_by, $opt_name ) = @{$key_ids}[ 0, 1 ];
        print "\nsort data by '$sort_by' ASC\n";

        # Paths in ascending numeric order of their $sort_by value.
        my @ordered_paths = sort {
            $transformator->{$a}->{$sort_by} <=> $transformator->{$b}->{$sort_by}
        } keys %{$transformator};

        foreach my $record ( @{$data} ) {
            print "\nread document item:\n";
            foreach my $path (@ordered_paths) {

                # The displayed name is the last segment of the path.
                ( my $field = $path ) =~ s/^.*\/(\w+)$/$1/g;

                # A missing alias is treated like an empty one.
                my $alias = $transformator->{$path}->{$opt_name};
                $alias = "" if not defined $alias;

                my $line_end = $alias eq "" ? "\n" : " alias '$alias'\n";

                print " $field => $record->{$path}";
                print $line_end;
            }
        }
    }
    

    This functionality lets you control which keys go into the text file and in which position. A key needs at least the txt_seq_no parameter to be included. The second parameter, txt_label, is optional and stores the alternative label used in the text file.

    output:

     id => 12345 alias 'pid'
     department => IT alias 'dep-abb'
     lastname => Doe
     firstname => John alias 'name'
    
     id => 12346 alias 'pid'
     department => LOG alias 'dep-abb'
     lastname => Jackson
     firstname => Anna alias 'name'
    
     id => 12347 alias 'pid'
     department => PROD alias 'dep-abb'
     lastname => Smith
     firstname => Peter alias 'name'