Search code examples
c linux verilog mmap soc

Hardware accelerated Arithmetic Logic Unit (ALU) Linux application on DE1-SoC using ARM processor (HPS)


I have created a Verilog file for an ALU that supports the following operations: add, subtract, AND, and reset. I then wrapped the ALU with an Avalon memory-mapped slave interface so that the ARM processor can access it via the H2F lightweight bridge. After mmap() is done, the user can choose the operation and enter values for data1 and data2, and the result is displayed on the terminal. The offset for each register in the ALU is 4 bit, meaning that the base addresses of the registers in the ALU have a 4-bit span.

The problem I am facing is that I can't seem to write values into the ALU registers (opcode, data1, data2). I have done the correct mapping using the mmap() function. However, the result I get is always 0.

The ALU Verilog code is shown below.

// Combinational ALU. The result is selected by `opcode`:
//   0 -> constant zero (the "reset" operation)
//   1 -> dataA + dataB
//   2 -> dataA - dataB
//   any other value (3) -> dataA & dataB
// `clk` is part of the port list but is not used inside this module.
module alu_avalon(
input clk,
input[1:0] opcode,
input[31:0] dataA,
input[31:0] dataB,
output[31:0] alu_result
);

// Opcode encodings; the remaining encoding falls through to the AND result.
localparam [1:0] OP_CLEAR = 2'd0,
                 OP_ADD   = 2'd1,
                 OP_SUB   = 2'd2;

// Candidate results, all computed in parallel.
wire [31:0] sum_ab  = dataA + dataB;
wire [31:0] diff_ab = dataA - dataB;
wire [31:0] and_ab  = dataA & dataB;

assign alu_result = (opcode == OP_CLEAR) ? 32'd0   :
                    (opcode == OP_ADD)   ? sum_ab  :
                    (opcode == OP_SUB)   ? diff_ab :
                                           and_ab;

endmodule

The ALU is then wrapped with an Avalon Memory-Mapped slave interface, as shown in the Verilog code below.

// Top level of the ALU peripheral: connects the bus-facing register block
// (alu_interface) to the combinational ALU (alu_avalon).
module alu_avalon_top (
input reset,
input clk,
input chipselect,
input [1:0]address,
input write,
input [31:0]writedata,
output [31:0]readdata
);

// Internal nets between the register block and the ALU.
wire [31:0] operand_a;   // data1 register -> ALU dataA
wire [31:0] operand_b;   // data2 register -> ALU dataB
wire [1:0]  op_sel;      // opcode register -> ALU opcode
wire [31:0] alu_out;     // ALU result -> register block

// Bus-facing register block: drives the operands and opcode, and
// receives the ALU result.
alu_interface inst2 (
    .reset      (reset),
    .clk        (clk),
    .chipselect (chipselect),
    .address    (address),
    .write      (write),
    .writedata  (writedata),
    .readdata   (readdata),
    .opcode     (op_sel),
    .data1      (operand_a),
    .data2      (operand_b),
    .alu_result (alu_out)
);

// Combinational ALU core.
alu_avalon inst3 (
    .clk        (clk),
    .opcode     (op_sel),
    .dataA      (operand_a),
    .dataB      (operand_b),
    .alu_result (alu_out)
);

endmodule

// Bus-facing register block of the ALU peripheral.
// When chipselect and write are both high, `address` selects which register
// captures `writedata`:
//   2'b00 -> opcode (low two bits of writedata)
//   2'b01 -> data1
//   2'b10 -> data2
//   2'b11 -> readdata is loaded with alu_result
// There is no read strobe on this module; readdata changes only on reset or
// during a write to address 2'b11.
module alu_interface (
input reset,
input clk,
input chipselect,
input [1:0]address,
input write,
input [31:0]writedata,
output reg [31:0]readdata,
output reg[1:0]opcode,
output reg[31:0]data1,
output reg[31:0]data2,
input[31:0] alu_result

);

// Clocked on the rising edge of clk; reset is asynchronous and active low.
always @ (posedge clk or negedge reset)
begin

    if (reset == 0)
    begin
        // Note: opcode is not assigned in this reset branch.
        readdata <= 0;
        data1 <= 0;
        data2 <= 0;
    end
    else 
    begin
        // Registers are only touched while a write is in progress.
        if(chipselect == 1 && write == 1)
        begin
            case (address)
                2'b00:      opcode <= writedata[1:0];
                2'b01:      data1 <= writedata;
                2'b10:      data2 <= writedata;
                // Remaining address (2'b11): latch the ALU result into
                // readdata. This happens only inside the write branch.
                default:    readdata <= alu_result;
            endcase
        end
    end
end

endmodule

I have added the custom IP using Qsys and connected the Avalon slave to the H2F lightweight bridge AXI master.

Qsys interconnect: Qsys interconnect map

The C code for the Linux application:

#define HW_REGS_BASE ( ALT_LWFPGASLVS_OFST )
#define HW_REGS_SPAN ( 0x00200000 )
#define HW_REGS_MASK ( HW_REGS_SPAN - 1 )

/* Byte offsets of the ALU registers, relative to the ALU base address. */
#define ALU_REG_OPCODE ( 0x0 )
#define ALU_REG_DATA1  ( 0x4 )
#define ALU_REG_DATA2  ( 0x8 )
#define ALU_REG_RESULT ( 0xc )

volatile unsigned long *aluMap = NULL;
void *virtual_base;

/*
 * Maps the FPGA register window through /dev/mem, asks the user for an
 * opcode and two operands, writes them to the ALU registers and prints the
 * value read back from the result register.
 *
 * Returns 0 on success and 1 if /dev/mem cannot be opened, the mapping
 * fails, or the user input cannot be parsed as an integer.
 */
int main(void){

    int fd;
    int status = 1;
    int op;
    int dat1;
    int dat2;
    int z;

    printf("Open memory map\n");
    if( ( fd = open( "/dev/mem", ( O_RDWR | O_SYNC ) ) ) == -1 ) {
        printf( "ERROR: could not open \"/dev/mem\"...\n" );
        return( 1 );
    }

    virtual_base = mmap( NULL, HW_REGS_SPAN , ( PROT_READ | PROT_WRITE ), MAP_SHARED, fd, HW_REGS_BASE );

    if( virtual_base == MAP_FAILED ) {
        printf( "ERROR: mmap() failed...\n" );
        close( fd );
        return( 1 );
    }

    /*
     * All register offsets are BYTE offsets, so the address arithmetic is
     * done on a byte pointer. Adding 0x4 to an `unsigned long *` would
     * advance by 4 * sizeof(unsigned long) bytes and miss the register.
     */
    volatile unsigned char *alu_bytes = (volatile unsigned char *)virtual_base + ALU8_0_BASE;
    aluMap = (volatile unsigned long *)alu_bytes;
    printf("ALU addr: %p\n", (void *)aluMap);

    volatile unsigned int *opcode = (volatile unsigned int *)(alu_bytes + ALU_REG_OPCODE);
    volatile unsigned int *data1  = (volatile unsigned int *)(alu_bytes + ALU_REG_DATA1);
    volatile unsigned int *data2  = (volatile unsigned int *)(alu_bytes + ALU_REG_DATA2);
    volatile unsigned int *result = (volatile unsigned int *)(alu_bytes + ALU_REG_RESULT);

    /* %p is the only conversion that is valid for pointer arguments. */
    printf("op:%p\ndat1:%p\ndat2:%p\nresult:%p\n",
           (void *)opcode, (void *)data1, (void *)data2, (void *)result);

    /* Never push an unparsed (uninitialised) value into a hardware register. */
    printf("operation code: ");
    if( scanf(" %d", &op) != 1 ) {
        printf( "ERROR: invalid operation code\n" );
        goto cleanup;
    }
    *opcode = op;

    printf("data1: ");
    if( scanf(" %d", &dat1) != 1 ) {
        printf( "ERROR: invalid value for data1\n" );
        goto cleanup;
    }
    *data1 = dat1;

    printf("data2: ");
    if( scanf(" %d", &dat2) != 1 ) {
        printf( "ERROR: invalid value for data2\n" );
        goto cleanup;
    }
    *data2 = dat2;

    z = *result;
    printf("The result is %d\n", z);
    status = 0;

cleanup:
    /* Release the mapping and the descriptor on every exit path. */
    if( munmap( virtual_base, HW_REGS_SPAN ) != 0 ) {
        printf( "ERROR: munmap() failed...\n" );
        status = 1;
    }
    close( fd );
    return status;
}

The output is ALU output

Can someone show me what I have done wrong in the code or the connections? I have been troubleshooting this for a month... Is the memory mapping for an IP that has registers different from that for an IP that doesn't? Or do I need to write an ALU kernel driver so that Linux can recognize the hardware ALU?

Any advice is appreciated.


Solution

  • Many thanks to the online community for helping me to solve my problem.

    As @Unn had pointed out,

    • Write signal should be used only for write transaction
    • Read signal should be used only for read transaction
    • chipselect is deprecated in newer QSYS

    I have edited the Verilog code (separating the read and write transactions and removing chipselect) and voilà, the ALU works like a charm.

    // Register logic of the Avalon-MM slave, with separate write and read
    // handling. Clocked on the rising edge of clk; reset is asynchronous
    // and active low.
    // `read` is not declared in this fragment; it is assumed to be an input
    // port added to the enclosing module.
    always @ (posedge clk or negedge reset)
    begin
    
        if (reset == 0)
        begin
            readdata <= 0;
            // Also clear opcode so every register has a defined value after
            // reset; opcode 0 makes the ALU output 0.
            opcode <= 0;
            data1 <= 0;
            data2 <= 0;
        end
        else 
        begin
            // Write transaction: `address` selects the register that
            // captures writedata. Address 2'b11 (the result) is read-only.
            if(write == 1)
            begin
                case (address)
                    2'b00:  opcode <= writedata[1:0];
                    /* OPCODE
                     1: ADD
                     2: SUB
                     3: AND   */
                    2'b01:  data1 <= writedata;
                    2'b10:  data2 <= writedata;
                    default:    ;
                endcase
            end
            // Read transaction: `address` selects the value placed on
            // readdata. It is registered, so it changes on the clock edge.
            else if (read == 1)
            begin
                case (address)
                    2'b00:  readdata <= opcode;
                    2'b01:  readdata <= data1;
                    2'b10:  readdata <= data2;
                    2'b11:  readdata <= alu_result;
                    default: readdata <= 0;
                endcase
            end
        end
    end
    

    One more change that had to be made was to replace #define HW_REGS_BASE ( ALT_LWFPGASLVS_OFST ) with #define HW_REGS_BASE ( ALT_STM_OFST ) in the main.c program.

    Below is the ALU output terminal

    ALU output

    Special thanks to Rocketboard community as well