convert x86 to x86-64 assembly

I am struggling with my program for unfortunatelly tomorrow. I was supposed to write program in assembly rotationg 24bpp square bmp image for x86 and x86-x64 platforms. Version for x86 for 32bit processor works fine, howewer i have problem with converting it to 64bit.

program compiles, but during execution I get segmentation fault and as i cannot debug it it is very hard to guess where is the problem.

Compilation code: nasm -f elf64 rot64.asm && cc -m64 -o project64 project.c rot64.o && ./project64 picture.bmp

code in c passes plain bitmap (without header) to the assembly part via call: void rotbmp24(void *r9, int width)

Below i put working code for 32bit processor and not working for 64bit:

32bit version:

; void rotbmp24(void *img, int width);
;  Rotate a 24 bpp square .BMP image of any size 90 degrees clockwise.

global rotbmp24

;define addresses of parameters passed by caller
%define     img         [ebp+8]     ;pointer to bmp image   
%define     width       [ebp+12]    ;address of width   
; allocation of place in memory for local variables
%define     row_bytes   [ebp-4]
%define     img_line    [ebp-8]
%define     img_collumn     [ebp-12]
%define     img_width       [ebp-16]

rotbmp24:
; making stack frame
push    ebp
mov     ebp, esp
sub     esp, 16

; prologue
push    ebx
push    esi
push    edi

; calculate row size
mov     eax, width       ; row size in pixels
mov     edx, eax        ;preserve eax
imul    edx, 3      ; row size* in bytes (3 bytes per pixel)
add     edx, 3      ; 3 is the maximum value to fit on 2 least sign. bits
and     edx, 0fffffffch ; zero out 2 least sign. bits, to round up to multiple of 4
mov     row_bytes, edx  ; row size in bytes (multiple of 4)

; img_width variable is now width of our bmp image
mov     img_width, eax

; pixel line    
mov     esi, img
mov     img_line, esi

; pixel collumn
sub     eax, 1
mov     edi, eax        ;preserve eax
imul    edi, 3      ; row size* in bytes (3 bytes per pixel)
mul     dword row_bytes     ; eax=eax* (row_bytes, set to 32b)
add     edi, img
add     edi, eax
mov     img_collumn, edi    ;last row

changebyte:
; change rows into collumns
mov     eax, [esi]  ;first element
mov     edx, [edi]  ;last row element

mov     ebx, eax    ;duplicate them
mov     ecx, edx

and     eax, 00ffffffh  ;check if given bits are...
and     edx, 00ffffffh
and     ebx, 0ff000000h
and     ecx, 0ff000000h

or      eax, ecx    ;put bits together
or      edx, ebx

mov     [edi], eax  ;change of elementst
mov     [esi], edx    

add     esi, 3  ;move to next byte from first
sub     edi, row_bytes  ;move to next element from last
cmp     edi, esi    ;if they are equal then we finished outer lines
jne     changebyte  
;we proceed with next lines&collumns
mov     esi, img_line
add     esi, row_bytes
mov     img_line, esi
mov     edi, img_collumn    
sub     edi, 3
mov     img_collumn, edi

cmp     edi, esi
jne     changebyte

; eeax is again width of pixel array
mov     eax, width
mov     img_width, eax

mov     esi, img
mov     img_line, esi

settt:
mov     eax, width    
sub     eax, 1
lea     edi, [eax+eax*2]
add     edi, esi

invertbyte:
; horizontal mirror
mov     eax, [esi]
mov     edx, [edi]

mov     ebx, eax
mov     ecx, edx

and     eax, 00ffffffh
and     edx, 00ffffffh
and     ebx, 0ff000000h
and     ecx, 0ff000000h

or      eax, ecx
or      edx, ebx
;changing one lines
mov     [edi], eax
mov     [esi], edx    
add     esi, 3
sub     edi, 3

cmp     edi, esi
ja      invertbyte
;jump to next lines
mov     esi, img_line
add     esi, row_bytes
mov     img_line, esi

mov     ecx, img_width
dec     ecx
mov     img_width, ecx
jnz     settt

; epilogue
pop     edi
pop     esi
pop     ebx

; return to caller        
mov     esp, ebp
pop     ebp
ret

Code for 64bit processor:

; 17. void rotbmp24(void *r9, int width);
;  Rotate a 24 bpp square .BMP image of any size 90 degrees clockwise.
;section .data
;message:
;   db 'Hello, World', 10, 0
    
;global rotbmp24
;section .text
;define addresses of parameters passed by caller
;rdi - r9  img        [rbp+8]       pointer to bmp image    
;rsi - r10    width       [rbp+12]  ;address of width   
; allocation of place in memory for local variables
;r11     row_bytes   [rbp-4]
;r12     r9_line    [rbp-8]
;r13     r9_collumn     [rbp-12]
;r14     r9_width       [rbp-16]
 
rotbmp24:
    ; making stack frame
push    rbp
mov     rbp, rsp
;sub     rsp, 16

; prologue
push    rbx
push    rsi 
push    r9
push    r10
push    r11
push    r12
push    r14
push    r15
mov     r9, rdi
mov     r10, [rsi]

  ; calculate row size
    mov     rax, r10     ; row size in pixels
mov     edx, r10d       ;preserve rax
imul    edx, 3      ; row size* in bytes (3 bytes per pixel)
add     edx, 3      ; 3 is the maximum value to fit on 2 least sign. bits
and     edx, 0xfffffffc ; zero out 2 least sign. bits, to round up to multiple of 4
mov     r11d, edx   ; row size in bytes (multiple of 4)

; r14 variable is now width of our bmp image
mov     r14, rax

; pixel line    
mov     rsi, r9
mov     r12, rsi

; pixel collumn
sub     rax, 1
mov     rdi, rax        ;preserve rax
imul    rdi, 3      ; row size* in bytes (3 bytes per pixel)
mul     qword r11       ; rax=rax* (r11, set to 32b)
add     rdi, r9
add     rdi, rax
mov     r13, rdi    ;last row


;mov rdi, message ; rdi gets the first argument (a pointer)
;xor rax, rax call
;ret printf ; printf has a variable number of arguments,
; so rax needs to be set to the number of
; vector registers used...zero in this case


changebyte:
; change rows into collumns
mov     eax, [rsi]  ;first element
mov     edx, [rdi]  ;last row element

mov     ecx, eax    ;duplicate them
mov     r15d, edx

and     eax,  00ffffffh ;get 24  bits and leave rest untouched
and     edx,  00ffffffh
and     ecx, 0ff000000h
and     r15d, 0ff000000h

or      eax, r15d   ;put bits together
or      edx, ecx

mov     [rdi], eax  ;change of elements
mov     [rsi], edx    

add     rsi, 3  ;move to next byte from first
sub     rdi, r11    ;move to next element from last
cmp     rdi, rsi    ;if they are equal then we finished outer lines
jne     changebyte  
;we proceed with next lines&collumns
mov     rsi, r12
add     rsi, r11
mov     r12, rsi
mov     rdi, r13    
sub     rdi, 3
mov     r13, rdi

cmp     rdi, rsi
jne     changebyte

; erax is again width of pixel array
mov     rax, r10
mov     r14, rax

mov     rsi, r9
mov     r12, rsi

settt:
mov     rax, r10    
sub     rax, 1
lea     rdi, [rax+rax*2]
add     rdi, rsi

invertbyte:
; horizontal mirror
mov     eax, [r9]
mov     edx, [rdi]

mov     ecx, eax
mov     r15d, edx

and     eax, 00ffffffh
and     edx, 00ffffffh
and     ecx, 0ff000000h
and     r15d, 0ff000000h

or      eax, r15d
or      edx, ecx
;changing one lines
mov     [rdi], eax
mov     [rsi], edx    
add     rsi, 3
sub     rdi, 3

cmp     rdi, rsi
ja      invertbyte
;jump to next lines
mov     rsi, r12
add     rsi, r11
mov     r12, rsi

mov     r15d, r14d
dec     r15d
mov     r14d, r15d
jnz     settt

; epilogue
pop    rbp
mov     rbp, rsp
sub     rsp, 16


pop     r15
pop     r14
pop     r12
pop     r11
pop     r10
pop     r9

pop    rsi  
pop    rbx
; return to caller        
mov     rsp, rbp
pop     rbp
ret

Solution

mov r10, [rsi] should just be mov r10, rsi since rsi has the width and is not a pointer.

The three lines at the label epilogue should be deleted.

With these changes it doesn't blow up for the test case I have tried.

You should really learn to debug yourself.