Search code examples
arm clang cross-compiling stm32

How to crosscompile for STM32L4 cortex-m4 mcu using Clang/LLVM on Windows


I've been trying to compile a simple application implementing a USB CDC device for an STM32L4 microcontroller using Clang on Windows. The code was generated by STM32CubeMX with some minor changes so that it just echoes whatever is sent via the virtual COM port.

Compiling with the arm-none-eabi-gcc toolchain on Ubuntu using the generated makefile works just fine. After flashing to the microcontroller, it does exactly what it's supposed to do. To compile with Clang on Windows, I made this fancy build script (it generates objects from the asm/C sources and links them, using these options:

rem Target selection: bare-metal ARM triple, ARMv7E-M architecture, Cortex-M4 CPU.
rem NOTE(review): TARGET_TRIPE looks like a typo for TARGET_TRIPLE. It is spelled
rem the same way where it is expanded below, so it still works as written.
set TARGET_TRIPE=--target=arm-none-eabi
set ARCH=-march=armv7e-m
set CPU=-mcpu=cortex-m4
rem NOTE(review): the hard-float ABI is requested here, while the startup file
rem shown later declares .fpu softvfp. Confirm that the two are meant to differ.
set FPU=-mfpu=fpv4-sp-d16 -mfloat-abi=hard
set MCU=%TARGET_TRIPE% %ARCH% %CPU% %FPU% -mthumb -mlittle-endian

rem Flags shared by the compile, assemble and link command lines below.
rem NOTE(review): -O0 is appended after the OPTIMIZATIONS variable. GCC-style
rem drivers honour the last -O option, so this presumably overrides whatever
rem OPTIMIZATIONS contains. Confirm that this is intended.
set COMMON_FLAGS=-Wall %OPTIMIZATIONS% --sysroot=%SYSROOT% -fdata-sections -ffunction-sections -O0

rem C sources: compile only.
set C_FLAGS=%MCU% %C_DEFINES% %COMMON_FLAGS% %C_INCLUDES% -c
rem Assembly sources: run the C preprocessor first, then assemble only.
set ASM_FLAGS=%MCU% %ASM_DEFINES% %COMMON_FLAGS% -x assembler-with-cpp -c
rem Link with lld and the custom linker script, without default libraries or
rem startup files. A map file is written and unreferenced sections are dropped.
set LD_FLAGS=%MCU% %COMMON_FLAGS% -nostdlib -nostartfiles -fuse-ld=lld -T%LD_SCRIPT% -Wl,-Map=%BUILD_DIR%/%PROJECT_NAME%.map,--cref,--gc-sections %LIBDIRS% %LIBS%

).

Compilation finishes successfully, the firmware image seems fine, however after flashing it, the micro controller just does nothing. Not even the external crystal starts up, so the firmware images are obviously faulty. I have no idea, why that is. Binaries generated by Clang/GCC. Repo with source.

Edit: The clang binary image is ~400MB big, that doesn't seem right either.

Edit 2:

This is the linker script I'm using:

/* ELF entry symbol: execution starts in the reset handler. */
ENTRY(Reset_Handler)

/* Initial stack pointer: first address past the 40K RAM region
   (0x20000000 + 0xA000). */
_estack = 0x2000A000;

/* Space reserved for heap and stack in ._user_heap_stack; the link fails
   if this reservation no longer fits into RAM. */
_Min_Heap_Size  = 0x200;
_Min_Stack_Size = 0x400;

/* Memory regions of the target */
MEMORY
{
  FLASH (rx)  : ORIGIN = 0x08000000, LENGTH = 128K
  RAM   (xrw) : ORIGIN = 0x20000000, LENGTH = 40K
}

/* Define output sections.
 *
 * Everything except .data, .bss and ._user_heap_stack is placed in FLASH.
 * .data runs from RAM but is stored in FLASH (AT> FLASH); the startup code
 * copies it using _sidata/_sdata/_edata and clears .bss using _sbss/_ebss.
 */
SECTIONS
{
  /* The startup code goes first into FLASH */
  .isr_vector :
  {
    . = ALIGN(8);
    KEEP(*(.isr_vector)) /* Startup code */
    . = ALIGN(8);
  } >FLASH

  /* The program code and other data goes into FLASH */
  .text :
  {
    . = ALIGN(8);
    *(.text)           /* .text sections (code) */
    *(.text*)          /* .text* sections (code) */
    *(.glue_7)         /* glue arm to thumb code */
    *(.glue_7t)        /* glue thumb to arm code */
    *(.eh_frame)

    KEEP (*(.init))
    KEEP (*(.fini))

    . = ALIGN(8);
    _etext = .;        /* define a global symbols at end of code */
  } >FLASH

  /* Constant data goes into FLASH */
  .rodata :
  {
    . = ALIGN(8);
    *(.rodata)         /* .rodata sections (constants, strings, etc.) */
    *(.rodata*)        /* .rodata* sections (constants, strings, etc.) */
    . = ALIGN(8);
  } >FLASH

  /* Exception unwinding tables */
  .ARM.extab :
  {
    . = ALIGN(8);
    *(.ARM.extab* .gnu.linkonce.armextab.*)
    . = ALIGN(8);
  } >FLASH
  .ARM : {
    . = ALIGN(8);
    __exidx_start = .;
    *(.ARM.exidx*)
    __exidx_end = .;
    . = ALIGN(8);
  } >FLASH

  /* Constructor/destructor pointer arrays, bracketed by start/end symbols */
  .preinit_array     :
  {
    . = ALIGN(8);
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP (*(.preinit_array*))
    PROVIDE_HIDDEN (__preinit_array_end = .);
    . = ALIGN(8);
  } >FLASH

  .init_array :
  {
    . = ALIGN(8);
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP (*(SORT(.init_array.*)))
    KEEP (*(.init_array*))
    PROVIDE_HIDDEN (__init_array_end = .);
    . = ALIGN(8);
  } >FLASH
  .fini_array :
  {
    . = ALIGN(8);
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP (*(SORT(.fini_array.*)))
    KEEP (*(.fini_array*))
    PROVIDE_HIDDEN (__fini_array_end = .);
    . = ALIGN(8);
  } >FLASH

  /* used by the startup to initialize data */
  _sidata = LOADADDR(.data);

  /* Initialized data sections goes into RAM, load LMA copy after code */
  .data :
  {
    . = ALIGN(8);
    _sdata = .;        /* create a global symbol at data start */
    *(.data)           /* .data sections */
    *(.data*)          /* .data* sections */

    . = ALIGN(8);
    _edata = .;        /* define a global symbol at data end */
  } >RAM AT> FLASH


  /* Uninitialized data section */
  . = ALIGN(4);
  .bss :
  {
    /* This is used by the startup in order to initialize the .bss secion */
    _sbss = .;         /* define a global symbol at bss start */
    __bss_start__ = _sbss;
    *(.bss)
    *(.bss*)
    *(COMMON)

    . = ALIGN(4);
    _ebss = .;         /* define a global symbol at bss end */
    __bss_end__ = _ebss;
  } >RAM

  /* User_heap_stack section, used to check that there is enough RAM left.
   *
   * (NOLOAD): this section has no input sections, only location-counter
   * bumps, so it must never contribute bytes to the image. Without an
   * explicit type, a linker that types input-less output sections as
   * PROGBITS makes it loadable at a RAM address, and a raw binary then
   * spans the whole gap from FLASH (0x08000000) to RAM (0x20000000),
   * i.e. roughly 400 MB. NOTE(review): lld is reported to behave this
   * way - verify the section type with readelf -S.
   */
  ._user_heap_stack (NOLOAD) :
  {
    . = ALIGN(8);
    PROVIDE ( end = . );
    PROVIDE ( _end = . );
    . = . + _Min_Heap_Size;
    . = . + _Min_Stack_Size;
    . = ALIGN(8);
  } >RAM



  /* Remove information from the standard libraries */
  /DISCARD/ :
  {
    libc.a ( * )
    libm.a ( * )
    libgcc.a ( * )
  }

  .ARM.attributes 0 : { *(.ARM.attributes) }
}

This is the startup code I am using:

/* Assembler setup: unified ARM/Thumb syntax, Cortex-M4, Thumb instruction set. */
.syntax unified
    .cpu cortex-m4
    .fpu softvfp
    .thumb

/* Export the vector table and the default handler to other objects. */
.global g_pfnVectors
.global Default_Handler

/* The five words below hold addresses of linker-script symbols. No .section
   directive precedes them, so they are emitted into whatever section is
   current at this point. The visible Reset_Handler loads the same symbols
   through its own literal pool rather than through these words. */

/* start address for the initialization values of the .data section.
defined in linker script */
.word   _sidata
/* start address for the .data section. defined in linker script */
.word   _sdata
/* end address for the .data section. defined in linker script */
.word   _edata
/* start address for the .bss section. defined in linker script */
.word   _sbss
/* end address for the .bss section. defined in linker script */
.word   _ebss

/* NOTE(review): BootRAM is not referenced anywhere in the visible startup code. */
.equ  BootRAM,        0xF1E0F85F
/**
 * @brief  Reset entry point. Loads SP with _estack, copies the .data
 *         initializers from _sidata to [_sdata, _edata), zero-fills
 *         [_sbss, _ebss), then calls SystemInit, __libc_init_array and
 *         main. If main ever returns, execution parks in LoopForever.
 * @param  None
 * @retval : None
*/

    /* The "ax" flags are spelled out, as for .text.Default_Handler, so the
       section is allocatable and executable without relying on an
       assembler's name-based defaults for ".text.*" sections. */
    .section    .text.Reset_Handler,"ax",%progbits
    .weak   Reset_Handler
    .type   Reset_Handler, %function
Reset_Handler:
  ldr   sp, =_estack    /* Set stack pointer */

/* Copy the data segment initializers from flash to SRAM */
  movs  r1, #0          /* r1 = byte offset into .data */
  b LoopCopyDataInit

CopyDataInit:
    ldr r3, =_sidata
    ldr r3, [r3, r1]    /* r3 = initializer word at _sidata + r1 */
    str r3, [r0, r1]    /* store it at _sdata + r1 (r0 is set in the loop head) */
    adds    r1, r1, #4

LoopCopyDataInit:
    ldr r0, =_sdata
    ldr r3, =_edata
    adds    r2, r0, r1  /* r2 = next destination address */
    cmp r2, r3
    bcc CopyDataInit    /* unsigned compare: continue while r2 < _edata */
    ldr r2, =_sbss      /* r2 = current .bss address */
    b   LoopFillZerobss
/* Zero fill the bss segment. */
FillZerobss:
    movs    r3, #0
    str r3, [r2], #4    /* store zero, then advance r2 by 4 (post-indexed) */

LoopFillZerobss:
    ldr r3, = _ebss
    cmp r2, r3
    bcc FillZerobss     /* unsigned compare: continue while r2 < _ebss */

/* Call the clock system initialization function.*/
    bl  SystemInit
/* Call CRT entry point */
    /* Disabled alternative, kept as a block comment because "//" is not the
       ARM comment character in GNU as and only works after preprocessing:
       bl _mainCRTStartup */
    bl __libc_init_array
    bl main

LoopForever:
    b LoopForever

.size   Reset_Handler, .-Reset_Handler

/**
 * @brief  Catch-all handler for interrupts that have no handler of their own.
 *         It branches to itself forever, leaving the system state untouched
 *         so that a debugger can examine it.
 *
 * @param  None
 * @retval : None
*/
    .section    .text.Default_Handler,"ax",%progbits
Default_Handler:
Infinite_Loop:
    b   .                   /* branch to self */
    .size   Default_Handler, .-Default_Handler
/******************************************************************************
*
* The minimal vector table for a Cortex-M4.  Note that the proper constructs
* must be placed on this to ensure that it ends up at physical address
* 0x0000.0000.
*
* The first word is the initial stack pointer (_estack) and the second is the
* reset vector. Zero words are unused slots. The .size directive follows the
* table so that the symbol size covers every entry; placed before the label
* it evaluates to zero.
*
******************************************************************************/
    .section    .isr_vector,"a",%progbits
    .type   g_pfnVectors, %object


g_pfnVectors:
    /* Initial stack pointer and core exception vectors */
    .word   _estack
    .word   Reset_Handler
    .word   NMI_Handler
    .word   HardFault_Handler
    .word   MemManage_Handler
    .word   BusFault_Handler
    .word   UsageFault_Handler
    .word   0
    .word   0
    .word   0
    .word   0
    .word   SVC_Handler
    .word   DebugMon_Handler
    .word   0
    .word   PendSV_Handler
    .word   SysTick_Handler
    /* Device interrupt vectors */
    .word   WWDG_IRQHandler
    .word   PVD_PVM_IRQHandler
    .word   TAMP_STAMP_IRQHandler
    .word   RTC_WKUP_IRQHandler
    .word   FLASH_IRQHandler
    .word   RCC_IRQHandler
    .word   EXTI0_IRQHandler
    .word   EXTI1_IRQHandler
    .word   EXTI2_IRQHandler
    .word   EXTI3_IRQHandler
    .word   EXTI4_IRQHandler
    .word   DMA1_Channel1_IRQHandler
    .word   DMA1_Channel2_IRQHandler
    .word   DMA1_Channel3_IRQHandler
    .word   DMA1_Channel4_IRQHandler
    .word   DMA1_Channel5_IRQHandler
    .word   DMA1_Channel6_IRQHandler
    .word   DMA1_Channel7_IRQHandler
    .word   ADC1_2_IRQHandler
    .word   0
    .word   0
    .word   0
    .word   0
    .word   EXTI9_5_IRQHandler
    .word   TIM1_BRK_TIM15_IRQHandler
    .word   TIM1_UP_TIM16_IRQHandler
    .word   TIM1_TRG_COM_IRQHandler
    .word   TIM1_CC_IRQHandler
    .word   TIM2_IRQHandler
    .word   0
    .word   0
    .word   I2C1_EV_IRQHandler
    .word   I2C1_ER_IRQHandler
    .word   I2C2_EV_IRQHandler
    .word   I2C2_ER_IRQHandler
    .word   SPI1_IRQHandler
    .word   SPI2_IRQHandler
    .word   USART1_IRQHandler
    .word   USART2_IRQHandler
    .word   USART3_IRQHandler
    .word   EXTI15_10_IRQHandler
    .word   RTC_Alarm_IRQHandler
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   0
    .word   TIM6_IRQHandler
    .word   0
    .word   DMA2_Channel1_IRQHandler
    .word   DMA2_Channel2_IRQHandler
    .word   DMA2_Channel3_IRQHandler
    .word   DMA2_Channel4_IRQHandler
    .word   DMA2_Channel5_IRQHandler
    .word   0
    .word   0
    .word   0
    .word   COMP_IRQHandler
    .word   LPTIM1_IRQHandler
    .word   LPTIM2_IRQHandler
    .word   USB_IRQHandler
    .word   DMA2_Channel6_IRQHandler
    .word   DMA2_Channel7_IRQHandler
    .word   LPUART1_IRQHandler
    .word   QUADSPI_IRQHandler
    .word   I2C3_EV_IRQHandler
    .word   I2C3_ER_IRQHandler
    .word   0
    .word   0
    .word   0
    .word   TSC_IRQHandler
    .word   0
    .word   AES_IRQHandler
    .word   RNG_IRQHandler
    .word   FPU_IRQHandler
    .word   CRS_IRQHandler

    .size   g_pfnVectors, .-g_pfnVectors


/*******************************************************************************
*
* Weak default bindings for every exception and interrupt handler named in the
* vector table. Each name is declared weak and aliased to Default_Handler, so
* a function with the same name defined elsewhere replaces the alias.
*
*******************************************************************************/

    /* Declare one handler weak and make it a Thumb alias of Default_Handler. */
    .macro  weak_default_handler handler
    .weak   \handler
    .thumb_set \handler,Default_Handler
    .endm

    weak_default_handler NMI_Handler
    weak_default_handler HardFault_Handler
    weak_default_handler MemManage_Handler
    weak_default_handler BusFault_Handler
    weak_default_handler UsageFault_Handler
    weak_default_handler SVC_Handler
    weak_default_handler DebugMon_Handler
    weak_default_handler PendSV_Handler
    weak_default_handler SysTick_Handler

    weak_default_handler WWDG_IRQHandler
    weak_default_handler PVD_PVM_IRQHandler
    weak_default_handler TAMP_STAMP_IRQHandler
    weak_default_handler RTC_WKUP_IRQHandler
    weak_default_handler FLASH_IRQHandler
    weak_default_handler RCC_IRQHandler
    weak_default_handler EXTI0_IRQHandler
    weak_default_handler EXTI1_IRQHandler
    weak_default_handler EXTI2_IRQHandler
    weak_default_handler EXTI3_IRQHandler
    weak_default_handler EXTI4_IRQHandler
    weak_default_handler DMA1_Channel1_IRQHandler
    weak_default_handler DMA1_Channel2_IRQHandler
    weak_default_handler DMA1_Channel3_IRQHandler
    weak_default_handler DMA1_Channel4_IRQHandler
    weak_default_handler DMA1_Channel5_IRQHandler
    weak_default_handler DMA1_Channel6_IRQHandler
    weak_default_handler DMA1_Channel7_IRQHandler
    weak_default_handler ADC1_2_IRQHandler
    weak_default_handler EXTI9_5_IRQHandler
    weak_default_handler TIM1_BRK_TIM15_IRQHandler
    weak_default_handler TIM1_UP_TIM16_IRQHandler
    weak_default_handler TIM1_TRG_COM_IRQHandler
    weak_default_handler TIM1_CC_IRQHandler
    weak_default_handler TIM2_IRQHandler
    weak_default_handler I2C1_EV_IRQHandler
    weak_default_handler I2C1_ER_IRQHandler
    weak_default_handler I2C2_EV_IRQHandler
    weak_default_handler I2C2_ER_IRQHandler
    weak_default_handler SPI1_IRQHandler
    weak_default_handler SPI2_IRQHandler
    weak_default_handler USART1_IRQHandler
    weak_default_handler USART2_IRQHandler
    weak_default_handler USART3_IRQHandler
    weak_default_handler EXTI15_10_IRQHandler
    weak_default_handler RTC_Alarm_IRQHandler
    weak_default_handler TIM6_IRQHandler
    weak_default_handler DMA2_Channel1_IRQHandler
    weak_default_handler DMA2_Channel2_IRQHandler
    weak_default_handler DMA2_Channel3_IRQHandler
    weak_default_handler DMA2_Channel4_IRQHandler
    weak_default_handler DMA2_Channel5_IRQHandler
    weak_default_handler COMP_IRQHandler
    weak_default_handler LPTIM1_IRQHandler
    weak_default_handler LPTIM2_IRQHandler
    weak_default_handler USB_IRQHandler
    weak_default_handler DMA2_Channel6_IRQHandler
    weak_default_handler DMA2_Channel7_IRQHandler
    weak_default_handler LPUART1_IRQHandler
    weak_default_handler QUADSPI_IRQHandler
    weak_default_handler I2C3_EV_IRQHandler
    weak_default_handler I2C3_ER_IRQHandler
    weak_default_handler TSC_IRQHandler
    weak_default_handler AES_IRQHandler
    weak_default_handler RNG_IRQHandler
    weak_default_handler FPU_IRQHandler
    weak_default_handler CRS_IRQHandler

Both the startup code and the linker script were generated by STM32CubeMX.

Here is the main function that gets called from the startup code:

/* Application entry, called from Reset_Handler after the C runtime is set up. */
int main(void)
{
  /* Initialization calls, in their original order. */
  HAL_Init();
  SystemClock_Config();
  MX_GPIO_Init();
  MX_USB_DEVICE_Init();

  /* No foreground work: stay here forever. */
  for (;;)
  {
  }
}

It initializes the peripherals and the USB CDC device.

Disassembly of vector table:

Disassembly of section .isr_vector:

08000000 <g_pfnVectors>:
 8000000: 00 a0                         adr r0, #0
 8000002: 00 20                         movs    r0, #0
 8000004: 81 53                         strh    r1, [r0, r6]
 8000006: 00 08                         lsrs    r0, r0, #32
 8000008: a5 04                         lsls    r5, r4, #18
 800000a: 00 08                         lsrs    r0, r0, #32
 800000c: a7 04                         lsls    r7, r4, #18
 800000e: 00 08                         lsrs    r0, r0, #32
 8000010: ab 04                         lsls    r3, r5, #18
 8000012: 00 08                         lsrs    r0, r0, #32
 8000014: af 04                         lsls    r7, r5, #18
 8000016: 00 08                         lsrs    r0, r0, #32
 8000018: b3 04                         lsls    r3, r6, #18
 800001a: 00 08                         lsrs    r0, r0, #32
        ...
 800002c: b7 04                         lsls    r7, r6, #18
 800002e: 00 08                         lsrs    r0, r0, #32
 8000030: b9 04                         lsls    r1, r7, #18
 8000032: 00 08                         lsrs    r0, r0, #32
 8000034: 00 00                         movs    r0, r0
 8000036: 00 00                         movs    r0, r0
 8000038: bb 04                         lsls    r3, r7, #18
 800003a: 00 08                         lsrs    r0, r0, #32
 800003c: bd 04                         lsls    r5, r7, #18
 800003e: 00 08                         lsrs    r0, r0, #32
 8000040: d1 53                         strh    r1, [r2, r7]
 8000042: 00 08                         lsrs    r0, r0, #32
 8000044: d1 53                         strh    r1, [r2, r7]
 8000046: 00 08                         lsrs    r0, r0, #32
 8000048: d1 53                         strh    r1, [r2, r7]
 800004a: 00 08                         lsrs    r0, r0, #32
 800004c: d1 53                         strh    r1, [r2, r7]
 800004e: 00 08                         lsrs    r0, r0, #32
 8000050: d1 53                         strh    r1, [r2, r7]
 8000052: 00 08                         lsrs    r0, r0, #32
 8000054: d1 53                         strh    r1, [r2, r7]
 8000056: 00 08                         lsrs    r0, r0, #32
 8000058: d1 53                         strh    r1, [r2, r7]
 800005a: 00 08                         lsrs    r0, r0, #32
 800005c: d1 53                         strh    r1, [r2, r7]
 800005e: 00 08                         lsrs    r0, r0, #32
 8000060: d1 53                         strh    r1, [r2, r7]
 8000062: 00 08                         lsrs    r0, r0, #32
 8000064: d1 53                         strh    r1, [r2, r7]
 8000066: 00 08                         lsrs    r0, r0, #32
 8000068: d1 53                         strh    r1, [r2, r7]
 800006a: 00 08                         lsrs    r0, r0, #32
 800006c: d1 53                         strh    r1, [r2, r7]
 800006e: 00 08                         lsrs    r0, r0, #32
 8000070: d1 53                         strh    r1, [r2, r7]
 8000072: 00 08                         lsrs    r0, r0, #32
 8000074: d1 53                         strh    r1, [r2, r7]
 8000076: 00 08                         lsrs    r0, r0, #32
 8000078: d1 53                         strh    r1, [r2, r7]
 800007a: 00 08                         lsrs    r0, r0, #32
 800007c: d1 53                         strh    r1, [r2, r7]
 800007e: 00 08                         lsrs    r0, r0, #32
 8000080: d1 53                         strh    r1, [r2, r7]
 8000082: 00 08                         lsrs    r0, r0, #32
 8000084: d1 53                         strh    r1, [r2, r7]
 8000086: 00 08                         lsrs    r0, r0, #32
 8000088: d1 53                         strh    r1, [r2, r7]
 800008a: 00 08                         lsrs    r0, r0, #32
        ...
 800009c: d1 53                         strh    r1, [r2, r7]
 800009e: 00 08                         lsrs    r0, r0, #32
 80000a0: d1 53                         strh    r1, [r2, r7]
 80000a2: 00 08                         lsrs    r0, r0, #32
 80000a4: d1 53                         strh    r1, [r2, r7]
 80000a6: 00 08                         lsrs    r0, r0, #32
 80000a8: d1 53                         strh    r1, [r2, r7]
 80000aa: 00 08                         lsrs    r0, r0, #32
 80000ac: d1 53                         strh    r1, [r2, r7]
 80000ae: 00 08                         lsrs    r0, r0, #32
 80000b0: d1 53                         strh    r1, [r2, r7]
 80000b2: 00 08                         lsrs    r0, r0, #32
        ...
 80000bc: d1 53                         strh    r1, [r2, r7]
 80000be: 00 08                         lsrs    r0, r0, #32
 80000c0: d1 53                         strh    r1, [r2, r7]
 80000c2: 00 08                         lsrs    r0, r0, #32
 80000c4: d1 53                         strh    r1, [r2, r7]
 80000c6: 00 08                         lsrs    r0, r0, #32
 80000c8: d1 53                         strh    r1, [r2, r7]
 80000ca: 00 08                         lsrs    r0, r0, #32
 80000cc: d1 53                         strh    r1, [r2, r7]
 80000ce: 00 08                         lsrs    r0, r0, #32
 80000d0: d1 53                         strh    r1, [r2, r7]
 80000d2: 00 08                         lsrs    r0, r0, #32
 80000d4: d1 53                         strh    r1, [r2, r7]
 80000d6: 00 08                         lsrs    r0, r0, #32
 80000d8: d1 53                         strh    r1, [r2, r7]
 80000da: 00 08                         lsrs    r0, r0, #32
 80000dc: d1 53                         strh    r1, [r2, r7]
 80000de: 00 08                         lsrs    r0, r0, #32
 80000e0: d1 53                         strh    r1, [r2, r7]
 80000e2: 00 08                         lsrs    r0, r0, #32
 80000e4: d1 53                         strh    r1, [r2, r7]
 80000e6: 00 08                         lsrs    r0, r0, #32
        ...
 8000118: d1 53                         strh    r1, [r2, r7]
 800011a: 00 08                         lsrs    r0, r0, #32
 800011c: 00 00                         movs    r0, r0
 800011e: 00 00                         movs    r0, r0
 8000120: d1 53                         strh    r1, [r2, r7]
 8000122: 00 08                         lsrs    r0, r0, #32
 8000124: d1 53                         strh    r1, [r2, r7]
 8000126: 00 08                         lsrs    r0, r0, #32
 8000128: d1 53                         strh    r1, [r2, r7]
 800012a: 00 08                         lsrs    r0, r0, #32
 800012c: d1 53                         strh    r1, [r2, r7]
 800012e: 00 08                         lsrs    r0, r0, #32
 8000130: d1 53                         strh    r1, [r2, r7]
 8000132: 00 08                         lsrs    r0, r0, #32
        ...
 8000140: d1 53                         strh    r1, [r2, r7]
 8000142: 00 08                         lsrs    r0, r0, #32
 8000144: d1 53                         strh    r1, [r2, r7]
 8000146: 00 08                         lsrs    r0, r0, #32
 8000148: d1 53                         strh    r1, [r2, r7]
 800014a: 00 08                         lsrs    r0, r0, #32
 800014c: c7 04                         lsls    r7, r0, #19
 800014e: 00 08                         lsrs    r0, r0, #32
 8000150: d1 53                         strh    r1, [r2, r7]
 8000152: 00 08                         lsrs    r0, r0, #32
 8000154: d1 53                         strh    r1, [r2, r7]
 8000156: 00 08                         lsrs    r0, r0, #32
 8000158: d1 53                         strh    r1, [r2, r7]
 800015a: 00 08                         lsrs    r0, r0, #32
 800015c: d1 53                         strh    r1, [r2, r7]
 800015e: 00 08                         lsrs    r0, r0, #32
 8000160: d1 53                         strh    r1, [r2, r7]
 8000162: 00 08                         lsrs    r0, r0, #32
 8000164: d1 53                         strh    r1, [r2, r7]
 8000166: 00 08                         lsrs    r0, r0, #32
        ...
 8000174: d1 53                         strh    r1, [r2, r7]
 8000176: 00 08                         lsrs    r0, r0, #32
 8000178: 00 00                         movs    r0, r0
 800017a: 00 00                         movs    r0, r0
 800017c: d1 53                         strh    r1, [r2, r7]
 800017e: 00 08                         lsrs    r0, r0, #32
 8000180: d1 53                         strh    r1, [r2, r7]
 8000182: 00 08                         lsrs    r0, r0, #32
 8000184: d1 53                         strh    r1, [r2, r7]
 8000186: 00 08                         lsrs    r0, r0, #32
 8000188: d1 53                         strh    r1, [r2, r7]
 800018a: 00 08                         lsrs    r0, r0, #32
 800018c: 00 00                         movs    r0, r0
 800018e: 00 00                         movs    r0, r0

Solution

  • So this inspired me to try clang/llvm after a few years off...

    Now I am on Linux not Windows, but you should be able to adapt to Windows (or of course dual boot Linux or put Linux in a vm or whatever).

    Derived from build instructions on the clang/llvm site(s)

    # Remove any previous install and any previous source checkout.
    rm -rf /opt/llvmv6m
    rm -rf llvm-project
    # Fetch the LLVM sources and configure an out-of-tree build directory.
    git clone https://github.com/llvm/llvm-project.git
    cd llvm-project
    mkdir build
    cd build
    # Build clang with only the ARM backend, default target triple
    # armv6m-none-eabi, installing under /opt/llvmv6m.
    cmake -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_CROSSCOMPILING=True -DCMAKE_INSTALL_PREFIX=/opt/llvmv6m -DLLVM_DEFAULT_TARGET_TRIPLE=armv6m-none-eabi -DLLVM_TARGET_ARCH=ARM -DLLVM_TARGETS_TO_BUILD=ARM -G "Unix Makefiles" ../llvm
    make
    sudo make install
    

    That is how I built it. Yes, I know yours is a Cortex-M4; all the Cortex-Ms (so far) support ARMv6-M, and you can easily change this to armv7m. It was an experiment based on those web pages, and interestingly I now don't have to specify the architecture or CPU on the command line. I'm curious to know whether this is still a generic clang cross compiler with armv6m as just the default. Anyway...

    This is a little more complicated than a simple infinite loop, but it plays with LLVM features that you don't get with GNU.

    start.s

    .thumb
    .cpu    cortex-m0
    .global _start
    /* Vector table: initial stack pointer, reset vector, then two more
       exception vectors that both point at the catch-all loop. */
    _start:
    .word 0x20001000
    .word reset
    .word loop
    .word loop
    /* Reset entry: call the C code; if it returns, fall through into loop. */
    .thumb_func
    reset:
        bl notmain
    /* Catch-all: spin in place. */
    .thumb_func
    loop:
        b loop
    

    notmain.c

    /* Implemented in fun.c. */
    unsigned int fun(void);

    /* C entry point called from reset in start.s; returns whatever fun() returns. */
    unsigned int notmain(void)
    {
        unsigned int result = fun();
        return result;
    }
    

    fun.c

    /* Returns the constant 5; kept in its own file to demonstrate cross-object optimization. */
    unsigned int fun(void)
    {
        const unsigned int value = 5;
        return value;
    }
    

    memmap

    /* Two 0x1000-byte regions: rom at 0x08000000 and ram at 0x20000000. */
    MEMORY
    {
        rom : ORIGIN = 0x08000000, LENGTH = 0x1000
        ram : ORIGIN = 0x20000000, LENGTH = 0x1000
    }

    SECTIONS
    {
        /* Code and read-only data are placed in rom. */
        .text :
        {
            *(.text*)
        } > rom

        .rodata :
        {
            *(.rodata*)
        } > rom

        /* Zero-initialized data is placed in ram. */
        .bss :
        {
            *(.bss*)
        } > ram
    }
    

    Makefile

    # Builds the same program three ways:
    #   basic.elf - clang compiles each C file straight to an object
    #   not.elf   - clang emits bitcode per file, llc turns each into an object
    #   opt.elf   - bitcode is linked with llvm-link, optimized as a whole with
    #               opt, then compiled by llc into a single object
    # start.s is assembled and everything is linked with the GNU binutils.
    #
    # LLCOPS is not set in this file; it expands to nothing unless it is
    # supplied on the make command line or in the environment.
    # NOTE: recipe lines must begin with a TAB character in a real Makefile.

    # all and clean name actions, not files; declaring them phony keeps a
    # stray file called "all" or "clean" from suppressing the recipes.
    .PHONY : all clean

    all :
        arm-none-eabi-as start.s -o start.o
        clang -O2 -fomit-frame-pointer -c notmain.c -o notmain.o
        clang -O2 -fomit-frame-pointer -c fun.c -o fun.o
        arm-none-eabi-ld -T memmap start.o notmain.o fun.o -o basic.elf
        arm-none-eabi-objdump -D basic.elf > basic.list
        clang -fomit-frame-pointer -c -emit-llvm notmain.c -o notmain.bc
        clang -fomit-frame-pointer -c -emit-llvm fun.c -o fun.bc
        llc $(LLCOPS) notmain.bc -filetype=obj -o notmain.not.o
        llc $(LLCOPS) fun.bc -filetype=obj -o fun.not.o
        arm-none-eabi-ld -T memmap start.o notmain.not.o fun.not.o -o not.elf
        arm-none-eabi-objdump -D not.elf > not.list
        llvm-link notmain.bc fun.bc -o notmain.not.bc
        opt -O2 notmain.not.bc -o notmain.opt.bc
        llc $(LLCOPS) notmain.opt.bc -filetype=obj -o notmain.opt.o
        arm-none-eabi-ld -T memmap start.o notmain.opt.o -o opt.elf
        arm-none-eabi-objdump -D opt.elf > opt.list
    
    # Remove generated files.
    clean:
        rm -f *.S
        rm -f *.o
        rm -f *.list
        rm -f *.elf
        rm -f *.bc
    

    basic.list

    Disassembly of section .text:
    
    08000000 <_start>:
     8000000:   20001000    andcs   r1, r0, r0
     8000004:   08000011    stmdaeq r0, {r0, r4}
     8000008:   08000015    stmdaeq r0, {r0, r2, r4}
     800000c:   08000015    stmdaeq r0, {r0, r2, r4}
    
    08000010 <reset>:
     8000010:   f000 f802   bl  8000018 <notmain>
    
    08000014 <loop>:
     8000014:   e7fe        b.n 8000014 <loop>
        ...
    
    08000018 <notmain>:
     8000018:   b580        push    {r7, lr}
     800001a:   f000 f801   bl  8000020 <fun>
     800001e:   bd80        pop {r7, pc}
    
    08000020 <fun>:
     8000020:   2005        movs    r0, #5
     8000022:   4770        bx  lr
    

    not.list

    Disassembly of section .text:
    
    08000000 <_start>:
     8000000:   20001000    andcs   r1, r0, r0
     8000004:   08000011    stmdaeq r0, {r0, r4}
     8000008:   08000015    stmdaeq r0, {r0, r2, r4}
     800000c:   08000015    stmdaeq r0, {r0, r2, r4}
    
    08000010 <reset>:
     8000010:   f000 f802   bl  8000018 <notmain>
    
    08000014 <loop>:
     8000014:   e7fe        b.n 8000014 <loop>
        ...
    
    08000018 <notmain>:
     8000018:   b580        push    {r7, lr}
     800001a:   f000 f801   bl  8000020 <fun>
     800001e:   bd80        pop {r7, pc}
    
    08000020 <fun>:
     8000020:   2005        movs    r0, #5
     8000022:   4770        bx  lr
    

    opt.list

    Disassembly of section .text:
    
    08000000 <_start>:
     8000000:   20001000    andcs   r1, r0, r0
     8000004:   08000011    stmdaeq r0, {r0, r4}
     8000008:   08000015    stmdaeq r0, {r0, r2, r4}
     800000c:   08000015    stmdaeq r0, {r0, r2, r4}
    
    08000010 <reset>:
     8000010:   f000 f802   bl  8000018 <notmain>
    
    08000014 <loop>:
     8000014:   e7fe        b.n 8000014 <loop>
        ...
    
    08000018 <notmain>:
     8000018:   b580        push    {r7, lr}
     800001a:   f000 f802   bl  8000022 <fun>
     800001e:   2005        movs    r0, #5
     8000020:   bd80        pop {r7, pc}
    
    08000022 <fun>:
     8000022:   2005        movs    r0, #5
     8000024:   4770        bx  lr
    

    The fun part here is that you can optimize across files/objects, which you can't do with GNU tools AFAIK. That said, LLVM actually did a really bad job here (in opt.list, notmain still calls fun and then loads 5 itself); I'm going to have to look into this.

    Note that I used GNU's linker and assembler; I'm still not sure how to get around the error I get when trying to build with clang alone.

    These are all generic enough to run on your processor as shown here. Key traps to look for with a new project or tool:

    08000000 <_start>:
     8000000:   20001000
     8000004:   08000011
     8000008:   08000015
     800000c:   08000015
    
    08000010 <reset>:
    
    08000014 <loop>:
    

    For a Cortex-M to boot properly and not fail immediately, the vector table entries need to have the lsbit set, starting with reset. In this case reset is at 0x08000010, so its vector table entry needs to be 0x08000011 for that code to run. That is what we see here, so we won't fail because of that.

    While some MCUs don't have 0x1000 bytes of RAM, I assume yours does, so 0x20001000 is an okay starting place for the stack pointer.

    From there this is again not just an infinite loop it is more complicated but this should run on your processor and not fail. If you change it to this:

    .thumb
    .cpu    cortex-m0
    .global _start
    /* Vector table: initial stack pointer, reset vector, then two more
       exception vectors that both point at the catch-all loop. */
    _start:
    .word 0x20001000
    .word reset
    .word loop
    .word loop
    /* Reset entry: write 0 to r0 and branch back to reset, forever. */
    .thumb_func
    reset:
        mov r0,#0
        b reset
    /* Catch-all: spin in place. */
    .thumb_func
    loop:
        b loop
    

    (Granted, this becomes a GNU tools project, not llvm/clang. What assembler are you using, and what linker?)

    then with a debugger (stlink plus openocd plus telnet) you can stop and resume and examine r0 to see that it is running.

    /* Minimal Cortex-M startup: a four-entry vector table followed by the reset code. */
    .thumb
    .cpu    cortex-m0
    .globl _start
    _start:
    .word 0x20001000    /* first vector table word: initial stack pointer */
    .word reset         /* reset vector */
    .word loop          /* remaining two vectors both point at the hang loop */
    .word loop
    .thumb_func
    reset:
        bl notmain      /* call the C entry point; falls through into loop if it returns */
    .thumb_func
    loop:
        b .             /* branch to self: spin forever */
    .thumb_func
    bounce:
        bx lr           /* return immediately; exists only so notmain has something to call */
    
    
    /* bounce is implemented in the assembly file; it simply returns. */
    void bounce ( unsigned int );

    /*
     * C entry point called from the reset handler.
     * Passes an ever-increasing counter to bounce() forever, so the argument
     * register can be watched changing from a debugger. Never returns in practice.
     */
    unsigned int notmain ( void )
    {
        unsigned int ra;    /* loop counter; was used without a declaration */

        for(ra=0;;ra++) bounce(ra);
        return(0);
    }
    

    which adds a little clang/llvm to it, and you can see that r0 is changing if you stop/resume.

    Some chips check that the vectors are there; if they see 0xFFs they may go into a bootloader instead. So with the debugger you can also examine 0x00000000 and 0x08000000: try reset halt on the telnet/openocd command line and then mdw 0 20 to see what the cpu is going to see at address zero, and whether it is your vector table.

    If you get past these very simple but very fatal common problems, then you may be dealing with something else. For example, clang doesn't like while(1) loops; maybe they finally fixed that bug, but when I filed it they refused to. So if there is code waiting for a status bit to change that uses a while(1), then maybe that's the problem. I would take baby steps after the above, adding one thing at a time to main. As you have been doing, try the clocks: perhaps have an infinite loop ((asm) function) that you call after clock init, and see if clock init runs to completion and returns back to main().

    Are you using clang/llvm to build the libraries you are using or are they pre-built for you to use with clang/llvm?

    Edit

    Did more work, in case the above is relevant to anyone and doesn't get deleted.

    change to

    -DLLVM_ENABLE_PROJECTS='clang;lld'

    MEMORY
    {
        rom : ORIGIN = 0x08000000, LENGTH = 0x1000
        ram : ORIGIN = 0x20000000, LENGTH = 0x1000
    }
    SECTIONS
    {
        /DISCARD/ : {
            *(.ARM.exidx*)
        }
        .text : { *(.text*) } > rom
        .rodata : { *(.rodata*) } > rom
        .bss : { *(.bss*) } > ram
    }
    

    and maybe another couple of things. and now the disassembly resembles yours too with the data in individual little endian bytes.

    Disassembly of section .text:
    
    08000000 _start:
     8000000: 00 10                         asrs    r0, r0, #32
     8000002: 00 20                         movs    r0, #0
     8000004: 11 00                         movs    r1, r2
     8000006: 00 08                         lsrs    r0, r0, #32
     8000008: 15 00                         movs    r5, r2
     800000a: 00 08                         lsrs    r0, r0, #32
     800000c: 15 00                         movs    r5, r2
     800000e: 00 08                         lsrs    r0, r0, #32
    
    08000010 reset:
     8000010: 00 f0 02 f8                   bl  #4
    
    08000014 loop:
     8000014: fe e7                         b   #-4 <loop>
     8000016: d4 d4                         bmi #-88 <start.c+0x7ffffc2>
    
    08000018 notmain:
     8000018: 80 b5                         push    {r7, lr}
     800001a: 00 f0 02 f8                   bl  #4
     800001e: 05 20                         movs    r0, #5
     8000020: 80 bd                         pop {r7, pc}
    
    08000022 fun:
     8000022: 05 20                         movs    r0, #5
     8000024: 70 47                         bx  lr
    

    Now on to your comment about 400MB.

    0x20000000 - 0x08000000 = 0x18000000 = 402653184.
    

    And this is probably your problem, so it sounds like you have some .data.

    Let me start a new one:

    start.s

    .text
    /*.syntax unified*/
    .cpu    cortex-m0
    .code   16
    .globl _start
    _start:
    .word 0x20001000
    .word reset
    .word loop
    .word loop
    .thumb_func
    reset:
        bl notmain
    .thumb_func
    loop:
        b .
    

    notmain.c

    unsigned int notmain ( void )
    {
        return(7);
    }
    

    Makefile

    # Build: assemble start.s, compile notmain.c, link with lld using the
    # memmap linker script, then emit a disassembly listing and a raw binary.
    all :
        clang -c start.s -o start.o
        clang -O2 -fomit-frame-pointer -c notmain.c -o notmain.o
        ld.lld -T memmap start.o notmain.o -o basic.elf
        llvm-objdump -D basic.elf > basic.list
        llvm-objcopy -O binary basic.elf basic.bin
    
    # Remove every generated artifact, including the raw binary (which can be
    # hundreds of megabytes when a section leaks into the ram address range).
    clean:
        rm -f *.o
        rm -f *.list
        rm -f *.elf
        rm -f *.bin
    

    memmap

    MEMORY
    {
        rom : ORIGIN = 0x08000000, LENGTH = 0x1000
        ram : ORIGIN = 0x20000000, LENGTH = 0x1000
    }
    
    SECTIONS
    {
        /DISCARD/ : {
            *(.ARM.exidx*)
        }
        .text : { *(.text*) } > rom
        .rodata : { *(.rodata*) } > rom
        .bss : { *(.bss*) } > ram
        .data : { *(.data*) } > ram
    }
    

    and this produces a 28 byte basic.bin file

    08000000 _start:
     8000000: 00 10                         asrs    r0, r0, #32
     8000002: 00 20                         movs    r0, #0
     8000004: 11 00                         movs    r1, r2
     8000006: 00 08                         lsrs    r0, r0, #32
     8000008: 15 00                         movs    r5, r2
     800000a: 00 08                         lsrs    r0, r0, #32
     800000c: 15 00                         movs    r5, r2
     800000e: 00 08                         lsrs    r0, r0, #32
    
    08000010 reset:
     8000010: 00 f0 02 f8                   bl  #4
    
    08000014 loop:
     8000014: fe e7                         b   #-4 <loop>
     8000016: d4 d4                         bmi #-88 <start.c+0x7ffffc2>
    
    08000018 notmain:
     8000018: 07 20                         movs    r0, #7
     800001a: 70 47                         bx  lr
    

    Now let's add .data:

    unsigned int x = 5;
    unsigned int notmain ( void )
    {
        return(7);
    }
    

    now my basic.bin is 402653188 bytes.

    What is going on is the objcopy is making a binary memory image that starts at the first loadable or relevant space and ends with the last one so

    Disassembly of section .text:
    
    08000000 _start:
     8000000: 00 10                         asrs    r0, r0, #32
     8000002: 00 20                         movs    r0, #0
     8000004: 11 00                         movs    r1, r2
     8000006: 00 08                         lsrs    r0, r0, #32
     8000008: 15 00                         movs    r5, r2
     800000a: 00 08                         lsrs    r0, r0, #32
     800000c: 15 00                         movs    r5, r2
     800000e: 00 08                         lsrs    r0, r0, #32
    
    08000010 reset:
     8000010: 00 f0 02 f8                   bl  #4
    
    08000014 loop:
     8000014: fe e7                         b   #-4 <loop>
     8000016: d4 d4                         bmi #-88 <start.c+0x7ffffc2>
    
    08000018 notmain:
     8000018: 07 20                         movs    r0, #7
     800001a: 70 47                         bx  lr
    
    Disassembly of section .data:
    
    20000000 x:
    20000000: 05 00                         movs    r5, r0
    20000002: 00 00                         movs    r0, r0
    

    from 0x08000000 to 0x20000003 inclusive (x is a 4-byte word at 0x20000000)

    0x20000004 - 0x08000000 = 402653188, which is exactly the size of basic.bin.

    You cannot load this into your microcontroller, and it wouldn't work anyway: your program needs to be contained in non-volatile memory (flash).

    first step:

    MEMORY
    {
        rom : ORIGIN = 0x08000000, LENGTH = 0x1000
        ram : ORIGIN = 0x20000000, LENGTH = 0x1000
    }
    
    SECTIONS
    {
        /DISCARD/ : {
            *(.ARM.exidx*)
        }
        .text : { *(.text*) } > rom
        .rodata : { *(.rodata*) } > rom
        .bss : { *(.bss*) } > ram
        .data : { *(.data*) } > ram AT > rom
    }
    

    changing it to ram AT rom.

    the basic.bin file is 32 bytes now.

    00000000  00 10 00 20 11 00 00 08  15 00 00 08 15 00 00 08  |... ............|
    00000010  00 f0 02 f8 fe e7 d4 d4  07 20 70 47 05 00 00 00  |......... pG....|
    00000020
    
    Disassembly of section .text:
    
    08000000 _start:
     8000000: 00 10                         asrs    r0, r0, #32
     8000002: 00 20                         movs    r0, #0
     8000004: 11 00                         movs    r1, r2
     8000006: 00 08                         lsrs    r0, r0, #32
     8000008: 15 00                         movs    r5, r2
     800000a: 00 08                         lsrs    r0, r0, #32
     800000c: 15 00                         movs    r5, r2
     800000e: 00 08                         lsrs    r0, r0, #32
    
    08000010 reset:
     8000010: 00 f0 02 f8                   bl  #4
    
    08000014 loop:
     8000014: fe e7                         b   #-4 <loop>
     8000016: d4 d4                         bmi #-88 <start.c+0x7ffffc2>
    
    08000018 notmain:
     8000018: 07 20                         movs    r0, #7
     800001a: 70 47                         bx  lr
    
    Disassembly of section .data:
    
    20000000 x:
    20000000: 05 00                         movs    r5, r0
    20000002: 00 00                         movs    r0, r0
    

    notice the end of the binary file:

    70 47 05 00 00 00
    

    it has the last .text item 70 47 then the .data item.

    and let the tools do the work for you

    MEMORY
    {
        rom : ORIGIN = 0x08000000, LENGTH = 0x1000
        ram : ORIGIN = 0x20000000, LENGTH = 0x1000
    }
    
    SECTIONS
    {
        /DISCARD/ : {
            *(.ARM.exidx*)
        }
        .text : { *(.text*) } > rom
        .rodata : { *(.rodata*) } > rom
        .bss : { *(.bss*) } > ram
       __data_rom_start__ = .;
       .data : {
        __data_start__ = .;
        *(.data*)
       } > ram AT > rom
       __data_end__ = .;
       __data_size__ = __data_end__ - __data_start__;
    }
    

    basic.bin still currently 32 bytes but

    llvm-nm basic.elf 
    20000004 D __data_end__
    0800001c T __data_rom_start__
    00000004 A __data_size__
    20000000 D __data_start__
    08000000 T _start
    08000014 t loop
    08000018 T notmain
    08000010 t reset
    20000000 D x
    

    Now we know that in the flash at address 0x0800001c the embedded .data starts, it is 4 bytes in size and its destination in ram is 0x20000000 so now the bootstrap code can copy .data from flash to ram before calling the C entry point.

    Now you have already done all of this, and I assume knew all of this with respect to linker scripts and bootstrap, but you are getting that 400MB binary which indicates there is something else leaking into the ram address space.

    Examine the disassembly (objdump -D) and/or the readelf and/or nm outputs to find out what is out there, and add it to the linker script along with bootstrap code to copy it.

    Adding some .bss

    unsigned int x = 5;
    unsigned int y;
    unsigned int notmain ( void )
    {
        return(7);
    }
    

    From objdump

    Disassembly of section .bss:
    
    20000000 y:
    ...
    
    Disassembly of section .data:
    
    20000004 x:
    20000004: 05 00                         movs    r5, r0
    20000006: 00 00                         movs    r0, r0
    

    From readelf

    Program Headers:
      Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
      LOAD           0x001000 0x08000000 0x08000000 0x0001c 0x0001c R E 0x1000
      LOAD           0x002000 0x20000000 0x20000000 0x00000 0x00004 RW  0x1000
      LOAD           0x002004 0x20000004 0x0800001c 0x00004 0x00004 RW  0x1000
    

    the .bss one looks a little scary but it doesn't end up in the binary which is again 32 bytes. But we see here that .data is physically in the flash but wants to be in ram which is what we desire for this type of platform. Maybe from readelf you can find the leak into ram.

    Your objcopy -O binary output should fit in flash and contain 100% of your program and data, otherwise it won't work. If you were to extract only the flash part from that 400MByte file, there might be some data items missing that the software expects to be there in order to operate. Or maybe it's some silly string table thing, or some other item that is not really meant for the binary but happens to have a section name not yet handled in the linker script.

    Sorry, the 400MB should have been obvious to me from the start, yet another common linker script trap when prepping a new project. I personally never use .data nor rely on .bss so don't have these issues, but your experience may be different, but I am still very aware of it. (It's more fun when you have .text at 0 and ram at 0x80000000 or even higher you get files that are gigabytes in size).