When trying to actually use my custom compiler for a made up language I get the following warning from ld: "ld: warning: cannot find entry symbol _start; defaulting to 0000000000401000" it seems that ld cant find the _start tag even though it should be written to the assembly file. Here's the compiler code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#define STACK_SIZE 1000 // Default loop stack size
#define STACK_GROWTH_FACTOR 0.1 // How much stack to add when its full
void compile(char *asm_filename, char *src_filename);
char *string(char *str);
char *replace_extension(char *name, char *ext);
enum stage {
COMPILE, // Compile only
ASSEMBLE, // Compile and assemble only
LINK // Compile, assemble and link
};
struct info_t {
char *pname; // Process name
char *ifilename; // Input source code file name
char *ofilename; // Output file name
enum stage ostage; // Final stage that generates the output file
char *arr_size; // Memory allocated for the executable
} info;
int main(int argc, char **argv)
{
int verbose = 0; // 1 enables verbosity; 0 disables
char *arr_size = "30000"; // Default size of array of cells
char *asm_filename; // File name for assembly code
char *obj_filename; // File name for object code
char *exe_filename = "out"; // File name for executable code
char *command; // Buffer for command line strings
size_t i; // Counter
size_t len; // Stores string lengths
if ((info.pname = strrchr(argv[0], '/')) == NULL) {
info.pname = argv[0];
} else {
info.pname++; // Address of the basename part in argv[0]
}
info.ifilename = NULL;
info.ofilename = NULL;
info.ostage = LINK;
info.arr_size = arr_size;
if (argc < 2) {
fprintf(stderr, "Usage: %s file.gear\n", info.pname);
exit(EXIT_FAILURE);
}
info.ifilename = argv[1];
// Compiling
// Determine name for assembly code filename
if (info.ostage == COMPILE && info.ofilename != NULL) {
asm_filename = string(info.ofilename);
} else {
asm_filename = replace_extension(info.ifilename, "s");
}
if (verbose) {
printf("Compiling: compile(\"%s\", \"%s\")\n",
asm_filename, info.ifilename);
}
compile(asm_filename, info.ifilename);
// If compile only option was specified, exit
if (info.ostage == COMPILE) {
free(asm_filename);
exit(EXIT_SUCCESS);
}
// Assembling
// Determine name for object code filename
if (info.ostage == ASSEMBLE && info.ofilename != NULL) {
obj_filename = string(info.ofilename);
} else {
obj_filename = replace_extension(info.ifilename, "o");
}
// as
len = strlen("as -o") + strlen(asm_filename) +
strlen(obj_filename) + 2;
if ((command = malloc(len)) == NULL) {
fprintf(stderr, "%s: Out of memory while assembling", info.pname);
}
sprintf(command, "as -o %s %s", obj_filename, asm_filename);
// Assemble the asm code into its object file
if (verbose) {
printf("Assembling: %s\n", command);
}
system(command);
free(command);
// Assembly code file is not required after assembling
//unlink(asm_filename);
free(asm_filename);
// Link object file
// Determine name for executable code filename
if (info.ostage == LINK && info.ofilename != NULL) {
exe_filename = info.ofilename;
}
// ld
len = strlen("ld -o") + strlen(exe_filename) +
strlen(obj_filename) + 2;
if ((command = malloc(len)) == NULL) {
fprintf(stderr, "%s: Out of memory while compiling", info.pname);
}
sprintf(command, "ld -o %s %s", exe_filename, obj_filename);
// Link the object code to executable code using ld
if (verbose) {
printf("Linking: %s\n", command);
}
system(command);
free(command);
// Object code file is not needed after linking
unlink(obj_filename);
free(obj_filename);
exit(EXIT_SUCCESS);
}
char *string(char *str) {
char *new_str;
if ((new_str = malloc(strlen(str) + 1)) == NULL) {
fprintf(stderr, "%s: Out of memory while allocating memory for "
"string: %s\n", info.pname, str);
exit(1);
}
strcpy(new_str, str);
return new_str;
}
char *replace_extension(char *name, char *ext) {
char *dot = strrchr(name, '.');
char *new_name;
size_t len = dot == NULL ? strlen(name) : dot - name;
if ((new_name = malloc(len + strlen(ext) + 2)) == NULL) {
fprintf(stderr, "%s: Out of memory while changing extension of "
"%s to %s\n", info.pname, name, ext);
exit(1);
}
strncpy(new_name, name, len);
new_name[len] = '\0';
strcat(new_name, ".");
strcat(new_name, ext);
return new_name;
}
void compile(char *asm_filename, char *src_filename) {
FILE *src; // Source code file
FILE *as; // Assembly code file
size_t *stack; // Loop stack
size_t top = 0; // Next free location in stack
size_t stack_size = STACK_SIZE; // Stack size
size_t loop = 0; // Used to generate loop labels
int c;
// Open source file
if ((src = fopen(src_filename, "r")) == NULL) {
fprintf(stderr, "%s: %s: Could not read file\n",
info.pname, src_filename);
exit(EXIT_FAILURE);
}
// Open assembly file
if ((as = fopen(asm_filename, "w")) == NULL) {
fprintf(stderr, "%s: %s: Could not write file\n",
info.pname, asm_filename);
exit(EXIT_FAILURE);
}
// Create loop stack
if ((stack = malloc(stack_size * sizeof *stack)) == NULL) {
fprintf(stderr, "%s: Out of memory while creating loop stack "
"of size %lu\n", info.pname, stack_size);
exit(EXIT_FAILURE);
}
/* Write assembly code */
fputs(".section .bss\n", as);
fprintf(as, "\t.lcomm buffer %s\n", info.arr_size);
fputs(".section .text\n", as);
fputs(".globl _start\n", as);
while ((c = fgetc(src)) != EOF) {
char keyword[6];
if (fscanf(src, "%5s", keyword) == 1) {
if (strcmp(keyword, "_START") == 0) {
fputs("_start:\n", as);
fputs("\tmov $buffer, %edi\n", as);
} else if (strcmp(keyword, "_END") == 0) {
fputs("movl $1, %eax\n", as);
fputs("movl $0, %ebx\n", as);
fputs("int $0x80\n", as);
}
}
}
fclose(as);
fclose(src);
}
And here's the file im attempting to run:
_START
_END
I tried using a different linker (gcc) and I expected that the compiler should have output an executable which would have simply started and ended the process.
The assembler code:
.section .bss
.lcomm buffer 30000
.section .text
.globl _start
movl $1, %eax
movl $0, %ebx
int $0x80
Disclaimer: I don't know AT&T syntax (I like NASM), so bear with me.
It seems you're telling the computer to start execution at _start
, but you're not saying where _start
is. And so, the linker (ld
) assumes that it should enter at 00000000040100
. It's simply kind enough to warn you first. Additionally, I'm pretty sure GCC prefers you use main
instead of _start
(don't quote me on that).
You'll need your C program to make code that looks more like this:
.section .bss
.lcomm buffer 30000
.section .text
.globl main
main:
movl $1, %eax
movl $0, %ebx
int $0x80
This compiles on my computer with no warnings nor errors.