File operations in C on different architectures

As a research project we are writing an abstraction layer on top of the standard C (BINARY) file handling library (stdio), by providing a few extra functions for file handling with transactions.

The workflow is the following:

user opens a file with our API (or with the standard fopen). Both return FILE*. File is open in BINARY mode!
user writes data to the file using standard library commands (such as fwrite)
user opens a transaction on the opened file using our API: TRANSACTION a = trans_start(FILE* )
user sets data validators for the TRANSACTION object (set_validator(TRANSACTION, int(*)(char*)))
user "writes" data to the file using our own API (int trans_write_string(TRANSACTION*, char*, length))
- in reality this "write" puts its data in the memory for the validators defined above, which might do operations on the data and set some flags somewhere... not relevant to the question.
user uses trans_commit(TRANSACTION) in order to actually write the data to the file. Now, depending on the flags set by the validators, this might NOT write the data to the file, but instead report an error to the user (which can be resolved programmatically... not so relevant to the question).
user closes file using standard API fclose.

Till now we have only the string handling method of the API (trans_write_string), which works nicely. It constructs its own in memory data buffer, modifies stuff if required, calls validators, etc... on consecutive calls it appends the new data into its internal memory buffer, handles allocation, etc... and at a successful commit it writes the data to the file using fwrite (Yes, this is mostly a C project, however C++ answers will not be excluded either).

But now we want to (... have to) expand the API to be able to write numbers too (16 bit, 32 bit, 64 bit) and also floats... in a very similar way to how the standard C stdio API does it. Using the already existing implementation for the string, this assumes that we have a data buffer in memory which holds N bytes of characters (the string itself), then we might need 2 bytes for a 16 bit value, then another M bytes for another string, then 8 bytes for a 64 bit value, then 2 bytes for a 16 bit value, etc...

We got stuck at the point of "how to represent a number in the file in order to be readable by someone else too who uses a different computer/architecture/os/endianness".

Inserting the number into the memory stream is theoretically possible by casting its address to char* (char* addr = &my_16bit_int) and placing *(addr) and *(addr + 1) at the required address (i.e. after the N characters of the string), and writing it to the file is also possible. But what if I want to read the resulting file on a different architecture where the endianness is different? And what if the "other" computer is only an ancient 16 bit pile of metal? What would happen in that case to the 64 bit values written in the file?

What good practices are there for resolving these kinds of issues?

EDIT: The target file must be binary; it will be accompanied by a text file (XML) describing its format (such as: N 8 byte characters, 1 16 bit value, etc.). This text file is generated based on the output of our beloved validators. The validator "says" something like: YES, I accept this 16 bit value; NO, I reject this long string; etc... and someone else is creating a data format XML based on this "output".

EDIT2: Yes, we need to share the file across various platforms, even huge 20 year old fridge size boxes :)

EDIT3: Yes, we need float too!

Solution

Casting is not sufficient. I think the socket functions htons and htonl will be a sufficient solution for int16 and int32. For int64 you should build it yourself, since there is no official function:

Note that all the functions are reversing the bytes order only if needed, so you can also use the same method in order to 'fix' a number back to normal.

// Overlay of a 16-bit value and its two bytes in host memory order.
typedef union {
    unsigned char c[2];
    unsigned short s;
} U2;

// Drop-in replacement for the standard htons.
//
// Returns a value whose in-memory bytes are the bytes of `s` in network
// (big-endian) order. The bytes are only rearranged when the host layout
// requires it, so applying the function twice gives back the original value.
unsigned short htons(unsigned short s)
{
    U2 order;
    U2 out;
    const unsigned char *src = (const unsigned char *)&s;
    unsigned int i;

    // After storing 0x0001, the memory slot holding the low-order byte reads 1
    // and the slot holding the high-order byte reads 0. order.c[i] is therefore
    // the destination index that puts source byte i in big-endian position.
    order.s = 0x0001;

    for (i = 0; i < sizeof order.c; ++i) {
        out.c[order.c[i]] = src[i];
    }
    return out.s;
}

//the same for 4 bytes
// NOTE: this union is only 4 bytes wide where unsigned long is 32 bits.
// On platforms with a 64-bit unsigned long (e.g. LP64 Unix) `l` is wider than
// `c` and `s`, so the members do not fully overlay each other there.
typedef union{
    unsigned char c[4];
    unsigned short s[2];
    unsigned long l;
}U4;

// Drop-in replacement for the standard htonl.
//
// Converts the low 32 bits of `l` to network (big-endian) byte order. The
// returned value, stored as a 32-bit integer on this host, has its bytes in
// memory ordered most-significant first. Bits of `l` above bit 31 are ignored
// and the corresponding bits of the result are always zero.
//
// This works for any width of unsigned long (at least 32 bits) and needs no
// compile-time knowledge of the host byte order. Bytes are only rearranged
// when the host layout requires it, so on little- and big-endian hosts
// applying the function twice gives back the original 32-bit value.
unsigned long htonl(unsigned long l)
{
    // The bytes of this pattern are non-zero exactly where the host stores
    // the low 32 bits of an unsigned long.
    const unsigned long low32 = 0xFFFFFFFFUL;
    const unsigned char *layout = (const unsigned char *)&low32;
    unsigned long res = 0;   // zero first so bytes outside the window stay 0
    unsigned char *out = (unsigned char *)&res;
    unsigned int base = 0;

    // Find the first memory byte of the 4-byte window holding the low 32 bits
    // (offset 0 on little-endian hosts, sizeof(long) - 4 on big-endian ones).
    while (layout[base] == 0) {
        ++base;
    }

    // Lay the value down most-significant byte first inside that window.
    out[base + 0] = (unsigned char)((l >> 24) & 0xFFu);
    out[base + 1] = (unsigned char)((l >> 16) & 0xFFu);
    out[base + 2] = (unsigned char)((l >> 8) & 0xFFu);
    out[base + 3] = (unsigned char)(l & 0xFFu);

    return res;
}

// Overlay of a 64-bit value with byte, half-word and word views of it.
// NOTE: `l[2]` covers the same 8 bytes as the other members only where
// unsigned long is 32 bits; with a 64-bit unsigned long the union grows to
// 16 bytes and `l[1]` lies beyond `ll`.
typedef union{
    unsigned char c[8];
    unsigned char c2[2][4];
    unsigned short s[4];    
    unsigned long l[2];
    unsigned long long ll; 
}U8; 

// 64-bit counterpart of htons/htonl (there is no standard function for it).
//
// Returns a value whose in-memory bytes are the bytes of `ll` in network
// (big-endian) order. Each output byte is extracted by shifting, so the
// result does not depend on the host byte order or on the width of
// unsigned long. On little- and big-endian hosts applying the function twice
// gives back the original value.
//
// Assumes unsigned long long occupies exactly 8 bytes, like the U8 overlay.
unsigned long long htonll(unsigned long long ll)
{
    unsigned long long res = 0;
    unsigned char *out = (unsigned char *)&res;
    unsigned int i;

    // Memory byte i receives the (7 - i)-th value byte: most significant first.
    for (i = 0; i < 8; ++i) {
        out[i] = (unsigned char)((ll >> (8 * (7 - i))) & 0xFFu);
    }
    return res;
}
//or if you want to use the htonl:
// Same contract as htonll, built from two calls to this file's htonl.
//
// The 64-bit value is split into its high and low 32-bit halves by shifting.
// It is never reinterpreted as an array of unsigned long, which would read
// past the end of `ll` wherever unsigned long is 64 bits wide. Each half is
// converted with htonl, then the halves are reassembled so that the converted
// high half comes first in memory.
unsigned long long htonll2(unsigned long long ll)
{
    // The first memory byte of 1ULL is non-zero exactly when the host stores
    // the low-order half of a 64-bit value first.
    const unsigned long long probe = 1ULL;
    const int low_half_first = (*(const unsigned char *)&probe != 0);

    // Mask the htonl results to 32 bits so stray upper bits of a wide
    // unsigned long can never leak into the other half.
    const unsigned long long net_hi =
        htonl((unsigned long)((ll >> 32) & 0xFFFFFFFFULL)) & 0xFFFFFFFFUL;
    const unsigned long long net_lo =
        htonl((unsigned long)(ll & 0xFFFFFFFFULL)) & 0xFFFFFFFFUL;

    if (low_half_first) {
        // The low-order half is first in memory, so it must carry the
        // converted high half.
        return (net_lo << 32) | net_hi;
    }
    return (net_hi << 32) | net_lo;
}

// Demo: prints each sample value in hexadecimal after conversion to network
// byte order, one result per line.
int main()
{
    const unsigned short s = 0x1122;
    const unsigned long l = 0x11223344;
    const unsigned long long ll = 0x1122334455667788;

    // hex is a sticky manipulator: set it once and every later integer
    // inserted into cout is printed in hexadecimal.
    cout << hex;

    cout << htons(s) << endl;
    cout << htonl(l) << endl;
    cout << htonll(ll) << endl;
    cout << htonll2(ll) << endl;

    return 0;
}