Search code examples
c++pythoncsocketswinsockets

extra newlines at end of file transported over tcp


I have two programs, recvfile.py and sendfile.cpp. They work except that I end up with a bunch of extra newline characters at the end of the new file. I don't know how the extra spaces get there. I know the problem is sender side, because the same doesn't happen when I use python's sendall() function to send the file.

Here are the files:

jmm_sockets.c

#include <winsock.h>
#include <stdio.h>
#include <stdlib.h>

int getServerSocket(int port)
{
  WSADATA wsaData;
  if(WSAStartup(MAKEWORD(2,0), &wsaData) != 0){
    fprintf(stderr, "WSAStartup() failed\n");
    exit(1);
  }

  // create socket for incoming connections
  int servSock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if(servSock == INVALID_SOCKET){
    fprintf(stderr, "Oops: socket() failed %d\n", WSAGetLastError());
    exit(1);
  }

  // construct local address structure
  struct sockaddr_in servAddr;
  memset(&servAddr, 0, sizeof(servAddr));
  servAddr.sin_family = AF_INET;
  servAddr.sin_addr.s_addr = INADDR_ANY;
  servAddr.sin_port = htons(port);

  // bind to the local address
  int servAddrLen = sizeof(servAddr);
  if(bind(servSock, (SOCKADDR*)&servAddr, servAddrLen) == SOCKET_ERROR){
    fprintf(stderr, "Oops: bind() failed %d\n", WSAGetLastError());
    exit(1);
  }

  return servSock;
}

int getClientSocket(char* host, int port)
{
  WSADATA wsaData;
  if(WSAStartup(MAKEWORD(2,0), &wsaData) != 0){
    fprintf(stderr, "Oops: WSAStartup() failed");
    exit(1);
  }

  // create tcp socket
  int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  if(socket<0){
    fprintf(stderr, "Oops: socket() failed %d\n", WSAGetLastError());
    exit(1);
  }

  // set up serverAddr structure
  struct sockaddr_in servAddr;
  memset(&servAddr, 0, sizeof(servAddr));
  servAddr.sin_family = AF_INET;
  servAddr.sin_addr.s_addr = inet_addr(host);
  servAddr.sin_port = htons(port);

  // connecet to server address
  if(connect(sock, (SOCKADDR*)&servAddr, sizeof(servAddr)) < 0){
    fprintf(stderr, "Oops: connect() failed. %d\n", WSAGetLastError());
    exit(1);
  }

  return sock;
}

sendfile.cpp:

#include "jmm_sockets.h"
#include <windows.h>
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <sys/stat.h>
using namespace std;

int main(int argc, char** argv)
{
  int port;
  string host;
  string filename;

  if(argc==2){
    cout << "Host: ";
    cin >> host;

    cout << "Port: ";
    cin >> port;

    filename = argv[1];
  }else if (argc == 4){
    host = argv[1];
    port = atoi(argv[2]);
    filename = argv[3];
  }else{
    cerr << "Usage: " << argv[0] << " [<host> <port>] <filename>" << endl;
    exit(1);
  }

  // open file for reading
  ifstream fin;
  fin.open(filename.c_str());
  if(fin.fail()){
    cerr << "Error: opening " << filename << " failed. " << endl;
    exit(1);
  }

  // get file size
  fin.seekg(0, ios::end);
  int size = fin.tellg();
  fin.seekg(0, ios::beg);

  // open socket for sending
  int sock = getClientSocket((char*)host.c_str(), port);

  // send file size
  char buffer[16];
  itoa(size, buffer, 10);
  int i;
  for(i=0; i<strlen(buffer); i++){
    if(send(sock, &buffer[i], 1, 0)!=1){
      cerr << "Error: send() failed " << WSAGetLastError() << endl;
      exit(1);
    }
  }
  char c = '\n';
  if(send(sock, &c, 1, 0)!=1){
    fprintf(stderr, "Error: send() failed %d\n", WSAGetLastError());
    exit(1);
  }

  // recv y or n
  int recvMsgSize = recv(sock, &c, 1, 0);
  if(recvMsgSize!=1){
    fprintf(stderr, "Error: recv() failed %d\n", WSAGetLastError());
    exit(1);
  }

  if(c=='y'){
    // send entire file
    int readSoFar = 0;
    while(readSoFar < size){
      fin.get(c);
      if(send(sock, &c, 1, 0)!=1){
    cerr << "Error: send() failed " << WSAGetLastError() << endl;
    exit(1);
      }
      readSoFar++;
    }

  }else if (c=='n'){
    // leave
    cout << "Remote host declined file." << endl;
  }

  fin.close();
  closesocket(sock);
  WSACleanup();

  // 
  return 0;
}

and finally, recvfile.py:

import sys
from jmm_sockets import *
import yesno

if len(sys.argv) != 2:
    print "Usage: ", argv[0], "<port>"

s = getServerSocket(None, int(sys.argv[1]))
conn, addr = s.accept()

buffer = None
filelen = str()

# receive filesize
while 1:
    buffer = conn.recv(1)
    if buffer == '\n':
        # give it a rest
        break
    else:
        filelen = filelen + buffer

# prompt user to accept file
filelen = int(filelen)
print "file size = ", filelen,
userChoice = yesno.yesno("Accept?")
conn.send(userChoice)

# conditionally accecpt file
if bool(userChoice):
    filename = raw_input("What do you want to call the file? ")
    f = open(filename, 'w')

    buffer = None
    data = str()
    recvdBytes = 0
    while recvdBytes < filelen:
        buffer = conn.recv(1)
        recvdBytes = recvdBytes + 1
        data = data + buffer

print "File: ",
f.write(data)
print "written"
conn.close()

Solution

  • The reason you're ending up with extra newlines is because you're sending extra newlines across the socket, which is because you try to send more data than you should.

    If you checked the fail() state of your input file fin, you'd discover that it's failing on the last several calls to fin.get(c), so the value of c remains unchanged -- it stays as a newline character, which is the last character in the input file.

    This is happening because of CRLF translation: the file size you're using (the size variable) is the raw file size on disk, counting all of the CRs. But, when you open it up in text mode and read it in one byte at a time, the standard library silently translates all CRLFs into LFs, so you don't send the CRs across the socket. Hence, the number of extra newlines you get at the end of this process is equal to the number of newlines that were in the original file.

    The way to fix this is to open the file in binary mode to disable CRLF translation:

    fin.open(filename.c_str(), ios::in | ios::binary);
    

    Furthermore, you shouldn't be sending the file one byte at a time -- this is horrendously slow. If you're unlucky, you'll be sending an entire packet for each byte. If you're lucky, your OS's network stack will accumulate these multiple sends into larger packets (don't depend on that), but even then you're still making a huge number of system calls into the kernel.

    Consider refactoring your code to make fewer calls to send() and recv(), where you pass a larger number of bytes per call, e.g.:

    // Just use one call to send instead of looping over bytes and sending one
    // byte at a time.  Simpler and faster!
    send(sock, buffer, strlen(buffer), 0);