Search code examples
c http-redirect tcp tcpclient

tcpclient - web client to get content in c


Here's the code I have for downloading content from a website, say, Wikipedia:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <netdb.h>
/*
 * Reports a fatal error and terminates the program.
 *
 * Prints msg, followed by the strerror() text for the current errno, to
 * stderr and then exits with status 1. Call it immediately after the
 * failing call so that errno still describes that failure.
 *
 * msg: short description of what failed. It is only read, never
 *      modified, so string literals may be passed safely.
 */
void error(const char *msg)
{
    fprintf(stderr, "%s: %s\n", msg, strerror(errno));
    exit(1);
}

/* Remote endpoint (plain HTTP on the standard port) and buffer sizes. */
static const char http_host[] = "en.wikipedia.org";
static const char http_port[] = "80";
enum { REQUEST_MAX = 1024, RECV_CHUNK = 4096 };

/*
 * Returns 1 if name can be embedded in an HTTP request line, 0 otherwise.
 *
 * Whitespace and control characters are rejected: a space would split the
 * request line, and CR/LF would let the argument inject extra headers.
 */
static int page_name_is_valid(const char *name)
{
    const unsigned char *p;

    for (p = (const unsigned char *)name; *p != '\0'; p++) {
        if (*p <= ' ' || *p == 0x7f) {
            return 0;
        }
    }
    return 1;
}

/*
 * Resolves host:port and returns a connected TCP socket.
 *
 * Every address returned by getaddrinfo() is tried in order (IPv4 or
 * IPv6) until one connects. On failure a message is printed to stderr
 * and the program exits with status 1, so the returned descriptor is
 * always valid. The caller owns the socket and must close() it.
 */
static int connect_to_host(const char *host, const char *port)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;
    struct addrinfo *ai;
    int sock = -1;
    int connect_failed = 0;
    int saved_errno = 0;
    int rc;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    /* getaddrinfo() reports failures through its return value, not errno. */
    rc = getaddrinfo(host, port, &hints, &res);
    if (rc != 0) {
        fprintf(stderr, "Can't resolve %s: %s\n", host, gai_strerror(rc));
        exit(1);
    }

    for (ai = res; ai != NULL; ai = ai->ai_next) {
        sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (sock == -1) {
            saved_errno = errno;
            continue;
        }
        if (connect(sock, ai->ai_addr, ai->ai_addrlen) == 0) {
            break;
        }
        /* Remember why this address failed before close() can change errno. */
        saved_errno = errno;
        connect_failed = 1;
        close(sock);
        sock = -1;
    }
    freeaddrinfo(res);

    if (sock == -1) {
        errno = saved_errno;
        if (connect_failed) {
            error("Can't connect to socket");
        } else {
            error("Can't open socket");
        }
    }
    return sock;
}

/*
 * Sends all len bytes of data on sock, continuing after short writes and
 * retrying when interrupted by a signal.
 *
 * Returns 0 on success, or -1 with errno set by send() on failure.
 */
static int send_all(int sock, const char *data, size_t len)
{
    size_t sent = 0;

    while (sent < len) {
        ssize_t n = send(sock, data + sent, len - sent, 0);
        if (n == -1) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        sent += (size_t)n;
    }
    return 0;
}

/*
 * Minimal HTTP client: requests /wiki/<page> from en.wikipedia.org over
 * plain HTTP and copies the raw response (status line, headers and body)
 * to stdout.
 *
 * Usage: program <page>
 *
 * Returns 0 on success. Exits with status 1 on bad arguments or on any
 * network or I/O failure.
 *
 * Note: this client speaks neither TLS nor follows redirects. The server
 * currently answers plain-HTTP requests with "301 TLS Redirect" pointing
 * at the https:// URL, so that redirect response is what gets printed.
 */
int main(int argc, char *argv[])
{
    char request[REQUEST_MAX];
    char chunk[RECV_CHUNK];
    int d_sock;
    int len;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <page>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "tcpclient");
        return 1;
    }
    if (!page_name_is_valid(argv[1])) {
        fprintf(stderr, "Invalid page name: whitespace and control "
                        "characters are not allowed\n");
        return 1;
    }

    /*
     * Build the whole request up front with a bounded write. The version
     * token must be upper-case "HTTP/1.1", and "Connection: close" makes
     * the server close the socket after the response so the read loop
     * below sees end-of-stream instead of waiting on a keep-alive.
     */
    len = snprintf(request, sizeof(request),
                   "GET /wiki/%s HTTP/1.1\r\n"
                   "Host: %s\r\n"
                   "Connection: close\r\n"
                   "\r\n",
                   argv[1], http_host);
    if (len < 0 || (size_t)len >= sizeof(request)) {
        fprintf(stderr, "Page name is too long\n");
        return 1;
    }

    d_sock = connect_to_host(http_host, http_port);

    if (send_all(d_sock, request, (size_t)len) == -1) {
        error("Error talking to the server");
    }

    /* Copy the response to stdout until the server closes the connection. */
    for (;;) {
        ssize_t n = recv(d_sock, chunk, sizeof(chunk), 0);
        if (n == 0) {
            break;
        }
        if (n == -1) {
            if (errno == EINTR) {
                continue;
            }
            error("Can't read from server");
        }
        /* fwrite keeps the output intact even if the data contains NUL bytes. */
        if (fwrite(chunk, 1, (size_t)n, stdout) != (size_t)n) {
            error("Can't write to stdout");
        }
    }

    close(d_sock);
    return 0;
}

The problem is that the output is a redirect response instead of the page content:

HTTP/1.1 301 TLS Redirect
Server: Varnish
Location: https://en.wikipedia.org/wiki/apples
Content-Length: 0
Accept-Ranges: bytes
Date: Sat, 05 Dec 2015 06:46:13 GMT
...

What happened, and why is the request being redirected? My course uses the Head First C book; I even ran the book's example and it also gets redirected, although the book's snippets show it successfully retrieving the content.


Solution

  • Wikipedia now seems to redirect all HTTP requests to HTTPS, which is why you are getting this HTTP redirect. Consider trying your code with another website that still serves plain HTTP (perhaps stackoverflow, for example).