Search code examples
phplinuxsocketsservernonblocking

Unix socket set O_NONBLOCK to communication with php-fpm get segmentfault


I'm writing a Web Server to support FastCGI. Using Unix socket to communicate with php-fpm, the non-block option cannot be set, which will cause the php-fpm response parser to access illegal memory.

I've used socket() to set non-blocking options, and using fcntl() to set non-blocking options can lead to illegal memory access. Once the non-blocking options are cancelled, everything works. But my Web Server is a non-blocking event-driven model, so I have to use Unix sockets for non-blocking communications.

test.cc

/**
 * Created by Crow on 12/27/18.
 * Copyright (c) 2018 Crow All rights reserved.
 * @author Crow
 * @brief  This file is test the ResponseParser
 * @details construct the request, send/write it to the peer endpoint.
 *          use tcpdump can get the result [if php-fpm listened on TCP socket]
 *          $ sudo tcpdump port xxxx -i lo -vv -w a.cap
 *          $ wireshark a.cap
 */

#include <fcntl.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <arpa/inet.h>

#include <cstdio>
#include <iostream>
#include <fstream>

#include "protocol/fastCGI/request_builder.h"
#include "protocol/fastCGI/response_parser.h"

int main()
{
  std::map<std::string, std::string> param_map;
  param_map.insert({"REMOTE_PORT", "80"});
  param_map.insert({"REMOTE_ADDR", "127.0.0.1"});
  param_map.insert({"REQUEST_METHOD", "POST"});
  param_map.insert({"SERVER_PROTOCOL", "HTTP/1.1"});
  param_map.insert({"SCRIPT_FILENAME", "/home/Crow/1.php"});
  param_map.insert({"CONTENT_LENGTH", "11"});
  std::string in_str("a=b&c=d&e=f");
  platinum::fcgi::RequestBuilder builder(3, 11, in_str, param_map);

  builder.Build();

  auto b = builder.begin_requset();
  auto p = builder.fcgi_params();
  auto i = builder.fcgi_in();

  errno = 0;
//  ssize_t ret{};
//  int fd = ::socket(AF_INET, SOCK_STREAM, 0);
//  struct sockaddr_in addr{};
//  addr.sin_family = AF_INET;
//  addr.sin_port = ::htons(9000);
//  addr.sin_addr.s_addr = ::inet_addr("127.0.0.1");
  int fd = ::socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC , 0);   <= Here! ! !
  auto flag = ::fcntl(fd, F_GETFL);
  flag |= O_NONBLOCK;
  if (::fcntl(fd, F_SETFL, flag)) {
    perror("fcntl");
    std::abort();
  }
  struct sockaddr_un addr{};
  addr.sun_family = AF_UNIX;
  ::strcpy(addr.sun_path, "/home/Crow/xfc.sock");
  auto ret = ::connect(fd, (const struct sockaddr *) &addr, sizeof(addr));
  if (ret < 0)
    perror("connect");

 ::write(fd, reinterpret_cast<void *>(&b), sizeof(b));
  for (const auto &var : p) {
    ::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
    perror("write");
  }
  for (const auto &var : i) {
    ::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
    perror("write");
  }

  char buf[10000];
  std::vector<unsigned char> data(1024);
  platinum::fcgi::ResponseParser parser;
  while (!parser.Complete()) {
    ret = ::read(fd, data.data(), 1024);
    parser.feed(data.cbegin(), static_cast<int>(ret));
    auto stdout_ = parser.transform_data();
    std::string str(stdout_.cbegin(), stdout_.cend());
    std::cout << str << std::endl;
  }
  close(fd);

  return 0;
}

fastCGI/response_parser.h

/**
 * Created on 12/26/18.
 * Copyright (c) 2018 Crow All rights reserved.
 * @author Crow
 * @brief
 */

#ifndef PLATINUM_RESPONSE_PARSER_H
#define PLATINUM_RESPONSE_PARSER_H

#include "base.h"
#include "protocol/fastCGI/component.h"
#include "protocol/parser.hpp"

namespace platinum {
namespace fcgi {

enum State : int {
  COMPLETED,
  UNCOMPLETED,
  FAULT,
};

 class ResponseParser : public platinum::Parser {
 public:
  using const_iter = std::vector<FCGIData>::const_iterator;
  ResponseParser();
  ~ResponseParser() override = default;

  long feed(const_iter iter, long length);

  auto transform_data() -> const std::vector<FCGIData> & {
    return transform_data_;
  }
  int request_id() { return request_id_; }
  long long app_status() { return app_status_; }
  State state() { return static_cast<State>(state_); }
  Status status() { return static_cast<Status>(status_); }
  bool Complete() { return complete_; }
  void Reset();

 private:
  void ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len);
  void ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len);
  void ParseEndRequest(const_iter &iter);

  std::vector<FCGIData> transform_data_;
  std::vector<FCGIData> name_value_data_;
  int request_id_;
  long transform_len_;
  long padding_len_;
  long long app_status_;

  bool complete_;
  bool in_content_;
  State state_;
  Status status_;
};

}
}

#endif //PLATINUM_RESPONSE_PARSER_H

fastCGI/response_parser.cc

/**
 * Created by Crow on 12/26/18.
 * Copyright (c) 2018 Crow All rights reserved.
 * @author Crow
 * @brief This file is Class ResponseParser. It can be reentrant
 */

#include "response_parser.h"

#include <cstring>
#include <string>

using namespace platinum::fcgi;

ResponseParser::ResponseParser()
    : request_id_(-1),
      transform_len_(0),
      padding_len_(0),
      app_status_(-1),
      complete_(false),
      in_content_(false),
      state_(State::UNCOMPLETED),
      status_(Status::FCGI_UNKNOWN_ROLE)
{
  transform_data_.reserve(1024);         // make sure reserve space for transform_data_
}

/**
 * @brief feed() the core to parse the FCGI response
 * @param iter  Buffer's cosnt iterator
 * @param length Buffer's length this time
 * @return parse result
 */

long ResponseParser::feed(ResponseParser::const_iter iter, long length)
{
  auto len_temp(length);
  transform_data_.clear();
  // To ensure the last parsing result is complete
  if (transform_len_) {
    auto len = transform_len_ > length ? length : transform_len_;
    transform_data_.insert(transform_data_.cend(), iter, iter + len);
    length -= len;                                                     // reduce the length
    iter += len;                                                       // move the iter
    transform_len_ -= len;
  }

  if (length == 0) {
    return (len_temp - length);
  } else if (padding_len_) {
    auto len = padding_len_ > length ? length : padding_len_;
    length -= len;
    iter += len;
    padding_len_ -= len;
  }

  while (length) {    // the whole parsing process continus utils length < 0
    if (state_ == State::COMPLETED
        || state_ == State::FAULT
        || length < sizeof(Header))
    {
      return (len_temp - length);
    }

    Header header(iter);                                                           // Construct a header
    iter += sizeof(Header);
    length -= sizeof(Header);
    request_id_ = header.request_id();
    auto ct_len = header.content_length();
    auto pd_len = header.padding_length();
    switch (header.type()) {
      case Type::FCGI_STDOUT: ParseStdout(iter, length, ct_len, pd_len); break;
      case Type::FCGI_STDERR: ParseStderr(iter, length, ct_len, pd_len); break;
      case Type::FCGI_END_REQUEST: ParseEndRequest(iter); break;
      default: break;
    }
  }

  return len_temp - length;
}

/**
 * @brief To parse the STDOUT part
 * @param iter buffer's iterator (ref)
 * @param length buffer's length (ref)
 * @param ct_len the content length of FCGI_STDOUT
 * @param pd_len the padding length of FCGI_STDOUT
 */
void ResponseParser::ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len)
{
  if (ct_len == 0 && pd_len == 0)
    return ;

  auto len1 = ct_len > length ? length : ct_len;                   // judge if we have enough space to deal with

  std::string str(iter, iter + len1);
  std::string::size_type pos;
  if (!in_content_) {
    if ((pos = str.find("\r\n\r\n")) != std::string::npos) {
      name_value_data_.insert(name_value_data_.cend(), iter, iter + pos);
      iter += pos + 4;
      length -= pos + 4;
      ct_len -= pos + 4;
      len1 -= pos + 4;
    } else {
      state_ = State::FAULT;
      complete_ = true;
      return ;
    }
    in_content_ = true;
  }

  transform_data_.insert(transform_data_.cend(), iter, iter + len1);
  iter += len1;
  length -= len1;
  ct_len -= len1;

  if (length == 0) {
    transform_len_ += ct_len;
    padding_len_ = pd_len;
    return ;
  }

  auto len2 = pd_len > length ? length : pd_len;
  iter += len2;
  length -= len2;
  pd_len -= len2;

  if (length == 0) {
    padding_len_ = pd_len;
    return ;
  }
}

/**
 * @beief To parse the STDERR part
 * @param iter buffer's iterator (ref)
 * @param length buffer's length (ref)
 * @param ct_len the conten length of FCGI_STDERR
 * @param pd_len the padding length of FCGI_STDERR
 */

void ResponseParser::ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len)
{
  if (ct_len == 0 && pd_len == 0)
    return ;

  auto len1 = ct_len > length ? length : ct_len;                   // judge if we have enough space to deal with

  transform_data_.insert(transform_data_.cend(), iter, iter + len1);
  iter += len1;
  length -= len1;
  ct_len -= len1;

  if (length == 0) {
    transform_len_ += ct_len;
    padding_len_ = pd_len;
    return ;
  }

  auto len2 = pd_len > length ? length : pd_len;
  iter += len2;
  length -= len2;
  pd_len -= len2;

  if (length == 0) {
    padding_len_ = pd_len;
    return ;
  }
}

/**
 * @brief To parse the EndRequestRecord part
 * @param iter Buffer's iterator
 */
void ResponseParser::ParseEndRequest(const_iter &iter)
{
  iter -= sizeof(Header);                               // back to the Header's start to constrcut the EndRequestRecord

  EndRequestRocord end_request_record(iter);

  app_status_ = end_request_record.app_status();
  status_ = end_request_record.protocol_status();

  complete_ = true;
  state_ = State::COMPLETED;
}

void ResponseParser::Reset()
{
  request_id_ = -1;
  transform_len_ = 0;
  padding_len_ = 0;
  app_status_ = -1;
  complete_ = false;
  in_content_ = false;
  state_ = State::UNCOMPLETED;
  status_ = Status::FCGI_UNKNOWN_ROLE;

  transform_data_.clear();
  name_value_data_.clear();
}

I think the key is ResponseParser, so RequestBuilder is not posted, if necessary, I can fill it up.

gdb backstrace

(gdb) r
Starting program: /home/Crow/CLionProjects/platinum/test/bin/response_parser 
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success

Program received signal SIGSEGV, Segmentation fault.
0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
    at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
30          : version_(*iter),
(gdb) bt
#0  0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
    at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
#1  0x0000000000408c70 in platinum::fcgi::ResponseParser::feed (this=0x7fffffffb830, 
    iter=<error reading variable: Cannot access memory at address 0x638000>, length=-58305)
    at /home/Crow/CLionProjects/platinum/protocol/fastCGI/response_parser.cc:65
#2  0x0000000000402ac7 in main () at /home/Crow/CLionProjects/platinum/test/response_parser_test.cc:81
(gdb) 

Without the O_NONBLOCK

[Crow@EvilCrow bin]$ sudo ./response_parser 
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
hello

the request PHP file 1.php

<?php
    echo hello;
?>

I expect to make Unix Socket work properly after setting up non-blocking IO. As I understand it, Unix sockets and INET sockets should behave identically on blocking. Why does this happen? Thank for you very much.


Solution

  •   while (!parser.Complete()) {
        ret = ::read(fd, data.data(), 1024);
        parser.feed(data.cbegin(), static_cast<int>(ret));
    

    If you are using non-blocking sockets the ::read can fail with EAGAIN. In this case ret will be -1. Your code does not properly handle this case, i.e. instead of retrying to read you essentially just call parser.feed(data.cbegin(),-1).

    The -1 will then not be specifically handled inside parser.feed either but it just assumes that the length will be positive. This ultimately results in accessing some memory which is not there which causes the segmentation fault.

    Note that you also don't properly deal with writing: your code simply assumes that all writes will succeed and write the full buffer. That this is the case in your tests is only pure luck because you are not writing much data - if you would write more it might happen that a write fails completely (only with non-blocking sockets) or that only partial data are written (also with blocking sockets).