Search code examples
Tags: php, curl, twitter

Logic using cURL, PHP and Twitter without using API


I have this code, which saves the cookies in a .txt file, and authenticates the user with Twitter

<?php

// Logs a user in to twitter.com without the official API: the home page is fetched to
// collect the session cookies and the hidden "authenticity_token" form field, then the
// credentials are POSTed to /sessions and the resulting cookies are handed to Cookies::set().
require_once 'class/Cookies.php';

// Cookie name => value map, filled in by the CURLOPT_HEADERFUNCTION callback below.
$cookie = [];

// Hard-coded here; presumably placeholders for values submitted by a login form.
$username = 'formUser';
$password = 'formPass';

$index_url = 'https://twitter.com';

// Request 1: GET the home page. The same handle is reused for the login POST further down.
$token = curl_init();
curl_setopt_array($token, [
      CURLOPT_URL             => $index_url,
      CURLOPT_CUSTOMREQUEST   => 'GET',
      CURLOPT_RETURNTRANSFER  => true,
      // NOTE(review): peer certificate verification is switched off here.
      CURLOPT_SSL_VERIFYPEER  => false,
      CURLOPT_SSL_VERIFYHOST  => 2,
      // The user agent of the visitor calling this script is forwarded to Twitter.
      CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
      //CURLOPT_COOKIEFILE      => __DIR__ . DIRECTORY_SEPARATOR . 'cookies' . DIRECTORY_SEPARATOR . $username . '.txt',
      // cURL writes the cookies it knows about to cookies/<username>.txt when the handle is closed.
      CURLOPT_COOKIEJAR       => __DIR__ . DIRECTORY_SEPARATOR . 'cookies' . DIRECTORY_SEPARATOR . $username . '.txt',
      CURLOPT_COOKIESESSION   => true,
      CURLOPT_REFERER         => $index_url,
      // Include the response headers in the string returned by curl_exec().
      CURLOPT_HEADER          => true,
      // $cookie is still empty at this point, so the header sent is just "Cookie:;".
      CURLOPT_HTTPHEADER      => ['Cookie:' . http_build_query($cookie, '', ';') . ';'],
      // Called by cURL for every response header line: stores the first name=value pair of each
      // Set-Cookie header in $cookie (captured by reference) and returns the number of bytes handled.
      CURLOPT_HEADERFUNCTION => function ($curl, $header) use (&$cookie) {
        if (stripos($header, 'Set-Cookie:') === 0) {
          // The pattern requires a ';' after the value, so a cookie without attributes is not captured.
          if (preg_match('/Set-Cookie:\s?(.*?)=(.*?);/i', $header, $matches)) {
            $cookie[$matches[1]] = urldecode($matches[2]);
          }
        }
        return strlen($header);
      }
    ]
);    
// Headers + body of the home page as one string (false if the transfer failed).
$access = curl_exec($token);

// Scrape the CSRF token out of the login form.
// NOTE(review): $matches[1] is read without checking that the pattern matched.
preg_match('/value="(.*?)" name="authenticity_token"/', $access, $matches);

$authenticity_token = $matches[1];

//var_dump($authenticity_token);

// Form body for the login POST. The three variables are interpolated as-is, without URL-encoding.
$session_post = "session[username_or_email]=$username&session[password]=$password&return_to_ssl=true&scribe_log=&redirect_after_login=%2F&authenticity_token=$authenticity_token";



$session_url = 'https://twitter.com/sessions';

// Request 2: POST the credentials on the same handle. Options set for request 1 and not
// overridden here (SSL settings, cookie jar, referer, header callback) stay in effect.
curl_setopt_array($token, [
      CURLOPT_URL             => $session_url,
      CURLOPT_CUSTOMREQUEST   => 'POST',
      CURLOPT_POSTFIELDS      => $session_post,
      CURLOPT_RETURNTRANSFER  => true,
      CURLOPT_HTTPHEADER      => [
        "Content-type: application/x-www-form-urlencoded",
        // Replays the cookies captured during request 1 (values are URL-encoded by http_build_query).
        'Cookie: '. http_build_query($cookie, '', ';').';'
      ],
      CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
      CURLOPT_HEADER          => true,
      // Follow at most two redirects, over HTTP or HTTPS only.
      CURLOPT_FOLLOWLOCATION  => true,
      CURLOPT_MAXREDIRS       => 2,
      CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
      // Bitmask; 2 presumably means "keep the POST method on a 302 redirect" - confirm against the cURL docs.
      CURLOPT_POSTREDIR       => 2,
      CURLOPT_AUTOREFERER     => 1
  ]

);
$auth = curl_exec($token);

// NOTE(review): debug output; it is emitted before the JSON document echoed below.
var_dump($cookie);

// The login is considered successful when Twitter has set an "auth_token" cookie.
if (isset($cookie['auth_token']))
{
  // FILTER_SANITIZE_NUMBER_INT strips everything except digits and the +/- signs.
  // NOTE(review): 'twid', 'ct0' and 'kdt' are read without an isset() check.
  $twid = filter_var($cookie['twid'], FILTER_SANITIZE_NUMBER_INT);

  // Cookies is the project class loaded above; presumably it persists each value for later requests.
  Cookies::set('login_token', $cookie['ct0']);
  Cookies::set('kdt', $cookie['kdt']);
  Cookies::set('user_id', $twid);
  Cookies::set('auth_token', $cookie['auth_token']);
  Cookies::set('username', $username);


  echo json_encode(array(
    "status"      => "success",
    "message"     => "Authentication successful, we are redirecting you.",
  ));
}
else
{
  echo json_encode(
    array(
      "status" => "error",
      'message'=> "Unable to authenticate with Twitter.",
    ));
}

And this code that captures the information of the logged in user:

<?php

// Downloads the profile page of one user and prints a few fields taken from the JSON
// stored in the "value" attribute of the page's "init-data" element.
$username = 'sessionUser';

$url = 'https://twitter.com/' . $username;

$user = curl_init();
// No cookie option or Cookie header is set below, so the cookies saved by the login script are not sent.
curl_setopt_array($user, [
      CURLOPT_URL             => $url,
      CURLOPT_CUSTOMREQUEST   => 'GET',
      CURLOPT_CAINFO          => 'cacert-2017-06-07.pem',
      CURLOPT_RETURNTRANSFER  => true,
      // NOTE(review): peer verification is disabled, so the CA bundle above presumably has no effect.
      CURLOPT_SSL_VERIFYPEER  => false,
      CURLOPT_SSL_VERIFYHOST  => 2,
      // NOTE(review): a Content-type request header on a GET request that carries no body.
      CURLOPT_HTTPHEADER      => [
        "Content-type:text/html;charset=utf-8",
      ],
      CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
      // Response headers are returned in front of the body; they are split off again below.
      CURLOPT_HEADER          => true,
      CURLOPT_FOLLOWLOCATION  => true,
      CURLOPT_MAXREDIRS       => 2,
      CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
      CURLOPT_POSTREDIR       => 2,
      CURLOPT_AUTOREFERER     => 1,
      CURLOPT_ENCODING        => "gzip"
  ]
);

$user_info = curl_exec($user);

// Separate the header block from the body using the header size reported by cURL.
$header_size = curl_getinfo($user, CURLINFO_HEADER_SIZE);
$header = substr($user_info, 0, $header_size);
$body = substr($user_info, $header_size);

// NOTE(review): the first constructor argument is the XML version of the document, not an HTML version.
$dom = new DOMDocument("5.0", "utf-8");
// @ silences the parser warnings raised for markup that is not well-formed.
@$dom->loadHTML($body);

// NOTE(review): getElementById() returns null when the element is missing; that case is not handled.
$data = json_decode($dom->getElementById("init-data")->getAttribute("value"));

// NOTE(review): the first label reads "Nome" (name) although the value printed is the numeric id.
echo "Nome: ", $data->profile_user->id, PHP_EOL;
echo "Nome: ", $data->profile_user->name, PHP_EOL;
echo "Usuário: ", $data->profile_user->screen_name, PHP_EOL;
echo "Foto de perfil: ", $data->profile_user->profile_image_url, PHP_EOL;

I need help using the cookies of the users (saved in the .txt file or in a database) to implement a follower exchange.

How can I do this?

EDIT

Whoever downvoted: please leave a comment explaining why.

EDIT 2

File follow.php

<?php

// Attempt to follow a user through the REST endpoint friendships/create.json.
// ROOT and SEPARATOR are presumably defined in modules/config.php - confirm.
require_once '../modules/config.php';
require_once '../modules/class/Cookies.php';


// Screen name previously stored by the login script under the key 'username'.
$username = Cookies::get('username');

$friend_url = 'https://api.twitter.com/1.1/friendships/create.json';

$friend = curl_init();

// NOTE(review): no Authorization header and no cookies are attached to this request; the
// response dump below shows the API rejecting it with error 215 "Bad Authentication data.".
curl_setopt_array($friend, [
        CURLOPT_URL             => $friend_url,
        CURLOPT_SSL_VERIFYPEER  => 1,
        CURLOPT_SSL_VERIFYHOST  => 2,
        CURLOPT_CAINFO          => ROOT . 'modules' . SEPARATOR . 'cacert' . SEPARATOR . 'cacert-2017-06-07.pem',
        CURLOPT_CUSTOMREQUEST   => 'POST',
        // Form-style body; $username is appended without URL-encoding.
        CURLOPT_POSTFIELDS      => 'screen_name=' . $username,
        CURLOPT_USERAGENT       => $_SERVER['HTTP_USER_AGENT'],
        CURLOPT_RETURNTRANSFER  => true,
        // NOTE(review): the body above is form-encoded, yet it is declared as JSON here.
        CURLOPT_HTTPHEADER      => [
            "Content-type: application/json; charset=utf-8",
      ],
        // Return the response headers together with the body.
        CURLOPT_HEADER          => true,
    ]

);

$response  = curl_exec($friend);

// Debug dump of the raw response (headers + body).
var_dump($response);

Response:

C:\wamp64\www\brfollow\api\follow.php:32:string 'HTTP/1.1 400 Bad Request
content-length: 62
content-type: application/json; charset=utf-8
date: Fri, 07 Jul 2017 08:09:54 GMT
server: tsa_d
set-cookie: guest_id=v1%3A149941499419523606; Domain=.twitter.com; Path=/; Expires=Sun, 07-Jul-2019 08:09:54 UTC
strict-transport-security: max-age=631138519
x-connection-hash: 9e951d1215095efa246c5b852acd2e8a
x-response-time: 131
x-tsa-request-body-time: 0

{"errors":[{"code":215,"message":"Bad Authentication data."}]}' (length=472)

Solution

  • First, some notes on your existing code: don't use CURLOPT_CUSTOMREQUEST for GET and POST requests. For GET, use CURLOPT_HTTPGET => true (note that GET is already libcurl's default request method), and for POST, use CURLOPT_POST => true.

    This line, preg_match('/value="(.*?)" name="authenticity_token"/', $access, $matches);, will break if they put any additional attribute between value and name, if they move the name attribute in front of value, if a similar string shows up inside an HTML comment (<!-- -->), or even if they just put a second space between value and name. Parsing HTML with regular expressions is generally a bad idea.

    a much more robust approach would be:

    // Parse the page with a DOMDocument instance: calling DOMDocument::loadHTML() statically
    // is deprecated on PHP 7 and a fatal error on PHP 8.
    $dom = new DOMDocument();
    @$dom->loadHTML($access); // @ silences the warnings libxml raises for real-world (invalid) markup
    // Read the value of the first <input name="authenticity_token">, wherever its attributes are placed.
    $authenticity_token = (new DOMXPath($dom))->query("//input[@name='authenticity_token']")->item(0)->getAttribute("value");
    

    In this line, you make the same mistake 3 times:

    $session_post = "session[username_or_email]=$username&session[password]=$password&return_to_ssl=true&scribe_log=&redirect_after_login=%2F&authenticity_token=$authenticity_token";
    

    You don't urlencode $username, $password and $authenticity_token. That means that if any of the three contains a character with a special meaning in the application/x-www-form-urlencoded format (this includes spaces, &, =, [, Æ, Ø, Å and many others), the server will receive the wrong data. The easy solution is to call urlencode() on each of them; the pretty solution is to build the string with http_build_query, like this:

    // Build the login form body; http_build_query() URL-encodes every key and value and
    // renders the nested array as session[username_or_email]=...&session[password]=...
    $session_post = http_build_query ( array (
            'session' => array (
                    'username_or_email' => $username,
                    'password' => $password
            ),
            // must be the string 'true': a boolean true would be encoded as "1"
            'return_to_ssl' => 'true',
            'scribe_log' => '',
            'redirect_after_login' => '/',
            'authenticity_token' => $authenticity_token
    ) );
    

    also you make the same mistake on this line:

        CURLOPT_POSTFIELDS      => 'screen_name=' . $username,
    

    and this line must have been added by mistake:

      CURLOPT_HTTPHEADER      => [
        "Content-type:text/html;charset=utf-8",
      ],
    

    it is a GET request with no request body, thus there is no content-type, because there is no content, so there's no way that content-type header declaration is supposed to be there, get rid of it.

    this line

      CURLOPT_ENCODING        => "gzip"
    

    will break your code if curl was not compiled with gzip support and the server actually decides to use gzip: you will get unintelligible binary data, and you provide no code to handle it. A much more robust approach is to set it to the empty string "": curl then advertises every encoding that libcurl was compiled with and decodes the response for you on the fly (usually gzip and deflate; it is also future-proof, because encodings supported in the future are added automatically).

    this line

        "Content-type: application/x-www-form-urlencoded",
    

    Don't add this header manually. libcurl automatically detects the application/x-www-form-urlencoded and multipart/form-data encodings and sets the appropriate Content-Type header itself. And unlike you, libcurl won't make typos in doing so.

    Now, the next step is to get all your current followers and send a follow request to each of them. You say you don't want to use the API, but THERE IS LITERALLY NO WAY to avoid it (short of hacking Twitter's databases, of course); even the "follow" button in Twitter's own JavaScript uses the API. The good news is that you can get away with using the API token of Twitter's JavaScript client, and thus do not need a token of your own. This may sound easy in theory, but it really is not. Nevertheless, here is an example implementation with hhb_curl (from https://github.com/divinity76/hhb_.inc.php/blob/master/hhb_.inc.php ) that gets a list of your followers and sends a follow request to each one, using Twitter's own API key (extracted automatically):

    <?php
    declare(strict_types = 1);
    require_once ('hhb_.inc.php');
    const USERNAME = '???';
    const PASSWORD = '???';
    
    $hc = new hhb_curl ( 'https://twitter.com/login', true );
    $hc->exec ();
    // get csrf token: the ct0=... value found in the response headers; it is sent back later in the x-csrf-token header
    $csrf_token = [ ];
    preg_match ( '/\s+ct0\s*=\s*(.*?)\;/', implode ( "\n", $hc->getResponseHeaders () ), $csrf_token );
    if (count ( $csrf_token ) !== 2) {
        throw new Exception ( 'failed to extract the csrf token!' );
    }
    $csrf_token = $csrf_token [1];
    // to log in: parse the login form, fill in the credentials and POST all of its fields to /sessions
    // (HTML goes through loadHtmlDocument(), declared at the end of this script: a static DOMDocument::loadHTML() call is fatal on PHP 8)
    $html = $hc->getStdOut ();
    $domd = loadHtmlDocument ( $html );
    $inputs = getDOMDocumentFormInputs ( $domd, true ) [0]; // << not sure why, but they have 6 seemingly duplicate login forms. the first 1 works fine.
    $inputs = DOMInputsToArray ( $inputs );
    $inputs ['session[username_or_email]'] = USERNAME;
    $inputs ['session[password]'] = PASSWORD;
    // http_build_query() URL-encodes the credentials together with the form fields scraped above
    $html = $hc->setopt_array ( array (
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => http_build_query ( $inputs ),
            CURLOPT_URL => 'https://twitter.com/sessions'
    ) )->exec ()->getResponseBody ();
    $domd = loadHtmlDocument ( $html );
    $xpath = new DOMXPath ( $domd );
    // the login is treated as failed when the final (effective) URL contains "login/error"
    if (false !== stripos ( $hc->getinfo ( CURLINFO_EFFECTIVE_URL ), 'login/error' )) {
        throw new Exception ( 'failed to login!' );
    }
    echo "logged in!", PHP_EOL;
    // now to get the api key
    $js = $hc->exec ( 'https://abs.twimg.com/k/en/init.en.c5a67fc1f42cedcdbbcd.js' )->getResponseBody ();
    // NOTE(review): the file name above looks like it contains a build hash, so this URL presumably goes stale - confirm
    // fragile regex: assumes that there's only 1x i="114 characters"; , and that the api key is exactly 114 characters.
    preg_match ( '/i\s*\=\s*\"([^\"]{114})\"\s*\;/iu', $js, $matches );
    // on success $matches holds the whole match plus the captured key, i.e. exactly 2 entries
    if (count ( $matches ) !== 2) {
        throw new RuntimeException ( 'failed to extract the api auth key!' );
    }
    $api_auth_key = $matches [1];

    $myurl = 'https://twitter.com/' . ltrim ( $xpath->query ( '//a[contains(@class,\'DashboardProfileCard\')]' )->item ( 0 )->getAttribute ( "href" ), '/' );
    echo 'myurl: ' . $myurl . PHP_EOL;
    // $myurl = 'https://twitter.com/scaleway';
    $myurl .= '/followers';
    $html = $hc->exec ( $myurl )->getResponseBody ();
    $domd = loadHtmlDocument ( $html );
    $xpath = new DOMXPath ( $domd );
    // every follower shown on the page is rendered as a "ProfileCard-content" div
    foreach ( $xpath->query ( '//div[contains(@class,\'ProfileCard-content\')]' ) as $followerDiv ) {
        $name = $xpath->query ( './/*[@data-screen-name]', $followerDiv )->item ( 0 )->getAttribute ( "data-screen-name" );
        $user_id = $xpath->query ( './/*[@data-user-id]', $followerDiv )->item ( 0 )->getAttribute ( "data-user-id" );
        echo "following " . $name . ' (' . $user_id . ')' . PHP_EOL;
        // first send the OPTIONS (CORS preflight-style) request that precedes the actual POST
        try {
            $hc->setopt_array ( array (
                    CURLOPT_CUSTOMREQUEST => 'OPTIONS',
                    CURLOPT_URL => 'https://api.twitter.com/1.1/friendships/create.json',
                    CURLOPT_HTTPHEADER => array (
                            'Access-Control-Request-Method: POST',
                            'Access-Control-Request-Headers: authorization,x-csrf-token,x-twitter-active-user,x-twitter-auth-type',
                            'DNT: 1',
                            'Origin: https://twitter.com'
                    )
            ) )->exec ();
        } catch ( Throwable $ex ) {
            // deliberately ignored: there is a bug where it sometimes responds http 200 OK, but with 0 bytes content.
            // hhb_curl doesn't like this, as 0-bytes-responses should actually be http 201.
            // feel free to contact twitter with a bugreport.
        }
        // drop the custom OPTIONS verb again before issuing the real POST
        $hc->setopt ( CURLOPT_CUSTOMREQUEST, NULL );
        $hc->setopt_array ( array (
                CURLOPT_POST => true,
                CURLOPT_URL => 'https://api.twitter.com/1.1/friendships/create.json',
                CURLOPT_POSTFIELDS => http_build_query ( array (
                        'challenges_passed' => 'false',
                        'handles_challenges' => '1',
                        'impression_id' => '',
                        'include_blocked_by' => 'true',
                        'include_blocking' => 'true',
                        'include_can_dm' => 'true',
                        'include_followed_by' => 'true',
                        'include_mute_edge' => 'true',
                        'skip_status' => 'true',
                        'user_id' => $user_id
                ) ),
                // authenticate with the key extracted from the script plus the csrf token from the login page
                CURLOPT_HTTPHEADER => array (
                        'Accept: application/json, text/javascript, */*; q=0.01',
                        'Accept-Language: en-US,en;q=0.5',
                        'Authorization: Bearer ' . $api_auth_key,
                        'x-twitter-auth-type: OAuth2Session',
                        'x-csrf-token: ' . $csrf_token,
                        'X-Twitter-Active-User: yes',
                        'DNT: 1',
                        'Origin: https://twitter.com',
                        'Referer: ' . $myurl
                )
        ) )->exec ();
    }

    /**
     * Parses an HTML string into a new DOMDocument.
     *
     * Replaces the former static call @DOMDocument::loadHTML(), which is deprecated on
     * PHP 7 and throws an Error on PHP 8.
     *
     * @param string $html the markup to parse
     * @return \DOMDocument the parsed document
     */
    function loadHtmlDocument(string $html): \DOMDocument {
        $domd = new DOMDocument ();
        // @: real-world markup is rarely valid, keep the parser warnings silenced as before
        @$domd->loadHTML ( $html );
        return $domd;
    }
    
    // hhb_var_dump ( $myurl );
    /**
     * Flattens a list of form controls into a name => value map, e.g. for http_build_query().
     *
     * Controls that carry a "disabled" attribute and controls without a name are skipped;
     * when several controls share a name, the last one wins.
     *
     * @param iterable $inputs form control elements (objects offering hasAttribute() and getAttribute())
     * @return array control name => content of its "value" attribute
     */
    function DOMInputsToArray($inputs): array {
        $ret = [ ];
        foreach ( $inputs as $in ) {
            if ($in->hasAttribute ( "disabled" )) {
                continue;
            }
            $name = $in->getAttribute ( "name" );
            // compare against '' instead of using empty(): empty("0") is true, which silently dropped a control named "0"
            if ($name === '') {
                continue;
            }
            $ret [$name] = $in->getAttribute ( "value" );
        }
        return $ret;
    }
    /**
     * Collects the <input> and <textarea> elements of every <form> in a document.
     *
     * Forms are keyed by their "id" attribute, falling back to "name"; forms that have neither
     * are appended under the next free integer key. Controls with a "disabled" attribute or
     * without a name are skipped. A control belongs to a form when it is a descendant of that
     * form, or when its "form" attribute equals the form's (non-empty) id.
     *
     * @param \DOMDocument $domd the parsed HTML document
     * @param bool $getOnlyFirstMatches
     *        false: [formKey => [ [controlName => [element, ...]], ... ]], one inner entry per form sharing the key;
     *        true:  [formKey => [controlName => element]], keeping the first form per key and the first control per name
     * @return array see $getOnlyFirstMatches
     */
    function getDOMDocumentFormInputs(\DOMDocument $domd, bool $getOnlyFirstMatches = false): array {
        $isDescendantOf = function (\DOMNode $descendant, \DOMNode $ancestor): bool {
            for($node = $descendant->parentNode; $node !== null; $node = $node->parentNode) {
                if ($node === $ancestor) {
                    return true;
                }
            }
            return false;
        };
        // array_merge does not work on DOMNodeLists, so copy both lists into one plain array
        // (all inputs first, then all textareas, each in document order)
        $controls = array ();
        foreach ( array (
                'input',
                'textarea'
        ) as $tagName ) {
            foreach ( $domd->getElementsByTagName ( $tagName ) as $control ) {
                $controls [] = $control;
            }
        }
        $parsedForms = array ();
        foreach ( $domd->getElementsByTagName ( 'form' ) as $form ) {
            $formId = $form->getAttribute ( "id" );
            $inputs = array ();
            foreach ( $controls as $control ) {
                if ($control->hasAttribute ( "disabled" )) {
                    // disabled controls are not submitted
                    continue;
                }
                $name = $control->getAttribute ( "name" );
                if ($name === '') {
                    // controls without a name are not submitted either
                    continue;
                }
                // a form without an id can only own its descendants; previously every control
                // of the document was attributed to such a form
                $belongsToForm = $isDescendantOf ( $control, $form ) || ($formId !== '' && $control->getAttribute ( "form" ) === $formId);
                if (! $belongsToForm) {
                    continue;
                }
                $inputs [$name] [] = $control;
            }
            $key = ($formId !== '') ? $formId : $form->getAttribute ( "name" );
            if ($key === '') {
                $parsedForms [] = array (
                        $inputs
                );
            } else {
                // append this form's controls; the old code appended an undefined variable ($tmp)
                // for the second and later forms sharing a key
                $parsedForms [$key] [] = $inputs;
            }
        }
        if ($getOnlyFirstMatches) {
            foreach ( $parsedForms as $formKey => $sameKeyForms ) {
                $firstControls = array ();
                foreach ( $sameKeyForms [0] as $controlName => $elements ) {
                    $firstControls [$controlName] = $elements [0];
                }
                $parsedForms [$formKey] = $firstControls;
            }
        }
        return $parsedForms;
    }
    

    Output:

    logged in!
    myurl: https://twitter.com/HansHenrik_
    following tianwm (53056654)
    following theburakcinar (2335591322)
    following DnR_iData (260134525)
    following 7wData (1713417312)
    following deepudeepana (783199483404226560)
    following remco_verhoef (201001391)
    following PaulVlasin (1079477118)
    ^C

    (I cancelled it manually with ^C, as I don't want to follow everyone, but it was enough to prove that it worked.)

    - Don't forget to replace the username/password in the USERNAME and PASSWORD constants (lines 4 and 5 of the script).
    - Since I don't have any followers, I used https://twitter.com/scaleway/followers as a test page with lots of followers; you can see where I faked the URL in the commented-out line 50 (// $myurl = 'https://twitter.com/scaleway';).
    - The script will probably only send a request to the followers that are visible on the followers page, which is not the full list if you have a lot of followers (rendering them all at once would crash your browser), so in that case you will have to find out how to get the full list.