I am following this tutorial here to install python: http://docs.python-guide.org/en/latest/starting/install/win/
When it gets to the part about installing setup tools and pip it links to a ez_setup.py script which I downloaded and ran. It seemed to install setup_tools. I then clicked on the link to the pip-get.py and noticed it only contained the following:
#!/usr/bin/env python
import sys
def main():
sys.exit(
"You're using an outdated location for the get-pip.py script, please "
"use the one available from https://bootstrap.pypa.io/get-pip.py"
)
if __name__ == "__main__":
main()
The referenced url contains the following seemingly dubious code. Is this malicious code or a legitimate way install pip?
NOTE: I have deleted most (like 18000) of the lines of the "binary blob" because if it is malicious I don't want this post to be an infection vector.
#!/usr/bin/env python
#
# Hi There!
# You may be wondering what this giant blob of binary data here is, you might
# even be worried that we're up to something nefarious (good for you for being
# paranoid!). This is a base85 encoding of a zip file, this zip file contains
# an entire copy of pip.
#
# Pip is a thing that installs packages, pip itself is a package that someone
# might want to install, especially if they're looking to run this get-pip.py
# script. Pip has a lot of code to deal with the security of installing
# packages, various edge cases on various platforms, and other such sort of
# "tribal knowledge" that has been encoded in its code base. Because of this
# we basically include an entire copy of pip inside this blob. We do this
# because the alternatives are attempt to implement a "minipip" that probably
# doesn't do things correctly and has weird edge cases, or compress pip itself
# down into a single file.
#
# If you're wondering how this is created, it is using an invoke task located
# in tasks/generate.py called "installer". It can be invoked by using
# ``invoke generate.installer``.
import os.path
import pkgutil
import shutil
import sys
import struct
import tempfile
# Useful for very coarse version differentiation.
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
if PY3:
iterbytes = iter
else:
def iterbytes(buf):
return (ord(byte) for byte in buf)
try:
from base64 import b85decode
except ImportError:
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
def b85decode(b):
_b85dec = [None] * 256
for i, c in enumerate(iterbytes(_b85alphabet)):
_b85dec[c] = i
padding = (-len(b)) % 5
b = b + b'~' * padding
out = []
packI = struct.Struct('!I').pack
for i in range(0, len(b), 5):
chunk = b[i:i + 5]
acc = 0
try:
for c in iterbytes(chunk):
acc = acc * 85 + _b85dec[c]
except TypeError:
for j, c in enumerate(iterbytes(chunk)):
if _b85dec[c] is None:
raise ValueError(
'bad base85 character at position %d' % (i + j)
)
raise
try:
out.append(packI(acc))
except struct.error:
raise ValueError('base85 overflow in hunk starting at byte %d'
% i)
result = b''.join(out)
if padding:
result = result[:-padding]
return result
def bootstrap(tmpdir=None):
# Import pip so we can use it to install pip and maybe setuptools too
import pip
from pip.commands.install import InstallCommand
from pip.req import InstallRequirement
# Wrapper to provide default certificate with the lowest priority
class CertInstallCommand(InstallCommand):
def parse_args(self, args):
# If cert isn't specified in config or environment, we provide our
# own certificate through defaults.
# This allows user to specify custom cert anywhere one likes:
# config, environment variable or argv.
if not self.parser.get_default_values().cert:
self.parser.defaults["cert"] = cert_path # calculated below
return super(CertInstallCommand, self).parse_args(args)
pip.commands_dict["install"] = CertInstallCommand
implicit_pip = True
implicit_setuptools = True
implicit_wheel = True
# Check if the user has requested us not to install setuptools
if "--no-setuptools" in sys.argv or os.environ.get("PIP_NO_SETUPTOOLS"):
args = [x for x in sys.argv[1:] if x != "--no-setuptools"]
implicit_setuptools = False
else:
args = sys.argv[1:]
# Check if the user has requested us not to install wheel
if "--no-wheel" in args or os.environ.get("PIP_NO_WHEEL"):
args = [x for x in args if x != "--no-wheel"]
implicit_wheel = False
# We only want to implicitly install setuptools and wheel if they don't
# already exist on the target platform.
if implicit_setuptools:
try:
import setuptools # noqa
implicit_setuptools = False
except ImportError:
pass
if implicit_wheel:
try:
import wheel # noqa
implicit_wheel = False
except ImportError:
pass
# We want to support people passing things like 'pip<8' to get-pip.py which
# will let them install a specific version. However because of the dreaded
# DoubleRequirement error if any of the args look like they might be a
# specific for one of our packages, then we'll turn off the implicit
# install of them.
for arg in args:
try:
req = InstallRequirement.from_line(arg)
except:
continue
if implicit_pip and req.name == "pip":
implicit_pip = False
elif implicit_setuptools and req.name == "setuptools":
implicit_setuptools = False
elif implicit_wheel and req.name == "wheel":
implicit_wheel = False
# Add any implicit installations to the end of our args
if implicit_pip:
args += ["pip"]
if implicit_setuptools:
args += ["setuptools"]
if implicit_wheel:
args += ["wheel"]
delete_tmpdir = False
try:
# Create a temporary directory to act as a working directory if we were
# not given one.
if tmpdir is None:
tmpdir = tempfile.mkdtemp()
delete_tmpdir = True
# We need to extract the SSL certificates from requests so that they
# can be passed to --cert
cert_path = os.path.join(tmpdir, "cacert.pem")
with open(cert_path, "wb") as cert:
cert.write(pkgutil.get_data("pip._vendor.requests", "cacert.pem"))
# Execute the included pip and use it to install the latest pip and
# setuptools from PyPI
sys.exit(pip.main(["install", "--upgrade"] + args))
finally:
# Remove our temporary directory
if delete_tmpdir and tmpdir:
shutil.rmtree(tmpdir, ignore_errors=True)
def main():
tmpdir = None
try:
# Create a temporary working directory
tmpdir = tempfile.mkdtemp()
# Unpack the zipfile into the temporary directory
pip_zip = os.path.join(tmpdir, "pip.zip")
with open(pip_zip, "wb") as fp:
fp.write(b85decode(DATA.replace(b"\n", b"")))
# Add the zipfile to sys.path so that we can import it
sys.path.insert(0, pip_zip)
# Run the bootstrap
bootstrap(tmpdir=tmpdir)
finally:
# Clean up our temporary working directory
if tmpdir:
shutil.rmtree(tmpdir, ignore_errors=True)
DATA = b"""
P)h>@6aWAK2mly|Wk_|XSV~_F001E<000jF003}la4%n9X>MtBUtcb8d7WDSZ`-yK|J{ED>nxD8%G$E
w;SJgIu%S({0^J&<?b`!VLy#@n<|0cPDLHYs{qOJYNQ#stXW2BYmPFnc-j@&WsGL3fqE+&Xr6|AP<(}
1tW?Pk$wXAk5P1kMHN}i@n?CMH3EL*CoXd9mD=gGvpFRIN(lpFh4sqU_B>P#wbpYJnS!bH_kszWzd@`
H<yy>jp@sr7ZwDlGf&a-YEanKd3T&PZSB8xIW}4#deBf88!4R+dd5;5p$#pOUa^5aT0Nk+BIptF)mwO
xjJjFJ?H+W&B2{TtylTo<j0GFr)fNUDQIC5u!#EK|<945pLJg)3w2@^h6n(oAv4gUo>xXfT^H*v|rV`
Qf>&QoJEizcI-q^mp#kf>pM6NX_ZErFw2hh^uk2n|nM{8?fo*PBWjxO8U$uZxv0l}Dh+oJ9x(83VFs)
2<8d2{Jx`7Y(rd<REF6I*yg7!+JBfyDphOs8@@)t|QEJSw*YDBeZyaWmzU6csB~+(~1M_TE}r6vxIiR
?Ik89#CMk(g<2|&Lyn3;TdKH*QW3HbSiA0U_j??u<5;{jUXYm#(!?cp#E2HYewxW0s;v~`5sv}w`DLA
iCeGrx?
)H
"""
if __name__ == "__main__":
main()
If you're talking about the script from https://bootstrap.pypa.io/get-pip.py it's totally safe*.
As a matter of fact, you could follow the instructions in the comment, decode the encoded text yourself, and see that it's perfectly fine.
If you're extra paranoid, just copy it to an airgapped computer and try executing it.
*assuming that your connection is actually secure and that pypa.io hasn't been compromised. But if that's the case then you probably have larger problems.