I'm using Python 2.7.8 to make an server from where I can download files. Problem is that many files include utf-8 characters such as čćžšđ and others. I tried decoding path but whenever I click on file name with unicode character it returns "error 404: file not found". How do I properly decode paths so it makes possible to download files with utf-8 characters and, if possible, show them as utf-8 character on index of my server. Here's my server's code, which includes what have I tried and full server code:
# -*- coding: utf-8 -*-
__version__ = "0.6"
__all__ = ["SimpleHTTPRequestHandler"]
import os
import posixpath
import BaseHTTPServer
import urllib
import cgi
import shutil
import mimetypes
from StringIO import StringIO
import SocketServer
import time
import sys
import unicodedata
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
server_version = "SimpleHTTP/" + __version__
def do_GET(self):
"""Serve a GET request."""
f = self.send_head()
if f:
self.copyfile(f, self.wfile)
f.close()
def do_HEAD(self):
"""Serve a HEAD request."""
f = self.send_head()
if f:
f.close()
def send_head(self):
path_now = self.translate_path(self.path)
path_change = (os.path.dirname(os.path.abspath(__file__)) + "/files/")
if path_now.startswith("/home/files/"):
pass
else:
os.chdir(path_change)
path = self.translate_path(self.path)
"""Those are few examples of what have I tried:
path = path1.decode('ascii', 'ignore').makePath()
path = unicodedata.normalize('NFKD', path1).encode('ascii','ignore')
path2 = path1.decode("utf-8")
path = path2.encode("utf-8")
path = path2.encode("utf-8")"""
f = None
if os.path.isdir(path):
for index in "index.html", "index.htm":
index = os.path.join(path, index)
if os.path.exists(index):
path = index
break
else:
return self.list_directory(path)
ctype = self.guess_type(path)
if ctype.startswith('text/'):
mode = 'r'
else:
mode = 'rb'
try:
f = open(path.decode(sys.getfilesystemencoding()), mode) #this doesn't work, nothing changes
size = os.path.getsize(path)
except IOError:
self.send_error(404, "File not found")
return None
self.send_response(200)
self.send_header("Content-type", ctype + "; charset=utf-8") # + " charset=utf-8"
self.send_header("Content-Length", size)
self.end_headers()
return f
def list_directory(self, path):
try:
list = os.listdir(path)
except os.error:
self.send_error(404, "No permission to list directory")
return None
list.sort(lambda a, b: cmp(a.lower(), b.lower()))
f = StringIO()
f.write("<title>Directory listing for %s</title>\n" % self.path)
f.write("<h2>Directory listing for %s</h2>\n" % self.path)
f.write("<hr>\n<ul>\n")
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name = cgi.escape(name)
if os.path.isdir(fullname):
displayname = name + "/"
linkname = name + "/"
if os.path.islink(fullname):
displayname = name + "@"
f.write('<li><a href="%s">%s</a>\n' % (linkname, displayname))
f.write("</ul>\n<hr>\n")
f.seek(0)
self.send_response(200)
self.send_header("Content-type", "text/html")
self.end_headers()
return f
def translate_path(self, path):
try:
path = posixpath.normpath(urllib.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
for word in words:
drive, word = os.path.splitdrive(word)
head, word = os.path.split(word)
if word in (os.curdir, os.pardir): continue
path = os.path.join(path, word)
return path
except Exception, e:
self.send_error(403, e)
path = posixpath.normpath(urllib.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
for word in words:
drive, word = os.path.splitdrive(word)
head, word = os.path.split(word)
if word in (os.curdir, os.pardir): continue
path = os.path.join(path, word)
return path.encode("utf-8")
def copyfile(self, source, outputfile):
shutil.copyfileobj(source, outputfile)
def guess_type(self, path):
base, ext = posixpath.splitext(path)
if self.extensions_map.has_key(ext):
return self.extensions_map[ext]
ext = ext.lower()
if self.extensions_map.has_key(ext):
return self.extensions_map[ext]
else:
return self.extensions_map['']
extensions_map = mimetypes.types_map.copy()
extensions_map.update({
'': 'application/octet-stream', # Default
'.py': 'text/plain',
'.c': 'text/plain',
'.h': 'text/plain',
})
class ForkingHTTPServer(SocketServer.ForkingMixIn, BaseHTTPServer.HTTPServer):
def finish_request(self, request, client_address):
request.settimeout(30)
BaseHTTPServer.HTTPServer.finish_request(self, request, client_address)
def test(HandlerClass = SimpleHTTPRequestHandler, ServerClass = BaseHTTPServer.HTTPServer, server_address=("192.168.1.2", 8000)):
try:
print "Server started"
srvr = ForkingHTTPServer(server_address, HandlerClass)
srvr.serve_forever() # serve_forever
except KeyboardInterrupt:
print "Closing sockets..."
time.sleep(2)
print "Server is shutting down in 3"
time.sleep(1)
print "Server is shutting down in 2"
time.sleep(1)
print "Server is shutting down in 1"
time.sleep(1)
srvr.socket.close()
if __name__ == '__main__':
test()
I hope this is all information you need. If you need anything else, just comment and I'll be glad to edit my question ;)
I just saw that I was setting charset to utf-8 inside "send_head" function, but not in "list_directory" function. Also, I edited
"; charset=utf-8"
to
'; charset="utf-8"'
and it works like a charm now.
It looks like that I was encoding/decoding righ thing all the time but wasn't setting my headers right.