I am trying to download files with `chrome.downloads.download(...)`. The filename is given externally, so I don't know which characters it contains. If it contains invalid characters, `download` will throw `Error: Invalid filename`.
Is there a regex in JavaScript that replaces all, and only, the invalid Unicode characters (at the start, middle, or end of the filename) with `_`?
Or is there documentation listing the rules for a filename in Chrome?
Is there a way to make Chrome replace invalid characters in my filename, instead of throwing an error?
Chrome disallows more characters than common filesystems (e.g. NTFS), and I am not sure of the exact definition of "invalid character" in Chrome. My current regex attempt is:
// Asker's current attempt: matches a leading or trailing ".", the control
// characters U+0000-U+001F, the characters \ / : * ? " < > |, and the
// zero-width joiner U+200D.
var regex = /^\.|\.$|[\x00-\x1f\\\/:*?"<>|\r\n\u200D]/g;
// replaceAll() returns a new string with every match replaced by "_";
// `filename` itself is left unchanged by this call.
filename.replaceAll(regex, '_');
But it only covers a few of the invalid Unicode characters.
I avoid using the `<a>` method to download (i.e. creating an `<a>` element with `href` and `download` attributes, then clicking on it), because I would like to create subdirectories in the downloads folder.
If the filename contains some invalid Unicode characters, or matches a reserved keyword in NTFS, then `chrome.downloads.download` will throw `Error: Invalid filename`.
At any position (start/middle/end), the following Unicode characters are invalid:
- `\p{Cc}` (`\u{0}` is allowed in the middle)
- `:` `?` `"` `*` `<` `>` `|` `~` (NTFS reserved characters)
- `\` and `/` (NTFS & Chrome treat them as path separators instead of characters in the filename, so the Invalid filename error will NOT occur)
- `\p{Cf}`
- `\p{Cn}`
The zero-width joiner `\u{200D}` is commonly used to combine emojis into a new emoji. However, this character is invalid as well, because its category is "Format characters" (`\p{Cf}`).
The regex is /[:?"*<>|~/\\\u{1}-\u{1f}\u{7f}\u{80}-\u{9f}\p{Cf}\p{Cn}]/gu
At the start/end of filename, the following Unicode characters are invalid:
- `\u{0}`
- `\p{Zl}`
- `\p{Zp}`
- `\p{Zs}`
- `.` (rule by NTFS)

The regex is /^[.\u{0}\p{Zl}\p{Zp}\p{Zs}]|[.\u{0}\p{Zl}\p{Zp}\p{Zs}]$/gu
Reserved keywords in NTFS: filenames of CON, PRN, AUX, NUL, COM1, COM2, COM3, COM4, COM5, COM6, COM7, COM8, COM9, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, LPT9 (case-insensitive), with or without a file extension, are all invalid.
The regex is /^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(?=\.|$)/gui
Other categories of Unicode characters, including private-use characters `\p{Co}` and surrogates `\p{Cs}`, are allowed at any position.
Final regex (for JavaScript):
// One global, Unicode-aware, case-insensitive pattern with four alternatives:
//   1. anywhere: : ? " * < > | ~ / \, the control characters U+0001-U+001F
//      and U+007F-U+009F, format characters (\p{Cf}) and unassigned code
//      points (\p{Cn});
//   2. first character only: ".", U+0000, or a line/paragraph/space separator;
//   3. last character only: the same set as (2);
//   4. a reserved device name (CON, PRN, AUX, NUL, COM1-9, LPT1-9) at the
//      start, when followed by "." or the end of the string.
// Note that "/" and "\" are replaced too, so to keep subdirectories the
// sanitizer has to be applied to each path component separately.
const pattern = /[:?"*<>|~/\\\u{1}-\u{1f}\u{7f}\u{80}-\u{9f}\p{Cf}\p{Cn}]|^[.\u{0}\p{Zl}\p{Zp}\p{Zs}]|[.\u{0}\p{Zl}\p{Zp}\p{Zs}]$|^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(?=\.|$)/gui;

/**
 * Returns a copy of `name` in which every match of `pattern` is replaced by "_".
 *
 * Strings are immutable, so the sanitized value is the return value; the
 * argument itself is never modified.
 *
 * @param {string} name - A single file name (one path component).
 * @returns {string} The sanitized file name.
 */
function sanitizeFilename(name) {
  return name.replaceAll(pattern, '_');
}

// Usage: const safeName = sanitizeFilename(filename);
Note: These characters/filenames are also invalid in NTFS, but sometimes NTFS just deletes the character instead of displaying an error.
I wrote a script to try each Unicode character, and ran it on Google Chrome 126.0.6478.116 on Windows 11 23H2 22631.3737.
What the script does:
1. Run `serviceWorker.postMessage('')` in the SW console to start/stop the for loop.
2. For each code point, call `chrome.downloads.download`, passing the Unicode character as the filename and the URL created.
3. Catch `Error: Invalid filename`. The character is invalid if the error is caught, otherwise valid.
4. Save the result to `chrome.storage.local`.
This finds the Unicode characters that are invalid at the start/end. Within that set, some characters are invalid in the middle of a filename as well, so we run the script again, replacing filename: char with filename: `0${char}0`
background.js:
// Pending chrome.offscreen.createDocument() promise, or null/undefined when no
// creation is in flight. Shared so that overlapping callers wait on the same
// creation instead of requesting a second document.
let creating = null;

/**
 * Makes sure the extension's offscreen document ("offscreen.html") exists.
 *
 * Returns immediately when chrome.runtime.getContexts() already reports an
 * OFFSCREEN_DOCUMENT context for that URL. Otherwise it creates the document
 * (reason: BLOBS), or waits for a creation started by an overlapping call.
 *
 * @returns {Promise<void>} Resolves once the document exists.
 * @throws Whatever chrome.offscreen.createDocument() rejects with.
 */
async function setup_offscreen_document() {
  const offscreenUrl = chrome.runtime.getURL("offscreen.html");
  const existingContexts = await chrome.runtime.getContexts({
    contextTypes: ['OFFSCREEN_DOCUMENT'],
    documentUrls: [offscreenUrl]
  });
  if (existingContexts.length > 0) {
    return;
  }

  if (creating) {
    // Another call is already creating the document; share its outcome.
    await creating;
    return;
  }

  creating = chrome.offscreen.createDocument({
    url: "offscreen.html",
    reasons: ['BLOBS'],
    justification: 'Create object URLs',
  });
  try {
    await creating;
  } finally {
    // Reset even on failure: otherwise the rejected promise would be kept and
    // every later call would fail on it without ever retrying the creation.
    creating = null;
  }
}
// True while no scan should be running; the scan loop polls it after every
// code point and the message listener toggles it.
let stop = true;
// URL handed to chrome.downloads.download for every probe; assigned by loop().
let url;

/**
 * Probes whether the single character with code point `code` is accepted as a
 * download file name, and records the verdict in chrome.storage.local under
 * the key `code` as `{ char, invalid }`.
 *
 * The returned promise settles only after the verdict has been written, so a
 * caller that awaits it can safely persist its own progress afterwards.
 *
 * @param {number} code - Unicode code point to probe.
 * @returns {Promise<void>}
 * @throws Any download error whose message is not "Invalid filename".
 */
async function test(code) {
  const char = String.fromCodePoint(code);

  let downloadId;
  try {
    downloadId = await chrome.downloads.download({ url: url, filename: char });
  } catch (error) {
    if (error.message !== "Invalid filename") {
      throw error;
    }
    await chrome.storage.local.set({ [code]: { char: char, invalid: true } });
    return;
  }

  // The download itself is irrelevant; cancelling is best-effort cleanup, so a
  // failure here is reported but must not change the recorded verdict.
  try {
    await chrome.downloads.cancel(downloadId);
  } catch (cancelError) {
    console.warn(`Could not cancel probe download ${downloadId}:`, cancelError);
  }
  await chrome.storage.local.set({ [code]: { char: char, invalid: false } });
}
/**
 * Scans code points up to U+10FFFF, probing each one with test() and saving
 * the last finished code point under the storage key "last" so that a later
 * run resumes right after it.
 *
 * A fresh run (no "last" stored yet) starts at U+0000. After every 10000 code
 * points of a run the browser's download history is cleared. The scan ends as
 * soon as `stop` is true after finishing a code point.
 *
 * @returns {Promise<void>} Resolves when the scan stops or reaches U+10FFFF.
 * @throws Anything thrown by test() or by the chrome.* calls it awaits.
 */
async function loop() {
  await setup_offscreen_document();
  url = await chrome.runtime.sendMessage(undefined, {});

  const { last } = await chrome.storage.local.get('last');
  // -1 (not 0) when nothing is stored, so that the first run also probes
  // U+0000 instead of silently skipping it.
  const start = last ?? -1;

  for (let i = start + 1; i <= 0x10ffff; i++) {
    await test(i);
    await chrome.storage.local.set({ last: i });
    if ((i - start) % 10000 === 0) {
      await chrome.browsingData.removeDownloads({});
    }
    if (stop) {
      break;
    }
  }
}
/**
 * Starts the scan in the background: clears the `stop` flag and launches
 * loop() without waiting for it.
 *
 * When the scan ends for any reason (stopped, finished, or failed) `stop` is
 * set back to true, so the next toggle starts a new scan instead of "stopping"
 * one that is no longer running. A failure is logged rather than left as an
 * unhandled promise rejection.
 */
function start_loop() {
  stop = false;
  loop()
    .catch((error) => {
      console.error('Unicode filename scan aborted:', error);
    })
    .finally(() => {
      stop = true;
    });
}
/**
 * Asks a running scan to halt by raising the `stop` flag; the scan loop
 * checks the flag after each code point.
 */
function stop_loop() {
  stop = true;
}

// Every message posted to the service worker acts as a toggle: it starts the
// scan when it is stopped and stops it when it is running. The message
// content is ignored.
self.addEventListener('message', () => {
  const toggle = stop ? start_loop : stop_loop;
  toggle();
});
manifest.json:
{
"manifest_version": 3,
"name": "Unicode Test",
"version": "1.0",
"description": "Test.",
"background": {
"service_worker": "background.js",
"type": "module"
},
"permissions": [
"downloads",
"storage",
"unlimitedStorage",
"offscreen",
"browsingData"
]
}