Andy Polyakov | 4e155ec | 2016-07-16 23:21:39 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. |
| 3 | * |
| 4 | * Licensed under the OpenSSL license (the "License"). You may not use |
| 5 | * this file except in compliance with the License. You can obtain a copy |
| 6 | * in the file LICENSE in the source distribution or at |
| 7 | * https://www.openssl.org/source/license.html |
| 8 | */ |
| 9 | |
| 10 | #include <windows.h> |
| 11 | #include <stdlib.h> |
| 12 | #include <string.h> |
| 13 | #include <malloc.h> |
| 14 | |
| 15 | #if defined(CP_UTF8) |
| 16 | |
| 17 | static UINT saved_cp; |
| 18 | static int newargc; |
| 19 | static char **newargv; |
| 20 | |
| 21 | static void cleanup(void) |
| 22 | { |
| 23 | int i; |
| 24 | |
| 25 | SetConsoleOutputCP(saved_cp); |
| 26 | |
| 27 | for (i = 0; i < newargc; i++) |
| 28 | free(newargv[i]); |
| 29 | |
| 30 | free(newargv); |
| 31 | } |
| 32 | |
| 33 | /* |
| 34 | * Incrementally [re]allocate newargv and keep it NULL-terminated. |
| 35 | */ |
| 36 | static int validate_argv(int argc) |
| 37 | { |
| 38 | static int size = 0; |
| 39 | |
| 40 | if (argc >= size) { |
| 41 | char **ptr; |
| 42 | |
| 43 | while (argc >= size) |
| 44 | size += 64; |
| 45 | |
| 46 | ptr = realloc(newargv, size * sizeof(newargv[0])); |
| 47 | if (ptr == NULL) |
| 48 | return 0; |
| 49 | |
| 50 | (newargv = ptr)[argc] = NULL; |
| 51 | } else { |
| 52 | newargv[argc] = NULL; |
| 53 | } |
| 54 | |
| 55 | return 1; |
| 56 | } |
| 57 | |
| 58 | static int process_glob(WCHAR *wstr, int wlen) |
| 59 | { |
| 60 | int i, slash, udlen; |
| 61 | WCHAR saved_char; |
| 62 | WIN32_FIND_DATAW data; |
| 63 | HANDLE h; |
| 64 | |
| 65 | /* |
| 66 | * Note that we support wildcard characters only in filename part |
| 67 | * of the path, and not in directories. Windows users are used to |
| 68 | * this, that's why recursive glob processing is not implemented. |
| 69 | */ |
| 70 | /* |
| 71 | * Start by looking for last slash or backslash, ... |
| 72 | */ |
| 73 | for (slash = 0, i = 0; i < wlen; i++) |
| 74 | if (wstr[i] == L'/' || wstr[i] == L'\\') |
| 75 | slash = i + 1; |
| 76 | /* |
| 77 | * ... then look for asterisk or question mark in the file name. |
| 78 | */ |
| 79 | for (i = slash; i < wlen; i++) |
| 80 | if (wstr[i] == L'*' || wstr[i] == L'?') |
| 81 | break; |
| 82 | |
| 83 | if (i == wlen) |
| 84 | return 0; /* definitely not a glob */ |
| 85 | |
| 86 | saved_char = wstr[wlen]; |
| 87 | wstr[wlen] = L'\0'; |
| 88 | h = FindFirstFileW(wstr, &data); |
| 89 | wstr[wlen] = saved_char; |
| 90 | if (h == INVALID_HANDLE_VALUE) |
| 91 | return 0; /* not a valid glob, just pass... */ |
| 92 | |
| 93 | if (slash) |
| 94 | udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash, |
| 95 | NULL, 0, NULL, NULL); |
| 96 | else |
| 97 | udlen = 0; |
| 98 | |
| 99 | do { |
| 100 | int uflen; |
| 101 | char *arg; |
| 102 | |
| 103 | /* |
| 104 | * skip over . and .. |
| 105 | */ |
| 106 | if (data.cFileName[0] == L'.') { |
| 107 | if ((data.cFileName[1] == L'\0') || |
| 108 | (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0')) |
| 109 | continue; |
| 110 | } |
| 111 | |
| 112 | if (!validate_argv(newargc + 1)) |
| 113 | break; |
| 114 | |
| 115 | /* |
| 116 | * -1 below means "scan for trailing '\0' *and* count it", |
| 117 | * so that |uflen| covers even trailing '\0'. |
| 118 | */ |
| 119 | uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1, |
| 120 | NULL, 0, NULL, NULL); |
| 121 | |
| 122 | arg = malloc(udlen + uflen); |
| 123 | if (arg == NULL) |
| 124 | break; |
| 125 | |
| 126 | if (udlen) |
| 127 | WideCharToMultiByte(CP_UTF8, 0, wstr, slash, |
| 128 | arg, udlen, NULL, NULL); |
| 129 | |
| 130 | WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1, |
| 131 | arg + udlen, uflen, NULL, NULL); |
| 132 | |
| 133 | newargv[newargc++] = arg; |
| 134 | } while (FindNextFileW(h, &data)); |
| 135 | |
| 136 | CloseHandle(h); |
| 137 | |
| 138 | return 1; |
| 139 | } |
| 140 | |
| 141 | void win32_utf8argv(int *argc, char **argv[]) |
| 142 | { |
| 143 | const WCHAR *wcmdline; |
| 144 | WCHAR *warg, *wend, *p; |
| 145 | int wlen, ulen, valid = 1; |
| 146 | char *arg; |
| 147 | |
Andy Polyakov | fb5d9f1 | 2016-08-25 08:06:26 +0200 | [diff] [blame] | 148 | if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0) |
| 149 | return; |
| 150 | |
Andy Polyakov | 4e155ec | 2016-07-16 23:21:39 +0200 | [diff] [blame] | 151 | newargc = 0; |
| 152 | newargv = NULL; |
| 153 | if (!validate_argv(newargc)) |
| 154 | return; |
| 155 | |
| 156 | wcmdline = GetCommandLineW(); |
| 157 | if (wcmdline == NULL) return; |
| 158 | |
| 159 | /* |
| 160 | * make a copy of the command line, since we might have to modify it... |
| 161 | */ |
| 162 | wlen = wcslen(wcmdline); |
| 163 | p = _alloca((wlen + 1) * sizeof(WCHAR)); |
| 164 | wcscpy(p, wcmdline); |
| 165 | |
| 166 | while (*p != L'\0') { |
| 167 | int in_quote = 0; |
| 168 | |
| 169 | if (*p == L' ' || *p == L'\t') { |
| 170 | p++; /* skip over white spaces */ |
| 171 | continue; |
| 172 | } |
| 173 | |
| 174 | /* |
| 175 | * Note: because we may need to fiddle with the number of backslashes, |
| 176 | * the argument string is copied into itself. This is safe because |
| 177 | * the number of characters will never expand. |
| 178 | */ |
| 179 | warg = wend = p; |
| 180 | while (*p != L'\0' |
| 181 | && (in_quote || (*p != L' ' && *p != L'\t'))) { |
| 182 | switch (*p) { |
| 183 | case L'\\': |
| 184 | /* |
| 185 | * Microsoft documentation on how backslashes are treated |
| 186 | * is: |
| 187 | * |
| 188 | * + Backslashes are interpreted literally, unless they |
| 189 | * immediately precede a double quotation mark. |
| 190 | * + If an even number of backslashes is followed by a double |
| 191 | * quotation mark, one backslash is placed in the argv array |
| 192 | * for every pair of backslashes, and the double quotation |
| 193 | * mark is interpreted as a string delimiter. |
| 194 | * + If an odd number of backslashes is followed by a double |
| 195 | * quotation mark, one backslash is placed in the argv array |
| 196 | * for every pair of backslashes, and the double quotation |
| 197 | * mark is "escaped" by the remaining backslash, causing a |
| 198 | * literal double quotation mark (") to be placed in argv. |
| 199 | * |
| 200 | * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx |
| 201 | * |
| 202 | * Though referred page doesn't mention it, multiple qouble |
| 203 | * quotes are also special. Pair of double quotes in quoted |
| 204 | * string is counted as single double quote. |
| 205 | */ |
| 206 | { |
| 207 | const WCHAR *q = p; |
| 208 | int i; |
| 209 | |
| 210 | while (*p == L'\\') |
| 211 | p++; |
| 212 | |
| 213 | if (*p == L'"') { |
| 214 | int i; |
| 215 | |
| 216 | for (i = (p - q) / 2; i > 0; i--) |
| 217 | *wend++ = L'\\'; |
| 218 | |
| 219 | /* |
| 220 | * if odd amount of backslashes before the quote, |
| 221 | * said quote is part of the argument, not a delimiter |
| 222 | */ |
| 223 | if ((p - q) % 2 == 1) |
| 224 | *wend++ = *p++; |
| 225 | } else { |
| 226 | for (i = p - q; i > 0; i--) |
| 227 | *wend++ = L'\\'; |
| 228 | } |
| 229 | } |
| 230 | break; |
| 231 | case L'"': |
| 232 | /* |
| 233 | * Without the preceding backslash (or when preceded with an |
| 234 | * even number of backslashes), the double quote is a simple |
| 235 | * string delimiter and just slightly change the parsing state |
| 236 | */ |
| 237 | if (in_quote && p[1] == L'"') |
| 238 | *wend++ = *p++; |
| 239 | else |
| 240 | in_quote = !in_quote; |
| 241 | p++; |
| 242 | break; |
| 243 | default: |
| 244 | /* |
| 245 | * Any other non-delimiter character is just taken verbatim |
| 246 | */ |
| 247 | *wend++ = *p++; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | wlen = wend - warg; |
| 252 | |
| 253 | if (wlen == 0 || !process_glob(warg, wlen)) { |
| 254 | if (!validate_argv(newargc + 1)) { |
| 255 | valid = 0; |
| 256 | break; |
| 257 | } |
| 258 | |
| 259 | ulen = 0; |
| 260 | if (wlen > 0) { |
| 261 | ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen, |
| 262 | NULL, 0, NULL, NULL); |
| 263 | if (ulen <= 0) |
| 264 | continue; |
| 265 | } |
| 266 | |
| 267 | arg = malloc(ulen + 1); |
| 268 | if (arg == NULL) { |
| 269 | valid = 0; |
| 270 | break; |
| 271 | } |
| 272 | |
| 273 | if (wlen > 0) |
| 274 | WideCharToMultiByte(CP_UTF8, 0, warg, wlen, |
| 275 | arg, ulen, NULL, NULL); |
| 276 | arg[ulen] = '\0'; |
| 277 | |
| 278 | newargv[newargc++] = arg; |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | if (valid) { |
| 283 | saved_cp = GetConsoleOutputCP(); |
| 284 | SetConsoleOutputCP(CP_UTF8); |
| 285 | |
| 286 | *argc = newargc; |
| 287 | *argv = newargv; |
| 288 | |
| 289 | atexit(cleanup); |
| 290 | } else if (newargv != NULL) { |
| 291 | int i; |
| 292 | |
| 293 | for (i = 0; i < newargc; i++) |
| 294 | free(newargv[i]); |
| 295 | |
| 296 | free(newargv); |
| 297 | |
| 298 | newargc = 0; |
| 299 | newargv = NULL; |
| 300 | } |
| 301 | |
| 302 | return; |
| 303 | } |
| 304 | #else |
/*
 * Fallback used when CP_UTF8 is not defined: the caller's argument count
 * and vector are left exactly as they were passed in.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
| 307 | #endif |