Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba utility functions
4 : Copyright (C) Andrew Tridgell 1992-2001
5 : Copyright (C) Simo Sorce 2001
6 : Copyright (C) Andrew Bartlett 2011
7 : Copyright (C) Jeremy Allison 1992-2007
8 : Copyright (C) Martin Pool 2003
9 : Copyright (C) James Peach 2006
10 :
11 : This program is free software; you can redistribute it and/or modify
12 : it under the terms of the GNU General Public License as published by
13 : the Free Software Foundation; either version 3 of the License, or
14 : (at your option) any later version.
15 :
16 : This program is distributed in the hope that it will be useful,
17 : but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : GNU General Public License for more details.
20 :
21 : You should have received a copy of the GNU General Public License
22 : along with this program. If not, see <http://www.gnu.org/licenses/>.
23 : */
24 :
25 : #include "replace.h"
26 : #include "system/locale.h"
27 : #include "charset.h"
28 : #include "lib/util/fault.h"
29 :
30 : #ifdef strcasecmp
31 : #undef strcasecmp
32 : #endif
33 : #ifdef strncasecmp
34 : #undef strncasecmp
35 : #endif
36 :
37 :
38 : /**
39 : Case insensitive string compararison, handle specified for testing
40 : **/
41 370816012 : _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
42 : const char *s1, const char *s2)
43 : {
44 370816012 : codepoint_t c1=0, c2=0;
45 370816012 : codepoint_t u1=0, u2=0;
46 370816012 : codepoint_t l1=0, l2=0;
47 : size_t size1, size2;
48 :
49 : /* handle null ptr comparisons to simplify the use in qsort */
50 370816012 : if (s1 == s2) return 0;
51 370788439 : if (s1 == NULL) return -1;
52 370788437 : if (s2 == NULL) return 1;
53 :
54 1686422209 : while (*s1 && *s2) {
55 1332576598 : c1 = next_codepoint_handle(iconv_handle, s1, &size1);
56 1332576598 : c2 = next_codepoint_handle(iconv_handle, s2, &size2);
57 :
58 1335941653 : if (c1 == INVALID_CODEPOINT ||
59 3365055 : c2 == INVALID_CODEPOINT) {
60 9 : return strcasecmp(s1, s2);
61 : }
62 :
63 1332576589 : s1 += size1;
64 1332576589 : s2 += size2;
65 :
66 1332576589 : if (c1 == c2) {
67 974487766 : continue;
68 : }
69 :
70 358088823 : u1 = toupper_m(c1);
71 358088823 : u2 = toupper_m(c2);
72 358088823 : if (u1 == u2) {
73 829109 : continue;
74 : }
75 :
76 357259714 : l1 = tolower_m(c1);
77 357259714 : l2 = tolower_m(c2);
78 357259714 : if (l1 == l2) {
79 0 : continue;
80 : }
81 :
82 357259714 : return l1 - l2;
83 : }
84 :
85 13528712 : return *s1 - *s2;
86 : }
87 :
88 : /**
89 : Case insensitive string compararison
90 : **/
91 370815994 : _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
92 : {
93 370815994 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
94 370815994 : return strcasecmp_m_handle(iconv_handle, s1, s2);
95 : }
96 :
97 : /**
98 : Case insensitive string compararison, length limited, handle specified for testing
99 : **/
100 4313331 : _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
101 : const char *s1, const char *s2, size_t n)
102 : {
103 4313331 : codepoint_t c1=0, c2=0;
104 4313331 : codepoint_t u1=0, u2=0;
105 4313331 : codepoint_t l1=0, l2=0;
106 : size_t size1, size2;
107 :
108 : /* handle null ptr comparisons to simplify the use in qsort */
109 4313331 : if (s1 == s2) return 0;
110 4313199 : if (s1 == NULL) return -1;
111 4313198 : if (s2 == NULL) return 1;
112 :
113 19378146 : while (*s1 && *s2 && n) {
114 14660804 : n--;
115 :
116 14660804 : c1 = next_codepoint_handle(iconv_handle, s1, &size1);
117 14660804 : c2 = next_codepoint_handle(iconv_handle, s2, &size2);
118 :
119 14695944 : if (c1 == INVALID_CODEPOINT ||
120 35140 : c2 == INVALID_CODEPOINT) {
121 : /*
122 : * n was specified in characters,
123 : * now we must convert it to bytes.
124 : * As bytes are the smallest
125 : * character unit, the following
126 : * increment and strncasecmp is always
127 : * safe.
128 : *
129 : * The source string was already known
130 : * to be n characters long, so we are
131 : * guaranteed to be able to look at the
132 : * (n remaining + size1) bytes from the
133 : * s1 position).
134 : */
135 1 : n += size1;
136 1 : return strncasecmp(s1, s2, n);
137 : }
138 :
139 14660803 : s1 += size1;
140 14660803 : s2 += size2;
141 :
142 14660803 : if (c1 == c2) {
143 11171229 : continue;
144 : }
145 :
146 3489574 : u1 = toupper_m(c1);
147 3489574 : u2 = toupper_m(c2);
148 3489574 : if (u1 == u2) {
149 23614 : continue;
150 : }
151 :
152 3465960 : l1 = tolower_m(c1);
153 3465960 : l2 = tolower_m(c2);
154 3465960 : if (l1 == l2) {
155 0 : continue;
156 : }
157 :
158 3465960 : return l1 - l2;
159 : }
160 :
161 847236 : if (n == 0) {
162 839109 : return 0;
163 : }
164 :
165 4612 : return *s1 - *s2;
166 : }
167 :
168 : /**
169 : Case insensitive string compararison, length limited
170 : **/
171 4313319 : _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
172 : {
173 4313319 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
174 4313319 : return strncasecmp_m_handle(iconv_handle, s1, s2, n);
175 : }
176 :
177 : /**
178 : * Compare 2 strings.
179 : *
180 : * @note The comparison is case-insensitive.
181 : **/
182 73809 : _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
183 : {
184 73809 : return strcasecmp_m(s1,s2) == 0;
185 : }
186 :
187 : /**
188 : Compare 2 strings (case sensitive).
189 : **/
190 12905851 : _PUBLIC_ bool strcsequal(const char *s1,const char *s2)
191 : {
192 12905851 : if (s1 == s2)
193 48 : return true;
194 12905793 : if (!s1 || !s2)
195 0 : return false;
196 :
197 12905791 : return strcmp(s1,s2) == 0;
198 : }
199 :
200 : /**
201 : * Calculate the number of units (8 or 16-bit, depending on the
202 : * destination charset), that would be needed to convert the input
203 : * string which is expected to be in in src_charset encoding to the
204 : * destination charset (which should be a unicode charset).
205 : */
206 28770897 : _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
207 : const char *s, charset_t src_charset, charset_t dst_charset)
208 : {
209 28770897 : size_t count = 0;
210 :
211 : #ifdef DEVELOPER
212 28770897 : switch (dst_charset) {
213 0 : case CH_DOS:
214 : case CH_UNIX:
215 0 : smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)");
216 28088180 : default:
217 28088180 : break;
218 : }
219 :
220 28770897 : switch (src_charset) {
221 0 : case CH_UTF16LE:
222 : case CH_UTF16BE:
223 0 : smb_panic("cannot call strlen_m_ext() with a UTF16 src charset (must be DOS, UNIX, DISPLAY or UTF8)");
224 28088180 : default:
225 28088180 : break;
226 : }
227 : #endif
228 28770897 : if (!s) {
229 69279 : return 0;
230 : }
231 :
232 946949442 : while (*s && !(((uint8_t)*s) & 0x80)) {
233 893444516 : s++;
234 893444516 : count++;
235 : }
236 :
237 28697850 : if (!*s) {
238 28016158 : return count;
239 : }
240 :
241 306093 : while (*s) {
242 : size_t c_size;
243 301329 : codepoint_t c = next_codepoint_handle_ext(ic, s, strnlen(s, 5),
244 : src_charset, &c_size);
245 301329 : s += c_size;
246 :
247 301329 : switch (dst_charset) {
248 293262 : case CH_UTF16LE:
249 : case CH_UTF16BE:
250 : case CH_UTF16MUNGED:
251 293262 : if (c < 0x10000) {
252 : /* Unicode char fits into 16 bits. */
253 293259 : count += 1;
254 : } else {
255 : /* Double-width unicode char - 32 bits. */
256 3 : count += 2;
257 : }
258 290971 : break;
259 8067 : case CH_UTF8:
260 : /*
261 : * this only checks ranges, and does not
262 : * check for invalid codepoints
263 : */
264 8067 : if (c < 0x80) {
265 6152 : count += 1;
266 1915 : } else if (c < 0x800) {
267 871 : count += 2;
268 1044 : } else if (c < 0x10000) {
269 1044 : count += 3;
270 : } else {
271 0 : count += 4;
272 : }
273 6822 : break;
274 0 : default:
275 : /*
276 : * non-unicode encoding:
277 : * assume that each codepoint fits into
278 : * one unit in the destination encoding.
279 : */
280 0 : count += 1;
281 : }
282 : }
283 :
284 2743 : return count;
285 : }
286 :
287 : /**
288 : * Calculate the number of units (8 or 16-bit, depending on the
289 : * destination charset), that would be needed to convert the input
290 : * string which is expected to be in in src_charset encoding to the
291 : * destination charset (which should be a unicode charset).
292 : */
293 28770885 : _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
294 : {
295 28770885 : struct smb_iconv_handle *ic = get_iconv_handle();
296 28770885 : return strlen_m_ext_handle(ic, s, src_charset, dst_charset);
297 : }
298 :
299 16211225 : _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
300 : const charset_t dst_charset)
301 : {
302 16211225 : if (!s) {
303 219118 : return 0;
304 : }
305 15990136 : return strlen_m_ext(s, src_charset, dst_charset) + 1;
306 : }
307 :
308 855742 : _PUBLIC_ size_t strlen_m_ext_term_null(const char *s,
309 : const charset_t src_charset,
310 : const charset_t dst_charset)
311 : {
312 : size_t len;
313 855742 : if (!s) {
314 1272 : return 0;
315 : }
316 854469 : len = strlen_m_ext(s, src_charset, dst_charset);
317 854469 : if (len == 0) {
318 602642 : return 0;
319 : }
320 :
321 251526 : return len+1;
322 : }
323 :
324 : /**
325 : * Calculate the number of 16-bit units that would be needed to convert
326 : * the input string which is expected to be in CH_UNIX encoding to UTF16.
327 : *
328 : * This will be the same as the number of bytes in a string for single
329 : * byte strings, but will be different for multibyte.
330 : */
331 11926274 : _PUBLIC_ size_t strlen_m(const char *s)
332 : {
333 11926274 : return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
334 : }
335 :
336 : /**
337 : Work out the number of multibyte chars in a string, including the NULL
338 : terminator.
339 : **/
340 2102729 : _PUBLIC_ size_t strlen_m_term(const char *s)
341 : {
342 2102729 : return strlen_m_ext_term(s, CH_UNIX, CH_UTF16LE);
343 : }
344 :
345 : /*
346 : * Weird helper routine for the winreg pipe: If nothing is around, return 0,
347 : * if a string is there, include the terminator.
348 : */
349 :
350 855742 : _PUBLIC_ size_t strlen_m_term_null(const char *s)
351 : {
352 855742 : return strlen_m_ext_term_null(s, CH_UNIX, CH_UTF16LE);
353 : }
354 :
355 : /**
356 : Strchr and strrchr_m are a bit complex on general multi-byte strings.
357 : **/
358 194900098 : _PUBLIC_ char *strchr_m(const char *src, char c)
359 : {
360 : const char *s;
361 194900098 : struct smb_iconv_handle *ic = get_iconv_handle();
362 194900098 : if (src == NULL) {
363 0 : return NULL;
364 : }
365 : /* characters below 0x3F are guaranteed to not appear in
366 : non-initial position in multi-byte charsets */
367 194900098 : if ((c & 0xC0) == 0) {
368 57935719 : return strchr(src, c);
369 : }
370 :
371 : /* this is quite a common operation, so we want it to be
372 : fast. We optimise for the ascii case, knowing that all our
373 : supported multi-byte character sets are ascii-compatible
374 : (ie. they match for the first 128 chars) */
375 :
376 948894057 : for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
377 812944683 : if (*s == c)
378 74609 : return discard_const_p(char, s);
379 : }
380 :
381 136887793 : if (!*s)
382 135951351 : return NULL;
383 :
384 : #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
385 : /* With compose characters we must restart from the beginning. JRA. */
386 : s = src;
387 : #endif
388 :
389 4 : while (*s) {
390 : size_t size;
391 3 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
392 3 : if (c2 == c) {
393 0 : return discard_const_p(char, s);
394 : }
395 3 : s += size;
396 : }
397 :
398 0 : return NULL;
399 : }
400 :
401 : /**
402 : * Multibyte-character version of strrchr
403 : */
404 5905733 : _PUBLIC_ char *strrchr_m(const char *s, char c)
405 : {
406 : struct smb_iconv_handle *ic;
407 5905733 : char *ret = NULL;
408 :
409 5905733 : if (s == NULL) {
410 0 : return NULL;
411 : }
412 :
413 : /* characters below 0x3F are guaranteed to not appear in
414 : non-initial position in multi-byte charsets */
415 5905733 : if ((c & 0xC0) == 0) {
416 5885759 : return strrchr(s, c);
417 : }
418 :
419 : /* this is quite a common operation, so we want it to be
420 : fast. We optimise for the ascii case, knowing that all our
421 : supported multi-byte character sets are ascii-compatible
422 : (ie. they match for the first 128 chars). Also, in Samba
423 : we only search for ascii characters in 'c' and that
424 : in all mb character sets with a compound character
425 : containing c, if 'c' is not a match at position
426 : p, then p[-1] > 0x7f. JRA. */
427 :
428 : {
429 19974 : size_t len = strlen(s);
430 19974 : const char *cp = s;
431 19974 : bool got_mb = false;
432 :
433 19974 : if (len == 0)
434 86 : return NULL;
435 19888 : cp += (len - 1);
436 : do {
437 147157 : if (c == *cp) {
438 : /* Could be a match. Part of a multibyte ? */
439 32627 : if ((cp > s) &&
440 15755 : (((unsigned char)cp[-1]) & 0x80)) {
441 : /* Yep - go slow :-( */
442 0 : got_mb = true;
443 0 : break;
444 : }
445 : /* No - we have a match ! */
446 17150 : return discard_const_p(char , cp);
447 : }
448 129763 : } while (cp-- != s);
449 2434 : if (!got_mb)
450 2434 : return NULL;
451 : }
452 :
453 0 : ic = get_iconv_handle();
454 :
455 0 : while (*s) {
456 : size_t size;
457 0 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
458 0 : if (c2 == c) {
459 0 : ret = discard_const_p(char, s);
460 : }
461 0 : s += size;
462 : }
463 :
464 0 : return ret;
465 : }
466 :
467 : /**
468 : return True if any (multi-byte) character is lower case
469 : */
470 35 : _PUBLIC_ bool strhaslower_handle(struct smb_iconv_handle *ic,
471 : const char *string)
472 : {
473 998 : while (*string) {
474 : size_t c_size;
475 : codepoint_t s;
476 : codepoint_t t;
477 :
478 950 : s = next_codepoint_handle(ic, string, &c_size);
479 950 : string += c_size;
480 :
481 950 : t = toupper_m(s);
482 :
483 950 : if (s != t) {
484 22 : return true; /* that means it has lower case chars */
485 : }
486 : }
487 :
488 0 : return false;
489 : }
490 :
491 17 : _PUBLIC_ bool strhaslower(const char *string)
492 : {
493 17 : struct smb_iconv_handle *ic = get_iconv_handle();
494 17 : return strhaslower_handle(ic, string);
495 : }
496 :
497 : /**
498 : return True if any (multi-byte) character is upper case
499 : */
500 35 : _PUBLIC_ bool strhasupper_handle(struct smb_iconv_handle *ic,
501 : const char *string)
502 : {
503 989 : while (*string) {
504 : size_t c_size;
505 : codepoint_t s;
506 : codepoint_t t;
507 :
508 941 : s = next_codepoint_handle(ic, string, &c_size);
509 941 : string += c_size;
510 :
511 941 : t = tolower_m(s);
512 :
513 941 : if (s != t) {
514 22 : return true; /* that means it has upper case chars */
515 : }
516 : }
517 :
518 0 : return false;
519 : }
520 :
521 17 : _PUBLIC_ bool strhasupper(const char *string)
522 : {
523 17 : struct smb_iconv_handle *ic = get_iconv_handle();
524 17 : return strhasupper_handle(ic, string);
525 : }
526 :
527 : /***********************************************************************
528 : strstr_m - We convert via ucs2 for now.
529 : ***********************************************************************/
530 :
531 1369503 : char *strstr_m(const char *src, const char *findstr)
532 : {
533 1369503 : TALLOC_CTX *mem_ctx = NULL;
534 : smb_ucs2_t *p;
535 : smb_ucs2_t *src_w, *find_w;
536 : const char *s;
537 : char *s2;
538 1369503 : char *retp = NULL;
539 1369503 : size_t converted_size, findstr_len = 0;
540 :
541 : /* for correctness */
542 1369503 : if (!findstr[0]) {
543 0 : return discard_const_p(char, src);
544 : }
545 :
546 : /* Samba does single character findstr calls a *lot*. */
547 1369501 : if (findstr[1] == '\0')
548 91073 : return strchr_m(src, *findstr);
549 :
550 : /* We optimise for the ascii case, knowing that all our
551 : supported multi-byte character sets are ascii-compatible
552 : (ie. they match for the first 128 chars) */
553 :
554 34105809 : for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
555 33224757 : if (*s == *findstr) {
556 1872210 : if (!findstr_len)
557 767928 : findstr_len = strlen(findstr);
558 :
559 1872210 : if (strncmp(s, findstr, findstr_len) == 0) {
560 387929 : return discard_const_p(char, s);
561 : }
562 : }
563 : }
564 :
565 887390 : if (!*s)
566 884161 : return NULL;
567 :
568 : #if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
569 : /* 'make check' fails unless we do this */
570 :
571 : /* With compose characters we must restart from the beginning. JRA. */
572 9 : s = src;
573 : #endif
574 :
575 : /*
576 : * Use get_iconv_handle() just as a non-NULL talloc ctx. In
577 : * case we leak memory, this should then be more obvious in
578 : * the talloc report.
579 : */
580 9 : mem_ctx = talloc_new(get_iconv_handle());
581 9 : if (mem_ctx == NULL) {
582 0 : return NULL;
583 : }
584 :
585 9 : if (!push_ucs2_talloc(mem_ctx, &src_w, src, &converted_size)) {
586 0 : goto done;
587 : }
588 :
589 9 : if (!push_ucs2_talloc(mem_ctx, &find_w, findstr, &converted_size)) {
590 0 : goto done;
591 : }
592 :
593 6 : p = strstr_w(src_w, find_w);
594 :
595 6 : if (!p) {
596 0 : goto done;
597 : }
598 :
599 3 : *p = 0;
600 3 : if (!pull_ucs2_talloc(mem_ctx, &s2, src_w, &converted_size)) {
601 0 : goto done;
602 : }
603 3 : retp = discard_const_p(char, (s+strlen(s2)));
604 15 : done:
605 9 : TALLOC_FREE(mem_ctx);
606 9 : return retp;
607 : }
|