str.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. /** @file str.c
  2. * @brief String related functions
  3. *
  4. * This file implements some often used string functions.
  5. * Some functions are more portable versions of standard
  6. * functions but others are original ones.
  7. */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <assert.h>
  11. #include <stdio.h>
  12. #include <stdarg.h>
  13. #include "faux/ctype.h"
  14. #include "faux/str.h"
  15. /* TODO: Are these variables really needed? */
  16. //const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  17. //const char *lub_string_esc_regex = "^$.*+[](){}";
  18. //const char *lub_string_esc_quoted = "\\\"";
  /** @brief Release a string obtained from the faux string functions.
   *
   * Thin wrapper: the pointer is handed straight to faux_free(), which is
   * defined outside this file. The intent is that callers may pass NULL
   * without checking first, even on platforms whose free() does not
   * tolerate NULL; that guarantee is provided by faux_free(), not by this
   * function itself.
   *
   * @param [in] str String to release (intended to accept NULL).
   */
  void faux_str_free(char *str)
  {
      faux_free(str);
  }
  /** @brief Duplicates the string.
   *
   * Returns a freshly allocated copy of the NUL-terminated string. The
   * copy is made with malloc() + memcpy() rather than strdup(), because
   * strdup() is not part of ISO C before C23 and is not declared by
   * <string.h> in strict C99/C11 mode.
   *
   * @warning Resulting string must be freed by faux_str_free().
   *
   * @param [in] str String to duplicate. May be NULL.
   * @return Pointer to allocated copy, or NULL if str is NULL or the
   * allocation fails.
   */
  char *faux_str_dup(const char *str)
  {
      size_t size = 0;
      char *copy = NULL;

      if (!str)
          return NULL;

      size = strlen(str) + 1; // Payload plus the terminating '\0'
      copy = malloc(size);
      if (!copy)
          return NULL;
      memcpy(copy, str, size); // Copies the terminator as well

      return copy;
  }
  /** @brief Duplicates the first n bytes of the string.
   *
   * Copies at most n bytes of str into a newly allocated buffer and always
   * NUL-terminates the result. Copying stops early at the first '\0'
   * found within the first n bytes. The input is never read beyond its
   * first n bytes, so str may be an n-byte block without a terminator.
   *
   * The result buffer is allocated with faux_zmalloc() and holds the
   * copied bytes plus one byte for the terminating '\0'.
   *
   * @warning Resulting string must be freed by faux_str_free().
   *
   * @param [in] str String to duplicate. May be NULL.
   * @param [in] n Maximum number of bytes to copy.
   * @return Pointer to allocated string, or NULL if str is NULL or the
   * allocation fails.
   */
  char *faux_str_dupn(const char *str, size_t n)
  {
      const char *terminator = NULL;
      size_t len = 0;
      char *res = NULL;

      if (!str)
          return NULL;

      // Bounded scan instead of strlen(): only the first n bytes are
      // guaranteed to be readable.
      terminator = memchr(str, '\0', n);
      len = terminator ? (size_t)(terminator - str) : n;

      res = faux_zmalloc(len + 1);
      if (!res)
          return NULL;
      memcpy(res, str, len);
      res[len] = '\0';

      return res;
  }
  /** @brief Generates lowercase copy of input string.
   *
   * Duplicates the input with faux_str_dup() and then rewrites every
   * character of the copy through faux_ctype_tolower(). The input string
   * itself is not modified.
   *
   * @warning Resulting string must be freed by faux_str_free().
   *
   * @param [in] str String to convert.
   * @return Pointer to lowercase string copy, or NULL if the duplication
   * returned NULL.
   */
  char *faux_str_tolower(const char *str)
  {
      char *lowered = faux_str_dup(str);
      char *cur = NULL;

      if (!lowered)
          return NULL;

      for (cur = lowered; *cur; cur++)
          *cur = faux_ctype_tolower(*cur);

      return lowered;
  }
  /** @brief Generates uppercase copy of input string.
   *
   * Duplicates the input with faux_str_dup() and then rewrites every
   * character of the copy through faux_ctype_toupper(). The input string
   * itself is not modified.
   *
   * @warning Resulting string must be freed by faux_str_free().
   *
   * @param [in] str String to convert.
   * @return Pointer to uppercase string copy, or NULL if the duplication
   * returned NULL.
   */
  char *faux_str_toupper(const char *str)
  {
      char *uppered = faux_str_dup(str);
      char *cur = NULL;

      if (!uppered)
          return NULL;

      for (cur = uppered; *cur; cur++)
          *cur = faux_ctype_toupper(*cur);

      return uppered;
  }
  /** @brief Add n bytes of text to existent string.
   *
   * Appends at most n bytes of text to the string referenced by *str.
   * Appending stops early at the first '\0' found within the first n
   * bytes of text. The text is never read beyond its first n bytes, so it
   * may be an n-byte block without a terminator.
   *
   * The destination is resized with realloc(), so *str may change. If
   * *str is NULL a new string is allocated. On allocation failure *str is
   * left untouched and still valid.
   *
   * @param [in,out] str Address of first string pointer. Must not be NULL.
   * @param [in] text Text to add to the first string. If NULL, nothing is
   * appended and the current *str is returned.
   * @param [in] n Maximum number of bytes to add.
   * @return Pointer to resulting string, or NULL if str is NULL or the
   * reallocation fails.
   */
  char *faux_str_catn(char **str, const char *text, size_t n)
  {
      const char *terminator = NULL;
      size_t str_len = 0;
      size_t text_len = 0;
      char *res = NULL;

      assert(str);
      if (!str)
          return NULL;
      if (!text)
          return *str;

      str_len = (*str) ? strlen(*str) : 0;
      // Bounded scan instead of strlen(): only the first n bytes of text
      // are guaranteed to be readable.
      terminator = memchr(text, '\0', n);
      text_len = terminator ? (size_t)(terminator - text) : n;

      // Keep the result in a temporary so *str survives a failed realloc()
      res = realloc(*str, str_len + text_len + 1);
      if (!res)
          return NULL;
      memcpy(res + str_len, text, text_len);
      res[str_len + text_len] = '\0';
      *str = res;

      return res;
  }
  /** @brief Add some text to existent string.
   *
   * Appends the whole of text to the string referenced by *str by
   * delegating to faux_str_catn() with the full length of text. The
   * pointer stored in *str can change because the underlying function
   * reallocates the destination; *str may be NULL, in which case a new
   * string is allocated.
   *
   * @param [in,out] str Address of first string pointer.
   * @param [in] text Text to add to the first string. If NULL, nothing is
   * appended and the current *str is returned.
   * @return Pointer to resulting string or NULL.
   */
  char *faux_str_cat(char **str, const char *text)
  {
      if (text)
          return faux_str_catn(str, text, strlen(text));

      return *str; // Nothing to append
  }
  /** @brief Add multiple text strings to existent string.
   *
   * Appends every string from the variable argument list, in order, to
   * the string referenced by *str using faux_str_cat(). The pointer
   * stored in *str can change due to reallocation; *str may be NULL, in
   * which case a new string is allocated. The last argument must be
   * 'NULL'. It marks the end of the variable arguments list.
   *
   * Appending stops at the first failure. In that case NULL is returned
   * and *str still holds everything that was appended successfully
   * before the failure, so nothing is silently skipped in the middle of
   * the result.
   *
   * @warning If last argument is not 'NULL' then behaviour is undefined.
   *
   * @param [in,out] str Address of first string pointer. Must not be NULL.
   * @param [in] ... Strings (const char *) to append, terminated by NULL.
   * @return Pointer to resulting string, or NULL on error.
   */
  char *faux_str_vcat(char **str, ...)
  {
      va_list ap;
      const char *arg = NULL;
      char *retval = NULL;

      if (!str)
          return NULL;
      retval = *str; // Returned as is when the list is empty

      va_start(ap, str);
      while ((arg = va_arg(ap, const char *))) {
          retval = faux_str_cat(str, arg);
          if (!retval) // Don't let a later success hide this failure
              break;
      }
      va_end(ap);

      return retval;
  }
  /** @brief Allocates memory and sprintf() to it.
   *
   * Formats in two passes. The first pass runs vsnprintf() against a
   * one-byte scratch buffer only to learn the length of the formatted
   * text. The second pass formats into a buffer of exactly that length
   * plus one byte for the terminating '\0', allocated with
   * faux_zmalloc(). The caller therefore never has to guess a buffer
   * size.
   *
   * @warning The returned pointer must be freed by faux_str_free().
   *
   * @param [in] fmt Format string like the sprintf()'s fmt.
   * @param [in] ... Arguments referenced by the format string.
   * @return Allocated resulting string or NULL on error.
   */
  char *faux_str_sprintf(const char *fmt, ...)
  {
      // A real one-byte buffer is used for the measuring pass instead of
      // (NULL, 0): recent glibc accepts the latter, but some exotic
      // implementations may break on it.
      char probe[1] = "";
      char *result = NULL;
      int needed = 0;
      int written = 0;
      va_list args;

      // Pass 1: measure
      va_start(args, fmt);
      needed = vsnprintf(probe, sizeof(probe), fmt, args);
      va_end(args);
      // glibc prior to 2.0.6 returns -1 on truncation instead of the
      // required size; such a result is treated as an error here.
      if (needed < 0)
          return NULL;

      result = faux_zmalloc(needed + 1); // Extra byte for '\0'
      if (!result) // Memory problems
          return NULL;

      // Pass 2: format for real
      va_start(args, fmt);
      written = vsnprintf(result, needed + 1, fmt, args);
      va_end(args);
      if (written < 0) { // Some problems
          faux_str_free(result);
          return NULL;
      }

      return result;
  }
  /** @brief Service function to compare two chars in a portable way.
   *
   * Plain char may be signed or unsigned depending on the platform, so a
   * direct comparison can give different results for bytes above 0x7f.
   * Both arguments are therefore converted to unsigned char before the
   * difference is taken.
   *
   * @param [in] char1 First char
   * @param [in] char2 Second char
   * @return
   * < 0 if char1 < char2
   * = 0 if char1 = char2
   * > 0 if char1 > char2
   */
  static int faux_str_cmp_chars(char char1, char char2)
  {
      return (int)(unsigned char)char1 - (int)(unsigned char)char2;
  }
  /** @brief Compare n first characters of two strings ignoring case.
   *
   * The difference between this function and standard strncasecmp() is
   * that the faux function uses faux ctype functions. It can be important
   * for portability.
   *
   * At most n characters are examined. Comparison stops earlier at the
   * first difference or at the end of either string. Characters beyond
   * the first n never influence the result and are never read.
   *
   * @param [in] str1 First string to compare.
   * @param [in] str2 Second string to compare.
   * @param [in] n Number of characters to compare.
   * @return < 0, 0, > 0, see the strcasecmp().
   */
  int faux_str_casecmpn(const char *str1, const char *str2, size_t n)
  {
      const char *p1 = str1;
      const char *p2 = str2;
      size_t num = n; // Characters still allowed to be compared

      // The counter is tested first so that the byte following the n-th
      // character is never dereferenced.
      while (num != 0 && *p1 != '\0' && *p2 != '\0') {
          int res = faux_str_cmp_chars(
              faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
          if (res != 0)
              return res;
          p1++;
          p2++;
          num--;
      }
      // The remaining counter (not the original n) tells whether all n
      // characters were consumed and found equal.
      if (0 == num)
          return 0;

      // One of the strings ended early: compare the terminator against
      // the other string's current character.
      return faux_str_cmp_chars(
          faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  }
  /** @brief Compare two strings ignoring case.
   *
   * The difference between this function and standard strcasecmp() is
   * that the faux function uses faux ctype functions. It can be important
   * for portability.
   *
   * Both strings are walked in parallel; each pair of characters is
   * lowercased with faux_ctype_tolower() and compared as unsigned chars.
   *
   * @param [in] str1 First string to compare.
   * @param [in] str2 Second string to compare.
   * @return < 0, 0, > 0, see the strcasecmp().
   */
  int faux_str_casecmp(const char *str1, const char *str2)
  {
      const char *left = str1;
      const char *right = str2;

      for (; *left != '\0' && *right != '\0'; left++, right++) {
          int diff = faux_str_cmp_chars(
              faux_ctype_tolower(*left), faux_ctype_tolower(*right));
          if (diff != 0)
              return diff;
      }

      // At least one string is exhausted here; comparing the current
      // characters (one of them is '\0') orders the shorter string first.
      return faux_str_cmp_chars(
          faux_ctype_tolower(*left), faux_ctype_tolower(*right));
  }
  /** @brief Finds the first occurrence of the substring in the string
   *
   * Function is a faux version of strcasestr() function. Every position
   * of the haystack that still has at least as many characters left as
   * the needle is long is tested with faux_str_casecmpn(), limited to
   * the needle length. The first position for which that comparison
   * returns 0 is the result.
   *
   * @param [in] haystack String to find substring in it.
   * @param [in] needle Substring to find.
   * @return
   * Pointer to first occurrence of substring in the string.
   * NULL if nothing was found or on error (NULL argument).
   */
  char *faux_str_casestr(const char *haystack, const char *needle)
  {
      const char *pos = haystack;
      size_t remaining = 0; // Characters left in haystack from pos
      size_t needle_len = 0;

      assert(haystack);
      assert(needle);
      if (!haystack || !needle)
          return NULL;

      needle_len = strlen(needle);
      for (remaining = strlen(haystack);
          (*pos != '\0') && (remaining >= needle_len);
          pos++, remaining--) {
          if (faux_str_casecmpn(pos, needle, needle_len) == 0)
              return (char *)pos;
      }

      return NULL; // Not found
  }
  /** Prepare string for embedding to C-code (make escaping).
   *
   * Newline, carriage return, tab, double quote, single quote and
   * backslash are replaced by their two-character C escapes. Any other
   * byte in the range 0x00..0x1f is written as a four-character "\xNN"
   * escape. All remaining bytes are copied unchanged.
   *
   * NOTE(review): a C hex escape consumes every following hex digit, so a
   * "\xNN" escape directly followed by a literal hex-digit character
   * (for example byte 0x01 followed by 'a') presumably changes meaning
   * when the result is pasted into a C string literal - confirm whether
   * callers can hit this.
   *
   * @warning The returned pointer must be freed by faux_str_free().
   * @param [in] src String for escaping.
   * @return Escaped string or NULL on error.
   */
  char *faux_str_c_esc(const char *src)
  {
      const char *in = NULL;
      char *work = NULL; // Worst-case sized scratch buffer
      char *out = NULL; // Write position inside 'work'
      char *result = NULL;

      assert(src);
      if (!src)
          return NULL;

      // Worst case: every source character becomes something like
      // '\xff', i.e. 4 output chars for 1 input char, plus one byte
      // for '\0'.
      work = faux_zmalloc((strlen(src) * 4) + 1);
      assert(work);
      if (!work)
          return NULL;

      out = work;
      for (in = src; *in != '\0'; in++) {
          char buf[5]; // longest replacement (4 bytes) + '\0'
          const char *repl = buf; // Text emitted for this character
          size_t repl_len = 0;

          switch (*in) {
          case '\n':
              repl = "\\n";
              break;
          case '\"':
              repl = "\\\"";
              break;
          case '\\':
              repl = "\\\\";
              break;
          case '\'':
              repl = "\\\'";
              break;
          case '\r':
              repl = "\\r";
              break;
          case '\t':
              repl = "\\t";
              break;
          default:
              // Codes 0x00..0x1f have the three top bits clear:
              // those are the control characters.
              if (((unsigned char)*in & 0xe0) == 0) {
                  snprintf(buf, sizeof(buf), "\\x%02x",
                      (unsigned char)*in);
                  buf[4] = '\0'; // for safety
              } else {
                  buf[0] = *in; // Common character
                  buf[1] = '\0';
              }
              break;
          }

          repl_len = strlen(repl);
          // No terminator is written here: the tail of 'work' is
          // expected to be already zeroed by faux_zmalloc().
          memcpy(out, repl, repl_len);
          out += repl_len;
      }

      // Hand back a right-sized copy and drop the oversized buffer
      result = faux_str_dup(work);
      faux_str_free(work);

      return result;
  }
  #define BYTE_CONV_LEN 4 // Length of one byte converted to string

  /** Prepare binary block for embedding to C-code.
   *
   * Every one of the n source bytes is converted to a four-character
   * "\xNN" hex escape, so the result is exactly n * BYTE_CONV_LEN
   * characters long. The buffer is obtained from faux_zmalloc() with one
   * extra byte; no terminator is written explicitly, the trailing '\0'
   * is expected to come from that zeroed allocation.
   *
   * @warning The returned pointer must be freed by faux_str_free().
   * @param [in] src Binary block for conversion.
   * @param [in] n Size of the binary block in bytes.
   * @return C-string or NULL on error.
   */
  char *faux_str_c_bin(const char *src, size_t n)
  {
      char *dst = NULL;
      size_t i = 0;

      assert(src);
      if (!src)
          return NULL;

      // BYTE_CONV_LEN output chars per input byte + one byte for '\0'
      dst = faux_zmalloc((n * BYTE_CONV_LEN) + 1);
      assert(dst);
      if (!dst)
          return NULL;

      for (i = 0; i < n; i++) {
          char buf[BYTE_CONV_LEN + 1]; // one escape + '\0'

          snprintf(buf, sizeof(buf), "\\x%02x", (unsigned char)src[i]);
          // Copy the escape only, without snprintf()'s terminator
          memcpy(dst + (i * BYTE_CONV_LEN), buf, BYTE_CONV_LEN);
      }

      return dst;
  }
  /** @brief Search the first n chars of string for one of the specified chars.
   *
   * The function searches for any of the specified characters within the
   * string. The search is limited to the first n characters. If a
   * terminating '\0' comes before the n-th character then the search
   * stops on it. Can be used with a raw memory block: bytes beyond the
   * first n are never read, so the block needs no terminator.
   *
   * @param [in] str String (or memory block) to search in.
   * @param [in] chars_to_search Chars enumeration to search for.
   * @param [in] n Maximum number of bytes to search within.
   * @return Pointer to the first occurrence of one of specified chars.
   * NULL if nothing was found or on error (NULL argument).
   */
  char *faux_str_charsn(const char *str, const char *chars_to_search, size_t n)
  {
      const char *current_char = str;
      size_t len = n; // Bytes that still may be examined

      assert(str);
      assert(chars_to_search);
      if (!str || !chars_to_search)
          return NULL;

      // The remaining length is tested before the byte is dereferenced,
      // otherwise the byte right after an n-byte block would be read.
      while ((len > 0) && (*current_char != '\0')) {
          if (strchr(chars_to_search, *current_char))
              return (char *)current_char;
          current_char++;
          len--;
      }

      return NULL;
  }
  /** @brief Search string for one of the specified chars.
   *
   * The function searches the whole NUL-terminated string for the first
   * character that is a member of the given character set.
   *
   * @param [in] str String to search in.
   * @param [in] chars_to_search Chars enumeration to search for.
   * @return Pointer to the first occurrence of one of specified chars.
   * NULL if nothing was found or on error (NULL argument).
   */
  char *faux_str_chars(const char *str, const char *chars_to_search)
  {
      assert(str);
      if (!str)
          return NULL;
      assert(chars_to_search);
      if (!chars_to_search)
          return NULL;

      // Scanning a complete string for the first member of a set is
      // exactly what strpbrk() does.
      return strpbrk(str, chars_to_search);
  }
  494. /* TODO: Is it needed?
  495. const char *faux_str_nextword(const char *string,
  496. size_t *len, size_t *offset, size_t *quoted)
  497. {
  498. const char *word;
  499. *quoted = 0;
  500. // Find the start of a word (not including an opening quote)
  501. while (*string && isspace(*string)) {
  502. string++;
  503. (*offset)++;
  504. }
  505. // Is this the start of a quoted string ?
  506. if (*string == '"') {
  507. *quoted = 1;
  508. string++;
  509. }
  510. word = string;
  511. *len = 0;
  512. // Find the end of the word
  513. while (*string) {
  514. if (*string == '\\') {
  515. string++;
  516. (*len)++;
  517. if (*string) {
  518. (*len)++;
  519. string++;
  520. }
  521. continue;
  522. }
  523. // End of word
  524. if (!*quoted && isspace(*string))
  525. break;
  526. if (*string == '"') {
  527. // End of a quoted string
  528. *quoted = 2;
  529. break;
  530. }
  531. (*len)++;
  532. string++;
  533. }
  534. return word;
  535. }
  536. */
  537. // TODO: Is it needed?
  538. /*
  539. char *lub_string_ndecode(const char *string, unsigned int len)
  540. {
  541. const char *s = string;
  542. char *res, *p;
  543. int esc = 0;
  544. if (!string)
  545. return NULL;
  546. p = res = faux_zmalloc(len + 1);
  547. while (*s && (s < (string +len))) {
  548. if (!esc) {
  549. if ('\\' == *s)
  550. esc = 1;
  551. else
  552. *p = *s;
  553. } else {
  554. // switch (*s) {
  555. // case 'r':
  556. // case 'n':
  557. // *p = '\n';
  558. // break;
  559. // case 't':
  560. // *p = '\t';
  561. // break;
  562. // default:
  563. // *p = *s;
  564. // break;
  565. // }
  566. // *p = *s;
  567. esc = 0;
  568. }
  569. if (!esc)
  570. p++;
  571. s++;
  572. }
  573. *p = '\0';
  574. return res;
  575. }
  576. */
  577. // TODO: Is it needed?
  578. /*
  579. inline char *lub_string_decode(const char *string)
  580. {
  581. return lub_string_ndecode(string, strlen(string));
  582. }
  583. */
  584. // TODO: Is it needed?
  585. /*----------------------------------------------------------- */
  586. /*
  587. * This needs to escape any dangerous characters within the command line
  588. * to prevent gaining access to the underlying system shell.
  589. */
  590. /*
  591. char *lub_string_encode(const char *string, const char *escape_chars)
  592. {
  593. char *result = NULL;
  594. const char *p;
  595. if (!escape_chars)
  596. return lub_string_dup(string);
  597. if (string && !(*string)) // Empty string
  598. return lub_string_dup(string);
  599. for (p = string; p && *p; p++) {
  600. // find any special characters and prefix them with '\'
  601. size_t len = strcspn(p, escape_chars);
  602. lub_string_catn(&result, p, len);
  603. p += len;
  604. if (*p) {
  605. lub_string_catn(&result, "\\", 1);
  606. lub_string_catn(&result, p, 1);
  607. } else {
  608. break;
  609. }
  610. }
  611. return result;
  612. }
  613. */
  614. // TODO: Is it needed?
  615. /*--------------------------------------------------------- */
  616. /*
  617. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  618. bool_t utf8)
  619. {
  620. unsigned int cnt = 0;
  621. if (!str1 || !str2)
  622. return cnt;
  623. while (*str1 && *str2) {
  624. if (*str1 != *str2)
  625. break;
  626. cnt++;
  627. str1++;
  628. str2++;
  629. }
  630. if (!utf8)
  631. return cnt;
  632. // UTF8 features
  633. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  634. cnt--;
  635. return cnt;
  636. }
  637. */
  638. // TODO: Is it needed?
  639. /*--------------------------------------------------------- */
  640. /*
  641. const char *lub_string_suffix(const char *string)
  642. {
  643. const char *p1, *p2;
  644. p1 = p2 = string;
  645. while (*p1) {
  646. if (faux_ctype_isspace(*p1)) {
  647. p2 = p1;
  648. p2++;
  649. }
  650. p1++;
  651. }
  652. return p2;
  653. }
  654. */
  655. // TODO: Is it needed?
  656. /*--------------------------------------------------------- */
  657. /*
  658. unsigned int lub_string_wordcount(const char *line)
  659. {
  660. const char *word;
  661. unsigned int result = 0;
  662. size_t len = 0, offset = 0;
  663. size_t quoted;
  664. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  665. *word || quoted;
  666. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  667. // account for the terminating quotation mark
  668. len += quoted ? quoted - 1 : 0;
  669. result++;
  670. }
  671. return result;
  672. }
  673. */