gwenhywfar 5.14.1
text.c
Go to the documentation of this file.
1/***************************************************************************
2 begin : Sat Jun 28 2003
3 copyright : (C) 2019 by Martin Preuss
4 email : martin@libchipcard.de
5
6 ***************************************************************************
7 * *
8 * This library is free software; you can redistribute it and/or *
9 * modify it under the terms of the GNU Lesser General Public *
10 * License as published by the Free Software Foundation; either *
11 * version 2.1 of the License, or (at your option) any later version. *
12 * *
13 * This library is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
16 * Lesser General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU Lesser General Public *
19 * License along with this library; if not, write to the Free Software *
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21 * MA 02111-1307 USA *
22 * *
23 ***************************************************************************/
24
25#ifdef HAVE_CONFIG_H
26# include <config.h>
27#endif
28
29#define DISABLE_DEBUGLOG
30
31
32#include "text.h"
33#include <stdlib.h>
34#include <assert.h>
35#include <string.h>
36#include <errno.h>
37#include <ctype.h>
38#ifdef HAVE_LOCALE_H
39# include <locale.h>
40#endif
41
42#ifndef ICONV_CONST
43# define ICONV_CONST
44#endif
45
46#ifdef HAVE_ICONV_H
47# include <iconv.h>
48#endif
49
50
51#include <gwenhywfar/gwenhywfarapi.h>
52#include <gwenhywfar/debug.h>
53#include <gwenhywfar/stringlist.h>
54
55
56
/* Lookup table pairing single characters with the entity string that replaces
 * them ('&' -> "&amp;", '<' -> "&lt;", ...). The list ends with a {0, 0} entry.
 * NOTE(review): the embedded listing numbers skip 58, 60 and 62, so the first
 * struct member, the typedef name and the array declaration seem to be missing
 * from this excerpt - confirm against the upstream file. */
57typedef struct {
59 const char *replace;
61
63 {'&', "&amp;"},
64 {'<', "&lt;"},
65 {'>', "&gt;"},
66 {'\'', "&apos;"},
67 {'\"', "&quot;"},
68 {0, 0}
69};
70
71
72
73
74/* ------------------------------------------------------------------------------------------------
75 * forward declarations
76 * ------------------------------------------------------------------------------------------------
77 */
78
/* NOTE(review): the definition of this helper is not part of the visible excerpt. */
79static int _splitVariableNameInNameAndIndex(const char *s, char **pVariableName, int *pMaxLen);
/* Compares word w (from *wpos) against pattern p (from *ppos) up to the next '*'
 * in the pattern; positions and match counter are written back through the pointers. */
80static int _cmpSegment(const char *w, unsigned int *wpos,
81 const char *p, unsigned int *ppos,
82 int sensecase,
83 unsigned int *matches);
/* Tries _cmpSegment() at successive start positions in w until one succeeds. */
84static int _findSegment(const char *w, unsigned int *wpos,
85 const char *p, unsigned int *ppos,
86 int sensecase,
87 unsigned int *matches);
/* NOTE(review): the definition of this helper is not part of the visible excerpt. */
88static double _checkSimilarity(const char *s1, const char *s2, int ign);
89
90
91
92/* ------------------------------------------------------------------------------------------------
93 * implementations
94 * ------------------------------------------------------------------------------------------------
95 */
96
97
98
99
/* Extracts the next word from src into the caller-supplied buffer.
 *
 * Characters are copied until a character from delims is met outside of double
 * quotes, the closing quote of a quoted word is reached, the source ends, or
 * maxsize-1 characters have been stored. The result is always NUL-terminated.
 *
 * src:     NUL-terminated input
 * delims:  NUL-terminated set of delimiter characters
 * buffer:  destination, at least maxsize bytes
 * maxsize: size of buffer, must not be 0 (asserted)
 * flags:   GWEN_TEXT_FLAGS_* bit mask
 * next:    receives the position in src at which scanning stopped
 *
 * Returns buffer on success and 0 on error (quote found inside an unquoted
 * word, missing closing quote, or a required delimiter not found).
 *
 * NOTE(review): the embedded listing numbers skip 116, 158, 169 and 215; the
 * flag tests guarding the blank handling and the start of one log call appear
 * to be missing from this excerpt - confirm against the upstream file. */
100char *GWEN_Text_GetWord(const char *src,
101 const char *delims,
102 char *buffer,
103 unsigned int maxsize,
104 uint32_t flags,
105 const char **next)
106{
107 unsigned int size;
108 int lastWasBlank;
109 int lastBlankPos;
110 int insideQuotes;
111 int lastWasEscape;
112
113 assert(maxsize);
114
115 /* skip leading blanks, if wanted */
/* everything below 33 (control characters and space) counts as blank here */
117 while (*src && (unsigned char)(*src)<33)
118 src++;
119 }
120
121 /* get word */
122 size=0;
123 lastWasBlank=0;
124 lastBlankPos=-1;
125 lastWasEscape=0;
126 insideQuotes=0;
127
/* a word starting with '"' is a quoted word; the quote itself is only skipped
 * when GWEN_TEXT_FLAGS_DEL_QUOTES is set */
128 if (*src=='"') {
129 insideQuotes=1;
130 if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
131 src++;
132 }
133
134 while (*src && size<(maxsize-1)) {
135 if (lastWasEscape) {
/* the character following a backslash is copied verbatim */
136 buffer[size]=*src;
137 size++;
138 lastWasEscape=0;
139 lastWasBlank=0;
140 lastBlankPos=-1;
141 }
142 else {
143 if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
/* the backslash itself is not stored */
144 lastWasEscape=1;
145 lastWasBlank=0;
146 lastBlankPos=-1;
147 }
148 else {
149 if (!insideQuotes && strchr(delims, *src)!=0)
150 break;
151 if (*src=='"') {
152 if (insideQuotes) {
/* closing quote: consume it and end the word */
153 insideQuotes=0;
154 src++;
155 break;
156 }
157 else {
159 "Found a closing \" without an opening one "
160 "(consider using a backslash to escape)");
161 return 0;
162 }
163 }
164
165
166 if (insideQuotes ||
167 !lastWasBlank ||
168 (lastWasBlank &&
170 /* only copy if last char was NOT blank or
171 * last was blank but the caller does not want to have multiple
172 * blanks removed */
173 buffer[size]=*src;
174 size++;
175 }
176 /* remember next loop whether this char was a blank */
177 if (isspace((int)((unsigned char)*src)) && !lastWasEscape) {
178 lastWasBlank=1;
179 lastBlankPos=size;
180 }
181 else {
182 lastWasBlank=0;
183 lastBlankPos=-1;
184 }
185 } /* if this is not a backslash */
186 } /* !lastWasEscape */
187 /* advance source pointer */
188 src++;
189 } /* while */
190
191 /* add trailing null to correctly terminate the buffer */
192 buffer[size]=0;
193
194 if (insideQuotes) {
195 DBG_DEBUG(GWEN_LOGDOMAIN, "Missing \" after word");
196 return 0;
197 }
198 /* check whether the source string was correctly terminated */
199 if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
200 if (*src) {
201 if (strchr(delims, *src)==0) {
202 DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
203 return 0;
204 }
205 }
206 else {
207 if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
208 DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
209 return 0;
210 }
211 }
212 }
213
214 /* remove trailing blanks, if wanted */
/* NOTE(review): lastBlankPos is recorded as the value of size at the end of the
 * iteration that saw the blank, so it seems to equal size here and this store
 * would only rewrite the terminator - confirm whether trailing blanks are
 * really removed. */
216 if (lastBlankPos!=-1)
217 buffer[lastBlankPos]=0;
218 }
219
220 *next=src;
221 return buffer;
222}
223
224
225
/* Extracts the next word from src and appends it to buf.
 *
 * Works like GWEN_Text_GetWord() but stores the characters with
 * GWEN_Buffer_AppendByte() instead of writing into a fixed-size array, so
 * there is no length limit in this function itself.
 *
 * src:    NUL-terminated input
 * delims: NUL-terminated set of delimiter characters
 * buf:    destination buffer, characters are appended
 * flags:  GWEN_TEXT_FLAGS_* bit mask
 * next:   receives the position in src at which scanning stopped
 *
 * Returns 0 on success and -1 on error (quote found inside an unquoted word,
 * missing closing quote, or a required delimiter not found).
 *
 * NOTE(review): the embedded listing numbers skip 239, 284, 295 and 338; the
 * flag tests guarding the blank handling and the start of one log call appear
 * to be missing from this excerpt - confirm against the upstream file. */
226int GWEN_Text_GetWordToBuffer(const char *src,
227 const char *delims,
228 GWEN_BUFFER *buf,
229 uint32_t flags,
230 const char **next)
231{
232 const char *savedSrc=src;
233 int lastWasBlank;
234 int lastBlankPos;
235 int insideQuotes;
236 int lastWasEscape;
237
238 /* skip leading blanks, if wanted */
240 while (*src && (unsigned char)(*src)<33) {
/* a blank that is itself a delimiter ends the (empty) word right here */
241 if (strchr(delims, *src)) {
242 *next=src;
243 return 0;
244 }
245 src++;
246 }
247 }
248
249 /* get word */
250 lastWasBlank=0;
251 lastBlankPos=-1;
252 lastWasEscape=0;
253 insideQuotes=0;
254
/* a word starting with '"' is a quoted word; the quote itself is only skipped
 * when GWEN_TEXT_FLAGS_DEL_QUOTES is set */
255 if (*src=='"') {
256 insideQuotes=1;
257 if (flags & GWEN_TEXT_FLAGS_DEL_QUOTES)
258 src++;
259 }
260
261 while (*src) {
262 if (lastWasEscape) {
/* the character following a backslash is copied verbatim */
263 GWEN_Buffer_AppendByte(buf, *src);
264 lastWasEscape=0;
265 lastWasBlank=0;
266 lastBlankPos=-1;
267 }
268 else {
269 if (*src=='\\' && (flags & GWEN_TEXT_FLAGS_CHECK_BACKSLASH)) {
/* the backslash itself is not stored */
270 lastWasEscape=1;
271 lastWasBlank=0;
272 lastBlankPos=-1;
273 }
274 else {
275 if (!insideQuotes && strchr(delims, *src)!=0)
276 break;
277 if (*src=='"') {
278 if (insideQuotes) {
/* closing quote: consume it and end the word */
279 insideQuotes=0;
280 src++;
281 break;
282 }
283 else {
285 "Found a closing \" without an opening one "
286 "(consider using a backslash to escape)");
287 return -1;
288 }
289 }
290
291
292 if (insideQuotes ||
293 !lastWasBlank ||
294 (lastWasBlank &&
296 /* only copy if last char was NOT blank or
297 * last was blank but the caller does not want to have multiple
298 * blanks removed */
299 GWEN_Buffer_AppendByte(buf, *src);
300 }
301 /* remember next loop whether this char was a blank */
302
/* unlike GWEN_Text_GetWord() this uses "<33" rather than isspace() */
303 if (!lastWasEscape && *((unsigned char *)src)<33) {
304 lastWasBlank=1;
305 lastBlankPos=GWEN_Buffer_GetPos(buf);
306 }
307 else {
308 lastWasBlank=0;
309 lastBlankPos=-1;
310 }
311 } /* if this is not a backslash */
312 } /* !lastWasEscape */
313 /* advance source pointer */
314 src++;
315 } /* while */
316
317 if (insideQuotes) {
318 DBG_ERROR(GWEN_LOGDOMAIN, "Missing \" after word (at %d: [%s])", (int)(src-savedSrc), savedSrc);
319 return -1;
320 }
321 /* check whether the source string was correctly terminated */
322 if (flags & GWEN_TEXT_FLAGS_NEED_DELIMITER) {
323 if (*src) {
324 if (strchr(delims, *src)==0) {
325 DBG_ERROR(GWEN_LOGDOMAIN, "No delimiter found within specified length");
326 return -1;
327 }
328 }
329 else {
330 if (!(flags & GWEN_TEXT_FLAGS_NULL_IS_DELIMITER)) {
331 DBG_ERROR(GWEN_LOGDOMAIN, "String ends without delimiter");
332 return -1;
333 }
334 }
335 }
336
337 /* remove trailing blanks, if wanted */
/* presumably GWEN_Buffer_Crop(buf, 0, n) keeps the first n bytes - verify
 * against the GWEN_BUFFER API */
339 if (lastBlankPos!=-1)
340 GWEN_Buffer_Crop(buf, 0, lastBlankPos);
341 }
342
343 *next=src;
344 return 0;
345}
346
347
348
349char *GWEN_Text_Escape(const char *src,
350 char *buffer,
351 unsigned int maxsize)
352{
353 unsigned int size;
354
355 size=0;
356 while (*src) {
357 unsigned char x;
358
359 x=(unsigned char)*src;
360 if (!(
361 (x>='A' && x<='Z') ||
362 (x>='a' && x<='z') ||
363 (x>='0' && x<='9'))) {
364 unsigned char c;
365
366 if ((maxsize-1)<size+3) {
367 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
368 return 0;
369 }
370 buffer[size++]='%';
371 c=(((unsigned char)(*src))>>4)&0xf;
372 if (c>9)
373 c+=7;
374 c+='0';
375 buffer[size++]=c;
376 c=((unsigned char)(*src))&0xf;
377 if (c>9)
378 c+=7;
379 c+='0';
380 buffer[size++]=c;
381 }
382 else {
383 if (size<(maxsize-1))
384 buffer[size++]=*src;
385 else {
386 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
387 return 0;
388 }
389 }
390
391 src++;
392 } /* while */
393
394 buffer[size]=0;
395 return buffer;
396}
397
398
399
400char *GWEN_Text_EscapeTolerant(const char *src,
401 char *buffer,
402 unsigned int maxsize)
403{
404 unsigned int size;
405
406 size=0;
407 while (*src) {
408 unsigned char x;
409
410 x=(unsigned char)*src;
411 if (!(
412 (x>='A' && x<='Z') ||
413 (x>='a' && x<='z') ||
414 (x>='0' && x<='9') ||
415 x==' ' ||
416 x=='.' ||
417 x==',' ||
418 x=='.' ||
419 x=='*' ||
420 x=='?'
421 )) {
422 unsigned char c;
423
424 if ((maxsize-1)<size+3) {
425 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
426 return 0;
427 }
428 buffer[size++]='%';
429 c=(((unsigned char)(*src))>>4)&0xf;
430 if (c>9)
431 c+=7;
432 c+='0';
433 buffer[size++]=c;
434 c=((unsigned char)(*src))&0xf;
435 if (c>9)
436 c+=7;
437 c+='0';
438 buffer[size++]=c;
439 }
440 else {
441 if (size<(maxsize-1))
442 buffer[size++]=*src;
443 else {
444 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
445 return 0;
446 }
447 }
448
449 src++;
450 } /* while */
451
452 buffer[size]=0;
453 return buffer;
454}
455
456
457
458char *GWEN_Text_UnescapeN(const char *src,
459 unsigned int srclen,
460 char *buffer,
461 unsigned int maxsize)
462{
463 unsigned int size;
464
465 size=0;
466
467 while (*src && srclen>0) {
468 unsigned char x;
469
470 x=(unsigned char)*src;
471 if (
472 (x>='A' && x<='Z') ||
473 (x>='a' && x<='z') ||
474 (x>='0' && x<='9')) {
475 if (size<(maxsize-1))
476 buffer[size++]=*src;
477 else {
478 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
479 return 0;
480 }
481 }
482 else {
483 if (*src=='%') {
484 unsigned char d1, d2;
485 unsigned char c;
486
487 if (srclen<3) {
488 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
489 return 0;
490 }
491 /* skip '%' */
492 src++;
493 if (!(*src) || !isxdigit((int)*src)) {
494 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
495 return 0;
496 }
497 /* read first digit */
498 d1=(unsigned char)(toupper(*src));
499
500 /* get second digit */
501 src++;
502 if (!(*src) || !isxdigit((int)*src)) {
503 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
504 return 0;
505 }
506 d2=(unsigned char)(toupper(*src));
507 /* compute character */
508 d1-='0';
509 if (d1>9)
510 d1-=7;
511 c=(d1<<4)&0xf0;
512 d2-='0';
513 if (d2>9)
514 d2-=7;
515 c+=(d2&0xf);
516 /* store character */
517 if (size<(maxsize-1))
518 buffer[size++]=(char)c;
519 else {
520 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
521 return 0;
522 }
523 srclen-=2;
524 }
525 else {
526 DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
527 "characters in escaped string (\"%s\")",
528 src);
529 return 0;
530 }
531 }
532 srclen--;
533 src++;
534 } /* while */
535
536 buffer[size]=0;
537 return buffer;
538}
539
540
541
/**
 * Convenience wrapper around GWEN_Text_UnescapeN() which decodes the whole
 * NUL-terminated string @c src.
 *
 * @param src     escaped, NUL-terminated input
 * @param buffer  destination buffer
 * @param maxsize size of @c buffer in bytes
 * @return whatever GWEN_Text_UnescapeN() returns
 */
char *GWEN_Text_Unescape(const char *src,
                         char *buffer,
                         unsigned int maxsize)
{
  const unsigned int wholeLength=strlen(src);

  return GWEN_Text_UnescapeN(src, wholeLength, buffer, maxsize);
}
551
552
553
554char *GWEN_Text_UnescapeTolerantN(const char *src,
555 unsigned int srclen,
556 char *buffer,
557 unsigned int maxsize)
558{
559 unsigned int size;
560
561 size=0;
562
563 while (*src && srclen>0) {
564 unsigned char x;
565
566 x=(unsigned char)*src;
567 if (
568 (x>='A' && x<='Z') ||
569 (x>='a' && x<='z') ||
570 (x>='0' && x<='9') ||
571 x==' ' ||
572 x=='.' ||
573 x==',' ||
574 x=='.' ||
575 x=='*' ||
576 x=='?'
577 ) {
578 if (size<(maxsize-1))
579 buffer[size++]=*src;
580 else {
581 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
582 return 0;
583 }
584 }
585 else {
586 if (*src=='%') {
587 unsigned char d1, d2;
588 unsigned char c;
589
590 if (srclen<3) {
591 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (EOLN met)");
592 return 0;
593 }
594 /* skip '%' */
595 src++;
596 if (!(*src) || !isxdigit((int)*src)) {
597 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
598 return 0;
599 }
600 /* read first digit */
601 d1=(unsigned char)(toupper(*src));
602
603 /* get second digit */
604 src++;
605 if (!(*src) || !isxdigit((int)*src)) {
606 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
607 return 0;
608 }
609 d2=(unsigned char)(toupper(*src));
610 /* compute character */
611 d1-='0';
612 if (d1>9)
613 d1-=7;
614 c=(d1<<4)&0xf0;
615 d2-='0';
616 if (d2>9)
617 d2-=7;
618 c+=(d2&0xf);
619 /* store character */
620 if (size<(maxsize-1))
621 buffer[size++]=(char)c;
622 else {
623 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
624 return 0;
625 }
626 srclen-=2;
627 }
628 else {
629 DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
630 "characters in escaped string (\"%s\")",
631 src);
632 return 0;
633 }
634 }
635 srclen--;
636 src++;
637 } /* while */
638
639 buffer[size]=0;
640 return buffer;
641}
642
643
644
/**
 * Convenience wrapper around GWEN_Text_UnescapeTolerantN() which decodes the
 * whole NUL-terminated string @c src.
 *
 * @param src     escaped, NUL-terminated input
 * @param buffer  destination buffer
 * @param maxsize size of @c buffer in bytes
 * @return whatever GWEN_Text_UnescapeTolerantN() returns
 */
char *GWEN_Text_UnescapeTolerant(const char *src,
                                 char *buffer,
                                 unsigned int maxsize)
{
  const unsigned int wholeLength=strlen(src);

  return GWEN_Text_UnescapeTolerantN(src, wholeLength, buffer, maxsize);
}
654
655
656
657char *GWEN_Text_ToHex(const char *src, unsigned l,
658 char *buffer, unsigned int maxsize)
659{
660 unsigned int pos;
661 unsigned int size;
662
663 if ((l*2)+1 > maxsize) {
664 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
665 return 0;
666 }
667
668 pos=0;
669 size=0;
670 while (pos<l) {
671 unsigned char c;
672
673 c=(((unsigned char)(src[pos]))>>4)&0xf;
674 if (c>9)
675 c+=7;
676 c+='0';
677 buffer[size++]=c;
678 c=((unsigned char)(src[pos]))&0xf;
679 if (c>9)
680 c+=7;
681 c+='0';
682 buffer[size++]=c;
683 pos++;
684 }
685 buffer[size]=0;
686 return buffer;
687}
688
689
690
691char *GWEN_Text_ToHexGrouped(const char *src,
692 unsigned l,
693 char *buffer,
694 unsigned maxsize,
695 unsigned int groupsize,
696 char delimiter,
697 int skipLeadingZeroes)
698{
699 unsigned int pos;
700 unsigned int size;
701 unsigned int j;
702
703 j=0;
704
705 pos=0;
706 size=0;
707 j=0;
708 while (pos<l) {
709 unsigned char c;
710 int skipThis;
711
712 skipThis=0;
713 c=(((unsigned char)(src[pos]))>>4)&0xf;
714 if (skipLeadingZeroes) {
715 if (c==0)
716 skipThis=1;
717 else
718 skipLeadingZeroes=0;
719 }
720 if (c>9)
721 c+=7;
722 c+='0';
723 if (!skipThis) {
724 if (size+1>=maxsize) {
725 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
726 return 0;
727 }
728 buffer[size++]=c;
729 j++;
730 if (j==groupsize) {
731 if (size+1>=maxsize) {
732 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
733 return 0;
734 }
735 buffer[size++]=delimiter;
736 j=0;
737 }
738 }
739
740 skipThis=0;
741 c=((unsigned char)(src[pos]))&0xf;
742 if (skipLeadingZeroes) {
743 if (c==0 && pos+1<l)
744 skipThis=1;
745 else
746 skipLeadingZeroes=0;
747 }
748 if (c>9)
749 c+=7;
750 c+='0';
751 if (size+1>=maxsize) {
752 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
753 return 0;
754 }
755 if (!skipThis) {
756 buffer[size++]=c;
757 j++;
758 if (j==groupsize) {
759 if (pos+1<l) {
760 if (size+1>=maxsize) {
761 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small");
762 return 0;
763 }
764 buffer[size++]=delimiter;
765 }
766 j=0;
767 }
768 }
769 pos++;
770 }
771 buffer[size]=0;
772 return buffer;
773}
774
775
776
777int GWEN_Text_ToHexBuffer(const char *src, unsigned l,
778 GWEN_BUFFER *buf,
779 unsigned int groupsize,
780 char delimiter,
781 int skipLeadingZeroes)
782{
783 unsigned int pos = 0;
784 unsigned int j = 0;
785
786 while (pos<l) {
787 unsigned char c;
788 int skipThis;
789
790 skipThis=0;
791 c=(((unsigned char)(src[pos]))>>4)&0xf;
792 if (skipLeadingZeroes) {
793 if (c==0)
794 skipThis=1;
795 else
796 skipLeadingZeroes=0;
797 }
798 if (c>9)
799 c+=7;
800 c+='0';
801 if (!skipThis) {
802 if (GWEN_Buffer_AppendByte(buf, c)) {
803 DBG_INFO(GWEN_LOGDOMAIN, "here");
804 return -1;
805 }
806 j++;
807 if (groupsize && j==groupsize) {
808 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
809 DBG_INFO(GWEN_LOGDOMAIN, "here");
810 return -1;
811 }
812 j=0;
813 }
814 }
815
816 skipThis=0;
817 c=((unsigned char)(src[pos]))&0xf;
818 if (skipLeadingZeroes) {
819 if (c==0 && pos+1<l)
820 skipThis=1;
821 else
822 skipLeadingZeroes=0;
823 }
824 if (c>9)
825 c+=7;
826 c+='0';
827 if (!skipThis) {
828 if (GWEN_Buffer_AppendByte(buf, c)) {
829 DBG_INFO(GWEN_LOGDOMAIN, "here");
830 return -1;
831 }
832 j++;
833 if (groupsize && j==groupsize) {
834 if (pos+1<l) {
835 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
836 DBG_INFO(GWEN_LOGDOMAIN, "here");
837 return -1;
838 }
839 }
840 j=0;
841 }
842 }
843 pos++;
844 }
845 return 0;
846}
847
848
849
850int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize)
851{
852 unsigned int size = 0;
853
854 while (*src) {
855 unsigned char d1, d2;
856 unsigned char c;
857
858 /* read first digit */
859 if (!isxdigit((int)*src)) {
860 DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
861 return -1;
862 }
863 d1=(unsigned char)(toupper(*src));
864
865 /* get second digit */
866 src++;
867 if (!(*src) || !isxdigit((int)*src)) {
868 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
869 return -1;
870 }
871 d2=(unsigned char)(toupper(*src));
872 src++;
873
874 /* compute character */
875 d1-='0';
876 if (d1>9)
877 d1-=7;
878 c=(d1<<4)&0xf0;
879 d2-='0';
880 if (d2>9)
881 d2-=7;
882 c+=(d2&0xf);
883 /* store character */
884 if (size<(maxsize))
885 buffer[size++]=(char)c;
886 else {
887 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (maxsize=%d)", maxsize);
888 return -1;
889 }
890 } /* while */
891
892 return size;
893}
894
895
896
897int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf)
898{
899 while (*src) {
900 unsigned char d1, d2;
901 unsigned char c;
902
903 /* read first digit */
904 if (isspace((int)*src)) {
905 src++;
906 }
907 else {
908 if (!isxdigit((int)*src)) {
909 DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in hex string");
910 return -1;
911 }
912 d1=(unsigned char)(toupper(*src));
913
914 /* get second digit */
915 src++;
916 if (!(*src) || !isxdigit((int)*src)) {
917 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete hex byte (only 1 digit)");
918 return -1;
919 }
920 d2=(unsigned char)(toupper(*src));
921 src++;
922
923 /* compute character */
924 d1-='0';
925 if (d1>9)
926 d1-=7;
927 c=(d1<<4)&0xf0;
928 d2-='0';
929 if (d2>9)
930 d2-=7;
931 c+=(d2&0xf);
932 /* store character */
933 GWEN_Buffer_AppendByte(buf, (char)c);
934 }
935 } /* while */
936
937 return 0;
938}
939
940
941
942int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf)
943{
944 unsigned int l;
945 int fakeByte;
946
947 l=strlen(src);
948 fakeByte=(l%2);
949 while (*src) {
950 unsigned char d1, d2;
951 unsigned char c;
952
953 if (fakeByte) {
954 d1=0;
955 fakeByte=0;
956 }
957 else {
958 /* read first digit */
959 if (!isdigit((int)*src)) {
960 DBG_ERROR(GWEN_LOGDOMAIN, "Bad char in bcd string");
961 return -1;
962 }
963 d1=(unsigned char)(*src);
964 src++;
965 }
966 /* get second digit */
967 if (!(*src) || !isxdigit((int)*src)) {
968 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete BCD byte (only 1 digit)");
969 return -1;
970 }
971 d2=(unsigned char)(*src);
972 src++;
973
974 /* compute character */
975 d1-='0';
976 c=(d1<<4)&0xf0;
977 d2-='0';
978 c+=(d2&0xf);
979 /* store character */
980 GWEN_Buffer_AppendByte(buf, (char)c);
981 } /* while */
982
983 return 0;
984}
985
986
987
988int GWEN_Text_ToBcdBuffer(const char *src, unsigned l,
989 GWEN_BUFFER *buf,
990 unsigned int groupsize,
991 char delimiter,
992 int skipLeadingZeroes)
993{
994 unsigned int pos;
995 unsigned int j;
996
997 j=0;
998
999 pos=0;
1000 j=0;
1001 while (pos<l) {
1002 unsigned char c;
1003 int skipThis;
1004
1005 skipThis=0;
1006 c=(((unsigned char)(src[pos]))>>4)&0xf;
1007 if (skipLeadingZeroes) {
1008 if (c==0)
1009 skipThis=1;
1010 else
1011 skipLeadingZeroes=0;
1012 }
1013 c+='0';
1014 if (!skipThis) {
1015 if (GWEN_Buffer_AppendByte(buf, c)) {
1016 DBG_INFO(GWEN_LOGDOMAIN, "here");
1017 return -1;
1018 }
1019 j++;
1020 if (groupsize && j==groupsize) {
1021 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
1022 DBG_INFO(GWEN_LOGDOMAIN, "here");
1023 return -1;
1024 }
1025 j=0;
1026 }
1027 }
1028
1029 skipThis=0;
1030 c=((unsigned char)(src[pos]))&0xf;
1031 if (skipLeadingZeroes) {
1032 if (c==0 && pos+1<l)
1033 skipThis=1;
1034 else
1035 skipLeadingZeroes=0;
1036 }
1037 c+='0';
1038 if (!skipThis) {
1039 if (GWEN_Buffer_AppendByte(buf, c)) {
1040 DBG_INFO(GWEN_LOGDOMAIN, "here");
1041 return -1;
1042 }
1043 j++;
1044 if (groupsize && j==groupsize) {
1045 if (pos+1<l) {
1046 if (GWEN_Buffer_AppendByte(buf, delimiter)) {
1047 DBG_INFO(GWEN_LOGDOMAIN, "here");
1048 return -1;
1049 }
1050 }
1051 j=0;
1052 }
1053 }
1054 pos++;
1055 }
1056 return 0;
1057}
1058
1059
1060
/**
 * Compares two strings, treating a NULL pointer like an empty string.
 *
 * @param s1  first string, may be NULL
 * @param s2  second string, may be NULL
 * @param ign non-zero for a case-insensitive comparison
 * @return 0 if both are NULL/empty, 1 if only @c s1 is NULL/empty, -1 if only
 *         @c s2 is NULL/empty, otherwise the result of strcasecmp() or
 *         strcmp()
 */
int GWEN_Text_Compare(const char *s1, const char *s2, int ign)
{
  const int firstEmpty=(s1==0 || *s1==0);
  const int secondEmpty=(s2==0 || *s2==0);

  if (firstEmpty || secondEmpty) {
    if (firstEmpty && secondEmpty)
      return 0;
    return firstEmpty ? 1 : -1;
  }

  return ign ? strcasecmp(s1, s2) : strcmp(s1, s2);
}
1080
1081
1082
/**
 * Case-insensitive search for @c needle inside @c haystack.
 *
 * @param haystack NUL-terminated string to search in
 * @param needle   NUL-terminated string to search for
 * @return pointer to the first occurrence of @c needle in @c haystack, or
 *         NULL if there is none. An empty @c needle is never found.
 */
const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle)
{
  /* tolower() is only defined for values representable as unsigned char (and
   * EOF), so every character is converted before the call */
  const int first=tolower((unsigned char)*needle);

  for (; *haystack; haystack++) {
    const char *t;
    const char *s;

    if (tolower((unsigned char)*haystack)!=first)
      continue;

    /* first character matches, compare the rest of the needle */
    t=haystack+1;
    s=needle+1;
    while (*t && *s &&
           tolower((unsigned char)*t)==tolower((unsigned char)*s)) {
      t++;
      s++;
    }
    if (*s==0)
      return haystack;
  }

  /* not found */
  return NULL;
}
1114
1115
1116
1117
/**
 * Compares word @c w (starting at @c *wpos) with pattern @c p (starting at
 * @c *ppos) up to the next '*' in the pattern.
 *
 * A '?' in the pattern matches any single character. Every character pair
 * which compares equal increments the match counter.
 *
 * @param w         NUL-terminated word
 * @param wpos      in: start position in @c w, out: position where the
 *                  comparison stopped
 * @param p         NUL-terminated pattern
 * @param ppos      in: start position in @c p, out: position where the
 *                  comparison stopped
 * @param sensecase non-zero for a case-sensitive comparison
 * @param matches   in/out: number of equal character pairs seen so far
 * @return 1 if the segment matches (a '*' was reached in the pattern, or word
 *         and pattern ended together), 0 otherwise
 */
int _cmpSegment(const char *w, unsigned int *wpos,
                const char *p, unsigned int *ppos,
                int sensecase,
                unsigned int *matches)
{
  const unsigned int wlength=strlen(w);
  const unsigned int plength=strlen(p);
  unsigned int wi=*wpos;
  unsigned int pi=*ppos;
  unsigned int hits=*matches;
  int result;

  for (;;) {
    char a;
    char b;

    if (wi>=wlength || pi>=plength) {
      if (wi==wlength && pi==plength)
        /* both at end */
        result=1;
      else if (wi>=wlength && pi<plength && p[pi]=='*')
        /* word ends, pattern continues with '*' */
        result=1;
      else
        result=0;
      break;
    }

    a=w[wi];
    b=p[pi];
    if (b=='*') {
      result=1;
      break;
    }
    if (!sensecase) {
      /* toupper() is only defined for values representable as unsigned char */
      a=(char)toupper((unsigned char)a);
      b=(char)toupper((unsigned char)b);
    }
    if (a==b)
      hits++;
    else if (b!='?') {
      result=0;
      break;
    }
    wi++;
    pi++;
  }

  /* single write-back for all exits */
  *wpos=wi;
  *ppos=pi;
  *matches=hits;
  return result;
}
1181
1182
1183
/**
 * Searches word @c w, beginning at @c *wpos, for a position at which the
 * pattern segment starting at @c *ppos matches.
 *
 * For every candidate start position the pattern position and the match
 * counter are reset to the values they had on entry before _cmpSegment() is
 * called.
 *
 * @return 1 as soon as _cmpSegment() succeeds (the pointers then hold what
 *         that call left in them), 0 if no start position matches
 */
int _findSegment(const char *w, unsigned int *wpos,
                 const char *p, unsigned int *ppos,
                 int sensecase,
                 unsigned int *matches)
{
  const unsigned int wordLength=strlen(w);
  const unsigned int patternStart=*ppos;
  const unsigned int matchesAtEntry=*matches;
  unsigned int start;

  for (start=*wpos; start<wordLength; start++) {
    *ppos=patternStart;
    *wpos=start;
    *matches=matchesAtEntry;
    if (_cmpSegment(w, wpos, p, ppos, sensecase, matches))
      return 1;
  }
  return 0;
}
1206
1207
/**
 * Matches word @c w against pattern @c p, where '*' stands for any sequence
 * of characters and '?' for any single character.
 *
 * @param w         NUL-terminated word
 * @param p         NUL-terminated pattern
 * @param sensecase non-zero for a case-sensitive comparison
 * @return -1 if the word does not match, otherwise the number of characters
 *         which compared equal
 */
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase)
{
  const unsigned int patternLength=strlen(p);
  unsigned int ppos=0;
  unsigned int wpos=0;
  unsigned int matches=0;

  /* the part in front of the first '*' must match at the start of the word */
  if (!_cmpSegment(w, &wpos, p, &ppos, sensecase, &matches))
    return -1;

  /* ppos now sits on a '*' unless the pattern is exhausted; step over it and
   * look for the next segment until the pattern ends */
  while (ppos<patternLength && ++ppos<patternLength) {
    if (!_findSegment(w, &wpos, p, &ppos, sensecase, &matches))
      return -1;
  }

  return matches;
}
1240
1241
1242
1243int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize,
1244 int fillchar)
1245{
1246 char lbuffer[128];
1247 unsigned int i;
1248
1249 sprintf(lbuffer, "%d", num);
1250 i=strlen(lbuffer);
1251 if (i>=bufsize) {
1252 DBG_ERROR(GWEN_LOGDOMAIN, "Buffer too small (%d>=%d)", i, bufsize);
1253 return -1;
1254 }
1255 if (fillchar>0) {
1256 /* fill right, but first copy chars */
1257 strcpy(buffer, lbuffer);
1258 while (i<bufsize-1)
1259 buffer[i++]=fillchar;
1260 buffer[i]=0;
1261 return bufsize;
1262 }
1263 else if (fillchar<0) {
1264 int j, k;
1265
1266 fillchar=-fillchar;
1267 j=bufsize-1-i;
1268 for (k=0; k<j; k++)
1269 buffer[k]=fillchar;
1270 buffer[k]=0;
1271 strcat(buffer, lbuffer);
1272 return bufsize;
1273 }
1274 else {
1275 /* dont fill, just copy */
1276 strcpy(buffer, lbuffer);
1277 return i;
1278 }
1279}
1280
1281
1282
/**
 * Writes a hex dump of @c l bytes starting at @c s to stderr.
 *
 * The output starts with a line stating the size, followed by one line per 16
 * bytes consisting of the offset, the bytes in hexadecimal and the bytes as
 * text (values below 32 are shown as '.').
 *
 * @param s      data to dump
 * @param l      number of bytes to dump
 * @param insert number of spaces written in front of every line
 */
void GWEN_Text_DumpString(const char *s, unsigned int l,
                          unsigned int insert)
{
  unsigned int i;
  unsigned int j;
  unsigned int pos;
  unsigned int k;

  for (k=0; k<insert; k++)
    fprintf(stderr, " ");
  /* l is unsigned, so it is printed with %u */
  fprintf(stderr, "String size is %u:\n", l);

  for (pos=0; pos<l; pos+=16) {
    for (k=0; k<insert; k++)
      fprintf(stderr, " ");
    fprintf(stderr, "%04x: ", pos);
    j=pos+16;
    if (j>=l)
      j=l;

    /* show hex dump */
    for (i=pos; i<j; i++)
      fprintf(stderr, "%02x ", (unsigned char)s[i]);
    /* every byte occupies three columns ("%02x "), so a short last row is
     * padded with three spaces per missing byte to keep the text aligned */
    for (i=j-pos; i<16; i++)
      fprintf(stderr, "   ");

    /* show text */
    for (i=pos; i<j; i++) {
      if (s[i]<32)
        fprintf(stderr, ".");
      else
        fprintf(stderr, "%c", s[i]);
    }
    fprintf(stderr, "\n");
  }
}
1321
1322
1323
1324void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l,
1325 GWEN_BUFFER *mbuf,
1326 unsigned int insert)
1327{
1328 unsigned int i;
1329 unsigned int j;
1330 unsigned int pos;
1331 unsigned k;
1332 char numbuf[32];
1333
1334 pos=0;
1335 for (k=0; k<insert; k++)
1336 GWEN_Buffer_AppendByte(mbuf, ' ');
1337 GWEN_Buffer_AppendString(mbuf, "String size is ");
1338 snprintf(numbuf, sizeof(numbuf), "%d", l);
1339 GWEN_Buffer_AppendString(mbuf, numbuf);
1340 GWEN_Buffer_AppendByte(mbuf, '\n');
1341 while (pos<l) {
1342 for (k=0; k<insert; k++)
1343 GWEN_Buffer_AppendByte(mbuf, ' ');
1344 snprintf(numbuf, sizeof(numbuf), "%04x: ", pos);
1345 GWEN_Buffer_AppendString(mbuf, numbuf);
1346 j=pos+16;
1347 if (j>=l)
1348 j=l;
1349
1350 /* show hex dump */
1351 for (i=pos; i<j; i++) {
1352 snprintf(numbuf, sizeof(numbuf), "%02x ", (unsigned char)s[i]);
1353 GWEN_Buffer_AppendString(mbuf, numbuf);
1354 }
1355 if (j-pos<16)
1356 for (i=0; i<16-(j-pos); i++)
1357 GWEN_Buffer_AppendString(mbuf, " ");
1358 /* show text */
1359 for (i=pos; i<j; i++) {
1360 if (s[i]<32)
1361 GWEN_Buffer_AppendByte(mbuf, '.');
1362 else
1363 GWEN_Buffer_AppendByte(mbuf, s[i]);
1364 }
1365 GWEN_Buffer_AppendByte(mbuf, '\n');
1366 pos+=16;
1367 }
1368}
1369
1370
1371
1372
1373
1374
1375
1376int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf)
1377{
1378 while (*src) {
1379 unsigned char x;
1380
1381 x=(unsigned char)*src;
1382 if (!(
1383 (x>='A' && x<='Z') ||
1384 (x>='a' && x<='z') ||
1385 (x>='0' && x<='9'))) {
1386 unsigned char c;
1387
1388 GWEN_Buffer_AppendByte(buf, '%');
1389 c=(((unsigned char)(*src))>>4)&0xf;
1390 if (c>9)
1391 c+=7;
1392 c+='0';
1393 GWEN_Buffer_AppendByte(buf, c);
1394 c=((unsigned char)(*src))&0xf;
1395 if (c>9)
1396 c+=7;
1397 c+='0';
1398 GWEN_Buffer_AppendByte(buf, c);
1399 }
1400 else
1401 GWEN_Buffer_AppendByte(buf, *src);
1402
1403 src++;
1404 } /* while */
1405
1406 return 0;
1407}
1408
1409
1410
1411int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf)
1412{
1413 while (*src) {
1414 unsigned char x;
1415
1416 x=(unsigned char)*src;
1417 if (
1418 (x>='A' && x<='Z') ||
1419 (x>='a' && x<='z') ||
1420 (x>='0' && x<='9')) {
1421 GWEN_Buffer_AppendByte(buf, *src);
1422 }
1423 else {
1424 if (*src=='%') {
1425 unsigned char d1, d2;
1426 unsigned char c;
1427
1428 /* skip '%' */
1429 src++;
1430 if (!(*src) || !isxdigit((int)*src)) {
1431 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (no digits)");
1432 return 0;
1433 }
1434 /* read first digit */
1435 d1=(unsigned char)(toupper(*src));
1436
1437 /* get second digit */
1438 src++;
1439 if (!(*src) || !isxdigit((int)*src)) {
1440 DBG_ERROR(GWEN_LOGDOMAIN, "Incomplete escape sequence (only 1 digit)");
1441 return 0;
1442 }
1443 d2=(unsigned char)(toupper(*src));
1444 /* compute character */
1445 d1-='0';
1446 if (d1>9)
1447 d1-=7;
1448 c=(d1<<4)&0xf0;
1449 d2-='0';
1450 if (d2>9)
1451 d2-=7;
1452 c+=(d2&0xf);
1453 /* store character */
1454 GWEN_Buffer_AppendByte(buf, (char)c);
1455 }
1456 else {
1457 DBG_ERROR(GWEN_LOGDOMAIN, "Found non-alphanum "
1458 "characters in escaped string (\"%s\")",
1459 src);
1460 return -1;
1461 }
1462 }
1463 src++;
1464 } /* while */
1465
1466 return 0;
1467}
1468
1469
1470
1472{
/* NOTE(review): the signature line (listing number 1471) is missing from this
 * excerpt; the body uses a NUL-terminated string "src" and a buffer "buf".
 *
 * Appends src to buf, percent-escaping every byte which is not an ASCII
 * letter, a digit or one of ' ', '.', ',', '_', '-', '*', '?'. Escaped bytes
 * are written as '%' plus two uppercase hexadecimal digits. Always returns 0. */
1473 while (*src) {
1474 unsigned char x;
1475
1476 x=(unsigned char)*src;
1477 if (!(
1478 (x>='A' && x<='Z') ||
1479 (x>='a' && x<='z') ||
1480 (x>='0' && x<='9') ||
1481 x==' ' ||
1482 x=='.' ||
1483 x==',' ||
1484 x=='.' ||
1485 x=='_' ||
1486 x=='-' ||
1487 x=='*' ||
1488 x=='?'
1489 )) {
1490 unsigned char c;
1491
1492 GWEN_Buffer_AppendByte(buf, '%');
/* high nibble as hex digit: adding 7 bridges the gap between '9' and 'A' */
1493 c=(((unsigned char)(*src))>>4)&0xf;
1494 if (c>9)
1495 c+=7;
1496 c+='0';
1497 GWEN_Buffer_AppendByte(buf, c);
/* low nibble as hex digit */
1498 c=((unsigned char)(*src))&0xf;
1499 if (c>9)
1500 c+=7;
1501 c+='0';
1502 GWEN_Buffer_AppendByte(buf, c);
1503 }
1504 else
1505 GWEN_Buffer_AppendByte(buf, *src);
1506
1507 src++;
1508 } /* while */
1509
1510 return 0;
1511}
1512
1513
1514
1516{
/* NOTE(review): the signature line (listing number 1515) is missing from this
 * excerpt; the body uses a NUL-terminated string "src" and a buffer "buf".
 *
 * Appends src to buf, decoding every "%XX" sequence (two hex digits) into one
 * byte. Unlike the strict variant nothing is rejected: a '%' which is not
 * followed by two hex digits, and every other character, is copied as is.
 * Always returns 0. */
1517 while (*src) {
1518 //const char *srcBak=src;
1519
1520 int charHandled=0;
1521 if (*src=='%') {
/* at least two more characters must follow the '%' */
1522 if (strlen(src)>2) {
1523 unsigned char d1, d2;
1524 unsigned char c;
1525
1526 if (isxdigit((int)src[1]) && isxdigit((int)src[2])) {
1527 /* skip '%' */
1528 src++;
1529 /* read first digit */
1530 d1=(unsigned char)(toupper(*src));
1531
1532 /* get second digit */
1533 src++;
1534 d2=(unsigned char)(toupper(*src));
1535 /* compute character */
/* 'A'..'F' follow '9' with a gap of 7 characters */
1536 d1-='0';
1537 if (d1>9)
1538 d1-=7;
1539 c=(d1<<4)&0xf0;
1540 d2-='0';
1541 if (d2>9)
1542 d2-=7;
1543 c+=(d2&0xf);
1544 /* store character */
1545 GWEN_Buffer_AppendByte(buf, (char)c);
1546 charHandled=1;
1547 }
1548 }
1549 }
/* not a valid escape sequence: copy the character unchanged */
1550 if (!charHandled)
1551 GWEN_Buffer_AppendByte(buf, *src);
1552 src++;
1553 } /* while */
1554
1555 return 0;
1556}
1557
1558
1559
1561{
/* NOTE(review): the signature line (listing number 1560) is missing from this
 * excerpt; the body reads from a buffer "src" and appends to a buffer "buf".
 *
 * Reads bytes from src with GWEN_Buffer_ReadByte() while
 * GWEN_Buffer_GetBytesLeft() reports data and appends them to buf,
 * percent-escaping every byte which is not an ASCII letter, a digit or one of
 * ' ', '.', ',', '*', '?'. Returns 0 on success and -1 if reading a byte
 * yields -1. */
1562 while (GWEN_Buffer_GetBytesLeft(src)) {
1563 int z;
1564 unsigned char x;
1565
1566 z=GWEN_Buffer_ReadByte(src);
1567 if (z==-1) {
1568 DBG_INFO(GWEN_LOGDOMAIN, "here");
1569 return -1;
1570 }
1571 x=(unsigned char)z;
1572 if (!(
1573 (x>='A' && x<='Z') ||
1574 (x>='a' && x<='z') ||
1575 (x>='0' && x<='9') ||
1576 x==' ' ||
1577 x=='.' ||
1578 x==',' ||
1579 x=='.' ||
1580 x=='*' ||
1581 x=='?'
1582 )) {
1583 unsigned char c;
1584
1585 GWEN_Buffer_AppendByte(buf, '%');
/* high nibble as hex digit: adding 7 bridges the gap between '9' and 'A' */
1586 c=(((unsigned char)x)>>4)&0xf;
1587 if (c>9)
1588 c+=7;
1589 c+='0';
1590 GWEN_Buffer_AppendByte(buf, c);
/* low nibble as hex digit */
1591 c=((unsigned char)x)&0xf;
1592 if (c>9)
1593 c+=7;
1594 c+='0';
1595 GWEN_Buffer_AppendByte(buf, c);
1596 }
1597 else
1598 GWEN_Buffer_AppendByte(buf, x);
1599 } /* while */
1600
1601 return 0;
1602}
1603
1604
1605
1606void GWEN_Text_LogString(const char *s, unsigned int l,
1607 const char *logDomain,
1609{
1610 GWEN_BUFFER *mbuf;
1611
1612 mbuf=GWEN_Buffer_new(0, ((l*16)<1024)?1024:l*16, 0, 1);
1613 GWEN_Text_DumpString2Buffer(s, l, mbuf, 0);
1614 GWEN_Logger_Log(logDomain, lv, GWEN_Buffer_GetStart(mbuf));
1615 GWEN_Buffer_free(mbuf);
1616}
1617
1618
1619
1621{
1622 const char *p;
1623 char *dst;
1624 unsigned int size;
1625 unsigned int i;
1626 int lastWasBlank;
1627 char *lastBlankPos;
1628
1629 dst=GWEN_Buffer_GetStart(buf);
1630 p=dst;
1631 size=GWEN_Buffer_GetUsedBytes(buf);
1632 lastWasBlank=0;
1633 lastBlankPos=0;
1634
1635 for (i=0; i<size; i++) {
1636 /* remember next loop whether this char was a blank */
1637 if (isspace((int)*p)) {
1638 if (!lastWasBlank) {
1639 /* store only one blank */
1640 lastWasBlank=1;
1641 lastBlankPos=dst;
1642 *(dst++)=*p;
1643 }
1644 }
1645 else {
1646 lastWasBlank=0;
1647 lastBlankPos=0;
1648 *(dst++)=*p;
1649 }
1650 p++;
1651 }
1652
1653 /* remove trailing blanks */
1654 if (lastBlankPos!=0)
1655 dst=lastBlankPos;
1656
1657 size=dst-GWEN_Buffer_GetStart(buf);
1658 GWEN_Buffer_Crop(buf, 0, size);
1659}
1660
1661
1662
1664{
1665 char numbuf[128];
1666 int rv;
1667#ifdef HAVE_SETLOCALE
1668 const char *orig_locale = setlocale(LC_NUMERIC, NULL);
1669 char *currentLocale = strdup(orig_locale ? orig_locale : "C");
1670 setlocale(LC_NUMERIC, "C");
1671#endif
1672
1673 rv=snprintf(numbuf, sizeof(numbuf), "%f", num);
1674
1675#ifdef HAVE_SETLOCALE
1676 setlocale(LC_NUMERIC, currentLocale);
1677 free(currentLocale);
1678#endif
1679
1680 if (rv<1 || rv>=(int)sizeof(numbuf))
1681 return -1;
1682 GWEN_Buffer_AppendString(buf, numbuf);
1683 return 0;
1684}
1685
1686
1687
/**
 * Parses a floating point number from the beginning of @p s.
 *
 * Leading whitespace is skipped and parsing stops at the first character
 * that cannot be part of the number; a valid numeric prefix is enough for
 * success. When setlocale() is available the numeric locale is switched
 * to "C" for the conversion and restored afterwards, so '.' is always the
 * decimal separator.
 *
 * strtod() is used instead of sscanf("%lf") because the latter has
 * undefined behavior for values outside the range of double.
 *
 * @param s   NUL-terminated text to parse
 * @param num receives the parsed value; left untouched on failure
 * @return 0 on success, -1 if an argument is NULL, the current locale could
 *         not be saved, or @p s does not start with a number
 */
int GWEN_Text_StringToDouble(const char *s, double *num)
{
  double value;
  char *end;
#ifdef HAVE_SETLOCALE
  const char *origLocale;
  char *savedLocale;
#endif

  if (s==NULL || num==NULL)
    return -1;

#ifdef HAVE_SETLOCALE
  origLocale=setlocale(LC_NUMERIC, NULL);
  /* setlocale() may reuse its return storage, so keep a private copy */
  savedLocale=strdup(origLocale ? origLocale : "C");
  if (savedLocale==NULL) {
    /* without a copy the locale could not be restored; leave it alone */
    return -1;
  }
  setlocale(LC_NUMERIC, "C");
#endif

  end=NULL;
  value=strtod(s, &end);

#ifdef HAVE_SETLOCALE
  setlocale(LC_NUMERIC, savedLocale);
  free(savedLocale);
#endif

  /* no characters consumed means there was no number at all */
  if (end==s)
    return -1;
  *num=value;
  return 0;
}
1708
1709
1710
/*
 * Scores how well two characters match.
 * Returns 2 for a full match, 1 for a weak match and 0 for no match.
 * A full match is an identical character; when ign is non-zero a
 * difference in case only also counts as a full match, otherwise as a
 * weak one. Any two alphanumeric characters are a weak match.
 */
static int _similarityCharScore(unsigned char c1, unsigned char c2, int ign)
{
  if (c1==c2)
    return 2;
  if (toupper(c1)==toupper(c2))
    return ign?2:1;
  if (isalnum(c1) && isalnum(c2))
    return 1;
  return 0;
}



/**
 * Computes a one-directional similarity percentage between two strings.
 *
 * For every character of @p s1 (in order) the first matching character in
 * the not yet consumed rest of @p s2 is looked up; the match score (see
 * _similarityCharScore) is added up and the rest of @p s2 is advanced past
 * the matched character. The sum is scaled to a percentage of the combined
 * length of both strings using integer arithmetic, so the result is always
 * a whole number.
 *
 * @param s1  string whose characters are looked up
 * @param s2  string that is searched
 * @param ign non-zero to ignore case when deciding on a full match
 * @return similarity in percent (0..100); two empty strings count as
 *         identical and yield 100
 */
double _checkSimilarity(const char *s1, const char *s2, int ign)
{
  int nboth;
  int nmatch;

  nboth=strlen(s1)+strlen(s2);
  if (nboth==0) {
    /* avoid dividing by zero; nothing differs between two empty strings */
    return 100.0;
  }

  nmatch=0;
  while (*s1 && *s2) {
    const char *t;
    int score;

    /* find the next character in s2 that matches *s1 */
    score=0;
    for (t=s2; *t; t++) {
      score=_similarityCharScore((unsigned char)*s1, (unsigned char)*t, ign);
      if (score)
        break;
    }

    if (score) {
      nmatch+=score;
      s2=t+1;
    }
    s1++;
  } /* while */

  /* integer division on purpose: keeps the historic whole-number result */
  return (double)((nmatch*100)/nboth);
}
1783
1784
1785
/**
 * Returns the similarity of two strings in percent.
 *
 * The one-directional comparison is run in both directions (s1 against s2
 * and s2 against s1) and the larger of the two results is returned.
 *
 * @param s1  first string
 * @param s2  second string
 * @param ign passed through to _checkSimilarity
 * @return the larger of the two similarity values
 */
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign)
{
  const double forward=_checkSimilarity(s1, s2, ign);
  const double backward=_checkSimilarity(s2, s1, ign);

  return (backward>forward)?backward:forward;
}
1796
1797
1798
1799int GWEN_Text_CountUtf8Chars(const char *s, int len)
1800{
1801 int count;
1802 int handled;
1803
1804 if (len==0)
1805 len=strlen(s);
1806 count=0;
1807 handled=0;
1808 while (handled<len) {
1809 unsigned char c;
1810 int i;
1811
1812 c=(unsigned char)*s;
1813 if ((c & 0xfe)==0xfc)
1814 i=5;
1815 else if ((c & 0xfc)==0xf8)
1816 i=4;
1817 else if ((c & 0xf8)==0xf0)
1818 i=3;
1819 else if ((c & 0xf0)==0xe0)
1820 i=2;
1821 else if ((c & 0xe0)==0xc0)
1822 i=1;
1823 else if (c & 0x80) {
1824 DBG_ERROR(GWEN_LOGDOMAIN, "Invalid UTF8 character at pos %d", handled);
1825 return -1;
1826 }
1827 else
1828 i=0;
1829 if (handled+i+1>len) {
1831 "Incomplete UTF8 sequence at pos %d", handled);
1832 return -1;
1833 }
1834 s++;
1835 if (i) {
1836 int j;
1837
1838 for (j=0; j<i; j++) {
1839 if ((((unsigned char)*s) & 0xc0)!=0xc0) {
1841 "Invalid UTF8 sequence at pos %d (rel %d of %d)",
1842 handled, j, i);
1843 }
1844 s++;
1845 }
1846 }
1847 handled+=i+1;
1848 count++;
1849 } /* while */
1850
1851 return count;
1852}
1853
1854
1855
/*
 * Body of the XML unescape routine: copies src to buf while replacing
 * entity references (text starting with '&').
 *
 * NOTE(review): this listing is incomplete. The function header and
 * several numbered lines (1879, 1900, 1903-1904, 1909-1910, 1915-1916,
 * 1921-1922, 1927-1928, 1933-1934, 1939-1940, 1946, 1952, 1969) do not
 * appear here, so the statements that emit the replacement characters,
 * initialise roomLeft and initialise e are not visible. Consult the
 * original file before changing anything in this block.
 */
1857{
1858 char *pdst;
1859 uint32_t roomLeft;
1860 uint32_t bytesAdded;
1861
/*
 * GWEN_TEXT__APPENDCHAR writes one byte through pdst and keeps a 0 byte
 * behind it. When fewer than 2 bytes of room are left it first commits
 * the bytes written so far (IncrementPos/AdjustUsedBytes), asks the
 * buffer for room and re-reads the write pointer and the available room.
 * The macro expands to several statements and is not wrapped in
 * do { } while (0), so it is only safe inside a braced block.
 */
1862#define GWEN_TEXT__APPENDCHAR(chr) \
1863 if (roomLeft<2) { \
1864 if (bytesAdded) { \
1865 GWEN_Buffer_IncrementPos(buf, bytesAdded); \
1866 GWEN_Buffer_AdjustUsedBytes(buf); \
1867 } \
1868 GWEN_Buffer_AllocRoom(buf, 2); \
1869 pdst=GWEN_Buffer_GetPosPointer(buf); \
1870 roomLeft=GWEN_Buffer_GetMaxUnsegmentedWrite(buf); \
1871 bytesAdded=0; \
1872 } \
1873 *(pdst++)=(unsigned char)chr; \
1874 *pdst=0; \
1875 bytesAdded++; \
1876 roomLeft--
1877
1878 pdst=GWEN_Buffer_GetPosPointer(buf);
/* NOTE(review): line 1879 is missing here; presumably it initialises roomLeft - confirm */
1880 bytesAdded=0;
1881
1882 while (*src) {
1883 unsigned char x;
1884 int match;
1885
1886 match=0;
1887 x=(unsigned char)*src;
1888 if (x=='&') {
1889 if (src[1]=='#') {
/* numeric reference "&#NNN;": decimal digits are accumulated in an
 * unsigned char, so values above 255 wrap around */
1890 unsigned char num=0;
1891
1892 src++;
1893 src++;
1894 while (*src && isdigit((int)*src)) {
1895 num*=10;
1896 num+=(*src)-'0';
1897 src++;
1898 }
/* NOTE(review): the character after the digits is skipped without checking
 * that it is ';'; if the input ends right after the digits this steps past
 * the terminating NUL */
1899 src++;
1901 }
/* named references for German umlauts and sharp s; the visible code only
 * skips the reference text and sets match */
1902 else if (strncmp(src+1, "szlig;", 6)==0) {
1905 src+=7;
1906 match=1;
1907 }
1908 else if (strncmp(src+1, "Auml;", 5)==0) {
1911 src+=6;
1912 match=1;
1913 }
1914 else if (strncmp(src+1, "Ouml;", 5)==0) {
1917 src+=6;
1918 match=1;
1919 }
1920 else if (strncmp(src+1, "Uuml;", 5)==0) {
1923 src+=6;
1924 match=1;
1925 }
1926 else if (strncmp(src+1, "auml;", 5)==0) {
1929 src+=6;
1930 match=1;
1931 }
1932 else if (strncmp(src+1, "ouml;", 5)==0) {
1935 src+=6;
1936 match=1;
1937 }
1938 else if (strncmp(src+1, "uuml;", 5)==0) {
1941 src+=6;
1942 match=1;
1943 }
1944 else {
/* otherwise compare case-insensitively against each entry's replace
 * string and skip it on a hit */
1945 const GWEN_TEXT_ESCAPE_ENTRY *e;
1947 while (e->replace) {
1948 int l;
1949
1950 l=strlen(e->replace);
1951 if (strncasecmp(src, e->replace, l)==0) {
1953 //GWEN_Buffer_AppendByte(buf, e->character);
1954 src+=l;
1955 match=1;
1956 break;
1957 }
1958 e++;
1959 } /* while */
1960 }
1961 }
/* nothing matched: copy the current character unchanged */
1962 if (!match) {
1963 GWEN_TEXT__APPENDCHAR(*(src++));
1964 }
1965 } /* while */
1966
/* commit the bytes written since the last commit */
1967 if (bytesAdded) {
1968 GWEN_Buffer_IncrementPos(buf, bytesAdded);
1970 }
1971
1972 return 0;
1973#undef GWEN_TEXT__APPENDCHAR
1974}
1975
1976
1977
/*
 * Body of the XML escape routine: walks src byte by byte, looks each byte
 * up in a table of GWEN_TEXT_ESCAPE_ENTRY items and appends bytes that
 * have no table entry to buf unchanged. Always returns 0.
 *
 * NOTE(review): this listing is incomplete. The function header and the
 * numbered lines 1987 and 1990 do not appear here, so the initialisation
 * of e and the statement executed for a matching entry (presumably
 * appending e->replace) are not visible - confirm against the original
 * file.
 */
1979{
1980 while (*src) {
1981 unsigned char x;
1982 const GWEN_TEXT_ESCAPE_ENTRY *e;
1983 int match;
1984
1985 match=0;
1986 x=(unsigned char)*src;
/* scan the table; it ends at the entry whose replace pointer is NULL */
1988 while (e->replace) {
1989 if (x==e->character) {
1991 match=1;
1992 break;
1993 }
1994 e++;
1995 } /* while */
1996
1997 if (!match) {
/* the "0 &&" keeps the numeric-reference branch for bytes >127 switched
 * off, so such bytes are copied unchanged as well */
1998 if (0 && x>127) { /* disabled */
1999 char numbuf[32];
2000
2001 snprintf(numbuf, sizeof(numbuf), "&#%d;", x);
2002 GWEN_Buffer_AppendString(buf, numbuf);
2003 }
2004 else
2005 GWEN_Buffer_AppendByte(buf, *src);
2006 }
2007 src++;
2008 } /* while */
2009
2010 return 0;
2011}
2012
2013
2014
2015int GWEN_Text_ConvertCharset(const char *fromCharset,
2016 const char *toCharset,
2017 const char *text, int len,
2018 GWEN_BUFFER *tbuf)
2019{
2020 if (len) {
2021 if (fromCharset && *fromCharset && toCharset && *toCharset &&
2022 strcasecmp(fromCharset, toCharset)!=0) {
2023#ifndef HAVE_ICONV
2025 "iconv not available, can not convert from \"%s\" to \"%s\"",
2026 fromCharset, toCharset);
2027#else
2028 iconv_t ic;
2029
2030 ic=iconv_open(toCharset, fromCharset);
2031 if (ic==((iconv_t)-1)) {
2032 DBG_DEBUG(GWEN_LOGDOMAIN, "Charset \"%s\" or \"%s\" not available or equal",
2033 fromCharset, toCharset);
2034 }
2035 else {
2036 char *outbuf;
2037 char *pOutbuf;
2038 /* Some systems have iconv in libc, some have it in libiconv
2039 (OSF/1 and those with the standalone portable GNU libiconv
2040 installed). Check which one is available. The define
2041 ICONV_CONST will be "" or "const" accordingly. */
2042 ICONV_CONST char *pInbuf;
2043 size_t inLeft;
2044 size_t outLeft;
2045 size_t done;
2046 size_t space;
2047
2048 /* convert */
2049 pInbuf=(char *)text;
2050
2051 outLeft=len*2;
2052 space=outLeft;
2053 outbuf=(char *)malloc(outLeft);
2054 assert(outbuf);
2055
2056 inLeft=len;
2057 pInbuf=(char *)text;
2058 pOutbuf=outbuf;
2059 done=iconv(ic, &pInbuf, &inLeft, &pOutbuf, &outLeft);
2060 if (done==(size_t)-1) {
2061 DBG_ERROR(GWEN_LOGDOMAIN, "Error in conversion: %s (%d)",
2062 strerror(errno), errno);
2063 free(outbuf);
2064 iconv_close(ic);
2065 return GWEN_ERROR_GENERIC;
2066 }
2067
2068 GWEN_Buffer_AppendBytes(tbuf, outbuf, space-outLeft);
2069 free(outbuf);
2070 DBG_DEBUG(GWEN_LOGDOMAIN, "Conversion done.");
2071 iconv_close(ic);
2072 return 0;
2073 }
2074#endif
2075 }
2076
2077 GWEN_Buffer_AppendBytes(tbuf, text, len);
2078 }
2079 return 0;
2080}
2081
2082
2083
/**
 * Returns a newly allocated, NUL-terminated copy of at most @p n characters
 * of @p s. The caller releases the copy with free().
 *
 * On Windows (OS_WIN32) strndup() is not available, so the copy is made by
 * hand there; the approach follows a public-domain snippet from
 * https://stackoverflow.com/questions/46013382/c-strndup-implicit-declaration
 * Everywhere else the call is forwarded to strndup().
 *
 * @param s string to copy
 * @param n maximum number of characters to copy
 * @return the copy, or NULL if memory could not be allocated
 */
char *GWEN_Text_strndup(const char *s, size_t n)
{
#ifdef OS_WIN32
  const char *terminator;
  size_t copyLen;
  char *copy;

  /* stop at the terminating NUL if it lies within the first n bytes */
  terminator=memchr(s, '\0', n);
  copyLen=(terminator!=NULL)?(size_t)(terminator-s):n;

  copy=malloc(copyLen+1);
  if (copy==NULL)
    return NULL;
  memcpy(copy, s, copyLen);
  copy[copyLen]='\0';
  return copy;
#else
  return strndup(s, n);
#endif
}
2106
2107
2108
2109
2110int GWEN_Text_ReplaceVars(const char *s, GWEN_BUFFER *dbuf, GWEN_TEXT_REPLACE_VARS_CB fn, void *ptr)
2111{
2112 const char *p;
2113
2114 p=s;
2115 while (*p) {
2116 if (*p=='$') {
2117 p++;
2118 if (*p=='$')
2119 GWEN_Buffer_AppendByte(dbuf, '$');
2120 else if (*p=='(') {
2121 const char *pStart;
2122
2123 p++;
2124 pStart=p;
2125 while (*p && *p!=')')
2126 p++;
2127 if (*p!=')') {
2128 DBG_ERROR(GWEN_LOGDOMAIN, "Unterminated variable name in code");
2129 return GWEN_ERROR_BAD_DATA;
2130 }
2131 else {
2132 int len;
2133 char *name=NULL;
2134 char index=0;
2135 char *rawName;
2136 int rv;
2137 int maxLen=-1;
2138 uint32_t posBeforeFn;
2139 uint32_t posAfterFn;
2140 uint32_t charsAdded;
2141
2142 len=p-pStart;
2143 if (len<1) {
2144 DBG_ERROR(GWEN_LOGDOMAIN, "Empty variable name in code");
2145 return GWEN_ERROR_BAD_DATA;
2146 }
2147 rawName=(char *) malloc(len+1);
2148 assert(rawName);
2149 memmove(rawName, pStart, len);
2150 rawName[len]=0;
2151
2152 index=_splitVariableNameInNameAndIndex(rawName, &name, &maxLen);
2153 if (index<0 && index!=GWEN_ERROR_NO_ADDRESS) {
2154 DBG_ERROR(GWEN_LOGDOMAIN, "Invalid variable name \"%s\"", rawName);
2155 free(rawName);
2156 return index;
2157 }
2158 free(rawName);
2159
2160 posBeforeFn=GWEN_Buffer_GetPos(dbuf);
2161 rv=fn(ptr, name, (index==GWEN_ERROR_NO_ADDRESS)?-1:index, maxLen, dbuf);
2162 if (rv<0 && rv!=GWEN_ERROR_NO_DATA) {
2163 DBG_INFO(GWEN_LOGDOMAIN, "here (%d)", rv);
2164 free(name);
2165 return rv;
2166 }
2167 free(name);
2168 posAfterFn=GWEN_Buffer_GetPos(dbuf);
2169 charsAdded=posAfterFn-posBeforeFn;
2170 if (maxLen>0 && (int)charsAdded>maxLen) {
2171 GWEN_Buffer_Crop(dbuf, 0, posBeforeFn+maxLen);
2172 }
2173 }
2174 }
2175 else {
2176 DBG_ERROR(GWEN_LOGDOMAIN, "Bad variable string in code \"%s\" (rest:\"%s\")", s, p);
2177 return GWEN_ERROR_BAD_DATA;
2178 }
2179 p++;
2180 }
2181 else {
2182 GWEN_Buffer_AppendByte(dbuf, *p);
2183 p++;
2184 }
2185 }
2186
2187 return 0;
2188}
2189
2190
2191
2192int _splitVariableNameInNameAndIndex(const char *s, char **pVariableName, int *pMaxLen)
2193{
2194 const char *p;
2195 const char *pStart;
2196 int len;
2197 char *name=NULL;
2198 int index=0;
2199 int maxLen=0;
2200
2201 p=s;
2202 pStart=p;
2203 while (*p && *p!='[' && *p!=':')
2204 p++;
2205
2206 len=p-pStart;
2207 if (len<1) {
2208 DBG_ERROR(GWEN_LOGDOMAIN, "Empty variable name in code");
2209 return GWEN_ERROR_BAD_DATA;
2210 }
2211 name=(char *) malloc(len+1);
2212 assert(name);
2213 memmove(name, pStart, len);
2214 name[len]=0;
2215
2216 if (*p=='[') {
2217 /* we have an index */
2218 p++;
2219 pStart=p;
2220 while (*p && *p!=']' && isdigit(*p)) {
2221 index*=10;
2222 index+=(*p)-'0';
2223 p++;
2224 }
2225 if (*p!=']') {
2226 DBG_ERROR(GWEN_LOGDOMAIN, "Bad index specification in variable name");
2227 free(name);
2228 return GWEN_ERROR_BAD_DATA;
2229 }
2230 if (p==pStart) {
2231 index=GWEN_ERROR_NO_ADDRESS; /* meaning: ALL indices */
2232 }
2233 }
2234
2235 if (*p==':') {
2236
2237 /* we might have a maxlen field */
2238 p++;
2239 pStart=p;
2240 while (*p && isdigit(*p)) {
2241 maxLen*=10;
2242 maxLen+=(*p)-'0';
2243 p++;
2244 }
2245 if (*p) {
2246 DBG_ERROR(GWEN_LOGDOMAIN, "Bad maximum length specification in variable name");
2247 free(name);
2248 return GWEN_ERROR_BAD_DATA;
2249 }
2250 }
2251
2252 *pVariableName=name;
2253 if (maxLen)
2254 *pMaxLen=maxLen;
2255 else
2256 *pMaxLen=-1;
2257 return index;
2258}
2259
2260
2261
#define NULL
Definition binreloc.c:300
GWEN_BUFFER * GWEN_Buffer_new(char *buffer, uint32_t size, uint32_t used, int take)
Definition buffer.c:42
int GWEN_Buffer_IncrementPos(GWEN_BUFFER *bf, uint32_t i)
Definition buffer.c:451
uint32_t GWEN_Buffer_GetMaxUnsegmentedWrite(GWEN_BUFFER *bf)
Definition buffer.c:527
int GWEN_Buffer_AdjustUsedBytes(GWEN_BUFFER *bf)
Definition buffer.c:468
uint32_t GWEN_Buffer_GetBytesLeft(GWEN_BUFFER *bf)
Definition buffer.c:536
char * GWEN_Buffer_GetPosPointer(const GWEN_BUFFER *bf)
Definition buffer.c:548
int GWEN_Buffer_AppendBytes(GWEN_BUFFER *bf, const char *buffer, uint32_t size)
Definition buffer.c:360
uint32_t GWEN_Buffer_GetPos(const GWEN_BUFFER *bf)
Definition buffer.c:253
void GWEN_Buffer_free(GWEN_BUFFER *bf)
Definition buffer.c:89
int GWEN_Buffer_ReadByte(GWEN_BUFFER *bf)
Definition buffer.c:438
int GWEN_Buffer_AppendString(GWEN_BUFFER *bf, const char *buffer)
Definition buffer.c:992
uint32_t GWEN_Buffer_GetUsedBytes(const GWEN_BUFFER *bf)
Definition buffer.c:277
char * GWEN_Buffer_GetStart(const GWEN_BUFFER *bf)
Definition buffer.c:235
int GWEN_Buffer_Crop(GWEN_BUFFER *bf, uint32_t pos, uint32_t l)
Definition buffer.c:950
int GWEN_Buffer_AppendByte(GWEN_BUFFER *bf, char c)
Definition buffer.c:393
#define ICONV_CONST
Definition cgui.c:67
#define DBG_INFO(dbg_logger, format,...)
Definition debug.h:181
#define DBG_ERROR(dbg_logger, format,...)
Definition debug.h:97
#define DBG_DEBUG(dbg_logger, format,...)
Definition debug.h:214
#define GWEN_ERROR_BAD_DATA
Definition error.h:121
#define GWEN_ERROR_NO_ADDRESS
Definition error.h:81
#define GWEN_ERROR_GENERIC
Definition error.h:62
#define GWEN_ERROR_NO_DATA
Definition error.h:94
struct GWEN_BUFFER GWEN_BUFFER
A dynamically resizeable text buffer.
Definition buffer.h:38
void GWEN_Logger_Log(const char *logDomain, GWEN_LOGGER_LEVEL priority, const char *s)
Definition logger.c:401
#define GWEN_LOGDOMAIN
Definition logger.h:32
GWEN_LOGGER_LEVEL
Definition logger.h:61
Definition text.c:57
const char * replace
Definition text.c:59
int character
Definition text.c:58
double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign)
Definition text.c:1786
char * GWEN_Text_strndup(const char *s, size_t n)
Definition text.c:2088
#define GWEN_TEXT__APPENDCHAR(chr)
int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
Definition text.c:1856
char * GWEN_Text_Escape(const char *src, char *buffer, unsigned int maxsize)
Definition text.c:349
int GWEN_Text_ToHexBuffer(const char *src, unsigned l, GWEN_BUFFER *buf, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition text.c:777
int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf)
Definition text.c:1663
int GWEN_Text_ToBcdBuffer(const char *src, unsigned l, GWEN_BUFFER *buf, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition text.c:988
void GWEN_Text_CondenseBuffer(GWEN_BUFFER *buf)
Definition text.c:1620
int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf)
Definition text.c:1376
char * GWEN_Text_UnescapeN(const char *src, unsigned int srclen, char *buffer, unsigned int maxsize)
Definition text.c:458
int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf)
Definition text.c:1471
static double _checkSimilarity(const char *s1, const char *s2, int ign)
Definition text.c:1711
int GWEN_Text_Compare(const char *s1, const char *s2, int ign)
Definition text.c:1061
char * GWEN_Text_UnescapeTolerant(const char *src, char *buffer, unsigned int maxsize)
Definition text.c:645
int GWEN_Text_StringToDouble(const char *s, double *num)
Definition text.c:1688
static int _cmpSegment(const char *w, unsigned int *wpos, const char *p, unsigned int *ppos, int sensecase, unsigned int *matches)
Definition text.c:1118
static int _splitVariableNameInNameAndIndex(const char *s, char **pVariableName, int *pMaxLen)
Definition text.c:2192
int GWEN_Text_GetWordToBuffer(const char *src, const char *delims, GWEN_BUFFER *buf, uint32_t flags, const char **next)
Definition text.c:226
const char * GWEN_Text_StrCaseStr(const char *haystack, const char *needle)
Definition text.c:1083
static int _findSegment(const char *w, unsigned int *wpos, const char *p, unsigned int *ppos, int sensecase, unsigned int *matches)
Definition text.c:1184
int GWEN_Text_ReplaceVars(const char *s, GWEN_BUFFER *dbuf, GWEN_TEXT_REPLACE_VARS_CB fn, void *ptr)
Definition text.c:2110
int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf)
Definition text.c:1978
int GWEN_Text_EscapeToBufferTolerant2(GWEN_BUFFER *src, GWEN_BUFFER *buf)
Definition text.c:1560
char * GWEN_Text_Unescape(const char *src, char *buffer, unsigned int maxsize)
Definition text.c:542
int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf)
Definition text.c:1411
char * GWEN_Text_EscapeTolerant(const char *src, char *buffer, unsigned int maxsize)
Definition text.c:400
char * GWEN_Text_GetWord(const char *src, const char *delims, char *buffer, unsigned int maxsize, uint32_t flags, const char **next)
Definition text.c:100
int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase)
Definition text.c:1208
int GWEN_Text_UnescapeToBufferTolerant(const char *src, GWEN_BUFFER *buf)
Definition text.c:1515
char * GWEN_Text_ToHexGrouped(const char *src, unsigned l, char *buffer, unsigned maxsize, unsigned int groupsize, char delimiter, int skipLeadingZeroes)
Definition text.c:691
int GWEN_Text_CountUtf8Chars(const char *s, int len)
Definition text.c:1799
int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf)
Definition text.c:897
char * GWEN_Text_ToHex(const char *src, unsigned l, char *buffer, unsigned int maxsize)
Definition text.c:657
static const GWEN_TEXT_ESCAPE_ENTRY gwen_text__xml_escape_chars[]
Definition text.c:62
int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf)
Definition text.c:942
void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l, GWEN_BUFFER *mbuf, unsigned int insert)
Definition text.c:1324
void GWEN_Text_LogString(const char *s, unsigned int l, const char *logDomain, GWEN_LOGGER_LEVEL lv)
Definition text.c:1606
char * GWEN_Text_UnescapeTolerantN(const char *src, unsigned int srclen, char *buffer, unsigned int maxsize)
Definition text.c:554
int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize)
Definition text.c:850
void GWEN_Text_DumpString(const char *s, unsigned int l, unsigned int insert)
Definition text.c:1283
int GWEN_Text_ConvertCharset(const char *fromCharset, const char *toCharset, const char *text, int len, GWEN_BUFFER *tbuf)
Definition text.c:2015
int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize, int fillchar)
Definition text.c:1243
#define GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS
Definition text.h:44
int GWENHYWFAR_CB(* GWEN_TEXT_REPLACE_VARS_CB)(void *cbPtr, const char *name, int index, int maxLen, GWEN_BUFFER *dstBuf)
Definition text.h:368
#define GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS
Definition text.h:46
#define GWEN_TEXT_FLAGS_DEL_QUOTES
Definition text.h:49
#define GWEN_TEXT_FLAGS_CHECK_BACKSLASH
Definition text.h:50
#define GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS
Definition text.h:45
#define GWEN_TEXT_FLAGS_NULL_IS_DELIMITER
Definition text.h:48
#define GWEN_TEXT_FLAGS_NEED_DELIMITER
Definition text.h:47