bwstring.c revision 298089
138451Smsmith/*- 238451Smsmith * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 338451Smsmith * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 498542Smckusick * All rights reserved. 598542Smckusick * 698542Smckusick * Redistribution and use in source and binary forms, with or without 798542Smckusick * modification, are permitted provided that the following conditions 898542Smckusick * are met: 998542Smckusick * 1. Redistributions of source code must retain the above copyright 1098542Smckusick * notice, this list of conditions and the following disclaimer. 1198542Smckusick * 2. Redistributions in binary form must reproduce the above copyright 1298542Smckusick * notice, this list of conditions and the following disclaimer in the 1398542Smckusick * documentation and/or other materials provided with the distribution. 1438451Smsmith * 1538451Smsmith * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1638451Smsmith * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1738451Smsmith * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1838451Smsmith * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1938451Smsmith * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2038451Smsmith * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2138451Smsmith * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2238451Smsmith * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2338451Smsmith * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2438451Smsmith * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2538451Smsmith * SUCH DAMAGE. 2638451Smsmith */ 2738451Smsmith 2838451Smsmith#include <sys/cdefs.h> 2938451Smsmith__FBSDID("$FreeBSD: head/usr.bin/sort/bwstring.c 298089 2016-04-15 22:31:22Z pfg $"); 3038451Smsmith 3138451Smsmith#include <ctype.h> 3238451Smsmith#include <errno.h> 3338451Smsmith#include <err.h> 3438451Smsmith#include <langinfo.h> 3538451Smsmith#include <math.h> 3638451Smsmith#include <stdlib.h> 3738451Smsmith#include <string.h> 3838451Smsmith#include <wchar.h> 3938451Smsmith#include <wctype.h> 4038451Smsmith 4138451Smsmith#include "bwstring.h" 4238451Smsmith#include "sort.h" 4338451Smsmith 4438451Smsmithbool byte_sort; 4538451Smsmith 4638451Smsmithstatic wchar_t **wmonths; 4738451Smsmithstatic unsigned char **cmonths; 4838451Smsmith 4938451Smsmith/* initialise months */ 5038451Smsmith 5138451Smsmithvoid 5238451Smsmithinitialise_months(void) 5338451Smsmith{ 5438451Smsmith const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 5538451Smsmith ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 5638451Smsmith ABMON_11, ABMON_12 }; 5738451Smsmith unsigned char *tmp; 5838451Smsmith size_t len; 5938451Smsmith 6038451Smsmith if (MB_CUR_MAX == 1) { 6138451Smsmith if (cmonths == NULL) { 6238451Smsmith unsigned char *m; 6338451Smsmith 6438451Smsmith cmonths = sort_malloc(sizeof(unsigned char*) * 12); 6538451Smsmith for (int i = 0; i < 12; i++) { 6638451Smsmith cmonths[i] = NULL; 6738451Smsmith tmp = (unsigned char *) nl_langinfo(item[i]); 6838451Smsmith if (debug_sort) 6938451Smsmith printf("month[%d]=%s\n", i, tmp); 7084221Sdillon if (*tmp == '\0') 7184221Sdillon continue; 7284221Sdillon m = sort_strdup(tmp); 7338451Smsmith len = strlen(tmp); 7438451Smsmith for (unsigned int j = 0; j < len; j++) 7538451Smsmith m[j] = toupper(m[j]); 7638451Smsmith cmonths[i] = m; 7738451Smsmith } 7896477Sphk } 7938451Smsmith 8038451Smsmith } else { 8138451Smsmith if (wmonths == NULL) { 8238451Smsmith wchar_t *m; 8338451Smsmith 8438451Smsmith wmonths = sort_malloc(sizeof(wchar_t *) * 12); 8538451Smsmith for (int i = 0; i < 12; i++) { 8639468Smsmith wmonths[i] = NULL; 8787631Sjhb tmp = (unsigned char *) nl_langinfo(item[i]); 8838451Smsmith if (debug_sort) 8938451Smsmith printf("month[%d]=%s\n", i, tmp); 9038451Smsmith if (*tmp == '\0') 9138451Smsmith continue; 9259766Sjlemon len = strlen(tmp); 9338451Smsmith m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); 9438451Smsmith if (mbstowcs(m, (char*)tmp, len) == 9559766Sjlemon ((size_t) - 1)) { 9659766Sjlemon sort_free(m); 9759766Sjlemon continue; 9859766Sjlemon } 9987631Sjhb m[len] = L'\0'; 10059766Sjlemon for (unsigned int j = 0; j < len; j++) 10159766Sjlemon m[j] = towupper(m[j]); 10259766Sjlemon wmonths[i] = m; 10338451Smsmith } 10438451Smsmith } 10538451Smsmith } 10638451Smsmith} 10738451Smsmith 10838451Smsmith/* 10938451Smsmith * Compare two wide-character strings 11038451Smsmith */ 11198542Smckusickstatic int 11298542Smckusickwide_str_coll(const wchar_t *s1, const wchar_t *s2) 11398542Smckusick{ 11498542Smckusick int ret = 0; 11538451Smsmith 11638451Smsmith errno = 0; 11738451Smsmith ret = wcscoll(s1, s2); 11838451Smsmith if (errno == EILSEQ) { 11938451Smsmith errno = 0; 12038451Smsmith ret = wcscmp(s1, s2); 12138451Smsmith if (errno != 0) { 12298542Smckusick for (size_t i = 0; ; ++i) { 12398542Smckusick wchar_t c1 = s1[i]; 12438451Smsmith wchar_t c2 = s2[i]; 12538451Smsmith if (c1 == L'\0') 12638451Smsmith return ((c2 == L'\0') ? 0 : -1); 12798542Smckusick if (c2 == L'\0') 12898542Smckusick return (+1); 12998542Smckusick if (c1 == c2) 13038451Smsmith continue; 13138451Smsmith return ((int)(c1 - c2)); 13298542Smckusick } 13338451Smsmith } 13487631Sjhb } 13538451Smsmith return (ret); 13638451Smsmith} 13738451Smsmith 13838451Smsmith/* counterparts of wcs functions */ 13938451Smsmith 14038451Smsmithvoid 14138451Smsmithbwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 14238451Smsmith{ 14338451Smsmith 14438451Smsmith if (MB_CUR_MAX == 1) 14592913Sobrien fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); 14692913Sobrien else 14738451Smsmith fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); 14838451Smsmith} 14938451Smsmith 15038451Smsmithconst void* bwsrawdata(const struct bwstring *bws) 15139665Smsmith{ 15239665Smsmith 15339665Smsmith return (&(bws->data)); 15438451Smsmith} 15538451Smsmith 15638451Smsmithsize_t bwsrawlen(const struct bwstring *bws) 15739665Smsmith{ 158278602Sian 15938451Smsmith return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); 16038451Smsmith} 16138451Smsmith 16238451Smsmithsize_t 16338451Smsmithbws_memsize(const struct bwstring *bws) 16438451Smsmith{ 16538451Smsmith 16638451Smsmith return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : 16738451Smsmith (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); 16838451Smsmith} 16998542Smckusick 17098542Smckusickvoid 17198542Smckusickbws_setlen(struct bwstring *bws, size_t newlen) 17298542Smckusick{ 17398542Smckusick 17498542Smckusick if (bws && newlen != bws->len && newlen <= bws->len) { 17538451Smsmith bws->len = newlen; 17638451Smsmith if (MB_CUR_MAX == 1) 17738451Smsmith bws->data.cstr[newlen] = '\0'; 17838451Smsmith else 17938451Smsmith bws->data.wstr[newlen] = L'\0'; 18092913Sobrien } 18138451Smsmith} 18238451Smsmith 18338451Smsmith/* 18438451Smsmith * Allocate a new binary string of specified size 18538451Smsmith */ 186134760Siedowsestruct bwstring * 18738451Smsmithbwsalloc(size_t sz) 18839665Smsmith{ 18938451Smsmith struct bwstring *ret; 19038451Smsmith 19138451Smsmith if (MB_CUR_MAX == 1) 19238451Smsmith ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 19338451Smsmith else 19438451Smsmith ret = sort_malloc(sizeof(struct bwstring) + 19538451Smsmith SIZEOF_WCHAR_STRING(sz + 1)); 19638451Smsmith ret->len = sz; 19738451Smsmith 19838451Smsmith if (MB_CUR_MAX == 1) 19998542Smckusick ret->data.cstr[ret->len] = '\0'; 20098542Smckusick else 20138451Smsmith ret->data.wstr[ret->len] = L'\0'; 20292913Sobrien 20392913Sobrien return (ret); 20438451Smsmith} 20538451Smsmith 20698542Smckusick/* 20738451Smsmith * Create a copy of binary string. 20838451Smsmith * New string size equals the length of the old string. 20938451Smsmith */ 21038451Smsmithstruct bwstring * 21138451Smsmithbwsdup(const struct bwstring *s) 21238451Smsmith{ 21338451Smsmith 21438451Smsmith if (s == NULL) 21538451Smsmith return (NULL); 21638451Smsmith else { 21738451Smsmith struct bwstring *ret = bwsalloc(s->len); 21838451Smsmith 21938451Smsmith if (MB_CUR_MAX == 1) 22038451Smsmith memcpy(ret->data.cstr, s->data.cstr, (s->len)); 22138451Smsmith else 22238451Smsmith memcpy(ret->data.wstr, s->data.wstr, 22338451Smsmith SIZEOF_WCHAR_STRING(s->len)); 22438451Smsmith 22538451Smsmith return (ret); 22638451Smsmith } 22738451Smsmith} 22838451Smsmith 22938451Smsmith/* 23038451Smsmith * Create a new binary string from a wide character buffer. 23138451Smsmith */ 23238451Smsmithstruct bwstring * 23338451Smsmithbwssbdup(const wchar_t *str, size_t len) 23498542Smckusick{ 23538451Smsmith 23638451Smsmith if (str == NULL) 23738451Smsmith return ((len == 0) ? bwsalloc(0) : NULL); 23838451Smsmith else { 23938451Smsmith struct bwstring *ret; 24038451Smsmith 24138451Smsmith ret = bwsalloc(len); 24238451Smsmith 24338451Smsmith if (MB_CUR_MAX == 1) 24438451Smsmith for (size_t i = 0; i < len; ++i) 24538451Smsmith ret->data.cstr[i] = (unsigned char) str[i]; 24638451Smsmith else 24738451Smsmith memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); 24838451Smsmith 24938451Smsmith return (ret); 25038451Smsmith } 25138451Smsmith} 25238451Smsmith 25338451Smsmith/* 25438451Smsmith * Create a new binary string from a raw binary buffer. 25538451Smsmith */ 25698542Smckusickstruct bwstring * 25738451Smsmithbwscsbdup(const unsigned char *str, size_t len) 25838451Smsmith{ 25938451Smsmith struct bwstring *ret; 26038451Smsmith 26138451Smsmith ret = bwsalloc(len); 26238451Smsmith 26338451Smsmith if (str) { 26438451Smsmith if (MB_CUR_MAX == 1) 26538451Smsmith memcpy(ret->data.cstr, str, len); 26638451Smsmith else { 26739665Smsmith mbstate_t mbs; 268278602Sian const char *s; 26938451Smsmith size_t charlen, chars, cptr; 27038451Smsmith 27138451Smsmith charlen = chars = 0; 27238451Smsmith cptr = 0; 27338451Smsmith s = (const char *) str; 27438451Smsmith 27538451Smsmith memset(&mbs, 0, sizeof(mbs)); 27638451Smsmith 27738451Smsmith while (cptr < len) { 27838451Smsmith size_t n = MB_CUR_MAX; 27938451Smsmith 28038451Smsmith if (n > len - cptr) 28138451Smsmith n = len - cptr; 28238451Smsmith charlen = mbrlen(s + cptr, n, &mbs); 28338451Smsmith switch (charlen) { 28438451Smsmith case 0: 28538451Smsmith /* FALLTHROUGH */ 28638451Smsmith case (size_t) -1: 28798542Smckusick /* FALLTHROUGH */ 28898542Smckusick case (size_t) -2: 28998542Smckusick ret->data.wstr[chars++] = 29098542Smckusick (unsigned char) s[cptr]; 29138451Smsmith ++cptr; 29238451Smsmith break; 29338451Smsmith default: 29438451Smsmith n = mbrtowc(ret->data.wstr + (chars++), 29538451Smsmith s + cptr, charlen, &mbs); 29638451Smsmith if ((n == (size_t)-1) || (n == (size_t)-2)) 29738451Smsmith /* NOTREACHED */ 29838451Smsmith err(2, "mbrtowc error"); 29987631Sjhb cptr += charlen; 30087631Sjhb } 30187631Sjhb } 30287631Sjhb 30387631Sjhb ret->len = chars; 30487631Sjhb ret->data.wstr[ret->len] = L'\0'; 30587631Sjhb } 30687631Sjhb } 30792913Sobrien return (ret); 30892913Sobrien} 30987631Sjhb 31098542Smckusick/* 31198542Smckusick * De-allocate object memory 31287631Sjhb */ 31387631Sjhbvoid 31487631Sjhbbwsfree(const struct bwstring *s) 31587631Sjhb{ 31687631Sjhb 31787631Sjhb if (s) 31887631Sjhb sort_free(s); 31987631Sjhb} 32098542Smckusick 32187631Sjhb/* 32287631Sjhb * Copy content of src binary string to dst. 32387631Sjhb * If the capacity of the dst string is not sufficient, 32487631Sjhb * then the data is truncated. 32587631Sjhb */ 32687631Sjhbsize_t 32798542Smckusickbwscpy(struct bwstring *dst, const struct bwstring *src) 32898542Smckusick{ 32987631Sjhb size_t nums = src->len; 33087631Sjhb 33187631Sjhb if (nums > dst->len) 33287631Sjhb nums = dst->len; 33387631Sjhb dst->len = nums; 33498542Smckusick 33598542Smckusick if (MB_CUR_MAX == 1) { 33687631Sjhb memcpy(dst->data.cstr, src->data.cstr, nums); 33787631Sjhb dst->data.cstr[dst->len] = '\0'; 33887631Sjhb } else { 33987631Sjhb memcpy(dst->data.wstr, src->data.wstr, 34087631Sjhb SIZEOF_WCHAR_STRING(nums + 1)); 34187631Sjhb dst->data.wstr[dst->len] = L'\0'; 34287631Sjhb } 34387631Sjhb 34487631Sjhb return (nums); 34587631Sjhb} 34687631Sjhb 34787631Sjhb/* 34887631Sjhb * Copy content of src binary string to dst, 349278602Sian * with specified number of symbols to be copied. 35087631Sjhb * If the capacity of the dst string is not sufficient, 35187631Sjhb * then the data is truncated. 35287631Sjhb */ 35387631Sjhbstruct bwstring * 35487631Sjhbbwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) 35587631Sjhb{ 35687631Sjhb size_t nums = src->len; 35787631Sjhb 35887631Sjhb if (nums > dst->len) 35987631Sjhb nums = dst->len; 36087631Sjhb if (nums > size) 36187631Sjhb nums = size; 36298542Smckusick dst->len = nums; 36387631Sjhb 36487631Sjhb if (MB_CUR_MAX == 1) { 36587631Sjhb memcpy(dst->data.cstr, src->data.cstr, nums); 36687631Sjhb dst->data.cstr[dst->len] = '\0'; 36787631Sjhb } else { 368278602Sian memcpy(dst->data.wstr, src->data.wstr, 36987631Sjhb SIZEOF_WCHAR_STRING(nums + 1)); 37087631Sjhb dst->data.wstr[dst->len] = L'\0'; 37187631Sjhb } 37287631Sjhb 37387631Sjhb return (dst); 37487631Sjhb} 37587631Sjhb 37638451Smsmith/* 37738451Smsmith * Copy content of src binary string to dst, 37838451Smsmith * with specified number of symbols to be copied. 37938451Smsmith * An offset value can be specified, from the start of src string. 38038451Smsmith * If the capacity of the dst string is not sufficient, 38138451Smsmith * then the data is truncated. 38238451Smsmith */ 38338451Smsmithstruct bwstring * 38438451Smsmithbwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 38592913Sobrien size_t size) 38692913Sobrien{ 38738451Smsmith 38898542Smckusick if (offset >= src->len) { 38998542Smckusick dst->data.wstr[0] = 0; 39038451Smsmith dst->len = 0; 39138451Smsmith } else { 39238451Smsmith size_t nums = src->len - offset; 39338451Smsmith 39438451Smsmith if (nums > dst->len) 39598542Smckusick nums = dst->len; 39638451Smsmith if (nums > size) 39738451Smsmith nums = size; 39887631Sjhb dst->len = nums; 39987631Sjhb if (MB_CUR_MAX == 1) { 40087631Sjhb memcpy(dst->data.cstr, src->data.cstr + offset, 40138451Smsmith (nums)); 40238451Smsmith dst->data.cstr[dst->len] = '\0'; 40338451Smsmith } else { 40438451Smsmith memcpy(dst->data.wstr, src->data.wstr + offset, 40538451Smsmith SIZEOF_WCHAR_STRING(nums)); 40638451Smsmith dst->data.wstr[dst->len] = L'\0'; 40738451Smsmith } 40838451Smsmith } 409278602Sian return (dst); 41038451Smsmith} 41138451Smsmith 41238451Smsmith/* 41338451Smsmith * Write binary string to the file. 41438451Smsmith * The output is ended either with '\n' (nl == true) 41538451Smsmith * or '\0' (nl == false). 41638451Smsmith */ 41738451Smsmithsize_t 41838451Smsmithbwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 41938451Smsmith{ 42038451Smsmith 42138451Smsmith if (MB_CUR_MAX == 1) { 42238451Smsmith size_t len = bws->len; 42338451Smsmith 42438451Smsmith if (!zero_ended) { 42538451Smsmith bws->data.cstr[len] = '\n'; 42638451Smsmith 42738451Smsmith if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 42838451Smsmith err(2, NULL); 42938451Smsmith 43038451Smsmith bws->data.cstr[len] = '\0'; 43198542Smckusick } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 43298542Smckusick err(2, NULL); 43338451Smsmith 43438451Smsmith return (len + 1); 43538451Smsmith 43638451Smsmith } else { 43738451Smsmith wchar_t eols; 43838451Smsmith size_t printed = 0; 43938451Smsmith 44038451Smsmith eols = zero_ended ? btowc('\0') : btowc('\n'); 44138451Smsmith 44238451Smsmith while (printed < BWSLEN(bws)) { 44338451Smsmith const wchar_t *s = bws->data.wstr + printed; 44438451Smsmith 44538451Smsmith if (*s == L'\0') { 44638451Smsmith int nums; 44792913Sobrien 44892913Sobrien nums = fwprintf(f, L"%lc", *s); 44938451Smsmith 45038451Smsmith if (nums != 1) 45138451Smsmith err(2, NULL); 45238451Smsmith ++printed; 45338451Smsmith } else { 45438451Smsmith int nums; 45538451Smsmith 45638451Smsmith nums = fwprintf(f, L"%ls", s); 45738451Smsmith 45898542Smckusick if (nums < 1) 45938451Smsmith err(2, NULL); 46038451Smsmith printed += nums; 46138451Smsmith } 46238451Smsmith } 46338451Smsmith fwprintf(f, L"%lc", eols); 46438451Smsmith return (printed + 1); 46538451Smsmith } 46638451Smsmith} 46738451Smsmith 46838451Smsmith/* 46938451Smsmith * Allocate and read a binary string from file. 47038451Smsmith * The strings are nl-ended or zero-ended, depending on the sort setting. 47138451Smsmith */ 47238451Smsmithstruct bwstring * 47338451Smsmithbwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) 47438451Smsmith{ 47538451Smsmith wint_t eols; 47638451Smsmith 47738451Smsmith eols = zero_ended ? btowc('\0') : btowc('\n'); 47838451Smsmith 47938451Smsmith if (!zero_ended && (MB_CUR_MAX > 1)) { 48038451Smsmith wchar_t *ret; 48138451Smsmith 48238451Smsmith ret = fgetwln(f, len); 48338451Smsmith 48438451Smsmith if (ret == NULL) { 48538451Smsmith if (!feof(f)) 48638451Smsmith err(2, NULL); 48738451Smsmith return (NULL); 48898542Smckusick } 48998542Smckusick if (*len > 0) { 49038451Smsmith if (ret[*len - 1] == (wchar_t)eols) 49138451Smsmith --(*len); 49238451Smsmith } 49338451Smsmith return (bwssbdup(ret, *len)); 49439468Smsmith 49539468Smsmith } else if (!zero_ended && (MB_CUR_MAX == 1)) { 49638451Smsmith char *ret; 49738451Smsmith 49892913Sobrien ret = fgetln(f, len); 49992913Sobrien 50038451Smsmith if (ret == NULL) { 50138451Smsmith if (!feof(f)) 50238451Smsmith err(2, NULL); 50398542Smckusick return (NULL); 50438451Smsmith } 50538451Smsmith if (*len > 0) { 50638451Smsmith if (ret[*len - 1] == '\n') 50738451Smsmith --(*len); 50839468Smsmith } 50938451Smsmith return (bwscsbdup((unsigned char*)ret, *len)); 51038451Smsmith 51138451Smsmith } else { 51238451Smsmith *len = 0; 51338451Smsmith 51438451Smsmith if (feof(f)) 51538451Smsmith return (NULL); 51698542Smckusick 51738451Smsmith if (2 >= rb->fgetwln_z_buffer_size) { 518278602Sian rb->fgetwln_z_buffer_size += 256; 51998542Smckusick rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 52098542Smckusick sizeof(wchar_t) * rb->fgetwln_z_buffer_size); 52198542Smckusick } 52298542Smckusick rb->fgetwln_z_buffer[*len] = 0; 52398542Smckusick 52498542Smckusick if (MB_CUR_MAX == 1) 52598542Smckusick while (!feof(f)) { 52698542Smckusick int c; 52798542Smckusick 52898542Smckusick c = fgetc(f); 52998542Smckusick 530107555Sjake if (c == EOF) { 53198542Smckusick if (*len == 0) 53298542Smckusick return (NULL); 53398542Smckusick goto line_read_done; 53498542Smckusick } 53598542Smckusick if (c == eols) 53698542Smckusick goto line_read_done; 53738451Smsmith 53838451Smsmith if (*len + 1 >= rb->fgetwln_z_buffer_size) { 53938451Smsmith rb->fgetwln_z_buffer_size += 256; 54038451Smsmith rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 54138451Smsmith SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 54238451Smsmith } 54338451Smsmith 54498542Smckusick rb->fgetwln_z_buffer[*len] = c; 54592913Sobrien rb->fgetwln_z_buffer[++(*len)] = 0; 54638451Smsmith } 54738451Smsmith else 54838451Smsmith while (!feof(f)) { 54938451Smsmith wint_t c = 0; 55038451Smsmith 55138451Smsmith c = fgetwc(f); 55238451Smsmith 55338451Smsmith if (c == WEOF) { 55438451Smsmith if (*len == 0) 55538451Smsmith return (NULL); 55638451Smsmith goto line_read_done; 55738451Smsmith } 55839468Smsmith if (c == eols) 55939468Smsmith goto line_read_done; 56039468Smsmith 56139468Smsmith if (*len + 1 >= rb->fgetwln_z_buffer_size) { 56239468Smsmith rb->fgetwln_z_buffer_size += 256; 56338451Smsmith rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, 56438451Smsmith SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); 56538451Smsmith } 56638451Smsmith 56738451Smsmith rb->fgetwln_z_buffer[*len] = c; 56838451Smsmith rb->fgetwln_z_buffer[++(*len)] = 0; 56938451Smsmith } 57038451Smsmith 57138451Smsmithline_read_done: 57238451Smsmith /* we do not count the last 0 */ 57338451Smsmith return (bwssbdup(rb->fgetwln_z_buffer, *len)); 57438451Smsmith } 57538451Smsmith} 57698542Smckusick 57738451Smsmithint 57838451Smsmithbwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 57938451Smsmith size_t offset, size_t len) 58038451Smsmith{ 58138451Smsmith size_t cmp_len, len1, len2; 58238451Smsmith int res = 0; 58338451Smsmith 58438451Smsmith cmp_len = 0; 58592913Sobrien len1 = bws1->len; 58638451Smsmith len2 = bws2->len; 58738451Smsmith 58838451Smsmith if (len1 <= offset) { 58938451Smsmith return ((len2 <= offset) ? 0 : -1); 59038451Smsmith } else { 59138451Smsmith if (len2 <= offset) 59238451Smsmith return (+1); 59338451Smsmith else { 59438451Smsmith len1 -= offset; 59538451Smsmith len2 -= offset; 59638451Smsmith 59738451Smsmith cmp_len = len1; 59838451Smsmith 59938451Smsmith if (len2 < cmp_len) 60038451Smsmith cmp_len = len2; 60138451Smsmith 60238451Smsmith if (len < cmp_len) 60338451Smsmith cmp_len = len; 60438451Smsmith 60538451Smsmith if (MB_CUR_MAX == 1) { 60638451Smsmith const unsigned char *s1, *s2; 60738451Smsmith 60838451Smsmith s1 = bws1->data.cstr + offset; 60938451Smsmith s2 = bws2->data.cstr + offset; 61038451Smsmith 61138451Smsmith res = memcmp(s1, s2, cmp_len); 61238451Smsmith 61338451Smsmith } else { 61438451Smsmith const wchar_t *s1, *s2; 61538451Smsmith 61638451Smsmith s1 = bws1->data.wstr + offset; 61738451Smsmith s2 = bws2->data.wstr + offset; 61898542Smckusick 61998542Smckusick res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 62038451Smsmith } 62138451Smsmith } 62238451Smsmith } 62338451Smsmith 62438451Smsmith if (res == 0) { 62538451Smsmith if (len1 < cmp_len && len1 < len2) 62638451Smsmith res = -1; 62738451Smsmith else if (len2 < cmp_len && len2 < len1) 62838451Smsmith res = +1; 62938451Smsmith } 63038451Smsmith 63138451Smsmith return (res); 63238451Smsmith} 63398542Smckusick 63498542Smckusickint 63598542Smckusickbwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 63698542Smckusick{ 63798542Smckusick size_t len1, len2, cmp_len; 63838451Smsmith int res; 63938451Smsmith 64038451Smsmith len1 = bws1->len; 64138451Smsmith len2 = bws2->len; 64238451Smsmith 64398542Smckusick len1 -= offset; 64492913Sobrien len2 -= offset; 64538451Smsmith 64638451Smsmith cmp_len = len1; 64739665Smsmith 64898542Smckusick if (len2 < cmp_len) 64938451Smsmith cmp_len = len2; 65038451Smsmith 65138451Smsmith res = bwsncmp(bws1, bws2, offset, cmp_len); 652278602Sian 65338451Smsmith if (res == 0) { 65438451Smsmith if( len1 < len2) 65538451Smsmith res = -1; 65638451Smsmith else if (len2 < len1) 65738451Smsmith res = +1; 65838451Smsmith } 65938451Smsmith 66038451Smsmith return (res); 66138451Smsmith} 66238451Smsmith 66338451Smsmithint 66438451Smsmithbws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 66538451Smsmith{ 66638451Smsmith wchar_t c1, c2; 66738451Smsmith size_t i = 0; 66838451Smsmith 66938451Smsmith for (i = 0; i < len; ++i) { 67038451Smsmith c1 = bws_get_iter_value(iter1); 67138451Smsmith c2 = bws_get_iter_value(iter2); 67238451Smsmith if (c1 != c2) 67338451Smsmith return (c1 - c2); 67438451Smsmith iter1 = bws_iterator_inc(iter1, 1); 67538451Smsmith iter2 = bws_iterator_inc(iter2, 1); 67638451Smsmith } 67738451Smsmith 67838451Smsmith return (0); 67938451Smsmith} 68038451Smsmith 681134760Siedowseint 68238451Smsmithbwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 68338451Smsmith{ 68439665Smsmith size_t len1, len2; 68539468Smsmith 68639468Smsmith len1 = bws1->len; 68738451Smsmith len2 = bws2->len; 68838451Smsmith 68939665Smsmith if (len1 <= offset) 69038451Smsmith return ((len2 <= offset) ? 0 : -1); 69138451Smsmith else { 69238451Smsmith if (len2 <= offset) 69338451Smsmith return (+1); 69438451Smsmith else { 69538451Smsmith len1 -= offset; 69638451Smsmith len2 -= offset; 69738451Smsmith 69838451Smsmith if (MB_CUR_MAX == 1) { 69938451Smsmith const unsigned char *s1, *s2; 70092913Sobrien 70138451Smsmith s1 = bws1->data.cstr + offset; 70238451Smsmith s2 = bws2->data.cstr + offset; 70338451Smsmith 70438451Smsmith if (byte_sort) { 70538451Smsmith int res = 0; 70638451Smsmith 70738451Smsmith if (len1 > len2) { 70838451Smsmith res = memcmp(s1, s2, len2); 70939665Smsmith if (!res) 71038451Smsmith res = +1; 71138451Smsmith } else if (len1 < len2) { 71239665Smsmith res = memcmp(s1, s2, len1); 71339665Smsmith if (!res) 71438451Smsmith res = -1; 71538451Smsmith } else 71638451Smsmith res = memcmp(s1, s2, len1); 71738451Smsmith 71838451Smsmith return (res); 71938451Smsmith 72038451Smsmith } else { 72138451Smsmith int res = 0; 72238451Smsmith size_t i, maxlen; 72338451Smsmith 72438451Smsmith i = 0; 72538451Smsmith maxlen = len1; 72638451Smsmith 72738451Smsmith if (maxlen > len2) 72838451Smsmith maxlen = len2; 72992913Sobrien 73092913Sobrien while (i < maxlen) { 73138451Smsmith /* goto next non-zero part: */ 73238451Smsmith while ((i < maxlen) && 73338451Smsmith !s1[i] && !s2[i]) 73492913Sobrien ++i; 73538451Smsmith 73638451Smsmith if (i >= maxlen) 73798542Smckusick break; 73838451Smsmith 73938451Smsmith if (s1[i] == 0) { 74038451Smsmith if (s2[i] == 0) 74138451Smsmith /* NOTREACHED */ 74238451Smsmith err(2, "bwscoll error 01"); 74338451Smsmith else 74438451Smsmith return (-1); 74538451Smsmith } else if (s2[i] == 0) 74638451Smsmith return (+1); 74738451Smsmith 74838451Smsmith res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); 74938451Smsmith if (res) 75038451Smsmith return (res); 75138451Smsmith 75238451Smsmith while ((i < maxlen) && 75338451Smsmith s1[i] && s2[i]) 75438451Smsmith ++i; 75538451Smsmith 75638451Smsmith if (i >= maxlen) 75738451Smsmith break; 75838451Smsmith 75987631Sjhb if (s1[i] == 0) { 76087631Sjhb if (s2[i] == 0) { 76187631Sjhb ++i; 76287631Sjhb continue; 76387631Sjhb } else 76487631Sjhb return (-1); 76587631Sjhb } else if (s2[i] == 0) 76687631Sjhb return (+1); 76787631Sjhb else 76887631Sjhb /* NOTREACHED */ 76987631Sjhb err(2, "bwscoll error 02"); 77087631Sjhb } 77192913Sobrien 77287631Sjhb if (len1 < len2) 77387631Sjhb return (-1); 77492913Sobrien else if (len1 > len2) 77587631Sjhb return (+1); 77687631Sjhb 77787631Sjhb return (0); 77898542Smckusick } 77987631Sjhb } else { 78087631Sjhb const wchar_t *s1, *s2; 78187631Sjhb size_t i, maxlen; 78287631Sjhb int res = 0; 78387631Sjhb 78487631Sjhb s1 = bws1->data.wstr + offset; 78587631Sjhb s2 = bws2->data.wstr + offset; 78687631Sjhb 78787631Sjhb i = 0; 78887631Sjhb maxlen = len1; 78987631Sjhb 79087631Sjhb if (maxlen > len2) 79187631Sjhb maxlen = len2; 79287631Sjhb 79387631Sjhb while (i < maxlen) { 79487631Sjhb 79587631Sjhb /* goto next non-zero part: */ 79638451Smsmith while ((i < maxlen) && 79738451Smsmith !s1[i] && !s2[i]) 79838451Smsmith ++i; 79938451Smsmith 80038451Smsmith if (i >= maxlen) 80138451Smsmith break; 80292913Sobrien 80338451Smsmith if (s1[i] == 0) { 80438451Smsmith if (s2[i] == 0) 80538451Smsmith /* NOTREACHED */ 80638451Smsmith err(2, "bwscoll error 1"); 80738451Smsmith else 80838451Smsmith return (-1); 80938451Smsmith } else if (s2[i] == 0) 81038451Smsmith return (+1); 81138451Smsmith 81298542Smckusick res = wide_str_coll(s1 + i, s2 + i); 81338451Smsmith if (res) 81438451Smsmith return (res); 815124811Sjhb 81638451Smsmith while ((i < maxlen) && s1[i] && s2[i]) 81738451Smsmith ++i; 81838451Smsmith 81938451Smsmith if (i >= maxlen) 82038451Smsmith break; 82138451Smsmith 82238451Smsmith if (s1[i] == 0) { 82338451Smsmith if (s2[i] == 0) { 82438451Smsmith ++i; 82538451Smsmith continue; 82692913Sobrien } else 82738451Smsmith return (-1); 82838451Smsmith } else if (s2[i] == 0) 82998542Smckusick return (+1); 83098542Smckusick else 83198542Smckusick /* NOTREACHED */ 83298542Smckusick err(2, "bwscoll error 2"); 83338451Smsmith } 83438451Smsmith 83538451Smsmith if (len1 < len2) 83659766Sjlemon return (-1); 83759766Sjlemon else if (len1 > len2) 83859766Sjlemon return (+1); 83959766Sjlemon 84059766Sjlemon return (0); 84159766Sjlemon } 84259766Sjlemon } 84359766Sjlemon } 84459766Sjlemon} 84559766Sjlemon 84659766Sjlemon/* 84759766Sjlemon * Correction of the system API 84859766Sjlemon */ 84998542Smckusickdouble 85059766Sjlemonbwstod(struct bwstring *s0, bool *empty) 85159766Sjlemon{ 85259766Sjlemon double ret = 0; 85359766Sjlemon 85459766Sjlemon if (MB_CUR_MAX == 1) { 85559766Sjlemon unsigned char *end, *s; 85659766Sjlemon char *ep; 85759766Sjlemon 85859766Sjlemon s = s0->data.cstr; 85959766Sjlemon end = s + s0->len; 86059766Sjlemon ep = NULL; 86159766Sjlemon 862 while (isblank(*s) && s < end) 863 ++s; 864 865 if (!isprint(*s)) { 866 *empty = true; 867 return (0); 868 } 869 870 ret = strtod((char*)s, &ep); 871 if ((unsigned char*) ep == s) { 872 *empty = true; 873 return (0); 874 } 875 } else { 876 wchar_t *end, *ep, *s; 877 878 s = s0->data.wstr; 879 end = s + s0->len; 880 ep = NULL; 881 882 while (iswblank(*s) && s < end) 883 ++s; 884 885 if (!iswprint(*s)) { 886 *empty = true; 887 return (0); 888 } 889 890 ret = wcstod(s, &ep); 891 if (ep == s) { 892 *empty = true; 893 return (0); 894 } 895 } 896 897 *empty = false; 898 return (ret); 899} 900 901/* 902 * A helper function for monthcoll. If a line matches 903 * a month name, it returns (number of the month - 1), 904 * while if there is no match, it just return -1. 905 */ 906 907int 908bws_month_score(const struct bwstring *s0) 909{ 910 911 if (MB_CUR_MAX == 1) { 912 const unsigned char *end, *s; 913 size_t len; 914 915 s = s0->data.cstr; 916 end = s + s0->len; 917 918 while (isblank(*s) && s < end) 919 ++s; 920 921 len = strlen((const char*)s); 922 923 for (int i = 11; i >= 0; --i) { 924 if (cmonths[i] && 925 (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) 926 return (i); 927 } 928 929 } else { 930 const wchar_t *end, *s; 931 size_t len; 932 933 s = s0->data.wstr; 934 end = s + s0->len; 935 936 while (iswblank(*s) && s < end) 937 ++s; 938 939 len = wcslen(s); 940 941 for (int i = 11; i >= 0; --i) { 942 if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 943 return (i); 944 } 945 } 946 947 return (-1); 948} 949 950/* 951 * Rips out leading blanks (-b). 952 */ 953struct bwstring * 954ignore_leading_blanks(struct bwstring *str) 955{ 956 957 if (MB_CUR_MAX == 1) { 958 unsigned char *dst, *end, *src; 959 960 src = str->data.cstr; 961 dst = src; 962 end = src + str->len; 963 964 while (src < end && isblank(*src)) 965 ++src; 966 967 if (src != dst) { 968 size_t newlen; 969 970 newlen = BWSLEN(str) - (src - dst); 971 972 while (src < end) { 973 *dst = *src; 974 ++dst; 975 ++src; 976 } 977 bws_setlen(str, newlen); 978 } 979 } else { 980 wchar_t *dst, *end, *src; 981 982 src = str->data.wstr; 983 dst = src; 984 end = src + str->len; 985 986 while (src < end && iswblank(*src)) 987 ++src; 988 989 if (src != dst) { 990 991 size_t newlen = BWSLEN(str) - (src - dst); 992 993 while (src < end) { 994 *dst = *src; 995 ++dst; 996 ++src; 997 } 998 bws_setlen(str, newlen); 999 1000 } 1001 } 1002 return (str); 1003} 1004 1005/* 1006 * Rips out nonprinting characters (-i). 1007 */ 1008struct bwstring * 1009ignore_nonprinting(struct bwstring *str) 1010{ 1011 size_t newlen = str->len; 1012 1013 if (MB_CUR_MAX == 1) { 1014 unsigned char *dst, *end, *src; 1015 unsigned char c; 1016 1017 src = str->data.cstr; 1018 dst = src; 1019 end = src + str->len; 1020 1021 while (src < end) { 1022 c = *src; 1023 if (isprint(c)) { 1024 *dst = c; 1025 ++dst; 1026 ++src; 1027 } else { 1028 ++src; 1029 --newlen; 1030 } 1031 } 1032 } else { 1033 wchar_t *dst, *end, *src; 1034 wchar_t c; 1035 1036 src = str->data.wstr; 1037 dst = src; 1038 end = src + str->len; 1039 1040 while (src < end) { 1041 c = *src; 1042 if (iswprint(c)) { 1043 *dst = c; 1044 ++dst; 1045 ++src; 1046 } else { 1047 ++src; 1048 --newlen; 1049 } 1050 } 1051 } 1052 bws_setlen(str, newlen); 1053 1054 return (str); 1055} 1056 1057/* 1058 * Rips out any characters that are not alphanumeric characters 1059 * nor blanks (-d). 1060 */ 1061struct bwstring * 1062dictionary_order(struct bwstring *str) 1063{ 1064 size_t newlen = str->len; 1065 1066 if (MB_CUR_MAX == 1) { 1067 unsigned char *dst, *end, *src; 1068 unsigned char c; 1069 1070 src = str->data.cstr; 1071 dst = src; 1072 end = src + str->len; 1073 1074 while (src < end) { 1075 c = *src; 1076 if (isalnum(c) || isblank(c)) { 1077 *dst = c; 1078 ++dst; 1079 ++src; 1080 } else { 1081 ++src; 1082 --newlen; 1083 } 1084 } 1085 } else { 1086 wchar_t *dst, *end, *src; 1087 wchar_t c; 1088 1089 src = str->data.wstr; 1090 dst = src; 1091 end = src + str->len; 1092 1093 while (src < end) { 1094 c = *src; 1095 if (iswalnum(c) || iswblank(c)) { 1096 *dst = c; 1097 ++dst; 1098 ++src; 1099 } else { 1100 ++src; 1101 --newlen; 1102 } 1103 } 1104 } 1105 bws_setlen(str, newlen); 1106 1107 return (str); 1108} 1109 1110/* 1111 * Converts string to lower case(-f). 1112 */ 1113struct bwstring * 1114ignore_case(struct bwstring *str) 1115{ 1116 1117 if (MB_CUR_MAX == 1) { 1118 unsigned char *end, *s; 1119 1120 s = str->data.cstr; 1121 end = s + str->len; 1122 1123 while (s < end) { 1124 *s = toupper(*s); 1125 ++s; 1126 } 1127 } else { 1128 wchar_t *end, *s; 1129 1130 s = str->data.wstr; 1131 end = s + str->len; 1132 1133 while (s < end) { 1134 *s = towupper(*s); 1135 ++s; 1136 } 1137 } 1138 return (str); 1139} 1140 1141void 1142bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 1143{ 1144 1145 if (MB_CUR_MAX == 1) 1146 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); 1147 else 1148 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); 1149} 1150