I have found myself reimplementing this functionality in a number of projects over the past ten years, so I finally bit the bullet. Are there any objections to committing this? Or perhaps better suggestions as to the naming and/or implementation?
commit dda53cbadcf125f207878e8a5ac04ac936b21782 Author: Stephen R. van den Berg srb@cuci.nl Date: Fri Jan 1 23:12:18 2010 +0100
New member String.strim_whites, which sanitises a string by stripping leading and trailing spaces and tabs and by reducing every run of spaces and tabs to a single space.
diff --git a/lib/modules/String.pmod/module.pmod b/lib/modules/String.pmod/module.pmod index 2696d93..e70c848 100644 --- a/lib/modules/String.pmod/module.pmod +++ b/lib/modules/String.pmod/module.pmod @@ -6,6 +6,7 @@ constant Buffer = __builtin.Buffer; constant count=__builtin.string_count; constant width=__builtin.string_width; constant trim_whites = __builtin.string_trim_whites; +constant strim_whites = __builtin.string_strim_whites; constant trim_all_whites = __builtin.string_trim_all_whites; constant Iterator = __builtin.string_iterator; constant SplitIterator = __builtin.string_split_iterator; diff --git a/src/builtin.cmod b/src/builtin.cmod index 4085f4e..1dda4dd 100644 --- a/src/builtin.cmod +++ b/src/builtin.cmod @@ -750,6 +750,55 @@ PIKEFUN string string_trim_whites (string s) RETURN string_slice (s, start, end + 1 - start); }
+/*! @decl string strim_whites (string s)
+ *! @belongs String
+ *!
+ *! Trim leading and trailing spaces and tabs from the string @[s].
+ *! Also reduces every run of consecutive spaces and tabs to a single space.
+ */
+PMOD_EXPORT
+PIKEFUN string string_strim_whites (string s)
+  errname String.strim_whites;
+  optflags OPT_TRY_OPTIMIZE;
+{ size_t len = s->len;          /* input length; reused below as the output length */
+  void *src = s->str;
+  unsigned shift = s->size_shift;
+  unsigned foundspace = 0;      /* nonzero while the last character stored was a collapsed space */
+  struct string_builder sb;
+  init_string_builder_alloc (&sb, len, shift); /* the loops below never store more than len characters */
+  sb.known_shift = shift;
+  /* Expand the same scan once per character type, chosen by size_shift. */
+  switch (shift) {
+#define DO_IT(TYPE) \
+    { TYPE *start = src, *end = start+len, *dst = (void*)sb.s->str; \
+      for (; start < end; start++) { /* skip leading spaces and tabs */ \
+        int chr = *start; \
+        if (chr != ' ' && chr != '\t') break; \
+      } \
+      for (; start < end; start++) { \
+        int chr = *start; \
+        if (chr == ' ' || chr == '\t') \
+          if (foundspace) /* this run is already represented by one space */ \
+            continue; \
+          else /* first space or tab of a run is stored as a space */ \
+            chr=' ',foundspace=1; \
+        else \
+          foundspace=0; \
+        *dst++ = chr; \
+      } \
+      len = dst - (TYPE*)sb.s->str; /* number of characters stored */ \
+    }
+  case 0: DO_IT (p_wchar0); break;
+  case 1: DO_IT (p_wchar1); break;
+  case 2: DO_IT (p_wchar2); break;
+#undef DO_IT
+  }
+  if (foundspace) /* output ends with a collapsed space: drop it */
+    len--;
+  sb.s->len = len;
+  RETURN finish_string_builder (&sb);
+}
+
 /*! @decl string trim_all_whites (string s)
  *! @belongs String
  *!
diff --git a/src/builtin_functions.h b/src/builtin_functions.h
index 9da458a..1183d21 100644
--- a/src/builtin_functions.h
+++ b/src/builtin_functions.h
@@ -181,6 +181,7 @@ PMOD_EXPORT void f_ctime(INT32 args);
 PMOD_EXPORT void f_mkmapping(INT32 args);
 PMOD_EXPORT void f_string_count(INT32 args);
 PMOD_EXPORT void f_string_trim_whites(INT32 args);
+PMOD_EXPORT void f_string_strim_whites(INT32 args);
 PMOD_EXPORT void f_string_trim_all_whites(INT32 args);
 PMOD_EXPORT void f_program_implements(INT32 args);
 PMOD_EXPORT void f_program_inherits(INT32 args);