I found myself reimplementing this functionality in a number of projects
over the course of the past ten years, and I finally bit the bullet.
Are there any objections to this being committed?
Or perhaps better suggestions as to the naming and/or implementation?
commit dda53cbadcf125f207878e8a5ac04ac936b21782
Author: Stephen R. van den Berg <srb(a)cuci.nl>
Date: Fri Jan 1 23:12:18 2010 +0100
New member String.strim_whites
which sanitises a string by stripping leading and trailing spaces and tabs,
in addition to reducing every run of spaces and tabs to a single space.
diff --git a/lib/modules/String.pmod/module.pmod b/lib/modules/String.pmod/module.pmod
index 2696d93..e70c848 100644
--- a/lib/modules/String.pmod/module.pmod
+++ b/lib/modules/String.pmod/module.pmod
@@ -6,6 +6,7 @@ constant Buffer = __builtin.Buffer;
constant count=__builtin.string_count;
constant width=__builtin.string_width;
constant trim_whites = __builtin.string_trim_whites;
+constant strim_whites = __builtin.string_strim_whites;
constant trim_all_whites = __builtin.string_trim_all_whites;
constant Iterator = __builtin.string_iterator;
constant SplitIterator = __builtin.string_split_iterator;
diff --git a/src/builtin.cmod b/src/builtin.cmod
index 4085f4e..1dda4dd 100644
--- a/src/builtin.cmod
+++ b/src/builtin.cmod
@@ -750,6 +750,55 @@ PIKEFUN string string_trim_whites (string s)
RETURN string_slice (s, start, end + 1 - start);
}
+/*! @decl string strim_whites (string s)
+ *! @belongs String
+ *!
+ *! Strips spaces and tabs from both ends of the string @[s], and replaces
+ *! every remaining run of spaces and/or tabs (even a lone tab) by one space.
+ */
+PMOD_EXPORT
+PIKEFUN string string_strim_whites (string s)
+  errname String.strim_whites;
+  optflags OPT_TRY_OPTIMIZE;
+{
+  struct string_builder out;
+  size_t outlen = s->len;
+  unsigned width = s->size_shift;
+  unsigned in_gap = 0;	/* 1 while the last char written is a blank, else 0. */
+  /* The result never outgrows the input, nor needs wider characters. */
+  init_string_builder_alloc (&out, outlen, width);
+  out.known_shift = width;
+#define IS_BLANK(C) ((C) == ' ' || (C) == '\t')
+#define SQUEEZE(TYPE) do {						\
+    TYPE *rd = (TYPE *)s->str, *stop = rd + s->len;			\
+    TYPE *base = (TYPE *)out.s->str, *wr = base;			\
+    while (rd < stop && IS_BLANK (*rd)) rd++;				\
+    for (; rd < stop; rd++) {						\
+      int c = *rd;							\
+      if (!IS_BLANK (c)) {						\
+        in_gap = 0;							\
+      } else if (in_gap) {						\
+        continue;							\
+      } else {								\
+        c = ' ';							\
+        in_gap = 1;							\
+      }									\
+      *wr++ = c;							\
+    }									\
+    outlen = wr - base;							\
+  } while (0)
+  switch (width) {
+    case 0: SQUEEZE (p_wchar0); break;
+    case 1: SQUEEZE (p_wchar1); break;
+    case 2: SQUEEZE (p_wchar2); break;
+  }
+#undef SQUEEZE
+#undef IS_BLANK
+  /* A trailing blank, if one was written last, is cut off again. */
+  out.s->len = outlen - in_gap;
+  RETURN finish_string_builder (&out);
+}
+
/*! @decl string trim_all_whites (string s)
*! @belongs String
*!
diff --git a/src/builtin_functions.h b/src/builtin_functions.h
index 9da458a..1183d21 100644
--- a/src/builtin_functions.h
+++ b/src/builtin_functions.h
@@ -181,6 +181,7 @@ PMOD_EXPORT void f_ctime(INT32 args);
PMOD_EXPORT void f_mkmapping(INT32 args);
PMOD_EXPORT void f_string_count(INT32 args);
PMOD_EXPORT void f_string_trim_whites(INT32 args);
+PMOD_EXPORT void f_string_strim_whites(INT32 args);
PMOD_EXPORT void f_string_trim_all_whites(INT32 args);
PMOD_EXPORT void f_program_implements(INT32 args);
PMOD_EXPORT void f_program_inherits(INT32 args);
--
Sincerely,
Stephen R. van den Berg.