[Patch v9 05/17] util/string-util: add a new string tokenizer function

david at tethera.net david at tethera.net
Sun Dec 23 17:39:31 PST 2012


From: David Bremner <bremner at debian.org>

The initial intended use is quoting queries for Xapian. We want to
split the input into tokens, but preserve the delimiters between the
tokens verbatim.
---
 util/string-util.c |   12 ++++++++++++
 util/string-util.h |   19 +++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/util/string-util.c b/util/string-util.c
index b9039f4..1586483 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -34,6 +34,18 @@ strtok_len (char *s, const char *delim, size_t *len)
     return *len ? s : NULL;
 }
 
+char *
+strtok_len2 (char *s, const char *delim, size_t *len, size_t *delim_len)
+{
+    /* length of token */
+    *len = strcspn (s, delim);
+
+    /* length of following delimiter */
+    *delim_len = strspn (s + *len, delim);
+
+    return *len || *delim_len ? s : NULL;
+}
+
 
 int
 double_quote_str (void *ctx, const char *str,
diff --git a/util/string-util.h b/util/string-util.h
index 4fc7942..12398a5 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,6 +19,25 @@
 
 char *strtok_len (char *s, const char *delim, size_t *len);
 
+/* Like strtok_len, but return length of delimiters as well.  The token
+ * is identified by the returned pointer and *len, not by a null
+ * terminator.  Does _not_ skip initial delimiters.
+ *
+ * Usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = " :.,";
+ * size_t tok_len = 0;
+ * size_t delim_len = 0;
+ *
+ * while ((tok = strtok_len2 (tok + tok_len + delim_len, delim,
+ *			      &tok_len, &delim_len)) != NULL) {
+ *     // do stuff with token and following delimiters.
+ * }
+ */
+
+char *strtok_len2 (char *s, const char *delim, size_t *len, size_t *delim_len);
+
 /* Copy str to dest, surrounding with double quotes.
  * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
  *
-- 
1.7.10.4



More information about the notmuch mailing list