[PATCH v3 01/10] hex-escape: (en|de)code strings to/from restricted character set
David Bremner
david at tethera.net
Sat Jan 14 17:40:15 PST 2012
From: David Bremner <bremner at debian.org>
The character set is chosen to be suitable for pathnames, and is the same
as that used by contrib/nmbug.
---
util/Makefile.local | 2 +-
util/hex-escape.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++
util/hex-escape.h | 32 +++++++++++
3 files changed, 189 insertions(+), 1 deletions(-)
create mode 100644 util/hex-escape.c
create mode 100644 util/hex-escape.h
diff --git a/util/Makefile.local b/util/Makefile.local
index 26e4c3f..2e63932 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,7 +3,7 @@
dir := util
extra_cflags += -I$(srcdir)/$(dir)
-libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c
+libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
libutil_modules := $(libutil_c_srcs:.c=.o)
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 0000000..6c1260b
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,156 @@
+/* hex-escape.c - Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david at tethera.net>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <talloc.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+/* Currently unused: nothing else in this file refers to it. */
+static const size_t default_buf_size = 1024;
+
+/* The characters is_output() accepts, i.e. the bytes hex_encode()
+ * copies to its output unchanged.  Any other byte is written as an
+ * escape sequence.  Note that '%' is not a member, so a literal '%'
+ * in the input is itself escaped. */
+static const char *output_charset =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,";
+
+/* The byte that introduces an escape sequence.  hex_decode() compares
+ * input bytes against it; hex_encode() spells the same character out
+ * in its sprintf format string rather than using this constant. */
+static const int escape_char = '%';
+
+/* Return non-zero if 'c' may appear unescaped in encoded output, that
+ * is, if it is a member of output_charset; return 0 otherwise.
+ *
+ * strchr() treats the terminating NUL as part of the string it
+ * searches, so NUL has to be rejected explicitly; without the extra
+ * test it would be reported as an output character. */
+static int
+is_output (char c)
+{
+    return c != '\0' && strchr (output_charset, c) != NULL;
+}
+
+/* Make sure the talloc buffer *out can hold at least 'needed' bytes.
+ *
+ * *out_size is taken to be the current capacity of *out.  If that is
+ * already sufficient nothing is changed.  Otherwise a buffer of
+ * exactly 'needed' bytes is obtained, either freshly allocated under
+ * 'ctx' (when *out is NULL) or by resizing *out, and both *out and
+ * *out_size are updated.
+ *
+ * Returns 1 on success and 0 if the allocation failed.  On failure
+ * *out and *out_size are left exactly as they were: the new pointer
+ * is held in a temporary until it is known to be valid, so the caller
+ * never ends up with a NULL *out paired with a stale, non-zero
+ * *out_size.
+ */
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+    char *resized;
+
+    if (*out_size >= needed)
+        return 1;
+
+    if (*out == NULL)
+        resized = talloc_size (ctx, needed);
+    else
+        resized = talloc_realloc (ctx, *out, char, needed);
+
+    if (resized == NULL)
+        return 0;
+
+    *out = resized;
+    *out_size = needed;
+    return 1;
+}
+
+/* Encode the NUL-terminated string 'in' into *out.
+ *
+ * Bytes accepted by is_output() are copied as they are; every other
+ * byte becomes a three character escape: '%' followed by the byte's
+ * value as two lowercase hexadecimal digits.  The result is
+ * NUL-terminated.
+ *
+ * *out / *out_size describe a talloc buffer that is allocated or
+ * enlarged under 'ctx' as required; a NULL *out is treated as an
+ * empty buffer regardless of the incoming *out_size.
+ *
+ * Returns HEX_SUCCESS, or HEX_OUT_OF_MEMORY if the buffer could not
+ * be obtained.  All four arguments must be non-NULL.
+ */
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    const unsigned char *src = (const unsigned char *) in;
+    size_t required = 1;    /* room for the terminating NUL */
+    size_t i;
+    char *dst;
+
+    assert (ctx);
+    assert (in);
+    assert (out);
+    assert (out_size);
+
+    /* One output byte for a pass-through character, three for an
+     * escaped one. */
+    for (i = 0; src[i] != '\0'; i++)
+        required += is_output (src[i]) ? 1 : 3;
+
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (!maybe_realloc (ctx, required, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    dst = *out;
+    for (i = 0; src[i] != '\0'; i++) {
+        if (!is_output (src[i])) {
+            sprintf (dst, "%%%02x", src[i]);
+            dst += 3;
+            continue;
+        }
+        *dst++ = src[i];
+    }
+
+    *dst = '\0';
+    return HEX_SUCCESS;
+}
+
+/* Return the value (0-15) of the hexadecimal digit 'c', or -1 if 'c'
+ * is not a hexadecimal digit.  Unlike strtol() this accepts neither
+ * whitespace nor a sign, and it rejects NUL, which lets callers use
+ * it to detect a truncated escape at the end of a string. */
+static int
+hex_digit_value (char c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+    return -1;
+}
+
+/* Decode the NUL-terminated string 'in' into *out, reversing
+ * hex_encode().
+ *
+ * Each escape, i.e. escape_char followed by exactly two hexadecimal
+ * digits (either case), is replaced by the byte with that value; all
+ * other bytes are copied unchanged.  The result is NUL-terminated.
+ * An escape of "%00" yields an embedded NUL byte.
+ *
+ * *out / *out_size describe a talloc buffer that is allocated or
+ * enlarged under 'ctx' as required; a NULL *out is treated as an
+ * empty buffer regardless of the incoming *out_size.
+ *
+ * Returns HEX_SUCCESS on success, HEX_SYNTAX_ERROR if an escape_char
+ * is not followed by two hexadecimal digits (including when the
+ * string ends early), or HEX_OUT_OF_MEMORY if the buffer could not be
+ * obtained.  The input is validated before anything is allocated or
+ * written, so on HEX_SYNTAX_ERROR *out and *out_size are untouched.
+ * All four arguments must be non-NULL.
+ */
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
+{
+    const char *p;
+    unsigned char *q;
+    size_t needed = 1;    /* room for the terminating NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* First pass: check every escape and count the output bytes.
+     * The digit tests short-circuit, so p[2] is only read once p[1]
+     * is known not to be the terminator. */
+    for (p = in; *p; needed++) {
+        if (*p == escape_char) {
+            if (hex_digit_value (p[1]) < 0 || hex_digit_value (p[2]) < 0)
+                return HEX_SYNTAX_ERROR;
+            p += 3;
+        } else {
+            p++;
+        }
+    }
+
+    /* A NULL buffer has no capacity, whatever *out_size claims. */
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (!maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    /* Second pass: the input is known to be well formed. */
+    q = (unsigned char *) *out;
+    for (p = in; *p; ) {
+        if (*p == escape_char) {
+            int hi = hex_digit_value (p[1]);
+            int lo = hex_digit_value (p[2]);
+
+            *q++ = (unsigned char) (hi * 16 + lo);
+            p += 3;
+        } else {
+            *q++ = (unsigned char) *p++;
+        }
+    }
+
+    *q = '\0';
+
+    return HEX_SUCCESS;
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 0000000..e409626
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,32 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+/* Result codes returned by hex_encode() and hex_decode(). */
+typedef enum hex_status {
+ /* The input was converted and the output is NUL-terminated. */
+ HEX_SUCCESS = 0,
+ /* hex_decode() met an escape sequence it could not parse. */
+ HEX_SYNTAX_ERROR,
+ /* The output buffer could not be allocated or enlarged. */
+ HEX_OUT_OF_MEMORY
+} hex_status_t;
+
+/*
+ * The API is modelled on that for getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it is assumed to describe an
+ * existing char array, with the size given in *out_size. This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ */
+
+/*
+ * Encode the NUL-terminated string 'in' into *out.  ASCII letters,
+ * digits and the characters "+-_@=.:," are copied unchanged; every
+ * other byte is written as '%' followed by two lowercase hexadecimal
+ * digits.
+ *
+ * Returns HEX_SUCCESS, or HEX_OUT_OF_MEMORY if the output array could
+ * not be allocated or resized.
+ */
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+
+/*
+ * Decode the NUL-terminated string 'in' into *out, reversing
+ * hex_encode: each '%' escape is replaced by the byte it denotes and
+ * all other bytes are copied unchanged.
+ *
+ * Returns HEX_SUCCESS, HEX_SYNTAX_ERROR if an escape cannot be
+ * parsed, or HEX_OUT_OF_MEMORY if the output array could not be
+ * allocated or resized.
+ */
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+#endif
--
1.7.7.3
More information about the notmuch
mailing list