[PATCH 2/7] lib: add a date/time parser module
Jani Nikula
jani at nikula.org
Fri Aug 3 14:51:47 PDT 2012
Build a date/time parser as part of the notmuch lib, to be used for
adding date range query support later on.
Signed-off-by: Jani Nikula <jani at nikula.org>
---
lib/Makefile.local | 1 +
lib/parse-time-string.c | 1384 +++++++++++++++++++++++++++++++++++++++++++++++
lib/parse-time-string.h | 95 ++++
3 files changed, 1480 insertions(+)
create mode 100644 lib/parse-time-string.c
create mode 100644 lib/parse-time-string.h
diff --git a/lib/Makefile.local b/lib/Makefile.local
index 8a9aa28..e29c3a2 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -53,6 +53,7 @@ libnotmuch_c_srcs = \
$(dir)/libsha1.c \
$(dir)/message-file.c \
$(dir)/messages.c \
+ $(dir)/parse-time-string.c \
$(dir)/sha1.c \
$(dir)/tags.c
diff --git a/lib/parse-time-string.c b/lib/parse-time-string.c
new file mode 100644
index 0000000..7c50f3e
--- /dev/null
+++ b/lib/parse-time-string.c
@@ -0,0 +1,1384 @@
+/*
+ * parse time string - user friendly date and time parser
+ * Copyright © 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Jani Nikula <jani at nikula.org>
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "parse-time-string.h"
+
+/* Mark a function parameter as intentionally unused (GCC attribute). */
+#define unused(x) x __attribute__ ((unused))
+
+/*
+ * REVISIT: Redefine these to add i18n support. The keyword table marks
+ * its strings for translation with N_(); the strings are looked up at
+ * run time through _().
+ */
+#define _(s) (s) /* i18n: define as gettext (s) */
+#define N_(s) (s) /* i18n: define as gettext_noop (s) */
+
+/* Number of elements in array a. Only valid for true arrays, not pointers. */
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+/*
+ * Field indices into the tm[] and set[] arrays of struct state.
+ *
+ * The TM_ABS_* fields hold values as parsed, the TM_REL_* fields
+ * accumulate offsets that create_output() later subtracts from the
+ * absolute values. next_field() and abs_to_rel_field() do arithmetic
+ * on these enumerators, so the ordering below must not be changed.
+ */
+enum field {
+ /* keep SEC...YEAR in this order */
+ TM_ABS_SEC, /* seconds */
+ TM_ABS_MIN, /* minutes */
+ TM_ABS_HOUR, /* hours */
+ TM_ABS_MDAY, /* day of the month */
+ TM_ABS_MON, /* month */
+ TM_ABS_YEAR, /* year */
+
+ TM_ABS_WDAY, /* day of the week. special: may be relative */
+ TM_ABS_ISDST, /* daylight saving time */
+
+ TM_AMPM, /* am vs. pm */
+ TM_TZ, /* timezone in minutes */
+
+ /* keep SEC...YEAR in this order */
+ TM_REL_SEC, /* seconds relative to now */
+ TM_REL_MIN, /* minutes ... */
+ TM_REL_HOUR, /* hours ... */
+ TM_REL_DAY, /* days ... */
+ TM_REL_MON, /* months ... */
+ TM_REL_YEAR, /* years ... */
+ TM_REL_WEEK, /* weeks ... */
+
+ TM_NONE, /* not a field */
+
+ /* array size for per-field storage; TM_NONE itself has no slot */
+ TM_SIZE = TM_NONE,
+};
+
+/* Set status of a field in struct state. */
+enum field_set {
+ FIELD_UNSET, /* nothing parsed for this field */
+ FIELD_SET, /* tm[] holds a value for this field */
+ FIELD_NOW, /* to be filled in from the reference time after parsing */
+};
+
+/*
+ * Step to the next absolute field in SEC...YEAR order. Returns TM_NONE
+ * for TM_ABS_YEAR and for anything after it.
+ */
+static enum field
+next_field (enum field field)
+{
+    /* note: relies on TM_ABS_SEC...TM_ABS_YEAR being consecutive */
+    if (field >= TM_ABS_YEAR)
+        return TM_NONE;
+
+    return field + 1;
+}
+
+/*
+ * Map an absolute field (TM_ABS_SEC...TM_ABS_YEAR) to the relative
+ * field in the same position of the TM_REL_SEC... run.
+ */
+static enum field
+abs_to_rel_field (enum field field)
+{
+    /* note: relies on both runs being laid out in the same order */
+    const int offset = TM_REL_SEC - TM_ABS_SEC;
+
+    assert (field <= TM_ABS_YEAR);
+
+    return field + offset;
+}
+
+/*
+ * Smallest ("zero") value of a field, used when rounding down: 1 for
+ * day of month and month, 1970 for year, 0 for everything else.
+ */
+static int
+field_zero (enum field field)
+{
+    switch (field) {
+    case TM_ABS_MDAY:
+    case TM_ABS_MON:
+        return 1;
+    case TM_ABS_YEAR:
+        return 1970;
+    default:
+        return 0;
+    }
+}
+
+/* Parser state, carried through the parsing of one input string. */
+struct state {
+ int tm[TM_SIZE]; /* parsed date and time */
+ enum field_set set[TM_SIZE]; /* set status of tm */
+
+ /* field most recently written by set_field() or mod_field() */
+ enum field last_field;
+ /* last delimiter seen; cleared whenever a field is written */
+ char delim;
+
+ int postponed_length; /* number of digits in postponed value */
+ /* number whose interpretation waits for the following token */
+ int postponed_value;
+ /* value of delim at the time the number was postponed */
+ char postponed_delim;
+};
+
+/*
+ * Helpers for postponed numbers.
+ *
+ * A number that cannot be interpreted on its own is stored in state
+ * until the next token gives it a meaning. postponed_length is the
+ * number of digits in the postponed value: 0 means there is no
+ * postponed number, -1 means there is one but it came from a keyword
+ * and so has no digits.
+ */
+
+/* Return the digit count of the postponed number without consuming it. */
+static int
+get_postponed_length (struct state *state)
+{
+ return state->postponed_length;
+}
+
+/*
+ * Fetch and clear the postponed number. v (value), n (digit count) and
+ * d (delimiter) may each be NULL if the caller is not interested.
+ * Returns false, leaving the outputs untouched, when nothing is
+ * postponed.
+ */
+static bool
+get_postponed_number (struct state *state, int *v, int *n, char *d)
+{
+    if (state->postponed_length == 0)
+        return false;
+
+    if (v)
+        *v = state->postponed_value;
+    if (n)
+        *n = state->postponed_length;
+    if (d)
+        *d = state->postponed_delim;
+
+    /* consumed: reset so that it is not handled a second time */
+    state->postponed_value = 0;
+    state->postponed_length = 0;
+    state->postponed_delim = 0;
+
+    return true;
+}
+
+/*
+ * Parse the postponed number on its own, if one exists. Returns 0 when
+ * nothing was postponed, otherwise the result of
+ * parse_postponed_number().
+ */
+static int parse_postponed_number (struct state *state, int v, int n, char d);
+static int
+handle_postponed_number (struct state *state)
+{
+    int value, length;
+    char delim;
+
+    /* fetching also clears the postponed number before it is parsed */
+    if (!get_postponed_number (state, &value, &length, &delim))
+        return 0;
+
+    return parse_postponed_number (state, value, length, delim);
+}
+
+/*
+ * Postpone number v, with n digits, to be handled later. A number that
+ * is already postponed is handled first, and its error, if any, is
+ * returned. n may be -1 to indicate a keyword that has no number
+ * length.
+ */
+static int
+set_postponed_number (struct state *state, int v, int n)
+{
+    /* sample the delimiter now; handling the old number may clear it */
+    const char delim = state->delim;
+    int err = handle_postponed_number (state);
+
+    if (err)
+        return err;
+
+    state->postponed_value = v;
+    state->postponed_length = n;
+    state->postponed_delim = delim;
+
+    return 0;
+}
+
+/* Remember delim as the most recently seen delimiter character. */
+static void
+set_delim (struct state *state, char delim)
+{
+ state->delim = delim;
+}
+
+/* Forget the remembered delimiter (0 means none). */
+static void
+unset_delim (struct state *state)
+{
+ state->delim = 0;
+}
+
+/*
+ * Field set/get/mod helpers.
+ */
+
+/*
+ * Tell whether field has been set (FIELD_SET or FIELD_NOW). Fields
+ * that have no slot in set[] are reported as unset.
+ */
+static bool
+is_field_set (struct state *state, enum field field)
+{
+    assert (field < ARRAY_SIZE (state->tm));
+
+    if (field >= ARRAY_SIZE (state->set))
+        return false;
+
+    return state->set[field] != FIELD_UNSET;
+}
+
+/* Clear field: zero its value and mark it as not set. */
+static void
+unset_field (struct state *state, enum field field)
+{
+    assert (field < ARRAY_SIZE (state->tm));
+
+    state->tm[field] = 0;
+    state->set[field] = FIELD_UNSET;
+}
+
+/*
+ * Set field to value.
+ *
+ * Returns 0 on success, -PARSE_TIME_ERR_ALREADYSET if the field has
+ * been set before, or the error from handling a postponed number.
+ *
+ * Writing a field also flushes any postponed number, clears the
+ * remembered delimiter and records field as last_field.
+ */
+static int
+set_field (struct state *state, enum field field, int value)
+{
+ int r;
+
+ assert (field < ARRAY_SIZE (state->tm));
+
+ /* a field can only be set once */
+ if (field < ARRAY_SIZE (state->set) && state->set[field] != FIELD_UNSET)
+ return -PARSE_TIME_ERR_ALREADYSET;
+
+ /*
+ * Mark the field before handling the postponed number: that path
+ * calls set_field() again and must see this field as taken.
+ */
+ state->set[field] = FIELD_SET;
+
+ /*
+ * REVISIT: There could be a "next_field" that would be set from
+ * "field" for the duration of the handle_postponed_number() call,
+ * so it has more information to work with.
+ */
+
+ /* parse postponed number, if any */
+ r = handle_postponed_number (state);
+ if (r)
+ return r;
+
+ unset_delim (state);
+
+ state->tm[field] = value;
+ state->last_field = field;
+
+ return 0;
+}
+
+/*
+ * Mark the n fields listed in fields to be taken from the current
+ * date/time, in the specified time zone or the local one if none is
+ * given. The actual values are filled in once parsing is complete and
+ * the time zone is known. Stops at, and returns, the first
+ * set_field() error.
+ */
+static int
+set_fields_to_now (struct state *state, enum field *fields, size_t n)
+{
+    const enum field *f;
+    const enum field *end = fields + n;
+
+    for (f = fields; f != end; f++) {
+        int err = set_field (state, *f, 0);
+
+        if (err)
+            return err;
+
+        /* set_field() marked it FIELD_SET; turn that into "fill in later" */
+        state->set[*f] = FIELD_NOW;
+    }
+
+    return 0;
+}
+
+/*
+ * Add value to field and mark the field as set. Meant for the relative
+ * fields, which may be modified any number of times. Like set_field(),
+ * this flushes a postponed number (returning its error, if any),
+ * clears the delimiter and updates last_field.
+ */
+static int
+mod_field (struct state *state, enum field field, int value)
+{
+    int err;
+
+    assert (field < ARRAY_SIZE (state->tm)); /* assert relative??? */
+
+    if (field < ARRAY_SIZE (state->set))
+        state->set[field] = FIELD_SET;
+
+    /* a pending number has to be parsed before this field takes effect */
+    err = handle_postponed_number (state);
+    if (err)
+        return err;
+
+    unset_delim (state);
+
+    state->last_field = field;
+    state->tm[field] += value;
+
+    return 0;
+}
+
+/*
+ * Get field value. Make sure the field is set before query. It's most
+ * likely an error to call this while parsing (for example fields set
+ * as FIELD_NOW will only be set to some value after parsing).
+ *
+ * No set status is checked here: the stored value is returned as is.
+ */
+static int
+get_field (struct state *state, enum field field)
+{
+ assert (field < ARRAY_SIZE (state->tm));
+
+ return state->tm[field];
+}
+
+/*
+ * Validity checkers.
+ */
+
+/*
+ * Check that h is an hour of the 12-hour clock, i.e. 1...12. There is
+ * no hour 0 on a 12-hour clock (midnight is 12am), so 0 is rejected
+ * rather than being quietly turned into noon or midnight by the am/pm
+ * fixup.
+ */
+static bool is_valid_12hour (int h)
+{
+    return h >= 1 && h <= 12;
+}
+
+/*
+ * Check a 24-hour clock time: 00:00:00...23:59:59, plus exactly
+ * 24:00:00 to denote end of day.
+ */
+static bool is_valid_time (int h, int m, int s)
+{
+    const bool min_ok = (m >= 0 && m <= 59);
+    const bool sec_ok = (s >= 0 && s <= 59);
+
+    if (h == 24)
+        return m == 0 && s == 0;
+
+    return h >= 0 && h <= 23 && min_ok && sec_ok;
+}
+
+/* Check day of month, 1...31. The month is not taken into account. */
+static bool is_valid_mday (int mday)
+{
+    return !(mday < 1 || mday > 31);
+}
+
+/* Check month number, 1...12. */
+static bool is_valid_mon (int mon)
+{
+    return !(mon < 1 || mon > 12);
+}
+
+/* Check year: anything from 1970 on is accepted. */
+static bool is_valid_year (int year)
+{
+    return !(year < 1970);
+}
+
+/*
+ * Check year, month and day of month, each on its own. The day is not
+ * checked against the length of the month.
+ */
+static bool is_valid_date (int year, int mon, int mday)
+{
+    if (!is_valid_year (year))
+        return false;
+
+    if (!is_valid_mon (mon))
+        return false;
+
+    return is_valid_mday (mday);
+}
+
+/* Unset indicator for time and date set helpers. */
+#define UNSET -1
+
+/*
+ * Time set helper. No input checking. Pass UNSET (-1) for a component
+ * to leave it unset. Components are set in the order hour, minute,
+ * second; the first set_field() error stops the process and is
+ * returned.
+ */
+static int
+set_abs_time (struct state *state, int hour, int min, int sec)
+{
+    const enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
+    const int values[] = { hour, min, sec };
+    size_t i;
+
+    for (i = 0; i < ARRAY_SIZE (fields); i++) {
+        int err;
+
+        if (values[i] == UNSET)
+            continue;
+
+        err = set_field (state, fields[i], values[i]);
+        if (err)
+            return err;
+    }
+
+    return 0;
+}
+
+/*
+ * Date set helper. No input checking. Pass UNSET (-1) for a component
+ * to leave it unset. Components are set in the order year, month, day
+ * of month; the first set_field() error stops the process and is
+ * returned.
+ */
+static int
+set_abs_date (struct state *state, int year, int mon, int mday)
+{
+    const enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
+    const int values[] = { year, mon, mday };
+    size_t i;
+
+    for (i = 0; i < ARRAY_SIZE (fields); i++) {
+        int err;
+
+        if (values[i] == UNSET)
+            continue;
+
+        err = set_field (state, fields[i], values[i]);
+        if (err)
+            return err;
+    }
+
+    return 0;
+}
+
+/*
+ * Keyword parsing and handling.
+ */
+struct keyword;
+
+/*
+ * Setter callback: apply keyword kw to state. Returns 0 on success,
+ * < 0 on error.
+ */
+typedef int (*setter_t)(struct state *state, struct keyword *kw);
+
+/* One entry of the keyword table. */
+struct keyword {
+ const char *name; /* keyword */
+ enum field field; /* field to set, or TM_NONE if N/A */
+ int value; /* value to set, or 0 if N/A */
+ setter_t set; /* function to use for setting, if non-NULL */
+};
+
+/*
+ * Setter callback functions for keywords.
+ */
+
+/* Default setter: store the keyword's value in the keyword's field. */
+static int
+kw_set_default (struct state *state, struct keyword *kw)
+{
+ return set_field (state, kw->field, kw->value);
+}
+
+/*
+ * Setter for durations: add the keyword's value, scaled by a
+ * preceding (postponed) number if there is one, to the keyword's
+ * relative field. Without a preceding number the multiplier is 1.
+ */
+static int
+kw_set_rel (struct state *state, struct keyword *kw)
+{
+    int count = 1;
+
+    /* count stays at 1 if nothing was postponed */
+    (void) get_postponed_number (state, &count, NULL, NULL);
+
+    /* relative field values accumulate */
+    return mod_field (state, kw->field, count * kw->value);
+}
+
+/*
+ * Setter for number words: postpone the keyword's value as if it were
+ * a number in the input, to be interpreted by a later token.
+ */
+static int
+kw_set_number (struct state *state, struct keyword *kw)
+{
+ /* -1 = no length, from keyword */
+ return set_postponed_number (state, kw->value, -1);
+}
+
+/*
+ * Setter for month names. A preceding one or two digit number is
+ * taken as the day of the month ("24 dec"); it must be a valid day of
+ * month or -PARSE_TIME_ERR_INVALIDDATE is returned.
+ */
+static int
+kw_set_month (struct state *state, struct keyword *kw)
+{
+    const int digits = get_postponed_length (state);
+    const bool has_mday = (digits == 1 || digits == 2);
+
+    if (has_mday) {
+        int mday, err;
+
+        get_postponed_number (state, &mday, NULL, NULL);
+
+        if (!is_valid_mday (mday))
+            return -PARSE_TIME_ERR_INVALIDDATE;
+
+        err = set_field (state, TM_ABS_MDAY, mday);
+        if (err)
+            return err;
+    }
+
+    return set_field (state, kw->field, kw->value);
+}
+
+/*
+ * Setter for am/pm. A preceding one or two digit number is taken as
+ * the hour ("5pm"), with minutes and seconds set to zero; it must be a
+ * valid 12-hour clock hour or -PARSE_TIME_ERR_INVALIDTIME is returned.
+ */
+static int
+kw_set_ampm (struct state *state, struct keyword *kw)
+{
+    const int digits = get_postponed_length (state);
+    const bool has_hour = (digits == 1 || digits == 2);
+
+    if (has_hour) {
+        int hour, err;
+
+        get_postponed_number (state, &hour, NULL, NULL);
+
+        if (!is_valid_12hour (hour))
+            return -PARSE_TIME_ERR_INVALIDTIME;
+
+        err = set_abs_time (state, hour, 0, 0);
+        if (err)
+            return err;
+    }
+
+    return set_field (state, kw->field, kw->value);
+}
+
+/*
+ * Setter for named times of day: the keyword's value is the hour,
+ * minutes and seconds are set to zero.
+ */
+static int
+kw_set_timeofday (struct state *state, struct keyword *kw)
+{
+ return set_abs_time (state, kw->value, 0, 0);
+}
+
+/* Setter for "today": take year, month and day from the reference time. */
+static int
+kw_set_today (struct state *state, unused (struct keyword *kw))
+{
+    enum field date_fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
+    const size_t count = sizeof (date_fields) / sizeof (date_fields[0]);
+
+    return set_fields_to_now (state, date_fields, count);
+}
+
+/* Setter for "now": take hour, minute and second from the reference time. */
+static int
+kw_set_now (struct state *state, unused (struct keyword *kw))
+{
+    enum field time_fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
+    const size_t count = sizeof (time_fields) / sizeof (time_fields[0]);
+
+    return set_fields_to_now (state, time_fields, count);
+}
+
+/*
+ * Setter for ordinal suffixes (st, nd, rd, th). The suffix must follow
+ * a one or two digit number, which becomes the day of the month.
+ * Returns -PARSE_TIME_ERR_DATEFORMAT if there is no suitable number
+ * and -PARSE_TIME_ERR_INVALIDDATE if the number does not go with the
+ * suffix.
+ */
+static int
+kw_set_ordinal (struct state *state, struct keyword *kw)
+{
+    const char *suffix = kw->name;
+    int digits, mday;
+    bool ok = true;
+
+    /* require a postponed number */
+    if (!get_postponed_number (state, &mday, &digits, NULL))
+        return -PARSE_TIME_ERR_DATEFORMAT;
+
+    /* ordinals are mday */
+    if (digits != 1 && digits != 2)
+        return -PARSE_TIME_ERR_DATEFORMAT;
+
+    /* be strict about st, nd, rd, and lax about th */
+    if (strcasecmp (suffix, "st") == 0)
+        ok = (mday == 1 || mday == 21 || mday == 31);
+    else if (strcasecmp (suffix, "nd") == 0)
+        ok = (mday == 2 || mday == 22);
+    else if (strcasecmp (suffix, "rd") == 0)
+        ok = (mday == 3 || mday == 23);
+    else if (strcasecmp (suffix, "th") == 0)
+        ok = is_valid_mday (mday);
+
+    if (!ok)
+        return -PARSE_TIME_ERR_INVALIDDATE;
+
+    return set_field (state, TM_ABS_MDAY, mday);
+}
+
+/*
+ * Accepted keywords.
+ *
+ * A keyword may optionally contain a '|' to indicate the minimum
+ * match length. Without one, full match is required. It's advisable
+ * to keep the minimum match parts unique across all keywords.
+ *
+ * If keyword begins with upper case letter, then the matching will be
+ * case sensitive. Otherwise the matching is case insensitive.
+ *
+ * If setter is NULL, kw_set_default will be used.
+ *
+ * Note: Order matters. Matching is greedy, longest match is used, but
+ * of equal length matches the first one is used. The exception is a
+ * case sensitive keyword, which replaces an earlier match of the same
+ * length; this is how "M" (months) wins over "m" (minutes).
+ */
+static struct keyword keywords[] = {
+ /* weekdays */
+ { N_("sun|day"), TM_ABS_WDAY, 0, NULL },
+ { N_("mon|day"), TM_ABS_WDAY, 1, NULL },
+ { N_("tue|sday"), TM_ABS_WDAY, 2, NULL },
+ { N_("wed|nesday"), TM_ABS_WDAY, 3, NULL },
+ { N_("thu|rsday"), TM_ABS_WDAY, 4, NULL },
+ { N_("fri|day"), TM_ABS_WDAY, 5, NULL },
+ { N_("sat|urday"), TM_ABS_WDAY, 6, NULL },
+
+ /* months */
+ { N_("jan|uary"), TM_ABS_MON, 1, kw_set_month },
+ { N_("feb|ruary"), TM_ABS_MON, 2, kw_set_month },
+ { N_("mar|ch"), TM_ABS_MON, 3, kw_set_month },
+ { N_("apr|il"), TM_ABS_MON, 4, kw_set_month },
+ { N_("may"), TM_ABS_MON, 5, kw_set_month },
+ { N_("jun|e"), TM_ABS_MON, 6, kw_set_month },
+ { N_("jul|y"), TM_ABS_MON, 7, kw_set_month },
+ { N_("aug|ust"), TM_ABS_MON, 8, kw_set_month },
+ { N_("sep|tember"), TM_ABS_MON, 9, kw_set_month },
+ { N_("oct|ober"), TM_ABS_MON, 10, kw_set_month },
+ { N_("nov|ember"), TM_ABS_MON, 11, kw_set_month },
+ { N_("dec|ember"), TM_ABS_MON, 12, kw_set_month },
+
+ /* durations: value is the unit, scaled by a preceding number */
+ { N_("y|ears"), TM_REL_YEAR, 1, kw_set_rel },
+ { N_("w|eeks"), TM_REL_WEEK, 1, kw_set_rel },
+ { N_("d|ays"), TM_REL_DAY, 1, kw_set_rel },
+ { N_("h|ours"), TM_REL_HOUR, 1, kw_set_rel },
+ { N_("hr|s"), TM_REL_HOUR, 1, kw_set_rel },
+ { N_("m|inutes"), TM_REL_MIN, 1, kw_set_rel },
+ /* M=months, m=minutes */
+ { N_("M"), TM_REL_MON, 1, kw_set_rel },
+ { N_("mins"), TM_REL_MIN, 1, kw_set_rel },
+ { N_("mo|nths"), TM_REL_MON, 1, kw_set_rel },
+ { N_("s|econds"), TM_REL_SEC, 1, kw_set_rel },
+ { N_("secs"), TM_REL_SEC, 1, kw_set_rel },
+
+ /* numbers */
+ { N_("one"), TM_NONE, 1, kw_set_number },
+ { N_("two"), TM_NONE, 2, kw_set_number },
+ { N_("three"), TM_NONE, 3, kw_set_number },
+ { N_("four"), TM_NONE, 4, kw_set_number },
+ { N_("five"), TM_NONE, 5, kw_set_number },
+ { N_("six"), TM_NONE, 6, kw_set_number },
+ { N_("seven"), TM_NONE, 7, kw_set_number },
+ { N_("eight"), TM_NONE, 8, kw_set_number },
+ { N_("nine"), TM_NONE, 9, kw_set_number },
+ { N_("ten"), TM_NONE, 10, kw_set_number },
+ { N_("dozen"), TM_NONE, 12, kw_set_number },
+ { N_("hundred"), TM_NONE, 100, kw_set_number },
+
+ /* special number forms: postponed as the numbers 0 and 1 */
+ { N_("this"), TM_NONE, 0, kw_set_number },
+ { N_("last"), TM_NONE, 1, kw_set_number },
+
+ /* specials */
+ { N_("yesterday"), TM_REL_DAY, 1, kw_set_rel },
+ { N_("today"), TM_NONE, 0, kw_set_today },
+ { N_("now"), TM_NONE, 0, kw_set_now },
+ { N_("noon"), TM_NONE, 12, kw_set_timeofday },
+ { N_("midnight"), TM_NONE, 0, kw_set_timeofday },
+ { N_("am"), TM_AMPM, 0, kw_set_ampm },
+ { N_("a.m."), TM_AMPM, 0, kw_set_ampm },
+ { N_("pm"), TM_AMPM, 1, kw_set_ampm },
+ { N_("p.m."), TM_AMPM, 1, kw_set_ampm },
+ { N_("st"), TM_NONE, 0, kw_set_ordinal },
+ { N_("nd"), TM_NONE, 0, kw_set_ordinal },
+ { N_("rd"), TM_NONE, 0, kw_set_ordinal },
+ { N_("th"), TM_NONE, 0, kw_set_ordinal },
+
+ /* timezone codes: offset in minutes. FIXME: add more codes. */
+ { N_("pst"), TM_TZ, -8*60, NULL },
+ { N_("mst"), TM_TZ, -7*60, NULL },
+ { N_("cst"), TM_TZ, -6*60, NULL },
+ { N_("est"), TM_TZ, -5*60, NULL },
+ { N_("ast"), TM_TZ, -4*60, NULL },
+ { N_("nst"), TM_TZ, -(3*60+30), NULL },
+
+ { N_("gmt"), TM_TZ, 0, NULL },
+ { N_("utc"), TM_TZ, 0, NULL },
+
+ { N_("wet"), TM_TZ, 0, NULL },
+ { N_("cet"), TM_TZ, 1*60, NULL },
+ { N_("eet"), TM_TZ, 2*60, NULL },
+ { N_("fet"), TM_TZ, 3*60, NULL },
+
+ { N_("wat"), TM_TZ, 1*60, NULL },
+ { N_("cat"), TM_TZ, 2*60, NULL },
+ { N_("eat"), TM_TZ, 3*60, NULL },
+};
+
+/*
+ * Count how many leading chars of s and keyword agree, and decide
+ * whether that makes a match. With n > 0, at least n chars must
+ * agree. With n == 0, all of keyword must be matched. Returns the
+ * number of matching chars on match, 0 for no match. match_case
+ * selects case sensitive comparison; otherwise case is ignored.
+ */
+static size_t
+stringcmp (const char *s, const char *keyword, size_t n, bool match_case)
+{
+    size_t matched = 0;
+
+    while (s[matched] && keyword[matched]) {
+        unsigned char a = (unsigned char) s[matched];
+        unsigned char b = (unsigned char) keyword[matched];
+        bool same = match_case ? a == b : tolower (a) == tolower (b);
+
+        if (!same)
+            break;
+
+        matched++;
+    }
+
+    if (n == 0)
+        return keyword[matched] ? 0 : matched;
+
+    return matched >= n ? matched : 0;
+}
+
+/*
+ * Parse a keyword at the start of s and apply it to state.
+ *
+ * Every entry of keywords[] is tried and the longest match is used.
+ * Of equally long matches the first one in the table wins, except
+ * that a case sensitive keyword replaces an earlier match of the same
+ * length (this is what tells "M" from "m").
+ *
+ * Returns the number of parsed chars (> 0) on success,
+ * -PARSE_TIME_ERR_KEYWORD if no keyword matches, or the negative
+ * error returned by the keyword's setter.
+ */
+static ssize_t
+parse_keyword (struct state *state, const char *s)
+{
+    unsigned int i;
+    size_t max_n = 0;
+    struct keyword *kw = NULL;
+    int r;
+
+    /* Match longest keyword */
+    for (i = 0; i < ARRAY_SIZE (keywords); i++) {
+        /* Match case if keyword begins with upper case letter. */
+        bool mcase = isupper ((unsigned char) keywords[i].name[0]);
+        size_t n, minlen = 0;
+        char keyword[128];
+        char *p;
+
+        strncpy (keyword, _(keywords[i].name), sizeof (keyword));
+
+        /* Truncate too long keywords. REVISIT: Make this dynamic? */
+        keyword[sizeof (keyword) - 1] = '\0';
+
+        /* A '|' marks the minimum match length; drop it from the copy. */
+        p = strchr (keyword, '|');
+        if (p) {
+            minlen = p - keyword;
+            memmove (p, p + 1, strlen (p + 1) + 1);
+        }
+
+        n = stringcmp (s, keyword, minlen, mcase);
+
+        /*
+         * No match at all. This has to be checked explicitly:
+         * otherwise a case sensitive keyword wins the 0 == 0 tie
+         * below, and its setter gets run for unknown input.
+         */
+        if (n == 0)
+            continue;
+
+        if (n > max_n || (n == max_n && mcase)) {
+            max_n = n;
+            kw = &keywords[i];
+        }
+    }
+
+    if (!kw)
+        return -PARSE_TIME_ERR_KEYWORD;
+
+    if (kw->set)
+        r = kw->set (state, kw);
+    else
+        r = kw_set_default (state, kw);
+
+    if (r < 0)
+        return r;
+
+    return max_n;
+}
+
+/*
+ * Non-keyword parsers and their helpers.
+ */
+
+/*
+ * Set the time zone from a user supplied offset. sign must be '+' or
+ * '-'. The offset is accepted if hour is 0...14 and min is one of 0,
+ * 15, 30 or 45; otherwise -PARSE_TIME_ERR_INVALIDTIME is returned.
+ * The zone is stored in minutes.
+ */
+static int
+set_user_tz (struct state *state, char sign, int hour, int min)
+{
+    int offset = hour * 60 + min;
+    bool hour_ok, min_ok;
+
+    assert (sign == '+' || sign == '-');
+
+    hour_ok = (hour >= 0 && hour <= 14);
+    min_ok = (min >= 0 && min <= 59 && min % 15 == 0);
+
+    if (!hour_ok || !min_ok)
+        return -PARSE_TIME_ERR_INVALIDTIME;
+
+    return set_field (state, TM_TZ, sign == '-' ? -offset : offset);
+}
+
+/*
+ * Independent parsing of a postponed number when it wasn't consumed
+ * during parsing of the following token.
+ *
+ * This should be able to trust that last_field and next_field are
+ * right.
+ *
+ * v is the value, n the number of digits (-1 if the number came from
+ * a keyword) and d the delimiter that was in effect when the number
+ * was postponed. Interpretation by digit count:
+ *
+ *   1 or 2  day of month, if the last field set was the month
+ *   2       +/-HH time zone, if d is '+' or '-'
+ *   4       YYYY if v is a valid year, else +/-HHMM time zone if d is
+ *           '+' or '-'
+ *   6       HHMMSS
+ *
+ * Returns 0 on success, < 0 on error; anything that fits none of the
+ * above is -PARSE_TIME_ERR_FORMAT.
+ */
+static int
+parse_postponed_number (struct state *state, int v, int n, char d)
+{
+ /*
+ * alright, these are really lone, won't affect parsing of
+ * following items... it's not a multiplier, those have been eaten
+ * away.
+ *
+ * also note numbers eaten away by parse_single_number.
+ */
+
+ /* 8 digits and more are dealt with in parse_single_number() */
+ assert (n < 8);
+
+ if (n == 1 || n == 2) {
+ if (state->last_field == TM_ABS_MON) {
+ /* D[D] */
+ if (!is_valid_mday (v))
+ return -PARSE_TIME_ERR_INVALIDDATE;
+
+ return set_field (state, TM_ABS_MDAY, v);
+ } else if (n == 2) {
+ /* REVISIT: only allow if last field is hour, min, or sec? */
+ if (d == '+' || d == '-') {
+ /* +/-HH */
+ return set_user_tz (state, d, v, 0);
+ }
+ }
+ } else if (n == 4) {
+ /* Notable exception: Value affects parsing. */
+ if (!is_valid_year (v)) {
+ if (d == '+' || d == '-') {
+ /* +/-HHMM */
+ return set_user_tz (state, d, v / 100, v % 100);
+ }
+ } else {
+ /* YYYY */
+ return set_field (state, TM_ABS_YEAR, v);
+ }
+ } else if (n == 6) {
+ /* HHMMSS */
+ int hour = v / 10000;
+ int min = (v / 100) % 100;
+ int sec = v % 100;
+
+ if (!is_valid_time (hour, min, sec))
+ return -PARSE_TIME_ERR_INVALIDTIME;
+
+ return set_abs_time (state, hour, min, sec);
+ }
+
+ /*
+ * Nothing matched: n is one of {-1, 3, 5, 7}, or a 1, 2 or 4 digit
+ * number that fell through the checks above.
+ */
+
+ return -PARSE_TIME_ERR_FORMAT;
+}
+
+/*
+ * Parse a single number v of n digits. An 8 digit number is taken as
+ * YYYYMMDD right away and longer ones are rejected; anything shorter
+ * is postponed until the following token tells what it means.
+ */
+static int
+parse_single_number (struct state *state, unsigned long v,
+                     unsigned long n)
+{
+    assert (n);
+
+    if (n > 8) {
+        /* FIXME: seconds since epoch */
+        return -PARSE_TIME_ERR_FORMAT;
+    }
+
+    /* parse things that can be parsed immediately */
+    if (n == 8) {
+        /* YYYYMMDD */
+        int year = v / 10000;
+        int mon = (v / 100) % 100;
+        int mday = v % 100;
+
+        if (is_valid_date (year, mon, mday))
+            return set_abs_date (state, year, mon, mday);
+
+        return -PARSE_TIME_ERR_INVALIDDATE;
+    }
+
+    if (v > INT_MAX)
+        return -PARSE_TIME_ERR_FORMAT;
+
+    return set_postponed_number (state, v, n);
+}
+
+/* Tell whether c separates the parts of a time (HH:MM[:SS]). */
+static bool
+is_time_sep (char c)
+{
+ return c == ':';
+}
+
+/* Tell whether c separates the parts of a date: '/', '-' or '.'. */
+static bool
+is_date_sep (char c)
+{
+    switch (c) {
+    case '/':
+    case '-':
+    case '.':
+        return true;
+    default:
+        return false;
+    }
+}
+
+/* Tell whether c is a time or a date separator. */
+static bool
+is_sep (char c)
+{
+    if (is_time_sep (c))
+        return true;
+
+    return is_date_sep (c);
+}
+
+/*
+ * Expand a year given with n digits. Two-digit years 00...69 are in
+ * the 2000s and 70...99 in the 1900s, four-digit years are used as
+ * is. For any other n (notably 0, no year given) the result is UNSET.
+ */
+static int
+expand_year (unsigned long year, size_t n)
+{
+    switch (n) {
+    case 2:
+        return (year < 70 ? 2000 : 1900) + year;
+    case 4:
+        return year;
+    default:
+        return UNSET;
+    }
+}
+
+/*
+ * Parse a date given as two or three numbers separated by sep.
+ *
+ * v1...v3 are the values and n1...n3 their digit counts, in input
+ * order; n3 == 0 means there was no third number. The separator
+ * selects the accepted layouts:
+ *
+ *   '/'  M[M]/D[D][/YY[YY]] or M[M]/YYYY
+ *   '-'  YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY
+ *   '.'  D[D].M[M][.YY[YY]]
+ *
+ * Returns -PARSE_TIME_ERR_DATEFORMAT if the digit counts fit no
+ * layout, -PARSE_TIME_ERR_INVALIDDATE if a value is out of range,
+ * otherwise the result of set_abs_date().
+ */
+static int
+parse_date (struct state *state, char sep,
+ unsigned long v1, unsigned long v2, unsigned long v3,
+ size_t n1, size_t n2, size_t n3)
+{
+ /* components not present in the input stay UNSET and are not set */
+ int year = UNSET, mon = UNSET, mday = UNSET;
+
+ assert (is_date_sep (sep));
+
+ switch (sep) {
+ case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
+ if (n1 != 1 && n1 != 2)
+ return -PARSE_TIME_ERR_DATEFORMAT;
+
+ if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
+ /* M[M]/D[D][/YY[YY]] */
+ year = expand_year (v3, n3);
+ mon = v1;
+ mday = v2;
+ } else if (n2 == 4 && n3 == 0) {
+ /* M[M]/YYYY */
+ year = v2;
+ mon = v1;
+ } else {
+ return -PARSE_TIME_ERR_DATEFORMAT;
+ }
+ break;
+
+ case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
+ if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
+ /* YYYY-MM[-DD] */
+ year = v1;
+ mon = v2;
+ if (n3)
+ mday = v3;
+ } else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
+ /* DD-MM[-YY[YY]] */
+ year = expand_year (v3, n3);
+ mon = v2;
+ mday = v1;
+ } else if (n1 == 2 && n2 == 4 && n3 == 0) {
+ /* MM-YYYY */
+ year = v2;
+ mon = v1;
+ } else {
+ return -PARSE_TIME_ERR_DATEFORMAT;
+ }
+ break;
+
+ case '.': /* Date: D[D].M[M][.[YY[YY]]] */
+ if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
+ (n3 != 0 && n3 != 2 && n3 != 4))
+ return -PARSE_TIME_ERR_DATEFORMAT;
+
+ year = expand_year (v3, n3);
+ mon = v2;
+ mday = v1;
+ break;
+ }
+
+ /* range check only what was actually given */
+ if (year != UNSET && !is_valid_year (year))
+ return -PARSE_TIME_ERR_INVALIDDATE;
+
+ if (mon != UNSET && !is_valid_mon (mon))
+ return -PARSE_TIME_ERR_INVALIDDATE;
+
+ if (mday != UNSET && !is_valid_mday (mday))
+ return -PARSE_TIME_ERR_INVALIDDATE;
+
+ return set_abs_date (state, year, mon, mday);
+}
+
+/*
+ * Parse a time given as H[H]:MM[:SS]. v1...v3 are the values and
+ * n1...n3 their digit counts; n3 == 0 means no seconds were given, in
+ * which case seconds are set to zero.
+ *
+ * If hour and minute have been set already and HH:MM directly
+ * follows a '+' or '-', the numbers are a time zone offset instead.
+ */
+static int
+parse_time (struct state *state, char sep,
+            unsigned long v1, unsigned long v2, unsigned long v3,
+            size_t n1, size_t n2, size_t n3)
+{
+    bool hour_len_ok, sec_len_ok, signed_hhmm;
+
+    assert (is_time_sep (sep));
+
+    hour_len_ok = (n1 == 1 || n1 == 2);
+    sec_len_ok = (n3 == 0 || n3 == 2);
+
+    if (!hour_len_ok || n2 != 2 || !sec_len_ok)
+        return -PARSE_TIME_ERR_TIMEFORMAT;
+
+    /*
+     * REVISIT: this means it's required to set time *before* being
+     * able to set timezone
+     */
+    signed_hhmm = n1 == 2 && n3 == 0 &&
+        (state->delim == '+' || state->delim == '-');
+
+    if (signed_hhmm &&
+        is_field_set (state, TM_ABS_HOUR) &&
+        is_field_set (state, TM_ABS_MIN))
+        return set_user_tz (state, state->delim, v1, v2);
+
+    if (!is_valid_time (v1, v2, v3))
+        return -PARSE_TIME_ERR_INVALIDTIME;
+
+    return set_abs_time (state, v1, v2, n3 ? v3 : 0);
+}
+
+/*
+ * strtoul(3) helper for decimal numbers: returns the value, points
+ * *endp past the parsed chars and stores their count in *len.
+ */
+static unsigned long
+strtoul_len (const char *s, const char **endp, size_t *len)
+{
+    char *end;
+    unsigned long value = strtoul (s, &end, 10);
+
+    *endp = end;
+    *len = end - s;
+
+    return value;
+}
+
+/*
+ * Parse a number, or a group of two or three numbers joined by a time
+ * or date separator, at the start of s. Return < 0 on error, number
+ * of parsed chars on success.
+ */
+static ssize_t
+parse_number (struct state *state, const char *s)
+{
+    unsigned long v1, v2, v3 = 0;
+    size_t n1, n2, n3 = 0;
+    const char *p = s;
+    char sep;
+    int err;
+
+    v1 = strtoul_len (p, &p, &n1);
+
+    /* not followed by a separator and a digit: a single number */
+    if (!is_sep (*p) || !isdigit ((unsigned char) p[1])) {
+        err = parse_single_number (state, v1, n1);
+        if (err)
+            return err;
+
+        return p - s;
+    }
+
+    sep = *p;
+    v2 = strtoul_len (p + 1, &p, &n2);
+
+    /* a third number counts only if joined by the same separator */
+    if (*p == sep && isdigit ((unsigned char) p[1]))
+        v3 = strtoul_len (p + 1, &p, &n3);
+
+    err = is_time_sep (sep) ?
+        parse_time (state, sep, v1, v2, v3, n1, n2, n3) :
+        parse_date (state, sep, v1, v2, v3, n1, n2, n3);
+    if (err)
+        return err;
+
+    return p - s;
+}
+
+/*
+ * Parse delimiter(s): skip everything up to the next letter or digit,
+ * remembering the last char skipped for further processing. Returns
+ * the number of parsed chars.
+ */
+static ssize_t
+parse_delim (struct state *state, const char *s)
+{
+    const char *p;
+
+    /*
+     * REVISIT: any actions depending on the first delim after last
+     * field? what could it be?
+     */
+
+    for (p = s; *p != '\0'; p++) {
+        if (isalnum ((unsigned char) *p))
+            break;
+
+        set_delim (state, *p);
+    }
+
+    return p - s;
+}
+
+/*
+ * Parse a date/time string token by token: letters start a keyword,
+ * digits start a number, anything else is a delimiter. Return < 0 on
+ * error, number of parsed chars on success.
+ */
+static ssize_t
+parse_input (struct state *state, const char *s)
+{
+    const char *p = s;
+    int err;
+
+    while (*p != '\0') {
+        const unsigned char c = (unsigned char) *p;
+        ssize_t len;
+
+        if (isalpha (c))
+            len = parse_keyword (state, p);
+        else if (isdigit (c))
+            len = parse_number (state, p);
+        else
+            len = parse_delim (state, p);
+
+        /* a parser that consumed nothing would loop forever */
+        if (len == 0)
+            return -PARSE_TIME_ERR;
+
+        if (len < 0)
+            return len;
+
+        p += len;
+    }
+
+    /* a number may still be waiting at the end of input */
+    err = handle_postponed_number (state);
+    if (err < 0)
+        return err;
+
+    return p - s;
+}
+
+/*
+ * Processing the parsed input.
+ */
+
+/*
+ * Break the reference time down to tm. The reference time is *now if
+ * now is non-NULL, otherwise the current time. If a time zone was
+ * parsed, the result is in that zone, otherwise in local time.
+ * Returns 0 on success, -PARSE_TIME_ERR_LIB if a library call fails.
+ */
+static int
+initialize_now (struct state *state, struct tm *tm, const time_t *now)
+{
+    struct tm *res;
+    time_t t;
+
+    if (now)
+        t = *now;
+    else if (time (&t) == (time_t) -1)
+        return -PARSE_TIME_ERR_LIB;
+
+    if (is_field_set (state, TM_TZ)) {
+        /*
+         * Some other time zone: shift the time by the zone offset
+         * and break it down without further zone conversion.
+         */
+        t += get_field (state, TM_TZ) * 60;
+        res = gmtime_r (&t, tm);
+    } else {
+        /* local time */
+        res = localtime_r (&t, tm);
+    }
+
+    return res ? 0 : -PARSE_TIME_ERR_LIB;
+}
+
+/*
+ * Normalize tm in place by a round trip through mktime(3) and
+ * localtime_r(3). Both work in local time, so they cancel each other
+ * out, making this function agnostic to time zone. Returns 0 on
+ * success, -PARSE_TIME_ERR_LIB if either call fails.
+ */
+static int
+normalize_tm (struct tm *tm)
+{
+    time_t t = mktime (tm);
+
+    if (t == (time_t) -1 || localtime_r (&t, tm) == NULL)
+        return -PARSE_TIME_ERR_LIB;
+
+    return 0;
+}
+
+/*
+ * Read an absolute field out of a struct tm, converted to the units
+ * used by the parser: months are 1-based and years are full years.
+ * Asking for any field other than the TM_ABS_* ones is a programming
+ * error.
+ */
+static int
+tm_get_field (const struct tm *tm, enum field field)
+{
+    int value = 0;
+
+    switch (field) {
+    case TM_ABS_SEC:
+        value = tm->tm_sec;
+        break;
+    case TM_ABS_MIN:
+        value = tm->tm_min;
+        break;
+    case TM_ABS_HOUR:
+        value = tm->tm_hour;
+        break;
+    case TM_ABS_MDAY:
+        value = tm->tm_mday;
+        break;
+    case TM_ABS_MON:
+        value = tm->tm_mon + 1; /* 0- to 1-based */
+        break;
+    case TM_ABS_YEAR:
+        value = 1900 + tm->tm_year;
+        break;
+    case TM_ABS_WDAY:
+        value = tm->tm_wday;
+        break;
+    case TM_ABS_ISDST:
+        value = tm->tm_isdst;
+        break;
+    default:
+        assert (false);
+        break;
+    }
+
+    return value;
+}
+
+/*
+ * Adjust the hour according to the am/pm setting, if there is one.
+ * The adjustment goes to the relative hour field. Returns
+ * -PARSE_TIME_ERR_TIMEFORMAT if am/pm was given without an hour and
+ * -PARSE_TIME_ERR_INVALIDTIME if the hour does not fit a 12-hour
+ * clock.
+ */
+static int
+fixup_ampm (struct state *state)
+{
+    int hour, hdiff = 0;
+    bool pm;
+
+    if (!is_field_set (state, TM_AMPM))
+        return 0;
+
+    if (!is_field_set (state, TM_ABS_HOUR))
+        return -PARSE_TIME_ERR_TIMEFORMAT;
+
+    hour = get_field (state, TM_ABS_HOUR);
+    if (!is_valid_12hour (hour))
+        return -PARSE_TIME_ERR_INVALIDTIME;
+
+    pm = get_field (state, TM_AMPM) != 0;
+
+    if (pm && hour != 12)
+        hdiff = 12;  /* 12pm is noon, everything else moves to afternoon */
+    else if (!pm && hour == 12)
+        hdiff = -12; /* 12am is midnight, beginning of day */
+
+    /* relative fields are subtracted later on, hence the negation */
+    mod_field (state, TM_REL_HOUR, -hdiff);
+
+    return 0;
+}
+
+/*
+ * Combine absolute and relative fields, and round.
+ *
+ * state is the result of parsing, t_out receives the final time,
+ * tnow is the reference time (NULL for the current time) and round
+ * selects rounding: values >= PARSE_TIME_ROUND_UP round up, other
+ * values except PARSE_TIME_NO_ROUND round down. Returns 0 on
+ * success, < 0 on error; *t_out is only written on success.
+ */
+static int
+create_output (struct state *state, time_t *t_out, const time_t *tnow,
+ int round)
+{
+ struct tm tm = { .tm_isdst = -1 };
+ struct tm now;
+ time_t t;
+ enum field f;
+ int r;
+ int week_round = PARSE_TIME_NO_ROUND;
+
+ r = initialize_now (state, &now, tnow);
+ if (r)
+ return r;
+
+ /* initialize uninitialized fields to now */
+ for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
+ if (state->set[f] == FIELD_NOW) {
+ state->tm[f] = tm_get_field (&now, f);
+ state->set[f] = FIELD_SET;
+ }
+ }
+
+ /*
+ * If MON is set but YEAR is not, refer to past month.
+ *
+ * Note that the comparison is >=, so the current month refers to
+ * the same month a year ago.
+ *
+ * REVISIT: Why are month/week special in this regard? What about
+ * mday, or time. Should refer to past.
+ */
+ if (is_field_set (state, TM_ABS_MON) &&
+ !is_field_set (state, TM_ABS_YEAR)) {
+ if (get_field (state, TM_ABS_MON) >= tm_get_field (&now, TM_ABS_MON))
+ mod_field (state, TM_REL_YEAR, 1);
+ }
+
+ /*
+ * If WDAY is set but MDAY is not, we consider WDAY relative
+ *
+ * REVISIT: This fails on stuff like "two months ago monday"
+ * because two months ago wasn't the same day as today. Postpone
+ * until we know date?
+ */
+ if (is_field_set (state, TM_ABS_WDAY) &&
+ !is_field_set (state, TM_ABS_MDAY)) {
+ int wday = get_field (state, TM_ABS_WDAY);
+ int today = tm_get_field (&now, TM_ABS_WDAY);
+ int rel_days;
+
+ /* 1...7 days back; today's weekday means a week ago */
+ if (today > wday)
+ rel_days = today - wday;
+ else
+ rel_days = today + 7 - wday;
+
+ /* this also prevents special week rounding from happening */
+ mod_field (state, TM_REL_DAY, rel_days);
+
+ unset_field (state, TM_ABS_WDAY);
+ }
+
+ r = fixup_ampm (state);
+ if (r)
+ return r;
+
+ /*
+ * Iterate fields from most accurate to least accurate, and set
+ * unset fields according to requested rounding.
+ */
+ for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
+ if (round != PARSE_TIME_NO_ROUND) {
+ /* note: this r shadows the int r of the function scope */
+ enum field r = abs_to_rel_field (f);
+
+ if (is_field_set (state, f) || is_field_set (state, r)) {
+ /*
+ * Most accurate field given by the user. Relative
+ * fields are subtracted below, so -1 adds one unit.
+ */
+ if (round >= PARSE_TIME_ROUND_UP)
+ mod_field (state, r, -1);
+ round = PARSE_TIME_NO_ROUND; /* no more rounding */
+ } else {
+ if (f == TM_ABS_MDAY &&
+ is_field_set (state, TM_REL_WEEK)) {
+ /* week is most accurate */
+ week_round = round;
+ round = PARSE_TIME_NO_ROUND;
+ } else {
+ set_field (state, f, field_zero (f));
+ }
+ }
+ }
+
+ /* whatever is still unset comes from the reference time */
+ if (!is_field_set (state, f))
+ set_field (state, f, tm_get_field (&now, f));
+ }
+
+ /* special case: rounding with week accuracy */
+ if (week_round != PARSE_TIME_NO_ROUND) {
+ /*
+ * temporarily set more accurate fields to now
+ *
+ * NOTE(review): the loop above appears to have set all four of
+ * these fields already, in which case set_field() refuses with
+ * -PARSE_TIME_ERR_ALREADYSET and the values stay as they were.
+ * The return values are not checked. Confirm whether this is
+ * intended.
+ */
+ set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
+ set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
+ set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
+ set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
+ }
+
+ /*
+ * set all fields. they may contain out of range values before
+ * normalization by mktime(3).
+ */
+ tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
+ tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
+ tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
+ tm.tm_mday = get_field (state, TM_ABS_MDAY) -
+ get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
+ tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
+ tm.tm_mon--; /* 1- to 0-based */
+ tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;
+
+ /*
+ * It's always normal time.
+ *
+ * NOTE(review): mktime(3) documents a negative tm_isdst as "not
+ * known", leaving it to the library to work out whether DST is in
+ * effect. That is not the same as forcing normal time; confirm
+ * which one is meant.
+ *
+ * REVISIT: This is probably not a solution that universally
+ * works. Just make sure DST is not taken into account. We don't
+ * want rounding to be affected by DST.
+ */
+ tm.tm_isdst = -1;
+
+ /* special case: rounding with week accuracy */
+ if (week_round != PARSE_TIME_NO_ROUND) {
+ /* normalize to get proper tm.wday */
+ r = normalize_tm (&tm);
+ if (r < 0)
+ return r;
+
+ /* set more accurate fields back to zero */
+ tm.tm_sec = 0;
+ tm.tm_min = 0;
+ tm.tm_hour = 0;
+ tm.tm_isdst = -1;
+
+ /*
+ * monday is the true 1st day of week, but this is easier
+ *
+ * tm_wday counts days since Sunday, so rounding down lands on
+ * the Sunday of the week and rounding up on the one after.
+ */
+ if (week_round <= PARSE_TIME_ROUND_DOWN)
+ tm.tm_mday -= tm.tm_wday;
+ else
+ tm.tm_mday += 7 - tm.tm_wday;
+ }
+
+ if (is_field_set (state, TM_TZ)) {
+ /* tm is in specified TZ, convert to UTC for timegm(3) */
+ tm.tm_min -= get_field (state, TM_TZ);
+ t = timegm (&tm);
+ } else {
+ /* tm is in local time */
+ t = mktime (&tm);
+ }
+
+ if (t == (time_t) -1)
+ return -PARSE_TIME_ERR_LIB;
+
+ *t_out = t;
+
+ return 0;
+}
+
+/*
+ * internally, all errors are < 0. parse_time_string() returns errors > 0.
+ * The argument is parenthesized so that any expression negates as a whole.
+ */
+#define EXTERNAL_ERR(r) (-(r))
+
+/*
+ * Parse the date/time string s and store the result in *t.
+ *
+ * now is the reference time for relative and partial dates, or NULL
+ * to use the current time. round is one of PARSE_TIME_NO_ROUND,
+ * PARSE_TIME_ROUND_DOWN or PARSE_TIME_ROUND_UP.
+ *
+ * Returns 0 on success, or one of the positive PARSE_TIME_ERR* codes
+ * on error; PARSE_TIME_ERR if s or t is NULL. *t is only written on
+ * success.
+ */
+int
+parse_time_string (const char *s, time_t *t, const time_t *now, int round)
+{
+    struct state state = { .last_field = TM_NONE };
+    int r;
+
+    if (!s || !t)
+        return EXTERNAL_ERR (-PARSE_TIME_ERR);
+
+    r = parse_input (&state, s);
+    if (r < 0)
+        return EXTERNAL_ERR (r);
+
+    r = create_output (&state, t, now, round);
+    if (r < 0)
+        return EXTERNAL_ERR (r);
+
+    return 0;
+}
diff --git a/lib/parse-time-string.h b/lib/parse-time-string.h
new file mode 100644
index 0000000..50b7c6f
--- /dev/null
+++ b/lib/parse-time-string.h
@@ -0,0 +1,95 @@
+/*
+ * parse time string - user friendly date and time parser
+ * Copyright © 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Jani Nikula <jani at nikula.org>
+ */
+
+#ifndef PARSE_TIME_STRING_H
+#define PARSE_TIME_STRING_H
+
+#include <time.h> /* for time_t; kept outside the extern "C" block */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* status codes returned by parse_time_string(); zero means success */
+enum {
+    PARSE_TIME_OK = 0,
+    PARSE_TIME_ERR = 1,             /* unspecified error */
+    PARSE_TIME_ERR_LIB = 2,         /* library call failed */
+    PARSE_TIME_ERR_ALREADYSET = 3,  /* attempt to set unit twice */
+    PARSE_TIME_ERR_FORMAT = 4,      /* generic date/time format error */
+    PARSE_TIME_ERR_DATEFORMAT = 5,  /* date format error */
+    PARSE_TIME_ERR_TIMEFORMAT = 6,  /* time format error */
+    PARSE_TIME_ERR_INVALIDDATE = 7, /* date value error */
+    PARSE_TIME_ERR_INVALIDTIME = 8, /* time value error */
+    PARSE_TIME_ERR_KEYWORD = 9,     /* unknown keyword */
+};
+
+/* rounding modes for the 'round' argument of parse_time_string() */
+enum {
+    PARSE_TIME_NO_ROUND = 0,    /* no rounding */
+    PARSE_TIME_ROUND_DOWN = -1, /* round down */
+    PARSE_TIME_ROUND_UP = 1,    /* round up */
+};
+
+/**
+ * parse_time_string() - user friendly date and time parser
+ * @s: string to parse
+ * @t: pointer to time_t to store parsed time in
+ * @now: pointer to time_t containing reference date/time, or NULL
+ * @round: PARSE_TIME_NO_ROUND, PARSE_TIME_ROUND_DOWN, or
+ * PARSE_TIME_ROUND_UP
+ *
+ * Parse a date/time string 's' and store the parsed date/time result
+ * in 't'.
+ *
+ * A reference date/time is used for determining the "date/time units"
+ * (roughly equivalent to struct tm members) not specified by 's'. If
+ * 'now' is non-NULL, it must contain a pointer to a time_t to be used
+ * as reference date/time. Otherwise, the current time is used.
+ *
+ * If 's' does not specify a full date/time, the 'round' parameter
+ * specifies if and how the result should be rounded as follows:
+ *
+ * PARSE_TIME_NO_ROUND: All date/time units that are not specified
+ * by 's' are set to the corresponding unit derived from the
+ * reference date/time.
+ *
+ * PARSE_TIME_ROUND_DOWN: All date/time units that are more accurate
+ * than the most accurate unit specified by 's' are set to the
+ * smallest valid value for that unit. Rest of the unspecified units
+ * are set as in PARSE_TIME_NO_ROUND.
+ *
+ * PARSE_TIME_ROUND_UP: All date/time units that are more accurate
+ * than the most accurate unit specified by 's' are set to the
+ * smallest valid value for that unit. The most accurate unit
+ * specified by 's' is incremented by one (and this is rolled over
+ * to the less accurate units as necessary). Rest of the unspecified
+ * units are set as in PARSE_TIME_NO_ROUND.
+ *
+ * Return 0 (PARSE_TIME_OK) for successfully parsed date/time, or one
+ * of PARSE_TIME_ERR_* on error. 't' is not modified on error.
+ */
+int parse_time_string (const char *s, time_t *t, const time_t *now, int round);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PARSE_TIME_STRING_H */
--
1.7.9.5
More information about the notmuch
mailing list