[RFC PATCH 1/2] lib: add date/time parser
Mark Walters
markwalters1009 at gmail.com
Sun Feb 26 00:45:22 PST 2012
Hi, I have not read all of this carefully, but it looks very nice to
me. It is pleasant to read.
I have not looked through the create_output function yet, but I have
looked at most of the rest.
My only concern (as mentioned on irc) is the question of
internationalisation. I think most of this can be done by allowing
other keyword tables, and that seems quite clean. Ideally I think the
user would set which localisation to use in the config file and then the
cli would pass that to the lib parser.
I think it would be a shame to hold up this very useful functionality
just because of these internationalisation concerns.
The code is fairly large but it is easy to read and I would imagine
(excepting the internationalisation question) almost maintenance free.
On the actual code I have a small number of comments/queries below.
Best wishes
Mark
On Mon, 20 Feb 2012 00:55:51 +0200, Jani Nikula <jani at nikula.org> wrote:
> Signed-off-by: Jani Nikula <jani at nikula.org>
> ---
> lib/Makefile.local | 1 +
> lib/parse-time-string.c | 1304 +++++++++++++++++++++++++++++++++++++++++++++++
> lib/parse-time-string.h | 95 ++++
> 3 files changed, 1400 insertions(+), 0 deletions(-)
> create mode 100644 lib/parse-time-string.c
> create mode 100644 lib/parse-time-string.h
>
> diff --git a/lib/Makefile.local b/lib/Makefile.local
> index 54c4dea..803a284 100644
> --- a/lib/Makefile.local
> +++ b/lib/Makefile.local
> @@ -53,6 +53,7 @@ libnotmuch_c_srcs = \
> $(dir)/libsha1.c \
> $(dir)/message-file.c \
> $(dir)/messages.c \
> + $(dir)/parse-time-string.c \
> $(dir)/sha1.c \
> $(dir)/tags.c
>
> diff --git a/lib/parse-time-string.c b/lib/parse-time-string.c
> new file mode 100644
> index 0000000..59713dc
> --- /dev/null
> +++ b/lib/parse-time-string.c
> @@ -0,0 +1,1304 @@
> +/*
> + * parse time string - user friendly date and time parser
> + * Copyright © 2012 Jani Nikula
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Jani Nikula <jani at nikula.org>
> + */
> +
> +#ifndef PARSE_TIME_DEBUG
> +#define NDEBUG /* for assert() */
> +#endif
> +
> +#include <assert.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <stdio.h>
> +#include <stdarg.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <strings.h>
> +#include <time.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +
> +#include "parse-time-string.h"
> +
> +#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
> +
> +/* field indices in struct state tm, and set fields */
> +enum field {
> + /* keep SEC...YEAR in this order */
> + TM_ABS_SEC, /* seconds */
> + TM_ABS_MIN, /* minutes */
> + TM_ABS_HOUR, /* hours */
> + TM_ABS_MDAY, /* day of the month */
> + TM_ABS_MON, /* month */
> + TM_ABS_YEAR, /* year */
> +
> + TM_ABS_WDAY, /* day of the week. special: may be relative */
> + TM_ABS_ISDST, /* daylight saving time */
> +
> + TM_AMPM, /* am vs. pm */
> + TM_TZ, /* timezone in minutes */
> +
> + /* keep SEC...YEAR in this order */
> + TM_REL_SEC, /* seconds relative to now */
> + TM_REL_MIN, /* minutes ... */
> + TM_REL_HOUR, /* hours ... */
> + TM_REL_DAY, /* days ... */
> + TM_REL_MON, /* months ... */
> + TM_REL_YEAR, /* years ... */
> + TM_REL_WEEK, /* weeks ... */
> +
> + TM_NONE, /* not a field */
> +
> + TM_SIZE = TM_NONE,
> +};
> +
> +enum field_set {
> + FIELD_UNSET,
> + FIELD_SET,
> + FIELD_NOW,
> +};
> +
> +static enum field
> +next_field (enum field field)
> +{
> + /* note: depends on the enum ordering */
> + return field < TM_ABS_YEAR ? field + 1 : TM_NONE;
> +}
> +
> +static enum field
> +abs_to_rel_field (enum field field)
> +{
> + assert (field <= TM_ABS_YEAR);
> +
> + /* note: depends on the enum ordering */
> + return field + (TM_REL_SEC - TM_ABS_SEC);
> +}
> +
> +/* get zero value for field */
> +static int
> +field_zero (enum field field)
> +{
> + if (field == TM_ABS_MDAY || field == TM_ABS_MON)
> + return 1;
> + else if (field == TM_ABS_YEAR)
> + return 1970;
> + else
> + return 0;
> +}
> +
> +struct state {
> + int tm[TM_SIZE]; /* parsed date and time */
> + enum field_set set[TM_SIZE]; /* set status of tm */
> +
> + enum field last_field;
> + char delim;
> +
> + int postponed_length; /* number of digits in postponed value */
> + int postponed_value;
> +};
Personally I would prefer this above the function definitions (but
obviously that is up to you).
> +
> +/*
> + * Helpers for postponed numbers.
> + *
> + * postponed_length is the number of digits in postponed value. 0
> + * means there is no postponed number. -1 means there is a postponed
> + * number, but it comes from a keyword, and it doesn't have digits.
> + */
> +static int
> +get_postponed_length (struct state *state)
> +{
> + return state->postponed_length;
> +}
> +
> +static bool
> +get_postponed_number (struct state *state, int *v, int *n)
> +{
> + if (!state->postponed_length)
> + return false;
> +
> + if (n)
> + *n = state->postponed_length;
> +
> + if (v)
> + *v = state->postponed_value;
> +
> + state->postponed_length = 0;
> + state->postponed_value = 0;
> +
> + return true;
> +}
> +
> +/* parse postponed number if one exists */
> +static int parse_postponed_number (struct state *state, int v, int n);
> +static int
> +handle_postponed_number (struct state *state)
> +{
> + int v = state->postponed_value;
> + int n = state->postponed_length;
> +
> + if (!n)
> + return 0;
> +
> + state->postponed_value = 0;
> + state->postponed_length = 0;
> +
> + return parse_postponed_number (state, v, n);
> +}
> +
> +/*
> + * set new postponed number to be handled later. if one exists
> + * already, handle it first. n may be -1 to indicate a keyword that
> + * has no number length.
> + */
> +static int
> +set_postponed_number (struct state *state, int v, int n)
> +{
> + int r;
> +
> + /* parse previous postponed number, if any */
> + r = handle_postponed_number (state);
> + if (r)
> + return r;
> +
> + state->postponed_length = n;
> + state->postponed_value = v;
> +
> + return 0;
> +}
> +
> +static void
> +set_delim (struct state *state, char delim)
> +{
> + state->delim = delim;
> +}
> +
> +static void
> +unset_delim (struct state *state)
> +{
> + state->delim = 0;
> +}
> +
> +/*
> + * Field set/get/mod helpers.
> + */
> +
> +/* returns unset for non-tracked fields */
> +static bool
> +is_field_set (struct state *state, enum field field)
> +{
> + assert (field < ARRAY_SIZE (state->tm));
> +
> + return field < ARRAY_SIZE (state->set) &&
> + state->set[field] != FIELD_UNSET;
> +}
> +
> +static void
> +unset_field (struct state *state, enum field field)
> +{
> + assert (field < ARRAY_SIZE (state->tm));
> +
> + state->set[field] = FIELD_UNSET;
> + state->tm[field] = 0;
> +}
> +
> +/* Set field to value. */
> +static int
> +set_field (struct state *state, enum field field, int value)
> +{
> + int r;
> +
> + assert (field < ARRAY_SIZE (state->tm));
> +
> + /* some fields can only be set once */
> + if (field < ARRAY_SIZE (state->set) && state->set[field] != FIELD_UNSET)
> + return -PARSE_TIME_ERR_ALREADYSET;
> +
> + state->set[field] = FIELD_SET;
> +
> + /*
> + * REVISIT: There could be a "next_field" that would be set from
> + * "field" for the duration of the handle_postponed_number() call,
> + * so it has more information to work with.
> + */
> +
> + /* parse postponed number, if any */
> + r = handle_postponed_number (state);
> + if (r)
> + return r;
> +
> + unset_delim (state);
> +
> + state->tm[field] = value;
> + state->last_field = field;
> +
> + return 0;
> +}
> +
> +/*
> + * Mark n fields in fields to be set to current date/time in the
> + * specified time zone, or local timezone if not specified. The fields
> + * will be initialized after parsing is complete and timezone is
> + * known.
> + */
> +static int
> +set_fields_to_now (struct state *state, enum field *fields, size_t n)
> +{
> + size_t i;
> + int r;
> +
> + for (i = 0; i < n; i++) {
> + r = set_field (state, fields[i], 0);
> + if (r)
> + return r;
> + state->set[fields[i]] = FIELD_NOW;
> + }
> +
> + return 0;
> +}
> +
> +/* Modify field by adding value to it. To be used on relative fields. */
> +static int
> +mod_field (struct state *state, enum field field, int value)
> +{
> + int r;
> +
> + assert (field < ARRAY_SIZE (state->tm)); /* assert relative??? */
> +
> + if (field < ARRAY_SIZE (state->set))
> + state->set[field] = FIELD_SET;
> +
> + /* parse postponed number, if any */
> + r = handle_postponed_number (state);
> + if (r)
> + return r;
> +
> + unset_delim (state);
> +
> + state->tm[field] += value;
> + state->last_field = field;
> +
> + return 0;
> +}
> +
> +/*
> + * Get field value. Make sure the field is set before query. It's most
> + * likely an error to call this while parsing (for example fields set
> + * as FIELD_NOW will only be set to some value after parsing).
> + */
> +static int
> +get_field (struct state *state, enum field field)
> +{
> + assert (field < ARRAY_SIZE (state->tm));
> +
> + return state->tm[field];
> +}
> +
> +/* Unset indicator for time and date set helpers. */
> +#define UNSET -1
> +
> +/* Time set helper. No input checking. Use UNSET (-1) to leave unset. */
> +static int
> +set_abs_time (struct state *state, int hour, int min, int sec)
> +{
> + int r;
> +
> + if (hour != UNSET) {
> + if ((r = set_field (state, TM_ABS_HOUR, hour)))
> + return r;
> + }
> +
> + if (min != UNSET) {
> + if ((r = set_field (state, TM_ABS_MIN, min)))
> + return r;
> + }
> +
> + if (sec != UNSET) {
> + if ((r = set_field (state, TM_ABS_SEC, sec)))
> + return r;
> + }
> +
> + return 0;
> +}
> +
> +/* Date set helper. No input checking. Use UNSET (-1) to leave unset. */
> +static int
> +set_abs_date (struct state *state, int year, int mon, int mday)
> +{
> + int r;
> +
> + if (year != UNSET) {
> + if ((r = set_field (state, TM_ABS_YEAR, year)))
> + return r;
> + }
> +
> + if (mon != UNSET) {
> + if ((r = set_field (state, TM_ABS_MON, mon)))
> + return r;
> + }
> +
> + if (mday != UNSET) {
> + if ((r = set_field (state, TM_ABS_MDAY, mday)))
> + return r;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Keyword parsing and handling.
> + */
> +struct keyword;
> +typedef int (*setter_t)(struct state *state, struct keyword *kw);
> +
> +struct keyword {
> + const char *name; /* keyword */
> + size_t minlen; /* min length to match, 0 = must match all */
> + enum field field; /* field to set, or FIELD_NONE if N/A */
> + int value; /* value to set, or 0 if N/A */
> + setter_t set; /* function to use for setting, if non-NULL */
> +};
> +
> +/*
> + * Setter callback functions for keywords.
> + */
> +static int
> +kw_set_default (struct state *state, struct keyword *kw)
> +{
> + return set_field (state, kw->field, kw->value);
> +}
> +
> +static int
> +kw_set_rel (struct state *state, struct keyword *kw)
> +{
> + int multiplier = 1;
> +
> + /* get a previously set multiplier, if any */
> + get_postponed_number (state, &multiplier, NULL);
> +
> + /* accumulate relative field values */
> + return mod_field (state, kw->field, multiplier * kw->value);
> +}
> +
> +static int
> +kw_set_number (struct state *state, struct keyword *kw)
> +{
> + /* -1 = no length, from keyword */
> + return set_postponed_number (state, kw->value, -1);
> +}
> +
> +static int
> +kw_set_month (struct state *state, struct keyword *kw)
> +{
> + int n = get_postponed_length (state);
> +
> + /* consume postponed number if it could be mday */
> + if (n == 1 || n == 2) {
> + int r, v;
> +
> + get_postponed_number (state, &v, NULL);
> +
> + if (v < 1 || v > 31)
> + return -PARSE_TIME_ERR_INVALIDDATE;
> +
> + r = set_field (state, TM_ABS_MDAY, v);
> + if (r)
> + return r;
> + }
> +
> + return set_field (state, kw->field, kw->value);
> +}
> +
> +static int
> +kw_set_ampm (struct state *state, struct keyword *kw)
> +{
> + int n = get_postponed_length (state);
> +
> + /* consume postponed number if it could be hour */
> + if (n == 1 || n == 2) {
> + int r, v;
> +
> + get_postponed_number (state, &v, NULL);
> +
> + if (v < 1 || v > 12)
> + return -PARSE_TIME_ERR_INVALIDTIME;
> +
> + r = set_abs_time (state, v, 0, 0);
> + if (r)
> + return r;
> + }
> +
> + return set_field (state, kw->field, kw->value);
> +}
> +
> +static int
> +kw_set_timeofday (struct state *state, struct keyword *kw)
> +{
> + return set_abs_time (state, kw->value, 0, 0);
> +}
> +
> +static int
> +kw_set_today (struct state *state, struct keyword *kw)
> +{
> + enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
> +
> + return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
> +}
> +
> +static int
> +kw_set_now (struct state *state, struct keyword *kw)
> +{
> + enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
> +
> + return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
> +}
> +
> +static int
> +kw_set_ordinal (struct state *state, struct keyword *kw)
> +{
> + int n, v;
> +
> + /* require a postponed number */
> + if (!get_postponed_number (state, &v, &n))
> + return -PARSE_TIME_ERR_DATEFORMAT;
> +
> + /* ordinals are mday */
> + if (n != 1 && n != 2)
> + return -PARSE_TIME_ERR_DATEFORMAT;
> +
> + /* be strict about st, nd, rd, and lax about th */
> + if (strcasecmp (kw->name, "st") == 0 && v != 1 && v != 21 && v != 31)
> + return -PARSE_TIME_ERR_INVALIDDATE;
> + else if (strcasecmp (kw->name, "nd") == 0 && v != 2 && v != 22)
> + return -PARSE_TIME_ERR_INVALIDDATE;
> + else if (strcasecmp (kw->name, "rd") == 0 && v != 3 && v != 23)
> + return -PARSE_TIME_ERR_INVALIDDATE;
> + else if (strcasecmp (kw->name, "th") == 0 && (v < 1 || v > 31))
> + return -PARSE_TIME_ERR_INVALIDDATE;
> +
> + return set_field (state, TM_ABS_MDAY, v);
> +}
> +
> +/*
> + * Accepted keywords.
> + *
> + * If keyword begins with upper case letter, then the matching will be
> + * case sensitive. Otherwise the matching is case insensitive.
> + *
> + * If setter is NULL, set_default will be used.
> + *
> + * Note: Order matters. Matching is greedy, longest match is used, but
> + * of equal length matches the first one is used.
> + */
> +static struct keyword keywords[] = {
> + /* weekdays */
> + { "sunday", 3, TM_ABS_WDAY, 0, NULL },
> + { "monday", 3, TM_ABS_WDAY, 1, NULL },
> + { "tuesday", 3, TM_ABS_WDAY, 2, NULL },
> + { "wednesday", 3, TM_ABS_WDAY, 3, NULL },
> + { "thursday", 3, TM_ABS_WDAY, 4, NULL },
> + { "friday", 3, TM_ABS_WDAY, 5, NULL },
> + { "saturday", 3, TM_ABS_WDAY, 6, NULL },
> +
> + /* months */
> + { "january", 3, TM_ABS_MON, 1, kw_set_month },
> + { "february", 3, TM_ABS_MON, 2, kw_set_month },
> + { "march", 3, TM_ABS_MON, 3, kw_set_month },
> + { "april", 3, TM_ABS_MON, 4, kw_set_month },
> + { "may", 3, TM_ABS_MON, 5, kw_set_month },
> + { "june", 3, TM_ABS_MON, 6, kw_set_month },
> + { "july", 3, TM_ABS_MON, 7, kw_set_month },
> + { "august", 3, TM_ABS_MON, 8, kw_set_month },
> + { "september", 3, TM_ABS_MON, 9, kw_set_month },
> + { "october", 3, TM_ABS_MON, 10, kw_set_month },
> + { "november", 3, TM_ABS_MON, 11, kw_set_month },
> + { "december", 3, TM_ABS_MON, 12, kw_set_month },
> +
> + /* durations */
> + { "years", 1, TM_REL_YEAR, 1, kw_set_rel },
> + { "weeks", 1, TM_REL_WEEK, 1, kw_set_rel },
> + { "days", 1, TM_REL_DAY, 1, kw_set_rel },
> + { "hours", 1, TM_REL_HOUR, 1, kw_set_rel },
> + { "hrs", 1, TM_REL_HOUR, 1, kw_set_rel },
> + /* M=months, m=minutes. single M must precede minutes in the list. */
> + { "M", 1, TM_REL_MON, 1, kw_set_rel },
> + { "minutes", 1, TM_REL_MIN, 1, kw_set_rel },
> + { "mins", 1, TM_REL_MIN, 1, kw_set_rel },
> + { "months", 1, TM_REL_MON, 1, kw_set_rel },
> + { "seconds", 1, TM_REL_SEC, 1, kw_set_rel },
> + { "secs", 1, TM_REL_SEC, 1, kw_set_rel },
> +
> + /* numbers */
> + { "one", 0, TM_NONE, 1, kw_set_number },
> + { "two", 0, TM_NONE, 2, kw_set_number },
> + { "three", 0, TM_NONE, 3, kw_set_number },
> + { "four", 0, TM_NONE, 4, kw_set_number },
> + { "five", 0, TM_NONE, 5, kw_set_number },
> + { "six", 0, TM_NONE, 6, kw_set_number },
> + { "seven", 0, TM_NONE, 7, kw_set_number },
> + { "eight", 0, TM_NONE, 8, kw_set_number },
> + { "nine", 0, TM_NONE, 9, kw_set_number },
> + { "ten", 0, TM_NONE, 10, kw_set_number },
> + { "dozen", 0, TM_NONE, 12, kw_set_number },
> + { "hundred", 0, TM_NONE, 100, kw_set_number },
> +
> + /* special number forms */
> + { "this", 0, TM_NONE, 0, kw_set_number },
> + { "last", 0, TM_NONE, 1, kw_set_number },
> +
> + /* specials */
> + { "yesterday", 0, TM_REL_DAY, 1, kw_set_rel },
> + { "today", 0, TM_NONE, 0, kw_set_today },
> + { "now", 0, TM_NONE, 0, kw_set_now },
> + { "noon", 0, TM_NONE, 12, kw_set_timeofday },
> + { "midnight", 0, TM_NONE, 0, kw_set_timeofday },
> + { "am", 0, TM_AMPM, 0, kw_set_ampm },
> + { "a.m.", 0, TM_AMPM, 0, kw_set_ampm },
> + { "pm", 0, TM_AMPM, 1, kw_set_ampm },
> + { "p.m.", 0, TM_AMPM, 1, kw_set_ampm },
> + { "st", 0, TM_NONE, 0, kw_set_ordinal },
> + { "nd", 0, TM_NONE, 0, kw_set_ordinal },
> + { "rd", 0, TM_NONE, 0, kw_set_ordinal },
> + { "th", 0, TM_NONE, 0, kw_set_ordinal },
> +
> + /* timezone codes: offset in minutes. FIXME: add more codes. */
> + { "pst", 0, TM_TZ, -8*60, NULL },
> + { "mst", 0, TM_TZ, -7*60, NULL },
> + { "cst", 0, TM_TZ, -6*60, NULL },
> + { "est", 0, TM_TZ, -5*60, NULL },
> + { "ast", 0, TM_TZ, -4*60, NULL },
> + { "nst", 0, TM_TZ, -(3*60+30), NULL },
> +
> + { "gmt", 0, TM_TZ, 0, NULL },
> + { "utc", 0, TM_TZ, 0, NULL },
> +
> + { "wet", 0, TM_TZ, 0, NULL },
> + { "cet", 0, TM_TZ, 1*60, NULL },
> + { "eet", 0, TM_TZ, 2*60, NULL },
> + { "fet", 0, TM_TZ, 3*60, NULL },
> +
> + { "wat", 0, TM_TZ, 1*60, NULL },
> + { "cat", 0, TM_TZ, 2*60, NULL },
> + { "eat", 0, TM_TZ, 3*60, NULL },
> +};
> +
> +/*
> + * Compare strings s and keyword. Return number of matching chars on
> + * match, 0 for no match. Match must be at least n chars (n == 0 all
> + * of keyword), otherwise it's not a match. Use match_case for case
> + * sensitive matching.
> + */
> +static size_t
> +stringcmp (const char *s, const char *keyword, size_t n, bool match_case)
> +{
> + size_t i;
> +
> + for (i = 0; *s && *keyword; i++, s++, keyword++) {
> + if (match_case) {
> + if (*s != *keyword)
> + break;
> + } else {
> + if (tolower ((unsigned char) *s) !=
> + tolower ((unsigned char) *keyword))
> + break;
> + }
> + }
> +
> + if (n)
> + return i < n ? 0 : i;
> + else
> + return *keyword ? 0 : i;
> +}
> +
> +/*
> + * Parse a keyword. Return < 0 on error, number of parsed chars on
> + * success.
> + */
> +static ssize_t
> +parse_keyword (struct state *state, const char *s)
> +{
> + unsigned int i;
> + size_t n, max_n = 0;
> + struct keyword *kw = NULL;
> + int r;
> +
> + /* Match longest keyword */
> + for (i = 0; i < ARRAY_SIZE (keywords); i++) {
> + /* Match case if keyword begins with upper case letter. */
> + bool mcase = isupper ((unsigned char) keywords[i].name[0]);
> +
> + n = stringcmp (s, keywords[i].name, keywords[i].minlen, mcase);
> + if (n > max_n) {
> + max_n = n;
> + kw = &keywords[i];
> + }
> + }
> +
> + if (!kw)
> + return -PARSE_TIME_ERR_KEYWORD;
> +
> + if (kw->set)
> + r = kw->set (state, kw);
> + else
> + r = kw_set_default (state, kw);
> +
> + return r < 0 ? r : max_n;
> +}
> +
> +/*
> + * Non-keyword parsers and their helpers.
> + */
> +
> +static int
> +set_user_tz (struct state *state, char sign, int hour, int min)
> +{
> + int tz = hour * 60 + min;
> +
> + assert (sign == '+' || sign == '-');
> +
> + if (hour < 0 || hour > 14 || min < 0 || min > 60 || min % 15)
> + return -PARSE_TIME_ERR_INVALIDTIME;
> +
> + if (sign == '-')
> + tz = -tz;
> +
> + return set_field (state, TM_TZ, tz);
> +}
> +
> +/*
> + * Independent parsing of a postponed number when it wasn't consumed
> + * during parsing of the following token.
> + *
> + * This should be able to trust that last_field and next_field are
> + * right.
> + */
> +static int
> +parse_postponed_number (struct state *state, int v, int n)
> +{
> + /*
> + * alright, these are really lone, won't affect parsing of
> + * following items... it's not a multiplier, those have been eaten
> + * away.
> + *
> + * also note numbers eaten away by parse_single_number.
> + */
> +
> + assert (n < 8);
> +
> + switch (n) {
> + case 1:
> + case 2:
> + /* hour or mday or year */
> + if (state->last_field == TM_ABS_MON && /* FIXME: written mon! */
> + !is_field_set (state, TM_ABS_MDAY)) {
> + return set_field (state, TM_ABS_MDAY, v);
> + }
> + break;
> + case 4:
> + /* YYYY or +/-HHMM for TZ or HHMM or DDMM */
> + /* FIXME: state->delim is no longer right for this function!
> + * why not, it could be! */
> + if (!is_field_set (state, TM_ABS_YEAR)) {
> + /* FIXME: check year? */
> + return set_field (state, TM_ABS_YEAR, v);
> + }
> + break;
> + case 6:
> + /* FIXME: HHMMSS or DDMMYY */
> + break;
> + case -1:
> + /* REVISIT */
> + break;
> + case 3:
> + case 5:
> + case 7:
> + default:
> + break;
> + }
> +
> + return -PARSE_TIME_ERR_FORMAT;
> +}
> +
> +/* Parse a single number. Typically postpone parsing until later. */
> +static int
> +parse_single_number (struct state *state, unsigned long v,
> + unsigned long n)
> +{
> + assert (n);
> +
> + /* parse things that can be parsed immediately */
> + if (n == 8) {
> + /* YYYYMMDD */
> + int year = v / 10000;
> + int mon = (v / 100) % 100;
> + int mday = v % 100;
> +
> + if (year < 1970 || mon < 1 || mon > 12 || mday < 1 || mday > 31)
> + return -PARSE_TIME_ERR_INVALIDDATE;
I think dates are checked for validity in more than one place. It might
be worth pulling that out into a function. In particular, someone might
want to check mday depending on month at some point.
> +
> + return set_abs_date (state, year, mon, mday);
> + } else if (n > 8) {
> + /* FIXME: seconds since epoch */
> + return -PARSE_TIME_ERR_FORMAT;
> + }
This is probably an important FIXME for notmuch for backward compatibility.
> +
> + if (v > INT_MAX)
> + return -PARSE_TIME_ERR_FORMAT;
> +
> + return set_postponed_number (state, v, n);
> +}
> +
> +static bool
> +is_time_sep (char c)
> +{
> + return c == ':';
> +}
> +
> +static bool
> +is_date_sep (char c)
> +{
> + return c == '/' || c == '-' || c == '.';
> +}
> +
> +static bool
> +is_sep (char c)
> +{
> + return is_time_sep (c) || is_date_sep (c);
> +}
> +
> +/* two-digit year: 00...69 is 2000s, 70...99 1900s, if n == 0 keep unset */
> +static int
> +expand_year (unsigned long year, size_t n)
> +{
> + if (n == 2) {
> + return (year < 70 ? 2000 : 1900) + year;
> + } else if (n == 4) {
> + return year;
> + } else {
> + return UNSET;
> + }
> +}
> +
> +static int
> +parse_date (struct state *state, char sep,
> + unsigned long v1, unsigned long v2, unsigned long v3,
> + size_t n1, size_t n2, size_t n3)
> +{
> + int year = UNSET, mon = UNSET, mday = UNSET;
> +
> + assert (is_date_sep (sep));
> +
> + switch (sep) {
> + case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
> + if (n1 != 1 && n1 != 2)
> + return -PARSE_TIME_ERR_DATEFORMAT;
> +
> + if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
> + /* M[M]/D[D][/YY[YY]] */
> + year = expand_year (v3, n3);
> + mon = v1;
> + mday = v2;
> + } else if (n2 == 4 && n3 == 0) {
> + /* M[M]/YYYY */
> + year = v2;
> + mon = v1;
> + } else {
> + return -PARSE_TIME_ERR_DATEFORMAT;
> + }
> + break;
> +
> + case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
> + if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
> + /* YYYY-MM[-DD] */
> + year = v1;
> + mon = v2;
> + if (n3)
> + mday = v3;
> + } else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
> + /* DD-MM[-YY[YY]] */
> + year = expand_year (v3, n3);
> + mon = v2;
> + mday = v1;
> + } else if (n1 == 2 && n2 == 4 && n3 == 0) {
> + /* MM-YYYY */
> + year = v2;
> + mon = v1;
> + } else {
> + return -PARSE_TIME_ERR_DATEFORMAT;
> + }
> + break;
> +
> + case '.': /* Date: D[D].M[M][.[YY[YY]]] */
> + if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
> + (n3 != 0 && n3 != 2 && n3 != 4))
> + return -PARSE_TIME_ERR_DATEFORMAT;
> +
> + year = expand_year (v3, n3);
> + mon = v2;
> + mday = v1;
> + break;
> + }
> +
> + if (year != UNSET && year < 1970)
> + return -PARSE_TIME_ERR_INVALIDDATE;
> +
> + if (mon != UNSET && (mon < 1 || mon > 12))
> + return -PARSE_TIME_ERR_INVALIDDATE;
> +
> + if (mday != UNSET && (mday < 1 || mday > 31))
> + return -PARSE_TIME_ERR_INVALIDDATE;
> +
> + return set_abs_date (state, year, mon, mday);
> +}
> +
> +static int
> +parse_time (struct state *state, char sep,
> + unsigned long v1, unsigned long v2, unsigned long v3,
> + size_t n1, size_t n2, size_t n3)
> +{
> + assert (is_time_sep (sep));
> +
> + if ((n1 != 1 && n1 != 2) || n2 != 2 || (n3 != 0 && n3 != 2))
> + return -PARSE_TIME_ERR_TIMEFORMAT;
> +
> + /*
> + * REVISIT: this means it's required to set time *before* being
> + * able to set timezone
> + */
> + if (is_field_set (state, TM_ABS_HOUR) &&
> + is_field_set (state, TM_ABS_MIN) &&
> + n1 == 2 && n2 == 2 && n3 == 0 &&
> + (state->delim == '+' || state->delim == '-')) {
> + return set_user_tz (state, state->delim, v1, v2);
> + }
> +
> + if (v1 > 24 || v2 > 60 || v3 > 60)
> + return -PARSE_TIME_ERR_INVALIDTIME;
Is the use of > rather than >= deliberate here (i.e. do you mean to
allow 24 for hours and 60 for minutes or seconds)?
> +
> + return set_abs_time (state, v1, v2, n3 ? v3 : 0);
> +}
> +
> +/* strtoul helper that assigns length */
> +static unsigned long
> +strtoul_len (const char *s, const char **endp, size_t *len)
> +{
> + unsigned long val = strtoul (s, (char **) endp, 10);
> +
> + *len = *endp - s;
> + return val;
> +}
> +
> +/*
> + * Parse a (group of) number(s). Return < 0 on error, number of parsed
> + * chars on success.
> + */
> +static ssize_t
> +parse_number (struct state *state, const char *s)
> +{
> + int r;
> + unsigned long v1, v2, v3 = 0;
> + size_t n1, n2, n3 = 0;
> + const char *p = s;
> + char sep;
> +
> + v1 = strtoul_len (p, &p, &n1);
> +
> + if (is_sep (*p) && isdigit ((unsigned char) *(p + 1))) {
> + sep = *p;
> + v2 = strtoul_len (p + 1, &p, &n2);
> + } else {
> + /* a single number */
> + r = parse_single_number (state, v1, n1);
> + if (r)
> + return r;
> +
> + return p - s;
> + }
> +
> + /* a group of two or three numbers? */
> + if (*p == sep && isdigit ((unsigned char) *(p + 1)))
> + v3 = strtoul_len (p + 1, &p, &n3);
> +
> + if (is_time_sep (sep))
> + r = parse_time (state, sep, v1, v2, v3, n1, n2, n3);
> + else
> + r = parse_date (state, sep, v1, v2, v3, n1, n2, n3);
> +
> + if (r)
> + return r;
> +
> + return p - s;
> +}
> +
> +/*
> + * Parse delimiter(s). Return < 0 on error, number of parsed chars on
> + * success.
> + */
> +static ssize_t
> +parse_delim (struct state *state, const char *s)
> +{
> + const char *p = s;
> +
> + /*
> + * REVISIT: any actions depending on the first delim after last
> + * field? what could it be?
> + */
> +
> + /*
> + * skip non-alpha and non-digit, and store the last for further
> + * processing
> + */
> + while (*p && !isalnum ((unsigned char) *p)) {
> + set_delim (state, *p);
> + p++;
> + }
> +
> + return p - s;
> +}
> +
> +/*
> + * Parse a date/time string. Return < 0 on error, number of parsed
> + * chars on success.
> + */
> +static ssize_t
> +parse_input (struct state *state, const char *s)
> +{
> + const char *p = s;
> + ssize_t n;
> + int r;
> +
> + while (*p) {
> + if (isalpha ((unsigned char) *p)) {
> + n = parse_keyword (state, p);
> + } else if (isdigit ((unsigned char) *p)) {
> + n = parse_number (state, p);
> + } else {
> + n = parse_delim (state, p);
> + }
> +
> + if (n <= 0) {
> + if (n == 0)
> + n = -PARSE_TIME_ERR;
> +
> + return n; /* FIXME */
> + }
> +
> + p += n;
> + }
> +
> + /* parse postponed number, if any */
> + r = handle_postponed_number (state);
> + if (r < 0)
> + return r;
> +
> + return p - s;
> +}
> +
> +/*
> + * Processing the parsed input.
> + */
> +
> +/*
> + * Initialize reference time to tm. Use time zone in state if
> + * specified, otherwise local time. Use now for reference time if
> + * non-NULL, otherwise current time.
> + */
> +static int
> +initialize_now (struct state *state, struct tm *tm, const time_t *now)
> +{
> + time_t t;
> +
> + if (now) {
> + t = *now;
> + } else {
> + if (time (&t) == (time_t) -1)
> + return -PARSE_TIME_ERR_LIB;
> + }
> +
> + if (is_field_set (state, TM_TZ)) {
> + /* some other time zone */
> +
> + /* adjust now according to the TZ */
> + t += get_field (state, TM_TZ) * 60;
> +
> + /* it's not gm, but this doesn't mess with the tz */
> + if (gmtime_r (&t, tm) == NULL)
> + return -PARSE_TIME_ERR_LIB;
> + } else {
> + /* local time */
> + if (localtime_r (&t, tm) == NULL)
> + return -PARSE_TIME_ERR_LIB;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Normalize tm according to mktime(3). Both mktime(3) and
> + * localtime_r(3) use local time, but they cancel each other out here,
> + * making this function agnostic to time zone.
> + */
> +static int
> +normalize_tm (struct tm *tm)
> +{
> + time_t t = mktime (tm);
> +
> + if (t == (time_t) -1)
> + return -PARSE_TIME_ERR_LIB;
> +
> + if (!localtime_r (&t, tm))
> + return -PARSE_TIME_ERR_LIB;
> +
> + return 0;
> +}
> +
> +/* Get field out of a struct tm. */
> +static int
> +tm_get_field (const struct tm *tm, enum field field)
> +{
> + switch (field) {
> + case TM_ABS_SEC: return tm->tm_sec;
> + case TM_ABS_MIN: return tm->tm_min;
> + case TM_ABS_HOUR: return tm->tm_hour;
> + case TM_ABS_MDAY: return tm->tm_mday;
> + case TM_ABS_MON: return tm->tm_mon + 1; /* 0- to 1-based */
> + case TM_ABS_YEAR: return 1900 + tm->tm_year;
> + case TM_ABS_WDAY: return tm->tm_wday;
> + case TM_ABS_ISDST: return tm->tm_isdst;
> + default:
> + assert (false);
> + break;
> + }
> +
> + return 0;
> +}
> +
> +/* Modify hour according to am/pm setting. */
> +static int
> +fixup_ampm (struct state *state)
> +{
> + int hour, hdiff = 0;
> +
> + if (!is_field_set (state, TM_AMPM))
> + return 0;
> +
> + if (!is_field_set (state, TM_ABS_HOUR))
> + return -PARSE_TIME_ERR_TIMEFORMAT;
> +
> + hour = get_field (state, TM_ABS_HOUR);
> + if (hour < 1 || hour > 12)
> + return -PARSE_TIME_ERR_INVALIDTIME;
> +
> + if (get_field (state, TM_AMPM)) {
> + /* 12pm is noon */
> + if (hour != 12)
> + hdiff = 12;
> + } else {
> + /* 12am is midnight, beginning of day */
> + if (hour == 12)
> + hdiff = -12;
> + }
> +
> + mod_field (state, TM_REL_HOUR, -hdiff);
> +
> + return 0;
> +}
> +
> +/* Combine absolute and relative fields, and round. */
> +static int
> +create_output (struct state *state, time_t *t_out, const time_t *tnow,
> + int round)
> +{
> + struct tm tm = { 0 };
> + struct tm now;
> + enum field f;
> + int r;
> + int week_round = PARSE_TIME_NO_ROUND;
> +
> + r = initialize_now (state, &now, tnow);
> + if (r)
> + return r;
> +
> + /* initialize uninitialized fields to now */
> + for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
> + if (state->set[f] == FIELD_NOW) {
> + state->tm[f] = tm_get_field (&now, f);
> + state->set[f] = FIELD_SET;
> + }
> + }
> +
> + /*
> + * If MON is set but YEAR is not, refer to past month.
> + *
> + * REVISIT: Why are month/week special in this regard? What about
> + * mday, or time. Should refer to past.
> + */
> + if (is_field_set (state, TM_ABS_MON) &&
> + !is_field_set (state, TM_ABS_YEAR)) {
> + if (get_field (state, TM_ABS_MON) >= tm_get_field (&now, TM_ABS_MON))
> + mod_field (state, TM_REL_YEAR, 1);
> + }
> +
> + /*
> + * If WDAY is set but MDAY is not, we consider WDAY relative
> + *
> + * REVISIT: This fails on stuff like "two months ago monday"
> + * because two months ago wasn't the same day as today. Postpone
> + * until we know date?
> + */
> + if (is_field_set (state, TM_ABS_WDAY) &&
> + !is_field_set (state, TM_ABS_MDAY)) {
> + int wday = get_field (state, TM_ABS_WDAY);
> + int today = tm_get_field (&now, TM_ABS_WDAY);
> + int rel_days;
> +
> + if (today > wday)
> + rel_days = today - wday;
> + else
> + rel_days = today + 7 - wday;
> +
> + /* this also prevents special week rounding from happening */
> + mod_field (state, TM_REL_DAY, rel_days);
> +
> + unset_field (state, TM_ABS_WDAY);
> + }
> +
> + r = fixup_ampm (state);
> + if (r)
> + return r;
> +
> + /*
> + * Iterate fields from least accurate to most accurate, and set
> + * unset fields according to requested rounding.
> + */
> + for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
> + if (round != PARSE_TIME_NO_ROUND) {
> + enum field r = abs_to_rel_field (f);
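The comment above says "from least accurate to most accurate", but the loop starts at TM_ABS_SEC (the most accurate field), so the comment and the code seem to disagree on the ordering.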
> +
> + if (is_field_set (state, f) || is_field_set (state, r)) {
> + if (round >= PARSE_TIME_ROUND_UP)
> + mod_field (state, r, -1);
> + round = PARSE_TIME_NO_ROUND; /* no more rounding */
> + } else {
> + if (f == TM_ABS_MDAY &&
> + is_field_set (state, TM_REL_WEEK)) {
> + /* week is most accurate */
> + week_round = round;
> + round = PARSE_TIME_NO_ROUND;
> + } else {
> + set_field (state, f, field_zero (f));
> + }
> + }
> + }
> +
> + if (!is_field_set (state, f))
> + set_field (state, f, tm_get_field (&now, f));
> + }
> +
> + /* special case: rounding with week accuracy */
> + if (week_round != PARSE_TIME_NO_ROUND) {
> + /* temporarily set more accurate fields to now */
> + set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
> + set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
> + set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
> + set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
> + }
> +
> + /*
> + * set all fields. they may contain out of range values before
> + * normalization by mktime(3).
> + */
> + tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
> + tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
> + tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
> + tm.tm_mday = get_field (state, TM_ABS_MDAY) -
> + get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
> + tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
> + tm.tm_mon--; /* 1- to 0-based */
> + tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;
> +
> + /*
> + * It's always normal time.
> + *
> + * REVISIT: This is probably not a solution that universally
> + * works. Just make sure DST is not taken into account. We don't
> + * want rounding to be affected by DST.
> + */
> + tm.tm_isdst = -1;
> +
> + /* special case: rounding with week accuracy */
> + if (week_round != PARSE_TIME_NO_ROUND) {
> + /* normalize to get proper tm.wday */
> + r = normalize_tm (&tm);
> + if (r < 0)
> + return r;
> +
> + /* set more accurate fields back to zero */
> + tm.tm_sec = 0;
> + tm.tm_min = 0;
> + tm.tm_hour = 0;
> + tm.tm_isdst = -1;
> +
> + /* monday is the true 1st day of week, but this is easier */
> + if (week_round <= PARSE_TIME_ROUND_DOWN)
> + tm.tm_mday -= tm.tm_wday;
> + else
> + tm.tm_mday += 7 - tm.tm_wday;
> + }
> +
> + /* if TZ specified, convert from TZ to local time for mktime(3) */
> + if (is_field_set (state, TM_TZ)) {
> + time_t t = mktime (&tm);
> +
> + /* from specified TZ to UTC */
> + tm.tm_min -= get_field (state, TM_TZ);
> +
> + /* from UTC to local TZ (yes, it's hacky - FIXME) */
> + tm.tm_sec += difftime (mktime (localtime (&t)), mktime (gmtime (&t)));
> + }
> +
> + /* FIXME: check return value, don't set if fail */
> + *t_out = mktime (&tm);
> +
> + return 0;
> +}
> +
> +/* internally, all errors are < 0. parse_time_string() returns errors > 0. */
> +#define EXTERNAL_ERR(r) (-r)
> +
> +int
> +parse_time_string (const char *s, time_t *t, const time_t *now, int round)
> +{
> + struct state state = { { 0 } };
> + int r;
> +
> + if (!s || !t)
> + return EXTERNAL_ERR (-PARSE_TIME_ERR);
> +
> + r = parse_input (&state, s);
> + if (r < 0)
> + return EXTERNAL_ERR (r);
> +
> + r = create_output (&state, t, now, round);
> + if (r < 0)
> + return EXTERNAL_ERR (r);
> +
> + return 0;
> +}
> diff --git a/lib/parse-time-string.h b/lib/parse-time-string.h
> new file mode 100644
> index 0000000..50b7c6f
> --- /dev/null
> +++ b/lib/parse-time-string.h
> @@ -0,0 +1,95 @@
> +/*
> + * parse time string - user friendly date and time parser
> + * Copyright © 2012 Jani Nikula
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Jani Nikula <jani at nikula.org>
> + */
> +
> +#ifndef PARSE_TIME_STRING_H
> +#define PARSE_TIME_STRING_H
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <time.h>
> +
> +/* return values for parse_time_string() */
> +enum {
> + PARSE_TIME_OK = 0,
> + PARSE_TIME_ERR, /* unspecified error */
> + PARSE_TIME_ERR_LIB, /* library call failed */
> + PARSE_TIME_ERR_ALREADYSET, /* attempt to set unit twice */
> + PARSE_TIME_ERR_FORMAT, /* generic date/time format error */
> + PARSE_TIME_ERR_DATEFORMAT, /* date format error */
> + PARSE_TIME_ERR_TIMEFORMAT, /* time format error */
> + PARSE_TIME_ERR_INVALIDDATE, /* date value error */
> + PARSE_TIME_ERR_INVALIDTIME, /* time value error */
> + PARSE_TIME_ERR_KEYWORD, /* unknown keyword */
> +};
> +
> +/* round values for parse_time_string() */
> +enum {
> + PARSE_TIME_ROUND_DOWN = -1,
> + PARSE_TIME_NO_ROUND = 0,
> + PARSE_TIME_ROUND_UP = 1,
> +};
> +
> +/**
> + * parse_time_string() - user friendly date and time parser
> + * @s: string to parse
> + * @t: pointer to time_t to store parsed time in
> + * @now: pointer to time_t containing reference date/time, or NULL
> + * @round: PARSE_TIME_NO_ROUND, PARSE_TIME_ROUND_DOWN, or
> + * PARSE_TIME_ROUND_UP
> + *
> + * Parse a date/time string 's' and store the parsed date/time result
> + * in 't'.
> + *
> + * A reference date/time is used for determining the "date/time units"
> + * (roughly equivalent to struct tm members) not specified by 's'. If
> + * 'now' is non-NULL, it must contain a pointer to a time_t to be used
> + * as reference date/time. Otherwise, the current time is used.
> + *
> + * If 's' does not specify a full date/time, the 'round' parameter
> + * specifies if and how the result should be rounded as follows:
> + *
> + * PARSE_TIME_NO_ROUND: All date/time units that are not specified
> + * by 's' are set to the corresponding unit derived from the
> + * reference date/time.
> + *
> + * PARSE_TIME_ROUND_DOWN: All date/time units that are more accurate
> + * than the most accurate unit specified by 's' are set to the
> + * smallest valid value for that unit. Rest of the unspecified units
> + * are set as in PARSE_TIME_NO_ROUND.
> + *
> + * PARSE_TIME_ROUND_UP: All date/time units that are more accurate
> + * than the most accurate unit specified by 's' are set to the
> + * smallest valid value for that unit. The most accurate unit
> + * specified by 's' is incremented by one (and this is rolled over
> + * to the less accurate units as necessary). Rest of the unspecified
> + * units are set as in PARSE_TIME_NO_ROUND.
> + *
> + * Return 0 (PARSE_TIME_OK) for successfully parsed date/time, or one
> + * of PARSE_TIME_ERR_* on error. 't' is not modified on error.
> + */
> +int parse_time_string (const char *s, time_t *t, const time_t *now, int round);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* PARSE_TIME_STRING_H */
> --
> 1.7.5.4
>
> _______________________________________________
> notmuch mailing list
> notmuch at notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch
More information about the notmuch
mailing list