[notmuch] [PATCH] Reindex larger files that duplicate ids we have
James Westby
jw+debian at jameswestby.net
Fri Dec 18 17:29:09 PST 2009
When we see a message whose message id we already
have stored, check if the new file is larger. If it is,
then re-index and set the file size and name to be
those of the new message.
---
Here's the (quite simple) patch to implement indexing the
largest copy of each mail that we have.
Does the re-indexing replace the old terms? If not, then in the
case where you had a collision with different text, a search
could return mails that don't contain that text.
I don't think it's a big issue though, even if that is the
case.
Thanks,
James
lib/database.cc | 4 +++-
lib/index.cc | 27 +++++++++++++++++++++++++++
lib/message.cc | 31 ++++++++++++++++++++++++++-----
lib/notmuch-private.h | 13 +++++++++++++
lib/notmuch.h | 5 +++--
5 files changed, 72 insertions(+), 8 deletions(-)
diff --git a/lib/database.cc b/lib/database.cc
index d834d94..64f29b9 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1000,7 +1000,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
if (ret)
goto DONE;
} else {
- ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+ ret = _notmuch_message_possibly_reindex (message, filename, size);
+ if (!ret)
+ ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
goto DONE;
}
diff --git a/lib/index.cc b/lib/index.cc
index 125fa6c..14c3268 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -312,3 +312,30 @@ _notmuch_message_index_file (notmuch_message_t *message,
return ret;
}
+
+notmuch_status_t
+_notmuch_message_possibly_reindex (notmuch_message_t *message,
+ const char *filename,
+ const off_t size)
+{
+ off_t realsize = size;
+ off_t stored_size;
+ notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+
+ ret = _notmuch_message_size_on_disk (message, filename, &realsize);
+ if (ret)
+ goto DONE;
+ stored_size = _notmuch_message_get_filesize (message);
+ if (realsize > stored_size) {
+ ret = _notmuch_message_index_file (message, filename);
+ if (ret)
+ goto DONE;
+ ret = _notmuch_message_set_filesize (message, filename, realsize);
+ _notmuch_message_set_filename (message, filename);
+ _notmuch_message_sync (message);
+ }
+
+ DONE:
+ return ret;
+
+}
diff --git a/lib/message.cc b/lib/message.cc
index 2bfc5ed..cc32741 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -427,23 +427,38 @@ _notmuch_message_set_filename (notmuch_message_t *message,
}
notmuch_status_t
-_notmuch_message_set_filesize (notmuch_message_t *message,
+_notmuch_message_size_on_disk (notmuch_message_t *message,
const char *filename,
- const off_t size)
+ off_t *size)
{
struct stat st;
- off_t realsize = size;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
- if (realsize < 0) {
+ if (*size < 0) {
if (stat (filename, &st)) {
ret = NOTMUCH_STATUS_FILE_ERROR;
goto DONE;
} else {
- realsize = st.st_size;
+ *size = st.st_size;
}
}
+ DONE:
+ return ret;
+}
+
+notmuch_status_t
+_notmuch_message_set_filesize (notmuch_message_t *message,
+ const char *filename,
+ const off_t size)
+{
+ off_t realsize = size;
+ notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+
+ ret = _notmuch_message_size_on_disk (message, filename, &realsize);
+ if (ret)
+ goto DONE;
+
message->doc.add_value (NOTMUCH_VALUE_FILESIZE,
Xapian::sortable_serialise (realsize));
@@ -451,6 +466,12 @@ _notmuch_message_set_filesize (notmuch_message_t *message,
return ret;
}
+off_t
+_notmuch_message_get_filesize (notmuch_message_t *message)
+{
+ return Xapian::sortable_unserialise (message->doc.get_value (NOTMUCH_VALUE_FILESIZE));
+}
+
const char *
notmuch_message_get_filename (notmuch_message_t *message)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 1ba3055..cf65fd9 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -199,6 +199,14 @@ _notmuch_message_set_filesize (notmuch_message_t *message,
const char *filename,
const off_t size);
+off_t
+_notmuch_message_get_filesize (notmuch_message_t *message);
+
+notmuch_status_t
+_notmuch_message_size_on_disk (notmuch_message_t *message,
+ const char *filename,
+ off_t *size);
+
void
_notmuch_message_ensure_thread_id (notmuch_message_t *message);
@@ -218,6 +226,11 @@ notmuch_status_t
_notmuch_message_index_file (notmuch_message_t *message,
const char *filename);
+notmuch_status_t
+_notmuch_message_possibly_reindex (notmuch_message_t *message,
+ const char *filename,
+ const off_t size);
+
/* message-file.c */
/* XXX: I haven't decided yet whether these will actually get exported
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 5d0d224..892e420 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -256,8 +256,9 @@ notmuch_database_get_timestamp (notmuch_database_t *database,
* NOTMUCH_STATUS_SUCCESS: Message successfully added to database.
*
* NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message
- * ID as another message already in the database. Nothing added
- * to the database.
+ * ID as another message already in the database. This may have
+ * caused some further indexing to be done, but it is not an entirely
+ * new message.
*
* NOTMUCH_STATUS_FILE_ERROR: an error occurred trying to open the
* file, (such as permission denied, or file not found,
--
1.6.3.3
More information about the notmuch
mailing list