[PATCH] contrib/nmbug: new script for sharing tags with a given prefix.

David Bremner david at tethera.net
Sun Nov 6 16:59:46 PST 2011


From: David Bremner <bremner at debian.org>

The main idea is consider the notmuch database as analogous to the
work-tree. A bare git repo is maintained in the users home directory,
with a tree of the form tags/$message-id/$tag

Like notmuch and git, we have a set of subcommnds, mainly modelled on
git.

The most important commands are

    commit	xapian -> git
    checkout	git -> xapian
    merge	fetched git + git -> xapian
    status	find differences between xapian, git, and remote git.

There are also some convenience wrappers around git commands.

In order to encode tags (viewed as octet sequences) into filenames,
we whitelist a smallish set of characters and %hex escape anything outside.

The prefix is omitted in git, which lets one save and restore to
different prefixes (although this is only lightly tested).
---

Many things have changed, time for a repost. It no long needs the
"restore --match" patches. 

 contrib/nmbug |  565 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 565 insertions(+), 0 deletions(-)
 create mode 100755 contrib/nmbug

diff --git a/contrib/nmbug b/contrib/nmbug
new file mode 100755
index 0000000..2128b95
--- /dev/null
+++ b/contrib/nmbug
@@ -0,0 +1,565 @@
+#!/usr/bin/env perl
+# Copyright (c) 2011 David Bremner
+# License: same as notmuch
+
+use strict;
+use File::Path qw(remove_tree make_path);
+use File::Temp qw(tempdir tempfile);
+use File::Basename;
+use Pod::Usage;
+
+no encoding;
+
+my $NMBGIT = $ENV{NMBGIT} || $ENV{HOME}."/.nmbug";
+
+$NMBGIT .= '/.git' if (-d $NMBGIT.'/.git');
+
+my $TAGPREFIX = $ENV{NMBPREFIX} || "notmuch::";
+
+# magic hashes for git
+my $EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391';
+my $EMPTYTREE = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
+
+# for encoding
+
+my $ESCAPE_CHAR='%';
+my $NO_ESCAPE= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,";
+my $MUST_ENCODE=qr{[^\Q$NO_ESCAPE\E]};
+my $ESCAPED_RX=qr{$ESCAPE_CHAR[A-Fa-f0-9]{2}};
+
+my %command=(
+	     archive	=> \&do_archive,
+	     checkout	=>\&do_checkout,
+	     commit	=> \&do_commit,
+	     fetch	=> \&do_fetch,
+	     help	=> \&do_help,
+	     log	=> \&do_log,
+	     merge	=> \&do_merge,
+	     pull	=> \&do_pull,
+	     push	=> \&do_push,
+	     status	=> \&do_status,
+	     );
+
+my $subcommand=shift;
+
+if (!exists $command{$subcommand}){
+  usage();
+}
+
+&{$command{$subcommand}}(@ARGV);
+
+sub get_tags {
+  my $prefix=shift;
+  my @tags;
+  open my $fh, 'notmuch search --output=tags "*"|' or die "error dumping tags";
+  while (<$fh>) {
+    chomp();
+    push @tags, $_ if (m/^$prefix/);
+  }
+  return @tags;
+}
+
+sub do_archive {
+  system ('git', "--git-dir=$NMBGIT", 'archive', 'HEAD');
+}
+
+sub is_committed(){
+  my $status = compute_status();
+  return scalar (@{$status->{added}} ) + scalar (@{$status->{deleted}} )==0
+}
+
+sub do_commit {
+  my @args=@_;
+
+  if ( is_committed() ){
+    print "Nothing to commit\n";
+    return;
+  }
+
+  my $index=index_tags();
+
+  my $tree= git ('write-tree', { GIT_INDEX_FILE=>$index })
+    or die "no output from write-tree";
+
+  my $parent = git ( 'rev-parse', 'HEAD'  )
+    or die "no output from rev-parse";
+
+  my $commit = git ('commit-tree', $tree, '-p', $parent, {},  @args);
+
+  git ('update-ref', 'HEAD', $commit);
+
+  unlink $index || die "unlink: $!";
+
+}
+
+sub do_fetch {
+  my $remote = shift || "origin";
+
+  git ('fetch', $remote);
+}
+
+sub git {
+
+  return subcommand('git', at _);
+
+}
+
+sub subcommand {
+  die 'command and subcommand needed' unless (scalar(@_) >= 2);
+
+  return run (@_);
+}
+
+# arguments are any number of strings for ARGV, an optional hash ref
+# for settings for the environment, followed by any number of strings as 
+# as lines for stdin.
+
+sub run {
+
+  my $command=shift 
+    or die "command not specified";
+
+  my @args;
+
+
+  while ( scalar(@_) && !ref($_[0]) ) {
+    push @args, shift;
+  };
+
+  my %SETENV={};
+  if(ref($_[0]) eq 'HASH'){
+    my $ref=shift;
+    %SETENV=%{$ref};
+  }
+
+  $SETENV{GIT_DIR} ||= $NMBGIT;
+
+  my @input=@_;
+  my @output;
+
+  my $pid = open my $child, '-|';
+  if ($pid){
+    while(<$child>){
+      chomp();
+      push @output,$_;
+    }
+    close ($child);
+  } else {
+
+    # setup child environment
+    while (my ($key,$val) = each %SETENV) {
+      $ENV{$key}=$val;
+    }
+
+    read_from(@input);
+    exec($command, at args) || die "exec $command @args: $!";
+  }
+
+  if (wantarray()) {
+    return @output;
+  }
+  elsif (defined wantarray()) {
+    return join("\n", at output);
+  }
+  else {
+    return;
+  }
+}
+
+sub read_from {
+    close STDIN;
+    if (!scalar(@_)){
+      open STDIN, '<', '/dev/null' || die "reopening stdin: $!";
+    } else {
+      my ($fh,$tempfile) = tempfile();
+      foreach my $line (@_){
+	print $fh $line;
+      }
+      close $fh || die "closing";
+
+      open STDIN, '<', $tempfile or
+	die "reopening stdin: $!";
+    }
+}
+
+sub notmuch {
+  my @args=@_;
+  system ('notmuch', @args) == 0 or die  "notmuch @args failed: $?"
+}
+
+
+sub index_tags {
+
+  my $index=$NMBGIT."/nmbug.index";
+
+  my $query = join " ", map ("tag:$_", get_tags($TAGPREFIX));
+  open my $fh, "notmuch dump -- $query|" or die "notmuch dump: $!";
+
+  git ('read-tree', $EMPTYTREE);
+  open my $git, "|GIT_DIR=$NMBGIT GIT_INDEX_FILE=$index git update-index --index-info" or die "git update-index";
+
+  while (<$fh>) {
+    m/ ( [^ ]* ) \s+ \( ([^\)]* ) \) /x || die "syntax error in dump";
+    my ($id,$rest) = ($1,$2);
+    index_tags_for_msg ($git,$id, split(" ", $rest));
+  }
+
+  close $git;
+  return $index;
+}
+
+sub index_tags_for_msg {
+  my $fh=shift;
+  my $msgid = shift;
+  my @tags=@_;
+
+  foreach my $tag (@tags){
+    # insist prefix is there, but remove it before writing
+    next unless ($tag =~ s/^$TAGPREFIX//);
+    my $tagpath = 'tags/' . encode_for_fs($msgid)  . '/' . encode_for_fs ($tag);
+    print $fh "100644 blob $EMPTYBLOB\t$tagpath\n";
+  }
+}
+
+sub do_checkout {
+  do_sync (action => 'checkout');
+}
+
+sub do_sync {
+
+  my %args=@_;
+
+  my $status=compute_status();
+  my ($A_action, $D_action);
+
+  if ($args{action} eq 'checkout') {
+    $A_action = '-';
+    $D_action = '+';
+  } else {
+    $A_action = '+';
+    $D_action = '-';
+  }
+
+  foreach my $pair (@{$status->{added}}){
+
+    notmuch ('tag', $A_action.$TAGPREFIX.$pair->{tag},
+	     'id:'.$pair->{id});
+  }
+
+  foreach my $pair (@{$status->{deleted}}){
+    notmuch ('tag', $D_action.$TAGPREFIX.$pair->{tag},
+	     'id:'.$pair->{id});
+  }
+
+}
+
+sub insist_committed {
+
+  if ( !is_committed () ){
+    print "Uncommitted changes to $TAGPREFIX* tags in notmuch
+
+For a summary of changes, run 'nmbug status'
+To save your changes,     run 'nmbug commit' before merging/pull
+To discard your changes,  run 'nmbug checkout'
+";
+    exit(1);
+  }
+
+}
+
+sub do_pull {
+  my $remote = shift || "origin";
+
+  git ( 'fetch', $remote);
+
+  do_merge();
+}
+
+sub do_merge {
+  insist_committed();
+
+  my $tempwork= tempdir ("/tmp/nmbug-merge.XXXXXX", CLEANUP=>1);
+
+  git ( 'checkout', '-f', 'HEAD', { GIT_WORK_TREE=> $tempwork });
+
+  git ( 'merge', 'FETCH_HEAD', { GIT_WORK_TREE=> $tempwork });
+
+  do_checkout();
+}
+
+sub do_log {
+  # we don't want output trapping here, because we want the pager.
+  system ( 'git', "--git-dir=$NMBGIT", 'log', '--name-status', @_);
+}
+
+sub do_push {
+  my $remote = shift || "origin";
+
+  git ('push', $remote);
+}
+
+sub do_status {
+  my $status = compute_status();
+  
+  foreach my $pair (@{$status->{added}}){
+    printf "A\t%s\t%s\n",$pair->{id}, $pair->{tag};
+  }
+
+  foreach my $pair (@{$status->{deleted}}){
+    printf "D\t%s\t%s\n",$pair->{id}, $pair->{tag};
+  }
+
+  foreach my $id (@{$status->{missing}}){
+    print "U\t$id\n",
+  }
+
+  if (is_unmerged ()) {
+    foreach my $pair (diff_refs('A')){
+      printf "a\t%s\t%s\n",$pair->{id}, $pair->{tag};
+    }
+
+    foreach my $pair (diff_refs('D')){
+      printf "d\t%s\t%s\n",$pair->{id}, $pair->{tag};
+    }
+  }
+
+}
+
+sub is_unmerged {
+  my $fetch_head = git ('rev-parse', 'FETCH_HEAD');
+  my $base = git ( 'merge-base', 'HEAD', 'FETCH_HEAD');
+
+  return ($base ne $fetch_head);
+
+}
+sub compute_status {
+  my %args=@_;
+
+  my @added;
+  my @deleted;
+  my @missing;
+
+  my $index=index_tags();
+
+  my @maybe_deleted = diff_index($index,'D');
+
+  foreach my $pair (@maybe_deleted){
+
+    my $id = $pair->{id};
+
+    open my $fh, "notmuch search --output=files id:$id |"
+      or die "searching for $id";
+    if (!<$fh>) {
+      push @missing, $id;
+    } else {
+      push @deleted, $pair;
+    }
+  }
+
+
+  @added = diff_index ($index, 'A');
+
+  unlink $index || die "unlink $index: $!";
+
+  return { added => [@added], deleted => [@deleted], missing=> [@missing] };
+}
+
+sub diff_index {
+  my $index=shift;
+  my $filter=shift;
+
+  my @lines=git( qw/diff-index --cached/,
+		 "--diff-filter=$filter", qw/--name-only HEAD/,
+		 {GIT_INDEX_FILE=>$index} );
+
+  return unpack_diff_lines(@lines);
+}
+
+sub diff_refs {
+  my $filter=shift;
+  my $ref1 = shift || 'HEAD';
+  my $ref2 = shift || 'FETCH_HEAD';
+
+  my @lines=git( 'diff', "--diff-filter=$filter", '--name-only',
+		 $ref1, $ref2);
+
+  return unpack_diff_lines(@lines);
+}
+
+
+sub unpack_diff_lines {
+  my @found;
+
+  foreach (@_){
+    chomp();
+    my ($id,$tag) = m at tags/ ([^/]+) / ([^/]+) @x;
+
+    $id = decode_from_fs($id);
+    $tag = decode_from_fs($tag);
+
+    push @found, { id => $id, tag => $tag };
+  }
+
+  return @found;
+}
+
+sub encode_for_fs{
+  my $str=shift;
+
+  $str=~ s/($MUST_ENCODE)/"$ESCAPE_CHAR".sprintf("%02x",ord($1))/ge;
+  return $str;
+}
+
+sub decode_from_fs{
+  my $str=shift;
+
+  $str=~ s/$ESCAPED_RX/ hex($1)/eg;
+
+  return $str;
+
+}
+
+
+sub usage {
+  pod2usage();
+  exit(1);
+}
+
+sub do_help {
+  pod2usage( -verbose=>2 );
+  exit(0);
+}
+
+__END__
+
+=head1 NAME
+
+nmbug - manage notmuch tags about notmuch
+
+=head1 SYNOPSIS
+
+nmbug subcommand [options]
+
+B<nmbug help> for more help
+
+=head1 OPTIONS
+
+=head2 Most common commands
+
+=over 8
+
+=item B<commit> [message]
+
+Commit appropriately prefixed tags from the notmuch database to
+git. Any extra arguments are used (one per line) as a commit message.
+
+=item  B<push> [remote]
+
+push local nmbug git state to remote repo
+
+=item  B<pull> [remote]
+
+pull (merge) remote repo changes to notmuch. B<pull> is equivalent to
+B<fetch> followed by B<merge>.
+
+=back
+
+=head2 Other Useful Commands
+
+=over 8
+
+=item B<checkout>
+
+Update the notmuch database from git. This is mainly useful to discard
+your changes in notmuch relative to git.
+
+=item B<fetch> [remote]
+
+Fetch changes from the remote repo (see merge to bring those changes
+into notmuch). 
+
+=item B<help> [subcommand]
+
+print help [for subcommand]
+
+=item B<log> [parameters]
+
+A simple wrapper for git log. After running C<nmbug fetch>, you can
+inspect the changes with C<nmbug log HEAD..FETCH_HEAD>
+
+=item B<merge> 
+
+Merge changes from FETCH_HEAD into HEAD, and load the result into
+notmuch.
+
+=item  B<status>
+
+Show pending updates in notmuch or git repo. See below for more
+information about the output format.
+
+=back
+
+=head2 Less common commands
+
+=over 8
+
+=item B<archive>
+
+Dump a tar archive (using git archive) of the current nmbug tag set.
+
+=back
+
+
+=head1 STATUS FORMAT
+
+B<nmbug status> prints lines of the form
+
+   c Message-Id tag
+
+where c is
+
+=over 8
+
+=item B<A>
+
+Tag is present in notmuch database, but not committed to nmbug
+(equivalently, tag has been deleted in nmbug repo, e.g. by a pull, but
+not restored to notmuch database).
+
+=item B<a>
+
+Tag is fetched, but not merged into notmuch.
+
+=item B<D>
+
+Tag is present in nmbug repo, but not restored to notmuch database
+(equivalently, tag has been deleted in notmuch)
+
+=item B<d>
+
+Tag deletion is fetched, but not merged into notmuch.
+
+=item B<U>
+
+Message is unknown (missing from local notmuch database)
+
+=back
+
+=head1 DUMP FORMAT
+
+Each tag $tag for message with Message-Id $id is written to
+an empty file
+
+	tags/encode($id)/encode($tag)
+
+The encoding preserves alphanumerics, and the characters "+-_@=.:,"
+(not the quotes).  All other octets are replaced with '%' followed by
+a two digit hex number.
+
+=head1 ENVIRONMENT
+
+B<NMBGIT> specifies the location of the git repository used by nmbug. 
+If not specified $HOME/.nmbug is used.
+
+B<NMBPREFIX> specifies the prefix in the notmuch database for tags of
+interest to nmbug. If not specified 'notmuch::' is used.
-- 
1.7.6.3



More information about the notmuch mailing list