[PATCH 7/8] emacs: Implement an incremental JSON parser

Austin Clements amdragon at MIT.EDU
Thu Jul 5 11:36:19 PDT 2012


Quoth Mark Walters on Jul 05 at  9:30 am:
> On Tue, 03 Jul 2012, Austin Clements <amdragon at MIT.EDU> wrote:
> > This parser is designed to read streaming JSON whose structure is
> > known to the caller.  Like a typical JSON parsing interface, it
> > provides a function to read a complete JSON value from the input.
> > However, it extends this with an additional function that
> > requires the next value in the input to be a compound value and
> > descends into it, allowing its elements to be read one at a time
> > or further descended into.  Both functions can return 'retry to
> > indicate that not enough input is available.
> >
> > The parser supports efficient partial parsing, so there's no need to
> > frame the input for correctness or performance.
> >
> > Currently only descending into JSON lists is supported because that's
> > all we need, but support for descending into JSON objects can be added
> > in the future.
> > ---
> >  emacs/notmuch-lib.el |  183 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 183 insertions(+)
> >
> > diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el
> > index c829df3..f7cda33 100644
> > --- a/emacs/notmuch-lib.el
> > +++ b/emacs/notmuch-lib.el
> > @@ -23,6 +23,7 @@
> >  
> >  (require 'mm-view)
> >  (require 'mm-decode)
> > +(require 'json)
> >  (eval-when-compile (require 'cl))
> >  
> >  (defvar notmuch-command "notmuch"
> > @@ -296,6 +297,188 @@ was called."
> >  (defvar notmuch-show-process-crypto nil)
> >  (make-variable-buffer-local 'notmuch-show-process-crypto)
> >  
> > +;; Incremental JSON parsing
> > +
> > +(defun notmuch-json-create-parser (buffer)
> > +  "Return a streaming JSON parser that consumes input from BUFFER.
> > +
> > +This parser is designed to read streaming JSON whose structure is
> > +known to the caller.  Like a typical JSON parsing interface, it
> > +provides a function to read a complete JSON value from the input.
> > +However, it extends this with an additional function that
> > +requires the next value in the input to be a compound value and
> > +descends into it, allowing its elements to be read one at a time
> > +or further descended into.  Both functions can return 'retry to
> > +indicate that not enough input is available.
> > +
> > +The parser always consumes input from BUFFER's point.  Hence, the
> > +caller is allowed to delete and data before point and may
> > +resynchronize after an error by moving point."
> > +
> > +  (list buffer
> > +	;; Terminator stack: a stack of characters that indicate the
> > +	;; end of the compound values enclosing point
> > +	'()
> > +	;; Next: One of
> > +	;; * 'expect-value if the next token must be a value, but a
> > +	;;   value has not yet been reached
> > +	;; * 'value if point is at the beginning of a value
> > +	;; * 'expect-comma if the next token must be a comma
> > +	'expect-value
> > +	;; Allow terminator: non-nil if the next token may be a
> > +	;; terminator
> > +	nil
> > +	;; Partial parse position: If state is 'value, a marker for
> > +	;; the position of the partial parser or nil if no partial
> > +	;; parsing has happened yet
> > +	nil
> > +	;; Partial parse state: If state is 'value, the current
> > +	;; `parse-partial-sexp' state
> > +	nil))
> > +
> > +(defmacro notmuch-json-buffer (jp) `(first ,jp))
> > +(defmacro notmuch-json-term-stack (jp) `(second ,jp))
> > +(defmacro notmuch-json-next (jp) `(third ,jp))
> > +(defmacro notmuch-json-allow-term (jp) `(fourth ,jp))
> > +(defmacro notmuch-json-partial-pos (jp) `(fifth ,jp))
> > +(defmacro notmuch-json-partial-state (jp) `(sixth ,jp))
> > +
> > +(defvar notmuch-json-syntax-table
> > +  (let ((table (make-syntax-table)))
> > +    ;; The standard syntax table is what we need except that "." needs
> > +    ;; to have word syntax instead of punctuation syntax.
> > +    (modify-syntax-entry ?. "w" table)
> > +    table)
> > +  "Syntax table used for incremental JSON parsing.")
> > +
> > +(defun notmuch-json-scan-to-value (jp)
> > +  ;; Helper function that consumes separators, terminators, and
> > +  ;; whitespace from point.  Returns nil if it successfully reached
> > +  ;; the beginning of a value, 'end if it consumed a terminator, or
> > +  ;; 'retry if not enough input was available to reach a value.  Upon
> > +  ;; nil return, (notmuch-json-next jp) is always 'value.
> > +
> > +  (if (eq (notmuch-json-next jp) 'value)
> > +      ;; We're already at a value
> > +      nil
> > +    ;; Drive the state toward 'expect-value
> > +    (skip-chars-forward " \t\r\n")
> > +    (or (when (eobp) 'retry)
> > +	;; Test for the terminator for the current compound
> > +	(when (and (notmuch-json-allow-term jp)
> > +		   (eq (char-after) (car (notmuch-json-term-stack jp))))
> > +	  ;; Consume it and expect a comma or terminator next
> > +	  (forward-char)
> > +	  (setf (notmuch-json-term-stack jp) (cdr (notmuch-json-term-stack jp))
> > +		(notmuch-json-next jp) 'expect-comma
> > +		(notmuch-json-allow-term jp) t)
> > +	  'end)
> > +	;; Test for a separator
> > +	(when (eq (notmuch-json-next jp) 'expect-comma)
> > +	  (when (/= (char-after) ?,)
> > +	    (signal 'json-readtable-error (list "expected ','")))
> > +	  ;; Consume it, switch to 'expect-value, and disallow a
> > +	  ;; terminator
> > +	  (forward-char)
> > +	  (skip-chars-forward " \t\r\n")
> > +	  (setf (notmuch-json-next jp) 'expect-value
> > +		(notmuch-json-allow-term jp) nil)
> > +	  ;; We moved point, so test for eobp again and fall through
> > +	  ;; to the next test if there's more input
> > +	  (when (eobp) 'retry))
> > +	;; Next must be 'expect-value and we know this isn't
> > +	;; whitespace, EOB, or a terminator, so point must be on a
> > +	;; value
> > +	(progn
> > +	  (assert (eq (notmuch-json-next jp) 'expect-value))
> > +	  (setf (notmuch-json-next jp) 'value)
> > +	  nil))))
> > +
> > +(defun notmuch-json-begin-compound (jp)
> > +  "Parse the beginning of a compound value and traverse inside it.
> > +
> > +Returns 'retry if there is insufficient input to parse the
> > +beginning of the compound.  If this is able to parse the
> > +beginning of a compound, it returns t and later calls to
> > +`notmuch-json-read' will return the compound's elements.
> > +
> > +Entering JSON objects is current unimplemented."
> > +
> > +  (with-current-buffer (notmuch-json-buffer jp)
> > +    ;; Disallow terminators
> > +    (setf (notmuch-json-allow-term jp) nil)
> > +    (or (notmuch-json-scan-to-value jp)
> > +	(if (/= (char-after) ?\[)
> > +	    (signal 'json-readtable-error (list "expected '['"))
> > +	  (forward-char)
> > +	  (push ?\] (notmuch-json-term-stack jp))
> > +	  ;; Expect a value or terminator next
> > +	  (setf (notmuch-json-next jp) 'expect-value
> > +		(notmuch-json-allow-term jp) t)
> > +	  t))))
> > +
> > +(defun notmuch-json-read (jp)
> > +  "Parse the value at point in JP's buffer.
> > +
> > +Returns 'retry if there is insufficient input to parse a complete
> > +JSON value.  If the parser is currently inside a compound value
> > +and the next token ends the list or object, returns 'end.
> > +Otherwise, returns the value."
> 
> 
> This looks excellent! My only comment is that I think it would be
> helpful it the above comment and the one for notmuch-json-begin-compound
> said what happens to point when they get called.

Good idea.

> Also, I think in practice a lot of the parsing is done by json.el (that
> you use this to enter one level of the hierarchy and then use this to
> ensure that json.el only gets complete objects to parse) so I assume
> this means that your much faster replacement for json.el could be
> slotted in? It might be worth mentioning that in the commit message.

Yes, definitely.  My hope is that we'll skip over the optimized JSON
parser and move straight to S-expressions, which perform ~3X better
than my optimized json.el (and ~8X better than Emacs' json.el).

> Best wishes
> 
> Mark
> 
> > +
> > +  (with-current-buffer (notmuch-json-buffer jp)
> > +    (or
> > +     ;; Get to a value state
> > +     (notmuch-json-scan-to-value jp)
> > +
> > +     ;; Can we parse a complete value?
> > +     (let ((complete
> > +	    (if (looking-at "[-+0-9tfn]")
> > +		;; This is a number or a keyword, so the partial
> > +		;; parser isn't going to help us because a truncated
> > +		;; number or keyword looks like a complete symbol to
> > +		;; it.  Look for something that clearly ends it.
> > +		(save-excursion
> > +		  (skip-chars-forward "^]},: \t\r\n")
> > +		  (not (eobp)))
> > +
> > +	      ;; We're looking at a string, object, or array, which we
> > +	      ;; can partial parse.  If we just reached the value, set
> > +	      ;; up the partial parser.
> > +	      (when (null (notmuch-json-partial-state jp))
> > +		(setf (notmuch-json-partial-pos jp) (point-marker)))
> > +
> > +	      ;; Extend the partial parse until we either reach EOB or
> > +	      ;; get the whole value
> > +	      (save-excursion
> > +		(let ((pstate
> > +		       (with-syntax-table notmuch-json-syntax-table
> > +			 (parse-partial-sexp
> > +			  (notmuch-json-partial-pos jp) (point-max) 0 nil
> > +			  (notmuch-json-partial-state jp)))))
> > +		  ;; A complete value is available if we've reached
> > +		  ;; depth 0 or less and encountered a complete
> > +		  ;; subexpression.
> > +		  (if (and (<= (first pstate) 0) (third pstate))
> > +		      t
> > +		    ;; Not complete.  Update the partial parser state
> > +		    (setf (notmuch-json-partial-pos jp) (point-marker)
> > +			  (notmuch-json-partial-state jp) pstate)
> > +		    nil))))))
> > +
> > +       (if (not complete)
> > +	   'retry
> > +	 ;; We have a value.  Reset the partial parse state and expect
> > +	 ;; a comma or terminator after the value.
> > +	 (setf (notmuch-json-next jp) 'expect-comma
> > +	       (notmuch-json-allow-term jp) t
> > +	       (notmuch-json-partial-pos jp) nil
> > +	       (notmuch-json-partial-state jp) nil)
> > +	 ;; Parse the value
> > +	 (let* ((json-object-type 'plist)
> > +		(json-array-type 'list)
> > +		(json-false nil))
> > +	   (json-read)))))))
> > +
> >  (provide 'notmuch-lib)
> >  
> >  ;; Local Variables:


More information about the notmuch mailing list