#! /usr/bin/env python
# File: bibgrammar.py
"""
:mod:`bibstuff.bibgrammar`: BibTeX Parser
--------------------------------------------
Provides an EBNF description of the bibtex bibliography format. The grammar
draws largely from the grammar description in Nelson Beebe's `Lex/Yacc parser <http://www.math.utah.edu/~beebe/>`_
and also from Greg Ward's `btOOL <http://www.gerg.ca/software/btOOL/>`_ documentation.
:copyright: Dylan Schwilk and Alan G. Isaac, see AUTHORS
:license: MIT (see LICENSE)
"""
__docformat__ = "restructuredtext en"
__needs__ = '2.4'
__version__ = "1.7"
__author__ = ["Dylan W. Schwilk", "Alan G Isaac"]
################### IMPORTS ##################################################
#import from standard library
# (some if run as main; see below)
#import dependencies
from simpleparse.parser import Parser
from simpleparse.common import numbers, strings, chartypes
#local imports
################################################################################
# EBNF description of a bibtex file
# 2008-06-27: There may be a bug in simpleparse that sometimes causes certain entries to
# not be recognized. The problem, however, can disappear if the order of entries
# in a bibfile is changed! I do not believe it is a problem with the grammar
# but is a bug in simpleparse itself.
#modification 2009-01-01
# change `key` to `citekey`
# add `alpha_name`
# change `macro` def (use case insensitive string)
# change `macro_contents` def (field instead of fields)
# change `fields` def (since comma is allowed after last field)
#modification 2009-02-11
# change braces_string and esp. quotes_string def because the old def was *very* slow
# also, gives better match to format described at
# http://artis.imag.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html
# SimpleParse EBNF declaration for a BibTeX file.  The same declaration is
# handed to every Parser built in this module, so it must stay a single
# self-contained grammar.
#
# Reading notes (per the SimpleParse grammar documentation):
#   * `c'...'` is a case-insensitive literal (used for `@string`).
#   * `<name>` productions are matched but not reported; `>name<` productions
#     are expanded in place of themselves in the result.
#   * `tb` ("token break") is any run of whitespace and `%` line comments.
#
# The character classes in `number` are digits only: a stray `[` inside the
# class would make a literal '[' count as a digit.
#
# NOTE(review): alternatives are tried in order.  In `number` and `value` the
# short alternative is listed first, so the longer alternative can only be
# reached when the short one fails -- confirm whether the longer forms should
# be listed first.
dec = r"""
bibfile := entry_or_junk+
>entry_or_junk< := (tb, object) / (tb, junk)
>object< := entry / macro / preamble / comment_entry
entry := '@', entry_type, tb, ( '{' , tb, contents, tb, '}' ) / ( '(' , tb, contents, tb, ')' )
macro := c'@string', tb, ( '{' , tb, macro_contents, tb, '}' ) / ( '(' , tb, macro_contents, tb, ')' )
preamble := '@', entry_type, tb, ( '{' , tb, preamble_contents, tb, '}' ) / ( '(' , tb, preamble_contents, tb, ')' )
comment_entry := '@', entry_type, tb, string
>contents< := citekey , tb, ',' , tb, fields
>macro_contents< := field
>preamble_contents< := value
entry_type := alpha_name
citekey := number / name
fields := (field_comma / field)+
>field_comma< := field , tb, ',', tb
field := name, tb, '=' , tb, value
value := simple_value / (simple_value, (tb,'#', tb, simple_value)+)
>simple_value< := string / number / name
alpha_name := [a-zA-Z]+
name := []-[a-z_A-Z!$&+./:;<>?^`|'] , []-[a-z_A-Z0-9!$&+./:;<>?^`|']*
number := [0-9]+ / ([0-9]+, tb, [-]+, tb, [0-9]+)
string := ('\"' , quotes_string?, '\"') / ('{' , braces_string?, '}')
<braces_string> := (-[{}@]+ / string)+
<quotes_string> := (-[\"{}]+ / ('{', braces_string,'}'))+
<junk> := -[ \t\r\n]+
<tb> := (comment / ws)*
<ws> := [ \t\n\r]
<comment> := '%' , -[\n]*, '\n'
"""
## instantiate SimpleParse parsers
# One parser per root production of `dec`: 'bibfile' for a whole file and
# 'entry' for a single entry.  A generator expression is used so that no
# loop variable is left behind in the module namespace.
parser, entry_parser = (
    Parser(dec, root_production) for root_production in ('bibfile', 'entry')
)
## offer a default parse function
def Parse(src, processor=None):
    """Parse the bibtex string *src*, process with *processor*.

    Thin convenience wrapper around the module-level ``parser`` (the
    parser built for the ``bibfile`` production).

    :param src: BibTeX source text to parse.
    :param processor: optional object forwarded unchanged as the
        ``processor`` keyword of ``parser.parse``; ``None`` by default.
    :returns: whatever ``parser.parse`` returns, unmodified.
    """
    return parser.parse(src, processor=processor)
## self-test
if __name__ == "__main__":
    # Self-test: parse the file named by the first command-line argument and
    # pretty-print the raw parse result.  Does nothing when no argument is
    # given.
    import sys
    import pprint

    if len(sys.argv) > 1:
        # try/finally rather than `with`: the module declares
        # __needs__ = '2.4', which predates the `with` statement.
        bibfile_handle = open(sys.argv[1])
        try:
            src = bibfile_handle.read()
        finally:
            # always release the file handle, even if the read fails
            bibfile_handle.close()
        # parse and print only when a source was actually read, so `src`
        # is never referenced unbound
        taglist = Parse(src)
        pprint.pprint(taglist)