#! /usr/bin/env python
#File: bibname.py
"""
:mod:`bibstuff.bibname`: Name Parser and Formatter
--------------------------------------------------
Parses bibtex-formatted author/editor raw names and provides
formatting functions (e.g., via bibstyles/shared.NamesFormatter).
:copyright: 2009-2014 Dylan Schwilk and Alan G Isaac, see AUTHORS
:license: MIT (see LICENSE)
:note: Major change as of 2008-07-02. Now the ebnf grammar and processor
handles parsing of a list of names (a bibtex names field such as editor
or author) and parses the single author name into its fvlj parts. This
eliminates the need for the original hand-coded parse_raw_names_parts
function. Moved to using names_dicts rather than names_parts. The
grammar handles latex accents and ligatures as well as braces strings so
that a name such as {Barnes and Noble, Inc} is parsed as a single name
and not split on the " and ".
:todo: The dispatch processor does not currently strip the leading and trailing
braces from latex/bibtex strings. Not hard to add (see bibfile.py). This
should be done eventually.
:todo: The grammar does not support quoted strings, only brace-delimited
strings. Support for quoted strings could be added fairly simply.
"""
__docformat__ = "restructuredtext en"
__authors__ = ["Dylan W. Schwilk", "Alan G. Isaac"]
__version__ = '2.0'
__needs__ = '2.4'
################ IMPORTS #############################
# import from standard library
import logging
logging.basicConfig(format='\n%(levelname)s:\n%(message)s\n')
bibname_logger = logging.getLogger('bibstuff_logger')
# import dependencies
import simpleparse
from simpleparse.dispatchprocessor import dispatch
#from string import maketrans
# BibStuff imports
from . import bibstyles, bibfile, bibgrammar
######################################################
################## Global Variables ##################
# Keys that every parsed name dict is guaranteed to carry.  ``BibName.name``
# iterates over this tuple after processing a name and inserts an empty list
# for any part the grammar did not report.
nameparts = ("first","last","von","jr")
# The EBNF description of a bibtex name field (such as a list of author names).
#
# Reading notes (SimpleParse EBNF conventions -- see the SimpleParse docs):
#   * ``<rule>`` productions are unreported: they match but produce no tag.
#   * ``>rule<`` productions are expanded: their children are reported in
#     place of the production itself.
#   * ``namelist`` is the root; individual names are separated by the word
#     "and" surrounded by ``sp`` characters.
#   * Only ``name``, ``von``, ``first``, ``last`` and ``jr`` are plain
#     (reported) productions; these are the tags ``BibName`` has handler
#     methods for.
#   * ``sp`` includes '.', so periods are treated as separators rather than
#     as part of a name token.
# NOTE(review): ``lower_braces_string`` starts its first alternative with
# ``capital`` exactly like ``cap_braces_string``; it looks like ``lowerc`` may
# have been intended -- confirm against upstream before changing the grammar.
ebnf_bibname = r"""
namelist := sp*, name, (_and_, name)*
<_and_> := sp+, "and", sp+
name := vlf / fvl / fl / vljf / fvlj / l
>l< := last
>vlf< := (von, sp+)*, last, (sp+, last)*, comma, (sp*, first)+
>fl< := first, sp+, (first, sp+, ?(capitalized/capstring))*, last
>fvl< := (first, sp+)+, (von, sp+)+, last, (sp+, last)*
>fvlj< := fvl, comma, jr
>vljf< := (von, sp+)*, last, (sp+, last)*, comma, jr, comma, first, (sp+ , first)*
von := lowercase / lowerstring
first := capitalized / capstring
last := capitalized / capstring
jr := "jr" / "Jr" / "JR" / "Junior" / "junior" /
"Sr" / "sr" / "II" / "III" / "IV" / "2nd" / "3rd" / "4th"
<comma> := sp*, ',', sp*
<capitalized> := capital , anyc*
<lowercase> := ?lowerc, -"and ", anyc* # Mustn't grab the delimiter _and_ for a part
<ltx_accent> := '\\`' / "\\'" / '\\^' / '\\"' / '\\H' / '\\~' / '\\c' / '\\=' / '\\b' / '\\.' /
'\\d' / '\\u' / '\\v' / '\\t'
<ltx_ij_accent> := '\\^{\\i}' / '\\"{\\i}' / '\\^{\\j}' / '\\"{\\j}'
<ltx_ligature_uc> := '\\AE' / '\\OE' / '\\AA' / '\\O'
<ltx_ligature_lc> := '\\ae' / '\\oe' / '\\aa' / '\\o' / '\\ss'
<capital> := ('{',capital,'}') / [A-Z] /
(ltx_accent, [A-Z]) / (ltx_accent, '{' , [A-Z] , '}') /
ltx_ligature_uc
<lowerc> := ('{',lowerc,'}') / [a-z] / (ltx_accent, [a-z]) /
(ltx_accent, '{' , [a-z] , '}') /
ltx_ij_accent / ltx_ligature_lc
<anyc> := [~'-] / capital / lowerc
<string> := '{' , braces_string?, '}'
<capstring> := '{' , cap_braces_string?, '}'
<lowerstring> := '{' , lower_braces_string?, '}'
<cap_braces_string> := ( (capital, -[{}]*) / capstring)+
<lower_braces_string> := ( (capital, -[{}]*) / lowerstring)+
<braces_string> := (-[{}]+ / string)+
<sp> := [ \t\n\r.]
"""
# Module-level parser, built once at import time with ``namelist`` as the root
# production.  ``BibName.parse_raw_names`` runs it with the BibName instance
# as the dispatch processor.
bibnamelist_parser = simpleparse.parser.Parser(ebnf_bibname, 'namelist')
######################################################
# ----------- Public Classes and Functions -----------------#
# ----------------------------------------------------------
# BibName
# -------
# Parser processor for bibtex names
# ----------------------------------------------------------
class BibName(simpleparse.dispatchprocessor.DispatchProcessor):
    """Process a bibtex names entry (author, editor, etc.) and store
    the parsed result in `names_dicts`.

    `names_dicts` is a list with one dict per parsed name.  Each dict maps
    every key in the module-level `nameparts` tuple ("first", "last",
    "von", "jr") to a list of strings sliced from the raw names buffer;
    parts that are absent from a name map to an empty list.

    :note: a BibName object should be bibstyle independent.
    :note: production methods take the tag tuple as a single positional
        argument and unpack it in the body, instead of using tuple
        parameters in the signature (tuple parameters are Python 2 only).
    """

    def __init__(self, raw_names=None, from_field=None):
        #:note: 2006-07-25 add initialization based on raw name
        """Initialize a BibName instance.

        :Parameters:
          `raw_names` : str
            the raw names (e.g., unparsed author field of a BibEntry instance)
          `from_field` : str
            the entry field for the raw name

        :note: 2006-08-02 add `from_field` argument (set by `BibEntry.make_names`)
        """
        self.from_field = from_field
        self.raw_names = raw_names
        self.names_dicts = []
        # populate self.names_dicts from raw_names (skipped for None or '')
        if raw_names:
            self.parse_raw_names(raw_names)

    ############### PRODUCTION FUNCTIONS #######################
    # Handle each name by adding a new dict to the list "names_dicts", then
    # handle each name part by adding to the last dict in that list.

    def _append_part(self, key, taglist, buffer):
        """Append the text matched by `taglist` to list `key` of the
        name dict currently being built (the last one in `names_dicts`),
        creating the list on first use.
        """
        _tag, start, stop, _subtags = taglist
        self.names_dicts[-1].setdefault(key, []).append(buffer[start:stop])

    def name(self, taglist, buffer):
        """Production function to process a single name in a names list.

        :Parameters:
          `taglist` : tuple
            the ``(tag, start, stop, subtags)`` result tuple for the name
          `buffer` : str
            the raw names string being parsed
        """
        _tag, _start, _stop, subtags = taglist
        self.names_dicts.append({})  # add new dict to list
        for part in subtags:
            dispatch(self, part, buffer)
        # Create empty lists for missing parts
        current = self.names_dicts[-1]
        for key in nameparts:
            current.setdefault(key, [])

    def last(self, taglist, buffer):
        """Process a last name part in a single name of a bibtex names field."""
        self._append_part("last", taglist, buffer)

    def first(self, taglist, buffer):
        """Process a first name part in a single name of a bibtex names field."""
        self._append_part("first", taglist, buffer)

    def von(self, taglist, buffer):
        """Process a von name part in a single name of a bibtex names field."""
        self._append_part("von", taglist, buffer)

    def jr(self, taglist, buffer):
        """Process the jr name part in a single name of a bibtex names field."""
        _tag, start, stop, _subtags = taglist
        # Just one jr part per name, so simply store a one-item list
        # (this replaces rather than appends).
        self.names_dicts[-1]["jr"] = [buffer[start:stop]]

    ############## HELPER FUNCTIONS ######################

    def parse_raw_names(self, raw_name):
        """Parse `raw_name` with the bibname grammar.

        This can be used to populate an empty BibName instance or to
        replace all the name values currently contained in an instance.

        :Parameters:
          `raw_name` : str
            a bibtex names field (one or more names)
        """
        self.names_dicts = []  # Replace extant list of names
        bibnamelist_parser.parse(raw_name, processor=self)

    def get_names_dicts(self):  #:note: renamed
        """Return a list of name dicts, one dict per name,
        having the fields: first, von, last, jr.
        """
        return self.names_dicts

    #ai: method to get last names, which is needed by bibstyle.py and by
    #some style sortkeys
    def get_last_names(self):
        """Return list of strings, where each string is a last name.

        The "last" parts of each name are joined with a single space.

        :TODO: graceful handling of missing names parts
        """
        result = [' '.join(name_dict['last']) for name_dict in self.names_dicts]
        #bibname_logger.debug("BibName.get_last_names result: "+str(result))
        return result
def getNames(src):
    """Return a list of name dicts parsed from `src`.

    Each dict has the keys "first", "last", "von" and "jr".

    :Parameters:
      `src` : str
        a string in bibtex name format

    Any exception raised while parsing is logged through
    `bibname_logger` and then re-raised unchanged.
    """
    try:
        p = BibName(src)  #:note: 2006-07-25 allow initialization w src
        return p.get_names_dicts()  #:note: 2006-07-25 renamed
    except Exception:
        # Lazy logging args: the message is only formatted if it is emitted.
        bibname_logger.error('Error in name %s', src)
        raise
# command-line version
## TODO: move this to script
if __name__ == "__main__":
    # Command-line driver: read bibtex source from the named files (or from
    # stdin when no files are given), parse it, and print the formatted
    # names of every entry.
    import sys
    from optparse import OptionParser
    from bibstyles.default import DEFAULT_CITATION_TEMPLATE

    defaultformat = DEFAULT_CITATION_TEMPLATE['name_first']
    usage = "usage: %prog [options] filenames"
    parser = OptionParser(usage=usage, version="%prog " + __version__)
    parser.add_option("-t", "--template", action="store", type="string",
                      dest="template", default=defaultformat,
                      help="Name format template")
    # -i and -I share one destination; initials are on by default.
    parser.add_option("-i", "--initials", action="store_true", dest="initials",
                      default=True, help="Initialize first names")
    parser.add_option("-I", "--no-initials", action="store_false", dest="initials",
                      default=True, help="do not initialize first names")
    parser.add_option("-l", "--last-names", action="store_true", dest="last_names",
                      default=False, help="Print last names only.")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      default=False,
                      help="Print INFO messages to stdout, default=%default")

    # get options
    (options, args) = parser.parse_args()
    if options.verbose:
        bibname_logger.setLevel(logging.INFO)
    if options.last_names:
        # --last-names overrides any template given with -t
        options.template = 'l'
    if options.initials:
        initials = 'f'  # only first names. Does any style ever use initials for anything else?
    else:
        initials = ''

    if len(args) == 0:
        src = sys.stdin.read()
    else:
        contents = []
        for fname in args:
            # Unreadable files are reported and skipped, not fatal.
            try:
                fh = open(fname, 'r')
            except IOError:
                bibname_logger.warning('Error in filelist: %s.', fname)
                continue
            # Close each handle even if reading fails.
            try:
                contents.append(fh.read())
            finally:
                fh.close()
        src = '\n'.join(contents)

    if not src:
        bibname_logger.error("No bibtex source database found")
        sys.exit(1)
    else:
        bfile = bibfile.BibFile()
        bibgrammar.Parse(src, bfile)
        names_formatter = bibstyles.shared.NamesFormatter(
            template_list=[options.template] * 2, initials=initials)
        for entry in bfile.entries:
            # Single-argument parenthesized form works as both the
            # Python 2 print statement and the Python 3 print function.
            print(entry.format_names(names_formatter))