Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_split_fullname():
    """Check ``split_fullname`` on comma-separated, plain and empty names.

    Each case pairs an input name (plus optional keyword arguments) with
    the ``(surname, given_names)`` tuple the splitter is expected to return.
    """
    # (full name, extra keyword arguments, expected result)
    cases = [
        ('Doe, John Magic', {}, ('Doe', 'John Magic')),
        ('Doe Boe, John Magic', {}, ('Doe Boe', 'John Magic')),
        (
            'Doe Boe John Magic',
            {'switch_name_order': True},
            ('Doe', 'Boe John Magic'),
        ),
        ('John Magic Doe', {}, ('Doe', 'John Magic')),
        ('John Magic Doe Boe', {}, ('Boe', 'John Magic Doe')),
        (
            'John Magic, Doe Boe',
            {'switch_name_order': True},
            ('Doe Boe', 'John Magic'),
        ),
        ('', {}, ('', '')),
    ]

    for fullname, kwargs, expected in cases:
        assert split_fullname(fullname, **kwargs) == expected
def test_split_fullname():
    """Verify that ``split_fullname`` returns ``(surname, given_names)``.

    The inputs cover names with and without a comma, both name orders
    (via ``switch_name_order``), and the empty string.
    """
    # Comma-separated, surname first (default order).
    assert split_fullname('Doe, John Magic') == ('Doe', 'John Magic')
    assert split_fullname('Doe Boe, John Magic') == ('Doe Boe', 'John Magic')

    # No comma, switched order: the first word is taken as the surname.
    assert split_fullname(
        'Doe Boe John Magic', switch_name_order=True
    ) == ('Doe', 'Boe John Magic')

    # No comma, default order: the last word is taken as the surname.
    assert split_fullname('John Magic Doe') == ('Doe', 'John Magic')
    assert split_fullname('John Magic Doe Boe') == ('Boe', 'John Magic Doe')

    # Comma-separated, switched order: the part after the comma is the surname.
    assert split_fullname(
        'John Magic, Doe Boe', switch_name_order=True
    ) == ('Doe Boe', 'John Magic')

    # An empty name yields two empty strings.
    assert split_fullname('') == ('', '')
def test_split_fullname():
    """Exercise ``split_fullname`` in both the default and switched name order."""

    def split_switched(fullname):
        # Same call as the default one, but with the name order switched.
        return split_fullname(fullname, switch_name_order=True)

    # (splitter to use, input name, expected (surname, given_names))
    checks = (
        (split_fullname, 'Doe, John Magic', ('Doe', 'John Magic')),
        (split_fullname, 'Doe Boe, John Magic', ('Doe Boe', 'John Magic')),
        (split_switched, 'Doe Boe John Magic', ('Doe', 'Boe John Magic')),
        (split_fullname, 'John Magic Doe', ('Doe', 'John Magic')),
        (split_fullname, 'John Magic Doe Boe', ('Boe', 'John Magic Doe')),
        (split_switched, 'John Magic, Doe Boe', ('Doe Boe', 'John Magic')),
        (split_fullname, '', ('', '')),
    )

    for splitter, fullname, expected in checks:
        assert splitter(fullname) == expected
def test_split_fullname():
    """Test that author full names are split into surname and given names.

    Rows flagged with ``True`` are split with ``switch_name_order=True``;
    the rest use the default call.
    """
    # (input name, use switched order?, expected (surname, given_names))
    table = [
        ('Doe, John Magic', False, ('Doe', 'John Magic')),
        ('Doe Boe, John Magic', False, ('Doe Boe', 'John Magic')),
        ('Doe Boe John Magic', True, ('Doe', 'Boe John Magic')),
        ('John Magic Doe', False, ('Doe', 'John Magic')),
        ('John Magic Doe Boe', False, ('Boe', 'John Magic Doe')),
        ('John Magic, Doe Boe', True, ('Doe Boe', 'John Magic')),
        ('', False, ('', '')),
    ]

    for author, switched, expected in table:
        if switched:
            result = split_fullname(author, switch_name_order=True)
        else:
            result = split_fullname(author)
        assert result == expected
"""Return authors dictionary """
authors_raw = node.xpath(
"//td[contains(text(), 'dc.contributor.author')]/following-sibling::td[1]/text()").extract()
affiliation = node.xpath(
"//td[contains(text(), 'dc.contributor.department')]/following-sibling::td[1]/text()").extract_first()
authors = []
strip_years_pattern = re.compile(r"(.*)\,\s\d{4}.?")
full_given_names_pattern = re.compile(r".?\((.*)\).?")
for author in authors_raw:
try:
# Might contain birthdate
author = strip_years_pattern.search(author).group(1)
except AttributeError:
pass
surname, given_names = split_fullname(author)
try:
# Might contain full given_names in parentheses
given_names = full_given_names_pattern.search(given_names).group(1)
except AttributeError:
pass
authdict = {
'surname': surname,
'given_names': given_names,
}
if affiliation:
authdict["affiliations"] = [{"value": affiliation}]
authors.append(authdict)
return authors
def parse_authors(value):
    """Fill in the derived name fields of an author record, in place.

    * When ``raw_name`` is present but ``surname`` is not, ``surname`` and
      ``given_names`` are set from ``split_fullname(raw_name)``.
    * When ``given_names`` is present and non-empty it is passed through
      ``collapse_initials`` (per the original note: deletes spaces from
      initials) and ``full_name`` becomes ``"<surname>, <given_names>"``.
    * Otherwise ``full_name`` is just the surname.

    The same ``value`` mapping that was passed in is returned.
    """
    surname_missing = "surname" not in value
    if "raw_name" in value and surname_missing:
        surname, given_names = split_fullname(value['raw_name'])
        value['surname'] = surname
        value['given_names'] = given_names

    # No usable given names: the full name is the surname alone.
    if 'given_names' not in value or not value['given_names']:
        value['full_name'] = value['surname']
        return value

    value['given_names'] = collapse_initials(value['given_names'])
    value['full_name'] = u'{0}, {1}'.format(
        value['surname'], value['given_names']
    )
    return value
"""Parses the line where there are data about the author(s)
Note that author surnames and given names are not comma separated, so
`split_fullname` might get a wrong surname.
"""
authors_raw = node.xpath(
"//div[@id='content']/p[@class='author']/text()").extract()
affiliation = node.xpath(
"//h2[contains(text(), 'School')]/following-sibling::p/strong/text()"
).extract_first()
if not affiliation:
affiliation = ''
authors = []
for author in authors_raw:
surname, given_names = split_fullname(author)
authdict = {
'surname': surname,
'given_names': given_names,
}
if affiliation:
authdict["affiliations"] = [{"value": affiliation}]
authors.append(authdict)
return authors
some_affiliation_contains_collaboration = True
else:
affiliations_without_collaborations.append(aff)
for aff in affiliations_with_collaborations:
coll, author_name = coll_cleanforthe(aff)
if coll and coll not in collaborations:
collaborations.append(coll)
# Check if name is a collaboration, else append to authors
collaboration_in_name = ' for the ' in name_string.lower() or any(
phrase in name_string.lower() for phrase in collab_phrases
)
if collaboration_in_name:
coll, author_name = coll_cleanforthe(name_string)
if author_name:
surname, given_names = split_fullname(author_name)
authors.append({
'full_name': surname + ', ' + given_names,
'surname': surname,
'given_names': given_names,
'affiliations': [],
})
if coll and coll not in collaborations:
collaborations.append(coll)
elif name_string.strip() == ':':
# DANGERZONE : this might not be correct - add a warning for the cataloger
warning_tags.append(' %s %s ' % (next_forenames, next_keyname))
if not some_affiliation_contains_collaboration:
# everything up to now seems to be collaboration info
for author_info in authors:
name_string = " %s %s " % \
(author_info['given_names'], author_info['surname'])