Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
identifiers – A dictionary of other identifiers, most commonly {'isbn':'1234...'}
timeout – Timeout in seconds, no network request should hang for longer than timeout.
Returns:
None if no errors occurred, otherwise a unicode representation of the error suitable for showing to the user
'''
self.log = Log(self.name, log)
found = []
xml = None
detail_ident = None
#test previous found first
ident = identifiers.get(self.name, None)
XPath = partial(etree.XPath, namespaces=self.NAMESPACES)
detail_test = XPath('//x:div[@id="detail"]')
entry = XPath('//x:tr[@class="suda" or @class="licha"]')
query = self.create_query(title=title, authors=authors,
identifiers=identifiers)
if not query:
self.log('Insufficient metadata to construct query')
return
br = self.browser
try:
self.log('download book page search %s'%query)
raw = br.open(query, timeout=timeout).read().strip()
try:
parser = etree.XMLParser(recover=True)
clean = clean_ascii_chars(raw)
if check_hyphs:
if not _ufound and _alltext.find(u'\u00AD') != -1:
print(_file_dec + ': U+00AD hyphenate marks found.')
_ufound = True
if not _unbfound and _alltext.find(u'\u00A0') != -1:
print(_file_dec + ': U+00A0 non-breaking space found.')
_unbfound = True
_links = etree.XPath('//xhtml:link', namespaces=XHTMLNS)(_xhtmlsoup)
for _link in _links:
if not _linkfound and (_link.get('type') is None):
_linkfound = True
print(_file_dec + ': At least one xhtml file has link tag '
'without type attribute defined')
#Check dtb:uid - should be identical go dc:identifier
ncxfile = etree.XPath('//opf:item[@media-type="application/x-dtbncx+xml"]',
namespaces=OPFNS)(opftree)[0].get('href')
ncxtree = etree.fromstring(_epubfile.read(_folder + ncxfile))
uniqid = etree.XPath('//opf:package',
namespaces=OPFNS)(opftree)[0].get('unique-identifier')
if uniqid is not None:
try:
dc_identifier = etree.XPath('//dc:identifier[@id="' + uniqid +
'"]/text()',
namespaces=DCNS)(opftree)[0]
except:
dc_identifier = ''
print(_file_dec + ': dc:identifier with unique-id not found')
else:
print(_file_dec + ': no unique-identifier found')
try:
metadtd = etree.XPath('//ncx:meta[@name="dtb:uid"]',
def __init__(self, content):
    """Wrap a parsed education record and extract its fields.

    Pre-compiles one relative XPath per field of interest, then runs
    __build_data over the tree to populate self.results.
    """
    self.tree = content
    # Field name -> relative XPath; the two date fields drill into <year>.
    field_paths = {
        'id': 'id',
        'school-name': 'school-name',
        'field-of-study': 'field-of-study',
        'start-date': 'start-date/year',
        'end-date': 'end-date/year',
        'degree': 'degree',
        'activities': 'activities',
    }
    self.xpath_collection = {
        name: etree.XPath(path) for name, path in field_paths.items()
    }
    self.results = self.__build_data(self.tree)
def XPath(expr):
    """Compile *expr* into an lxml XPath with the XHTML namespace bound to prefix 'h'."""
    nsmap = {'h': XHTML_NS}
    return etree.XPath(expr, namespaces=nsmap)
def __init__(self, ident, result_queue, browser, log, relevance, plugin, timeout=20):
    """Worker thread that fetches and parses one search result.

    ident        -- numeric identifier of the result this worker handles
    result_queue -- queue the finished Metadata objects are pushed onto
    browser      -- browser to clone (each worker gets its own copy)
    log          -- parent logger to wrap
    relevance    -- ordering hint for the result
    plugin       -- owning metadata-source plugin (supplies NAMESPACES)
    timeout      -- per-request network timeout in seconds
    """
    Thread.__init__(self)
    self.daemon = True
    self.ident = ident
    self.result_queue = result_queue
    # Clone so concurrent workers do not share cookie/connection state.
    self.browser = browser.clone_browser()
    self.relevance = relevance
    self.plugin = plugin
    self.timeout = timeout
    self.cover_url = None
    self.isbn = None
    # XPath factory pre-bound to the plugin's namespace map.
    self.XPath = partial(etree.XPath, namespaces=plugin.NAMESPACES)
    self.number = int(ident)
    self.log = Log("worker %i" % self.number, log)
def check_xpath(self, s, lineno):
    """Validate the XPath expression *s* found at *lineno*.

    Raises Error (wrapping the lxml syntax error) when *s* does not
    compile.  When warnings are enabled, also prints a heads-up for
    expressions inside an ItemElement of a Table/ListElement that do
    not start with '.' and are therefore probably meant to be relative.
    """
    try:
        lxml.etree.XPath(s)
    except lxml.etree.XPathSyntaxError as exc:
        raise Error(self.file, lineno, exc)
    if not self.warnings:
        return
    context = self.element_context
    looks_absolute = not s.lstrip('(').startswith('.')
    if (looks_absolute and len(context) >= 2
            and context[-1] == 'ItemElement'
            and context[-2] in ('TableElement', 'ListElement')):
        print('%s:%s: probable missing "." at start of XPath' % (self.file, lineno))
def fix_styles(source_file):
    """Ensure every XHTML <link> element in *source_file* declares a type.

    Links missing a type attribute get type="text/css" set in place.

    Parameters:
        source_file: parsed lxml tree of an XHTML document.

    Returns:
        The same tree (possibly modified in place).
    """
    try:
        links = etree.XPath(
            '//xhtml:link',
            namespaces=XHTMLNS
        )(source_file)
    except Exception:
        # Bug fix: the original bare except printed and then fell through
        # to the loop, raising NameError on the undefined 'links'.
        # Bail out with the tree unchanged instead.
        print('No links found...')
        return source_file
    for link in links:
        if link.get('type') is None:
            link.set('type', 'text/css')
    return source_file
# set dc:language to my language
for lang in soup.xpath("//dc:language", namespaces=DCNS):
if lang.text != _my_language:
lang.text = _my_language
# add missing dc:language
if len(soup.xpath("//dc:language", namespaces=DCNS)) == 0:
for metadata in soup.xpath("//opf:metadata", namespaces=OPFNS):
newlang = etree.Element(
'{http://purl.org/dc/elements/1.1/}language'
)
newlang.text = _my_language
metadata.insert(0, newlang)
# add missing meta cover and cover reference guide element
metacovers = etree.XPath('//opf:meta[@name="cover"]',
namespaces=OPFNS)(soup)
refcovers = etree.XPath('//opf:reference[@type="cover"]',
namespaces=OPFNS)(soup)
if len(metacovers) == 1 and len(refcovers) == 0:
# set missing cover reference guide element
itemcovers = etree.XPath(
'//opf:item[@id="' + metacovers[0].get('content') + '"]',
namespaces=OPFNS
)(soup)
if verbose:
print('Defining cover guide element...')
itemcoverhref = os.path.basename(itemcovers[0].get('href'))
soup = set_cover_guide_ref(
xhtml_files, itemcoverhref, xhtml_file_paths, soup
)
namespaces=OPFNS)(opftree)[0].get('href')
ncxtree = etree.fromstring(_epubfile.read(_folder + ncxfile))
uniqid = etree.XPath('//opf:package',
namespaces=OPFNS)(opftree)[0].get('unique-identifier')
if uniqid is not None:
try:
dc_identifier = etree.XPath('//dc:identifier[@id="' + uniqid +
'"]/text()',
namespaces=DCNS)(opftree)[0]
except:
dc_identifier = ''
print(_file_dec + ': dc:identifier with unique-id not found')
else:
print(_file_dec + ': no unique-identifier found')
try:
metadtd = etree.XPath('//ncx:meta[@name="dtb:uid"]',
namespaces=NCXNS)(ncxtree)[0]
if metadtd.get('content') != dc_identifier:
print(_file_dec + ': dtd:uid and dc:identifier mismatched')
except IndexError:
print(_file_dec + ': dtd:uid not properly defined')
for meta in opftree.xpath("//opf:meta[starts-with(@name, 'calibre')]",
namespaces=OPFNS):
print(_file_dec + ': calibre staff found')
break
for dcid in opftree.xpath(
"//dc:identifier[@opf:scheme='calibre']",
namespaces={'dc': 'http://purl.org/dc/elements/1.1/',
'opf': 'http://www.idpf.org/2007/opf'}
):
print(_file_dec + ': calibre staff found')
def __init__(self, ident, result_queue, browser, log, relevance, plugin, xml, timeout=20):
    """Worker thread that parses metadata from an already-fetched XML blob.

    ident        -- identifier of the result this worker handles
    result_queue -- queue the finished Metadata objects are pushed onto
    browser      -- browser to clone (each worker gets its own copy)
    log          -- parent logger to wrap
    relevance    -- ordering hint for the result
    plugin       -- owning metadata-source plugin (supplies NAMESPACES)
    xml          -- raw XML of the search result to parse
    timeout      -- per-request network timeout in seconds
    """
    Thread.__init__(self)
    self.daemon = True
    self.ident = ident
    self.result_queue = result_queue
    # Clone so concurrent workers do not share cookie/connection state.
    self.browser = browser.clone_browser()
    self.relevance = relevance
    self.plugin = plugin
    self.timeout = timeout
    self.cover_url = None
    self.isbn = None
    # XPath factory pre-bound to the plugin's namespace map.
    self.XPath = partial(etree.XPath, namespaces=plugin.NAMESPACES)
    self.xml = xml
    self.log = Log("worker %s" % ident, log)