from io import StringIO
from nose.tools import assert_raises  # pytest.raises offers the same check
import podcastparser

def test_fail_parse(feed):
    with assert_raises(podcastparser.FeedParseError):
        podcastparser.parse('file://example.com/feed.xml', StringIO(feed))
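
# A driver for this parametrized test might look like the sketch below. The
# nose-style test generator and the sample documents are illustrative and are
# not taken from the project's test suite.
def test_unsupported_roots():
    # Well-formed XML whose root element is not a recognized feed type
    for feed in ('<html></html>', '<person><name>x</name></person>'):
        yield test_fail_parse, feed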

MAPPING = {
    # (earlier mappings omitted in this excerpt)
    'rss/channel/item/itunes:subtitle': EpisodeAttr('subtitle', squash_whitespace),
    'rss/channel/item/content:encoded': EpisodeAttr('description_html'),
    'rss/channel/item/itunes:duration': EpisodeAttr('total_time', parse_time),
    'rss/channel/item/pubDate': EpisodeAttr('published', parse_pubdate),
    'rss/channel/item/atom:link': AtomLink(),
    'rss/channel/item/media:content': Enclosure('fileSize'),
    'rss/channel/item/enclosure': Enclosure('length'),
    'rss/channel/item/psc:chapters': PodloveChapters(),
    'rss/channel/item/psc:chapters/psc:chapter': PodloveChapter(),

    # Basic support for Atom feeds
    'atom:feed': PodcastItem(),
    'atom:feed/atom:title': PodcastAttr('title', squash_whitespace),
    'atom:feed/atom:subtitle': PodcastAttr('description', squash_whitespace),
    'atom:feed/atom:icon': PodcastAttrRelativeLink('cover_url'),
    'atom:feed/atom:link': PodcastAtomLink(),
    'atom:feed/atom:entry': EpisodeItem(),
    'atom:feed/atom:entry/atom:id': EpisodeAttr('guid'),
    'atom:feed/atom:entry/atom:title': EpisodeAttr('title', squash_whitespace),
    'atom:feed/atom:entry/atom:link': AtomLink(),
    'atom:feed/atom:entry/atom:content': AtomContent(),
    'atom:feed/atom:entry/content:encoded': EpisodeAttr('description_html'),
    'atom:feed/atom:entry/atom:published': EpisodeAttr('published', parse_pubdate),
    'atom:feed/atom:entry/atom:updated': EpisodeAttr('published', parse_pubdate, overwrite=False),
    'atom:feed/atom:entry/media:group/media:description': EpisodeAttr('description', squash_whitespace),
    'atom:feed/atom:entry/psc:chapters': PodloveChapters(),
    'atom:feed/atom:entry/psc:chapters/psc:chapter': PodloveChapter(),
}

# Derive valid root elements from the supported MAPPINGs
VALID_ROOTS = set(path.split('/')[0] for path in MAPPING.keys())
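
# To illustrate how a path-keyed mapping like the one above drives parsing,
# here is a minimal, self-contained sketch: a SAX handler keeps a stack of
# open element names, joins it into a path, and dispatches to a target object
# when the element ends. DemoTarget, DEMO_MAPPING and DemoHandler are invented
# for this sketch; they are not podcastparser's actual classes.
from io import StringIO
from xml import sax

class DemoTarget:
    """Stores the text of one mapped element path under a result key."""
    def __init__(self, key):
        self.key = key

    def end(self, result, text):
        result[self.key] = text.strip()

DEMO_MAPPING = {
    'rss/channel/title': DemoTarget('title'),
    'rss/channel/item/title': DemoTarget('episode_title'),
}

class DemoHandler(sax.handler.ContentHandler):
    def __init__(self):
        super().__init__()
        self.path = []    # stack of currently open element names
        self.text = []
        self.result = {}

    def startElement(self, name, attrs):
        self.path.append(name)
        self.text = []

    def characters(self, chars):
        self.text.append(chars)

    def endElement(self, name):
        target = DEMO_MAPPING.get('/'.join(self.path))
        if target is not None:
            target.end(self.result, ''.join(self.text))
        self.path.pop()

demo_handler = DemoHandler()
sax.parse(StringIO('<rss><channel><title>Demo</title>'
                   '<item><title>Episode 1</title></item></channel></rss>'),
          demo_handler)
print(demo_handler.result)  # {'title': 'Demo', 'episode_title': 'Episode 1'}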

class FeedParseError(sax.SAXParseException, ValueError):
    """
    Exception raised when asked to parse an invalid feed

    This exception allows users of this library to catch exceptions
    without having to import the XML parsing library themselves.
    """
    pass
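
# Because FeedParseError derives from both SAXParseException and ValueError,
# callers can detect an invalid feed without importing xml.sax themselves; the
# fetcher code below does exactly that with "except ValueError". A small
# illustrative sketch (the non-feed document below is assumed to be rejected
# by the parser):
from io import StringIO
import podcastparser

try:
    podcastparser.parse('file://example.com/feed.xml', StringIO('<html></html>'))
except ValueError as error:  # catches FeedParseError as well
    print('Not a valid podcast feed:', error)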

try:
    self._parse_feed(ad._resolved_url, None, None, False)
    return Result(NEW_LOCATION, ad._resolved_url)
except Exception as e:
    logger.warn('Feed autodiscovery failed', exc_info=True)

# Second, try to resolve the URL
url = self._resolve_url(url)
if url:
    return Result(NEW_LOCATION, url)

# Reset the stream so podcastparser can give it a go
data.seek(0)

try:
    feed = podcastparser.parse(url, data)
    feed['url'] = url
except ValueError as e:
    raise InvalidFeed('Could not parse feed: {msg}'.format(msg=e))

if is_local:
    feed['headers'] = {}
    return Result(UPDATED_FEED, feed)
else:
    feed['headers'] = stream.headers
    return self._check_statuscode(stream, feed)
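
# For a standalone comparison with the fetcher above: podcastparser's parse()
# takes the feed URL (used to resolve relative links) and a file-like stream.
# The URL below is only a placeholder.
import urllib.request
import podcastparser

feedurl = 'http://example.com/feed.xml'
parsed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
print(parsed.get('title'))
for episode in parsed['episodes']:
    print(episode.get('title'), episode.get('published'))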

page = 2
remaining_episodes = max_episodes - len(self.parsed['episodes'])
while ('paged_feed_next' in self.parsed and
        page < self.PAGED_FEED_MAX_PAGES and
        remaining_episodes > 0):
    # Take the next page from the paged feed
    url = self.parsed['paged_feed_next']
    del self.parsed['paged_feed_next']
    if not url:
        break

    try:
        logger.debug('Downloading page %d from %s', page, url)
        stream = util.urlopen(url)
        parsed = podcastparser.parse(url, stream, remaining_episodes)
        added_episodes = len(parsed['episodes'])
        remaining_episodes -= added_episodes
        logger.debug('Page %d contains %d additional episodes', page,
                     added_episodes)
        self.parsed['episodes'].extend(parsed['episodes'])

        # Next iteration if we still have a next page
        if 'paged_feed_next' in parsed:
            self.parsed['paged_feed_next'] = parsed['paged_feed_next']
    except Exception as e:
        logger.warn('Error while fetching feed page %d from %s: %s', page, url, e)
        # Give up, don't try to download additional pages here
        break

    page += 1
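
# The remaining_episodes bookkeeping above relies on parse() accepting an
# episode limit as an optional third argument. A minimal sketch of that call,
# again with a placeholder URL:
import urllib.request
import podcastparser

feedurl = 'http://example.com/feed.xml'
# Keep only the five newest episodes (the sort-and-truncate step happens in
# PodcastItem.end, shown further below).
parsed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl), 5)
print(len(parsed['episodes']))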

# Tail of podcastparser's Target base class; its class header and the rest of
# __init__ are not included in this excerpt.
        self.overwrite = overwrite

    def start(self, handler, attrs):
        pass

    def end(self, handler, text):
        pass


class RSS(Target):
    def start(self, handler, attrs):
        if 'xml:base' in attrs.keys():
            handler.set_base(attrs.get('xml:base'))


class PodcastItem(Target):
    def end(self, handler, text):
        by_published = lambda entry: entry.get('published')
        handler.data['episodes'].sort(key=by_published, reverse=True)
        if handler.max_episodes:
            episodes = handler.data['episodes'][:handler.max_episodes]
            handler.data['episodes'] = episodes


class PodcastAttr(Target):
    WANT_TEXT = True

    def end(self, handler, text):
        handler.set_podcast_attr(self.key, self.filter_func(text))


class PodcastAttrRelativeLink(PodcastAttr):