How to use the `mwparserfromhell.parse` function in the mwparserfromhell library

To help you get started, we’ve selected a few mwparserfromhell examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github earwig / mwparserfromhell / tests / test_wikicode.py View on Github external
def test_get_sections(self):
        """test Wikicode.get_sections()"""
        # Simple fixtures: an empty page, a page with a single level-2
        # heading, and a page with nested level-3/level-4 headings.
        page1 = parse("")
        page2 = parse("==Heading==")
        page3 = parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n")

        # Build page4 from named section strings so individual sections can
        # be compared piecewise later (the assertions are not visible in
        # this truncated excerpt).
        p4_lead = "This is a lead.\n"
        p4_IA = "=== Section I.A ===\nSection I.A [[body]].\n"
        p4_IB1 = "==== Section I.B.1 ====\nSection I.B.1 body.\n\n•Some content.\n\n"
        p4_IB = "=== Section I.B ===\n" + p4_IB1
        p4_I = "== Section I ==\nSection I body. {{and a|template}}\n" + p4_IA + p4_IB
        p4_II = "== Section II ==\nSection II body.\n\n"
        # Level-5 heading, plus a seven-equals heading that is deliberately
        # invalid wikitext (MediaWiki supports at most six levels).
        p4_IIIA1a = "===== Section III.A.1.a =====\nMore text.\n"
        p4_IIIA2ai1 = "======= Section III.A.2.a.i.1 =======\nAn invalid section!"
        p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1
        p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2
        p4_III = "== Section III ==\n" + p4_IIIA
        page4 = parse(p4_lead + p4_I + p4_II + p4_III)
github earwig / mwparserfromhell / tests / test_wikicode.py View on Github external
# NOTE(review): fragment only — the enclosing test method's `def` line,
# and the definitions of `meth`, `expected`, and `code2`, are outside this
# excerpt. `func` is `partial(meth, codeN)`, i.e. the Wikicode mutator
# under test bound to a parsed fixture.
func(code2.get(1), "c", recursive=False)
        func("{{a}}", "d", recursive=False)
        func(code2.get(-1), "e", recursive=True)
        func("{{b}}", "f", recursive=True)
        self.assertEqual(expected[1], code2)

        # Nested templates: a direct node reference (or an equal string)
        # should only be found inside parameters when recursive=True.
        code3 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
        func = partial(meth, code3)
        obj = code3.get(0).params[0].value.get(0)
        self.assertRaises(ValueError, func, obj, "x", recursive=False)
        func(obj, "x", recursive=True)
        self.assertRaises(ValueError, func, "{{f}}", "y", recursive=False)
        func("{{f}}", "y", recursive=True)
        self.assertEqual(expected[2], code3)

        # Multi-node targets: matching by string or by a Wikicode slice of
        # the same tree works, but a freshly parsed (foreign) Wikicode
        # object is never found.
        code4 = parse("{{a}}{{b}}{{c}}{{d}}{{e}}{{f}}{{g}}{{h}}{{i}}{{j}}")
        func = partial(meth, code4)
        fake = parse("{{b}}{{c}}")
        self.assertRaises(ValueError, func, fake, "q", recursive=False)
        self.assertRaises(ValueError, func, fake, "q", recursive=True)
        func("{{b}}{{c}}", "w", recursive=False)
        func("{{d}}{{e}}", "x", recursive=True)
        func(Wikicode(code4.nodes[-2:]), "y", recursive=False)
        func(Wikicode(code4.nodes[-2:]), "z", recursive=True)
        self.assertEqual(expected[3], code4)
        # "{{c}}{{d}}" no longer exists contiguously after the edits above.
        self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=False)
        self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=True)

        # Multi-node targets nested inside template parameters require
        # recursive=True to be located.
        code5 = parse("{{a|{{b}}{{c}}|{{f|{{g}}={{h}}{{i}}}}}}")
        func = partial(meth, code5)
        self.assertRaises(ValueError, func, "{{b}}{{c}}", "x", recursive=False)
        func("{{b}}{{c}}", "x", recursive=True)
github osrsbox / osrsbox-db / builders / monsters / drop_tables.py View on Github external
def commonseed(wikitext: str) -> Dict:
    """Set common seed drop tables items.

    Item drops are hard coded.
    Drop rates sourced from:
    https://osrs.wiki/w/Drop_table#Common_seed_drop_table

    :param wikitext: The monsters wikitext as a string.
    :return: Dictionary of items on the drop table.
    """
    drop_table_template = None
    wikicode = mwparserfromhell.parse(wikitext)
    templates = wikicode.filter_templates()
    # Find the common-seed drop table template by case-insensitive substring
    # match on the template's text.
    for template in templates:
        if "manyseeddroptable2" in template.lower():
            drop_table_template = template

    # NOTE(review): if no matching template was found, drop_table_template
    # is still None and the .replace() below raises AttributeError.
    # Strip the {{ }} braces so the raw pipe-separated arguments remain.
    drop_table_template = drop_table_template.replace("{", "")
    drop_table_template = drop_table_template.replace("}", "")

    try:
        # First pipe-separated argument is the base rarity, written as a
        # fraction (e.g. "1/8"); Fraction handles the division exactly.
        base_rarity = float(Fraction(drop_table_template.split("|")[1]))
    except ValueError:
        # NOTE(review): exits the whole process rather than raising —
        # deliberate for this batch builder, but hostile to reuse.
        print("NO BASE RARITY FOR: drop_tables.commonseed")
        exit(1)

    # Populate drop table items
    # (truncated in this excerpt: the hard-coded items dict continues below)
    items = {
github theopolisme / theobot / tafi_archiver_and_mover.py View on Github external
# NOTE(review): Python 2 bot script fragment (uses `unicode`); relies on
# module-level state (`site`, `now`, `timey`, `process_section`,
# `move_to_holding`, `move_to_archive`) defined outside this excerpt.
# Globals are used so process_section/move_* can append to these buffers.
global unsuccessful_page_new
unsuccessful_page_new = unsuccessful_page.edit()
# Start a dated archive heading, e.g. "== Archived March 3rd ==".
unsuccessful_page_new += "\n== Archived " + now.strftime('%B') + " " + timey.ordinal(now.day) + " =="

holding_page = site.Pages["Wikipedia:Today's articles for improvement/Holding area"]
global holding_new
holding_new = holding_page.edit()

nominations_page = site.Pages["Wikipedia:Today's articles for improvement/Nominated articles"]
nominations_page_text = nominations_page.edit()

global nominations_page_new
nominations_page_new = nominations_page.edit()

# Split the nominations page into its level-2 sections; each section is one
# nomination to be processed.
sections = mwparserfromhell.parse(nominations_page_text).get_sections(levels=[2], include_headings=True)
del sections[0] # removes lede section, which we don't want

for section in sections:
	process_section(unicode(section))

# Two counters used in edit summaries
count_toholding = 0
count_archive = 0

move_to_holding()
move_to_archive()

# This is used to remove extra spaces in the noms page. Warning: HACKY.
# !Todo just fix the original regex above. 
rm_spaces = re.compile(r"""\n\n*{{TAFI""", flags=re.DOTALL | re.UNICODE | re.M)
# NOTE(review): re.sub returns a new string — this result is discarded, so
# the substitution has no effect on nominations_page_new as written.
re.sub(rm_spaces, """\n\n{{TAFI""", nominations_page_new)
github theopolisme / theobot / reggaeton_tagger.py View on Github external
def editor(text):
	"""This function does the bulk of the
	work. Requires one parameter, text.

	Tags talk-page wikitext with reggaeton=yes on any
	{{WikiProject Latin music}} banner, including banners nested inside a
	banner-shell template. Python 2 code (print statement below).
	NOTE(review): excerpt is truncated mid-loop at the end.
	"""
	
	code = mwparserfromhell.parse(text)
	
	# This is used to check if we need to add the template.	
	has_been_tagged = False
	
	for template in code.filter_templates():
		if template.name in ('WikiProject Latin music', 'Latin music'):
			has_been_tagged = True
		if template.name in ('WikiProject Latin music', 'Latin music') and not template.has_param("reggaeton"):
			template.add("reggaeton", "yes")
			print "Reggaeton value added."
		# Banner shells hold nested banners in their first positional
		# parameter; recurse one level into that parameter's templates.
		# NOTE(review): the inner loop variable shadows the outer
		# `template`, so the outer iteration continues from the shell.
		if template.name in lists.bannershell_redirects:
			x = template.get(1).value
			for template in x.filter_templates():
				if template.name in ('WikiProject Latin music', 'Latin music'):
					has_been_tagged = True
				if template.name in ('WikiProject Latin music', 'Latin music') and not template.has_param("reggaeton"):
github osrsbox / osrsbox-db / extraction_tools_wiki / extract_all_monsters_page_wikitext.py View on Github external
def extract_wikitext(page_name):
    """Fetch the raw wikitext of one OSRS wiki page and save it to JSON.

    Queries the MediaWiki "parse" API for *page_name*, extracts the page's
    wikitext, round-trips it through mwparserfromhell, and writes a
    one-entry dict ``{page_name: wikitext}`` to a randomly named JSON file
    inside the ``extract_all_monsters_page_wikitext`` directory.

    :param page_name: Wiki page title to fetch (assumed URL-safe here —
        compare the items extractor, which percent-encodes & and +).
    :return: None. Returns early (silently) when the API response has no
        wikitext, e.g. for a missing page.
    """
    url = "https://oldschool.runescape.wiki/api.php?action=parse&prop=wikitext&format=json&page=" + page_name
    result = requests.get(url, headers=custom_agent)
    data = result.json()

    try:
        # Extract the actual content. Fix: the original bound this to a
        # name shadowing the builtin `input` and .encode("utf-8")-ed it,
        # which under Python 3 produced bytes and ultimately serialized a
        # b'...' repr into the JSON output. Keep the wikitext as str.
        raw_wikitext = data["parse"]["wikitext"]["*"]
    except KeyError:
        # Or return if cannot extract wikitext from page
        return

    # Parse actual wikitext content using mwparserfromhell
    wikitext = mwparserfromhell.parse(raw_wikitext)

    # Convert to JSON: one {page_name: wikitext} entry per output file.
    monster_dict = {page_name: str(wikitext)}

    # Random 64-char name sidesteps page titles that are not filesystem-safe.
    out_fi_name = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(64))

    out_fi = os.path.join("extract_all_monsters_page_wikitext", out_fi_name + ".json")
    with open(out_fi, "w") as f:
        json.dump(monster_dict, f)
github ricordisamoa / wiki / wikidata / wd_import.py View on Github external
def from_page(page,import_data=True,remove=False):
	"""Harvest template parameters from a wiki page into its Wikidata item.

	Python 2 code (`unicode`, `basestring`). Relies on a module-level
	`harvesting` list of template descriptors, each with 'name', 'params',
	and optional 'sites'/'filter' keys — defined outside this excerpt.
	NOTE(review): excerpt is truncated mid-loop at the end; the
	`import_data`/`remove` flags are presumably consumed in the missing
	tail — verify against the full source.
	"""
	pywikibot.output(u'parsing {page}'.format(page=page))
	item=pywikibot.ItemPage.fromPage(page)
	if not item.exists():
		pywikibot.output(u'\03{{lightyellow}}item not found for {page}\03{{default}}'.format(page=page))
		return False
	text=page.get(force=True)
	code=mwparserfromhell.parse(text)
	imported=[]
	for template in code.ifilter_templates():
		tname=template.name.strip()
		for harv in harvesting:
			# harv['name'] may be a single name or a collection; match
			# exactly or case-insensitively against either form.
			if tname==harv['name'] or tname in harv['name'] or tname.lower()==harv['name'] or tname.lower() in harv['name']:
				# Optional per-descriptor whitelist of wiki sites.
				if 'sites' in harv and (not page.site.dbName() in harv['sites']):
					pywikibot.output(u'\03{lightyellow}%s template was found but skipped because site is not whitelisted\03{default}'%template.name)
					continue
				for param in harv['params']:
					# param['name'] may likewise be one name or several.
					for pname in ([param['name']] if isinstance(param['name'],basestring) else param['name']):
						if template.has_param(pname):
							rawvalue=value=unicode(template.get(pname).value)
							pywikibot.output(u'\03{lightgreen}%s parameter found in %s: %s\03{default}'%(pname,tname,value))
							# Apply optional filter callable(s) to normalize the raw value.
							if 'filter' in param:
								for func in (param['filter'] if isinstance(param['filter'],list) else [param['filter']]):
									value=func(value)
github osrsbox / osrsbox-db / extraction_tools_wiki / extract_all_items_page_wikitext.py View on Github external
# NOTE(review): fragment only — the enclosing function's `def` line is not
# visible in this excerpt. Percent-encode characters that would corrupt the
# query string ("&" separates parameters, "+" decodes to a space).
norm_page_name = page_name.replace("&", "%26")
    norm_page_name = norm_page_name.replace("+", "%2B")
    # Example: http://oldschoolrunescape.wikia.com/api.php?action=parse&prop=wikitext&format=json&page=3rd_age_pickaxe
    url = "https://oldschool.runescape.wiki/api.php?action=parse&prop=wikitext&format=json&page=" + norm_page_name
    result = requests.get(url, headers=custom_agent)
    data = result.json()

    try:
        # Extract the actual content
        # NOTE(review): `input` shadows the builtin, and .encode("utf-8")
        # yields bytes — under Python 3 this leaks a b'...' repr into the
        # JSON written below; confirm against the project's Python version.
        input = data["parse"]["wikitext"]["*"].encode("utf-8")
    except KeyError:
        # Or return if cannot extract wikitext from page
        return

    # Parse actual wikitext content using mwparserfromhell
    wikitext = mwparserfromhell.parse(input)

    # Convert to JSON
    item_dict = dict()
    item_dict[page_name] = str(wikitext)

    # Random 64-char file name sidesteps titles that are not filesystem-safe.
    out_fi_name = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(64))

    out_fi = os.path.join("extract_all_items_page_wikitext", out_fi_name + ".json")
    with open(out_fi, "w") as f:
        json.dump(item_dict, f)
github lahwaacz / wiki-scripts / ws / parser_helpers / template_expansion.py View on Github external
# NOTE(review): deep interior fragment of a template-expansion loop — the
# enclosing `expand` function, the loop over `template` nodes, and the
# definitions of `name`, `parent`, `visited_templates`, etc. are outside
# this excerpt; it is also truncated mid-branch at the end.
# unhandled modifier - restore the name
                    modifier = ""
                    name = original_name
            else:
                modifier = ""

            # handle magic words
            if MagicWords.is_magic_word(name):
                if substitute_magic_words is True:
                    # MW incompatibility: in some cases, MediaWiki tries to transclude a template
                    # if the parser function failed (e.g. "{{ns:Foo}}" -> "{{Template:Ns:Foo}}")
                    mw = MagicWords(title)
                    replacement = mw.get_replacement(template)
                    if replacement is not None:
                        # expand the replacement to handle nested magic words in parser functions like {{#if:}})
                        replacement = mwparserfromhell.parse(replacement)
                        expand(title, replacement, content_getter_func, visited_templates)
#                        wikicode.replace(template, replacement)
                        parent.replace(template, replacement, recursive=False)
            else:
                # Not a magic word: resolve the transclusion target title.
                try:
                    target_title = get_target_title(title, name)
                except TitleError:
                    logger.error("Invalid transclusion on page [[{}]]: {}".format(title, template))
                    continue

                # Fetch the target page's content; a ValueError signals a
                # non-existing page (handling continues past this excerpt).
                try:
                    content = content_getter_func(target_title)
                except ValueError:
                    if not modifier:
                        # If the target page does not exist, MediaWiki just skips the expansion,
                        # but it renders a wikilink to the non-existing page.
github marcinwrochna / abbrevIso / wikiBot / abbrevIsoBot.py View on Github external
def fillAbbreviation(pageText: str, whichInfobox: int, abbrev: str) -> str:
    """Return pageText with changed abbreviation in specified infobox.

    Scans the page's journal infoboxes in document order; only the one at
    zero-based position *whichInfobox* gets its 'abbreviation' parameter
    set (added or overwritten) to *abbrev*.
    """
    parsed = mwparserfromhell.parse(pageText)
    infobox_index = 0
    for template in parsed.filter_templates():
        # Match the infobox template name in either common capitalization.
        is_journal_box = (template.name.matches('infobox journal')
                          or template.name.matches('Infobox Journal'))
        if not is_journal_box:
            continue
        if infobox_index == whichInfobox:
            # Mirror the title parameter's leading-space style so the new
            # parameter value lines up with the infobox's formatting.
            if template.has_param('title') and template.get('title')[0] == ' ':
                abbrev = ' ' + abbrev
            template.add('abbreviation', abbrev, preserve_spacing=True)
        infobox_index += 1
    return str(parsed)