From 0b70e4b47604d7a6ee1f851c31bb5ce2f6410044 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Thu, 2 May 2013 05:34:06 -0400 Subject: [PATCH] fix parsing of wiktionary's html They changed their code a bit so the [edit] links appear at the end of the headlines instead of at the start of them. --- main.coffee | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/main.coffee b/main.coffee index 4aaf2d5..0467055 100644 --- a/main.coffee +++ b/main.coffee @@ -484,6 +484,8 @@ extract_wiktionary_definiton = (html) -> valid_parts = ["Abbreviation", "Adjective", "Adverb", "Article", "Cardinal number", "Conjunction", "Determiner", "Interjection", "Noun", "Numeral", "Particle", "Preposition", "Pronoun", "Verb"] + edit_link_regex = new RegExp(' ?\\[edit\\] ?') + elements.each (i, el) -> #which tag: el.tagName if el.tagName is 'H2' @@ -492,12 +494,12 @@ extract_wiktionary_definiton = (html) -> if found return false # break part = false # mark us not being in a definition section unless the next section finds a part of speach header - language = $(el).text().substr 7 + language = $(el).text().replace(edit_link_regex, '') if language and el.tagName is 'H3' or el.tagName is 'H4' # eg yak def uses one for english and one for dutch part = false - text = $(el).text() + text = $(el).text().replace(edit_link_regex, '') for p in valid_parts - if text is "[edit] #{p}" + if text is "#{p}" part = p.toLowerCase() # FIXME break if part and el.tagName is 'OL' @@ -537,6 +539,8 @@ look_up_definition = (word) -> tdl = extract_wiktionary_definiton data.parse.text['*'] if tdl show_definition word, tdl[0], tdl[1], tdl[2] + else + $definition_body.html "Oops, couldn't find a definition for \"#{word}\"." else $definition_body.html "Sorry, couldn't find a definition for \"#{word}\"." }) -- 1.7.10.4