/*
 * search-dictionary.js
 *
 * Code to implement functionality for getting content from wiktionary.
 *
 * External objects:
 *   DictionaryQuery               - Namespace for handling dictionary results.
 *   DictionaryQuery.handle(query) - Test if the query is of the form
 *                                   "definition of <word>". If so attempt to
 *                                   find a definition on wiktionary and
 *                                   present it.
 *
 * Semi-external objects:
 *   DictionaryQueryTest - test object.
 *
 * Dependencies (not defined in this file):
 *   $, Class.create, Ajax.Request, ParseUtils.regexps_find_match and the
 *   String/Array extensions interpolate, strip, evalJSON and clone.
 *   NOTE(review): these look like Prototype.js plus a project helper -
 *   confirm against the page that loads this script.
 *
 * Implementation notes:
 *   i.  Using Ajax to communicate with wiktionary.
 *   ii. At the moment uses a trick where a server-side component fetches
 *       resources from another page to allow cross-site scripting. This
 *       component needs fastcgi installed (but is ludicrously simple). This
 *       is a good stop-gap approach until cross-site scripting is supported.
 *       TO MAKE THIS WORK ON YOUR COMPUTER YOU WILL NEED TO CHANGE THE URL
 *       OF THIS COMPONENT (DictionaryQuery.localizing_server) TO THAT OF
 *       YOUR MACHINE.
 *
 * Bugs and caveats:
 *   a. Defaults to english at the moment (but could be extended to other
 *      languages quickly).
 *   b. Load times can be slightly slow.
 *   c. Only returns the first result if several etymologies exist.
 *   d. The output produced can be slightly ugly at the moment (it is
 *      stripped wiki). This can't be avoided without sending data to a
 *      mediawiki server to be parsed - which is bad for bandwidth - or
 *      asking the mediawiki server to parse only a particular section of a
 *      wiki page - which can't be done at the moment. The latter would be
 *      the ideal solution.
 *   e. Presentation is slightly ugly.
 */

/**
 * Strip the wiki markup this file knows about from a piece of wiki text.
 *
 * Piped links ("[[target|label]]") are reduced to their label, "{{" / "}}"
 * become "(" / ")", and bold/italic quotes and remaining link brackets are
 * removed. Every occurrence is replaced (global regexes); the previous
 * implementation passed a non-standard third "flags" argument to
 * String.replace, which engines ignore, so only the first occurrence was
 * handled.
 *
 * @param {string} text Raw wiki text.
 * @returns {string} The text with the markup removed.
 */
function _convert_wiki_to_text(text) {
    text = text.replace(/\[\[[^\]]*\|([^\]]*)\]\]/g, "$1");
    text = text.replace(/\{\{/g, "(");
    text = text.replace(/\}\}/g, ")");
    // Bold (''') must be removed before italic ('') so no stray quote is left.
    text = text.replace(/'''/g, "");
    text = text.replace(/''/g, "");
    text = text.replace(/\[\[/g, "");
    text = text.replace(/\]\]/g, "");
    return text;
}

/** @returns {Element} The element with id "dictionary-container". */
function get_dictionary_container() {
    return $("dictionary-container");
}

/** @returns {Element} The element with id "dictionary-results-container". */
function get_dictionary_element() {
    return $("dictionary-results-container");
}

/**
 * A parsed list of definitions, each with its own list of quotes, taken from
 * the first "#" list found in a piece of raw wiki text.
 */
var DefinitionsObject = Class.create({
    /**
     * @param {string} text Raw wiki text of one section.
     */
    initialize: function(text) {
        var parsed = this._parse_next_raw_list(text);
        var defs = parsed[0];
        var quotes = parsed[1];
        var i, j;

        for (i = 0; i < defs.length; i++) {
            defs[i] = _convert_wiki_to_text(defs[i]);
        }
        for (i = 0; i < quotes.length; i++) {
            for (j = 0; j < quotes[i].length; j++) {
                quotes[i][j] = _convert_wiki_to_text(quotes[i][j]);
            }
        }

        this._defs = defs;
        this._quotes = quotes;
    },

    /** @returns {Array} A copy of the definition strings. */
    get_definitions: function() {
        return this._defs.clone();
    },

    /** @returns {Array} A deep copy of the quotes; entry i belongs to definition i. */
    get_quotes: function() {
        return deep_clone_array(this._quotes);
    },

    /** @returns {number} The number of definitions. */
    get_length: function() {
        return this._defs.length;
    },

    /**
     * Build an <ol> with one <li> per definition; definitions that have
     * quotes get a nested <ul> of those quotes.
     *
     * NOTE(review): definitions and quotes come from a remote wiki page and
     * are inserted with innerHTML, so any HTML in them is interpreted by the
     * browser. Consider text nodes if the stripped wiki text never needs to
     * carry markup.
     *
     * @returns {Element} The list element (not yet attached to the page).
     */
    build_list: function() {
        var defs = this._defs;
        var quotes = this._quotes;
        var list = document.createElement("ol");

        for (var i = 0; i < defs.length; i++) {
            var item = document.createElement("li");
            item.innerHTML = defs[i];
            list.appendChild(item);

            if (quotes[i].length > 0) {
                var quotes_list = document.createElement("ul");
                for (var j = 0; j < quotes[i].length; j++) {
                    var quote_item = document.createElement("li");
                    quote_item.innerHTML = quotes[i][j];
                    quotes_list.appendChild(quote_item);
                }
                item.appendChild(quotes_list);
            }
        }
        return list;
    },

    /**
     * Parse the first run of consecutive lines starting with "#".
     *
     * A line of the form "#:text" is a quote belonging to the preceding
     * definition; any other "#" line starts a new definition. The very first
     * line of the run is always taken as a definition. Parsing stops at the
     * first line that does not start with "#".
     *
     * @param {string} raw_wiki_text Raw wiki text.
     * @returns {Array} A pair [defs, quotes]; both are empty arrays when the
     *     text contains no line starting with "#".
     */
    _parse_next_raw_list: function(raw_wiki_text) {
        var defs = [];
        var quotes = [];

        // Anchor on a line start so a "#" inside a link or template that
        // precedes the list is not mistaken for the start of the list.
        var start = raw_wiki_text.search(/^#/m);
        if (start === -1) {
            return [defs, quotes];
        }

        var lines = raw_wiki_text.substring(start).split("\n");
        for (var i = 0; i < lines.length; i++) {
            if (lines[i].charAt(0) !== "#") {
                break;
            }
            var line = lines[i].substring(1).strip();
            if (defs.length > 0 && line.charAt(0) === ":") {
                quotes[quotes.length - 1].push(line.substring(1).strip());
            } else {
                defs.push(line);
                quotes.push([]);
            }
        }
        return [defs, quotes];
    }
});

/**
 * Namespace for recognising dictionary queries and fetching definitions
 * from wiktionary through the local proxy (localizing_server).
 */
var DictionaryQuery = {
    // URL of the server-side component that fetches the (encoded) target URL.
    localizing_server: "http://192.168.2.40/search/getpage.fcgi?url=#{url}",

    // Raw wiki text of one section of a page.
    section_query_template: "http://en.wiktionary.org/w/index.php?action=raw&title=#{title}&section=#{section_number}&templates=expand",

    // Section list of a page, obtained by parsing "{{:title}}__TOC__".
    sections_query_template: "http://en.wiktionary.org/w/api.php?action=parse&text={{:#{title}}}__TOC__&prop=sections&format=json",

    // Page info; used to find out whether the page exists.
    page_query_template: "http://en.wiktionary.org/w/api.php?action=query&titles=#{title}&prop=info&format=json",

    // Query phrasings that ask for a definition; group 1 is the word.
    // All patterns are anchored with ^\s* (three of them previously started
    // with a literal "s*", which left them unanchored).
    regular_expressions: [
        /^\s*definition\s+of\s+(.*)/,
        /^\s*define\s+(.*)/,
        /^\s*meaning\s+of\s+(.*)/,
        /^\s*what\s+does\s+(.*)\s+mean\??/,
        /^\s*dictionary\s+definition\s+of\s+(.*)/,
        /^\s*find\s+(.*)\s+in\s+a\s+dictionary\s*/,
        /^\s*find\s+(.*)\s+in\s+the\s+dictionary\s*/,
        /^\s*find\s+(.*)\s+in\s+dictionary\s*/
    ],

    /**
     * Hide and clear the dictionary widget; if the query asks for a
     * definition, look up the noun, verb and adjective definitions in turn
     * and show the widget once the last lookup has finished, provided at
     * least one definition was found.
     *
     * @param {string} query_string The user's query.
     * @returns {boolean} Always false.
     */
    handle: function(query_string) {
        var match = ParseUtils.regexps_find_match(this.regular_expressions, query_string);
        var word_types = ["noun", "verb", "adjective"];
        var definition_found = false;
        var expr;

        get_dictionary_container().style.display = "none";
        get_dictionary_element().innerHTML = "";

        // Append a heading such as "tree(noun)" followed by the definitions.
        function append_section(word_type, def_object) {
            var heading = document.createElement("h4");
            heading.appendChild(document.createTextNode(expr + "(" + word_type + ")"));
            get_dictionary_element().appendChild(heading);
            get_dictionary_element().appendChild(def_object.build_list());
        }

        // Look up word_types[index]; continue with the next type whether or
        // not the lookup succeeds, and reveal the widget after the last one.
        function lookup(index) {
            if (index >= word_types.length) {
                if (definition_found) {
                    get_dictionary_container().style.display = "block";
                }
                return;
            }
            var word_type = word_types[index];
            DictionaryQuery["_get_" + word_type + "_definition"](expr, "english", {
                onSuccess: function(def_object) {
                    definition_found = true;
                    append_section(word_type, def_object);
                    lookup(index + 1);
                },
                onFailure: function() {
                    lookup(index + 1);
                }
            });
        }

        if (match) {
            expr = match[1];
            lookup(0);
        }
        return false;
    },

    /**
     * @param {string} url The remote URL to fetch.
     * @returns {string} The URL of the local proxy with `url` encoded into it.
     */
    _make_query_url_local: function(url) {
        return this.localizing_server.interpolate({url: encodeURIComponent(url)});
    },

    /**
     * Fetch the raw wiki text of one section of a page.
     *
     * NOTE(review): no `method` is passed to Ajax.Request here, unlike the
     * requests in _get_sections which use 'get' - confirm that is intended.
     *
     * @param {string} title Page title.
     * @param {number} section_number Section number within the page.
     * @param {Object} options onSuccess(text) is required; onFailure is optional.
     * @returns {Object} The Ajax.Request that was started.
     * @throws {string} If options.onSuccess is missing.
     */
    _get_sections_raw_wiki: function(title, section_number, options) {
        if (!options.onSuccess) {
            throw "No onSuccess callback specified";
        }
        var failure_callback = options.onFailure || function() { return; };

        function success_callback(transport) {
            options.onSuccess(transport.responseText);
        }

        // The title is encoded so that spaces, "&" etc. survive inside the query.
        var section_query_url = this.section_query_template.interpolate({
            title: encodeURIComponent(title),
            section_number: section_number
        });
        section_query_url = this._make_query_url_local(section_query_url);
        return new Ajax.Request(section_query_url, {
            onSuccess: success_callback,
            onFailure: failure_callback
        });
    },

    /**
     * Fetch the section structure of a page as a Tree whose values are the
     * 1-based section numbers.
     *
     * First checks that the page exists, then requests its section list.
     *
     * @param {string} title Page title.
     * @param {Object} options
     *     onSuccess(tree)  - required.
     *     onFailure(error) - optional; called on transport failures and on
     *                        exceptions raised while handling a response
     *                        (including exceptions thrown by onSuccess).
     *     onNoPageMatch()  - optional; called when the page does not exist.
     *                        Defaults to the failure callback.
     * @throws {string} If options.onSuccess is missing.
     */
    _get_sections: function(title, options) {
        if (!options.onSuccess) {
            throw "No onSuccess callback specified";
        }
        var retrieved_callback = options.onSuccess;
        var failure_callback = options.onFailure || function() { return; };
        var no_page_match_callback = options.onNoPageMatch || failure_callback;
        var encoded_title = encodeURIComponent(title);

        function sections_success_callback(transport) {
            try {
                var data = transport.responseText.evalJSON(true);
                var sections = data.parse.sections;
                var tree = new Tree();
                for (var i = 0; i < sections.length; i++) {
                    tree.append(sections[i].line, i + 1, {level: sections[i].toclevel});
                }
                retrieved_callback(tree);
            } catch (e) {
                failure_callback(e);
            }
        }

        function page_success_callback(transport) {
            try {
                var data = transport.responseText.evalJSON(true);
                // The response lists a page under the key "-1" when it is missing.
                if (data.query.pages.hasOwnProperty("-1")) {
                    no_page_match_callback();
                    return;
                }
                var sections_query_url = DictionaryQuery.sections_query_template.interpolate({title: encoded_title});
                sections_query_url = DictionaryQuery._make_query_url_local(sections_query_url);
                new Ajax.Request(sections_query_url, {
                    method: 'get',
                    onSuccess: sections_success_callback,
                    onFailure: failure_callback
                });
            } catch (e) {
                failure_callback(e);
            }
        }

        var page_query_url = this.page_query_template.interpolate({title: encoded_title});
        page_query_url = this._make_query_url_local(page_query_url);
        new Ajax.Request(page_query_url, {
            method: 'get',
            onSuccess: page_success_callback,
            onFailure: failure_callback
        });
    },

    /** Noun variant of _get_definition; see there for the options. */
    _get_noun_definition: function(word, language, options) {
        return this._get_definition(word, "noun", language, options);
    },

    /** Adjective variant of _get_definition; see there for the options. */
    _get_adjective_definition: function(word, language, options) {
        return this._get_definition(word, "adjective", language, options);
    },

    /** Verb variant of _get_definition; see there for the options. */
    _get_verb_definition: function(word, language, options) {
        return this._get_definition(word, "verb", language, options);
    },

    /**
     * Look up the definitions of `word` for one word type and language.
     *
     * Finds the language section of the page, then the word-type section
     * below it, fetches that section's raw wiki text and parses it.
     *
     * @param {string} word The word to look up (page title).
     * @param {string} word_type Section name to look for, e.g. "noun".
     * @param {string} language Language section name, e.g. "english".
     * @param {Object} options
     *     onSuccess(definitions_object) - required.
     *     onFailure(error)              - optional; called when the language
     *                                     or word type is missing, when no
     *                                     definitions are found, on transport
     *                                     errors and when onSuccess throws.
     *     onWordNotFound()              - optional; called when the page does
     *                                     not exist. Defaults to onFailure.
     * @throws {string} If options.onSuccess is missing.
     */
    _get_definition: function(word, word_type, language, options) {
        if (!options.onSuccess) {
            throw "No onSuccess callback defined";
        }
        var language_key = language.toLowerCase();
        var success_callback = options.onSuccess;
        var failure_callback = options.onFailure || function() { return; };
        var word_not_found_callback = options.onWordNotFound || failure_callback;

        function raw_success_callback(text) {
            try {
                var defs = new DefinitionsObject(text);
                if (defs.get_length() === 0) {
                    throw "There is no " + word_type + " definition of " + word + ".";
                }
                success_callback(defs);
            } catch (e) {
                failure_callback(e);
            }
        }

        function sections_success_callback(tree) {
            try {
                var language_tree = tree.find_subtree(language_key, {case_sensitive: false});
                if (language_tree == null) {
                    throw "Failed to find language: " + language_key;
                }
                var word_type_node = language_tree.find_node(word_type, {case_sensitive: false});
                if (word_type_node == null) {
                    throw "There is no " + word_type + " definition of " + word + ".";
                }
                DictionaryQuery._get_sections_raw_wiki(word, word_type_node.get_value(), {
                    onSuccess: raw_success_callback,
                    onFailure: failure_callback
                });
            } catch (e) {
                failure_callback(e);
            }
        }

        this._get_sections(word, {
            onSuccess: sections_success_callback,
            onFailure: failure_callback,
            onNoPageMatch: word_not_found_callback
        });
    }
};

/**
 * A tree stored as nested arrays. Every entry is [key, value, children],
 * where children is again a list of entries.
 */
var Tree = Class.create({
    /**
     * @param {Array} [array] Existing entry list to wrap (not copied);
     *     an empty tree is created when omitted.
     */
    initialize: function(array) {
        this._data = array ? array : [];
    },

    /**
     * Append an entry at the given level, below the most recently appended
     * entry of each shallower level.
     *
     * Efficiency: frequent reuse of append for inserting data becomes
     * hideously inefficient for tree building when one has very deep trees.
     *
     * @param {*} item Key of the new entry.
     * @param {*} value Value of the new entry.
     * @param {Object} [options] options.level is the 1-based depth at which
     *     to insert; defaults to 1 (top level).
     * @throws {string} If there is no parent entry at a shallower level.
     */
    append: function(item, value, options) {
        var level = 1;
        if (options && options.hasOwnProperty("level")) {
            level = parseInt(options.level, 10);
        }

        var entry_list = this._data;
        for (var remaining = level; remaining > 1; remaining--) {
            var parent = entry_list[entry_list.length - 1];
            if (!parent || parent[2] == null) {
                throw "Cannot enter data at this level:" + level + ".Tree:" + this.toString();
            }
            entry_list = parent[2];
        }
        entry_list.push([item, value, []]);
    },

    /**
     * @param {Tree} tree Tree to compare with.
     * @returns {boolean} True when both trees have equal keys and values
     *     (loose equality) in the same structure.
     */
    equals: function(tree) {
        return this._tree_data_equal(this._data, tree._data);
    },

    /** Recursive worker for equals(); a and b are entry lists. */
    _tree_data_equal: function(a, b) {
        if (a.length != b.length) {
            return false;
        }
        for (var i = 0; i < a.length; i++) {
            if (a[i][0] != b[i][0] || a[i][1] != b[i][1]) {
                return false;
            }
            if (!this._tree_data_equal(a[i][2], b[i][2])) {
                return false;
            }
        }
        return true;
    },

    /** @returns {string} E.g. "((one:1 (two:2) (three:3)) (four:4))". */
    toString: function() {
        return "(" + this._tree_data_to_string(this._data) + ")";
    },

    /** Recursive worker for toString(); d is an entry list. */
    _tree_data_to_string: function(d) {
        var parts = [];
        for (var i = 0; i < d.length; i++) {
            var data = {
                key: d[i][0],
                value: d[i][1],
                rest: this._tree_data_to_string(d[i][2])
            };
            if (data.rest == "") {
                parts.push("(#{key}:#{value})".interpolate(data));
            } else {
                parts.push("(#{key}:#{value} #{rest})".interpolate(data));
            }
        }
        return parts.join(" ");
    },

    /**
     * Find the first entry (depth-first, parents before children) whose key
     * matches and return it, with a copy of its descendants, as a new Tree.
     *
     * @param {string} key Key to look for.
     * @param {Object} [options] Pass {case_sensitive: false} to ignore case.
     * @returns {Tree} The subtree, or null when the key is not found.
     */
    find_subtree: function(key, options) {
        var data = this._tree_data_find_subtree(this._data, key, options);
        if (data == null) {
            return null;
        }
        return new Tree(data);
    },

    /** Recursive worker for find_subtree(); d is an entry list. */
    _tree_data_find_subtree: function(d, key, options) {
        var ignore_case = !!(options && options.hasOwnProperty("case_sensitive") && !options.case_sensitive);
        var wanted = ignore_case ? key.toLowerCase() : key;

        for (var i = 0; i < d.length; i++) {
            var tree_item = d[i][0];
            var candidate = ignore_case ? tree_item.toLowerCase() : tree_item;
            if (candidate == wanted) {
                // efficiency: (not necessary to copy if we are only reading)
                return [[tree_item, d[i][1], deep_clone_array(d[i][2])]];
            }
            var search = this._tree_data_find_subtree(d[i][2], key, options);
            if (search != null) {
                return search;
            }
        }
        return null;
    },

    /**
     * Like find_subtree(), but returns only the matching entry as a Node.
     *
     * @param {string} key Key to look for.
     * @param {Object} [options] Pass {case_sensitive: false} to ignore case.
     * @returns {Node} The node, or null when the key is not found.
     */
    find_node: function(key, options) {
        // efficiency - creating a whole copied subtree for a single node.
        var tree = this.find_subtree(key, options);
        if (tree == null) {
            return null;
        }
        return new Node(tree._data[0][0], tree._data[0][1]);
    }
});

/**
 * A key/value pair taken from a Tree.
 *
 * NOTE(review): when this file is loaded as a plain browser script this
 * global replaces the DOM's own `Node` - confirm nothing else on the page
 * depends on it.
 */
var Node = Class.create({
    initialize: function(key, value) {
        this._key = key;
        this._value = value;
    },

    /** @returns {*} The node's value. */
    get_value: function() {
        return this._value;
    },

    /** @returns {string} "key:value". */
    toString: function() {
        return this._key + ":" + this._value;
    }
});

/**
 * Recursively copy nested arrays; anything that is not an array is returned
 * as-is (not copied).
 *
 * @param {*} arr Value to copy.
 * @returns {*} The copy.
 */
function deep_clone_array(arr) {
    if (!(arr instanceof Array)) {
        return arr;
    }
    var new_arr = [];
    for (var i = 0; i < arr.length; i++) {
        new_arr.push(deep_clone_array(arr[i]));
    }
    return new_arr;
}

/**
 * Build the section tree used as a fixture by DictionaryQueryTest and
 * TreeTest. A fresh tree is returned on every call.
 *
 * @returns {Tree}
 */
function _build_sections_test_tree() {
    return new Tree([
        ["English", 1, [
            ["Pronunciation", 2, []],
            ["Etymology", 3, []],
            ["Noun", 4, [
                ["Related terms", 5, []],
                ["Translations", 6, []]
            ]],
            ["Verb", 7, [
                ["Translations", 8, []]
            ]]
        ]],
        ["Dutch", 9, [
            ["Noun", 10, []]
        ]]
    ]);
}

/**
 * Tests for DictionaryQuery. Apart from definitions_object_test these issue
 * real requests through DictionaryQuery and report failures with alert().
 */
var DictionaryQueryTest = {
    sections_test: function() {
        var test_tree = _build_sections_test_tree();

        function retrieved_callback(actual_tree) {
            try {
                if (!actual_tree.equals(test_tree)) {
                    throw "Sections test failed. Returned sections differ from answer. Returned" +
                        " sections:" + actual_tree.toString() + " test answer: " + test_tree.toString();
                }
            } catch (e) {
                alert(e);
            }
        }

        function shout(e) {
            alert(e);
        }

        DictionaryQuery._get_sections("tree", {onSuccess: retrieved_callback, onFailure: shout});
    },

    noun_missing_definitions_test: function() {
        var word = "krilg";

        function defined_incorrectly_callback() {
            throw "Noun_missing_definitions_test failure. word:" + word + " appears defined when it shouldn't.";
        }

        function word_not_found_callback() {
            return;
        }

        function failure_callback(e) {
            alert("noun_missing_definitions_test failed. Failure callback called incorrectly. Error: '" + e + "'");
        }

        DictionaryQuery._get_noun_definition(word, "english", {
            onSuccess: defined_incorrectly_callback,
            onWordNotFound: word_not_found_callback,
            onFailure: failure_callback
        });
    },

    /**
     * Look up `word` and compare the result with the expected values.
     *
     * @param {string} word_type "noun", "verb" or "adjective".
     * @param {string} word Word to look up.
     * @param {string} language Language section name.
     * @param {Array} definitions Expected definitions, or null to skip the check.
     * @param {Array} quotes Expected quotes, or null to skip the check.
     */
    definitions_test_general: function(word_type, word, language, definitions, quotes) {
        function retrieved_callback(returned_def_object) {
            try {
                var returned_defs = returned_def_object.get_definitions();
                var returned_quotes = returned_def_object.get_quotes();

                if (definitions != null) {
                    var entry = arrays_differ_point(returned_defs, definitions);
                    if (entry !== -1) {
                        var returned = returned_defs[entry];
                        var expected = definitions[entry];
                        // When one list is shorter there is no string to compare against.
                        var differ_point = (returned != null && expected != null) ?
                            strings_differ_point(returned, expected) : 0;
                        throw "Definitions differ from expected. At entry #{entry}. Returned: '#{returned}'. Expected: '#{expected}'. Differ at character: #{differ_point}".interpolate({
                            entry: entry,
                            returned: returned,
                            expected: expected,
                            differ_point: differ_point
                        });
                    }
                }

                if (quotes != null && array_tree_differ_point(returned_quotes, quotes) !== -1) {
                    throw "Returned quotes differ from expected. Returned:'" + array_to_string(returned_quotes) +
                        "' Expected:'" + array_to_string(quotes) + "'";
                }
            } catch (e) {
                alert(e);
            }
        }

        function shout(e) {
            alert(e);
        }

        DictionaryQuery["_get_" + word_type + "_definition"](word, language, {
            onSuccess: retrieved_callback,
            onFailure: shout
        });
    },

    noun_definitions_test: function() {
        var word = "tree";
        var definitions = ["step (of a staircase), stair"];
        var quotes = [[]];
        this.definitions_test_general("noun", word, "dutch", definitions, quotes);
    },

    // Only the first etymology is expected to be returned.
    noun_definitions_test_multiple_etymologies: function() {
        var word = "cat";
        var definitions = [
            "(countable) A domesticated species (Felis silvestris) of feline animal commonly kept as a house pet.",
            "(countable) Any similar animal of the family Felidae, which includes lions, tigers, etc.",
            "(countable) A catfish.",
            "(countable) A spiteful or angry woman.",
            "(nautical, countable) A strong tackle used to hoist an anchor to the cathead of a ship.",
            "(nautical, countable) Contraction of cat-o'-nine-tails.",
            "(countable) An enthusiast or player of jazz",
            "(slang, countable) A person (usually male); dude (in its non-derogatory senses), guy, fella",
            "(archaic, countable) A sturdy merchant sailing vessel (now only in \"catboat\")",
            "(archaic) The game of \"trap and ball\" (also called \"cat and dog\")",
            "(archaic) The trap of the game of \"trap and ball\".",
            "(countable) A generic term for an earth moving machine, derived from both caterpillar and bobcat."
        ];
        this.definitions_test_general("noun", word, "english", definitions, null);
    },

    verb_definitions_test: function() {
        var definitions = [
            "To chase down prey and (usually) kill it. When the prey is a fish, it's not called hunting, but fishing.",
            "To try to find something; search."
        ];
        var quotes = [
            ["Her uncle is out deer hunting, now that it is open season."],
            ["The little girl was hunting for shells on the beach."]
        ];
        this.definitions_test_general("verb", "hunt", "english", definitions, quotes);
    },

    definitions_object_test: function() {
        var text = "#one\n#:1\n#:first\n#two\n#:2\n#three\n#four";
        var defs = new DefinitionsObject(text);
        var definitions = defs.get_definitions();
        var quotes = defs.get_quotes();
        var expected_definitions = ["one", "two", "three", "four"];
        var expected_quotes = [["1", "first"], ["2"], [], []];

        var differ_point = arrays_differ_point(definitions, expected_definitions);
        if (differ_point !== -1) {
            throw "definitions_object_test failed. Definitions returned differ from expected value. Returned:'" +
                definitions + "'. Expected: '" + expected_definitions + "' Differ at entry:'" + differ_point + "'";
        }

        differ_point = array_tree_differ_point(quotes, expected_quotes);
        if (differ_point !== -1) {
            throw "definitions_object_test failed. Quotes returned differ from expected value.. Differ at position: " +
                differ_point.toString() + "Returned:" + quotes.toString() + ". Expected:" + expected_quotes.toString();
        }
    },

    test: function() {
        this.definitions_object_test();
        this.sections_test();
        this.noun_definitions_test();
        this.verb_definitions_test();
        this.noun_missing_definitions_test();
    }
};

/** Tests for Tree; each test throws a string on failure. */
var TreeTest = {
    test_tree: _build_sections_test_tree(),

    find_subtree_test: function() {
        var ret = this.test_tree.find_subtree("Dutch").toString();
        var expected = "((Dutch:9 (Noun:10)))";
        if (ret != expected) {
            throw "find_subtree_test failed. Returned:'" + ret + "'. Expected:'" + expected + "'";
        }
    },

    find_subtree_test2: function() {
        // ret is a Tree; the comparison below relies on its toString().
        var ret = this.test_tree.find_subtree("Dutch").find_subtree("noun", {case_sensitive: false});
        var expected = "((Noun:10))";
        if (ret != expected) {
            throw "find_subtree_test2 failed. Returned:'" + ret + "'. Expected:'" + expected + "'";
        }
    },

    find_node_test: function() {
        var ret = this.test_tree.find_subtree("dutch", {case_sensitive: false}).find_node("noun", {case_sensitive: false});
        var expected = "Noun:10";
        if (ret != expected) {
            throw "find_node_test failed. Returned:'" + ret + "'. Expected:'" + expected + "'";
        }
    },

    toStringTest: function() {
        var tree = new Tree([
            ["one", 1, [
                ["two", 2, []],
                ["three", 3, []]
            ]],
            ["four", 4, []]
        ]);
        var test_value = "((one:1 (two:2) (three:3)) (four:4))";
        var ret = tree.toString();
        if (ret != test_value) {
            throw "Tree toString test failed. Expected:'" + test_value + "'. Returned:'" + ret + "'";
        }
    },

    test: function() {
        this.find_subtree_test();
        this.find_subtree_test2();
        this.toStringTest();
        this.find_node_test();
    }
};

/**
 * Find the first index at which two flat arrays differ (loose equality).
 *
 * @param {Array} a
 * @param {Array} b
 * @returns {number} The first differing index; the length of the shorter
 *     array when one is a strict prefix of the other; -1 when they are equal.
 */
function arrays_differ_point(a, b) {
    var shared = Math.min(a.length, b.length);
    for (var i = 0; i < shared; i++) {
        if (a[i] != b[i]) {
            return i;
        }
    }
    return (a.length != b.length) ? shared : -1;
}

function arrays_differ_point_test() {
    var a = [1, 2, 3, 5];
    var b = [1, 2, 4, 5];
    var test = arrays_differ_point(a, b);
    if (test !== 2) {
        throw "arrays_differ_point test failed. Expected: 2. Returned: " + test;
    }

    a = [1, 2, 3, 4];
    b = [1, 2, 3, 4];
    test = arrays_differ_point(a, b);
    if (test !== -1) {
        throw "arrays_differ_point test failed. Expected: -1. Returned: " + test;
    }

    a = [1, 2];
    b = [1, 2, 3];
    test = arrays_differ_point(a, b);
    if (test !== 2) {
        throw "arrays_differ_point test failed. Expected: 2. Returned: " + test;
    }
}

/**
 * Find the first position at which two nested arrays differ.
 *
 * @param {*} a Nested array (or leaf value).
 * @param {*} b Nested array (or leaf value).
 * @returns {(number|Array)} -1 when a and b are equal (leaves are compared
 *     with loose equality); otherwise the path of indices leading to the
 *     first difference. The path is empty when a and b themselves differ
 *     as leaves or in kind (array versus non-array).
 */
function array_tree_differ_point(a, b) {
    var a_is_array = a instanceof Array;
    var b_is_array = b instanceof Array;

    if (!a_is_array && !b_is_array) {
        return (a == b) ? -1 : [];
    }
    if (!a_is_array || !b_is_array) {
        return [];
    }

    var longest = Math.max(a.length, b.length);
    for (var i = 0; i < longest; i++) {
        // One array ran out: they differ at the first missing index.
        if (i >= a.length || i >= b.length) {
            return [i];
        }
        var path = array_tree_differ_point(a[i], b[i]);
        if (path !== -1) {
            path.unshift(i);
            return path;
        }
    }
    return -1;
}

function array_tree_differ_point_test() {
    var a = [1];
    var b = [1];
    var test = array_tree_differ_point(a, b);
    if (test != -1) {
        throw "array_tree_differ_point [1] test failed. Expected: -1. Returned:" + test;
    }

    a = [[1, 2], 3, 4];
    b = [[1, 2], 3, 4];
    test = array_tree_differ_point(a, b);
    if (test != -1) {
        throw "array_tree_differ_point test failed. Expected: -1. Returned:" + test;
    }

    a = [[1, [2, 3]], 4, 5];
    b = [[1, [2, 7]], 5, 17];
    test = array_tree_differ_point(a, b);
    if (test === -1 || arrays_differ_point(test, [0, 1, 1]) != -1) {
        throw "array_tree_differ_point test failed. Expected: [0,1,1]. Returned:" + test.toString();
    }

    a = [[]];
    b = [[]];
    test = array_tree_differ_point(a, b);
    if (test != -1) {
        throw "array_tree_differ_point test failed. Expected: -1. Returned:" + test.toString();
    }

    a = [1];
    b = [1, 2];
    test = array_tree_differ_point(a, b);
    if (test === -1 || arrays_differ_point(test, [1]) != -1) {
        throw "array_tree_differ_point test failed. Expected: [1]. Returned:" + test.toString();
    }
}

//arrays_differ_point_test();
//array_tree_differ_point_test();

/**
 * Render a nested array as text, e.g. [1, [2, 3]] becomes "[1, [2, 3]]".
 *
 * @param {*} arr Nested array (or leaf value, including null/undefined).
 * @returns {string}
 */
function array_to_string(arr) {
    if (!(arr instanceof Array)) {
        if (arr != null && arr.hasOwnProperty("toString")) {
            return arr.toString();
        }
        return "" + arr;
    }
    var child_strings = [];
    for (var i = 0; i < arr.length; i++) {
        child_strings.push(array_to_string(arr[i]));
    }
    return "[" + child_strings.join(", ") + "]";
}

function array_to_string_test() {
    var a = [1, [2, 3]];
    var test = array_to_string(a);
    var expected = "[1, [2, 3]]";
    if (test != expected) {
        throw "array_to_string_test failed. Returned:'" + test + "'. Expected:'" + expected + "'";
    }
}

// Self-test run when the script is loaded.
array_to_string_test();

/**
 * Find the first character position at which two strings differ.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} The first differing index; the length of the shorter
 *     string when one is a strict prefix of the other; -1 when equal.
 */
function strings_differ_point(a, b) {
    var common_end = Math.min(a.length, b.length);
    for (var i = 0; i < common_end; i++) {
        if (a.charAt(i) != b.charAt(i)) {
            return i;
        }
    }
    return (a.length != b.length) ? common_end : -1;
}

function _convert_wiki_to_text_test() {
    var text = "[[link|cat fish]]";
    var ret = _convert_wiki_to_text(text);
    var expected = "cat fish";
    if (ret != expected) {
        throw "_convert_wiki_to_text_test failed. Returned:'" + ret + "'. Expected:'" + expected + "'";
    }

    // Every occurrence of the markup must be handled, not just the first.
    text = "'''bold''' {{a}} {{b}} [[x|y]] [[z]]";
    ret = _convert_wiki_to_text(text);
    expected = "bold (a) (b) y z";
    if (ret != expected) {
        throw "_convert_wiki_to_text_test failed. Returned:'" + ret + "'. Expected:'" + expected + "'";
    }
}

// Self-test run when the script is loaded.
_convert_wiki_to_text_test();

//MISSING: Deal with multiple etymologies. At the moment ignoring all but first.
//MISSING: Add logging facilities and get this to work with logging.
//MISSING: add tests for garbled definitions
//MAYBE: Add a translate to other language widget using wiktionary.
//MAYBE: Add an etymology widget.
//MISSING run through JSlint