Class Jmdict::ImportXMLReader
In: lib/jmdict/import_xml_reader.rb
Parent: Jmdict::ImportReader

Methods

new   parse  

Public Class methods

[Source]

    # File lib/jmdict/import_xml_reader.rb, line 5
    #
    # Sets up a reader for one Jmdict XML file. Nothing is read or parsed here;
    # the arguments are only stored for later use.
    #
    # filename::       stored in @filename
    # version::        dictionary version embedded in @source_description; it
    #                  is interpolated, so any object with a to_s works (the
    #                  earlier string concatenation raised TypeError for
    #                  non-String versions such as numbers)
    # maximum_parsed:: stored in @maximum_parsed; nil by default
    def initialize(filename, version, maximum_parsed = nil)
      @filename = filename
      @maximum_parsed = maximum_parsed
      @words = []
      @japanese_language = Language.get_japanese
      # The timestamp is taken once, at construction time, so every word
      # tagged with this description carries the same import time.
      @source_description = "jmdict_#{version} imported on #{DateTime.now}"
    end

Public Instance methods

Returns an array of words parsed from a Jmdict source.

[Source]

    # File lib/jmdict/import_xml_reader.rb, line 14
    #
    # Parses the XML file named by @filename and returns @words, refilled with
    # one Word per <entry> element that has not been imported before.
    #
    # For each entry:
    # * the text of the first r_ele/reb node becomes the pronunciation;
    # * the text of the first k_ele/keb node becomes the spelling, falling
    #   back to the pronunciation when the entry has no keb;
    # * the text of ent_seq becomes the source tag, and the entry is skipped
    #   when Word.find_by_source_tag returns a truthy value for that tag;
    # * every sense node is handed to the add_* helpers.
    #
    # Parsing stops as soon as @maximum_parsed words have been collected; a
    # nil @maximum_parsed means no limit.
    #
    # Returns @words itself, which is cleared at the start of every call.
    def parse
      @words.clear

      # TODO Make this use XML::Reader
      doc = XML::Parser.file(@filename).parse

      # Text of the first node matching +xpath+ below +node+, or nil when
      # nothing matches. The nil test is made on the first node rather than on
      # the result of find, because an empty result set is still an object
      # (per libxml-ruby) and would never take the fallback branch.
      first_content = lambda do |node, xpath|
        match = node.find(xpath).to_a.first
        match && match.content
      end

      # NOTE(review): an interval of 1 reports after every single entry; a
      # larger value was presumably intended -- confirm before changing.
      progress_interval = 1

      message("Parsing [#{@maximum_parsed}] entries.")
      number_parsed = 0

      doc.find('//entry').each do |entry|
        source_tag = first_content.call(entry, 'ent_seq')

        # TODO update existing entries
        # skip previously imported entries
        next if source_tag && Word.find_by_source_tag(source_tag)

        word = Word.new
        word.pronunciation      = first_content.call(entry, 'r_ele/reb')
        word.spelling           = first_content.call(entry, 'k_ele/keb') || word.pronunciation
        # Previously this assignment had no right-hand side and silently
        # chained into the next line, storing the description as the tag.
        word.source_tag         = source_tag
        word.source_description = @source_description
        word.language           = @japanese_language

        entry.find('sense').each do |sense|
          add_definitions(word, sense)
          # add_examples used to be invoked a second time at the end of this
          # list; that looked like a copy-paste slip and has been dropped.
          add_examples(word, sense)
          add_parts_of_speech(word, sense)
          add_references(word, sense)
          add_dialect(word, sense)
        end

        @words << word
        number_parsed += 1
        message("Parsed [#{number_parsed}] entries.") if (number_parsed % progress_interval).zero?

        # Stop once the requested number of entries has been collected.
        break if @maximum_parsed && number_parsed >= @maximum_parsed
      end

      message("Done parsing [#{@words.size}] entries.")

      @words
    end

[Validate]