Class Jmdict::ImportKanjiListener
In: lib/jmdict/import_kanji_listener.rb
Parent: Jmdict::ImportXmlListener

Methods

Public Class methods

[Source]

    # File lib/jmdict/import_kanji_listener.rb, line 3
 # Creates a kanji import listener.
 #
 # +filename+, +version+ and +maximum_parsed+ (default nil) are forwarded
 # unchanged to the parent initializer. Afterwards the per-character parsing
 # state is cleared, the dictionaries, attributes and readings registered for
 # @japanese_language are loaded, and the 'kanji' character family is looked up.
 #
 # NOTE(review): @japanese_language is not assigned in this method; presumably
 # the parent initializer sets it -- confirm.
 def initialize(filename, version, maximum_parsed = nil)
   super(filename, version, maximum_parsed)

   # Scalar state for the element currently being parsed.
   @_current_character = nil
   @_current_meaning   = nil
   @_current_reading   = nil
   @_current_dr_type   = nil
   @_current_rmgroup   = 0

   # Child records collected for the character currently being parsed.
   @_current_attributes         = []
   @_current_code_points        = []
   @_current_dictionary_entries = []
   @_current_meanings           = []
   @_current_names              = []
   @_current_parts              = []
   @_current_queries            = []
   @_current_readings           = []

   # Values registered for the Japanese language.
   japanese_id = @japanese_language.id
   @_supported_dictionaries = LanguageCharacterDictionary.find_all_by_language_id(japanese_id).map(&:dictionary)
   @_supported_attributes   = LanguageCharacterAttribute.find_all_by_language_id(japanese_id).map(&:attribute)
   @_supported_readings     = LanguageCharacterReading.find_all_by_language_id(japanese_id).map(&:reading)

   # Maps the m_lang attribute value to the three-letter code used for meanings.
   @iso_639_codes = {
     ''   => 'eng',
     'ja' => 'jpn',
     'fr' => 'fre',
     'de' => 'ger'
   }

   @_language_family = LanguageCharacterFamily.find_by_language_id_and_family(japanese_id, 'kanji')
 end

Public Instance methods

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 133
# Finishes the character that is currently being parsed.
#
# Unless the entry was flagged with @_skip_entry, the parsed counter is
# incremented and every collected child record is appended to the matching
# association of @_current_character. The character is added to @records only
# when at least one meaning, name or reading was collected; otherwise a
# warning is logged and the character is dropped. Note that the counter is
# incremented before that check, so dropped characters are counted as well.
#
# +reset+ is always called last and its result is returned.
def end_character
  unless @_skip_entry
    @_number_parsed += 1

    @_current_attributes.each do |attribute|
      @_current_character.character_attributes << attribute
    end
    @_current_code_points.each do |code_point|
      @_current_character.character_code_points << code_point
    end
    @_current_dictionary_entries.each do |dictionary_entry|
      @_current_character.character_dictionary_entries << dictionary_entry
    end
    @_current_meanings.each do |meaning|
      @_current_character.character_meanings << meaning
    end
    @_current_names.each do |name|
      @_current_character.character_names << name
    end
    @_current_parts.each do |part|
      @_current_character.character_parts << part
    end
    @_current_queries.each do |query|
      @_current_character.character_queries << query
    end
    @_current_readings.each do |reading|
      @_current_character.character_readings << reading
    end

    nothing_collected = @_current_meanings.empty? && @_current_readings.empty? && @_current_names.empty?
    if nothing_collected
      Rails.logger.warn("Missing meanings [#{@_current_meanings.inspect}], names [#{@_current_names.inspect}], and readings [#{@_current_readings.inspect}] for character [#{@_current_character.inspect}]")
    else
      @records << @_current_character
      # Progress is reported once every @message_every_nth_time characters.
      message("Parsed [#{@_number_parsed}]th entry.") if (@_number_parsed % @message_every_nth_time).zero?
    end
  end
  reset
end

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 125
# Begins a new character entry.
#
# Clears the skip flag and replaces @_current_character with a fresh
# Character tagged with the source description, the Japanese language id and
# the id of the character family held in @_language_family.
# +attrs+ is accepted but not used.
def start_character(attrs)
  @_skip_entry = false

  character = @_current_character = Character.new
  character.source_description           = @source_description
  character.language_id                  = @japanese_language.id
  character.language_character_family_id = @_language_family.id
end

[Source]

    # File lib/jmdict/import_kanji_listener.rb, line 87
# Remembers the 'cp_type' attribute of an opening cp_value element so that
# the text handler can pair it with the element's content.
def start_cp_value(attrs)
  cp_type = attrs['cp_type']
  @_current_cp_type = cp_type
end

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 104
# Remembers the 'dr_type' attribute of an opening dic_ref element so that
# the text handler can pair it with the element's content.
def start_dic_ref(attrs)
  dr_type = attrs['dr_type']
  @_current_dr_type = dr_type
end

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 115
# Assigns a language to @_current_meaning from the 'xml:lang' attribute of an
# opening gloss element. A missing or empty attribute selects English; any
# other value is resolved through Language.get_by_iso_639_code.
#
# NOTE(review): @_current_meaning must already hold an object when this is
# called; where it is assigned is not visible here -- confirm.
def start_gloss(attrs)
  iso_639_code = attrs['xml:lang']
  unspecified  = iso_639_code.nil? || '' == iso_639_code

  @_current_meaning.language = unspecified ? Language.get_english : Language.get_by_iso_639_code(iso_639_code)
end

[Source]

    # File lib/jmdict/import_kanji_listener.rb, line 95
# Records the language of an opening meaning element. Without an 'm_lang'
# attribute the language is 'eng'; otherwise the attribute value is
# translated through @iso_639_codes.
#
# NOTE(review): an m_lang value that is not a key of @iso_639_codes yields
# nil here -- confirm that downstream code copes with a nil language.
def start_meaning(attrs)
  m_lang = attrs['m_lang']
  @_current_m_lang =
    if m_lang
      @iso_639_codes[m_lang]
    else
      'eng'
    end
end

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 112
# Handler for an opening misc element. Nothing needs to be recorded for it,
# so the method deliberately does nothing and returns nil.
def start_misc(attrs)
  # intentionally empty
end

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 99
 # Remembers the 'qc_type' and optional 'skip_misclass' attributes of an
 # opening q_code element so that the text handler can pair them with the
 # element's content. A missing 'skip_misclass' is stored as nil.
 def start_q_code(attrs)
   @_current_qc_type = attrs['qc_type']
   @_current_qc_sub  = attrs['skip_misclass'] || nil
 end

[Source]

     # File lib/jmdict/import_kanji_listener.rb, line 108
# Remembers the 'rad_type' attribute of an opening rad_value element so that
# the text handler can pair it with the element's content.
def start_rad_value(attrs)
  rad_type = attrs['rad_type']
  @_current_rad_type = rad_type
end

[Source]

    # File lib/jmdict/import_kanji_listener.rb, line 91
# Remembers the 'r_type' attribute of an opening reading element so that the
# text handler can pair it with the element's content.
def start_reading(attrs)
  r_type = attrs['r_type']
  @_current_r_type = r_type
end

[Source]

    # File lib/jmdict/import_kanji_listener.rb, line 83
# Advances the reading/meaning group counter when an rmgroup element opens.
# The counter value is passed along with the readings and meanings that the
# text handler collects. +attrs+ is accepted but not used.
def start_rmgroup(attrs)
  @_current_rmgroup = @_current_rmgroup + 1
end

[Source]

    # File lib/jmdict/import_kanji_listener.rb, line 34
# Handles the character data of the element named by @_current_tag.
#
# Nothing happens when +text+ or @_current_tag is nil/false. When the tag is
# one of @_supported_attributes the text is stored as a character attribute.
# Otherwise the tag decides what is done:
#
# * 'cp_value', 'dic_ref', 'meaning', 'q_code', 'rad_value', 'reading' --
#   the text is handed to the matching add_to_* helper together with the
#   type captured by the corresponding start_* handler, and that captured
#   type is cleared afterwards.
# * 'nanori'   -- stored as a reading whose type is the tag name.
# * 'rad_name' -- stored as a CharacterName for the Japanese language.
# * 'literal'  -- becomes the character of @_current_character. An empty
#   literal flags the entry to be skipped; a literal that is already in the
#   database only produces a warning.
# * any other tag is ignored.
def text(text)
  return unless text && @_current_tag
  if @_supported_attributes.include?(@_current_tag)
    add_to_attributes(@_current_tag, text)
  else
    case @_current_tag
    when 'cp_value'
      add_to_codepoints(@_current_cp_type, text)
      @_current_cp_type = nil
    when 'dic_ref'
      add_to_dictionaries(@_current_dr_type, text)
      @_current_dr_type = nil
    when 'meaning'
      add_to_meanings(@_current_m_lang, text, @_current_rmgroup)
      @_current_m_lang = nil
    when 'rad_name'
      character_name             = CharacterName.new
      character_name.name        = text
      character_name.language_id = @japanese_language.id
      @_current_names << character_name
    when 'literal'
      if text.empty?
        # Checked before the lookup: an empty literal can never be imported,
        # so it must be skipped whatever the database contains, and there is
        # no point in querying for it.
        Rails.logger.error("There is no character to import [#{text}]. Skipping.")
        @_skip_entry = true
      else
        found_character = Character.find(:first, :conditions => {:character => text})
        if found_character
          # Only a warning: the entry is still imported.
          Rails.logger.warn("Character appears to already exist [#{text}], #{found_character.inspect}")
        end
      end
      @_current_character.character = text
    when 'nanori'
      add_to_readings(@_current_tag, text, @_current_rmgroup)
    when 'q_code'
      add_to_queries(@_current_qc_type, @_current_qc_sub, text)
      @_current_qc_type = nil
      @_current_qc_sub = nil
    when 'rad_value'
      add_to_parts(@_current_rad_type, text)
      @_current_rad_type = nil
    when 'reading'
      add_to_readings(@_current_r_type, text, @_current_rmgroup)
      @_current_r_type = nil
    else
      #intentionally empty
    end
  end
end

[Validate]