Class Jmdict::ImportWordListener
In: lib/jmdict/import_word_listener.rb
Parent: Jmdict::ImportXmlListener

Methods

Public Class methods

[Source]

    # File lib/jmdict/import_word_listener.rb, line 3
 3:   def initialize(filename, version, maximum_parsed = nil)
 4:     super(filename, version, maximum_parsed)
 5: 
 6:     # run-time states and objects
 7:     @_current_definition  = nil
 8:     @_current_word        = nil
 9:     @_current_parts       = Array.new
10:     @_current_definitions = Array.new
11:     #create_parts
12:   end

Public Instance methods

[Source]

     # File lib/jmdict/import_word_listener.rb, line 14
 14:   def create_parts
 15:         Part.create :part => "MA",      :language_id =>  @japanese_language.id, :description =>  "martial arts term"
 16:     Part.create :part => "X",       :language_id =>  @japanese_language.id, :description =>  "rude or X-rated term (not displayed in educational software)"
 17:     Part.create :part => "abbr",    :language_id =>  @japanese_language.id, :description =>  "abbreviation"
 18:     Part.create :part => "adj-i",   :language_id =>  @japanese_language.id, :description =>  "adjective (keiyoushi)"
 19:     Part.create :part => "adj-na",  :language_id =>  @japanese_language.id, :description =>  "adjectival nouns or quasi-adjectives (keiyodoshi)"
 20:     Part.create :part => "adj-no",  :language_id =>  @japanese_language.id, :description =>  "nouns which may take the genitive case particle 'no'"
 21:     Part.create :part => "adj-pn",  :language_id =>  @japanese_language.id, :description =>  "pre-noun adjectival (rentaishi)"
 22:     Part.create :part => "adj-t",   :language_id =>  @japanese_language.id, :description =>  "`taru' adjective"
 23:     Part.create :part => "adj-f",   :language_id =>  @japanese_language.id, :description =>  "noun or verb acting prenominally"
 24:     Part.create :part => "adj",     :language_id =>  @japanese_language.id, :description =>  "former adjective classification (being removed)"
 25:     Part.create :part => "adv",     :language_id =>  @japanese_language.id, :description =>  "adverb (fukushi)"
 26:     Part.create :part => "adv-to",  :language_id =>  @japanese_language.id, :description =>  "adverb taking the `to' particle"
 27:     Part.create :part => "arch",    :language_id =>  @japanese_language.id, :description =>  "archaism"
 28:     Part.create :part => "ateji",   :language_id =>  @japanese_language.id, :description =>  "ateji (phonetic) reading"
 29:     Part.create :part => "aux",     :language_id =>  @japanese_language.id, :description =>  "auxiliary"
 30:     Part.create :part => "aux-v",   :language_id =>  @japanese_language.id, :description =>  "auxiliary verb"
 31:     Part.create :part => "aux-adj", :language_id =>  @japanese_language.id, :description =>  "auxiliary adjective"
 32:     Part.create :part => "Buddh",   :language_id =>  @japanese_language.id, :description =>  "Buddhist term"
 33:     Part.create :part => "chem",    :language_id =>  @japanese_language.id, :description =>  "chemistry term"
 34:     Part.create :part => "chn",     :language_id =>  @japanese_language.id, :description =>  "children's language"
 35:     Part.create :part => "col",     :language_id =>  @japanese_language.id, :description =>  "colloquialism"
 36:     Part.create :part => "comp",    :language_id =>  @japanese_language.id, :description =>  "computer terminology"
 37:     Part.create :part => "conj",    :language_id =>  @japanese_language.id, :description =>  "conjunction"
 38:     Part.create :part => "ctr",     :language_id =>  @japanese_language.id, :description =>  "counter"
 39:     Part.create :part => "derog",   :language_id =>  @japanese_language.id, :description =>  "derogatory"
 40:     Part.create :part => "eK",      :language_id =>  @japanese_language.id, :description =>  "exclusively kanji"
 41:     Part.create :part => "ek",      :language_id =>  @japanese_language.id, :description =>  "exclusively kana"
 42:     Part.create :part => "exp",     :language_id =>  @japanese_language.id, :description =>  "Expressions (phrases, clauses, etc.)"
 43:     Part.create :part => "fam",     :language_id =>  @japanese_language.id, :description =>  "familiar language"
 44:     Part.create :part => "fem",     :language_id =>  @japanese_language.id, :description =>  "female term or language"
 45:     Part.create :part => "food",    :language_id =>  @japanese_language.id, :description =>  "food term"
 46:     Part.create :part => "geom",    :language_id =>  @japanese_language.id, :description =>  "geometry term"
 47:     Part.create :part => "gikun",   :language_id =>  @japanese_language.id, :description =>  "gikun (meaning) reading"
 48:     Part.create :part => "hon",     :language_id =>  @japanese_language.id, :description =>  "honorific or respectful (sonkeigo) language"
 49:     Part.create :part => "hum",     :language_id =>  @japanese_language.id, :description =>  "humble (kenjougo) language"
 50:     Part.create :part => "iK",      :language_id =>  @japanese_language.id, :description =>  "word containing irregular kanji usage"
 51:     Part.create :part => "id",      :language_id =>  @japanese_language.id, :description =>  "idiomatic expression"
 52:     Part.create :part => "ik",      :language_id =>  @japanese_language.id, :description =>  "word containing irregular kana usage"
 53:     Part.create :part => "int",     :language_id =>  @japanese_language.id, :description =>  "interjection (kandoushi)"
 54:     Part.create :part => "io",      :language_id =>  @japanese_language.id, :description =>  "irregular okurigana usage"
 55:     Part.create :part => "iv",      :language_id =>  @japanese_language.id, :description =>  "irregular verb"
 56:     Part.create :part => "ling",    :language_id =>  @japanese_language.id, :description =>  "linguistics terminology"
 57:     Part.create :part => "m-sl",    :language_id =>  @japanese_language.id, :description =>  "manga slang"
 58:     Part.create :part => "male",    :language_id =>  @japanese_language.id, :description =>  "male term or language"
 59:     Part.create :part => "male-sl", :language_id =>  @japanese_language.id, :description =>  "male slang"
 60:     Part.create :part => "math",    :language_id =>  @japanese_language.id, :description =>  "mathematics"
 61:     Part.create :part => "mil",     :language_id =>  @japanese_language.id, :description =>  "military"
 62:     Part.create :part => "n",       :language_id =>  @japanese_language.id, :description =>  "noun (common) (futsuumeishi)"
 63:     Part.create :part => "n-adv",   :language_id =>  @japanese_language.id, :description =>  "adverbial noun (fukushitekimeishi)"
 64:     Part.create :part => "n-suf",   :language_id =>  @japanese_language.id, :description =>  "noun, used as a suffix"
 65:     Part.create :part => "n-pref",  :language_id =>  @japanese_language.id, :description =>  "noun, used as a prefix"
 66:     Part.create :part => "n-t",     :language_id =>  @japanese_language.id, :description =>  "noun (temporal) (jisoumeishi)"
 67:     Part.create :part => "num",     :language_id =>  @japanese_language.id, :description =>  "numeric"
 68:     Part.create :part => "oK",      :language_id =>  @japanese_language.id, :description =>  "word containing out-dated kanji"
 69:     Part.create :part => "obs",     :language_id =>  @japanese_language.id, :description =>  "obsolete term"
 70:     Part.create :part => "obsc",    :language_id =>  @japanese_language.id, :description =>  "obscure term"
 71:     Part.create :part => "ok",      :language_id =>  @japanese_language.id, :description =>  "out-dated or obsolete kana usage"
 72:     Part.create :part => "on-mim",  :language_id =>  @japanese_language.id, :description =>  "onomatopoeic or mimetic word"
 73:     Part.create :part => "poet",    :language_id =>  @japanese_language.id, :description =>  "poetical term"
 74:     Part.create :part => "pol",     :language_id =>  @japanese_language.id, :description =>  "polite (teineigo) language"
 75:     Part.create :part => "pref",    :language_id =>  @japanese_language.id, :description =>  "prefix"
 76:     Part.create :part => "prt",     :language_id =>  @japanese_language.id, :description =>  "particle"
 77:     Part.create :part => "physics", :language_id =>  @japanese_language.id, :description =>  "physics terminology"
 78:     Part.create :part => "rare",    :language_id =>  @japanese_language.id, :description =>  "rare"
 79:     Part.create :part => "sens",    :language_id =>  @japanese_language.id, :description =>  "sensitive"
 80:     Part.create :part => "sl",      :language_id =>  @japanese_language.id, :description =>  "slang"
 81:     Part.create :part => "suf",     :language_id =>  @japanese_language.id, :description =>  "suffix"
 82:     Part.create :part => "uK",      :language_id =>  @japanese_language.id, :description =>  "word usually written using kanji alone"
 83:     Part.create :part => "uk",      :language_id =>  @japanese_language.id, :description =>  "word usually written using kana alone"
 84:     Part.create :part => "v1",      :language_id =>  @japanese_language.id, :description =>  "Ichidan verb"
 85:     Part.create :part => "v2a-s",   :language_id =>  @japanese_language.id, :description =>  "Nidan verb with 'u' ending (archaic)"
 86:     Part.create :part => "v4h",     :language_id =>  @japanese_language.id, :description =>  "Yondan verb with `hu/fu' ending (archaic)"
 87:     Part.create :part => "v4r",     :language_id =>  @japanese_language.id, :description =>  "Yondan verb with `ru' ending (archaic)"
 88:     Part.create :part => "v5",      :language_id =>  @japanese_language.id, :description =>  "Godan verb (not completely classified)"
 89:     Part.create :part => "v5aru",   :language_id =>  @japanese_language.id, :description =>  "Godan verb - -aru special class"
 90:     Part.create :part => "v5b",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `bu' ending"
 91:     Part.create :part => "v5g",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `gu' ending"
 92:     Part.create :part => "v5k",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `ku' ending"
 93:     Part.create :part => "v5k-s",   :language_id =>  @japanese_language.id, :description =>  "Godan verb - Iku/Yuku special class"
 94:     Part.create :part => "v5m",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `mu' ending"
 95:     Part.create :part => "v5n",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `nu' ending"
 96:     Part.create :part => "v5r",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `ru' ending"
 97:     Part.create :part => "v5r-i",   :language_id =>  @japanese_language.id, :description =>  "Godan verb with `ru' ending (irregular verb)"
 98:     Part.create :part => "v5s",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `su' ending"
 99:     Part.create :part => "v5t",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `tsu' ending"
100:     Part.create :part => "v5u",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `u' ending"
101:     Part.create :part => "v5u-s",   :language_id =>  @japanese_language.id, :description =>  "Godan verb with `u' ending (special class)"
102:     Part.create :part => "v5uru",   :language_id =>  @japanese_language.id, :description =>  "Godan verb - Uru old class verb (old form of Eru)"
103:     Part.create :part => "v5z",     :language_id =>  @japanese_language.id, :description =>  "Godan verb with `zu' ending"
104:     Part.create :part => "vz",      :language_id =>  @japanese_language.id, :description =>  "Ichidan verb - zuru verb (alternative form of -jiru verbs)"
105:     Part.create :part => "vi",      :language_id =>  @japanese_language.id, :description =>  "intransitive verb"
106:     Part.create :part => "vk",      :language_id =>  @japanese_language.id, :description =>  "Kuru verb - special class"
107:     Part.create :part => "vn",      :language_id =>  @japanese_language.id, :description =>  "irregular nu verb"
108:     Part.create :part => "vr",      :language_id =>  @japanese_language.id, :description =>  "irregular ru verb, plain form ends with -ri"
109:     Part.create :part => "vs",      :language_id =>  @japanese_language.id, :description =>  "noun or participle which takes the aux. verb suru"
110:     Part.create :part => "vs-s",    :language_id =>  @japanese_language.id, :description =>  "suru verb - special class"
111:     Part.create :part => "vs-i",    :language_id =>  @japanese_language.id, :description =>  "suru verb - irregular"
112:     Part.create :part => "vt",      :language_id =>  @japanese_language.id, :description =>  "transitive verb"
113:     Part.create :part => "vulg",    :language_id =>  @japanese_language.id, :description =>  "vulgar expression or word"
114:   end

[Source]

     # File lib/jmdict/import_word_listener.rb, line 183
183:   def end_entry
184:     if not @_skip_entry
185:       @_number_parsed                   = @_number_parsed + 1
186:       
187:       @_current_word.dialect            = @_current_dialect 
188:       
189:       if @_current_word.pronunciation
190:         @_current_word.pronunciation    << ';'
191:         @_current_word.pronunciation    << @_current_pronunciation
192:       else
193:         @_current_word.pronunciation    = @_current_pronunciation
194:       end
195:       
196:       @_current_word.spelling           = @_current_spelling unless @_current_word.id
197: 
198:       for pos in @_current_parts
199:         @_current_word.parts           << pos
200:       end
201:       
202:       for definition in @_current_definitions
203:         @_current_word.definitions     << definition
204:       end
205:       
206:       @records << @_current_word
207:       message("Parsed [#{@_number_parsed}]th entry.") if 0 == (@_number_parsed % @message_every_nth_time)
208:     end
209:     reset
210:   end

[Source]

     # File lib/jmdict/import_word_listener.rb, line 176
176:   def start_entry(attrs)
177:     @_current_word                    = Word.new
178:     @_current_word.source_description = @source_description
179:     @_current_word.language           = @japanese_language  
180:     new_definition
181:   end

[Source]

     # File lib/jmdict/import_word_listener.rb, line 166
166:   def start_gloss(attrs)
167:     iso_639_code = attrs['xml:lang']
168:     case iso_639_code
169:     when nil, ''
170:       @_current_definition.language = Language.get_english
171:     else
172:       @_current_definition.language = Language.get_by_iso_639_code(iso_639_code)
173:     end
174:   end

[Source]

     # File lib/jmdict/import_word_listener.rb, line 116
116:   def text(text)
117:     return unless text && @_current_tag
118:     case @_current_tag
119:     when 'gloss' 
120:       return unless @_current_definition
121:       definition = text
122:       @_current_definition.definition = definition
123:       new_definition
124:     when 'ent_seq'
125:       source_tag = text
126:       if Word.find_by_source_tag(source_tag)
127:         # ignore entries we have already imported
128:         @_skip_entry = @_skip_entry ? @_skip_entry : true
129:         # TODO eventually we will want to hanle updating existing
130:         # entries (words)
131:       else
132:         @_current_word.source_tag = source_tag
133:       end
134:     when 'keb' 
135:       keb = text
136:       if Word::REQUIRE_UNIQUE_SPELLING
137:         # if we already have a word with the same spelling then
138:         # add this entry to that existing word
139:         existing_word = Word.find_by_spelling(keb)
140:         @_current_word = existing_word ? existing_word : @_current_word
141:       end
142:       @_current_spelling = keb
143:     when 'reb'
144:       reb = text
145:       @_current_pronunciation = reb
146:       @_current_spelling = reb unless @_current_spelling
147:     when 'dial' 
148:       dial = cleanup_entity(text)
149:       @_current_dialect = Dialect.find_by_dialect_and_language_id(dial,@japanese_language.id)
150:       unless @_current_dialect
151:         raise("We do not know about dialect [#{dial}] for language #{@japanese_language.language}")
152:         Rails.logger.error("We do not know about dialect [#{dial}] for language #{@japanese_language.language}")
153:       end
154:     when 'pos' 
155:       pos = cleanup_entity(text)
156:       part_of_speech = Part.find_by_part_and_language_id(pos,@japanese_language.id)
157:       if part_of_speech   
158:         @_current_parts << part_of_speech
159:       else
160:         raise("We do not know about part of speech [#{pos}] for language #{@japanese_language.language}")
161:         Rails.logger.error("We do not know about part of speech [#{pos}] for language #{@japanese_language.language}")
162:       end
163:     end
164:   end

[Validate]