Class Jmdict::Import
In: lib/jmdict/import.rb
Parent: Object

About

Import kanji and word definitions from JMdict XML files and example sentences from the Tatoeba Project into Kotoba.

References

Methods

Included Modules

Jmdict::Message

Constants

ENUM_SOURCE_XML = 1   enumeration of source types
ENUM_SOURCE_CSV = 2

Public Class methods

[Source]

    # File lib/jmdict/import.rb, line 19
19:   def self.import(importer)
20:     begin
21:       directory       = importer.default_directory
22:       version         = ENV['version'] ? ENV['version'] : importer.default_version
23:       location        = directory + "/#{importer.file_prefix}#{version}.#{importer.file_suffix}"
24:       if (File.exist?(location))
25:         importer.message("-- importing records from #{location}, limit to [#{ENV['maximum']}] records")
26:         importer.import(location, importer.source_type, version, ENV['maximum'])
27:       else
28:         importer.message("-- there is no source file found at #{location}", :warn)
29:       end
30:     rescue => exception
31:       importer.message(exception, :error)
32:       raise("Unable to complete migration")
33:     end
34:   end

Get a collection of parse strategies. These are declared with constants starting with STRATEGY_.

[Source]

    # File lib/jmdict/import.rb, line 54
54:   def self.strategies
55:     if @@STRATEGIES.empty?
56:       for constant in self.constants
57:         @@STRATEGIES << self.const_get(constant) if 0 == (constant =~ /^STRATEGY_/)
58:       end
59:     end
60:     @@STRATEGIES
61:   end

[Source]

    # File lib/jmdict/import.rb, line 63
63:   def self.valid_strategy?(strategy)
64:     self.strategies.include?(strategy)
65:   end

Public Instance methods

[Source]

    # File lib/jmdict/import.rb, line 36
36:   def default_directory
37:     raise "Define 'default_directory' in extending class"
38:   end

[Source]

    # File lib/jmdict/import.rb, line 40
40:   def default_version
41:     raise "Define 'default_version' in extending class"
42:   end

[Source]

     # File lib/jmdict/import.rb, line 105
105:   def do_parse(listener)
106:     set_parser(@_source_location, listener)
107:     
108:     while @parser.more?
109:       @parser.parse
110:       records = listener.records
111:       records_size = records.size
112:       persist(records)
113:       listener.clear_records
114:     end
115:     
116:     return records_size
117:   end

[Source]

     # File lib/jmdict/import.rb, line 85
 85:   def do_read(reader)
 86:     number_of_lines = 0
 87:     begin
 88:       source = File.new(@_source_location, 'r')
 89:       while(line = source.gets)
 90:         next if reader.is_comment?(line)
 91:         number_of_lines += 1
 92:         reader.read(line)
 93:         records = reader.records
 94:         records_size = records.size
 95:         persist(records)
 96:         reader.clear_records
 97:       end
 98:     rescue => exception
 99:       message(exception, :error)
100:       raise("Unable to read [#{@_source_location}] due to [#{exception.message}]")
101:     end
102:     return records_size
103:   end

[Source]

    # File lib/jmdict/import.rb, line 44
44:   def file_prefix
45:     raise "Define 'file_prefix' in extending class"
46:   end

[Source]

    # File lib/jmdict/import.rb, line 48
48:   def file_suffix
49:     raise "Define 'file_suffix' in extending class"
50:   end

[Source]

    # File lib/jmdict/import.rb, line 67
67:   def parse(source_location, source_type, source_version, maximum_imported, parser_strategy)
68:     begin
69:       @_source_location  = source_location
70:       @_source_type      = source_type
71:       @_source_version   = source_version
72:       @_maximum_imported = maximum_imported ? maximum_imported.to_i : maximum_imported
73:       message("Parsing file [#{source_location}] using strategy [#{parser_strategy}]")
74:       return send(parser_strategy)
75:     rescue => exception
76:       message(exception, :error)
77:       raise("Unable to import records from [#{source_location}], type [#{source_type}], version [#{source_version}] due to: [#{exception.message}]")
78:     ensure
79:       if @parser
80:         @parser.close
81:       end
82:     end
83:   end

[Source]

     # File lib/jmdict/import.rb, line 119
119:   def persist(records)
120:      ActiveRecord::Base.transaction do
121:     
122:         message("Persisting [#{records.size}] records")
123:         number_persisted = 0
124:         
125:         for record in records
126:           if 0 == number_persisted % 100 && RAILS_ENV == 'development'
127:             message(record.inspect)
128:           end
129:           record.save!
130:           number_persisted += 1
131:           message("Persisted [#{number_persisted}] records.") if 0 == number_persisted % 100
132:           break if  @_maximum_imported && number_persisted >= @_maximum_imported
133:         end
134:         
135:         message("Done persisting [#{records.size}] records.")
136: 
137:       end
138: 
139:       message("Imported [#{records.size}] records from [#{@_source_location}], type [#{@_source_type}], version [#{@_source_version}]")
140:   end

[Source]

     # File lib/jmdict/import.rb, line 142
142:   def set_parser(source_location, listener)
143:     case @_source_type
144:       when ENUM_SOURCE_XML
145:         source = File.read(source_location)
146:         @parser = Jmdict::ImportStreamParser.new(source, listener)
147:       when ENUM_SOURCE_CSV
148:         @parser = Jmdict::ImportCsvParser.new(source_location, listener)
149:       else
150:         raise("We do not know how to handle source type [#{@_source_type}]")
151:     end
152:   end

[Source]

     # File lib/jmdict/import.rb, line 154
154:   def source_type
155:     return @_source_type
156:   end

[Validate]