class Numerizer

Constants

BIG_PREFIXES
DIRECT_NUMS
DIRECT_ORDINALS
FRACTIONS
SINGLE_NUMS
SINGLE_ORDINALS
TEN_PREFIXES

Public Class Methods

numerize(string)
# File lib/numerizer.rb, line 109
# Rewrites the number words in +string+ as digits and returns the result.
#
# The word tables (DIRECT_NUMS, SINGLE_NUMS, TEN_PREFIXES, SINGLE_ORDINALS,
# DIRECT_ORDINALS, FRACTIONS, BIG_PREFIXES) are defined elsewhere in the
# class; each entry is read as a [pattern, value] pair. Every number produced
# along the way is tagged with a "<num>" marker so later passes (and
# +andition+) can recognise it; the markers are stripped before returning.
#
# string - the String to convert. It is not modified; a copy is processed.
#
# Returns a new String.
def self.numerize(string)
  text = string.dup

  # Preprocess: squeeze runs of spaces and turn a hyphen sitting between two
  # non-digit characters into a space (this also splits hyphenated words).
  text.gsub!(/ +|([^\d])-([^\d])/, '\1 \2')

  # Whole-word direct replacements.
  (DIRECT_NUMS + SINGLE_NUMS).each do |word, value|
    text.gsub!(/(^|\W)#{word}(?=$|\W)/i, "\\1<num>#{value}")
  end

  # Tens: fused "tens+unit" words first, then "tens [ordinal]" compounds,
  # and finally the bare tens word on its own.
  TEN_PREFIXES.each do |tens_word, tens_value|
    SINGLE_NUMS.each do |unit_word, unit_value|
      fused = /(^|\W)#{tens_word}#{unit_word}(?=$|\W)/i
      text.gsub!(fused, "\\1<num>#{tens_value + unit_value}")
    end

    SINGLE_ORDINALS.each do |ordinal_word, ordinal_value|
      compound = /(^|\W)#{tens_word}(\s)?#{ordinal_word}(?=$|\W)/i
      # The last two characters of the ordinal word are kept as the suffix.
      text.gsub!(compound, "\\1<num>#{tens_value + ordinal_value}" + ordinal_word[-2, 2])
    end

    text.gsub!(/(^|\W)#{tens_word}(?=$|\W)/i, "\\1<num>#{tens_value}")
  end

  # Fractions: "a <fraction>" becomes 1/n; a fraction word after whitespace
  # becomes "/n" glued onto whatever precedes it.
  FRACTIONS.each do |fraction_word, denominator|
    text.gsub!(/a #{fraction_word}(?=$|\W)/i, "<num>1/#{denominator}")
    text.gsub!(/\s#{fraction_word}(?=$|\W)/i, "/#{denominator}")
  end

  # Remaining ordinals, again keeping the two-character suffix.
  (DIRECT_ORDINALS + SINGLE_ORDINALS).each do |ordinal_word, ordinal_value|
    text.gsub!(/(^|\W)#{ordinal_word}(?=$|\W)/i, "\\1<num>#{ordinal_value}" + ordinal_word[-2, 2])
  end

  # A whole number followed by a fraction is evaluated to a float.
  text.gsub!(/(\d+)(?: | and |-)+(<num>|\s)*(\d+)\s*\/\s*(\d+)/i) do
    parts = Regexp.last_match
    (parts[1].to_f + parts[3].to_f / parts[4].to_f).to_s
  end

  # Hundreds, thousands, millions, etc.: multiply by the preceding digits if
  # there are any, then fold neighbouring numbers before the next magnitude.
  BIG_PREFIXES.each do |magnitude_word, magnitude|
    text.gsub!(/(?:<num>)?(\d*) *#{magnitude_word}/i) do
      count = Regexp.last_match(1)
      count.empty? ? magnitude : "<num>#{magnitude * count.to_i}"
    end
    andition(text)
  end

  andition(text)

  text.gsub('<num>', '')
end

Private Class Methods

andition(string)
# File lib/numerizer.rb, line 157
# Folds adjacent "<num>"-tagged integers in +string+ into their sum, in place.
#
# A pair "<num>A <num>B" or "<num>A and <num>B" is replaced by "<num>(A+B)"
# when the separator contains "and" (in any letter case) or when A has more
# digits than B. After every replacement the scan restarts from the beginning,
# so a chain such as "<num>1000 <num>200 <num>3" collapses completely. A pair
# that does not qualify is left alone and scanning continues after it.
#
# string - the String to rewrite; it is mutated.
#
# Returns nil.
def andition(string)
  sc = StringScanner.new(string)
  while sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i)
    # The scan pattern is case-insensitive, so the "and" test must be too;
    # otherwise " AND " would be treated like a plain space.
    next unless sc[2] =~ /and/i || sc[1].size > sc[3].size

    # StringScanner#pos and #matched_size count bytes, whereas String#[]=
    # takes character offsets. Use character-based values so that multibyte
    # text ahead of the match does not shift the splice.
    match_length = sc.matched.size
    string[sc.charpos - match_length, match_length] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s

    # The string changed under the scanner; start over from the beginning.
    sc.reset
  end
end