Rails 使用逻辑运算符搜索 ActiveRecord

Rails Search ActiveRecord with Logical Operators

我想知道在 Rails 中解析文本查询的最佳方法是什么,以允许用户包含逻辑运算符?

我希望用户能够输入以下任一内容或一些等效内容:

# searching partial text in emails, just for example
# query A
"jon AND gmail" #=> ["jonsmith@gmail.com"]

# query B
"jon OR gmail" #=> ["jonsmith@gmail.com", "sarahcalaway@gmail.com"]

# query C
"jon AND gmail AND smith" #=> ["jonsmith@gmail.com"]

理想情况下,我们可以使用括号来指示操作顺序,从而变得更加复杂,但这不是必需的。

是否有 gem 或模式支持此功能?

最简单的情况是从字符串中提取一个数组:

# Split on the operator as a standalone word (surrounded by whitespace) so that
# terms which merely contain the letters "AND"/"OR" (e.g. "ANDY", "GORDON")
# are not cut apart.
and_array = "jon AND gmail".split(/\s+AND\s+/).map(&:strip)
# ["jon", "gmail"]
or_array = "jon OR sarah".split(/\s+OR\s+/).map(&:strip)
# ["jon", "sarah"]

然后你可以构造一个查询字符串:

# Wrap every term in SQL wildcards. The term has to be interpolated with #{...};
# a plain "%e%" literal would append the text "%e%" for each element instead.
query_string = and_array.map { |e| "%#{e}%" }.join
# "%jon%%gmail%"

然后您使用 ILIKE 或 LIKE 查询来获取结果:

# The pattern is passed as a bind argument for the "?" placeholder rather than
# being interpolated into the SQL string.
Model.where("column ILIKE ?", query_string)
# SELECT * FROM model WHERE column ILIKE '%jon%%gmail%'
# Results: jonsmith@gmail.com

当然,这可能有点矫枉过正。但这是一个简单的解决方案。

我在一个 Sinatra 应用程序中使用了这样的解析器。由于查询往往很复杂,我生成的是普通 SQL,而不是使用 ActiveRecord 的查询方法。如果对你有用,请放心使用。

用法如下:class_name 是代表数据表的 ActiveRecord 类,params 是由待解析字符串组成的哈希,结果以 JSON 形式发送到浏览器。例如:

# No space between the method name and "(": with a space, Ruby parses
# "(Person, {...})" as a parenthesised expression and reports a syntax error.
generic_data_getter(Person, { age: ">30", name: "=John", date: ">=1/1/2014 <1/1/2015" })

  # Runs a filtered, paginated and sorted query and wraps the result in a hash.
  #
  # @param class_name [Class] model class; must respond to +column_names+ and +where+
  # @param params [Hash] column name => filter expression, passed to +build_selection+
  # @param start [Integer] offset of the first row to return
  # @param limit [Integer] maximum number of rows to return
  # @param sort [String, Symbol] column to sort by; falls back to 'id' when it is
  #   not one of the model's columns
  # @param dir [String, Symbol] 'ASC' or 'DESC' (case-insensitive); anything else
  #   is treated as 'ASC'
  # @return [Hash] +:success+, +:totalCount+ (row count ignoring paging) and +:result+
  def generic_data_getter(class_name, params, start = 0, limit = 300, sort = 'id', dir = 'ASC')
    selection = build_selection(class_name, params)

    # ORDER BY cannot use bind parameters, so only known column names and the
    # two valid directions are allowed to reach the SQL string.
    sort_column = class_name.column_names.include?(sort.to_s) ? sort.to_s : 'id'
    direction = dir.to_s.upcase == 'DESC' ? 'DESC' : 'ASC'

    data = class_name.where(selection).offset(start).limit(limit).order("#{sort_column} #{direction}")
    # The total is counted on the same relation with paging and ordering removed.
    total = data.except(:offset, :limit, :order).count
    { :success => true, :totalCount => total, :result => data.as_json }
  end

# Builds the SQL WHERE fragment for a hash of column filters.
#
# Keys that are not columns of the model are ignored. A value that is one of
# the "empty" / "not empty" keywords becomes an IS NULL / IS NOT NULL test;
# any other value is handed to the selector for the column's type
# (string, integer or date). Columns of other types are ignored.
#
# @param class_name [Class] model class responding to +column_names+ and +columns_hash+
# @param params [Hash{String, Symbol => String}] column name => filter expression
# @return [String] the conditions joined with ' and ' (empty when nothing applies)
def build_selection(class_name, params)
  field_names = class_name.column_names
  selection = []
  params.each do |k, v|
    # column_names holds strings, so symbol keys (e.g. {age: ">30"}) must be
    # converted first or they would never match and be silently dropped.
    column = k.to_s
    next unless field_names.include?(column)

    keyword = v.downcase
    type_of_field = class_name.columns_hash[column].type.to_s
    if ['leeg', 'empty', 'nil', 'null'].include?(keyword)
      selection << "#{column} is null"
    elsif ['niet leeg', 'not empty', '!nil', 'not null'].include?(keyword)
      selection << "#{column} is not null"
    else
      case type_of_field
      when 'string'  then selection << string_selector(column, v)
      when 'integer' then selection << integer_selector(column, v)
      when 'date'    then selection << date_selector(column, v)
      end
    end
  end
  # Selectors may return arrays of conditions; Array#join flattens them.
  selection.join(' and ')
end

# Translates a filter expression for a string column into SQL condition(s).
#
# Supported forms:
# * "a|b|c"   - case-insensitive substring match on any of the alternatives
# * "=x", ">x <y" - one comparison per operator (<, <=, >, >=, =)
# * anything else - case-insensitive substring match
#
# @param k [String] column name
# @param v [String] filter expression
# @return [String, Array<String>] a single condition, or an array of conditions
#   that the caller joins with ' and '
def string_selector(k, v)
  # Double embedded single quotes so the value cannot terminate the SQL literal.
  quote = ->(text) { text.gsub("'", "''") }
  alternatives = v.include?('|') ? v.split('|').reject(&:empty?) : []

  if alternatives.any?
    conditions = alternatives.map { |term| "lower(#{k}) LIKE '%#{quote.call(term.downcase)}%'" }
    # Parenthesised so the OR group keeps its meaning once the caller joins
    # it to other conditions with ' and '.
    ["(#{conditions.join(' or ')})"]
  elsif v.match?(/[<>=]/)
    v.scan(/(<=?|>=?|=)([^<>=]+)/).map do |operator, operand|
      "#{k} #{operator} '#{quote.call(operand.strip)}'"
    end
  else
    "lower(#{k}) LIKE '%#{quote.call(v.downcase)}%'"
  end
end

# Translates a filter expression for an integer column into SQL condition(s).
#
# Supported forms:
# * "1|2|3" or "1,2,3" - IN list
# * "10-20"            - BETWEEN range
# * ">30", ">=18 <65"  - one comparison per operator (<, <=, >, >=, =)
# * "42" / "-5"        - equality
#
# Every number is validated as a base-10 integer before it is placed in the
# SQL, so arbitrary text can never reach the query.
#
# @param k [String] column name
# @param v [String] filter expression
# @return [String, Array<String>] a single condition, or an array of conditions
#   that the caller joins with ' and '
# @raise [ArgumentError] when the expression contains no valid integer
def integer_selector(k, v)
  to_int = ->(text) { Integer(text.strip, 10) }

  if v.match?(/[|,]/)
    numbers = v.split(/[|,]/).reject { |token| token.strip.empty? }.map(&to_int)
    raise ArgumentError, "no values in integer list: #{v.inspect}" if numbers.empty?

    ["#{k} IN (#{numbers.join(', ')})"]
  elsif (range = v.match(/\A\s*(-?\d+)\s*-\s*(-?\d+)\s*\z/))
    # Matching two complete numbers keeps a lone negative value such as "-5"
    # from being mistaken for a range.
    ["#{k} BETWEEN #{to_int.call(range[1])} and #{to_int.call(range[2])}"]
  elsif v.match?(/[<>=]/)
    comparisons = v.scan(/(<=?|>=?|=)\s*(-?\d+)/)
    raise ArgumentError, "invalid integer comparison: #{v.inspect}" if comparisons.empty?

    comparisons.map { |operator, number| "#{k} #{operator} #{to_int.call(number)}" }
  else
    "#{k} = #{to_int.call(v)}"
  end
end

# Translates a filter expression for a date column into SQL condition(s).
#
# Dates are written day/month/year (or day-month-year); anything else is
# handed to Date.parse unchanged. Supported forms:
# * "1/1/2014|2/1/2014"    - IN list
# * ">=1/1/2014 <1/1/2015" - one comparison per operator (<, <=, >, >=, =)
# * "1/1/2014-31/1/2014"   - BETWEEN range
# * "2/2014"               - the whole month
# * "31/12/2014"           - equality
#
# Every date is parsed and re-emitted in ISO format, so arbitrary text can
# never reach the query.
#
# @param k [String] column name
# @param v [String] filter expression
# @return [String, Array<String>] a single condition, or an array of conditions
#   that the caller joins with ' and '
# @raise [ArgumentError] when the expression contains no valid date
def date_selector(k, v)
  date_pattern = '\d{1,2}[-/]\d{1,2}[-/]\d{1,4}'
  eurodate = /\A(\d{1,2})[-\/](\d{1,2})[-\/](\d{1,4})\z/

  # Reorders day/month/year into year-month-day, validates it via Date.parse
  # and returns the SQL DATE('yyyy-mm-dd') expression.
  to_sql_date = lambda do |text|
    text = text.strip
    parts = eurodate.match(text)
    text = format('%s-%02d-%02d', parts[3], parts[2].to_i, parts[1].to_i) if parts
    "DATE('#{Date.parse(text)}')"
  end

  if v.include?('|')
    dates = v.split('|').reject { |token| token.strip.empty? }.map(&to_sql_date)
    raise ArgumentError, "no values in date list: #{v.inspect}" if dates.empty?

    ["#{k} IN (#{dates.join(', ')})"]
  elsif v.match?(/[<>=]/)
    # Checked before the range form so dash-separated dates after an operator
    # (e.g. "<1-1-2014") are not mistaken for a range.
    comparisons = v.scan(/(<=?|>=?|=)\s*(#{date_pattern})/)
    raise ArgumentError, "invalid date comparison: #{v.inspect}" if comparisons.empty?

    comparisons.map { |operator, date| "#{k} #{operator} #{to_sql_date.call(date)}" }
  elsif (range = v.match(/\A\s*(#{date_pattern})\s*-\s*(#{date_pattern})\s*\z/))
    ["#{k} BETWEEN #{to_sql_date.call(range[1])} and #{to_sql_date.call(range[2])}"]
  elsif (month = v.match(/\A\s*(\d{1,2})[-\/](\d{1,4})\s*\z/))
    first_day = Date.parse(format('%s-%02d-01', month[2], month[1].to_i))
    # Day -1 is the last day of the month, which avoids producing e.g. Feb 31.
    last_day = Date.new(first_day.year, first_day.month, -1)
    "#{k} >= DATE('#{first_day}') and #{k} <= DATE('#{last_day}')"
  else
    "#{k} = #{to_sql_date.call(v)}"
  end
end

这是一种可行但效率低下的方法:

user_input = "jon myers AND gmail AND smith OR goldberg OR MOORE"

# The capture groups make split return both the search text and the operator;
# the empty strings left between consecutive matches are discarded.
terms = user_input.split(/(.+?)((?: and | or ))/i).reject { |piece| piece.empty? }
# => ["jon myers", " AND ", "gmail", " AND ", "smith", " OR ", "goldberg", " OR ", "MOORE"]

# Pair each search text with the operator that follows it (nil for the last one).
pairs = terms.each_slice(2).map do |text, op|
  ["column LIKE ? #{op} ", "%#{text}%"]
end
# => [["column LIKE ?  AND  ", "%jon myers%"], ["column LIKE ?  AND  ", "%gmail%"], ["column LIKE ?  OR  ", "%smith%"], ["column LIKE ?  OR  ", "%goldberg%"], ["column LIKE ?  ", "%MOORE%"]]

# Element 0 accumulates the SQL text; every bind value is appended after it.
query = pairs.each_with_object([""]) do |(fragment, pattern), acc|
  acc[0] += fragment
  acc << pattern
end
# => ["column LIKE ?  AND  column LIKE ?  AND  column LIKE ?  OR  column LIKE ?  OR  column LIKE ?  ", "%jon myers%", "%gmail%", "%smith%", "%goldberg%", "%MOORE%"]

# query[0] is the SQL fragment containing the "?" placeholders; the remaining
# elements are splatted in as the bind values for those placeholders.
Model.where(query[0], *query[1..-1]).to_sql
# => SELECT "courses".* FROM "courses"  WHERE (column LIKE '%jon myers%'  AND  column LIKE '%gmail%'  AND  column LIKE '%smith%'  OR  column LIKE '%goldberg%'  OR  column LIKE '%MOORE%'  )

但是,正如我所说,像这样的搜索效率极低。我建议您使用全文搜索引擎,例如 Elasticsearch.