需求是这样的:( 哈哈这个感觉简单一点)
write a program that given a phrase can count the occurrences of each word in that phrase.
For example, for the input `"olly olly in come free"` the expected output is:
{ "olly" => 2, "in" => 1, "come" => 1, "free" => 1 }
其实它是有个测试要过的,测试代码略长,想看的话在这里 https://gist.github.com/qingwang/6e23f19d3cce2f4464ee
lol 我又开始闭着眼睛写啦
# 第一版
# Wraps a phrase and reports how often each word occurs in it.
class Phrase
  attr_reader :a_phrase

  # a_phrase - the text to analyse; stored unchanged.
  def initialize(a_phrase)
    @a_phrase = a_phrase
  end

  # Returns a Hash of lowercased word => occurrence count, keyed in order of
  # first appearance. A word is a run of ASCII letters and digits; everything
  # else acts as a separator.
  def word_count
    tokens = a_phrase.downcase.scan(/[a-zA-Z0-9]+/)
    tokens.each_with_object({}) do |token, counts|
      counts[token] = counts.fetch(token, 0) + 1
    end
  end
end
正在等人拍砖,反正都是接砖,也到这里来接几块...
感觉你的代码写得挺好的,测试却写得好坑爹啊,明明一个函数搞定的,非要整个类啊
被 test_count_everything_just_once 这个测试雷翻了,代码要写得多坑爹才会出这种问题啊。
不考虑复杂情况(仅按空白字符作为分割单词的依据),把基于正则的 scan 换成简单的 split,会快一些。。。。
=> cat /tmp/ruby_test1.rb
# Benchmark body: one million passes of lowercasing the sample phrase,
# extracting words with a regex scan, and grouping identical words together.
1000000.times do
'olly olly in come free'.downcase.scan(/[a-zA-Z0-9]+/).group_by{ |elem| elem }
end
=> cat /tmp/ruby_test2.rb
# Benchmark body: one million passes of lowercasing the sample phrase,
# splitting it on whitespace (argument-less split), and grouping identical
# words together.
1000000.times do
'olly olly in come free'.downcase.split.group_by{ |elem| elem }
end
=> time ruby /tmp/ruby_test1.rb
real 0m6.860s
user 0m6.823s
sys 0m0.035s
=> time ruby /tmp/ruby_test2.rb
real 0m3.678s
user 0m3.669s
sys 0m0.007s
当然,这点差距可以忽略不计。。。
Inspired by @luikore's use of String, but still a very sloppy solution o_O...
# A String that can report how often each word occurs in itself.
class Phrase < String
  # Counts the occurrences of every word, ignoring case. A word is a run of
  # characters matched by \w+; anything else is treated as a separator.
  #
  # @return [Hash{String => Integer}] lowercased word => number of
  #   occurrences, keyed in order of first appearance
  def word_count
    # Single pass over the words: each one bumps its own counter, so the
    # word list is never re-counted and no intermediate hashes are built.
    scan(/\w+/).each_with_object({}) do |word, counts|
      key = word.downcase
      counts[key] = counts.fetch(key, 0) + 1
    end
  end
end
悲剧了,erlang 的 dict 竟然不自带比较,还得自己写一个
%% Counts how many times each word occurs in the string S, ignoring case.
%% S is lowercased, split on the separator characters listed below, and every
%% resulting token bumps its counter in a dict. Returns the dict of
%% Word => Count.
word_count(S) ->
    Separators = " ~!@#$%^&*:,.",
    Words = string:tokens(string:to_lower(S), Separators),
    Bump = fun (Word, Counts) -> dict:update_counter(Word, 1, Counts) end,
    lists:foldl(Bump, dict:new(), Words).
%% Asserts that D1 and D2 hold exactly the same keys with exactly the same
%% values. Returns true on success and fails with a badmatch otherwise, so a
%% caller that ignores the return value still crashes on a mismatch.
dict_equal(D1, D2) ->
    %% Both dicts must have the same number of keys; without this check a D2
    %% that is missing some of D1's keys would be accepted.
    true = dict:size(D1) == dict:size(D2),
    %% The merge fun only runs for keys present in both dicts and crashes if
    %% their values differ.
    D3 = dict:merge(fun (_, V1, V2) -> V1 = V2 end, D1, D2),
    %% If the union is no larger than D1, neither dict has a key the other
    %% lacks.
    true = dict:size(D3) == dict:size(D1).
%% Runs the named test case: counts the words of a fixed input string and
%% checks the result against the expected counts with dict_equal/2.
test(count_one_word) ->
    Expected = dict:from_list([{"word", 1}]),
    dict_equal(word_count("word"), Expected);
test(count_one_of_each) ->
    Expected = dict:from_list([{"one", 1}, {"of", 1}, {"each", 1}]),
    dict_equal(word_count("one of each"), Expected);
test(count_multiple_occurrences) ->
    Expected = dict:from_list([{"one", 1}, {"fish", 4}, {"two", 1}, {"red", 1}, {"blue", 1}]),
    dict_equal(word_count("one fish two fish red fish blue fish"), Expected);
test(ignore_punctuation) ->
    Expected = dict:from_list([{"car", 1}, {"carpet", 1}, {"as", 1}, {"java", 1}, {"javascript", 1}]),
    dict_equal(word_count("car : carpet as java : javascript!!&@$%^&"), Expected);
test(handles_cramped_lists) ->
    Expected = dict:from_list([{"one", 1}, {"two", 1}, {"three", 1}]),
    dict_equal(word_count("one,two,three"), Expected);
test(include_numbers) ->
    Expected = dict:from_list([{"testing", 2}, {"1", 1}, {"2", 1}]),
    dict_equal(word_count("testing, 1, 2 testing"), Expected);
test(normalize_case) ->
    Expected = dict:from_list([{"go", 3}]),
    dict_equal(word_count("go Go GO"), Expected).
%% Runs every named test case in order and returns ok. A failing case crashes
%% and stops the run.
test() ->
    Cases = [count_one_word,
             count_one_of_each,
             count_multiple_occurrences,
             ignore_punctuation,
             handles_cramped_lists,
             include_numbers,
             normalize_case],
    lists:foreach(fun (Case) -> test(Case) end, Cases),
    ok.
# A String that can tally its own words.
class Phrase < String
  # Returns a Hash of lowercased word => occurrence count, where a word is a
  # run of \w characters. The Hash has a default value of 0, so looking up a
  # word that never occurred yields 0.
  def word_count
    scan(/\w+/).each_with_object(Hash.new(0)) do |word, tally|
      tally[word.downcase] += 1
    end
  end
end
Hash::new(obj)
Hash.new 0
If obj is specified, this single object will be used for all default values.
太厲害了!
#14 楼 @blacktulip 这是继承的基本之基本吧,is a Phrase also a String? Of course!
另外一点点好处:我们知道 String 有拷贝构造函数,可以这么用 String.new "str",那么构造函数都不用自己写了
# Holds a phrase and splits it into numbered pieces.
class Phrase
  attr_reader :a_phrase

  # a_phrase - any object; its to_s form is what gets stored and split.
  def initialize(a_phrase)
    @a_phrase = a_phrase.to_s
    # Default separator pattern: a comma, a colon, or a whitespace character.
    @re = /(?:[,:]|\s|(?:\s[,:]\s))/
  end

  # Splits the stored phrase on +re+ (the default pattern unless one is given)
  # and returns a Hash mapping each piece's zero-based position to the piece.
  def word_count(re=@re)
    pieces = @a_phrase.split(re)
    Hash[(0...pieces.size).zip(pieces)]
  end
end
# Usage demo: split the sample phrase with the default pattern, then with an
# explicit whitespace pattern.
sample = Phrase.new("olly olly in come free")
sample.word_count #=> {0=>"olly", 1=>"olly", 2=>"in", 3=>"come", 4=>"free"}
sample.word_count(/\s/) #=> {0=>"olly", 1=>"olly", 2=>"in", 3=>"come", 4=>"free"}
稍微修改了下楼主的代码,不过这样不太好,每次解析都需要创建一个 Phrase 的对象。
修改版: 一切尽在代码中。
# Splits strings into numbered pieces; one instance can be reused for many
# strings.
class Phrase
  def initialize
    # Default separator pattern: a comma, a colon, or a whitespace character.
    @re = /(?:[,:]|\s|(?:\s[,:]\s))/
  end

  # Splits +str+ on +re+ (the default pattern unless one is given) and returns
  # a Hash mapping each piece's zero-based position to the piece.
  # Returns 0 when +str+ is not a String.
  def phrase(str, re=@re)
    if str.is_a?(String)
      parts = str.split(re)
      Hash[(0...parts.size).zip(parts)]
    else
      0
    end
  end
end
# Usage demo: one splitter instance handles the sample phrase.
splitter = Phrase.new
splitter.phrase("olly olly in come free")
那个正则有一点问题,
"car : carpet as java : javascript!!&@$%^&"
这种带空格的字符串 carpet as java 也会被分隔开。
不过你可以给 phrase 传个正则。XD
补:鉴于你测试里的字符串什么类型都有,所以一行正则恐怕难以完成任务。 你应该分情况去写正则判断了。