# Benchmark driver for the array-literal form of "max of two values".
#
# Evaluates `[n, i].max` once for every integer i in 0..iterations and
# discards each result; only the elapsed time is of interest to callers.
#
# n          - value compared against every loop index.
# iterations - inclusive upper bound of the loop range (default 10_000_000,
#              the count that was previously hard-coded).
#
# Returns the Range that was iterated (the receiver of #each).
def maxa(n, iterations = 10_000_000)
  (0..iterations).each do |i|
    # Keep the literal `[n, i].max` form: it is the expression under measurement.
    [n, i].max
  end
end
# Benchmark driver for the plain-method form of "max of two values".
#
# Calls max(n, i) once for every integer i in 0..iterations and discards
# each result; only the elapsed time is of interest to callers.
#
# n          - value compared against every loop index.
# iterations - inclusive upper bound of the loop range (default 10_000_000,
#              the count that was previously hard-coded).
#
# Returns the Range that was iterated (the receiver of #each).
def maxf(n, iterations = 10_000_000)
  (0..iterations).each do |i|
    max(n, i)
  end
end
# Returns the larger of the two arguments, compared with `>`.
#
# a - left operand; must respond to `>` with b as argument.
# b - right operand.
#
# Returns a when `a > b` is truthy; otherwise returns b (so b wins ties).
def max(a, b)
  return a if a > b

  b
end
[3] pry(main)> Benchmark.measure { maxa(1000)}
=> #<Benchmark::Tms:0x00000002ed1000 @cstime=0.0, @cutime=0.0, @label="", @real=0.4200649199774489, @stime=0.0, @total=0.42000000000000004, @utime=0.42000000000000004>
[4] pry(main)> Benchmark.measure { maxf(1000)}
=> #<Benchmark::Tms:0x00000002f217d0 @cstime=0.0, @cutime=0.0, @label="", @real=0.6093627649825066, @stime=0.0, @total=0.6, @utime=0.6>
为何 [a, b].max 这种需要创建临时数组的写法,比直接调用函数、无需创建临时数组的方式还要快?不科学啊。
/*
 * C implementation backing a max_by-style method on an enumerable object.
 *
 * argc/argv - raw call arguments; at most one optional argument is accepted
 *             (scan format "01").
 * obj       - the receiver being enumerated.
 *
 * With the optional argument present (non-nil), the work is delegated to
 * rb_nmin_run. Otherwise the receiver is iterated through id_each with
 * max_by_i as the block function, and the v2 slot of the MEMO is returned.
 */
static VALUE
enum_max_by(int argc, VALUE *argv, VALUE obj)
{
    VALUE count;
    struct MEMO *state;

    /* Argument parsing must stay ahead of the enumerator macro below. */
    rb_scan_args(argc, argv, "01", &count);
    /* NOTE(review): presumably returns an Enumerator early when no block
     * is given -- confirm against the macro definition. */
    RETURN_SIZED_ENUMERATOR(obj, argc, argv, enum_size);

    if (NIL_P(count)) {
        /* No count supplied: max_by_i accumulates into the MEMO while iterating. */
        state = MEMO_NEW(Qundef, Qnil, 0);
        rb_block_call(obj, id_each, 0, 0, max_by_i, (VALUE)state);
        return state->v2;
    }

    /* Count supplied: hand off to the shared n-min/n-max routine. */
    return rb_nmin_run(obj, count, 1, 1, 0);
}
Array#max 确实是用 C 实现的(上面贴出的是 enum_max_by 的 C 源码)。
https://ruby-doc.org/core-2.5.1/Enumerable.html#method-i-max
直觉上不可能。我在自己的机器上跑了一下,结果是相反的,你的 Ruby 是什么版本?
2.3.2 :018 > Benchmark.measure { maxa(1000)}
=> #<Benchmark::Tms:0x007fcd7a884b78 @label="", @real=3.692405005916953, @cstime=0.0, @cutime=0.0, @stime=0.040000000000000036, @utime=3.6, @total=3.64>
2.3.2 :019 > Benchmark.measure { maxf(1000)}
=> #<Benchmark::Tms:0x007fcd7b027b00 @label="", @real=0.8660073862411082, @cstime=0.0, @cutime=0.0, @stime=0.009999999999999898, @utime=0.8300000000000001, @total=0.84>
好像在 2.4 上测试,结果就反过来了,是因为 2.4 更新了 Array#max 的实现吗?等下再看看……
我也试了一下,2.1.4 版本的
2.1.4 :020 > Benchmark.measure { maxa(1000)}
=> #<Benchmark::Tms:0x007fda620f8798 @label="", @real=2.204469, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=2.1999999999999997, @total=2.1999999999999997>
2.1.4 :021 > Benchmark.measure { maxf(1000)}
=> #<Benchmark::Tms:0x007fda620ba290 @label="", @real=0.602247, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=0.6100000000000003, @total=0.6100000000000003>
貌似下面的(maxf)要快几倍。
2.3.6 :021 > ENV['RUBY_VERSION']
=> "ruby-2.3.6"
2.3.6 :019 > Benchmark.measure { maxa(1000)}
=> #<Benchmark::Tms:0x00007f8a60a4a780 @label="", @real=2.4335659999924246, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=2.42, @total=2.42>
2.3.6 :020 > Benchmark.measure { maxf(1000)}
=> #<Benchmark::Tms:0x00007f8a60a3be88 @label="", @real=0.5101689999864902, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=0.5100000000000002, @total=0.5100000000000002>
:001 > ENV['RUBY_VERSION']
=> "ruby-2.5.0"
:022 > Benchmark.measure { maxa(1000)}
=> #<Benchmark::Tms:0x00007fe484845d30 @label="", @real=0.46752000000560656, @cstime=0.0, @cutime=0.0, @stime=0.00017400000000000054, @utime=0.46631599999999995, @total=0.46648999999999996>
:023 > Benchmark.measure { maxf(1000)}
=> #<Benchmark::Tms:0x00007fe48282dc78 @label="", @real=0.6195719999959692, @cstime=0.0, @cutime=0.0, @stime=0.0003689999999999978, @utime=0.6181120000000001, @total=0.6184810000000001>
2.3 和 2.5 的测试,确实也是反过来的,而且差别不小。
我想说的是,楼主的假设是不是不够科学?[].max 虽然会返回新对象,但是它的执行成本明显要低于 max(a, b),执行过程本身就会创建一大波对象(都放在 ObjectSpace 中),调用越少消耗越少,虽然栈调用的消耗相对较小。
不能简单地根据调用是否返回新对象来判断其性能吧? @quakewang
这个测试用例中被测试的调用是在 each 中的,不知道虚拟机解释的时候,是否有区别优化?
[n, i].max
这种写法被做了优化,并不是普通地创建临时数组,不过性能完全依赖实现细节吧。
ruby --dump insns test.rb
local table (size: 1, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] i<Arg>
0000 nop ( 2)[Bc]
0001 getlocal_OP__WC__1 n ( 3)[Li]
0003 getlocal_OP__WC__0 i
0005 opt_newarray_max 2
0007 leave
这里用 opt_newarray_max 指令来做比较。
如果走的是上半部分(BASIC_OP_UNREDEFINED_P 成立的分支),貌似就不会创建临时数组了。
/*
 * Computes the maximum of `num` values stored at `ptr`.
 *
 * Fast path (first branch): taken when
 * BASIC_OP_UNREDEFINED_P(BOP_MAX, ARRAY_REDEFINED_OP_FLAG) holds
 * (presumably: Array#max has not been redefined -- confirm against the
 * macro). The values are compared in place with OPTIMIZED_CMP and no
 * array object is built. Returns Qnil for num == 0.
 *
 * Slow path: builds an array from the values with rb_ary_new4 and
 * dispatches idMax on it through rb_funcall.
 */
static VALUE
vm_opt_newarray_max(rb_num_t num, const VALUE *ptr)
{
    if (BASIC_OP_UNREDEFINED_P(BOP_MAX, ARRAY_REDEFINED_OP_FLAG)) {
        /* Name kept as `cmp_opt`: OPTIMIZED_CMP is a macro defined elsewhere. */
        struct cmp_opt_data cmp_opt = { 0, 0 };
        VALUE best;
        rb_num_t idx;

        if (num == 0) {
            return Qnil;
        }

        /* Seed with the last element, then walk towards index 0. */
        idx = num - 1;
        best = ptr[idx];
        while (idx > 0) {
            const VALUE candidate = ptr[--idx];
            if (best == Qundef || OPTIMIZED_CMP(candidate, best, cmp_opt) > 0) {
                best = candidate;
            }
        }
        return best == Qundef ? Qnil : best;
    }

    /* Generic dispatch: materialize the array and call its max method. */
    return rb_funcall(rb_ary_new4(num, ptr), idMax, 0);
}