Swift vs Rust — Linuxでのベンチマーク、(不)可解な結末

こんにちは、Habr!

私は時々、SwiftをLinux向けのアプリケーション開発言語として検討しています。シンプルで、動的で、コンパイル型で、ガベージコレクターがなく、したがって理論的にはデバイスにも適しているからです。そこで、同じく若くて流行の言語であるRustと比較してみることにしました。テストには実用的なタスク、つまりオブジェクトの配列を含む大きなJSONファイルの解析と集計を選びました。ソースコードは同じスタイルで書くよう努め、実行速度、バイナリサイズ、ソースサイズ、コーディングの主観的な印象という4つの観点で比較しました。

タスクの詳細は次のとおりです。100万個のオブジェクトの配列を含む、100 MBのJSONファイルがあります。各オブジェクトは債務の記録で、会社名、電話番号のリスト、債務額からなります。異なる会社が同じ電話番号を使っていることがあり、その場合は同一の債務者としてグループ化する必要があります。つまり、名前のリスト、電話番号のリスト、債務総額を持つ実際の債務者を特定します。元のオブジェクトは「ダーティ」で、データは文字列/数値/配列/オブジェクトのいずれの形でも書かれている可能性があります。

ベンチマークの結果には困惑させられました。詳細とソースコードは以下をご覧ください。

ソースJSON:
[
    {"company":"  ", "debt": 800, "phones": [123, 234, 456]},
    {"company":" ", "debt": 1200, "phones": ["2128506", 456, 789]},
    {"company":" ", "debt": "666", "phones": 666},
    {"company": " ", "debt": 1500, "phones": [234567, "34567"], "phone": 666},
    {"company": {"name": ""}, "debt": 2550, "phones": 788, "phone": 789},
...


4 :

1) , . YAJL, , {}, ASCII, Unicode. , Unicode — , JSON , .

2) C-, 1, JSON, (Any Swift JsonValue Rust).

3) , :
//source data
// Schematic shape of one input record (the full program below gives every field a default value).
class DebtRec {
    // Company name taken from the record.
    var company: String
    // Phone numbers attached to the record, kept as strings.
    var phones: Array<String>
    // Debt amount of this single record.
    var debt: Double
}

4) — ( ), . 2 :
//result data
// Schematic shape of one aggregated debtor (the full program below gives every field a default value).
class Debtor {
    // Distinct company names merged into this debtor.
    var companies: Set<String>
    // Distinct phone numbers merged into this debtor.
    var phones: Set<String>
    // Debt accumulated over all merged records.
    var debt: Double
}
// Schematic shape of the overall result: the debtor list plus a phone lookup table.
class Debtors {
    // All debtors found so far.
    var all: Array<Debtor>
    // Maps a phone number to the position of its debtor inside `all`.
    var index_by_phone: Dictionary<String, Int>
}

(), -, . … , Rust ( ), all — . , , , , - . Rust ?

P.S.
Rust — , to_string(), ( , ). Swift — , — , .


:
swift build -c release
cargo build --release


Rust 86 , , to_string() ( ? <>). Swift . .

1 .
Swift: 50
Rust: 4.31 , 11.5


Swift:
62 Kb, runtime — 9 54,6 ( , )
Rust:
— 1,9 , («lto=true» 950 , ).


Swift: 189 , 4.5 Kb
Rust: 230 , 5.8


— , Swift , «» Rust, . , , . , .

1) Swift ( Foundation) Rust, . , . , , , , Rust — from_str(), from_utf8() ..

2) + Swift . , Data(), JSON. Data , Array, .. , . , Data (!) , Array. Rust , API , .
PS
— Swift , , Rust 14%. , Rust , , , « - ».


3) Swift ( ) , ?! — unwrap(). match Option, Result, Value, , , . Swift Optional, , .

4) Swift , , , .

5) Rust , JSON , 2- :
if let Null = myVal {
    ...
}
match myVal {
    Null => {
        ...
    }
    _ => {}
}

:
if myVal is Null {
    ...
}
if myVal == Option::Null {
    ...
}

is_str(), is_null(), is_f64() enum-, , , .
PS
, , proposal.



? :

1) ,
Swift: 7.46
Rust: 0.75

2) JSON
Swift: 21.8
— : JSONSerialization.jsonObject(with: Data(obj))
Rust: 1.77
— : serde_json::from_slice(&obj)

3) Any
Swift: 16.01
Rust: 0.88
— , , Rust «» Swift

4)
Swift: 4.74
Rust: 0.91

, Swift , Node.js Python, , . — . , ? , Go MicroPython?

Rust — , , . , Rc<> , Node, Go Java, (, , Javascript 2.5 ).

P.S.
— .


Swift:
main.swift
import Foundation

// Number of bytes requested from the input file per read call in process_file.
let FILE_BUFFER_SIZE = 50000 

//source data
/// One normalised input record: a company name, its phone numbers and the debt amount.
/// Every field starts out empty/zero and is filled in by the extraction step.
class DebtRec {
    var company = ""
    var phones: [String] = []
    var debt = 0.0
}
//result data
/// One aggregated debtor: every company name and phone number merged into it,
/// plus the debt summed over all merged records.
class Debtor {
    var companies = Set<String>()
    var phones = Set<String>()
    var debt = 0.0
}
/// Aggregation result: the list of debtors and an index from phone number
/// to the debtor's position in `all`.
class Debtors {
    var all = [Debtor]()
    var index_by_phone = [String: Int]()
}


/// Command-line entry point.
///
/// Scans `CommandLine.arguments` for `-f <path>` pairs, feeds every path to
/// `process_file`, prints per-file statistics, then dumps all aggregated debtors.
/// Prints a usage line when no file was processed.
func main() {
    var res = Debtors()

    // Argument-scanner state: 0 = no "-f" seen yet, 1 = "-f" seen and the next
    // argument is a file path, 2 = at least one file has been processed.
    var fflag = 0
    for arg in CommandLine.arguments {
        if arg == "-f" {
            fflag = 1
        }
        else if fflag == 1 {
            fflag = 2
            print("\(arg):")
            let tbegin = Date()

            let (count, errcount) = process_file(fname: arg, res: &res)

            // timeIntervalSince cannot trap, whereas DateInterval(start:end:)
            // requires end >= start and would abort if the wall clock stepped back.
            let elapsed = Date().timeIntervalSince(tbegin)
            print("PROCESSED: \(count) objects in \(elapsed)s, \(errcount) errors found")
        }
    }

    for (di, d) in res.all.enumerated() {
        print("-------------------------------")
        print("#\(di): debt: \(d.debt)")
        print("companies: \(d.companies)\nphones: \(d.phones)")
    }

    if fflag < 2 {
        print("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...")
    }
}


/// Streams the file at `fname`, cuts it into top-level `{...}` objects by counting
/// braces, parses each object as JSON and merges it into `res`.
///
/// The splitter is byte-based: braces inside JSON string values are counted too.
///
/// - Parameters:
///   - fname: Path of the JSON file to read.
///   - res: Aggregation state updated in place.
/// - Returns: `(objects seen, objects that failed to parse)`. Both are 0 when the
///   file cannot be opened (an error line is printed in that case).
func process_file(fname: String, res: inout Debtors) -> (Int, Int) {
    var count = 0
    var errcount = 0

    guard let f = FileHandle(forReadingAtPath: fname) else {
        print("ERROR: Unable to open file")
        return (count, errcount)
    }

    let openBrace = UInt8(ascii: "{")
    let closeBrace = UInt8(ascii: "}")

    var obj: [UInt8] = []   // bytes of the object currently being collected
    var braces = 0          // current nesting depth

    while true {
        let buf = f.readData(ofLength: FILE_BUFFER_SIZE)
        if buf.isEmpty {
            break //EOF
        }
        for b in buf {
            if b == openBrace {
                braces += 1
                obj.append(b)
            }
            else if b == closeBrace {
                // A closing brace outside any object would push the depth below
                // zero and desynchronise the rest of the scan, so skip it.
                guard braces > 0 else { continue }

                braces -= 1
                obj.append(b)

                if braces == 0 { //object formed !
                    if let parsed = try? JSONSerialization.jsonObject(with: Data(obj)),
                       let dict = parsed as? [String: Any] {
                        process_object(o: dict, res: &res)
                    } else {
                        print("JSON ERROR: \(obj)")
                        errcount += 1
                    }

                    count += 1
                    // Reuse the allocation for the next object.
                    obj.removeAll(keepingCapacity: true)
                }
            }
            else if braces > 0 {
                obj.append(b)
            }
        }
    }
    return (count, errcount)
}


/// Merges one parsed JSON object into the aggregation state.
///
/// The first phone of the record that is already present in `res.index_by_phone`
/// selects the existing debtor; when none is known a new debtor is appended.
/// Every phone of the record is then (re)indexed to that debtor.
///
/// - Parameters:
///   - o: One decoded JSON object.
///   - res: Aggregation state updated in place.
func process_object(o: Dictionary<String, Any>, res: inout Debtors) {
    let record = extract_data(o)

    let knownPhone = record.phones.first(where: { res.index_by_phone[$0] != nil })

    let slot: Int
    let debtor: Debtor
    if let phone = knownPhone, let found = res.index_by_phone[phone] {
        // existing debtor: accumulate
        slot = found
        debtor = res.all[found]
        debtor.debt += record.debt
    } else {
        // new debtor: goes to the end of the list
        slot = res.all.count
        debtor = Debtor()
        debtor.debt = record.debt
        res.all.append(debtor)
    }

    debtor.companies.insert(record.company)
    for phone in record.phones {
        debtor.phones.insert(phone)
        res.index_by_phone[phone] = slot
    }
}


/// Normalises one "dirty" JSON object into a `DebtRec`.
///
/// - `company` may be a string, a number or an object with a `name` field; a missing
///   or unsupported value yields `"null"` instead of crashing.
/// - `phones` may be an array or a single value; `phone` is an optional extra value.
///   A JSON `null` in either field is skipped, so unrelated records are not grouped
///   under a fake `"null"` phone.
/// - `debt` may be a number or a numeric string; anything unparsable counts as 0.
///
/// - Parameter o: One decoded JSON object.
/// - Returns: The normalised record.
func extract_data(_ o: Dictionary<String, Any>) -> DebtRec {

    // Renders a scalar JSON value as text; absent or unsupported values become "null".
    func val2str(_ v: Any?) -> String {
        if let vs = v as? String {
            return vs
        }
        if let vi = v as? Int {
            return String(vi)
        }
        return "null"
    }

    let dr = DebtRec()

    let c = o["company"]
    if let company = c as? [String: Any] {
        dr.company = val2str(company["name"])
    } else {
        dr.company = val2str(c)
    }

    if let pp = o["phones"], !(pp is NSNull) {
        if let list = pp as? [Any] {
            for p in list {
                dr.phones.append(val2str(p))
            }
        } else {
            dr.phones.append(val2str(pp))
        }
    }

    if let p = o["phone"], !(p is NSNull) {
        dr.phones.append(val2str(p))
    }

    if let d = o["debt"] {
        if let dd = d as? Double {
            dr.debt = dd
        }
        else if let ds = d as? String {
            // A non-numeric string is treated as zero debt rather than aborting the run.
            dr.debt = Double(ds) ?? 0.0
        }
    }

    return dr
}

// Top-level statement: runs the program by invoking main() defined above.
main()


Rust:
main.rs
//[dependencies]
//serde_json = "1.0"

use std::collections::{HashMap, HashSet};
use serde_json::Value;

// Capacity in bytes of the buffered reader created in process_file.
const FILE_BUFFER_SIZE: usize = 50000;

//source data
/// One normalised input record.
struct DebtRec {
    /// Company name taken from the record.
    company: String,
    /// Phone numbers of the record, rendered as strings.
    phones: Vec<String>,
    /// Debt amount of this single record.
    debt: f64,
}
//result data
/// One aggregated debtor.
struct Debtor {
    /// Distinct company names merged into this debtor.
    companies: HashSet<String>,
    /// Distinct phone numbers merged into this debtor.
    phones: HashSet<String>,
    /// Debt accumulated over all merged records.
    debt: f64,
}
/// Aggregation result: all debtors plus a lookup table by phone number.
struct Debtors {
    /// Every debtor found so far.
    all: Vec<Debtor>,
    /// Maps a phone number to the position of its debtor inside `all`.
    index_by_phone: HashMap<String, usize>,
}


impl DebtRec {
    fn new() -> DebtRec {
        DebtRec {
            company: String::new(),
            phones: Vec::new(),
            debt: 0.0
        }
    }
}
impl Debtor {
    fn new() -> Debtor {
        Debtor {
            companies: HashSet::new(),
            phones: HashSet::new(),
            debt: 0.0
        }
    }
}
impl Debtors {
    fn new() -> Debtors {
        Debtors {
            all: Vec::new(),
            index_by_phone: HashMap::new()
        }
    }
}


/// Command-line entry point.
///
/// Scans the arguments for `-f <path>` pairs, feeds every path to `process_file`,
/// prints per-file statistics, then dumps all aggregated debtors. Prints a usage
/// line when no file was processed.
fn main() {
    let mut res = Debtors::new();

    // Argument-scanner state: 0 = no "-f" seen yet, 1 = "-f" seen and the next
    // argument is a file path, 2 = at least one file has been processed.
    let mut fflag = 0;
    for arg in std::env::args() {
        if arg == "-f" {
            fflag = 1;
        }
        else if fflag == 1 {
            fflag = 2;
            println!("{}:", &arg);
            // Instant is monotonic, so measuring elapsed time cannot fail the way
            // SystemTime::elapsed() does when the wall clock is adjusted backwards.
            let tbegin = std::time::Instant::now();

            let (count, errcount) = process_file(&arg, &mut res);

            println!("PROCESSED: {} objects in {:?}, {} errors found", count, tbegin.elapsed(), errcount);
        }
    }

    for (di, d) in res.all.iter().enumerate() {
        println!("-------------------------------");
        println!("#{}: debt: {}", di, &d.debt);
        println!("companies: {:?}\nphones: {:?}", &d.companies, &d.phones);
    }

    if fflag < 2 {
        println!("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...");
    }
}


fn process_file(fname: &str, res: &mut Debtors) -> (i32, i32) { 
    use std::io::prelude::*;

    let mut count = 0;
    let mut errcount = 0;

    match std::fs::File::open(fname) {
        Ok(file) => {
            let mut freader = std::io::BufReader::with_capacity(FILE_BUFFER_SIZE, file);
            let mut obj = Vec::new();
            let mut braces = 0;

            loop {
                let buf = freader.fill_buf().unwrap();
                let blen = buf.len();
                if blen == 0 {
                    break; //EOF
                }
                for b in buf {
                    if *b == b'{' {
                        braces += 1;
                        obj.push(*b);
                    }
                    else if *b == b'}' {
                        braces -= 1;
                        obj.push(*b);

                        if braces == 0 { //object formed !

                            match serde_json::from_slice(&obj) {
                                Ok(o) => {
                                    process_object(&o, res);
                                }
                                Err(e) => {
                                    println!("JSON ERROR: {}:\n{:?}", e, &obj);
                                    errcount +=1;
                                }
                            }

                            count += 1;
                            obj = Vec::new();
                        }
                    }
                    else if braces > 0 {
                        obj.push(*b);
                    }
                }
                freader.consume(blen);
            }
        }
        Err(e) => {
            println!("ERROR: {}", e);
        }
    }
    return (count, errcount);
}


/// Merges one parsed JSON object into the aggregation state.
///
/// The first phone of the record that is already present in `res.index_by_phone`
/// selects the existing debtor; when none is known a new debtor is appended.
/// Every phone of the record is then (re)indexed to that debtor.
fn process_object(o: &Value, res: &mut Debtors) {
    let rec = extract_data(o);

    let found = rec
        .phones
        .iter()
        .find_map(|phone| res.index_by_phone.get(phone).cloned());

    let slot = match found {
        Some(i) => {
            // existing debtor: accumulate
            res.all[i].debt += rec.debt;
            i
        }
        None => {
            // new debtor: goes to the end of the list
            let mut fresh = Debtor::new();
            fresh.debt = rec.debt;
            res.all.push(fresh);
            res.all.len() - 1
        }
    };

    let debtor = &mut res.all[slot];
    debtor.companies.insert(rec.company);
    for phone in &rec.phones {
        debtor.phones.insert(phone.clone());
        res.index_by_phone.insert(phone.clone(), slot);
    }
}


/// Normalises one "dirty" JSON object into a `DebtRec`.
///
/// - `company` may be a string or an object with a string `name`; any other shape
///   (including an object without `name`) falls back to the JSON text of the value.
/// - `phones` may be an array or a single value; `phone` is an optional extra value.
///   A JSON `null` / missing field contributes nothing.
/// - `debt` may be a number or a numeric string; anything unparsable counts as 0.
fn extract_data(o: &Value) -> DebtRec {
    use std::str::FromStr;

    // Renders a JSON value as text; strings are returned without the JSON quotes.
    fn val2str(v: &Value) -> String {
        match v {
            Value::String(vs) => vs.to_string(), //to avoid additional quotes
            _ => v.to_string(),
        }
    }

    let mut dr = DebtRec::new();

    let c = &o["company"];
    dr.company = match c {
        // Map indexing (`c1["name"]`) panics on a missing key, so look it up with get().
        Value::Object(c1) => match c1.get("name") {
            Some(Value::String(c2)) => c2.to_string(),
            _ => val2str(c),
        },
        _ => val2str(c),
    };

    match &o["phones"] {
        Value::Null => {}
        Value::Array(pp) => {
            for p in pp {
                dr.phones.push(val2str(p));
            }
        }
        single => dr.phones.push(val2str(single)),
    }

    match &o["phone"] {
        Value::Null => {}
        p => dr.phones.push(val2str(p)),
    }

    dr.debt = match &o["debt"] {
        Value::Number(d) => d.as_f64().unwrap_or(0.0),
        Value::String(d) => f64::from_str(d).unwrap_or(0.0),
        _ => 0.0,
    };

    dr
}


.

Source: https://habr.com/ru/post/450512/


All Articles