こんにちは、Habr!
私は時々、SwiftをLinux向けのアプリケーション開発言語として検討しています。シンプルで、動的で、コンパイル型で、ガベージコレクターがなく、つまり理論的には組み込みデバイスにも適しているからです。そこで、同じく若くて流行しているRustと比較してみることにしました。テストとして、実用的なタスク、すなわちオブジェクトの配列を含む大きなJSONファイルの解析と集約を選びました。ソースコードはできるだけ同じスタイルで書き、実行速度、バイナリサイズ、ソースサイズ、コーディングの主観的な印象という4つの観点で比較しました。
タスクの詳細は次のとおりです。100万個のオブジェクトの配列を含む、100 MBのJSONファイルがあります。各オブジェクトは負債の記録で、会社名、電話番号のリスト、負債額を持ちます。異なる会社が同じ電話番号を使っていることがあり、その場合は電話番号を手がかりに同一の債務者としてグループ化する必要があります。つまり、会社名のリスト、電話番号のリスト、負債の合計を持つ実際の債務者を特定します。元のオブジェクトは「ダーティ」で、データは文字列/数値/配列/オブジェクトのいずれの形でも書かれている可能性があります。
ベンチマークの結果には困惑させられました。詳細とソースコードはこの後に続きます。
ソースJSON:
[
{"company":" ", "debt": 800, "phones": [123, 234, 456]},
{"company":" ", "debt": 1200, "phones": ["2128506", 456, 789]},
{"company":" ", "debt": "666", "phones": 666},
{"company": " ", "debt": 1500, "phones": [234567, "34567"], "phone": 666},
{"company": {"name": ""}, "debt": 2550, "phones": 788, "phone": 789},
...
4 :
1) , . YAJL, , {}, ASCII, Unicode. , Unicode — , JSON , .
2) C-, 1, JSON, (
Any Swift
JsonValue Rust).
3) , :
//source data
// Article sketch of one raw debt record; initial values are omitted here,
// the full program listing further down supplies them.
class DebtRec {
var company: String // company name taken from the record
var phones: Array<String> // phone numbers attached to the record, kept as strings
var debt: Double // amount owed in this single record
}
4) — ( ), . 2 :
//result data
// Article sketch of one aggregated debtor; initial values are omitted here,
// the full program listing further down supplies them.
class Debtor {
var companies: Set<String> // distinct company names collected for this debtor
var phones: Set<String> // distinct phone numbers collected for this debtor
var debt: Double // accumulated debt for this debtor
}
// Article sketch of the overall result: the debtor list plus a phone lookup table.
class Debtors {
var all: Array<Debtor> // every debtor found so far
var index_by_phone: Dictionary<String, Int> // phone number -> position of its debtor in `all`
}
(), -, . … , Rust ( ), all — . , , , , - . Rust ?
P.S.
Rust — , to_string(), ( , ). Swift — , — , .
:
swift build -c release
cargo build --release
Rust 86 , , to_string() ( ? <>). Swift . .
1 .Swift: 50
Rust: 4.31 , 11.5
Swift:
62 Kb, runtime — 9 54,6 ( , )
Rust:
— 1,9 , (
«lto=true» 950 , ).
Swift: 189 , 4.5 Kb
Rust: 230 , 5.8
— , Swift , «» Rust, . , , . , .
1) Swift ( Foundation) Rust, . , . , , , , Rust — from_str(), from_utf8() ..
2) + Swift . , Data(), JSON. Data , Array, .. , . , Data (!) , Array. Rust , API , .
PS
— Swift , , Rust 14%. , Rust , , , « - ».
3) Swift ( ) , ?! — unwrap(). match Option, Result, Value, , , . Swift Optional, , .
4) Swift , , , .
5) Rust , JSON , 2- :
if let Null = myVal {
...
}
match myVal {
Null => {
...
}
_ => {}
}
:
if myVal is Null {
...
}
if myVal == Option::Null {
...
}
is_str(), is_null(), is_f64() enum-, , , .
PS
, , proposal.? :
1) ,
Swift: 7.46
Rust: 0.75
2) JSON
Swift: 21.8
— :
JSONSerialization.jsonObject(with: Data(obj))
Rust: 1.77
— :
serde_json::from_slice(&obj)
3) Any
Swift: 16.01
Rust: 0.88
— , , Rust «» Swift
4)
Swift: 4.74
Rust: 0.91
, Swift , Node.js Python, , . — . , ? , Go MicroPython?
Rust — , , . , Rc<> , Node, Go Java, (, , Javascript 2.5 ).
P.S.
— .
Swift: main.swift
import Foundation
// Number of bytes requested from the input file per read in process_file.
let FILE_BUFFER_SIZE = 50000
// MARK: - Source data

/// One raw debt record extracted from a single JSON object.
///
/// Every field starts out empty and is filled in by `extract_data`.
class DebtRec {
    /// Company name taken from the record.
    var company = String()
    /// Phone numbers attached to the record, normalised to strings.
    var phones = [String]()
    /// Amount owed in this single record.
    var debt: Double = 0
}
// MARK: - Result data

/// One aggregated debtor: all records that share a phone number.
///
/// Kept as a class because `process_object` mutates an instance obtained
/// from the `Debtors.all` array through its reference.
class Debtor {
    /// Distinct company names seen for this debtor.
    var companies = Set<String>()
    /// Distinct phone numbers seen for this debtor.
    var phones = Set<String>()
    /// Accumulated debt over all merged records.
    var debt: Double = 0
}
/// The overall aggregation result: the debtor list plus a phone lookup table.
class Debtors {
    /// Every debtor found so far, in order of first appearance.
    var all = [Debtor]()
    /// Maps a phone number to the position of its debtor in `all`.
    var index_by_phone = [String: Int]()
}
/// Command-line driver: processes every file given as `-f <path>`, then prints
/// the aggregated debtors, and finally a usage line when no file was processed.
func main() {
    var res = Debtors()
    // 0: no "-f" seen yet, 1: the next argument is a file name,
    // 2: at least one file has been processed.
    var fflag = 0

    for arg in CommandLine.arguments {
        switch (arg, fflag) {
        case ("-f", _):
            fflag = 1
        case (_, 1):
            fflag = 2
            print("\(arg):")
            let tbegin = Date()
            let (count, errcount) = process_file(fname: arg, res: &res)
            let elapsed = DateInterval(start: tbegin, end: Date()).duration
            print("PROCESSED: \(count) objects in \(elapsed)s, \(errcount) errors found")
        default:
            break
        }
    }

    for (position, debtor) in res.all.enumerated() {
        print("-------------------------------")
        print("#\(position): debt: \(debtor.debt)")
        print("companies: \(debtor.companies)\nphones: \(debtor.phones)")
    }

    guard fflag >= 2 else {
        print("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...")
        return
    }
}
/// Streams `fname` in chunks of `FILE_BUFFER_SIZE` bytes, cuts out every
/// top-level `{...}` JSON object, parses it and feeds it to `process_object`.
///
/// Braces are only counted outside JSON string literals, so a `{` or `}` inside
/// a string value (for example in a company name) no longer splits an object.
/// A stray `}` outside any object is ignored instead of driving the nesting
/// depth negative.
///
/// - Parameters:
///   - fname: Path of the JSON file to read.
///   - res: Aggregation result that `process_object` updates.
/// - Returns: `(count, errcount)`: the number of objects cut out of the stream
///   and how many of them failed to parse as a JSON object.
func process_file(fname: String, res: inout Debtors) -> (Int, Int) {
    var count = 0
    var errcount = 0

    guard let f = FileHandle(forReadingAtPath: fname) else {
        print("ERROR: Unable to open file")
        return (count, errcount)
    }
    defer { f.closeFile() }

    let openBrace: UInt8 = 123   // {
    let closeBrace: UInt8 = 125  // }
    let quote: UInt8 = 34        // "
    let backslash: UInt8 = 92    // \

    var obj: [UInt8] = []        // bytes of the object currently being collected
    var braces = 0               // current nesting depth of { }
    var inString = false         // inside a JSON string literal
    var escaped = false          // previous byte was a backslash inside a string

    while true {
        let buf = f.readData(ofLength: FILE_BUFFER_SIZE)
        if buf.isEmpty {
            break //EOF
        }
        for b in buf {
            if inString {
                // String content is copied verbatim; only an unescaped quote ends it.
                if braces > 0 {
                    obj.append(b)
                }
                if escaped {
                    escaped = false
                } else if b == backslash {
                    escaped = true
                } else if b == quote {
                    inString = false
                }
                continue
            }

            if b == quote {
                inString = true
                if braces > 0 {
                    obj.append(b)
                }
            } else if b == openBrace {
                braces += 1
                obj.append(b)
            } else if b == closeBrace && braces > 0 {
                braces -= 1
                obj.append(b)
                if braces == 0 { //object formed !
                    do {
                        let parsed = try JSONSerialization.jsonObject(with: Data(obj))
                        if let dict = parsed as? [String: Any] {
                            process_object(o: dict, res: &res)
                        } else {
                            print("JSON ERROR: \(obj)")
                            errcount += 1
                        }
                    } catch {
                        print("JSON ERROR: \(obj)")
                        errcount += 1
                    }
                    count += 1
                    // Data(obj) holds its own copy, so the buffer can be reused.
                    obj.removeAll(keepingCapacity: true)
                }
            } else if braces > 0 {
                obj.append(b)
            }
        }
    }
    return (count, errcount)
}
/// Merges one parsed JSON object into the aggregation result.
///
/// The record is attached to the debtor that already owns the first of its
/// phone numbers found in the index; when none is known a new debtor is
/// appended. Every phone of the record is then (re)pointed at that debtor.
///
/// - Parameters:
///   - o: One JSON object as produced by `JSONSerialization`.
///   - res: Aggregation result to update.
func process_object(o: Dictionary<String, Any>, res: inout Debtors) {
    let record = extract_data(o)

    // Position of the first debtor that already owns one of the record's phones.
    var known: Int? = nil
    for phone in record.phones {
        guard let position = res.index_by_phone[phone] else { continue }
        known = position
        break
    }

    let debtor: Debtor
    let slot: Int
    if let position = known { //existing debtor
        slot = position
        debtor = res.all[position]
        debtor.debt += record.debt
    } else { //new debtor
        slot = res.all.count
        debtor = Debtor()
        debtor.debt = record.debt
        res.all.append(debtor)
    }

    // Debtor is a class, so these mutations reach the instance stored in res.all.
    debtor.companies.insert(record.company)
    for phone in record.phones {
        debtor.phones.insert(phone)
        res.index_by_phone[phone] = slot
    }
}
/// Normalises one "dirty" JSON object into a `DebtRec`.
///
/// Missing or malformed fields fall back to defaults instead of crashing:
/// a missing company (or a company object without `name`) becomes `"null"`,
/// and a debt string that is not a number counts as `0.0`. An explicit JSON
/// `null` in `phones`/`phone` is skipped rather than stored as the phone `"null"`.
///
/// - Parameter o: One JSON object as produced by `JSONSerialization`.
/// - Returns: The extracted record; `phones` lists the entries of `phones`
///   followed by the single `phone` field when present.
func extract_data(_ o: Dictionary<String, Any>) -> DebtRec {
    // Strings are returned as-is, integers are formatted, anything else is "null".
    func val2str(_ v: Any?) -> String {
        if let vs = v as? String {
            return vs
        }
        if let vi = v as? Int {
            return String(vi)
        }
        return "null"
    }

    // True when the key exists and does not hold an explicit JSON null.
    func isPresent(_ v: Any?) -> Bool {
        guard let v = v else { return false }
        return !(v is NSNull)
    }

    let dr = DebtRec()

    // "company" is either a plain value or an object carrying a "name" field.
    let c = o["company"]
    if let company = c as? [String: Any] {
        dr.company = val2str(company["name"])
    } else {
        dr.company = val2str(c)
    }

    // "phones" is either an array of values or a single value.
    let pp = o["phones"]
    if let list = pp as? [Any] {
        for p in list {
            dr.phones.append(val2str(p))
        }
    } else if isPresent(pp) {
        dr.phones.append(val2str(pp))
    }

    let p = o["phone"]
    if isPresent(p) {
        dr.phones.append(val2str(p))
    }

    // "debt" is either a number or a numeric string.
    let d = o["debt"]
    if let dd = d as? Double {
        dr.debt = dd
    } else if let ds = d as? String {
        dr.debt = Double(ds) ?? 0.0
    }
    return dr
}
// Script entry point: main.swift runs top-level code, so call the driver directly.
main()
Rust: main.rs
//[dependencies]
//serde_json = "1.0"
use std::collections::{HashMap, HashSet};
use serde_json::Value;
/// Capacity in bytes of the buffered reader used by `process_file`.
const FILE_BUFFER_SIZE: usize = 50000;
// ---- source data ----

/// One raw debt record extracted from a single JSON object.
struct DebtRec {
    /// Company name taken from the record.
    company: String,
    /// Phone numbers attached to the record, normalised to strings.
    phones: Vec<String>,
    /// Amount owed in this single record.
    debt: f64,
}
// ---- result data ----

/// One aggregated debtor: all records that share a phone number.
struct Debtor {
    /// Distinct company names seen for this debtor.
    companies: HashSet<String>,
    /// Distinct phone numbers seen for this debtor.
    phones: HashSet<String>,
    /// Accumulated debt over all merged records.
    debt: f64,
}
/// The overall aggregation result: the debtor list plus a phone lookup table.
struct Debtors {
    /// Every debtor found so far, in order of first appearance.
    all: Vec<Debtor>,
    /// Maps a phone number to the position of its debtor in `all`.
    index_by_phone: HashMap<String, usize>,
}
impl DebtRec {
fn new() -> DebtRec {
DebtRec {
company: String::new(),
phones: Vec::new(),
debt: 0.0
}
}
}
impl Debtor {
fn new() -> Debtor {
Debtor {
companies: HashSet::new(),
phones: HashSet::new(),
debt: 0.0
}
}
}
impl Debtors {
fn new() -> Debtors {
Debtors {
all: Vec::new(),
index_by_phone: HashMap::new()
}
}
}
/// Command-line driver: processes every file given as `-f <path>`, then prints
/// the aggregated debtors, and finally a usage line when no file was processed.
fn main() {
    let mut res = Debtors::new();
    // 0: no "-f" seen yet, 1: the next argument is a file name,
    // 2: at least one file has been processed.
    let mut fflag = 0;

    for arg in std::env::args() {
        match (arg.as_str(), fflag) {
            ("-f", _) => fflag = 1,
            (_, 1) => {
                fflag = 2;
                println!("{}:", arg);
                let started = std::time::SystemTime::now();
                let (count, errcount) = process_file(&arg, &mut res);
                let elapsed = started.elapsed().unwrap();
                println!(
                    "PROCESSED: {} objects in {:?}, {} errors found",
                    count, elapsed, errcount
                );
            }
            _ => {}
        }
    }

    for (position, debtor) in res.all.iter().enumerate() {
        println!("-------------------------------");
        println!("#{}: debt: {}", position, debtor.debt);
        println!(
            "companies: {:?}\nphones: {:?}",
            debtor.companies, debtor.phones
        );
    }

    if fflag < 2 {
        println!("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...");
    }
}
/// Streams `fname` through a buffered reader, cuts out every top-level `{...}`
/// JSON object, parses it and feeds it to `process_object`.
///
/// Braces are only counted outside JSON string literals, so a `{` or `}` inside
/// a string value (for example in a company name) no longer splits an object.
/// A stray `}` outside any object is ignored instead of driving the nesting
/// depth negative. A read error is reported and ends processing of the file
/// instead of panicking.
///
/// Returns `(count, errcount)`: the number of objects cut out of the stream and
/// how many of them failed to parse.
fn process_file(fname: &str, res: &mut Debtors) -> (i32, i32) {
    use std::io::prelude::*;

    let mut count = 0;
    let mut errcount = 0;

    let file = match std::fs::File::open(fname) {
        Ok(file) => file,
        Err(e) => {
            println!("ERROR: {}", e);
            return (count, errcount);
        }
    };
    let mut freader = std::io::BufReader::with_capacity(FILE_BUFFER_SIZE, file);

    let mut obj: Vec<u8> = Vec::new(); // bytes of the object being collected
    let mut braces = 0; // current nesting depth of { }
    let mut in_string = false; // inside a JSON string literal
    let mut escaped = false; // previous byte was a backslash inside a string

    loop {
        let buf = match freader.fill_buf() {
            Ok(buf) => buf,
            Err(e) => {
                println!("ERROR: {}", e);
                break;
            }
        };
        let blen = buf.len();
        if blen == 0 {
            break; //EOF
        }
        for &b in buf {
            if in_string {
                // String content is copied verbatim; only an unescaped quote ends it.
                if braces > 0 {
                    obj.push(b);
                }
                if escaped {
                    escaped = false;
                } else if b == b'\\' {
                    escaped = true;
                } else if b == b'"' {
                    in_string = false;
                }
                continue;
            }
            match b {
                b'"' => {
                    in_string = true;
                    if braces > 0 {
                        obj.push(b);
                    }
                }
                b'{' => {
                    braces += 1;
                    obj.push(b);
                }
                b'}' if braces > 0 => {
                    braces -= 1;
                    obj.push(b);
                    if braces == 0 {
                        //object formed !
                        match serde_json::from_slice(&obj) {
                            Ok(o) => {
                                process_object(&o, res);
                            }
                            Err(e) => {
                                println!("JSON ERROR: {}:\n{:?}", e, &obj);
                                errcount += 1;
                            }
                        }
                        count += 1;
                        // Reuse the allocation for the next object.
                        obj.clear();
                    }
                }
                _ => {
                    if braces > 0 {
                        obj.push(b);
                    }
                }
            }
        }
        freader.consume(blen);
    }
    (count, errcount)
}
/// Merges one parsed JSON object into the aggregation result.
///
/// The record is attached to the debtor that already owns the first of its
/// phone numbers found in the index; when none is known a new debtor is
/// appended. Every phone of the record is then (re)pointed at that debtor.
fn process_object(o: &Value, res: &mut Debtors) {
    let record = extract_data(o);

    // Position of the first debtor that already owns one of the record's phones.
    let known = record
        .phones
        .iter()
        .find_map(|phone| res.index_by_phone.get(phone).copied());

    let slot = match known {
        //existing debtor
        Some(position) => {
            res.all[position].debt += record.debt;
            position
        }
        //new debtor
        None => {
            let mut fresh = Debtor::new();
            fresh.debt = record.debt;
            res.all.push(fresh);
            res.all.len() - 1
        }
    };

    let debtor = &mut res.all[slot];
    debtor.companies.insert(record.company);
    for phone in &record.phones {
        debtor.phones.insert(phone.clone());
        res.index_by_phone.insert(phone.clone(), slot);
    }
}
/// Normalises one "dirty" JSON object into a `DebtRec`.
///
/// * `company` is either a plain value or an object with a string `name`; an
///   object without a usable `name` is stored as its JSON text. The lookup uses
///   `Map::get`, because indexing a `serde_json::Map` with a missing key panics.
/// * `phones` may be an array or a single value; `phone` is an optional extra
///   value. JSON `null` (or a missing key) contributes nothing.
/// * `debt` may be a number or a numeric string; anything else counts as `0.0`.
fn extract_data(o: &Value) -> DebtRec {
    /// Strings are returned without their JSON quotes, other values as JSON text.
    fn val2str(v: &Value) -> String {
        match v {
            Value::String(vs) => vs.clone(), //to avoid additional quotes
            _ => v.to_string(),
        }
    }

    let mut dr = DebtRec::new();

    let c = &o["company"];
    dr.company = match c {
        Value::Object(c1) => match c1.get("name") {
            Some(Value::String(c2)) => c2.clone(),
            _ => val2str(c),
        },
        _ => val2str(c),
    };

    match &o["phones"] {
        Value::Null => {}
        Value::Array(pp) => dr.phones.extend(pp.iter().map(val2str)),
        single => dr.phones.push(val2str(single)),
    }

    match &o["phone"] {
        Value::Null => {}
        p => dr.phones.push(val2str(p)),
    }

    dr.debt = match &o["debt"] {
        Value::Number(d) => d.as_f64().unwrap_or(0.0),
        Value::String(d) => d.parse::<f64>().unwrap_or(0.0),
        _ => 0.0,
    };

    dr
}
.