## Compare the credentials stored in two datasets: count the logins and the
## login/password pairs that occur in both, collect the shared pairs in
## DATA_I, and count how many shared pairs use one of the first 10 / 100 /
## 1000 passwords listed in PassSum.rds.
##
## Columns 1 and 3 of both datasets are treated as login and password (these
## are the names given to the extracted columns below).
## NOTE(review): the numbers quoted in the comments are the values recorded
## in the original script's comments; they are not recomputed here.

DATA_1 <- readRDS(file = "DATA_MAIL.rds")
DATA_2 <- readRDS(file = "DATA_SITE.rds")

################################################
# Number of rows in each dataset.
# Recorded value: 6034544
nrow(DATA_1)
# Recorded value: 3432650
nrow(DATA_2)

################################################
# Number of distinct logins (column 1) present in both datasets.
# Recorded value: 132093
length(intersect(DATA_1[, 1], DATA_2[, 1]))

# One composite login/password key per row, built once and reused below.
# The separator is a control character because a printable separator such
# as "|" can occur inside a password and make two different pairs collide.
KEY_SEP <- "\r"
KEY_1 <- paste(DATA_1[, 1], DATA_1[, 3], sep = KEY_SEP)
KEY_2 <- paste(DATA_2[, 1], DATA_2[, 3], sep = KEY_SEP)

# Rows of DATA_1 holding the first occurrence of a pair that also exists in
# DATA_2. This selects the same set, in the same order, as
# intersect(KEY_1, KEY_2).
IN_BOTH <- !duplicated(KEY_1) & KEY_1 %in% KEY_2

# Number of distinct login/password pairs present in both datasets.
# Recorded value: 77387 (obtained with a "|"-separated key, so the figure
# may differ slightly if any login or password contains "|").
sum(IN_BOTH)

#################################################
# Shared login/password pairs.
# The columns are taken straight from the matching rows instead of splitting
# the pasted key again: splitting misaligns every following pair as soon as
# one value contains the separator or one password is empty (strsplit drops
# a trailing empty string). Missing values stay NA here rather than becoming
# the literal string "NA".
DATA_I <- data.frame(
  login = as.character(DATA_1[IN_BOTH, 1]),
  passwd = as.character(DATA_1[IN_BOTH, 3]),
  stringsAsFactors = FALSE
)

# Kept for backward compatibility: a list with one c(login, passwd) vector
# per shared pair, in case later code still reads VECTOR_I.
VECTOR_I <- unname(Map(c, DATA_I$login, DATA_I$passwd))

# Drop the large temporaries so the script leaves no extra globals behind.
rm(KEY_SEP, KEY_1, KEY_2, IN_BOTH)

#################################################
# Shared pairs whose password is among the first N rows of the password
# summary table.
# NOTE(review): presumably PassSum.rds is sorted by descending frequency, so
# the first N rows are the N most common passwords -- confirm.
PASS_SUM <- readRDS(file = "PassSum.rds")

# head() returns at most N rows; 1:N indexing would add all-NA rows when the
# table is shorter than N.
PASS_10 <- head(PASS_SUM, 10)
PASS_100 <- head(PASS_SUM, 100)
PASS_1000 <- head(PASS_SUM, 1000)

# Recorded value: 9652
sum(DATA_I$passwd %in% PASS_10$passwd)
# Recorded value: 10535
sum(DATA_I$passwd %in% PASS_100$passwd)
# Recorded value: 11704
sum(DATA_I$passwd %in% PASS_1000$passwd)