«Теперь он и тебя сосчитал» или Наука о данных с нуля (Data Science from Scratch)

, Data Science, Cognitive Class. , , , - , .

, IT , Data Science , .



1. « — » —


, , Data Science , .

, , . , , :

  1. . ;
  2. Python ;
  3. Data Science, .
  4. , .

, , ?
Cognitive Class, Kaggle , , .

. , ? .

2. « — » —


«Data Science. » — , , , . 2015 . 2 IT , , Python. ( ) . , Python 2, , Python 3 ( GitHub), (, ).

, Anaconda / .

. , : « . , .» — . 100% , , 2 , , . , , , , .

, -, .
, « » - , , . «Linux from scratch», , - Linux « », , , ( ).

. , , , ( )

, : « , :»

3. « — Python» — .


, «» . , , , Data Science (, Coursera). , 2-3 , « » , - , - .

, — “DataSciencester”. , , . «» .

, , , , , «» , .

Python, , , , 2, , , ( ).

Python, 3 :

  1. . ;
  2. , , ;
  3. ( );

Ozon ( ), .

, GitHub, .

() , , .
Python 2 3, Jupyter notebook. , , Anaconda ( ). , Jupyter ( ). , , . Jupyter «» , ( Windows Linux)

, , , « » - (, API ).

.

, , ( GitHub). , .

# -*- coding: utf-8 -*-
# linear_algebra.py

import re, math, random # regexes, math functions, random numbers
import matplotlib.pyplot as plt # pyplot
from collections import defaultdict, Counter
from functools import partial, reduce

#
# functions for working with vectors
#

def vector_add(v, w):
    """Return a new list holding the element-wise sum of v and w.

    Elements are paired by position with zip, so the result is only as
    long as the shorter of the two inputs.
    """
    paired = zip(v, w)
    return [left + right for left, right in paired]

def vector_subtract(v, w):
    """Return a new list holding the element-wise difference v - w.

    Elements are paired by position with zip, so the result is only as
    long as the shorter of the two inputs.
    """
    paired = zip(v, w)
    return [left - right for left, right in paired]

def vector_sum(vectors):
    """Return the element-wise sum of all vectors in `vectors`.

    The vectors are folded left to right with vector_add. No initial
    value is passed to reduce, so a single-vector input is returned as
    is and an empty input raises TypeError.
    """
    running_total = reduce(vector_add, vectors)
    return running_total

def scalar_multiply(c, v):
    """Return a new list with every element of v multiplied by c."""
    scaled = [c * element for element in v]
    return scaled

def vector_mean(vectors):
    """Return the vector whose i-th element is the mean of the i-th
    elements of the input vectors.

    The element-wise sum is scaled by 1 / len(vectors).
    """
    count = len(vectors)
    # The weight is computed before summing, so an empty input fails on
    # the division first.
    weight = 1 / count
    total = vector_sum(vectors)
    return scalar_multiply(weight, total)

def dot(v, w):
    """Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Elements are paired by position with zip, so surplus elements of the
    longer input are ignored; two empty inputs give 0.
    """
    products = (left * right for left, right in zip(v, w))
    return sum(products)

def sum_of_squares(v):
    """Return v_1 * v_1 + ... + v_n * v_n, computed as dot(v, v)."""
    squared_total = dot(v, v)
    return squared_total

def magnitude(v):
    """Return the square root of sum_of_squares(v)."""
    length_squared = sum_of_squares(v)
    return math.sqrt(length_squared)

def squared_distance(v, w):
    """Return the sum of squares of the element-wise difference v - w."""
    difference = vector_subtract(v, w)
    return sum_of_squares(difference)

def distance(v, w):
    """Return the square root of squared_distance(v, w)."""
    gap_squared = squared_distance(v, w)
    return math.sqrt(gap_squared)

#
# functions for working with matrices
#

def shape(A):
    """Return (number of rows, number of columns) of matrix A.

    The column count is the length of the first row; a falsy (empty) A
    reports 0 columns.
    """
    row_count = len(A)
    if A:
        return row_count, len(A[0])
    return row_count, 0

def get_row(A, i):
    """Return row i of matrix A (the element A[i] itself, not a copy)."""
    row = A[i]
    return row

def get_column(A, j):
    """Return column j of matrix A as a new list, one element per row."""
    column = [row[j] for row in A]
    return column

def make_matrix(num_rows, num_cols, entry_fn):
    """Build a num_rows x num_cols matrix as a list of row lists.

    The (i, j)-th entry is entry_fn(i, j); entries are generated row by
    row, left to right within each row.
    """
    matrix = []
    for i in range(num_rows):
        row = [entry_fn(i, j) for j in range(num_cols)]
        matrix.append(row)
    return matrix

def is_diagonal(i, j):
    """Return 1 when i equals j (an entry on the diagonal), otherwise 0."""
    if i == j:
        return 1
    return 0

# 5 x 5 matrix built with is_diagonal: 1 where the row index equals the
# column index, 0 everywhere else.
identity_matrix = make_matrix(5, 5, is_diagonal)

# Friendship table for ten users, stored as a 10 x 10 list of lists of 0/1.
# Going by the row and column labels, friendships[i][j] == 1 marks users i
# and j as friends. As written, the table is symmetric and has zeros on the
# diagonal.
#
#          user 0  1  2  3  4  5  6  7  8  9
#
friendships = [[0, 1, 1, 0, 0, 0, 0, 0, 0, 0], # user 0
               [1, 0, 1, 1, 0, 0, 0, 0, 0, 0], # user 1
               [1, 1, 0, 1, 0, 0, 0, 0, 0, 0], # user 2
               [0, 1, 1, 0, 1, 0, 0, 0, 0, 0], # user 3
               [0, 0, 0, 1, 0, 1, 0, 0, 0, 0], # user 4
               [0, 0, 0, 0, 1, 0, 1, 1, 0, 0], # user 5
               [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], # user 6
               [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], # user 7
               [0, 0, 0, 0, 0, 0, 1, 1, 0, 1], # user 8
               [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]] # user 9

, , , , .

(, ) - , ( ), , , , , , , , , «». .

4. « !» — .


, ?

Data Science Cognitive class (), .

, , , , , ( 2 ), , , . «» «» CC ( ).

, - ? , . - , , . , , , , ( ). , « », .

? , . , , , - « », , - , , , , .

Data Science , , , .

550 300 , , . , , - kaggle, .


Source: https://habr.com/ru/post/331794/


All Articles