Getting started

Python is a programming language with many libraries dedicated to statistical analysis, graphical representation and reporting.

This is a summary of the Python language syntax; you can find more details in the Python documentation.

Basics

[1]:

# bind a number and a string to names so that later cells can use them
my_numeric_variable, my_string_variable = 1, "hello"

[2]:

# a bare expression on the last line of a cell is echoed as the cell's output,
# so no explicit print() call is needed
my_numeric_variable

[2]:

1

[3]:

# print() writes the text form of the value to the cell's standard output
print(my_string_variable)

hello

Vectors

[4]:

import random

# initialization of vectors: plain Python lists play the role of vectors here
my_numeric_vector = [1, 2, 3, 4, 5, 6]
my_sequence = list(range(1, 7))  # range() excludes its stop value, so this is 1..6
my_string_vector = ["hello", "world", "!"]
my_logic_vector = [True, False]
# 25 distinct integers drawn without replacement from 1 to 5000 inclusive.
# The stop value must be 5001 because range() excludes it; range(1, 5000)
# would silently leave 5000 out of the population.
my_random_vector = random.sample(range(1, 5001), 25)

[5]:

# echo the list as the cell's output
my_numeric_vector

[5]:

[1, 2, 3, 4, 5, 6]

Operations with vectors

There are many libraries for statistical analysis, so the same operation can often be performed with several different libraries.

[6]:

import statistics

import numpy

# Only the last expression of a cell is echoed automatically, so every result
# is printed explicitly; a bare sum(...) on the first line would be computed
# and then discarded without ever being shown.
print(sum(my_numeric_vector)) # sum
print(statistics.mean(my_numeric_vector)) # mean
print(statistics.median(my_numeric_vector)) # median (standard library)
print(numpy.median(my_numeric_vector)) # median (numpy)

3.5
3.5
3.5

[7]:

import numpy
import pandas

# multiply each element of a vector by 2
# every assignment below produces the same list of plain Python ints
my_new_vector = [e * 2 for e in my_numeric_vector]
my_new_vector = list(map(lambda x: x * 2, my_numeric_vector))
serie = pandas.Series(my_numeric_vector)
my_new_vector = (serie * 2).tolist()
# .tolist() converts the numpy scalars to native Python numbers; list(array)
# would keep numpy.int64 elements, which are not the same objects as the
# plain ints produced by the variants above (and print differently on NumPy 2)
my_new_vector = (numpy.array(my_numeric_vector) * 2).tolist()
my_new_vector

[7]:

[2, 4, 6, 8, 10, 12]

[8]:

# divide each element of a vector by 2
# every assignment below produces the same list of plain Python floats
my_new_vector = [e / 2 for e in my_numeric_vector]
my_new_vector = list(map(lambda x: x / 2, my_numeric_vector))
serie = pandas.Series(my_numeric_vector)
my_new_vector = (serie / 2).tolist()
# .tolist() converts the numpy scalars to native Python floats; list(array)
# would keep numpy.float64 elements instead
my_new_vector = (numpy.array(my_numeric_vector) / 2).tolist()
my_new_vector

[8]:

[0.5, 1.0, 1.5, 2.0, 2.5, 3.0]

[9]:

import operator

import numpy
import pandas

# add two vectors element by element (values at the same position are summed)
# every assignment below produces the same list of plain Python ints
my_new_vector = [sum(e) for e in zip(my_numeric_vector, my_sequence)]
my_new_vector = [x + y for x, y in zip(my_numeric_vector, my_sequence)]
my_new_vector = list(map(operator.add, my_numeric_vector, my_sequence))
my_new_vector = pandas.Series(my_numeric_vector).add(pandas.Series(my_sequence)).tolist()
# .tolist() converts the numpy scalars to native Python numbers; list(array)
# would keep numpy.int64 elements instead of plain ints
my_new_vector = (numpy.array(my_numeric_vector) + numpy.array(my_sequence)).tolist()
my_new_vector

[9]:

[2, 4, 6, 8, 10, 12]

[10]:

# pick elements out of a vector
print(my_string_vector[0]) # first element: hello
print(my_string_vector[::len(my_string_vector) - 1]) # a step of len-1 keeps only the first and last elements: ['hello', '!']
print(my_string_vector[:2]) # first two elements: ['hello', 'world']

hello
['hello', '!']
['hello', 'world']

[11]:

import pandas

# label every value of the vector: a one-row table whose column names are the labels
names = ["one", "two", "three", "four", "five", "six"]
my_vector = pandas.DataFrame([my_numeric_vector], columns=names)
print(my_vector)

   one  two  three  four  five  six
0    1    2      3     4     5    6

[12]:

import numpy

# element type of a vector, read from the dtype numpy infers for it
print(numpy.asarray(my_vector).dtype.type) # print numpy.int64
print(numpy.asarray(my_string_vector).dtype.type) # print numpy.str_

<class 'numpy.int64'>
<class 'numpy.str_'>

[13]:

# conversion of each element of a vector to a string
# both assignments produce the same list
my_new_string_vector = list(map(str, my_numeric_vector))
my_new_string_vector = [str(number) for number in my_numeric_vector]
my_new_string_vector

[13]:

['1', '2', '3', '4', '5', '6']

Matrices

A matrix is a vector with more than one dimension: its elements are arranged in rows and columns.

[14]:

# two-dimensional matrix with 2 rows and 5 columns, filled with zeros;
# each row is built separately, so the rows are independent lists
my_matrix = [[0 for _ in range(5)] for _ in range(2)]
my_matrix

[14]:

[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]

[15]:

# two-dimensional matrix with 2 rows and 5 columns; every row holds the values 1..5
my_matrix = [[column for column in range(1, 6)] for _ in range(2)]
my_matrix

[15]:

[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]

[16]:

import numpy

# two-dimensional matrix with 2 rows and 5 columns holding 1..10, filled row by row
my_matrix = numpy.reshape(numpy.arange(1, 11), (2, 5))
my_matrix

[16]:

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

[17]:

# two-dimensional matrix with 2 rows and 5 columns holding 1..10, filled column by column like R language
my_matrix = numpy.arange(1, 11).reshape((2, 5), order="F")
my_matrix

[17]:

array([[ 1,  3,  5,  7,  9],
       [ 2,  4,  6,  8, 10]])

[18]:

# numpy indices are zero-based and given as [row, column]
print(my_matrix[1, 1]) # row 2, column 2: prints 4
print(my_matrix[0, :]) # prints row 1
print(my_matrix[:, 1]) # prints column 2

4
[1 3 5 7 9]
[3 4]

[19]:

import pandas

# merge of vectors: each vector becomes one row of a labelled table
vector_one, vector_two, vector_three = ["one", 0.1], ["two", 1], ["three", 10]
my_vectors = [vector_one, vector_two, vector_three]
rownames = ["yesterday", "today", "tomorrow"]
colnames = ["vector number", "quantity"]
pandas.DataFrame(data=my_vectors, columns=colnames, index=rownames)

[19]:

	vector number	quantity
yesterday	one	0.1
today	two	1.0
tomorrow	three	10.0

Weighted average

[20]:

# performance of each company
apple_performance, netflix_performance, amazon_performance = 3, 7, 11
# weight given to each company
apple_weight, netflix_weight, amazon_weight = 0.3, 0.4, 0.3

[21]:

# portfolio performance: every performance is scaled by its weight and the products are added up
weighted_average = (
    apple_performance * apple_weight
    + netflix_performance * netflix_weight
    + amazon_performance * amazon_weight
)
weighted_average

[21]:

7.0

[22]:

import pandas

# the same example with vectors: one labelled single-row table per quantity
company = ['apple', 'netflix', 'amazon']
performance = pandas.DataFrame([[3, 7, 11]], columns=company)
weight = pandas.DataFrame([[.3, .4, .3]], columns=company)
print(performance)
print(weight)

   apple  netflix  amazon
0      3        7      11
   apple  netflix  amazon
0    0.3      0.4     0.3

[23]:

# with headers: the element-wise product keeps the column labels
performance_weight = performance * weight
print(performance_weight)
# adding across the columns leaves one total per row
weighted_average = performance_weight.sum(axis="columns")
print(weighted_average)

   apple  netflix  amazon
0    0.9      2.8     3.3
0    7.0
dtype: float64

[24]:

import numpy

# without headers: the same computation on bare numpy arrays
performance_weight = numpy.multiply(numpy.array(performance), numpy.array(weight))
print(performance_weight)
weighted_average = performance_weight.sum()
print(weighted_average)

[[0.9 2.8 3.3]]
7.0

Functions

[25]:

import numpy


def weighted_average_function(performance, weight):
    """Return the sum of the element-wise products of ``performance`` and ``weight``.

    Both arguments are converted to numpy arrays before being multiplied.
    The total is not divided by the sum of the weights.
    """
    products = numpy.multiply(numpy.array(performance), numpy.array(weight))
    return numpy.sum(products)


# the weighted average, this time computed through the function
performance = [3, 7, 11]
weight = [.3, .4, .3]
weighted_average = weighted_average_function(performance, weight)
weighted_average

[25]:

7.0