Getting started

Python is a programming language with many libraries dedicated to statistical analysis, graphical representation and reporting.

This is a summary of the Python language syntax; you can find more details in the Python documentation.

Basics

[1]:
# create variables: a name is bound to a value with `=`
my_string_variable = "hello"
my_numeric_variable = 1
[2]:
# show a variable directly: a bare expression on the last line of a cell is displayed as the cell's output
my_numeric_variable
[2]:
1
[3]:
# print() is a built-in function (not a method); it writes the value as text to the cell's output
print(my_string_variable)
hello

Vectors

[4]:
# initialization of a vector (a Python list is used as the vector type)
import random

my_numeric_vector = [1, 2, 3, 4, 5, 6]
my_sequence = list(range(1, 7))  # range() excludes its upper bound, so this is 1..6
my_string_vector = ["hello", "world", "!"]
my_logic_vector = [True, False]
# Vector with 25 distinct random numbers from 1 to 5000 inclusive.
# The upper bound of range() is exclusive, hence 5001; random.sample() accepts
# a range directly, so no intermediate list has to be built.
my_random_vector = random.sample(range(1, 5001), 25)
[5]:
# display the vector: the last expression of a cell is shown as its output
my_numeric_vector
[5]:
[1, 2, 3, 4, 5, 6]

Operations with vectors

There are many libraries for statistical analysis, so the same operation can often be performed with several different libraries.

[6]:
import statistics
import numpy

# The same statistics computed with built-ins, the standard library and numpy.
# Every value is wrapped in print(): a bare expression is only displayed when it
# is the last line of a cell, so an unprinted sum would be silently discarded.
print(sum(my_numeric_vector)) # sum
print(statistics.mean(my_numeric_vector)) # mean
print(statistics.median(my_numeric_vector)) # median (standard library)
print(numpy.median(my_numeric_vector)) # median (numpy)
3.5
3.5
3.5
[7]:
# multiply each element of a vector by 2
# every assignment below produces the same list of plain Python numbers
import numpy
import pandas

my_new_vector = [e * 2 for e in my_numeric_vector]
my_new_vector = list(map(lambda x: x * 2, my_numeric_vector))
serie = pandas.Series(my_numeric_vector)
my_new_vector = (serie * 2).tolist()
# .tolist() converts the numpy scalars to plain Python numbers; list(array)
# would keep numpy scalar elements, which differ in type (and, in recent numpy
# versions, in how they are displayed) from the other variants
my_new_vector = (numpy.array(my_numeric_vector) * 2).tolist()
my_new_vector
[7]:
[2, 4, 6, 8, 10, 12]
[8]:
# divide each element of a vector by 2
# every assignment below produces the same list of plain Python floats
my_new_vector = [e / 2 for e in my_numeric_vector]
my_new_vector = list(map(lambda x: x / 2, my_numeric_vector))
serie = pandas.Series(my_numeric_vector)
my_new_vector = (serie / 2).tolist()
# .tolist() converts the numpy scalars to plain Python floats; list(array)
# would keep numpy scalar elements instead
my_new_vector = (numpy.array(my_numeric_vector) / 2).tolist()
my_new_vector
[8]:
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
[9]:
# add each element of a vector to the element at the same position of another vector
# every assignment below produces the same list of plain Python numbers
import operator
import numpy
import pandas

my_new_vector = [sum(e) for e in zip(my_numeric_vector, my_sequence)]
my_new_vector = [x + y for x, y in zip(my_numeric_vector, my_sequence)]
my_new_vector = list(map(operator.add, my_numeric_vector, my_sequence))
my_new_vector = pandas.Series(my_numeric_vector).add(pandas.Series(my_sequence)).tolist()
# .tolist() converts the numpy scalars to plain Python numbers; list(array)
# would keep numpy scalar elements instead
my_new_vector = (numpy.array(my_numeric_vector) + numpy.array(my_sequence)).tolist()
my_new_vector
[9]:
[2, 4, 6, 8, 10, 12]
[10]:
# get elements from a vector (positions start at 0)
print(my_string_vector[0]) # first element: prints hello
# first and last element: prints ['hello', '!']
# (explicit indexing is used instead of a slice with step len - 1, which
# raises ValueError for a one-element vector because the step would be 0)
print([my_string_vector[0], my_string_vector[-1]])
print(my_string_vector[0:2]) # first two elements: prints ['hello', 'world']
hello
['hello', '!']
['hello', 'world']
[11]:
# attach a label to every element of a vector
import pandas
names = ["one","two","three","four","five","six"]
# index=names labels the rows; transposing turns those labels into column headers
my_vector = pandas.DataFrame(data=my_numeric_vector, index=names).transpose()
print(my_vector)
   one  two  three  four  five  six
0    1    2      3     4     5    6
[12]:
# inspect the element type numpy infers for a vector
import numpy
print(numpy.asarray(my_vector).dtype.type) # numeric labelled vector: numpy.int64 in the recorded output
print(numpy.asarray(my_string_vector).dtype.type) # string vector: numpy.str_
<class 'numpy.int64'>
<class 'numpy.str_'>
[13]:
# conversion of each element of a vector to a string
# both assignments build the same list
my_new_string_vector = [str(number) for number in my_numeric_vector]
my_new_string_vector = [*map(str, my_numeric_vector)]
my_new_string_vector
[13]:
['1', '2', '3', '4', '5', '6']

Matrices

A matrix is a vector with more than one dimension: here, its elements are arranged in rows and columns.

[14]:
# two-dimensional matrix of zeros with 2 rows and 5 columns; every row is built as its own list
my_matrix = [[0 for _ in range(5)] for _ in range(2)]
my_matrix
[14]:
[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
[15]:
# two-dimensional matrix with 2 rows and 5 columns; every row holds the numbers 1..5
my_matrix = [[*range(1, 6)] for _ in range(2)]
my_matrix
[15]:
[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]
[16]:
import numpy
# two-dimensional matrix with 2 rows and 5 columns holding 1..10, filled row by row
my_matrix = numpy.reshape(numpy.arange(1, 11), (2, 5))
my_matrix
[16]:
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])
[17]:
# two-dimensional matrix with 2 rows and 5 columns holding 1..10, filled column by column as the R language does
my_matrix = numpy.arange(1, 11).reshape((2, 5), order="F")
my_matrix
[17]:
array([[ 1,  3,  5,  7,  9],
       [ 2,  4,  6,  8, 10]])
[18]:
# numpy indexing is [row, column], both counted from 0
print(my_matrix[1, 1]) # row 2, column 2: prints 4
print(my_matrix[0, :]) # prints row 1
print(my_matrix[:, 1]) # prints column 2
4
[1 3 5 7 9]
[3 4]
[19]:
# combine several vectors into one labelled table
import pandas
vector_one = ["one", 0.1]
vector_two = ["two", 1]
vector_three = ["three", 10]
my_vectors = [vector_one, vector_two, vector_three]  # each inner vector becomes one row
rownames = ["yesterday", "today", "tomorrow"]
colnames = ["vector number", "quantity"]
pandas.DataFrame(data=my_vectors, columns=colnames, index=rownames)
[19]:
vector number quantity
yesterday one 0.1
today two 1.0
tomorrow three 10.0

Weighted average

[20]:
# performance of each company
apple_performance, netflix_performance, amazon_performance = 3, 7, 11
# weight of each company
apple_weight, netflix_weight, amazon_weight = 0.3, 0.4, 0.3
[21]:
# portfolio performance: every performance multiplied by its weight, then added up
weighted_average = (
    apple_performance * apple_weight
    + netflix_performance * netflix_weight
    + amazon_performance * amazon_weight
)
weighted_average
[21]:
7.0
[22]:
# the same example, with the values held in vectors
import pandas
company = ['apple','netflix','amazon']
performance = [3,7,11]
weight = [.3,.4,.3]
# label each value with its company, then transpose so the companies become column headers
performance = pandas.DataFrame(data=performance, index=company).transpose()
weight = pandas.DataFrame(data=weight, index=company).transpose()
print(performance)
print(weight)
   apple  netflix  amazon
0      3        7      11
   apple  netflix  amazon
0    0.3      0.4     0.3
[23]:
# with headers: multiply the two labelled tables element by element
performance_weight = performance * weight
print(performance_weight)
# add up the values across the columns of each row
weighted_average = performance_weight.sum(axis="columns")
print(weighted_average)
   apple  netflix  amazon
0    0.9      2.8     3.3
0    7.0
dtype: float64
[24]:
# without headers: plain numpy arrays multiplied element by element
import numpy
performance_weight = numpy.multiply(numpy.array(performance), numpy.array(weight))
print(performance_weight)
# add up every element of the product
weighted_average = performance_weight.sum()
print(weighted_average)
[[0.9 2.8 3.3]]
7.0

Functions

[25]:
import numpy
# the weighted average but with a function
def weighted_average_function(performance, weight):
    """Return the sum of the element-wise products of ``performance`` and ``weight``.

    Both arguments are converted to numpy arrays before being multiplied.
    The weights are used as given (they are not normalized), so the result is
    a weighted average only when the weights add up to 1.
    """
    weighted_values = numpy.multiply(numpy.array(performance), numpy.array(weight))
    return numpy.sum(weighted_values)
# the portfolio data as plain lists, passed to the function by keyword
performance = [3, 7, 11]
weight = [0.3, 0.4, 0.3]
weighted_average = weighted_average_function(performance=performance, weight=weight)
weighted_average
[25]:
7.0