Use help function to get help for the syntax¶
In [1]:
import random
# help() prints the documentation of the object it is given;
# here that object is the random.uniform method.
help(random.uniform)
Help on method uniform in module random: uniform(a, b) method of random.Random instance Get a random number in the range [a, b) or [a, b] depending on rounding.
Typical data types & math operations¶
In [2]:
# int + float: the sum is a float, bound to a
a = 10 + 3.14
# squaring by explicit multiplication
result_square = a * a
# squaring with the power operator; same value as result_square
result_power = a ** 2
# complex number with real part 3.0 and imaginary part 4.0, bound to b
b = 3.0 + 4.0j
# abs() of a complex number is its modulus
result_modulus = abs(b)
# boolean: the comparison evaluates to True
c = (3 + 4) == 7
# logical negation of c, i.e. False
result_logical = not c
# string
d = "Monday"
# type() reports the class of d
result_type = type(d)
# len() counts the characters in the string
result_length = len(d)
# Print every result on its own line, in the order computed above
for outcome in (
    result_square,
    result_power,
    result_modulus,
    result_logical,
    result_type,
    result_length,
):
    print(outcome)
172.6596 172.6596 5.0 False <class 'str'> 6
Vectors¶
In [3]:
# a list of strings
vec = ["Man", "Woman", "Woman", "Man", "Woman"]
# number of elements in the list
length_of_vec = len(vec)
print("Length of vec:", length_of_vec)
# element-wise comparison against "Woman": one boolean per element
logical_values = [label == "Woman" for label in vec]
print("Logical values:", logical_values)
# zero-based positions at which the comparison above is True
indices_of_woman = [pos for pos, is_woman in enumerate(logical_values) if is_woman]
print("Indices of 'Woman':", indices_of_woman)
Length of vec: 5 Logical values: [False, True, True, False, True] Indices of 'Woman': [1, 2, 4]
Categorical data (similar to Factors in R)¶
In [4]:
import pandas as pd
# a list of strings
vec = ["Man", "Woman", "Woman", "Man", "Woman"]
# wrap the list in a pandas Categorical (the counterpart of an R factor)
vec_cat = pd.Categorical(vec)
# the distinct categories (levels) and how many of them there are
levels_of_vec_cat = vec_cat.categories
num_levels_of_vec_cat = len(levels_of_vec_cat)
# report the types before and after the conversion
print("Type of vec:", type(vec))
print("Type of vec_cat:", type(vec_cat))
# report the levels and their count
print("Levels of vec_cat:", levels_of_vec_cat)
print("Number of levels of vec_cat:", num_levels_of_vec_cat)
Type of vec: <class 'list'> Type of vec_cat: <class 'pandas.core.arrays.categorical.Categorical'> Levels of vec_cat: Index(['Man', 'Woman'], dtype='object') Number of levels of vec_cat: 2
If statement¶
In [5]:
a = 0
# Branch on whether a equals 1. The indented line under each branch is its
# body; with a = 0 the else branch runs.
if a == 1:
    print("a equals to 1")
else:
    print("a is not 1")
a is not 1
For loops¶
In [6]:
# range(start, stop) generates the integers from "start" up to "stop - 1",
# so this loop prints 1 through 5. The indented line is the loop body.
for i in range(1, 6):
    print(i)
1 2 3 4 5
In [7]:
# A for loop can also iterate directly over the elements of a list
v = [1, 2, 5, 7]
for i in v:
    print(i)
1 2 5 7
While statement¶
In [8]:
i = 0
# Each pass advances i by 2 and prints it. The condition is only re-checked
# at the top of the loop, so the last value printed (6) is already past 5.
while i < 5:
    i = i + 2
    print(i)
2 4 6
Apply statement¶
In [9]:
import numpy as np
# Draw a 4 x 4 matrix of standard-normal random numbers
mat = np.random.randn(4, 4)
# Show the matrix under a heading
print("Matrix:")
print(mat)
# Apply np.sum to every 1-D slice taken along the given axis:
# axis 0 runs down the rows, giving one sum per column
column_sums = np.apply_along_axis(np.sum, 0, mat)
# axis 1 runs across the columns, giving one sum per row
row_sums = np.apply_along_axis(np.sum, 1, mat)
print("Row sums:", row_sums)
print("Column sums:", column_sums)
Matrix: [[ 0.36293694 -0.38902911 -0.37996659 -0.24803539] [-0.83384852 -0.13384431 1.40763216 0.56164117] [ 0.14599948 0.83089115 -0.36447597 -1.30608725] [ 0.91811255 0.87439601 -0.98635587 -0.65802497]] Row sums: [-0.65409414 1.0015805 -0.69367259 0.14812773] Column sums: [ 0.59320045 1.18241375 -0.32316627 -1.65050643]
Functions¶
In [10]:
def myfunction(x):
    """Return the square of x, computed as x multiplied by itself."""
    return x * x


b = myfunction(4)
print("b =", b)
b = 16
List¶
In [11]:
# build a list whose elements have different types
my_list = ["a", [1, 2, 3], False, 3.14]
# indexing is zero-based: position 0 holds the first element
print("First element:", my_list[0])
# position 1 holds the second element (itself a list)
print("Second element:", my_list[1])
# A dictionary (dict in Python) associates names (keys) with elements (values);
# here each name is paired with the list element at the same position
element_names = ("letters", "array", "TF", "pi")
my_dict = dict(zip(element_names, my_list))
# look up an element by its name
print("TF element:", my_dict["TF"])
# collect the dictionary's values into a list, in insertion order; nested
# values such as [1, 2, 3] are kept as they are, not flattened
my_vector = list(my_dict.values())
print("Resulting vector:", my_vector)
First element: a Second element: [1, 2, 3] TF element: False Resulting vector: ['a', [1, 2, 3], False, 3.14]
Data frame¶
In [12]:
# pandas library is used to create and manipulate a DataFrame
# Define a new DataFrame: each dictionary key becomes a column
my_data = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'atom': ["N", "CA", "CB", "C", "O"],
    'mass': [14, 12, 12, 12, 16],
    'size': [1.2, 1.4, 1.4, 1.4, 1.1]
})
# Display the DataFrame
print(my_data)
# Get column names
print("Column names:", my_data.columns)
# Get the structure of the DataFrame.
# info() prints its report itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line after the report.
print("Data structure:")
my_data.info()
# Retrieve a column
print("Column 'mass':")
print(my_data['mass'])
# Add a column (one value per existing row)
my_data['backbone'] = [True, True, False, True, True]
# Display the updated DataFrame
print("Updated DataFrame:")
print(my_data)
# Add a row: label the new values with the column names, wrap them in a
# one-row DataFrame and concatenate; ignore_index=True renumbers the rows
new_row = pd.Series([6, "S", 32, 1.6, False], index=my_data.columns)
my_data = pd.concat([my_data, pd.DataFrame([new_row])], ignore_index=True)
# Display the DataFrame after adding a row
print("DataFrame after adding a row:")
print(my_data)
id atom mass size 0 1 N 14 1.2 1 2 CA 12 1.4 2 3 CB 12 1.4 3 4 C 12 1.4 4 5 O 16 1.1 Column names: Index(['id', 'atom', 'mass', 'size'], dtype='object') Data structure: <class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 5 non-null int64 1 atom 5 non-null object 2 mass 5 non-null int64 3 size 5 non-null float64 dtypes: float64(1), int64(2), object(1) memory usage: 292.0+ bytes None Column 'mass': 0 14 1 12 2 12 3 12 4 16 Name: mass, dtype: int64 Updated DataFrame: id atom mass size backbone 0 1 N 14 1.2 True 1 2 CA 12 1.4 True 2 3 CB 12 1.4 False 3 4 C 12 1.4 True 4 5 O 16 1.1 True DataFrame after adding a row: id atom mass size backbone 0 1 N 14 1.2 True 1 2 CA 12 1.4 True 2 3 CB 12 1.4 False 3 4 C 12 1.4 True 4 5 O 16 1.1 True 5 6 S 32 1.6 False
Input/output¶
In [13]:
# Assuming 'my_data' is a pandas DataFrame: write it to a CSV file and read
# it straight back. index=False keeps the row index out of the file, so the
# frame read back has no extra index column.
csv_path = "my_data.csv"
my_data.to_csv(csv_path, index=False)
my_data = pd.read_csv(csv_path)
my_data
Out[13]:
id | atom | mass | size | backbone | |
---|---|---|---|---|---|
0 | 1 | N | 14 | 1.2 | True |
1 | 2 | CA | 12 | 1.4 | True |
2 | 3 | CB | 12 | 1.4 | False |
3 | 4 | C | 12 | 1.4 | True |
4 | 5 | O | 16 | 1.1 | True |
5 | 6 | S | 32 | 1.6 | False |
Basic plotting¶
In [14]:
import matplotlib.pyplot as plt
# the integers 1..5 and their squares
x = np.arange(1, 6)
y = x**2
# Draw the points as unconnected circle markers on an explicit Axes
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
# Axis labels and title
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Scatter Plot of x and y')
# Render the figure
plt.show()
In [15]:
# 100 evenly spaced values from 1 to 5 and their squares
x = np.linspace(1, 5, 100)
y = x**2
# Draw the curve as a connected line on an explicit Axes
fig, ax = plt.subplots()
ax.plot(x, y)
# Axis labels and title
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Curve Plot of x^2')
# Render the figure
plt.show()
In [16]:
# Generate 1000 random points in 2D
np.random.seed(42)  # fixed seed so the same points are drawn on every run
# x coordinates are drawn first, then y coordinates, from the seeded stream
x_draws = np.random.randn(1000)
y_draws = np.random.randn(1000)
y = pd.DataFrame({'xvalues': x_draws, 'yvalues': y_draws})
# Scatter plot using matplotlib: white markers with a thin black outline
fig, ax = plt.subplots()
ax.scatter(y['xvalues'], y['yvalues'], s=20, c='white', edgecolors='black', linewidth=0.5)
ax.set_xlabel('xvalues')
ax.set_ylabel('yvalues')
ax.set_title('Scatter Plot of 1000 Random Points in 2D')
plt.show()
In [17]:
import seaborn as sns
# settings shared by both density layers
density_kwargs = dict(data=y, x='xvalues', y='yvalues', thresh=0, levels=20)
# first layer: unfilled contour lines in the reversed blue colormap
sns.kdeplot(cmap="Blues_r", fill=False, **density_kwargs)
# second layer: filled contours plus a colorbar with fixed tick positions
sns.kdeplot(
    cmap='Blues',
    fill=True,
    cbar=True,
    cbar_kws={'ticks': [0.0, 0.05, 0.10, 0.15]},
    **density_kwargs,
)
# Add labels and title
plt.xlabel('x values')
plt.ylabel('y values')
plt.title('Density Map with Contours (Seaborn)')
# Show the plot
plt.show()
Pass keyword arguments to a function¶
In [18]:
# A function with a fixed set of named parameters (no **kwargs)
def func_main(a, b, c):
    """Return the sum of a, b and c."""
    return a + b + c


# Usage: every argument is passed by keyword
result = func_main(a=1, b=2.4, c=0.6)
print(result)
4.0
In [19]:
# A function where one argument, func_2nd, is itself a function.
# **kwargs collects any extra keyword arguments and forwards them to func_2nd.
def func_main2(a, func_2nd, **kwargs):
    """Return a plus the result of func_2nd called with the extra keyword arguments.

    Args:
        a: value added to the result of func_2nd.
        func_2nd: callable invoked as func_2nd(**kwargs).
        **kwargs: keyword arguments passed through to func_2nd unchanged.
    """
    return a + func_2nd(**kwargs)


# Usage case 1
def func1(d, e):
    """Print d and e, then return their sum."""
    print("d", d)
    print("e", e)
    return d + e


result = func_main2(a=1, func_2nd=func1, d=2.4, e=0.6)
print(result)
d 2.4 e 0.6 4.0
In [20]:
## Usage case 2
def func2(f, g, h, i):
    """Return f + g + h * i (the product h * i is evaluated before the additions)."""
    return f + g + h * i
# All four parameters of func2 (f, g, h, i) are supplied as keyword arguments,
# so the call evaluates to 1 + (10 + 20 + 1 * -3) = 28
func_main2(a = 1, func_2nd = func2, f = 10, g = 20, h = 1, i = -3)
Out[20]:
28
In [21]:
## Wrong usages (kept commented out because each call raises a TypeError)
# func2 requires i, which is not supplied here:
#func_main2(a = 1, func_2nd = func2, f = 10, g = 20, h = 1)
# func2 has no parameters named d or e, and f, g, h, i are all missing:
#func_main2(a = 1, func_2nd = func2, d = 2.4, e = 0.6)
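Make sure that the keyword arguments collected by **kwargs (Python's counterpart of the ellipsis, ..., in R) exactly match the parameters of the function they are forwarded to. Thus, in usage case 2 above, the call fails with a TypeError when only f, g, and h are passed to func2, because the required argument i is missing. It also fails when, for example, d and e are passed to func2, because func2 has no parameters with those names.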