01A. Basics of programming in Python¶

Mingyang Lu¶

12/15/2023¶

Use help function to get help for the syntax¶

In [1]:
import random
# help() prints the documentation of the object passed to it; here, the uniform method of the random module
help(random.uniform)
Help on method uniform in module random:

uniform(a, b) method of random.Random instance
    Get a random number in the range [a, b) or [a, b] depending on rounding.

Typical data types & math operations¶

In [2]:
# Numbers: adding an integer and a real (float) number; the sum is bound to a
a = 10 + 3.14

# the square of a, obtained by multiplying a with itself
result_square = a * a

# the same square, this time written with the power operator
result_power = a**2

# Complex numbers: real part 3.0, imaginary part 4.0; bound to b
b = 3.0 + 4.0j

# abs() of a complex number is its modulus
result_modulus = abs(b)

# Booleans: the comparison 3 + 4 == 7 holds, so c is True
c = (3 + 4) == 7

# logical negation of c, which is False
result_logical = not c

# Strings
d = "Monday"

# type() reports the data type of d
result_type = type(d)

# len() counts the characters in the string
result_length = len(d)

# Print the results, one per line, in the order they were computed
for value in (
    result_square,
    result_power,
    result_modulus,
    result_logical,
    result_type,
    result_length,
):
    print(value)
172.6596
172.6596
5.0
False
<class 'str'>
6

Vectors¶

In [3]:
# a list of strings
vec = ["Man", "Woman", "Woman", "Man", "Woman"]

# number of elements in the list
length_of_vec = len(vec)
print("Length of vec:", length_of_vec)

# compare every element with "Woman"; the result holds one boolean per element
logical_values = [entry == "Woman" for entry in vec]
print("Logical values:", logical_values)

# positions (counted from 0) at which the comparison above is True
indices_of_woman = [pos for pos, is_match in enumerate(logical_values) if is_match]
print("Indices of 'Woman':", indices_of_woman)
Length of vec: 5
Logical values: [False, True, True, False, True]
Indices of 'Woman': [1, 2, 4]

Categorical data (similar to Factors in R)¶

In [4]:
import pandas as pd
# a list of strings
vec = ["Man", "Woman", "Woman", "Man", "Woman"]

# build a factor-like categorical variable from the list
vec_cat = pd.Categorical(vec)

# report the data type of the plain list and of the categorical
for label, obj in (("Type of vec:", vec), ("Type of vec_cat:", vec_cat)):
    print(label, type(obj))

# the distinct categories play the role of the levels
levels_of_vec_cat = vec_cat.categories
print("Levels of vec_cat:", levels_of_vec_cat)

# how many distinct levels there are
num_levels_of_vec_cat = len(levels_of_vec_cat)
print("Number of levels of vec_cat:", num_levels_of_vec_cat)
Type of vec: <class 'list'>
Type of vec_cat: <class 'pandas.core.arrays.categorical.Categorical'>
Levels of vec_cat: Index(['Man', 'Woman'], dtype='object')
Number of levels of vec_cat: 2

If statement¶

In [5]:
a = 0

# choose the message in the branch whose condition matches, then print it once
if a == 1:
    message = "a equals to 1"
else:
    message = "a is not 1"
print(message)
a is not 1

For loops¶

In [6]:
# range(start, stop) generates the integers from "start" up to "stop - 1";
# "stop" itself is excluded, so range(1, 6) yields 1, 2, 3, 4, 5
for i in range(1, 6):
    print(i)
1
2
3
4
5
In [7]:
# Another way to use a for loop: iterate directly over the elements of a list
v = [1, 2, 5, 7]

# i takes the value of each element of v in turn (not its position in the list)
for i in v:
    print(i)
1
2
5
7

While statement¶

In [8]:
# The condition i < 5 is tested before every pass through the body.
# i is increased before it is printed, so the last printed value (6) is already above the limit.
i = 0
while i < 5:
    i += 2
    print(i)
2
4
6

Apply statement¶

In [9]:
import numpy as np
# Draw a 4 x 4 matrix of random numbers (no seed is set, so the values differ between runs)
mat = np.random.randn(4, 4)

# Display the matrix
print("Matrix:")
print(mat)

# np.apply_along_axis(func, axis, arr) calls func on every 1-D slice of arr taken along axis.
# Along axis 1 each slice is a row, so this gives one sum per row
row_sums = np.apply_along_axis(np.sum, 1, mat)

# Along axis 0 each slice is a column, so this gives one sum per column
column_sums = np.apply_along_axis(np.sum, 0, mat)

print("Row sums:", row_sums)
print("Column sums:", column_sums)
Matrix:
[[ 0.36293694 -0.38902911 -0.37996659 -0.24803539]
 [-0.83384852 -0.13384431  1.40763216  0.56164117]
 [ 0.14599948  0.83089115 -0.36447597 -1.30608725]
 [ 0.91811255  0.87439601 -0.98635587 -0.65802497]]
Row sums: [-0.65409414  1.0015805  -0.69367259  0.14812773]
Column sums: [ 0.59320045  1.18241375 -0.32316627 -1.65050643]

Functions¶

In [10]:
def myfunction(x):
    """Return the square of x, computed as x multiplied by itself."""
    squared = x * x
    return squared

# call the function with 4 and keep the returned value in b
b = myfunction(4)
print("b =", b)
b = 16

List¶

In [11]:
# define a new list; its elements may have different types
my_list = ["a", [1, 2, 3], False, 3.14]

# the first element (positions are counted from 0)
print("First element:", my_list[0])

# the second element
print("Second element:", my_list[1])

# Dictionary (or dict in Python) is used to associate names (keys) with elements (values)
my_dict = {"letters": my_list[0], "array": my_list[1], "TF": my_list[2], "pi": my_list[3]}

# retrieve an element by its name
print("TF element:", my_dict["TF"])

# collect the values of the dictionary into a list, in the order they were inserted.
# Nothing is flattened: the nested list [1, 2, 3] stays a single element of the result.
my_vector = list(my_dict.values())

print("Resulting vector:", my_vector)
First element: a
Second element: [1, 2, 3]
TF element: False
Resulting vector: ['a', [1, 2, 3], False, 3.14]

Data frame¶

In [12]:
# pandas library is used to create and manipulate a DataFrame

# Define a new DataFrame; each dictionary key becomes one column
my_data = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'atom': ["N", "CA", "CB", "C", "O"],
    'mass': [14, 12, 12, 12, 16],
    'size': [1.2, 1.4, 1.4, 1.4, 1.1]
})

# Display the DataFrame
print(my_data)

# Get column names
print("Column names:", my_data.columns)

# Get the structure of the DataFrame.
# info() prints its report itself and returns None, so it is called directly;
# wrapping the call in print() would add a stray "None" line after the report.
print("Data structure:")
my_data.info()

# Retrieve a column
print("Column 'mass':")
print(my_data['mass'])

# Add a column; the list supplies one value per existing row
my_data['backbone'] = [True, True, False, True, True]

# Display the updated DataFrame
print("Updated DataFrame:")
print(my_data)

# Add a row: label the new values with the column names, wrap them in a
# one-row DataFrame and concatenate it below the existing rows.
# ignore_index=True renumbers the rows 0..n-1 in the result.
new_row = pd.Series([6, "S", 32, 1.6, False], index=my_data.columns)
my_data = pd.concat([my_data, pd.DataFrame([new_row])], ignore_index=True)

# Display the DataFrame after adding a row
print("DataFrame after adding a row:")
print(my_data)
   id atom  mass  size
0   1    N    14   1.2
1   2   CA    12   1.4
2   3   CB    12   1.4
3   4    C    12   1.4
4   5    O    16   1.1
Column names: Index(['id', 'atom', 'mass', 'size'], dtype='object')
Data structure:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   id      5 non-null      int64  
 1   atom    5 non-null      object 
 2   mass    5 non-null      int64  
 3   size    5 non-null      float64
dtypes: float64(1), int64(2), object(1)
memory usage: 292.0+ bytes
None
Column 'mass':
0    14
1    12
2    12
3    12
4    16
Name: mass, dtype: int64
Updated DataFrame:
   id atom  mass  size  backbone
0   1    N    14   1.2      True
1   2   CA    12   1.4      True
2   3   CB    12   1.4     False
3   4    C    12   1.4      True
4   5    O    16   1.1      True
DataFrame after adding a row:
   id atom  mass  size  backbone
0   1    N    14   1.2      True
1   2   CA    12   1.4      True
2   3   CB    12   1.4     False
3   4    C    12   1.4      True
4   5    O    16   1.1      True
5   6    S    32   1.6     False

Input/output¶

In [13]:
# Assuming 'my_data' is a pandas DataFrame:
# write it to a CSV file without the row index, then read the same file back in
csv_path = "my_data.csv"
my_data.to_csv(csv_path, index=False)
my_data = pd.read_csv(csv_path)

# the last expression of the cell is displayed as its output
my_data
Out[13]:
id atom mass size backbone
0 1 N 14 1.2 True
1 2 CA 12 1.4 True
2 3 CB 12 1.4 False
3 4 C 12 1.4 True
4 5 O 16 1.1 True
5 6 S 32 1.6 False

Basic plotting¶

In [14]:
import matplotlib.pyplot as plt

x = np.arange(1, 6)
y = x**2

# Create a figure with a single set of axes and draw on it explicitly
fig, ax = plt.subplots()

# Plot points: the 'o' format draws a circle marker at each (x, y) pair without connecting lines
ax.plot(x, y, 'o')

# Add labels and title
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Scatter Plot of x and y')

# Show the plot
plt.show()
No description has been provided for this image
In [15]:
# 100 evenly spaced x values from 1 to 5 (both ends included) and their squares
x = np.linspace(1, 5, 100)
y = x**2

# Create a figure with a single set of axes and draw on it explicitly
fig, ax = plt.subplots()

# Plot the curve: without a marker format the points are joined by a line
ax.plot(x, y)

# Add labels and title
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Curve Plot of x^2')

# Show the plot
plt.show()
No description has been provided for this image
In [16]:
# Generate 1000 random points in 2D
np.random.seed(42)  # fixed seed, so the same points are drawn on every run
# the x coordinates are drawn first and the y coordinates second
x_coords = np.random.randn(1000)
y_coords = np.random.randn(1000)
y = pd.DataFrame({'xvalues': x_coords, 'yvalues': y_coords})

# Scatter plot using matplotlib: small white markers with a thin black outline
marker_style = dict(s=20, c='white', edgecolors='black', linewidth=0.5)
plt.scatter(y['xvalues'], y['yvalues'], **marker_style)
plt.xlabel('xvalues')
plt.ylabel('yvalues')
plt.title('Scatter Plot of 1000 Random Points in 2D')
plt.show()
No description has been provided for this image
In [17]:
import seaborn as sns

# Settings common to both layers: the columns to use, a threshold of 0 and 20 contour levels
shared = dict(x='xvalues', y='yvalues', data=y, thresh=0, levels=20)

# First layer: unfilled contours (fill=False) with the "Blues_r" colormap
sns.kdeplot(cmap="Blues_r", fill=False, **shared)

# Second layer: filled contours with the "Blues" colormap, plus a colorbar with fixed ticks
sns.kdeplot(fill=True, cmap='Blues', cbar=True,
            cbar_kws={'ticks': [0.0, 0.05, 0.10, 0.15]}, **shared)

# Add labels and title
plt.xlabel('x values')
plt.ylabel('y values')
plt.title('Density Map with Contours (Seaborn)')

# Show the plot
plt.show()
No description has been provided for this image

Pass keyword arguments to a function¶

In [18]:
# A function with a fixed set of named parameters (no **kwargs involved)
def func_main(a, b, c):
    """Return a + b + c, added from left to right."""
    partial = a + b
    return partial + c

# Usage: every argument is passed by keyword
result = func_main(a=1, b=2.4, c=0.6)
print(result)
4.0
In [19]:
# func_main2 receives another function, func_2nd, as one of its arguments.
# Any further keyword arguments are collected in **kwargs and forwarded to func_2nd.
def func_main2(a, func_2nd, **kwargs):
    """Return a plus the value func_2nd returns when called with the extra keyword arguments."""
    forwarded = func_2nd(**kwargs)
    return a + forwarded

# Usage case 1
def func1(d, e):
    """Print d and e, then return their sum."""
    print("d", d)
    print("e", e)
    return d + e

# d and e are not parameters of func_main2, so they end up in kwargs and reach func1
result = func_main2(a=1, func_2nd=func1, d=2.4, e=0.6)
print(result)
d 2.4
e 0.6
4.0
In [20]:
## Usage case 2
def func2(f, g, h, i):
    """Return f + g + h * i; the product h * i is added to the sum of f and g."""
    product = h * i
    return f + g + product

# f, g, h and i are forwarded to func2; its result is added to a
func_main2(a=1, func_2nd=func2, f=10, g=20, h=1, i=-3)
Out[20]:
28
In [21]:
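## Wrong usages: each call below raises a TypeError when uncommented
# func2 requires f, g, h and i, but i is missing here
#func_main2(a = 1, func_2nd = func2, f = 10, g = 20, h = 1) 
# func2 has no parameters named d and e
#func_main2(a = 1, func_2nd = func2, d = 2.4, e = 0.6) 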

Make sure that the keyword arguments collected by **kwargs exactly match the parameters of the function they are forwarded to. Thus, in usage case 2 above, the code doesn't work when only f, g, h are passed to func2, because the required argument i is missing. It doesn't work either when, for example, d and e are passed to func2, because func2 has no parameters with those names. Both calls raise a TypeError.