常规特性¶
质数¶
In [36]:
from math import sqrt
def is_prime(n):
if n == 1:
return False
for i in range(2, int(sqrt(n))+1):
if n % i == 0:
return False
return True
print(is_prime(4))
print(is_prime(5))
False True
斐波那契数列¶
递归¶
In [42]:
def normal_fibo(n):
if n <= 2:
return 1
else:
return normal_fibo(n-1) + normal_fibo(n-2)
# 前十
for n in range(10):
print(n, normal_fibo(n))
# 第n
%time normal_fibo(35)
0 1 1 1 2 1 3 2 4 3 5 5 6 8 7 13 8 21 9 34 CPU times: user 2.23 s, sys: 0 ns, total: 2.23 s Wall time: 2.39 s
Out[42]:
9227465
递归缓存¶
In [43]:
from functools import cache
@cache
def cached_fibo(n):
if n <= 2:
return 1
else:
return cached_fibo(n - 1) + cached_fibo(n-2)
# 前10
for n in range(10):
print(n, cached_fibo(n))
# 第n
%time cached_fibo(35)
0 1 1 1 2 1 3 2 4 3 5 5 6 8 7 13 8 21 9 34 CPU times: user 27 µs, sys: 0 ns, total: 27 µs Wall time: 30 µs
Out[43]:
9227465
合并和分组聚合¶
字符串离散化¶
In [81]:
import pandas as pd
file_path = './test.csv'
df = pd.read_csv(file_path)
print(type(df))
print(df.shape)
print(df['type'].head(3))
# 统计分类的列表
temp_list = df['type'].str.split(',').to_list()
# 分类列表
genre_list = list(set([i for j in temp_list for i in j]))
print(temp_list[0])
import numpy as np
# 构造全为0的数组
zeros_df = pd.DataFrame(np.zeros((df.shape[0], len(genre_list))),columns=genre_list)
for i in range(df.shape[0]):#行
zeros_df.loc[i, temp_list[i]] = 1
print(zeros_df.head(3))
# 统计和
genre_cnt = zeros_df.sum(axis=0)
print(genre_cnt)
genre_cnt = genre_cnt.sort_values()
_x = genre_cnt.index
_y = genre_cnt.values
import matplotlib.pyplot as plt
plt.figure(figsize=(20, 8), dpi=30)
plt.bar(range(len(_x)), _y)
plt.xticks(range(len(_x)), _x)
plt.show()
<class 'pandas.core.frame.DataFrame'>
(5, 2)
0 a,b,c
1 a,f,x
2 x,b,m
Name: type, dtype: object
['a', 'b', 'c']
x y a c f m b
0 0.0 0.0 1.0 1.0 0.0 0.0 1.0
1 1.0 0.0 1.0 0.0 1.0 0.0 0.0
2 1.0 0.0 0.0 0.0 0.0 1.0 1.0
x 2.0
y 1.0
a 3.0
c 3.0
f 2.0
m 1.0
b 3.0
dtype: float64
数据合并¶
In [89]:
import pandas as pd
import numpy as np
df1 = pd.DataFrame(np.ones((2,4)), index=['A','B'], columns=list('abcd'))
print(df1)
df2 = pd.DataFrame(np.zeros((3,3)), index=['A','B','C'], columns=list('xyz'))
print(df2)
print(df2.join(df1))
print(df1.join(df2))
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
x y z
A 0.0 0.0 0.0
B 0.0 0.0 0.0
C 0.0 0.0 0.0
x y z a b c d
A 0.0 0.0 0.0 1.0 1.0 1.0 1.0
B 0.0 0.0 0.0 1.0 1.0 1.0 1.0
C 0.0 0.0 0.0 NaN NaN NaN NaN
a b c d x y z
A 1.0 1.0 1.0 1.0 0.0 0.0 0.0
B 1.0 1.0 1.0 1.0 0.0 0.0 0.0
In [91]:
df3 = pd.DataFrame(np.zeros((3,3)), columns=list('fax'))
print(df3)
print(df1.merge(df3, on='a'))
f a x 0 0.0 0.0 0.0 1 0.0 0.0 0.0 2 0.0 0.0 0.0 Empty DataFrame Columns: [a, b, c, d, f, x] Index: []
In [92]:
df3.loc[1, "a"] = 1
print(df1.merge(df3, on='a'))
a b c d f x 0 1.0 1.0 1.0 1.0 0.0 0.0 1 1.0 1.0 1.0 1.0 0.0 0.0
In [94]:
df3 = pd.DataFrame(np.arange(9).reshape(3,3), columns=list('fax'))
print(df3)
print(df1.merge(df3, on='a'))
f a x
0 0 1 2
1 3 4 5
2 6 7 8
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
1 1.0 1.0 1.0 1.0 0 2
In [95]:
df1.loc["A",'a'] = 100
print(df1.merge(df3, on='a'))
a b c d f x 0 1.0 1.0 1.0 1.0 0 2
In [96]:
print(df1.merge(df3, on='a', how='outer'))
a b c d f x 0 1.0 1.0 1.0 1.0 0.0 2.0 1 4.0 NaN NaN NaN 3.0 5.0 2 7.0 NaN NaN NaN 6.0 8.0 3 100.0 1.0 1.0 1.0 NaN NaN
In [97]:
print(df1.merge(df3, on='a', how='left'))
a b c d f x 0 100.0 1.0 1.0 1.0 NaN NaN 1 1.0 1.0 1.0 1.0 0.0 2.0
In [98]:
print(df1.merge(df3, on='a', how='right'))
a b c d f x 0 1.0 1.0 1.0 1.0 0 2 1 4.0 NaN NaN NaN 3 5 2 7.0 NaN NaN NaN 6 8