Manage Data
Getting Information about your data
def get_info_data(df):
    """Print a quick overview of a DataFrame to standard output.

    The report contains, in order: the first five rows, the last five rows,
    the ``df.info()`` summary, the shape, the ``df.describe()`` statistics
    and the per-column count of null values.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        None. The report is only printed.
    """
    print('\n ++++Head Dataframe:++++ \n')
    print(df.head(5))
    print('\n ++++Tail Dataframe:++++ \n')
    print(df.tail(5))
    print('\n ++++Info Dataframe:++++ \n')
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would append a stray "None" line to the output.
    df.info()
    print('\n Shape Dataframe: ', df.shape)
    print('\n ++++Describe Dataframe:++++ \n')
    print(df.describe())
    print('\n ++++IsNull in Dataframe:++++ \n')
    print(df.isnull().sum())
Drop Duplicates
def drop_dupli(df, keeps='last'):
    """Return ``df`` with duplicated rows removed.

    Args:
        df: The pandas DataFrame to de-duplicate.
        keeps: Forwarded as the ``keep`` argument of
            ``DataFrame.drop_duplicates``. Defaults to ``'last'``.

    Returns:
        The result of ``df.drop_duplicates(keep=keeps)``.
    """
    return df.drop_duplicates(keep=keeps)
Count the number of NaN's
# Count how many NaN values there are.
def count_number_nan(df):
    """Return the null counts of ``df``, computed as ``df.isnull().sum()``."""
    null_mask = df.isnull()
    return null_mask.sum()
เติมค่า NaN (Fill NaN values)
def f_fill_nan(self, methods="ffill"):
    """Fill missing values by propagating neighbouring observations.

    Args:
        self: The pandas object to fill; it must provide ``ffill()`` and
            ``bfill()`` (pandas Series and DataFrame do).
        methods: ``'ffill'`` or ``'pad'`` for a forward fill (the default),
            ``'bfill'`` or ``'backfill'`` for a backward fill.

    Returns:
        The result of ``self.ffill()`` or ``self.bfill()``.

    Raises:
        ValueError: If ``methods`` is not one of the supported names.
    """
    # fillna(method=...) is deprecated in recent pandas releases, so call the
    # dedicated ffill()/bfill() methods instead.
    if methods in ("ffill", "pad"):
        return self.ffill()
    if methods in ("bfill", "backfill"):
        return self.bfill()
    raise ValueError(
        "Invalid fill method. Expecting pad (ffill) or backfill (bfill). "
        "Got %s" % (methods,)
    )
Lower Column
# Convert the column labels to lower case.
def lower_column(df):
    """Lower-case the string column labels of ``df`` in place.

    Labels that are not strings (for example integer column labels) are left
    unchanged instead of raising ``AttributeError``.

    Args:
        df: The pandas DataFrame whose ``columns`` are rewritten.

    Returns:
        The same ``df`` object, after its ``columns`` were reassigned.
    """
    df.columns = [
        col.lower() if isinstance(col, str) else col for col in df.columns
    ]
    return df
ปรับ Format
ทศนิยม 2 ตำแหน่ง
def num_format(number, format_num='.2f'):
    """Format a single number, rendering missing values as ``'-'``.

    Args:
        number: The value to format. ``None`` and NaN count as missing.
        format_num: Format spec handed to the built-in ``format()``.
            Defaults to ``'.2f'`` (two decimal places).

    Returns:
        ``'-'`` when ``number`` is missing, otherwise the formatted string.
    """
    # None is tested first because np.isnan() raises TypeError for it.
    if number is None or np.isnan(number):
        return '-'
    return format(number, format_num)
dataframe / Series
def fmtp(item, format_num='.2f'):
    """Map a format string over every element of a pandas object.

    With the default spec each value is rendered with two decimal places.

    Args:
        item: A pandas Series or DataFrame.
        format_num: Format spec handed to the built-in ``format()`` for each
            element. Defaults to ``'.2f'``.

    Returns:
        A Series or DataFrame holding the formatted strings, or ``None`` when
        ``item`` is neither a Series nor a DataFrame.
    """
    def _fmt(value):
        return format(value, format_num)

    if isinstance(item, pd.Series):
        return item.map(_fmt)
    if isinstance(item, pd.DataFrame):
        # DataFrame.applymap was renamed to DataFrame.map and deprecated.
        # The check is made on the class so that a column named "map" cannot
        # be mistaken for the method on older pandas releases.
        if hasattr(pd.DataFrame, 'map'):
            return item.map(_fmt)
        return item.applymap(_fmt)
    return None
% Number (*100)
def percent_number(number, format_num='.2f'):
    """Format a fraction as a percentage value without the ``%`` sign.

    Args:
        number: The value to format; it is multiplied by 100 first.
            ``None`` and NaN count as missing.
        format_num: Format spec handed to the built-in ``format()``.
            Defaults to ``'.2f'`` (two decimal places).

    Returns:
        ``'-'`` when ``number`` is missing, otherwise ``number * 100``
        formatted with ``format_num``.
    """
    # None is tested first because np.isnan() raises TypeError for it.
    if number is None or np.isnan(number):
        return '-'
    return format(number * 100, format_num)
percent + % sign (Dataframe / Series)
def as_percent(self, digits=2):
    """Format ``self`` with a percent format spec of ``digits`` decimals.

    Builds the spec ``'.<digits>%'`` (``'.2%'`` by default) and passes it,
    together with ``self``, to ``as_format``.

    Args:
        self: The object to format; presumably a pandas Series or DataFrame
            - confirm against ``as_format``.
        digits: Number of decimal places placed in the format spec.

    Returns:
        Whatever ``as_format`` returns for ``self`` and the built spec.
    """
    # NOTE(review): as_format is not defined in the visible part of this
    # file; fmtp looks like the matching helper - confirm which is intended.
    percent_spec = '.%s%%' % digits
    return as_format(self, percent_spec)
Last updated