Manage Data

Getting Information about your data

def get_info_data(df):
    """Print a quick exploratory overview of a DataFrame to stdout.

    Prints, in order: the first five rows, the last five rows, the
    ``df.info()`` summary, the shape, ``df.describe()`` and the per-column
    null counts (``df.isnull().sum()``).

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        None. Everything is written to stdout.
    """
    print('\n ++++Head Dataframe:++++ \n')
    print(df.head(5))
    print('\n ++++Tail Dataframe:++++ \n')
    print(df.tail(5))
    print('\n ++++Info Dataframe:++++ \n')
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    df.info()
    print('\n Shape Dataframe: ', df.shape)
    print('\n ++++Describe Dataframe:++++ \n')
    print(df.describe())
    print('\n ++++IsNull in Dataframe:++++ \n')
    print(df.isnull().sum())

Drop Duplicates

def drop_dupli(df, keeps='last'):
    """Return a copy of ``df`` with duplicate rows removed.

    Args:
        df: The DataFrame to de-duplicate; it is not modified.
        keeps: Forwarded as ``keep`` to ``DataFrame.drop_duplicates``
            (defaults to ``'last'``).

    Returns:
        The result of ``df.drop_duplicates(keep=keeps)``.
    """
    return df.drop_duplicates(keep=keeps)

Count the number of NaNs

# Count the NaN values in each column
def count_number_nan(df):
    """Return the number of null entries per column of ``df``."""
    null_mask = df.isna()
    return null_mask.sum()

เติมค่า NaN (Fill NaN values)

def f_fill_nan(self, methods="ffill"):
    """Fill NaN values by propagating neighbouring valid observations.

    Args:
        self: The pandas Series or DataFrame to fill.
        methods: ``"ffill"`` / ``"pad"`` propagates the previous valid value
            forward (the default); ``"bfill"`` / ``"backfill"`` propagates
            the next valid value backward.

    Returns:
        A new object of the same type with the gaps filled.

    Raises:
        ValueError: If ``methods`` is not one of the supported names.
    """
    # fillna(method=...) is deprecated in recent pandas releases; the
    # dedicated ffill()/bfill() methods are the supported replacement.
    if methods in ("ffill", "pad"):
        return self.ffill()
    if methods in ("bfill", "backfill"):
        return self.bfill()
    raise ValueError(
        "Invalid fill method. Expecting pad (ffill) or backfill (bfill). "
        "Got {}".format(methods)
    )

Lower Column

# Convert the column names to lowercase
def lower_column(df):
    """Lower-case every column label of ``df`` in place and return ``df``."""
    lowered = []
    for label in df:  # iterating a DataFrame yields its column labels
        lowered.append(label.lower())
    df.columns = lowered
    return df

ปรับ Format

ทศนิยม 2 ตำแหน่ง

def num_format(number, format_num='.2f'):
    """
    Format a single number with the given format spec.

    NaN is rendered as '-'; any other value is passed to the built-in
    ``format`` with ``format_num`` (two decimal places by default).
    """
    return '-' if np.isnan(number) else format(number, format_num)

dataframe / Series

def fmtp(item, format_num='.2f'):
    """
    Map a format string over a pandas object (Series or DataFrame).

    Every element is passed through the built-in ``format`` with
    ``format_num`` (two decimal places by default).

    Args:
        item: The pandas Series or DataFrame to format.
        format_num: Format spec applied to each element.

    Returns:
        A new Series / DataFrame of formatted strings, or None when
        ``item`` is neither a Series nor a DataFrame.
    """
    def _fmt(value):
        return format(value, format_num)

    if isinstance(item, pd.Series):
        return item.map(_fmt)
    if isinstance(item, pd.DataFrame):
        # DataFrame.applymap is deprecated in favour of DataFrame.map in
        # recent pandas; check on the class so that a column that happens to
        # be named "map" cannot be mistaken for the method on older versions.
        if hasattr(pd.DataFrame, 'map'):
            return item.map(_fmt)
        return item.applymap(_fmt)
    # Unsupported input types keep yielding None, as before.
    return None

% Number (*100)

def percent_number(number, format_num='.2f'):
    """
    Format a fraction as a percentage value without the % sign.

    NaN is rendered as '-'; otherwise the number is multiplied by 100 and
    formatted with ``format_num``.
    """
    if not np.isnan(number):
        scaled = number * 100
        return format(scaled, format_num)
    return '-'

percent + % sign (Dataframe / Series)

def as_percent(self, digits=2):
    """Format a pandas Series / DataFrame as percentages with a % sign.

    Args:
        self: The pandas object whose elements are formatted.
        digits: Number of decimal places in the percentage.

    Returns:
        Whatever ``fmtp`` returns for the ``'.<digits>%'`` format spec.
    """
    # NOTE(review): the original called `as_format`, which is not defined in
    # this file; `fmtp` maps a format string over a Series/DataFrame and is
    # assumed to be the intended helper - confirm.
    return fmtp(self, f'.{digits}%')

Last updated