Pandas

Start Notebook

import pandas as pd


# Report which pandas release this notebook session is using.
print(f"pandas  version = {pd.__version__}")
# The timestamp is not assigned to anything, so it is only visible as the
# displayed value of a notebook cell.
pd.Timestamp.today()

Example Function Main()

# Import pandas package
import pandas as pd
 
# Function to add
def add(a, b, c):
    """Return ``a + b + c``, combining the operands left to right."""
    first_two = a + b
    return first_two + c
 
def main():
    """Build a small demo DataFrame and attach a row-wise sum column.

    The frame is printed once before and once after the ``add`` column is
    attached.
    """
    # Three columns holding three integers each.
    columns = {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [7, 8, 9],
    }

    frame = pd.DataFrame(columns)
    print("Original DataFrame:\n", frame)

    def row_total(row):
        # Hand the row's A, B and C cells to the module-level add().
        return add(row['A'], row['B'], row['C'])

    # axis=1 passes each row (rather than each column) to row_total.
    frame['add'] = frame.apply(row_total, axis=1)

    print('\nAfter Applying Function: ')
    print(frame)


if __name__ == '__main__':
    main()

Output:

Reading from Clipboard.

# Reading from Clipboard.
import pandas as pd

# Column labels to assign to the clipboard data.
n = ['code','description','size']
# header=None: no clipboard row is used as column names; names=n supplies them.
df = pd.read_clipboard(header=None,names=n)
# Bare expression: only has a visible effect as the last line of a notebook cell.
df

Remove Null Values

# Remove null values to avoid errors in later steps.
# inplace=True is passed, so the call's return value is not used here.
# NOTE(review): `data` is not defined in this snippet - presumably a DataFrame loaded earlier; confirm.
data.dropna(inplace = True)  

Apply uppercase to a column in dataframe (ตัวพิมพ์ใหญ่)

เปลี่ยนข้อมูลใน column ให้เป็นอักษรตัวพิมพ์ใหญ่

# Upper-case the 'description' column through the .str accessor and store it back.
df['description'] =df['description'].str.upper()
# Applying upper() method on 'College' column
# The result is not assigned, so `data` keeps its original values; as written
# this line only produces a value to display.
data['College'].apply(lambda x: x.upper())

Apply function to every row

# Import pandas package
import pandas as pd
 
# Function to add
def add(a, b, c):
    """Return ``a + b + c``, combining the operands left to right."""
    first_two = a + b
    return first_two + c

# Row-wise apply (axis=1): each row's A, B and C cells are passed to add() and
# the results are stored in a new 'add' column.
df['add'] = df.apply(lambda row : add(row['A'],
                     row['B'], row['C']), axis = 1)

หรือ

# Row-wise total across all columns, using pandas' own reduction so that no
# numpy import is required.
df['add'] = df.sum(axis = 1)

Output:

Dataframe to Datatable

def create_js(df):
    """Render each row of *df* as a list-literal line of stringified cells.

    Every cell is converted with ``str``; each row becomes the ``str`` of that
    list followed by ``,`` and a newline, for example
    ``['20G0', 'PORT HOLE REEFER CONTAINER', '20x8x8'],``.

    Args:
        df: DataFrame whose rows are rendered in order.

    Returns:
        The concatenated row lines (an empty string when *df* has no rows).
        The same text is also printed.
    """
    n_cols = len(df.columns)
    rendered = []
    for _, row in df.iterrows():
        # .iloc is strictly positional. Plain row[i] is label-based first, so
        # it picks the wrong cell for integer column labels and relies on a
        # deprecated positional fallback for string labels.
        cells = [str(row.iloc[i]) for i in range(n_cols)]
        rendered.append(str(cells) + ",\n")
    # Join once instead of growing the string row by row.
    lst_result = "".join(rendered)
    print(lst_result)
    return lst_result
['20G0', 'GENERAL PURPOSE CONT. (GENERAL - OPENINGS AT ONE OR BOTH ENDS)', '20x8x8'],
['20G1', 'GENERAL PURPOSE CONT. (GENERAL - PASSIVE VENTS AT UPPER PART OF CARGO SPACE)', '20x8x8'],
['20H0', 'INSULATED CONTAINER(REFRIGERATED OR HEATED WITH REMOVABLE EQUIPMENT LOCATED EXTERNALLY;HEAT TRANSFER COEFFICIENT K=0.4W/M2.K)', '20x8x8'],
['20H1', 'PORT HOLE REEFER CONTAINER', '20x8x8'],

Trim All Dataframe (ตัดช่องว่าง)

def trim_all_columns(df):
    """
    Trim whitespace from both ends of every string value in the dataframe.

    Non-string values are passed through unchanged. The element-wise result
    is returned as a new DataFrame.
    """
    def _strip_if_str(value):
        return value.strip() if isinstance(value, str) else value

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
    # name is deprecated. The method is looked up on the class so that a column
    # which happens to be called "map" cannot shadow it on older pandas.
    if hasattr(type(df), "map"):
        return df.map(_strip_if_str)
    return df.applymap(_strip_if_str)

ปรับ Format

# Render every value in 'Import' with four decimal places; str.format returns
# text, so the column holds strings afterwards.
df['Import'] = df['Import'].map('{:.4f}'.format)

Dataframe to Markdown

def create_md(df):
    """Render *df* as a Markdown table, print it, and return the text.

    Layout: a header line ``| name |name |``, a separator line using
    ``-----|`` for column names of up to five characters and ``----------|``
    for longer ones, then one ``| value |value |`` line per row. Every line
    ends with a newline. The index is not included.

    Args:
        df: DataFrame to render. Column labels and cells are converted with
            ``str``, so non-string labels are accepted.

    Returns:
        The Markdown text that was printed.
    """
    headers = [str(name) for name in df.columns]

    lines = ["| " + "".join(f"{header} |" for header in headers)]
    lines.append(
        "|" + "".join(
            "-----|" if len(header) <= 5 else "----------|"
            for header in headers
        )
    )

    for _, row in df.iterrows():
        # .iloc is strictly positional. Plain row[i] is label-based first, so
        # it picks the wrong cell for integer column labels and relies on a
        # deprecated positional fallback for string labels.
        cells = (str(row.iloc[i]) for i in range(len(headers)))
        lines.append("| " + "".join(f"{cell} |" for cell in cells))

    # Join once instead of growing the string piece by piece.
    md = "\n".join(lines) + "\n"
    print(md)
    return md

Read Text File (Fixed Length)

# Retained so existing references to the module-level name keep working;
# split_text builds and returns its own list and does not append to this one.
data = []
def split_text(texts,chunk_size):
    """Split fixed-width text into one list of fields per line.

    Args:
        texts: The full text; it is divided with ``str.splitlines``.
        chunk_size: Field widths, passed through to ``split_column``.

    Returns:
        A new list with one ``split_column`` result per line. A fresh list is
        built on every call, so repeated calls do not accumulate rows.
    """
    return [split_column(line, chunk_size) for line in texts.splitlines()]

def split_column(x,chunk_size):
    """Cut one fixed-width line into whitespace-stripped fields.

    Args:
        x: The line of text to cut.
        chunk_size: Field widths in order. Each width is converted with
            ``int`` once, so the slice bound and the running offset always
            agree.

    Returns:
        A list with one stripped string per width. Fields that start beyond
        the end of *x* come back as empty strings.
    """
    fields = []
    start = 0
    for width in chunk_size:
        end = start + int(width)
        fields.append(x[start:end].strip())
        start = end
    return fields

# Configure the data layout: the width of each field, the column name for each
# field, and the source file.
chunk_size = (4,125,75,6,50,8,8,4,1,1,1,1)
cols = ["code","name","Abbreviate","Department","email","start_date","finish_date","payment_port","er","el","ir","il"]
files = "data/areacode.txt"

# The file is only read, so plain read mode is enough; the context manager
# closes the handle even if reading fails.
with open(files, "r") as f:
    texts = f.read()
data = split_text(texts,chunk_size)
# Passing columns= at construction also works when the file yields no rows.
df = pd.DataFrame(data, columns=cols)
df.head()

Dictionary for Thai Month

# Month number (1-12) -> Thai month name. The names are stored as proper Thai
# text; the previous literals were mis-decoded bytes (mojibake).
dict_month = {
    1: 'มกราคม',
    2: 'กุมภาพันธ์',
    3: 'มีนาคม',
    4: 'เมษายน',
    5: 'พฤษภาคม',
    6: 'มิถุนายน',
    7: 'กรกฎาคม',
    8: 'สิงหาคม',
    9: 'กันยายน',
    10: 'ตุลาคม',
    11: 'พฤศจิกายน',
    12: 'ธันวาคม',
}
dict_month[1]

# -> 'มกราคม' (January)

Last updated