Module maleo.scanner

Expand source code Browse git
# Pull in the names exported by the private ``_scanner`` module; ``__all__``
# below limits what ``from <this module> import *`` exposes to ``scanner``.
from ._scanner import *

__all__ = ["scanner"]

Functions

def scanner(df: pandas.core.frame.DataFrame, text_column: str) ‑> pandas.core.frame.DataFrame

Scan a text dataset by counting the characters, words, emojis, numbers, punctuation marks and dates in each text.

Parameters

df : pd.DataFrame
DataFrame of text data
text_column : str
Name of the DataFrame column that contains the text

Returns

df : pd.DataFrame
Summary of text dataset
Expand source code Browse git
def scanner(df: pd.DataFrame, text_column: str) -> pd.DataFrame:
    """Add per-text count columns to a text DataFrame.

    Each value in ``text_column`` is passed to ``len`` and to the
    ``count_words``, ``count_emojis``, ``count_numbers``,
    ``count_punctuations`` and ``count_dates`` helpers; every result is
    stored in its own column. The columns are written onto ``df`` itself,
    and that same object is returned.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame of text data.
    text_column : str
        Name of the column in ``df`` to scan.

    Returns
    -------
    df : pd.DataFrame
        The input DataFrame with ``chars_count``, ``words_count``,
        ``emojis_count``, ``numbers_count``, ``punctuations_count`` and
        ``dates_count`` columns set.
    """
    # (output column, per-value function) pairs, in the order the columns
    # are written to the frame.
    column_counters = (
        ('chars_count', len),
        ('words_count', count_words),
        ('emojis_count', count_emojis),
        ('numbers_count', count_numbers),
        ('punctuations_count', count_punctuations),
        ('dates_count', count_dates),
    )
    for column_name, counter in column_counters:
        # Look the text column up afresh on every pass instead of caching it.
        df[column_name] = df[text_column].apply(counter)
    return df