Module maleo.scanner
Expand source code Browse git
from ._scanner import *
# Public API: `scanner` is the only name exported by a star-import of this module.
__all__ = ["scanner"]
Functions
def scanner(df: pandas.core.frame.DataFrame, text_column: str) ‑> pandas.core.frame.DataFrame
-
Scan a text dataset by counting the characters, words, emojis, numbers, punctuation marks, and dates in each text.
Parameters
df
:pd.DataFrame
- DataFrame of text data
text_column
:str
- Name of the DataFrame column that contains the text
Returns
df
:pd.DataFrame
- The input DataFrame with the per-text count columns added (a summary of the text dataset)
Expand source code Browse git
def scanner(df: pd.DataFrame, text_column: str) -> pd.DataFrame:
    """Add per-text count columns to a DataFrame of text data.

    For every value in ``df[text_column]`` six counts are computed and stored
    in new columns: ``chars_count``, ``words_count``, ``emojis_count``,
    ``numbers_count``, ``punctuations_count`` and ``dates_count``.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame of text data. It is modified in place: the count columns
        are assigned directly onto this object.
    text_column : str
        Name of the column in ``df`` that holds the text to scan.

    Returns
    -------
    df : pd.DataFrame
        The same DataFrame object that was passed in, now carrying the count
        columns.

    Notes
    -----
    ``chars_count`` is ``len`` of each value. The remaining counts are
    delegated to the ``count_*`` helpers defined elsewhere in the package;
    what exactly each helper treats as a word, emoji, number, punctuation
    mark or date is not visible here.
    """
    # (output column, per-value counter) pairs, in the order the columns are
    # appended to the frame.
    column_counters = (
        ('chars_count', len),
        ('words_count', count_words),
        ('emojis_count', count_emojis),
        ('numbers_count', count_numbers),
        ('punctuations_count', count_punctuations),
        ('dates_count', count_dates),
    )

    for column_name, counter in column_counters:
        # Look the text column up afresh on every pass rather than caching it,
        # so each count is derived from the frame's current contents.
        df[column_name] = df[text_column].apply(counter)

    return df