How to use

Example

(Data: https://github.com/RomiconEZ/AnaText/blob/main/tests/20_newsgroup_text_only_50.csv — save this file in the same directory as the script below, which loads it from there.)

from pathlib import Path
import eta

def test_split_merge_clusters():
    """Run the ``eta`` clustering, split and union calls end to end.

    The input file ``20_newsgroup_text_only_50.csv`` is looked up in the
    directory that contains this file.  The function only exercises the
    calls and reads the keys of the returned dictionaries; it makes no
    assertions about the values and returns ``None``.
    """
    csv_path = Path(__file__).parent / "20_newsgroup_text_only_50.csv"
    clustering = eta.cluster_documents_with_keywords(csv_path, verbose=True)

    # Read every entry of the result dictionary, in order, to show what is
    # available; only ``data``, ``reduce_model`` and ``embeddings`` are
    # needed by the calls below.
    (
        df,
        top_word_dict,
        data,
        initial_centers,
        radiuses,
        cluster_model,
        initial_centers_2d,
        reduce_model,
        embeddings,
        tokenizer,
        model,
    ) = (
        clustering[key]
        for key in (
            "df",
            "top_word_dict",
            "data",
            "cluster_centers",
            "radiuses",
            "cluster_model",
            "cluster_centers_2d",
            "reduce_model",
            "embeddings",
            "tokenizer",
            "model",
        )
    )

    refreshed_keys = ("data", "cluster_centers_2d", "radiuses")
    ids_to_union = [0, 1]
    id_to_split = 0
    split_divisor = 2

    # Step 1: split call for cluster number 0 with divisor 2.
    split_result = eta.split_cluster(
        id_to_split,
        split_divisor,
        data,
        reduce_model,
        embeddings.to_list(),
    )
    data, cluster_centers_2d, radiuses = (
        split_result[key] for key in refreshed_keys
    )

    # Step 2: union call for clusters 0 and 1, fed with the data returned
    # by the split call above.
    union_result = eta.union_clusters(
        ids_to_union,
        data,
        reduce_model,
        embeddings.to_list(),
    )
    data, cluster_centers_2d, radiuses = (
        union_result[key] for key in refreshed_keys
    )

# Allow running this example directly as a script, without a test runner.
if __name__ == '__main__':
    test_split_merge_clusters()