From 1b9de37f9fdf9c071b4bcb6c82d8d3eef582c3c2 Mon Sep 17 00:00:00 2001 From: Miniyahil Kebede <53700166+hmhard@users.noreply.github.com> Date: Mon, 12 Aug 2024 12:58:38 +0300 Subject: [PATCH] code refactoring (#2) --- README.md | 2 +- image.png => assets/image.png | Bin clean_stop_words.py | 6 +- clear-non-alpha.py | 6 +- .../cleared_unwanted_keys.json | 0 .../english_keys.json | 0 .../final_filtered_data.json | 0 .../invalid_keys.json | 0 .../non_english_keys.json | 0 stop_words.json => outputs/stop_words.json | 0 valid_keys.json => outputs/valid_keys.json | 0 .../word_dictionary.json | 0 process.sh | 36 ++ remove-duplicates.py | 4 +- remove_unwanted_chars_from_keys.py | 4 +- separate.py | 6 +- top-500.json | 502 ++++++++++++++++++ top-words.py | 24 +- word_cat.py | 2 +- words-extractor.py | 7 +- 20 files changed, 571 insertions(+), 28 deletions(-) rename image.png => assets/image.png (100%) rename cleared_unwanted_keys.json => outputs/cleared_unwanted_keys.json (100%) rename english_keys.json => outputs/english_keys.json (100%) rename final_filtered_data.json => outputs/final_filtered_data.json (100%) rename invalid_keys.json => outputs/invalid_keys.json (100%) rename non_english_keys.json => outputs/non_english_keys.json (100%) rename stop_words.json => outputs/stop_words.json (100%) rename valid_keys.json => outputs/valid_keys.json (100%) rename word_dictionary.json => outputs/word_dictionary.json (100%) create mode 100644 process.sh create mode 100644 top-500.json diff --git a/README.md b/README.md index d2d3d7b..bc91abe 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ## one of analysis dashboard -![Image](./image.png) +![Image](./assets/image.png) ### libraries used - bs4 diff --git a/image.png b/assets/image.png similarity index 100% rename from image.png rename to assets/image.png diff --git a/clean_stop_words.py b/clean_stop_words.py index 601bfc7..dd4562d 100644 --- a/clean_stop_words.py +++ b/clean_stop_words.py @@ -1,8 +1,8 @@ import json 
-data_file_path = 'cleared_unwanted_keys.json' -keys_to_remove_file_path = 'stop_words.json' -output_file_path = 'final_filtered_data.json' +data_file_path = 'outputs/cleared_unwanted_keys.json' +keys_to_remove_file_path = 'outputs/stop_words.json' +output_file_path = 'outputs/final_filtered_data.json' with open(data_file_path, 'r', encoding='utf-8') as file: key_value_pairs = json.load(file) diff --git a/clear-non-alpha.py b/clear-non-alpha.py index 06b7c4a..cf2ea2a 100644 --- a/clear-non-alpha.py +++ b/clear-non-alpha.py @@ -1,10 +1,10 @@ import json import re -file_path = 'word_dictionary.json' +file_path = 'outputs/word_dictionary.json' -valid_keys_file = 'valid_keys.json' -invalid_keys_file = 'invalid_keys.json' +valid_keys_file = 'outputs/valid_keys.json' +invalid_keys_file = 'outputs/invalid_keys.json' with open(file_path, 'r', encoding='utf-8') as file: data = json.load(file) diff --git a/cleared_unwanted_keys.json b/outputs/cleared_unwanted_keys.json similarity index 100% rename from cleared_unwanted_keys.json rename to outputs/cleared_unwanted_keys.json diff --git a/english_keys.json b/outputs/english_keys.json similarity index 100% rename from english_keys.json rename to outputs/english_keys.json diff --git a/final_filtered_data.json b/outputs/final_filtered_data.json similarity index 100% rename from final_filtered_data.json rename to outputs/final_filtered_data.json diff --git a/invalid_keys.json b/outputs/invalid_keys.json similarity index 100% rename from invalid_keys.json rename to outputs/invalid_keys.json diff --git a/non_english_keys.json b/outputs/non_english_keys.json similarity index 100% rename from non_english_keys.json rename to outputs/non_english_keys.json diff --git a/stop_words.json b/outputs/stop_words.json similarity index 100% rename from stop_words.json rename to outputs/stop_words.json diff --git a/valid_keys.json b/outputs/valid_keys.json similarity index 100% rename from valid_keys.json rename to outputs/valid_keys.json diff 
--git a/word_dictionary.json b/outputs/word_dictionary.json similarity index 100% rename from word_dictionary.json rename to outputs/word_dictionary.json diff --git a/process.sh b/process.sh new file mode 100644 index 0000000..69758ca --- /dev/null +++ b/process.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <top_n>" + exit 1 +fi + +top_n="$1" + +python_files=( + # "words-extractor.py" + # "clear-non-alpha.py" + # "separate.py" + # "remove_unwanted_chars_from_keys.py" + "clean_stop_words.py" +) + +for file in "${python_files[@]}" +do + echo "Running $file..." + python "$file" + if [ $? -ne 0 ]; then + echo "Error: $file failed to run." + exit 1 + fi + echo "$file completed successfully." +done + +echo "Running TOP $top_n..." +python "top-words.py" $top_n +if [ $? -ne 0 ]; then + echo "Error: top-words.py failed to run." + exit 1 +fi + +echo "All processing is done." diff --git a/remove-duplicates.py b/remove-duplicates.py index c7383c7..cbde3ff 100644 --- a/remove-duplicates.py +++ b/remove-duplicates.py @@ -1,10 +1,10 @@ import json # Path to the input JSON file containing an array of words -input_file_path = 'stop_words.json' +input_file_path = 'outputs/stop_words.json' # Path to the output JSON file where unique words will be saved -output_file_path = 'unique_words.json' +output_file_path = 'outputs/unique_words.json' # Open and load the JSON file with open(input_file_path, 'r', encoding='utf-8') as file: diff --git a/remove_unwanted_chars_from_keys.py b/remove_unwanted_chars_from_keys.py index b6d3906..ec9c63e 100644 --- a/remove_unwanted_chars_from_keys.py +++ b/remove_unwanted_chars_from_keys.py @@ -2,10 +2,10 @@ import re # Path to the input JSON file with key-value pairs -input_file_path = 'non_english_keys.json' +input_file_path = 'outputs/non_english_keys.json' # Path to the output JSON file where filtered key-value pairs will be saved -output_file_path = 
'outputs/cleared_unwanted_keys.json' # Regular expressions for identifying unwanted characters in keys symbols_pattern = re.compile(r'[^\w\s]', re.UNICODE) # Matches any symbols (excluding alphanumeric and whitespace) diff --git a/separate.py b/separate.py index 739d5b9..9ac8c23 100644 --- a/separate.py +++ b/separate.py @@ -1,11 +1,11 @@ import json import re -file_path = 'valid_keys.json' +file_path = 'outputs/valid_keys.json' # Output file paths -english_keys_file = 'english_keys.json' -non_english_keys_file = 'non_english_keys.json' +english_keys_file = 'outputs/english_keys.json' +non_english_keys_file = 'outputs/non_english_keys.json' # Open and load the JSON file with open(file_path, 'r', encoding='utf-8') as file: diff --git a/top-500.json b/top-500.json new file mode 100644 index 0000000..12d2708 --- /dev/null +++ b/top-500.json @@ -0,0 +1,502 @@ +{ + "ሰዎች": 14109, + "ከተማ": 10457, + "ክልል": 9968, + "አቶ": 9289, + "ቤት": 8725, + "አበባ": 7661, + "ሲሆን": 7288, + "ዛሬ": 6636, + "መሆኑን": 6383, + "ተማሪዎች": 5937, + "ቀን": 5896, + "ከፍተኛ": 5741, + "የኢትዮጵያ": 5684, + "ሰዓት": 5294, + "ፖሊስ": 5167, + "እንዲሁም": 5136, + "መንግስት": 4997, + "ትምህርት": 4992, + "መረጃ": 4845, + "ስራ": 4820, + "ብር": 4673, + "ኢትዮጵያ": 4528, + "ቫይረስ": 4229, + "ደግሞ": 4185, + "ቁጥር": 4154, + "ሚኒስትር": 4130, + "አገልግሎት": 4026, + "ዞን": 3999, + "ዩኒቨርሲቲ": 3820, + "ዓመት": 3709, + "ሚኒስቴር": 3588, + "አባላት": 3540, + "ችግር": 3503, + "ጠቅላይ": 3370, + "ሺህ": 3345, + "ሲሉ": 3280, + "መግለጫ": 3193, + "ጉዳት": 3151, + "አካላት": 3151, + "ምርመራ": 3099, + "አስተዳደር": 3060, + "ምክር": 3009, + "ወረዳ": 2953, + "ደረጃ": 2935, + "ድጋፍ": 2896, + "አካባቢ": 2883, + "ዶክተር": 2824, + "ሀገር": 2797, + "አቀፍ": 2726, + "መሰረት": 2687, + "ድረስ": 2654, + "ክፍል": 2564, + "ተቋማት": 2547, + "ልዩ": 2541, + "በኢትዮጵያ": 2526, + "ዕለት": 2491, + "ቤቱ": 2489, + "ቢሮ": 2480, + "እንደሆነ": 2456, + "ጉዳዮች": 2446, + "የትምህርት": 2393, + "ቤቶች": 2378, + "ያሉ": 2372, + "ምክትል": 2363, + "ጥቃት": 2360, + "ጨምሮ": 2360, + "አደጋ": 2343, + "በቁጥጥር": 2340, + "ቀናት": 2336, + "በቫይረሱ": 2326, + "ህይወት": 2304, + "ሰላም": 2293, + 
"ህዝብ": 2287, + "በአዲስ": 2278, + "ጥሪ": 2270, + "ወር": 2235, + "ሚሊዮን": 2215, + "ውይይት": 2209, + "በሚል": 2180, + "ተብሎ": 2164, + "የውጭ": 2153, + "ምንም": 2146, + "ግጭት": 2141, + "ፍርድ": 2136, + "ምርጫ": 2118, + "አህመድ": 2084, + "ዜጎች": 2082, + "በተለያዩ": 2073, + "ፈተና": 2040, + "የፀጥታ": 2026, + "ጤና": 2011, + "የአዲስ": 2004, + "አካባቢዎች": 1970, + "ጥያቄ": 1962, + "የስራ": 1952, + "ተከትሎ": 1943, + "ኮሚሽን": 1926, + "መንግሥት": 1924, + "በኮሮና": 1913, + "ድርጅት": 1897, + "በማድረግ": 1890, + "ውሳኔ": 1876, + "ኃይል": 1866, + "ያለውን": 1856, + "እስካሁን": 1852, + "ባለፉት": 1833, + "ነዋሪዎች": 1829, + "ያሉት": 1822, + "መያዛቸው": 1815, + "ይፋ": 1809, + "ለማድረግ": 1805, + "ሥራ": 1803, + "አካል": 1788, + "ይህን": 1783, + "ሪፖርት": 1776, + "ቃል": 1756, + "የሚገኙ": 1746, + "የክልሉ": 1735, + "ግንኙነት": 1731, + "ሆነው": 1727, + "አብይ": 1725, + "የጤና": 1721, + "በሙሉ": 1709, + "የነበሩ": 1672, + "በመሆኑ": 1652, + "የኮሮና": 1641, + "ኃላፊ": 1628, + "አጠቃላይ": 1618, + "ውጤት": 1617, + "እርምጃ": 1604, + "መሆኑ": 1594, + "ጥረት": 1593, + "የመንግስት": 1593, + "ቡድን": 1587, + "ሀገራት": 1586, + "በዛሬው": 1572, + "የሰላም": 1557, + "ዓለም": 1556, + "ዝግጅት": 1553, + "አየር": 1534, + "መልዕክት": 1532, + "ፓርቲ": 1521, + "የተያዙ": 1518, + "ምላሽ": 1514, + "ቀደም": 1488, + "ኮሚቴ": 1483, + "ግለሰቦች": 1470, + "ዋጋ": 1464, + "በዓል": 1438, + "ወጣቶች": 1425, + "ወንጀል": 1417, + "ሆስፒታል": 1416, + "በተመለከተ": 1413, + "እንቅስቃሴ": 1412, + "የላብራቶሪ": 1410, + "አመት": 1404, + "አመራሮች": 1401, + "ፕሬዝዳንት": 1400, + "መከላከያ": 1386, + "የሚል": 1376, + "በኃላ": 1372, + "ሆኖ": 1362, + "ባንክ": 1357, + "የፖለቲካ": 1347, + "በተጨማሪ": 1333, + "ብሔራዊ": 1315, + "ዐቢይ": 1310, + "የሆነ": 1307, + "ከተሞች": 1290, + "በተያያዘ": 1290, + "የትግራይ": 1286, + "ሳምንት": 1279, + "መልኩ": 1274, + "ብሎ": 1264, + "የአማራ": 1258, + "ባለው": 1241, + "በትግራይ": 1239, + "ልማት": 1235, + "ኃይሎች": 1233, + "ጉባኤ": 1219, + "አባል": 1218, + "ድምፅ": 1211, + "ሂደት": 1205, + "መብት": 1201, + "ህግ": 1192, + "ደህንነት": 1192, + "ገንዘብ": 1178, + "ሚኒስትሩ": 1176, + "ጦርነት": 1170, + "ያላቸው": 1160, + "በመቶ": 1158, + "ስብሰባ": 1151, + "የህዝብ": 1150, + "ሚዲያ": 1142, + "ዳይሬክተር": 1133, + "በሰላም": 1120, + "ስጋት": 1113, + "የአሜሪካ": 1103, + "ቤተሰብ": 1101, + 
"ስምምነት": 1096, + "አደባባይ": 1095, + "ዓመታት": 1088, + "ሰላማዊ": 1084, + "ወራት": 1079, + "ሰልፍ": 1077, + "የአፍሪካ": 1075, + "መረጃዎች": 1072, + "በመሆን": 1071, + "ሳይሆን": 1071, + "ቦርድ": 1067, + "ህይወታቸው": 1061, + "ጣቢያ": 1055, + "ተወካዮች": 1045, + "በክልሉ": 1045, + "ክልሎች": 1040, + "ብሄራዊ": 1034, + "የህክምና": 1032, + "አፍሪካ": 1032, + "ከባድ": 1027, + "ህብረት": 1026, + "ተግባር": 1023, + "ዘመን": 1014, + "በማለት": 1014, + "መፍትሄ": 1014, + "አምባሳደር": 1013, + "የነበረ": 1002, + "በጋራ": 1000, + "በሰጡት": 997, + "ህክምና": 986, + "ሱዳን": 985, + "መሆናቸውን": 984, + "እርዳታ": 984, + "መደበኛ": 984, + "የሆኑ": 982, + "ሰዎችን": 981, + "ምሽት": 981, + "ግንባታ": 979, + "ትኩረት": 979, + "ከበሽታው": 975, + "ዙር": 974, + "ተማሪዎችን": 972, + "ፕሮግራም": 967, + "ግለሰብ": 965, + "ሰራዊት": 959, + "ሰራተኞች": 954, + "የጦር": 949, + "የምርጫ": 944, + "እየተካሄደ": 944, + "አንዱ": 942, + "ስም": 940, + "እየተደረገ": 939, + "የጋራ": 938, + "ንብረት": 937, + "የኦሮሚያ": 933, + "ዘርፍ": 931, + "ሜትር": 930, + "ንግግር": 928, + "የሆኑት": 913, + "ነጥብ": 910, + "ይህንን": 900, + "ጥንቃቄ": 900, + "ቦታዎች": 896, + "ክትትል": 895, + "ክስ": 885, + "የሀገር": 884, + "ተማሪ": 881, + "አዋጅ": 881, + "ዝርዝር": 880, + "የሰው": 878, + "ትግራይ": 876, + "የጸጥታ": 870, + "መልካም": 868, + "አድርጎ": 868, + "አሜሪካ": 866, + "ክፍለ": 865, + "የንግድ": 862, + "የዓለም": 859, + "የፌደራል": 858, + "ተደርጎ": 857, + "ያገገሙ": 855, + "ስርዓት": 854, + "በሀገሪቱ": 852, + "ተቋም": 848, + "የቲክቫህ": 847, + "በዚህም": 845, + "ወገኖች": 844, + "አራት": 841, + "እለት": 838, + "አስቸኳይ": 837, + "ሶስት": 834, + "ማዕከል": 834, + "ግድብ": 832, + "ቀበሌ": 831, + "ጦር": 829, + "ሺ": 825, + "ታሪክ": 822, + "ኢትዮጵያውያን": 822, + "ከንቲባ": 821, + "ችግሮች": 817, + "ድርጅቶች": 812, + "ማህበረሰብ": 809, + "ለመከላከል": 806, + "አቅም": 805, + "አገር": 803, + "የሰብዓዊ": 803, + "ከአዲስ": 802, + "ባለፈው": 802, + "ፕሬዚዳንት": 800, + "ቤተ": 798, + "ውጪ": 796, + "በአማራ": 795, + "መሪ": 793, + "መድረክ": 793, + "መሠረት": 790, + "በአሁኑ": 788, + "መስቀል": 788, + "ዶላር": 786, + "የመጀመሪያ": 785, + "የግል": 784, + "በአጠቃላይ": 778, + "ችሎት": 778, + "ምግብ": 775, + "መምሪያ": 775, + "ጋዜጠኛ": 774, + "ነዋሪ": 773, + "ኤፍ": 772, + "በአካባቢው": 771, + "ለመስጠት": 770, + "ርዕሰ": 770, + "የሥራ": 767, + "ብለው": 766, + 
"መኪና": 765, + "መጠን": 765, + "ዩኒቨርሲቲዎች": 764, + "መስጠት": 764, + "የምግብ": 760, + "ነፃ": 759, + "የሚገኘው": 758, + "የመከላከያ": 754, + "መኖሪያ": 752, + "ስራዎች": 751, + "ሞት": 749, + "የፌዴራል": 747, + "የነበረውን": 744, + "በቀጣይ": 742, + "ቤተሰቦች": 737, + "አመራር": 736, + "ጎንደር": 734, + "ተግባራዊ": 731, + "በማህበራዊ": 730, + "አስተዳደሩ": 730, + "መንገዶች": 727, + "ድርጊት": 725, + "ከኢትዮጵያ": 725, + "መስከረም": 724, + "ወጥ": 723, + "መሳሪያ": 720, + "በደቡብ": 720, + "ሰኔ": 720, + "በከፍተኛ": 720, + "ቁጥጥር": 718, + "ደቡብ": 717, + "መረጃዎችን": 716, + "ሴቶች": 715, + "ሚሊየን": 714, + "ባለሙያዎች": 714, + "ሆቴል": 710, + "በቂ": 709, + "በበሽታው": 709, + "ንግድ": 708, + "ቫይረሱ": 706, + "እንዲሆን": 705, + "ባወጣው": 705, + "ክፍሎች": 704, + "ፓርቲዎች": 704, + "ትልቅ": 701, + "መቶ": 696, + "መስሪያ": 693, + "አስመልክቶ": 692, + "ቀጠሮ": 692, + "ሚዲያዎች": 689, + "ለመፍታት": 689, + "ግብር": 688, + "ሳይንስ": 685, + "ይህም": 684, + "በኦሮሚያ": 684, + "ጉዞ": 680, + "በነበረው": 678, + "ወታደራዊ": 677, + "የሚያስችል": 673, + "እንደሚገኝ": 673, + "ሕዝብ": 673, + "የገንዘብ": 671, + "የነበሩት": 670, + "የሀገሪቱ": 669, + "ኪሎ": 668, + "መሆኑንም": 667, + "በሁሉም": 666, + "ባለስልጣናት": 664, + "ጥበቃ": 664, + "የደቡብ": 661, + "የሆነው": 660, + "ህብረተሰቡ": 660, + "ወረርሽኝ": 660, + "ዩኒቨርስቲ": 654, + "ህዝቦች": 653, + "የሟቾች": 652, + "ውድድር": 651, + "ቴክኖሎጂ": 650, + "አውሮፕላን": 645, + "በሆነ": 645, + "እየተሰራ": 644, + "ግንቦት": 644, + "አገራት": 644, + "ባለስልጣን": 643, + "የአየር": 642, + "ይዞ": 641, + "ስልክ": 629, + "ወረዳዎች": 629, + "ሲሆኑ": 625, + "ጽህፈት": 625, + "ግቢ": 624, + "እየሰራ": 623, + "የእሳት": 622, + "ስነ": 621, + "ያላቸውን": 619, + "ለኢትዮጵያ": 617, + "ማህበር": 617, + "መገናኛ": 616, + "መስተዳድር": 616, + "የከተማ": 615, + "የኦሮሞ": 614, + "ምዝገባ": 612, + "ኤጀንሲ": 611, + "የህግ": 610, + "ተሽከርካሪዎች": 610, + "እጥረት": 609, + "ዕጣ": 609, + "ማህበራዊ": 607, + "ፈቃድ": 604, + "ጭምር": 603, + "በተለይም": 601, + "በጀት": 599, + "ስርጭት": 598, + "ከእስር": 598, + "ኮሚሽነር": 598, + "እያደረገ": 597, + "ማብራሪያ": 594, + "አስፈላጊውን": 594, + "ጥቅምት": 593, + "ክልላዊ": 591, + "በሌሎች": 589, + "ኢትዮጵያን": 588, + "በይፋ": 588, + "ሀሰተኛ": 588, + "የቤት": 587, + "ሕግ": 586, + "ክፍያ": 583, + "ለህዝብ": 582, + "ተስፋ": 581, + "አቀባበል": 581, + "በሚገኘው": 580, + 
"ነዳጅ": 579, + "ሬድዮ": 578, + "የአንድ": 577, + "ውሃ": 576, + "አዲሱ": 576, + "ወቅታዊ": 575, + "ትራንስፖርት": 573, + "ግንባር": 571, + "የትራንስፖርት": 570, + "የነዳጅ": 570, + "ልጆች": 568, + "ተገኝተው": 568, + "የምርመራ": 568, + "መሪዎች": 566, + "ስለሆነ": 565, + "ተጠርጣሪዎች": 564, + "ታከለ": 563, + "ጉሙዝ": 562, + "የማህበራዊ": 561, + "የሚሆኑ": 560, + "አስፈፃሚ": 560, + "በቀለ": 559, + "ደብዳቤ": 559, + "ገቢ": 559, + "ቅድመ": 558, + "ምንጭ": 556, + "በተደረገው": 556, + "መቐለ": 556, + "ወጣት": 554, + "ሥር": 553, + "ሴት": 552, + "በሀገራችን": 550, + "ጥያቄዎች": 549, + "ጉብኝት": 549, + "ታደሰ": 548, + "ዝግጁ": 547, + "ማቆያ": 545, + "ቴሌቪዥን": 544 +} \ No newline at end of file diff --git a/top-words.py b/top-words.py index 6b2f460..839c41b 100644 --- a/top-words.py +++ b/top-words.py @@ -1,16 +1,22 @@ import json +import argparse # Path to the JSON file -file_path = 'final_filtered_data.json' +file_path = 'outputs/final_filtered_data.json' -with open(file_path, 'r', encoding='utf-8') as file: - data = json.load(file) +def main(top_n): + with open(file_path, 'r', encoding='utf-8') as file: + data = json.load(file) + top = dict(sorted(data.items(), key=lambda item: item[1], reverse=True)[:top_n]) -top_n = 500 + with open("top-"+str(top_n)+".json", 'w', encoding='utf-8') as file: + json.dump(top, file, ensure_ascii=False, indent=4) + -top = dict(sorted(data.items(), key=lambda item: item[1], reverse=True)[:100]) - -print(json.dumps(top, indent=4)) -with open("top-"+str(top_n)+".json", 'w', encoding='utf-8') as file: - json.dump(top, file, ensure_ascii=False, indent=4) \ No newline at end of file +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process top n.") + + parser.add_argument('top_n', type=int, help='An integer representing the top N value') + args = parser.parse_args() + main(args.top_n) \ No newline at end of file diff --git a/word_cat.py b/word_cat.py index 2bbfe2c..cb9108c 100644 --- a/word_cat.py +++ b/word_cat.py @@ -1,7 +1,7 @@ import json import re -file_path = 'word_dictionary.json' +file_path = 
'outputs/word_dictionary.json' # Open and load the JSON file with open(file_path, 'r', encoding='utf-8') as file: data = json.load(file) diff --git a/words-extractor.py b/words-extractor.py index 44b719e..109e6df 100644 --- a/words-extractor.py +++ b/words-extractor.py @@ -8,10 +8,9 @@ # Function to clean and split text into words def extract_words(text): - # Remove punctuation, convert to lowercase, and split into words - words = text.lower().split() + words_list = text.lower().split() # NOTE: additional processing will be added later - return words + return words_list # Initialize a defaultdict to store word counts word_dict = defaultdict(int) @@ -38,7 +37,7 @@ def extract_words(text): word_dict = dict(word_dict) # Save the word dictionary to a JSON file -output_file = 'word_dictionary.json' +output_file = 'outputs/word_dictionary.json' with open(output_file, 'w', encoding='utf-8') as json_file: json.dump(word_dict, json_file, ensure_ascii=False, indent=4)