diff --git a/alabel/labels.py b/alabel/labels.py index 31cb542..35bbef7 100644 --- a/alabel/labels.py +++ b/alabel/labels.py @@ -77,8 +77,12 @@ def main(): printe.output(f'<> {num}/{len_result} qid:"{qid}", page:"{page}"') # --- if page: - himoAPI.Labels_API(qid, page, "ar", False, Or_Alii=True) + # himoAPI.Labels_API(qid, page, "ar", False, Or_Alii=True) + himoAPI.Add_Labels_if_not_there(qid, page, "ar", False) if __name__ == "__main__": - main() + if "test" in sys.argv: + himoAPI.Add_Labels_if_not_there("Q109927", "83 Beatrix", "ar", False) + else: + main() diff --git a/cy/cy5.py b/cy/cy5.py deleted file mode 100644 index 2f744e2..0000000 --- a/cy/cy5.py +++ /dev/null @@ -1,1277 +0,0 @@ -#!/usr/bin/python3 -""" -python pwb.py cy/cy5 -page:باتريك_كونراد -python pwb.py cy/cy5 -page:جويل_سوتير -python pwb.py cy/cy5 -page:كريس_فروم - -python pwb.py cy/cy5 workibrahem test2 -title:خوان_سباستيان_مولانو -python pwb.py cy/cy5 workibrahem test2 -title:إليسا_ونغو_بورغيني ask -python pwb.py cy/cy5 workibrahem test2 -title:كوين_سيمونز - -""" - -# -# (C) Ibrahem Qasim, 2022 -# -# -import re -import sys -import urllib -import urllib.request -import urllib.parse - -# --- -import requests -import datetime - -# --- -AskToSave = True -from datetime import datetime - -menet = datetime.now().strftime("%Y-%b-%d %H:%M:%S") -# --- -# from API.useraccount import * -from . import useraccount - -api_url = "https://" + "ar.wikipedia.org/w/api.php" -username = useraccount.username -password = useraccount.password -# --- -workibrahem = False -if "workibrahem" in sys.argv: - from API import useraccount - - username = useraccount.hiacc - password = useraccount.hipass - workibrahem = True - print("workibrahem active") -# --- -session = {1: requests.Session(), "csrftoken": ""} - - -def login(): - # get login token - r1 = session[1].get( - api_url, - params={ - "format": "json", - "action": "query", - "meta": "tokens", - "type": "login", - }, - timeout=10, - ) - r1.raise_for_status() - - # log in - r2 = session[1].post( - api_url, - data={ - "format": "json", - "action": "login", - "lgname": username, - "lgpassword": password, - "lgtoken": r1.json()["query"]["tokens"]["logintoken"], - }, - timeout=10, - ) - - # print( str( r2.json() ) ) - - if r2.json()["login"]["result"] != "Success": - raise RuntimeError(r2.json()["login"]["reason"]) - - # get edit token - r3 = session[1].get( - api_url, - params={ - "format": "json", - "action": "query", - "meta": "tokens", - }, - timeout=10, - ) - session["csrftoken"] = r3.json()["query"]["tokens"]["csrftoken"] - - -# --- -login() -# --- -remove_date = {} -Work_with_Year = {} -Work_with_Stage = {1: False} -Stage = {"": ""} -# --- -TEST = {1: False, 2: False} -# import pywikibot -# --- -# from likeapi import encode -# encode.encode_arabic(label) -# --- -litters = { - "ا": "%D8%A7", - "ب": "%D8%A8", - "ت": "%D8%AA", - "ث": "%D8%AB", - "ج": "%D8%AC", - "ح": "%D8%AD", - "خ": "%D8%AE", - "د": "%D8%AF", - "ذ": "%D8%B0", - "ر": "%D8%B1", - "ز": "%D8%B2", - "س": "%D8%B3", - "ش": "%D8%B4", - "ص": "%D8%B5", - "ض": "%D8%B6", - "ط": "%D8%B7", - "ظ": "%D8%B8", - "ع": "%D8%B9", - "غ": "%D8%BA", - "ف": "%D9%81", - "ق": "%D9%82", - "ك": "%D9%83", - "ل": "%D9%84", - "م": "%D9%85", - "ن": "%D9%86", - "ه": "%D9%87", - "و": "%D9%88", - "ي": "%D9%8A", - "أ": "%D8%A3", - "آ": "%D8%A2", - "إ": "%D8%A5", - "ى": "%D9%89", - "ء": "%D8%A1", - "ئ": "%D8%A6", - "ؤ": "%D8%A4", - " ": "%20", - "_": "%20", -} - - -def encode_arabic(label): - label2 = label - for x in litters: - label2 = label2.replace(x, 
litters[x]) - return label2 - - -def ec_de_code(tt, type): - fao = tt - if type == "encode": - fao = urllib.parse.quote(tt) - elif type == "decode": - fao = urllib.parse.unquote(tt) - return fao - - -def print_test2(s): - if TEST[2]: - # pywikibot.output(s) - print(s) - - -def printt(s): - SS = False - if SS or "test" in sys.argv or "test2" in sys.argv: - # pywikibot.output(s) - print(s) - - -def printo(s): - SS = True - if SS: - try: - print(ec_de_code(s, "encode")) - except BaseException: - print("") - if workibrahem: - print(s) - - -# --- -HeadVars = ["imagejersey"] -JOJOJO = "نتيجة سباق الدراجات/جيرسي" - - -def findflag(race, flag): - flage = { - "إيطاليا": "{{رمز علم|إيطاليا}}", - "جيرو ديل ترينتينو": "{{رمز علم|إيطاليا}}", - "the Alps": "{{رمز علم|إيطاليا}}", - "France": "{{رمز علم|فرنسا}}", - "فرنسا": "{{رمز علم|فرنسا}}", - "إسبانيا": "{{رمز علم|إسبانيا}}", - "دونكيرك": "{{رمز علم|بلجيكا}}", - "غنت-وفلجم": "{{رمز علم|بلجيكا}}", - "Gent–Wevelgem": "{{رمز علم|بلجيكا}}", - "Norway": "{{رمز علم|النرويج}}", - "النرويج": "{{رمز علم|النرويج}}", - "كريثيديا دو دوفين": "{{رمز علم|سويسرا}}", - "du Dauphiné": "{{رمز علم|سويسرا}}", - "سويسرا": "{{رمز علم|سويسرا}}", - "باريس-نايس": "{{رمز علم|فرنسا}}", - } - # --- - race = str(race) - # --- - for ff in flage: - te = re.sub(str(ff), "", race) - # --- - if te != race: - flag = flage[ff] - # --- - return flag - - -# --- -Skip_items = ["Q4115189"] - - -def fix_label(label): - label = label.strip() - - label = re.sub(r"بطولة العالم لسباق الدراجات على الطريق (\d+) – سباق الطريق الفردي للرجال", r"سباق الطريق في بطولة العالم \g<1>", label) - - label = re.sub(r"ركوب الدراجات في الألعاب الأولمبية الصيفية (\d+) – سيدات فردي سباق الطريق", r"سباق الطريق للسيدات في ركوب الدراجات الأولمبية الصيفية \g<1>", label) - - label = re.sub(r"ركوب الدراجات في الألعاب الأولمبية الصيفية (\d+) – فريق رجال سباق الطريق", r"سباق الطريق لفرق الرجال في ركوب الدراجات الأولمبية الصيفية \g<1>", label) - - # بطولة العالم لسباق الدراجات على الطريق 1966 – سباق الطريق الفردي للرجال - label = re.sub(r"بطولة العالم لسباق الدراجات على الطريق (\d+) – سباق الطريق الفردي للرجال", r"سباق الطريق للرجال في بطولة العالم \g<1>", label) - - label = re.sub(r"سباق الطريق المداري ", "سباق الطريق ", label) - label = re.sub(r"(بطولة [\s\w]+) الوطنية ", r"\g<1> ", label) - label = re.sub(r"^(سباق\s*.*? في بطولة العالم)\s*(لسباق الدراجات على الطريق|للدراجات) (.*?)$", r"\g<1> \g<3>", label) - label = re.sub(r"^(سباق\s*.*? 
في بطولة [\s\w]+)\s*(لسباق الدراجات على الطريق|للدراجات) (.*?)$", r"\g<1> \g<3>", label) - - # سباق الطريق للسيدات في ركوب الدراجات في الألعاب الأولمبية الصيفية 2016 - label = re.sub(r"في ركوب الدراجات في الألعاب الأولمبية ", "في ركوب الدراجات الأولمبية ", label) - - # في ركوب الدراجات في دورة ألعاب الكومنولث - label = re.sub(r"ركوب الدراجات في دورة ألعاب الكومنولث", "ركوب الدراجات في دورة الكومنولث", label) - label = re.sub(r"\s+", " ", label) - return label - - -def make_temp_lines(table, title): - # --- - for rr in HeadVars: - if rr not in table: - table[rr] = "" - # --- - image = table["imagejersey"] - image = re.sub(r"JOJOJO", JOJOJO, image) - image = image.replace("%20", "_") - # --- - date = table["Date"] - flag = table["p17lab"] - # --- - qid = table["item"] - table2 = {"race": "", "p17": "", "poss": "", "qid": qid} - # --- - if qid in Skip_items: - return "", table2 - # --- - link = table.get("title", "") - label = table.get("itemlab", "") - if link: - race = f"[[{link}]]" - label = link.split(" (")[0] - # --- - label = fix_label(label) - # --- - if link: - race = f"[[{link}|{label}]]" if label != link else f"[[{link}]]" - else: - race = label - # --- - sss = table["p642label"] - # الفائز وفقاً لترتيب النقاط للشباب - sss = re.sub(r"الفائز وفقاً لترتيب", "الفائز في ترتيب", sss) - sss = re.sub(r"الفائز حسب التصنيف العام", "الفائز في التصنيف العام", sss) - # --- - ranke = table.get("rank", "") - # --- - ranke_tab = { - "المرتبة 1 في": "الأول في", - "المرتبة 2 في": "الثاني في", - "المرتبة 3 في": "الثالث في", - "المرتبة 4 في": "الرابع في", - "المرتبة 5 في": "الخامس في", - "المرتبة 6 في": "السادس في", - "المرتبة 7 في": "السابع في", - "المرتبة 8 في": "الثامن في", - "المرتبة 9 في": "التاسع في", - "المرتبة 10 في": "العاشر في", - # "المرتبة 11 في" : "الحادي عشر في", - # "المرتبة 12 في" : "الثاني عشر في", - } - for kk in ranke_tab: - if ranke.find(kk) >= 0: - ranke = re.sub(kk, ranke_tab[kk], ranke) - # --- - newflag = findflag(race, flag) - # --- - table2["race"] = race - table2["p17"] = newflag - table2["poss"] = sss - # --- - so = "{{نتيجة سباق الدراجات/سطر4" - so = so + "\n|qid = " + qid - so = so + "\n|السباق = " + race - so = so + "\n|البلد = " + newflag - so = so + "\n|التاريخ = " + date - so = so + "\n|المركز = " + sss - so = so + "\n|المرتبة = " + ranke - so = so + "\n|جيرسي = " + image - so += "\n}}" - # --- - if race and race.lower().strip().startswith("q"): - printt(" *** remove line startswith q.") - return "", table2 - # --- - r""" - fanco = title - #fanco = qid - # --- - if fanco not in remove_date: - remove_date[fanco] = 0 - # --- - if fanco in Work_with_Year : - if not date: - remove_date[fanco] += 1 - print_test2( 'remove_date[fanco] += 1 (%d) date == ""' % remove_date[fanco] ) - return "", table2 - else: - hhh = re.match(r'(\d\d\d\d)\-\d\d\-\d\dT\d\d\:\d\d\:\d\dZ', date ) - if hhh : - if int( hhh.group(1) ) < Work_with_Year[fanco] : - remove_date[fanco] += 1 - print_test2( 'remove_date[fanco] += 1 (%d) date == "%s"' % (remove_date[fanco], date) ) - return "", table2 - # ---""" - if ranke and sss.strip() == "": - if Work_with_Stage[1] is False and Len_of_valid_results.get(title, 0) > 10: - if re.sub(r"المرتبة 1 في", "", ranke) == ranke and re.sub(r"الأول في", "", ranke) == ranke: - printt(" *** remove line with rank < 1.") - return "", table2 - # --- - if flag != newflag: - printt(f' *** race:"{race}", flag:"{flag}", newflag:"{newflag}"') - # --- - if title not in Len_of_valid_results: - Len_of_valid_results[title] = 0 - Len_of_valid_results[title] += 1 - # --- - return so, 
table2 - - -# --- -qu_2018 = """SELECT -?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?jersey_3 ?jersey_4 ?p642label ?p585 ?p582 ?p580 ?title -WHERE { -SELECT ?item ?itemlab ?jerseylab ?p17lab - ?jersey1lab ?image1 ?image2 ?image3 ?image4 - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_1) - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image2), "/Special:FilePath/"), "|", ?jersey2lab, "}}") AS ?jersey_2) - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image3), "/Special:FilePath/"), "|", ?jersey3lab, "}}") AS ?jersey_3) - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image4), "/Special:FilePath/"), "|", ?jersey4lab, "}}") AS ?jersey_4) - ?p642label ?p585 ?p582 ?p580 ?title - WHERE { - BIND(wd:Q447532 AS ?aa) - ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. ?winner pq:P642 ?P642. - OPTIONAL { ?item p:P4323 ?statment1. ?statment1 ps:P4323 ?aa. ?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } - OPTIONAL { ?item p:P2321 ?statment2. ?statment2 ps:P2321 ?aa. ?statment2 pq:P2912 ?jersey2. ?jersey2 wdt:P18 ?image2. } - OPTIONAL { ?item p:P4320 ?statment3. ?statment3 ps:P4320 ?aa. ?statment3 pq:P2912 ?jersey3. ?jersey3 wdt:P18 ?image3. } - OPTIONAL { ?item p:P3494 ?statment4. ?statment4 ps:P3494 ?aa. ?statment4 pq:P2912 ?jersey4. ?jersey4 wdt:P18 ?image4. } - - OPTIONAL { ?item wdt:P17 ?p17.} - OPTIONAL { ?item wdt:P585 ?p585.} - OPTIONAL { ?item wdt:P582 ?p582.} - OPTIONAL { ?item wdt:P580 ?p580.} - FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } - FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } - FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } - OPTIONAL { ?sitelink schema:about ?item - . ?sitelink schema:isPartOf - . ?sitelink schema:name ?title } - SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". - ?p17 rdfs:label ?p17lab. - ?item rdfs:label ?itemlab. - ?jersey1 rdfs:label ?jersey1lab. - ?jersey2 rdfs:label ?jersey2lab. - ?jersey3 rdfs:label ?jersey3lab. - ?jersey4 rdfs:label ?jersey4lab. - ?P642 rdfs:label ?p642label. - } - -} } """ -# --- -q22u = """SELECT - ?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?p642label ?p585 ?p582 ?p580 - WHERE { - SELECT ?item ?itemlab ?jerseylab ?image ?p17lab - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image), "/Special:FilePath/"), "|", ?jerseylab, "}}") AS ?jersey_1) - ?jersey1lab ?image1 - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_2) - ?p642label ?p585 ?p582 ?p580 - WHERE { - BIND(wd:Q518222 AS ?aa) - OPTIONAL { ?item p:P2417 ?statment. ?statment ps:P2417 ?aa. ?statment pq:P2912 ?jersey. ?jersey wdt:P18 ?image. } - OPTIONAL { ?item p:P2321 ?statment1. ?statment1 ps:P2321 ?aa. ?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } - OPTIONAL { ?item wdt:P17 ?p17.} - OPTIONAL { ?item wdt:P585 ?p585.} - OPTIONAL { ?item wdt:P582 ?p582.} - OPTIONAL { ?item wdt:P580 ?p580.} - ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. - ?winner pq:P642 ?P642. - FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } - FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } - FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } - SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". - ?p17 rdfs:label ?p17lab. - ?item rdfs:label ?itemlab. - ?jersey rdfs:label ?jerseylab. - ?jersey1 rdfs:label ?jersey1lab. - ?P642 rdfs:label ?p642label. 
- } - - } } """ - - -def get_query_results(query): - # --- - query = re.sub(r"\n\s+", "\n", query) - # --- - fao = urllib.parse.quote(query) - # --- - url = f"https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query={fao}" - # --- - if "printurl" in sys.argv: - printt(url) - # --- - req = False - # --- - try: - req = session[1].get(url, timeout=10) - - except requests.exceptions.ReadTimeout: - print(f"ReadTimeout: {url}") - - except Exception as e: - print("<> Traceback (most recent call last):") - print(f"<> Exception:{e}.") - print("CRITICAL:") - # --- - json1 = {} - if req: - try: - json1 = req.json() - except Exception as e: - json1 = {} - # --- - print("<> Traceback (most recent call last):") - e = str(e) - if "java.util.concurrent" in e: - e = "java.util.concurrent" - print(f"<> Exception:{e}.") - print("CRITICAL:") - # --- - return json1 - - -def GetSparql(qid, title): - old_qu = """SELECT - ?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?p642label ?p585 ?p582 ?p580 ?title - WHERE { - SELECT ?item ?itemlab ?jerseylab ?image ?p17lab - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image), "/Special:FilePath/"), "|", ?jerseylab, "}}") AS ?jersey_1) - ?jersey1lab ?image1 - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_2) - ?p642label ?p585 ?p582 ?p580 ?title - WHERE { - BIND(wd:Q518222 AS ?aa) - OPTIONAL { ?item p:P2417 ?statment. ?statment ps:P2417 ?aa. ?statment pq:P2912 ?jersey. ?jersey wdt:P18 ?image. } - OPTIONAL { ?item p:P2321 ?statment1. ?statment1 ps:P2321 ?aa. ?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } - OPTIONAL { ?item wdt:P17 ?p17.} - OPTIONAL { ?item wdt:P585 ?p585.} - OPTIONAL { ?item wdt:P582 ?p582.} - OPTIONAL { ?item wdt:P580 ?p580.} - ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. - ?winner pq:P642 ?P642. - FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } - FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } - FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } - OPTIONAL { ?sitelink schema:about ?item - . ?sitelink schema:isPartOf - . ?sitelink schema:name ?title } - SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". - ?p17 rdfs:label ?p17lab. - ?item rdfs:label ?itemlab. - ?jersey rdfs:label ?jerseylab. - ?jersey1 rdfs:label ?jersey1lab. - ?P642 rdfs:label ?p642label. - } - - } } """ - # --- - qu_2019 = """SELECT DISTINCT ?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?jersey_3 ?jersey_4 ?p642label ?p585 ?p582 ?p580 ?rankP4323 ?rankP2321 ?rankP4320 ?rankP3494 ?title - WHERE { SELECT DISTINCT ?item ?itemlab ?jerseylab ?p17lab ?rankP4323 ?rankP2321 ?rankP4320 ?rankP3494 - ?jersey1lab ?image1 ?image2 ?image3 ?image4 - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_1) - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image2), "/Special:FilePath/"), "|", ?jersey2lab, "}}") AS ?jersey_2) - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image3), "/Special:FilePath/"), "|", ?jersey3lab, "}}") AS ?jersey_3) - (CONCAT("{{JOJOJO|", STRAFTER(STR(?image4), "/Special:FilePath/"), "|", ?jersey4lab, "}}") AS ?jersey_4) - ?p642label ?p585 ?p582 ?p580 ?title - WHERE { - BIND(wd:Q447532 AS ?aa) - ?item wdt:P31 ?a1a. - OPTIONAL { ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. ?winner pq:P642 ?P642. } - ?item (p:P1346|p:P4323|p:P2321|p:P4320|p:P3494) ?statment0. - ?statment0 (ps:P1346|ps:P4323|ps:P2321|ps:P4320|ps:P3494) ?aa. - OPTIONAL { ?item p:P4323 ?statment1 . ?statment1 ps:P4323 ?aa. 
- OPTIONAL {?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } - OPTIONAL {?statment1 pq:P1352 ?rankP4323. } - } - OPTIONAL { ?item p:P2321 ?statment2 . ?statment2 ps:P2321 ?aa. - OPTIONAL {?statment2 pq:P2912 ?jersey2. ?jersey2 wdt:P18 ?image2. } - OPTIONAL {?statment2 pq:P1352 ?rankP2321. } - } - OPTIONAL { ?item p:P4320 ?statment3 . ?statment3 ps:P4320 ?aa. - OPTIONAL {?statment3 pq:P2912 ?jersey3. ?jersey3 wdt:P18 ?image3. } - OPTIONAL {?statment3 pq:P1352 ?rankP4320. } - } - OPTIONAL { ?item p:P3494 ?statment4 . ?statment4 ps:P3494 ?aa. - OPTIONAL {?statment4 pq:P2912 ?jersey4. ?jersey4 wdt:P18 ?image4. } - OPTIONAL {?statment4 pq:P1352 ?rankP3494. } - } -OPTIONAL { ?item wdt:P17 ?p17.} OPTIONAL { ?item wdt:P585 ?p585.} OPTIONAL { ?item wdt:P582 ?p582.} OPTIONAL { ?item wdt:P580 ?p580.} -FILTER NOT EXISTS { ?item wdt:P31 wd:Q20646667. } # plain stage -FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } -FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } -FILTER NOT EXISTS { ?item wdt:P31 ?P31 . ?P31 wdt:P279 wd:Q18131152 } -FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } -OPTIONAL { ?sitelink schema:about ?item - . ?sitelink schema:isPartOf - . ?sitelink schema:name ?title } - SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". - ?p17 rdfs:label ?p17lab. - ?item rdfs:label ?itemlab. - ?jersey1 rdfs:label ?jersey1lab. - ?jersey2 rdfs:label ?jersey2lab. - ?jersey3 rdfs:label ?jersey3lab. - ?jersey4 rdfs:label ?jersey4lab. - ?P642 rdfs:label ?p642label. - } - - } } """ - # --- - qu_2019 = qu_2019.replace("Q447532", qid) - qu2 = qu_2019 - # --- - if title in Stage: - qu2 = qu2.replace("FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 }", "") - qu2 = qu2.replace("FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 }", "") - # }Limit 10 } """ - # --- - json1 = get_query_results(qu2) - # --- - for rr in json1.get("head", {}).get("vars", []): - HeadVars.append(rr) - # --- - bindings = json1.get("results", {}).get("bindings", []) - if len(bindings) > 1: - return json1 - # one result or no result - if title in Stage: - return {} - # --- - qua3 = qu_2019 - qua3 = qua3.replace("FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 }", "") - qua3 = qua3.replace("FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 }", "") - qua3 = qua3.replace("FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 }", "") - qua3 += f"\n#{menet}" - # --- - json2 = get_query_results(qua3) - # --- - print("try 2") - # --- - return json2 - - -# --- -# import dateutil.parser -# import operator -# --- -NoAppend = ["p585", "p582", "p580"] -# --- -ranks_label = { - "P4323": "المرتبة %s في تصنيف أفضل شاب", - "P2321": "المرتبة %s في التصنيف العام", - "P4320": "المرتبة %s في تصنيف الجبال", - "P3494": "المرتبة %s في تصنيف النقاط", -} -# --- -Len_of_results = {} -Len_of_valid_results = {} - - -def fix_results(table): - results2 = {} - # --- - tata = { - "head": {"vars": ["item", "p17lab", "itemlab", "jersey_1", "jersey_2", "jersey_3", "jersey_4", "p642label", "p585", "p582", "p580", "rankP4323", "rankP2321", "rankP4320", "rankP3494", "title"]}, - "results": { - "bindings": [{ - "item": {"type": "uri", "value": "http://www.wikidata.org/entity/Q53557910"}, - "title": {"xml:lang": "ar", "type": "literal", "value": "طواف أستونيا 2018"}, - "p580": {"datatype": "http://www.w3.org/2001/XMLSchema#dateTime", "type": "literal", "value": "2018-05-25T00:00:00Z"}, - "p582": {"datatype": "http://www.w3.org/2001/XMLSchema#dateTime", "type": "literal", "value": "2018-05-26T00:00:00Z"}, - "p17lab": {"xml:lang": 
"ar", "type": "literal", "value": "إستونيا"}, - "itemlab": {"xml:lang": "ar", "type": "literal", "value": "طواف أستونيا 2018"}, - "rankP2321": {"datatype": "http://www.w3.org/2001/XMLSchema#decimal", "type": "literal", "value": "2"}, - "rankP4323": {"datatype": "http://www.w3.org/2001/XMLSchema#decimal", "type": "literal", "value": "1"}, - "rankP3494": {"datatype": "http://www.w3.org/2001/XMLSchema#decimal", "type": "literal", "value": "1"}, - "p642label": {"xml:lang": "ar", "type": "literal", "value": "الفائز وفقاً لترتيب النقاط"}, - "jersey_1": {"type": "literal", "value": "{{JOJOJO|Jersey%20white.svg|قميص أبيض، أفضل شاب}}"}, - "jersey_2": {"type": "literal", "value": "{{JOJOJO|Jersey%20white.svg|قميص أبيض، أفضل شاب}}"}, - "jersey_4": {"type": "literal", "value": "{{JOJOJO|Jersey%20red.svg|قميص أحمر، تصنيف النقاط}}"}, - }] - }, - } - # --- - printt(f"* Lenth fix_results: '{len(table)}' .") - for params in table: - # --- - if params.get("itemlab", {}).get("value", "").lower().strip().startswith("q"): - printt(" *** remove line startswith q---.") - continue - # --- - q = "item" in params and params["item"]["value"].split("/entity/")[1] - # --- - if q not in results2: - results2[q] = {"Date": [], "imagejersey": [], "item": [], "rank": []} - # --- - date = params.get("p585") or params.get("p582") or params.get("p585") or {} - date = date.get("value") or "" - # --- - if date not in results2[q]["Date"]: - results2[q]["Date"].append(date) - # --- - for param in params: - # --- - value = params[param]["value"] - # --- - param2 = param - if param.startswith("rank"): - param2 = "rank" - value2 = param.replace("rank", "") - if value2 in ranks_label: - value = ranks_label[value2] % value - # --- - if param.startswith("jersey_"): - param2 = "imagejersey" - # --- - if param == "p17lab": - value = "{{رمز علم|" + value + "}}" - elif param == "item": - value = value.split("/entity/")[1] - # --- - # if param == "p642label": - # value = re.sub(r'الفائز وفقاً ', 'الفائز في ', value ) - # value = re.sub(r'الفائز حسب التصنيف العام ', 'الفائز في التصنيف العام', value ) - # --- - if param2 not in NoAppend: - if param2 not in results2[q]: - results2[q][param2] = [] - # --- - if value not in results2[q][param2]: - results2[q][param2].append(value) - # --- - return results2 - - -def fix_date(data, title): - data2 = {} - # --- - p642label = 0 - # --- - for ta in data: - # --- - datn = data[ta].get("Date", []) - # --- - if isinstance(datn, list) and len(datn) > 0: - ddds = [x.strip() for x in datn if x.strip() != ""] - # --- - # print(date) - # --- - fanco = title - if fanco not in remove_date: - remove_date[fanco] = 0 - # --- - if fanco in Work_with_Year: - date = "" - if ddds != []: - date = ddds[0] - if not date: - remove_date[fanco] += 1 - # return "" - continue - else: - if hhh := re.match(r"(\d\d\d\d)\-\d\d\-\d\dT\d\d\:\d\d\:\d\dZ", date): - if int(hhh.group(1)) < Work_with_Year[fanco]: - remove_date[fanco] += 1 - # print_test2( 'remove_date[fanco] += 1 (%d) date == "%s"' % (remove_date[fanco], date) ) - # return "" - continue - # --- - data2[ta] = data[ta] - if data2[ta].get("p642label", False): - p642label += 1 - # --- - if remove_date[fanco] != 0: - print_test2("remove_date[fanco] += 1 (%d)" % remove_date[fanco]) - # --- - # Len_of_results[title] = len(data2) - Len_of_results[title] = p642label - # --- - return data2 - - -def make_new_text(qid, title): - Date_List2 = [] - # new_lines[title] = [] - new_lines[title] = {} - json1 = GetSparql(qid, title) - # --- - if not json1: - return False - # --- - bindings 
= json1.get("results", {}).get("bindings", []) - # --- - if len(bindings) < 1: - return False - # --- - results = fix_results(bindings) - # --- - Len_results = len(results) - printt("* Lenth results: '%d' ." % Len_results) - # --- - # Len_of_results[title] = Len_results - # --- - qidso = {} - for num, qq in enumerate(results): - # --- - if qq not in qidso: - qidso[qq] = {} - # --- - date = results[qq]["Date"][0] - if not date: - if qq not in Date_List2: - Date_List2.append(qq) - elif date not in Date_List2: - Date_List2.append(date) - # --- - qidso[qq] = results[qq] - # --- - qids_2 = fix_date(qidso, title) - # --- - Date_List2.sort() - printt("**Date_List2: ") - # --- - texxt = "" - for dd in Date_List2: - for qoo, tao in qids_2.items(): - # --- - if qoo in Skip_items: - continue - # --- - date = tao["Date"][0] - # --- - if dd == date: - table = {} - # --- - for ss in tao: - space = "، " - if ss in ["imagejersey", "p17lab"]: - space = "" - # --- - faso = sorted(tao[ss]) - # --- - if len(faso) > 0: - if len(faso) == 1 or ss == "p17lab": - k = faso[0] - elif len(faso) > 1: - k = space.join(faso) - # --- - if ss == "Date": - k = faso[0] - # --- - table[ss] = k - # --- - v, tab = make_temp_lines(table, title) - # --- - if v: - vvv = re.sub(r"\n", "", v) - new_lines[title][qoo] = tab - new_lines[title][qoo]["qid"] = qoo - new_lines[title][qoo]["race"] = tab.get("race", "") # re.sub( regline, "\g", vvv ) - new_lines[title][qoo]["p17"] = tab.get("p17", "") # re.sub( regline, "\g", vvv ) - new_lines[title][qoo]["poss"] = tab.get("poss", "") # re.sub( regline, "\g", vvv ) - # --- - texxt = texxt + v + "\n" - # --- - note = "\n" - texxt = note + texxt - # --- - t24 = Len_of_valid_results.get(title, 0) - t23 = Len_of_results.get(title, 0) - printt(f"Len_of_valid_results : {t24}, Len_of_results : {t23}") - printt(f"Len_of_valid_results : {t24}, Len_of_results : {t23}") - # --- - # --- - return texxt - - -def GetSectionNew3(text): - printt("**GetSectionNew3: ") - text = text - text2 = text - FirsPart = "" - # temp1 = '{{نتيجة سباق الدراجات/بداية|wikidatalist=t}}' - # temptop = '{{نتيجة سباق الدراجات/بداية}}' - # --- - Frist = re.compile(r"\{\{نتيجة سباق الدراجات\/بداية\s*?.*?\}\}") - if Fristsss := Frist.findall(text2): - printt("Section: ") - FirsPart = Fristsss[0] - printt(FirsPart) - # --- - if FirsPart: - text2 = text2.split(FirsPart)[1] - text2 = FirsPart + text2 - # --- - text2 = text2.split("{{نتيجة سباق الدراجات/نهاية}}")[0] - text2 = text2 + "{{نتيجة سباق الدراجات/نهاية}}" - # --- - return text2, FirsPart - - -# --- -returntext = {1: True} - - -def make_dada(NewText, MainTitle): - url = "https://" + "ar.wikipedia.org/w/index.php?title=" + ec_de_code(MainTitle, "decode") + "&action=submit" - t = f"
" - t += f"" - t += """ - - - - -
""" - return t - - -def page_put(NewText, summ, MainTitle): - printt(f" page_put: {br}") - # try: - title = ec_de_code(MainTitle, "decode") - # --- - printt(f" page_put {MainTitle}:{br}") - # print_test2( NewText ) - # --- - if (not TEST[1] and not TEST[2]) or workibrahem: - r4 = session[1].post( - api_url, - data={ - "action": "edit", - "format": "json", - "title": title, - "text": NewText, - "summary": summ, - "bot": 1, - "nocreate": 1, - "token": session["csrftoken"], - }, - ) - if workibrahem: - print(r4.text) - if "nochange" in r4.text: - printo("nodiff") - elif "Success" in r4.text: - # print('** true .. ' + '[[' + title + ']]' ) - # print('* true . ') - printo("true") - # printo( r4.text ) - elif "abusefilter-disallowed" in r4.text and returntext[1]: - texts = "
خطأ عند تعديل الصفحة، قم بنسخ المحتوى أدناه إلى الصفحة:
" - texts += make_dada(NewText, MainTitle) - printo(texts) - else: - printo(r4.text) - - -# --- -lines = {} -new_lines = {} -states = {} -# --- -# new_lines -# --- -regline = r"\{\{نتيجة سباق الدراجات/سطر4" -regline += r"\|\s*qid\s*\=(?PQ\d+)" -regline += r"\|\s*السباق\s*\=(?P.*)" -regline += r"\|\s*البلد\s*\=(?P.*)" -regline += r"\|\s*التاريخ\s*\=(?P.*)" -regline += r"\|\s*المركز\s*\=(?P.*)" -regline += r"\|\s*(?:rank|المرتبة)\s*\=(?P.*)" -regline += r"\|\s*جيرسي\s*\=(?P.*)" -regline += r"\s*\|\}\}" - - -def work_tano(text, MainTitle): - # --- - lines[MainTitle] = {} # [] - # --- - # reg_line2 = '\{\{نتيجة سباق الدراجات\/سطر4\s*?.*?\}\}' - reg_line = r"\{\{نتيجة سباق الدراجات\/سطر4([^{]|\{[^{]|\{\{[^{}]+\}\})+\}\}" - re.compile(reg_line) - # pas = fff.findall( text ) - # --- - # vf = re.compile(r'\{\{نتيجة سباق الدراجات\/سطر4([^{]|\{[^{]|\{\{[^{}]+\}\})+\}\}' ).findall( text ) - if text.startswith("{{نتيجة سباق الدراجات/بداية}}\n"): - text = text.replace("{{نتيجة سباق الدراجات/بداية}}\n", "") - text = text.replace("{{نتيجة سباق الدراجات/نهاية}}", "") - text = text.strip() - if vf := text.split("{{نتيجة سباق الدراجات/سطر4"): - for pp in vf: - if not pp: - continue - # --- - if not pp.startswith("{{نتيجة سباق الدراجات/سطر4"): - pp = "{{نتيجة سباق الدراجات/سطر4" + pp - # --- - q_id = "" - # q_id = re.sub(r".*(Q\d+).*", "\g<1>", pp ) - ppr = re.sub(r"\n", "", pp) - q_id = re.sub(r"\{\{نتيجة سباق الدراجات\/سطر4\|qid\s*\=\s*(Q\d+)\|.*\}\}", r"\g<1>", ppr) - if hhh := re.match(r".*(Q\d+).*", ppr): - if q_id != hhh.group(1): - q_id = hhh.group(1) - # lines[MainTitle].append( q_id ) - lines[MainTitle][q_id] = {} - lines[MainTitle][q_id]["qid"] = q_id - lines[MainTitle][q_id]["poss"] = re.sub(regline, r"\g", ppr) - lines[MainTitle][q_id]["rank"] = re.sub(regline, r"\g", ppr) - lines[MainTitle][q_id]["race"] = re.sub(regline, r"\g", ppr) - lines[MainTitle][q_id]["p17"] = re.sub(regline, r"\g", ppr) - # --- - # print( q_id ) - # print( "========================" ) - # --- - # print( "lenth sections : %d" % len( lines[MainTitle] ) ) - # --- - new_line = 0 - same_line = 0 - removed_line = 0 - # --- - if MainTitle in new_lines: - for line in new_lines[MainTitle].keys(): - # --- - if line == "Q49164584" and TEST[1]: - print(new_lines[MainTitle][line]) - # --- - # print( "new_lines:%s" % line ) - same = 0 - new = 0 - if line in lines[MainTitle].keys(): - for x in ["poss", "race", "p17"]: - if new_lines[MainTitle][line][x] == lines[MainTitle][line][x]: - same = 1 - else: - new = 1 - else: - new = 1 - # --- - if same == 1: - same_line += 1 - elif new == 1: - new_line += 1 - # --- - # --- - for liner in lines[MainTitle].keys(): - # print( "lines:%s" % liner ) - if liner not in new_lines[MainTitle].keys(): - removed_line += 1 - # --- - states[MainTitle] = {"new_line": new_line, "same_line": same_line, "removed_line": removed_line} - # --- - liner = "new_line:%d,same_line:%d,removed_line:%d" % (new_line, same_line, removed_line) - # --- - if MainTitle in remove_date and remove_date[MainTitle] != 0: - liner += ",removed_line_date:%d" % remove_date[MainTitle] - states[MainTitle]["removed_line_date"] = remove_date[MainTitle] - # --- - return liner - - -def puttext(text, MainTitle, Newsect): - printt(f"**puttext: {br}") - sect, Frist = GetSectionNew3(text) - # --- - work_tano(sect, MainTitle) - # --- - text = text - Newsect = Frist + "\n" + Newsect + "{{نتيجة سباق الدراجات/نهاية}}" - Newsect = re.sub(r"\n\n{{نتيجة سباق الدراجات/نهاية}}", "\n{{نتيجة سباق الدراجات/نهاية}}", Newsect) - NewText = text.replace(sect, 
Newsect) - summ = "" if workibrahem else "بوت:تجربة تحديث بيانات اللاعب" - printt(f"showDiff of page: {MainTitle}{br}") - if MainTitle in states: - if states[MainTitle]["new_line"] != 0 or states[MainTitle]["removed_line"] != 0 and text != NewText: - page_put(NewText, summ, MainTitle) - else: - printo("nodiff") - - -def template_params(text, title): - Frist = re.compile(r"\{\{نتيجة سباق الدراجات\/بداية\s*?.*?\}\}") - pas = Frist.findall(text) - # --- - if not pas: - return False, False - # --- - params = str(pas[0]) - params = re.sub(r"\s*\=\s*", "=", params) - params = re.sub(r"\s*\|\s*", "|", params) - if do := re.search(r".*\|تاريخ\=(\d+)(\}\}|\|)", text): - Work_with_Year[title] = int(do.group(1)) - print_test2(f"Work_with_Year:{do.group(1)}") - # --- - if re.sub(r"مراحل\s*\=\s*نعم", "", params) != params: - printt("Work with Stage") - Work_with_Stage[1] = True - Stage[title] = "" - # --- - if re.sub(r".*id\s*\=\s*(Q\d+).*", r"\g<1>", params) != params: - printt("** found currect line") - Qid = re.sub(r".*id\=(Q\d+).*", r"\g<1>", params) - printt(f"id: {Qid}") - return Qid, True - # --- - return False, False - - -def CheckTempalteInPageText(text): - printt(f"**CheckTempalteInPageText: {br}") - if text: - # --- - # \{\{template_tesult(\|id\=Q\d+|)\}\} - Topname = r"نتيجة سباق الدراجات\/بداية" - Top = r"\{\{" + Topname + r"\}\}" - # --- - Check_Top = re.sub(Top, "", text) - Top2 = r"\{\{" + Topname + r"\s*\|\s*id\s*\=\s*Q\d+\s*\}\}" - Check_Top2 = re.sub(Top2, "", text) - Top3 = r"\{\{" + Topname + r"\s*?.*?\}\}" - Check_Top3 = re.sub(Top3, "", text) - Bottom = r"\{\{نتيجة سباق الدراجات\/نهاية\}\}" - Check_Bottom = re.sub(Bottom, "", text) - # --- - if (text == Check_Top) and (text == Check_Top2) and (text == Check_Top3): - po = "لا يمكن إيجاد " + "{{نتيجة سباق الدراجات/بداية " + "في الصفحة. " - printo(po) - return False - elif text == Check_Bottom: - oo = "لا يمكن إيجاد " + "{{نتيجة سباق الدراجات/نهاية}} " + "في الصفحة. 
" - printo(oo) - return False - else: - printt(f" * Tempaltes Already there.{br}") - return True - else: - printt(f" * no text.{br}") - - -def GetPageText(title): - text, item = "", False - printt(f"**GetPageText: {br}") - # --- - tit = title # ec_de_code(title, 'encode') - # --- - url = "https://" + "ar.wikipedia.org/w/api.php?action=parse&prop=wikitext|properties&utf8=1&format=json&page=" + tit - printt(f"url:{url}") - # --- - json1 = {} - try: - json1 = session[1].get(url, timeout=10).json() - - except requests.exceptions.ReadTimeout: - print(f"ReadTimeout: {url}") - - except Exception as e: - print("<> Traceback (most recent call last):") - print(f"<> Exception:{e}.") - print("CRITICAL:") - # --- - if not json1: - return text, item - # --- - printt(f"find json1:{br}") - # --- - parse = json1.get("parse", {}) - if parse != {}: - printt(f"find parse in json1:{br}") - # --- - text = parse.get("wikitext", {}).get("*", "") - if text: - printt(f"find wikitext in parse:{br}") - printt(f"find * in parse.wikitext :{br}") - # --- - properties = parse.get("properties", []) - # --- - if properties != []: - printt(f"find properties in parse:{br}") - for prop in properties: - if "name" in prop: - if prop["name"] == "wikibase_item": - item = prop["*"] - printt(f"find item in parse.wikitext :{item}{br}") - break - elif "error" in json1: - text = False - if "info" in json1["error"]: - printt(json1["error"]["info"]) - else: - printt(json1) - else: - printt(f"no parse in json1:{br}") - printt(json1) - # --- - return text, item - - -def StartOnePage(title): - printt(f"**StartOnePage: {br}") - # --- - title = title.replace("_", " ") - # --- - if title.find("%") == -1: - title = ec_de_code(title, "encode") - # print( 'title encode: ' + title ) - # --- - text, item = GetPageText(title) - # --- - if not text: - printo("الصفحة المطلوبة غير موجودة أو أن محتواها فارغ.") - return - # --- - Check = CheckTempalteInPageText(text) - # --- - if not Check: - printt(f"no Check: pass....{br}") - return - # --- - printt("**Isre: ") - # --- - Qid, QidinTemplate = template_params(text, title) - if QidinTemplate: - item = Qid - # if not Qid: - # Qid = getwditem(title) - # --- - if not item: - if QidinTemplate: - NewText = " ' - else: - NewText = " ' - printt(f"**{NewText}") - # --- - if not item: - return - # --- - printt(f"**item: {item}") - if NewText := make_new_text(item, title): - printt("**puttext::: ") - puttext(text, title, NewText) - else: - ur = f'{item}.' - printo(f"لا توجد نتائج لهذه الصفحة تأكد من صحة معرف ويكي بيانات: {ur}.") - - # print(ur) - # --- - - -br = "
" - - -def main(): - # --- - title = "" - # --- - for arg in sys.argv: - arg, _, value = arg.partition(":") - # --- - if arg == "test": - TEST[1] = True - # --- - if arg in ["-title", "-page"]: - title = value - # --- - if arg == "test2": - TEST[2] = True - # --- - if arg == "text": - returntext[1] = True - # --- - if TEST[1]: - printt(f"TestMain:{br}") - # python pwb.py cy5 test - # StartOnePage('%D8%B3%D9%8A%D9%84%D9%81%D8%A7%D9%86_%D8%AA%D8%B4%D8%A7%D9%81%D8%A7%D9%86%D9%8A%D9%84') - # StartOnePage('%D8%AC%D8%A7%D9%8A_%D9%83%D8%B1%D9%88%D9%81%D9%88%D8%B1%D8%AF') - # StartOnePage('%D8%AF%D9%88%D9%85%D9%8A%D9%86%D9%8A%D9%83%D9%88_%D8%A8%D9%88%D8%B2%D9%88%D9%81%D9%8A%D9%81%D9%88') - # StartOnePage('%D8%A2%D8%B4%D9%84%D9%8A_%D9%85%D9%88%D9%84%D9%85%D8%A7%D9%86') - StartOnePage("%D8%B1%D9%8A%D8%AA%D8%B4%D9%8A_%D8%A8%D9%88%D8%B1%D8%AA") - # make_new_text('Q286183')# - # --- - # StartOnePage('%D8%B3%D9%8A%D9%84%D9%81%D8%A7%D9%86_%D8%AA%D8%B4%D8%A7%D9%81%D8%A7%D9%86%D9%8A%D9%84') - if title: - StartOnePage(title) - else: - printo('title==""') - - -# --- -tty = """ -===سباقات أو مراحل فاز بها=== -{{نتيجة سباق الدراجات/بداية|مراحل=نعم | id = Q623 -}} - -{{نتيجة سباق الدراجات/سطر4 -|qid = Q3003022 -|السباق = [[كريثيديا دو دوفين 2013]] -|البلد = {{رمز علم|سويسرا}} -|التاريخ = 2013-06-09T00:00:00Z -|المركز = المركز الثاني -|جيرسي = -|}} -{{نتيجة سباق الدراجات/سطر4 -|qid = Q28948862 -|السباق = [[كريثيديا دو دوفين 2017]] -|البلد = {{رمز علم|سويسرا}} -|التاريخ = 2017-06-11T00:00:00Z -|المركز = المركز الثاني -|جيرسي = -|}} -{{نتيجة سباق الدراجات/نهاية}} - -""" -if __name__ == "__main__": - # GetSparql("Q3266987", "") - main() - # s, o = template_params(tty, "dfdfdf") - # print(s) -# --- diff --git a/cy/cy6.py b/cy/cy6.py new file mode 100644 index 0000000..ef426a9 --- /dev/null +++ b/cy/cy6.py @@ -0,0 +1,81 @@ +#!/usr/bin/python3 +""" +python3 /data/project/himo/bots/wd_core/cy/cy6.py -title:إديتا_بوتشينسكايتي +python3 /data/project/himo/bots/wd_core/cy/cy6.py -title:جيروين_بلايلفينس + +python3 core8/pwb.py cy/cy6 -title:جيروين_بلايلفينس +python3 core8/pwb.py cy/cy6 -title:إديتا_بوتشينسكايتي +python3 core8/pwb.py cy/cy6 -title: +python3 core8/pwb.py cy/cy6 -title: +python3 core8/pwb.py cy/cy6 -title: + + +python3 core8/pwb.py cy/cy6 -title:باتريك_كونراد +python3 core8/pwb.py cy/cy6 -title:جويل_سوتير +python3 core8/pwb.py cy/cy6 -title:كريس_فروم + +python3 core8/pwb.py cy/cy6 -title:إديتا_بوتشينسكايتي +python3 core8/pwb.py cy/cy6 workibrahem test2 -title:خوان_سباستيان_مولانو +python3 core8/pwb.py cy/cy6 workibrahem test2 -title:إليسا_ونغو_بورغيني ask +python3 core8/pwb.py cy/cy6 workibrahem test2 -title:كوين_سيمونز + +""" + +import sys + +# --- +from cy_bot.cy_api import page_put, GetPageText +from cy_bot.do_text import do_One_Page +from cy_bot.cy_helps import printt, printo, TEST + +# --- +workibrahem = "workibrahem" in sys.argv +# --- +br = "
" + + +def StartOnePage(title): + printt("**StartOnePage:
") + # --- + title = title.replace("_", " ") + # --- + text, item = GetPageText(title) + # --- + if not text: + printo("الصفحة المطلوبة غير موجودة أو أن محتواها فارغ.") + return + # --- + NewText = do_One_Page(title, text, item) + # --- + if not NewText: + ur = f'{item}.' + printo(f"لا توجد نتائج لهذه الصفحة تأكد من صحة معرف ويكي بيانات: {ur}.") + return + # --- + page_put(NewText, title) + + +def main(): + # --- + title = "" + # --- + for arg in sys.argv: + arg, _, value = arg.partition(":") + # --- + if arg in ["-title", "-page"]: + title = value + # --- + if TEST[1]: + printt("TestMain:
") + # python3 core8/pwb.py cy6 test + StartOnePage("%D8%B1%D9%8A%D8%AA%D8%B4%D9%8A_%D8%A8%D9%88%D8%B1%D8%AA") + # make_new_text('Q286183')# + # --- + if title: + StartOnePage(title) + else: + printo('title==""') + + +if __name__ == "__main__": + main() diff --git a/cy/cy_bot/__init__.py b/cy/cy_bot/__init__.py new file mode 100644 index 0000000..b007c4e --- /dev/null +++ b/cy/cy_bot/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from . import cy_api +from . import cy_helps +from . import cy_regs +from . import cy_sparql +from . import do_text diff --git a/cy/cy_bot/cy_api.py b/cy/cy_bot/cy_api.py new file mode 100644 index 0000000..2c2645f --- /dev/null +++ b/cy/cy_bot/cy_api.py @@ -0,0 +1,195 @@ +#!/usr/bin/python3 +""" + +from .cy_api import page_put, GetPageText + +""" + +import sys +import requests + +# import urlencode +from urllib.parse import urlencode + +# --- +from . import useraccount +from .cy_helps import printt, printo, make_dada, ec_de_code, TEST + +api_url = "https://" + "ar.wikipedia.org/w/api.php" +username = useraccount.username +password = useraccount.password +# --- +returntext = {1: True} +# --- +if "text" in sys.argv: + returntext[1] = False +# --- +workibrahem = False +if "workibrahem" in sys.argv: + from API import useraccount + + username = useraccount.hiacc + password = useraccount.hipass + workibrahem = True + print("workibrahem active") +# --- +session = {1: requests.Session(), "csrftoken": ""} +br = "
" + + +def login(): + # get login token + r1 = session[1].get( + api_url, + params={ + "format": "json", + "action": "query", + "meta": "tokens", + "type": "login", + }, + timeout=10, + ) + r1.raise_for_status() + + # log in + r2 = session[1].post( + api_url, + data={ + "format": "json", + "action": "login", + "lgname": username, + "lgpassword": password, + "lgtoken": r1.json()["query"]["tokens"]["logintoken"], + }, + timeout=10, + ) + + # print( str( r2.json() ) ) + + if r2.json()["login"]["result"] != "Success": + raise RuntimeError(r2.json()["login"]["reason"]) + + # get edit token + r3 = session[1].get( + api_url, + params={ + "format": "json", + "action": "query", + "meta": "tokens", + }, + timeout=10, + ) + session["csrftoken"] = r3.json()["query"]["tokens"]["csrftoken"] + + +login() + + +def page_put(NewText, MainTitle): + printt(" page_put:
") + # try: + title = ec_de_code(MainTitle, "decode") + # --- + summ = "" if "workibrahem" in sys.argv else "بوت:تجربة تحديث بيانات اللاعب" + # --- + printt(f" page_put {MainTitle}:
") + # print_test2( NewText ) + # --- + if (not TEST[1] and not TEST[2]) or workibrahem: + r4 = session[1].post( + api_url, + data={ + "action": "edit", + "format": "json", + "title": title, + "text": NewText, + "summary": summ, + "bot": 1, + "nocreate": 1, + "token": session["csrftoken"], + }, + ) + if workibrahem: + print(r4.text) + if "nochange" in r4.text: + printo("nodiff") + elif "Success" in r4.text: + # print('** true .. ' + '[[' + title + ']]' ) + # print('* true . ') + printo("true") + # printo( r4.text ) + elif "abusefilter-disallowed" in r4.text and returntext[1]: + texts = "
خطأ عند تعديل الصفحة، قم بنسخ المحتوى أدناه إلى الصفحة:
" + texts += make_dada(NewText, MainTitle) + printo(texts) + else: + printo(r4.text) + + +def GetPageText(title): + text, item = "", False + # --- + printt("**GetPageText:
") + # --- + url = "https://" + "ar.wikipedia.org/w/api.php" + # --- + if title.find("%") != -1: + title = ec_de_code(title, "decode") + # --- + params = { + "action": "parse", + "prop": "wikitext|properties", + "utf8": "1", + "format": "json", + "page": title, + } + # --- + printt(f"url:{url}?" + urlencode(params) + "
") + # --- + json1 = {} + try: + json1 = session[1].get(url, params=params, timeout=10).json() + + except requests.exceptions.ReadTimeout: + print(f"ReadTimeout: {url}") + + except Exception as e: + print("<> Traceback (most recent call last):") + print(f"<> Exception:{e}.") + print("CRITICAL:") + # --- + if not json1: + return text, item + # --- + printt("find json1:
") + # --- + parse = json1.get("parse", {}) + if parse != {}: + printt("find parse in json1:
") + # --- + text = parse.get("wikitext", {}).get("*", "") + if text: + printt("find wikitext in parse:
") + printt("find * in parse.wikitext :
") + # --- + properties = parse.get("properties", []) + # --- + if properties != []: + printt("find properties in parse:
") + for prop in properties: + if "name" in prop: + if prop["name"] == "wikibase_item": + item = prop["*"] + printt("find item in parse.wikitext :{item}
") + break + elif "error" in json1: + text = False + if "info" in json1["error"]: + printt(json1["error"]["info"]) + else: + printt(json1) + else: + printt("no parse in json1:
") + printt(json1) + # --- + return text, item diff --git a/cy/cy_bot/cy_helps.py b/cy/cy_bot/cy_helps.py new file mode 100644 index 0000000..ab9e290 --- /dev/null +++ b/cy/cy_bot/cy_helps.py @@ -0,0 +1,227 @@ +""" +from .cy_helps import printt, CheckTempalteInPageText, printo, print_test2, ec_de_code, TEST, make_dada, get_temp_arg + +""" +import sys +import urllib.parse +import wikitextparser as wtp + +qu_2018 = """SELECT +?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?jersey_3 ?jersey_4 ?p642label ?p585 ?p582 ?p580 ?title +WHERE { +SELECT ?item ?itemlab ?jerseylab ?p17lab + ?jersey1lab ?image1 ?image2 ?image3 ?image4 + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_1) + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image2), "/Special:FilePath/"), "|", ?jersey2lab, "}}") AS ?jersey_2) + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image3), "/Special:FilePath/"), "|", ?jersey3lab, "}}") AS ?jersey_3) + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image4), "/Special:FilePath/"), "|", ?jersey4lab, "}}") AS ?jersey_4) + ?p642label ?p585 ?p582 ?p580 ?title + WHERE { + BIND(wd:Q447532 AS ?aa) + ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. ?winner pq:P642 ?P642. + OPTIONAL { ?item p:P4323 ?statment1. ?statment1 ps:P4323 ?aa. ?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } + OPTIONAL { ?item p:P2321 ?statment2. ?statment2 ps:P2321 ?aa. ?statment2 pq:P2912 ?jersey2. ?jersey2 wdt:P18 ?image2. } + OPTIONAL { ?item p:P4320 ?statment3. ?statment3 ps:P4320 ?aa. ?statment3 pq:P2912 ?jersey3. ?jersey3 wdt:P18 ?image3. } + OPTIONAL { ?item p:P3494 ?statment4. ?statment4 ps:P3494 ?aa. ?statment4 pq:P2912 ?jersey4. ?jersey4 wdt:P18 ?image4. } + + OPTIONAL { ?item wdt:P17 ?p17.} + OPTIONAL { ?item wdt:P585 ?p585.} + OPTIONAL { ?item wdt:P582 ?p582.} + OPTIONAL { ?item wdt:P580 ?p580.} + FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } + FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } + FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } + OPTIONAL { ?sitelink schema:about ?item + . ?sitelink schema:isPartOf + . ?sitelink schema:name ?title } + SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". + ?p17 rdfs:label ?p17lab. + ?item rdfs:label ?itemlab. + ?jersey1 rdfs:label ?jersey1lab. + ?jersey2 rdfs:label ?jersey2lab. + ?jersey3 rdfs:label ?jersey3lab. + ?jersey4 rdfs:label ?jersey4lab. + ?P642 rdfs:label ?p642label. + } + +} } """ +# --- +q22u = """SELECT + ?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?p642label ?p585 ?p582 ?p580 + WHERE { + SELECT ?item ?itemlab ?jerseylab ?image ?p17lab + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image), "/Special:FilePath/"), "|", ?jerseylab, "}}") AS ?jersey_1) + ?jersey1lab ?image1 + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_2) + ?p642label ?p585 ?p582 ?p580 + WHERE { + BIND(wd:Q518222 AS ?aa) + OPTIONAL { ?item p:P2417 ?statment. ?statment ps:P2417 ?aa. ?statment pq:P2912 ?jersey. ?jersey wdt:P18 ?image. } + OPTIONAL { ?item p:P2321 ?statment1. ?statment1 ps:P2321 ?aa. ?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } + OPTIONAL { ?item wdt:P17 ?p17.} + OPTIONAL { ?item wdt:P585 ?p585.} + OPTIONAL { ?item wdt:P582 ?p582.} + OPTIONAL { ?item wdt:P580 ?p580.} + ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. + ?winner pq:P642 ?P642. 
+ FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } + FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } + FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } + SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". + ?p17 rdfs:label ?p17lab. + ?item rdfs:label ?itemlab. + ?jersey rdfs:label ?jerseylab. + ?jersey1 rdfs:label ?jersey1lab. + ?P642 rdfs:label ?p642label. + } + + } } """ +# --- +TEST = {1: False, 2: False} +# --- +if "test" in sys.argv: + TEST[1] = True +# --- +if "test2" in sys.argv: + TEST[2] = True + + +def get_temp_arg(temp, arg): + if temp.has_arg(arg): + dd = temp.get_arg(arg) + if dd and dd.value and dd.value.strip(): + return dd.value.strip() + # -- + return "" + + +def ec_de_code(tt, type): + fao = tt + if type == "encode": + fao = urllib.parse.quote(tt) + elif type == "decode": + fao = urllib.parse.unquote(tt) + return fao + + +def make_dada(NewText, MainTitle): + url = "https://" + "ar.wikipedia.org/w/index.php?title=" + ec_de_code(MainTitle, "decode") + "&action=submit" + t = f"
" + t += f"" + t += """ + + + + +
""" + return t + + +def print_test2(s): + if TEST[2]: + # pywikibot.output(s) + print(s) + + +def printt(s): + SS = False + if SS or "test" in sys.argv or "test2" in sys.argv: + # pywikibot.output(s) + print(s) + + +def printo(s): + # --- + if "test" in sys.argv or "test2" in sys.argv or "ask" in sys.argv: + print(s) + return + # --- + try: + print(ec_de_code(s, "encode")) + except BaseException: + print("") + if "workibrahem" in sys.argv: + print(s) + + +def CheckTempalteInPageText(text): + printt("**CheckTempalteInPageText:
") + if not text: + printt(" * no text.
") + return + # --- + parser = wtp.parse(text) + # --- + temp_start = False + temp_end = False + # --- + temp_start_name = "نتيجة سباق الدراجات/بداية" + temp_end_name = "نتيجة سباق الدراجات/نهاية" + # --- + for template in parser.templates: + # --- + temp_str = template.string + # --- + if not temp_str or temp_str.strip() == "": + continue + # --- + name = str(template.normal_name()).strip() + # --- + if name == temp_end_name: + temp_end = True + # --- + if name == temp_start_name: + temp_start = True + # t_date = get_temp_arg(template, "تاريخ") + # --- + if not temp_start: + printo(f"لا يمكن إيجاد ({temp_start_name}) في الصفحة.") + return False + # --- + if not temp_end: + printo(f"لا يمكن إيجاد ({temp_end_name}) في الصفحة.") + return False + # --- + printt(" * Tempaltes Already there.
") + return True + + +def find_cy_temp(text): + start = "{{نتيجة سباق الدراجات/بداية" + end = "{{نتيجة سباق الدراجات/نهاية}}" + # --- + start_pos = text.find(start) + if start_pos < 0: + return + # --- + end_pos = text.find(end) + if end_pos < 0: + return + # --- + if end_pos < start_pos: + return + # --- + end_pos += len(end) + # --- + return text[start_pos:end_pos] + + +def get_temps_str(text, temp_name): + # --- + parser = wtp.parse(text) + # --- + results = [] + # --- + for template in parser.templates: + # --- + temp_str = template.string + # --- + if not temp_str or temp_str.strip() == "": + continue + # --- + name = str(template.normal_name()).strip() + # --- + if name == temp_name: + results.append(temp_str) + # --- + return results diff --git a/cy/cy_bot/cy_regs.py b/cy/cy_bot/cy_regs.py new file mode 100644 index 0000000..d733b02 --- /dev/null +++ b/cy/cy_bot/cy_regs.py @@ -0,0 +1,68 @@ +""" + +from .cy_regs import make_data_new + +python3 I:/core/bots/wd_core/cy/cy_bot/cy_regs.py + +""" +import wikitextparser as wtp + +from .cy_helps import get_temp_arg + + +def make_data_new(text): + # --- + tab = {} + # --- + temp_name = "نتيجة سباق الدراجات/سطر4" + # --- + parser = wtp.parse(text) + # --- + for template in parser.templates: + # --- + temp_str = template.string + # --- + if not temp_str or temp_str.strip() == "": + continue + # --- + name = str(template.normal_name()).strip() + # --- + if name == temp_name: + q_id = get_temp_arg(template, "qid") + # --- + if not q_id: + continue + # --- + tab[q_id] = {} + tab[q_id]["qid"] = q_id + tab[q_id]["poss"] = get_temp_arg(template, "المركز") + tab[q_id]["rank"] = get_temp_arg(template, "المرتبة") + # --- + if not tab[q_id]["rank"]: + tab[q_id]["rank"] = get_temp_arg(template, "rank") + # --- + tab[q_id]["race"] = get_temp_arg(template, "السباق") + tab[q_id]["p17"] = get_temp_arg(template, "البلد") + tab[q_id]["jersey"] = get_temp_arg(template, "جيرسي") + # --- + return tab + + +def test(): + text = """{{نتيجة سباق الدراجات/سطر4 +|qid = Q110775370 +|السباق = 2022 Tour de Romandie Féminin +|البلد = {{رمز علم|سويسرا}} +|التاريخ = 2022-10-09T00:00:00Z +|المركز = الفائز في التصنيف العام +|المرتبة = الأول في التصنيف العام، الثالث في تصنيف الجبال، السادس في تصنيف النقاط +|جيرسي = {{نتيجة سباق الدراجات/جيرسي|Jersey_green.svg|قميص أخضر لمتصدر الترتيب العام}} +}}""" + # --- + tab2 = make_data_new(text) + # --- + print(tab2) + + +if __name__ == "__main__": + test() diff --git a/cy/cy_bot/cy_sparql.py b/cy/cy_bot/cy_sparql.py new file mode 100644 index 0000000..9e85201 --- /dev/null +++ b/cy/cy_bot/cy_sparql.py @@ -0,0 +1,181 @@ +#!/usr/bin/python3 +""" +from .cy_sparql import GetSparql + +""" + +import re +import sys +import urllib +import urllib.parse +import requests +from datetime import datetime + +# --- +# from .cy_helps import printt +# --- +menet = datetime.now().strftime("%Y-%b-%d %H:%M:%S") + +Stage = {"": ""} +# --- + + +def get_query_results(query): + # --- + query = re.sub(r"\n\s+", "\n", query) + # --- + fao = urllib.parse.quote(query) + # --- + url = f"https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query={fao}" + # --- + if "printurl" in sys.argv: + print(url) + # --- + req = False + # --- + try: + req = requests.Session().get(url, timeout=10) + + except requests.exceptions.ReadTimeout: + print(f"ReadTimeout: {url}") + + except Exception as e: + print("<> Traceback (most recent call last):") + print(f"<> Exception:{e}.") + print("CRITICAL:") + # --- + json1 = {} + if req: + try: + json1 = req.json() + except 
Exception as e: + json1 = {} + # --- + print("<> Traceback (most recent call last):") + e = str(e) + if "java.util.concurrent" in e: + e = "java.util.concurrent" + print(f"<> Exception:{e}.") + print("CRITICAL:") + # --- + return json1 + + +def GetSparql(qid, title): + old_qu = """SELECT + ?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?p642label ?p585 ?p582 ?p580 ?title + WHERE { + SELECT ?item ?itemlab ?jerseylab ?image ?p17lab + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image), "/Special:FilePath/"), "|", ?jerseylab, "}}") AS ?jersey_1) + ?jersey1lab ?image1 + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_2) + ?p642label ?p585 ?p582 ?p580 ?title + WHERE { + BIND(wd:Q518222 AS ?aa) + OPTIONAL { ?item p:P2417 ?statment. ?statment ps:P2417 ?aa. ?statment pq:P2912 ?jersey. ?jersey wdt:P18 ?image. } + OPTIONAL { ?item p:P2321 ?statment1. ?statment1 ps:P2321 ?aa. ?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } + OPTIONAL { ?item wdt:P17 ?p17.} + OPTIONAL { ?item wdt:P585 ?p585.} + OPTIONAL { ?item wdt:P582 ?p582.} + OPTIONAL { ?item wdt:P580 ?p580.} + ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. + ?winner pq:P642 ?P642. + FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } + FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } + FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } + OPTIONAL { ?sitelink schema:about ?item + . ?sitelink schema:isPartOf + . ?sitelink schema:name ?title } + SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". + ?p17 rdfs:label ?p17lab. + ?item rdfs:label ?itemlab. + ?jersey rdfs:label ?jerseylab. + ?jersey1 rdfs:label ?jersey1lab. + ?P642 rdfs:label ?p642label. + } + + } } """ + # --- + qu_2019 = """SELECT DISTINCT ?item ?p17lab ?itemlab ?jersey_1 ?jersey_2 ?jersey_3 ?jersey_4 ?p642label ?p585 ?p582 ?p580 ?rankP4323 ?rankP2321 ?rankP4320 ?rankP3494 ?title + WHERE { SELECT DISTINCT ?item ?itemlab ?jerseylab ?p17lab ?rankP4323 ?rankP2321 ?rankP4320 ?rankP3494 + ?jersey1lab ?image1 ?image2 ?image3 ?image4 + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image1), "/Special:FilePath/"), "|", ?jersey1lab, "}}") AS ?jersey_1) + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image2), "/Special:FilePath/"), "|", ?jersey2lab, "}}") AS ?jersey_2) + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image3), "/Special:FilePath/"), "|", ?jersey3lab, "}}") AS ?jersey_3) + (CONCAT("{{JOJOJO|", STRAFTER(STR(?image4), "/Special:FilePath/"), "|", ?jersey4lab, "}}") AS ?jersey_4) + ?p642label ?p585 ?p582 ?p580 ?title + WHERE { + BIND(wd:Q447532 AS ?aa) + ?item wdt:P31 ?a1a. + OPTIONAL { ?item wdt:P1346 ?aa. ?item p:P1346 ?winner. ?winner ps:P1346 ?aa. ?winner pq:P642 ?P642. } + ?item (p:P1346|p:P4323|p:P2321|p:P4320|p:P3494) ?statment0. + ?statment0 (ps:P1346|ps:P4323|ps:P2321|ps:P4320|ps:P3494) ?aa. + OPTIONAL { ?item p:P4323 ?statment1 . ?statment1 ps:P4323 ?aa. + OPTIONAL {?statment1 pq:P2912 ?jersey1. ?jersey1 wdt:P18 ?image1. } + OPTIONAL {?statment1 pq:P1352 ?rankP4323. } + } + OPTIONAL { ?item p:P2321 ?statment2 . ?statment2 ps:P2321 ?aa. + OPTIONAL {?statment2 pq:P2912 ?jersey2. ?jersey2 wdt:P18 ?image2. } + OPTIONAL {?statment2 pq:P1352 ?rankP2321. } + } + OPTIONAL { ?item p:P4320 ?statment3 . ?statment3 ps:P4320 ?aa. + OPTIONAL {?statment3 pq:P2912 ?jersey3. ?jersey3 wdt:P18 ?image3. } + OPTIONAL {?statment3 pq:P1352 ?rankP4320. } + } + OPTIONAL { ?item p:P3494 ?statment4 . ?statment4 ps:P3494 ?aa. + OPTIONAL {?statment4 pq:P2912 ?jersey4. ?jersey4 wdt:P18 ?image4. } + OPTIONAL {?statment4 pq:P1352 ?rankP3494. 
} + } +OPTIONAL { ?item wdt:P17 ?p17.} OPTIONAL { ?item wdt:P585 ?p585.} OPTIONAL { ?item wdt:P582 ?p582.} OPTIONAL { ?item wdt:P580 ?p580.} +FILTER NOT EXISTS { ?item wdt:P31 wd:Q20646667. } # plain stage +FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q53534649 } +FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 } +FILTER NOT EXISTS { ?item wdt:P31 ?P31 . ?P31 wdt:P279 wd:Q18131152 } +FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 } +OPTIONAL { ?sitelink schema:about ?item + . ?sitelink schema:isPartOf + . ?sitelink schema:name ?title } + SERVICE wikibase:label { bd:serviceParam wikibase:language "ar,en,fr". + ?p17 rdfs:label ?p17lab. + ?item rdfs:label ?itemlab. + ?jersey1 rdfs:label ?jersey1lab. + ?jersey2 rdfs:label ?jersey2lab. + ?jersey3 rdfs:label ?jersey3lab. + ?jersey4 rdfs:label ?jersey4lab. + ?P642 rdfs:label ?p642label. + } + + } } """ + # --- + qu_2019 = qu_2019.replace("Q447532", qid) + qu2 = qu_2019 + # --- + if title in Stage: + qu2 = qu2.replace("FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 }", "") + qu2 = qu2.replace("FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 }", "") + # }Limit 10 } """ + # --- + json1 = get_query_results(qu2) + # --- + # for rr in json1.get("head", {}).get("vars", []): HeadVars.append(rr) + # --- + bindings = json1.get("results", {}).get("bindings", []) + # --- + if len(bindings) > 1: + return json1 + # --- + # one result or no result + if title in Stage: + return {} + # --- + qua3 = qu_2019 + qua3 = qua3.replace("FILTER NOT EXISTS { ?item wdt:P2417 ?P2417 }", "") + qua3 = qua3.replace("FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 }", "") + qua3 = qua3.replace("FILTER NOT EXISTS { ?item wdt:P31/wdt:P279* wd:Q18131152 }", "") + qua3 += f"\n#{menet}" + # --- + json2 = get_query_results(qua3) + # --- + print("try 2") + # --- + return json2 diff --git a/cy/cy_bot/do_text.py b/cy/cy_bot/do_text.py new file mode 100644 index 0000000..9094399 --- /dev/null +++ b/cy/cy_bot/do_text.py @@ -0,0 +1,597 @@ +#!/usr/bin/python3 +""" +from .do_text import make_new_text, do_One_Page + +""" + +import re +import wikitextparser as wtp + +# --- +from .cy_regs import make_data_new +from .cy_helps import get_temps_str, get_temp_arg, printt, print_test2, find_cy_temp, printo, TEST, CheckTempalteInPageText +from .cy_sparql import GetSparql + +# --- +remove_date = {} +Work_with_Year = {} +Len_of_results = {} +Len_of_valid_results = {} +new_lines = {} +# --- +states = {} +lines = {} +# --- +HeadVars = ["imagejersey"] +JOJOJO = "نتيجة سباق الدراجات/جيرسي" + + +Skip_items = ["Q4115189"] + +NoAppend = ["p585", "p582", "p580"] +# --- +ranks_label = { + "P4323": "المرتبة %s في تصنيف أفضل شاب", + "P2321": "المرتبة %s في التصنيف العام", + "P4320": "المرتبة %s في تصنيف الجبال", + "P3494": "المرتبة %s في تصنيف النقاط", +} +# --- + +Work_with_Stage = {} + + +def template_params(text, title): + # --- + parser = wtp.parse(text) + # --- + Qid = "" + results = False + # --- + for template in parser.templates: + # --- + temp_str = template.string + # --- + if not temp_str or temp_str.strip() == "": + continue + # --- + name = str(template.normal_name()).strip() + # --- + if name == "نتيجة سباق الدراجات/بداية": + # --- + t_date = get_temp_arg(template, "تاريخ") + if t_date and t_date.isdigit(): + Work_with_Year[title] = int(t_date) + print_test2(f"Work_with_Year:{t_date}") + # --- + t_stages = get_temp_arg(template, "مراحل") + if t_stages: + printt("Work with Stage") + Work_with_Stage[title] = True + # --- + t_id = get_temp_arg(template, "id") + if t_id: + printt("** 
found currect line") + Qid = t_id + printt(f"id: {Qid}") + results = True + # --- + break + # --- + return Qid, results + + +def findflag(race, flag): + flage = { + "إيطاليا": "{{رمز علم|إيطاليا}}", + "جيرو ديل ترينتينو": "{{رمز علم|إيطاليا}}", + "the Alps": "{{رمز علم|إيطاليا}}", + "France": "{{رمز علم|فرنسا}}", + "فرنسا": "{{رمز علم|فرنسا}}", + "إسبانيا": "{{رمز علم|إسبانيا}}", + "دونكيرك": "{{رمز علم|بلجيكا}}", + "غنت-وفلجم": "{{رمز علم|بلجيكا}}", + "Gent–Wevelgem": "{{رمز علم|بلجيكا}}", + "Norway": "{{رمز علم|النرويج}}", + "النرويج": "{{رمز علم|النرويج}}", + "كريثيديا دو دوفين": "{{رمز علم|سويسرا}}", + "du Dauphiné": "{{رمز علم|سويسرا}}", + "سويسرا": "{{رمز علم|سويسرا}}", + "باريس-نايس": "{{رمز علم|فرنسا}}", + } + # --- + race = str(race) + # --- + for ff in flage: + te = re.sub(str(ff), "", race) + # --- + if te != race: + flag = flage[ff] + # --- + return flag + + +def fix_label(label): + label = label.strip() + + label = re.sub(r"بطولة العالم لسباق الدراجات على الطريق (\d+) – سباق الطريق الفردي للرجال", r"سباق الطريق في بطولة العالم \g<1>", label) + + label = re.sub(r"ركوب الدراجات في الألعاب الأولمبية الصيفية (\d+) – سيدات فردي سباق الطريق", r"سباق الطريق للسيدات في ركوب الدراجات الأولمبية الصيفية \g<1>", label) + + label = re.sub(r"ركوب الدراجات في الألعاب الأولمبية الصيفية (\d+) – فريق رجال سباق الطريق", r"سباق الطريق لفرق الرجال في ركوب الدراجات الأولمبية الصيفية \g<1>", label) + + # بطولة العالم لسباق الدراجات على الطريق 1966 – سباق الطريق الفردي للرجال + label = re.sub(r"بطولة العالم لسباق الدراجات على الطريق (\d+) – سباق الطريق الفردي للرجال", r"سباق الطريق للرجال في بطولة العالم \g<1>", label) + + label = re.sub(r"سباق الطريق المداري ", "سباق الطريق ", label) + label = re.sub(r"(بطولة [\s\w]+) الوطنية ", r"\g<1> ", label) + label = re.sub(r"^(سباق\s*.*? في بطولة العالم)\s*(لسباق الدراجات على الطريق|للدراجات) (.*?)$", r"\g<1> \g<3>", label) + label = re.sub(r"^(سباق\s*.*? 
في بطولة [\s\w]+)\s*(لسباق الدراجات على الطريق|للدراجات) (.*?)$", r"\g<1> \g<3>", label) + + # سباق الطريق للسيدات في ركوب الدراجات في الألعاب الأولمبية الصيفية 2016 + label = re.sub(r"في ركوب الدراجات في الألعاب الأولمبية ", "في ركوب الدراجات الأولمبية ", label) + + # في ركوب الدراجات في دورة ألعاب الكومنولث + label = re.sub(r"ركوب الدراجات في دورة ألعاب الكومنولث", "ركوب الدراجات في دورة الكومنولث", label) + label = re.sub(r"\s+", " ", label) + return label + + +def make_temp_lines(table, title, with_stages): + # --- + for rr in HeadVars: + if rr not in table: + table[rr] = "" + # --- + image = table["imagejersey"] + image = re.sub(r"JOJOJO", JOJOJO, image) + image = image.replace("%20", "_") + # --- + date = table["Date"] + flag = table["p17lab"] + # --- + qid = table["item"] + table2 = {"race": "", "p17": "", "poss": "", "qid": qid} + # --- + if qid in Skip_items: + return "", table2 + # --- + link = table.get("title", "") + label = table.get("itemlab", "") + if link: + race = f"[[{link}]]" + label = link.split(" (")[0] + # --- + label = fix_label(label) + # --- + if link: + race = f"[[{link}|{label}]]" if label != link else f"[[{link}]]" + else: + race = label + # --- + sss = table["p642label"] + # الفائز وفقاً لترتيب النقاط للشباب + sss = re.sub(r"الفائز وفقاً لترتيب", "الفائز في ترتيب", sss) + sss = re.sub(r"الفائز حسب التصنيف العام", "الفائز في التصنيف العام", sss) + # --- + ranke = table.get("rank", "") + # --- + ranke_tab = { + "المرتبة 1 في": "الأول في", + "المرتبة 2 في": "الثاني في", + "المرتبة 3 في": "الثالث في", + "المرتبة 4 في": "الرابع في", + "المرتبة 5 في": "الخامس في", + "المرتبة 6 في": "السادس في", + "المرتبة 7 في": "السابع في", + "المرتبة 8 في": "الثامن في", + "المرتبة 9 في": "التاسع في", + "المرتبة 10 في": "العاشر في", + # "المرتبة 11 في" : "الحادي عشر في", + # "المرتبة 12 في" : "الثاني عشر في", + } + for kk in ranke_tab: + if ranke.find(kk) >= 0: + ranke = re.sub(kk, ranke_tab[kk], ranke) + # --- + newflag = findflag(race, flag) + # --- + table2["race"] = race + table2["p17"] = newflag + table2["poss"] = sss + # --- + so = "{{نتيجة سباق الدراجات/سطر4" + so = so + "\n|qid = " + qid + so = so + "\n|السباق = " + race + so = so + "\n|البلد = " + newflag + so = so + "\n|التاريخ = " + date + so = so + "\n|المركز = " + sss + so = so + "\n|المرتبة = " + ranke + so = so + "\n|جيرسي = " + image + so += "\n}}" + # --- + if race and race.lower().strip().startswith("q"): + printt(" *** remove line startswith q.") + return "", table2 + # --- + if ranke and sss.strip() == "": + if not with_stages and Len_of_valid_results.get(title, 0) > 10: + if re.sub(r"المرتبة 1 في", "", ranke) == ranke and re.sub(r"الأول في", "", ranke) == ranke: + # printt(" *** remove line with rank < 1.") + return "", table2 + # --- + if flag != newflag: + printt(f' *** race:"{race}", flag:"{flag}", newflag:"{newflag}"') + # --- + if title not in Len_of_valid_results: + Len_of_valid_results[title] = 0 + Len_of_valid_results[title] += 1 + # --- + return so, table2 + + +def fix_results(table): + results2 = {} + # --- + tata = { + "head": {"vars": ["item", "p17lab", "itemlab", "jersey_1", "jersey_2", "jersey_3", "jersey_4", "p642label", "p585", "p582", "p580", "rankP4323", "rankP2321", "rankP4320", "rankP3494", "title"]}, + "results": { + "bindings": [ + { + "item": {"type": "uri", "value": "http://www.wikidata.org/entity/Q53557910"}, + "title": {"xml:lang": "ar", "type": "literal", "value": "طواف أستونيا 2018"}, + "p580": {"datatype": "http://www.w3.org/2001/XMLSchema#dateTime", "type": "literal", "value": 
"2018-05-25T00:00:00Z"}, + "p582": {"datatype": "http://www.w3.org/2001/XMLSchema#dateTime", "type": "literal", "value": "2018-05-26T00:00:00Z"}, + "p17lab": {"xml:lang": "ar", "type": "literal", "value": "إستونيا"}, + "itemlab": {"xml:lang": "ar", "type": "literal", "value": "طواف أستونيا 2018"}, + "rankP2321": {"datatype": "http://www.w3.org/2001/XMLSchema#decimal", "type": "literal", "value": "2"}, + "rankP4323": {"datatype": "http://www.w3.org/2001/XMLSchema#decimal", "type": "literal", "value": "1"}, + "rankP3494": {"datatype": "http://www.w3.org/2001/XMLSchema#decimal", "type": "literal", "value": "1"}, + "p642label": {"xml:lang": "ar", "type": "literal", "value": "الفائز وفقاً لترتيب النقاط"}, + "jersey_1": {"type": "literal", "value": "{{JOJOJO|Jersey%20white.svg|قميص أبيض، أفضل شاب}}"}, + "jersey_2": {"type": "literal", "value": "{{JOJOJO|Jersey%20white.svg|قميص أبيض، أفضل شاب}}"}, + "jersey_4": {"type": "literal", "value": "{{JOJOJO|Jersey%20red.svg|قميص أحمر، تصنيف النقاط}}"}, + } + ] + }, + } + # --- + printt(f"* Lenth fix_results: '{len(table)}' .") + for params in table: + # --- + if params.get("itemlab", {}).get("value", "").lower().strip().startswith("q"): + printt(" *** remove line startswith q---.") + continue + # --- + q = "item" in params and params["item"]["value"].split("/entity/")[1] + # --- + if q not in results2: + results2[q] = {"Date": [], "imagejersey": [], "item": [], "rank": []} + # --- + date = params.get("p585") or params.get("p582") or params.get("p585") or {} + date = date.get("value") or "" + # --- + if date not in results2[q]["Date"]: + results2[q]["Date"].append(date) + # --- + for param in params: + # --- + value = params[param]["value"] + # --- + param2 = param + if param.startswith("rank"): + param2 = "rank" + value2 = param.replace("rank", "") + if value2 in ranks_label: + value = ranks_label[value2] % value + # --- + if param.startswith("jersey_"): + param2 = "imagejersey" + # --- + if param == "p17lab": + value = "{{رمز علم|" + value + "}}" + elif param == "item": + value = value.split("/entity/")[1] + # --- + # if param == "p642label": + # value = re.sub(r'الفائز وفقاً ', 'الفائز في ', value ) + # value = re.sub(r'الفائز حسب التصنيف العام ', 'الفائز في التصنيف العام', value ) + # --- + if param2 not in NoAppend: + if param2 not in results2[q]: + results2[q][param2] = [] + # --- + if value not in results2[q][param2]: + results2[q][param2].append(value) + # --- + return results2 + + +def fix_date(data, title): + data2 = {} + # --- + p642label = 0 + # --- + for ta in data: + # --- + datn = data[ta].get("Date", []) + # --- + if isinstance(datn, list) and len(datn) > 0: + ddds = [x.strip() for x in datn if x.strip() != ""] + # --- + # print(date) + # --- + fanco = title + if fanco not in remove_date: + remove_date[fanco] = 0 + # --- + if fanco in Work_with_Year: + date = "" + if ddds != []: + date = ddds[0] + if not date: + remove_date[fanco] += 1 + # return "" + continue + else: + if hhh := re.match(r"(\d\d\d\d)\-\d\d\-\d\dT\d\d\:\d\d\:\d\dZ", date): + if int(hhh.group(1)) < Work_with_Year[fanco]: + remove_date[fanco] += 1 + continue + # --- + data2[ta] = data[ta] + if data2[ta].get("p642label", False): + p642label += 1 + # --- + if remove_date[fanco] != 0: + print_test2("remove_date[fanco] += 1 (%d)" % remove_date[fanco]) + # --- + Len_of_results[title] = p642label + # --- + return data2 + + +def tab_sub_x(tao): + table = {} + # --- + for ss in tao: + space = "، " + # --- + if ss in ["imagejersey", "p17lab"]: + space = "" + # --- + faso = 
sorted(tao[ss]) + # --- + if len(faso) > 0: + if len(faso) == 1 or ss == "p17lab": + k = faso[0] + elif len(faso) > 1: + k = space.join(faso) + # --- + if ss == "Date": + k = faso[0] + # --- + table[ss] = k + # --- + return table + + +def make_text_sec(Date_List2, qids_2, title, with_stages): + texxt = "" + # --- + for dd in Date_List2: + for qoo, tao in qids_2.items(): + # --- + if qoo in Skip_items: + continue + # --- + date = tao["Date"][0] + # --- + if dd == date: + table = tab_sub_x(tao) + # --- + v, tab = make_temp_lines(table, title, with_stages) + # --- + if v: + # vvv = re.sub(r"\n", "", v) + new_lines[title][qoo] = tab + new_lines[title][qoo]["qid"] = qoo + new_lines[title][qoo]["race"] = tab.get("race", "") + new_lines[title][qoo]["p17"] = tab.get("p17", "") + new_lines[title][qoo]["poss"] = tab.get("poss", "") + # --- + texxt = texxt + v + "\n" + # --- + return texxt + + +def make_new_section(qid, title): + Date_List2 = [] + # --- + with_stages = Work_with_Stage.get(title, False) + # --- + new_lines[title] = {} + # --- + json1 = GetSparql(qid, title) + # --- + if not json1: + return False + # --- + bindings = json1.get("results", {}).get("bindings", []) + # --- + for rr in json1.get("head", {}).get("vars", []): + HeadVars.append(rr) + # --- + if len(bindings) < 1: + return False + # --- + results = fix_results(bindings) + # --- + Len_results = len(results) + printt("* Lenth results: '%d' ." % Len_results) + # --- + # Len_of_results[title] = Len_results + # --- + qidso = {} + for num, qq in enumerate(results): + # --- + if qq not in qidso: + qidso[qq] = {} + # --- + date = results[qq]["Date"][0] + if not date: + if qq not in Date_List2: + Date_List2.append(qq) + elif date not in Date_List2: + Date_List2.append(date) + # --- + qidso[qq] = results[qq] + # --- + qids_2 = fix_date(qidso, title) + # --- + Date_List2.sort() + printt("**Date_List2: ") + # --- + texxt = make_text_sec(Date_List2, qids_2, title, with_stages) + # --- + note = "\n" + texxt = note + texxt + # --- + t24 = Len_of_valid_results.get(title, 0) + t23 = Len_of_results.get(title, 0) + # --- + printt(f"Len_of_valid_results : {t24}, Len_of_results : {t23}") + # --- + printt(f"Len_of_valid_results : {t24}, Len_of_results : {t23}") + # --- + return texxt + + +def work_tano(text, MainTitle): + # --- + lines[MainTitle] = make_data_new(text) + # --- + new_line = 0 + same_line = 0 + removed_line = 0 + # --- + if MainTitle in new_lines: + for line in new_lines[MainTitle].keys(): + # --- + if line == "Q49164584" and TEST[1]: + print(new_lines[MainTitle][line]) + # --- + same = 0 + new = 0 + if line in lines[MainTitle].keys(): + for x in ["poss", "race", "p17"]: + if new_lines[MainTitle][line][x] == lines[MainTitle][line][x]: + same = 1 + else: + new = 1 + else: + new = 1 + # --- + if same == 1: + same_line += 1 + elif new == 1: + new_line += 1 + # --- + # --- + for liner in lines[MainTitle].keys(): + if liner not in new_lines[MainTitle].keys(): + removed_line += 1 + # --- + states[MainTitle] = {"new_line": new_line, "same_line": same_line, "removed_line": removed_line} + # --- + liner = "new_line:%d,same_line:%d,removed_line:%d" % (new_line, same_line, removed_line) + # --- + if MainTitle in remove_date and remove_date[MainTitle] != 0: + liner += ",removed_line_date:%d" % remove_date[MainTitle] + states[MainTitle]["removed_line_date"] = remove_date[MainTitle] + # --- + return liner + + +def make_new_text(item, title, text): + # --- + Newsect = make_new_section(item, title) + # --- + if not Newsect: + ur = f'{item}.' 
+ print_test2("no new section") + printo(f"لا توجد نتائج لهذه الصفحة تأكد من صحة معرف ويكي بيانات: {ur}.") + return text + # --- + cy_temp = find_cy_temp(text) + # --- + old_sect = cy_temp + # --- + Frist = "" + first_tmp = "" + # --- + if cy_temp: + first_tmp = get_temps_str(cy_temp, "نتيجة سباق الدراجات/بداية") + if first_tmp: + Frist = first_tmp[0] + # --- + work_tano(old_sect, title) + # --- + Newsect = Frist + "\n" + Newsect + "{{نتيجة سباق الدراجات/نهاية}}" + Newsect = re.sub(r"\n\n{{نتيجة سباق الدراجات/نهاية}}", "\n{{نتيجة سباق الدراجات/نهاية}}", Newsect) + # --- + NewText = text.replace(old_sect, Newsect) + # --- + printt(f"showDiff of page: {title}
") + # --- + if title not in states: + return text + # --- + if states[title]["new_line"] != 0 or states[title]["removed_line"] != 0 and text != NewText: + return NewText + else: + printo("nodiff") + # --- + return text + + +def do_One_Page(title, text, item=""): + # --- + Check = CheckTempalteInPageText(text) + # --- + if not Check: + printt("no Check: pass....
") + return text + # --- + printt("**Isre: ") + # --- + Qid, QidinTemplate = template_params(text, title) + # --- + if QidinTemplate: + item = Qid + # --- + if not item: + hte = " ' + # --- + if QidinTemplate: + hte = " ' + # --- + printt(f"**{hte}") + # --- + if not item: + return text + # --- + printt(f"**item: {item}") + # --- + NewText = make_new_text(item, title, text) + # --- + if not NewText: + ur = f'{item}.' + printo(f"لا توجد نتائج لهذه الصفحة تأكد من صحة معرف ويكي بيانات: {ur}.") + return text + # --- + if NewText == "nodiff": + printo("nodiff") + return text + # --- + return NewText diff --git a/cy/useraccount.py b/cy/cy_bot/useraccount.py similarity index 100% rename from cy/useraccount.py rename to cy/cy_bot/useraccount.py diff --git a/cy/jsub.py b/cy/jsub.py index 4ff8dbd..8192aa5 100644 --- a/cy/jsub.py +++ b/cy/jsub.py @@ -1,41 +1,82 @@ #!/usr/bin/python3 -# -- -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -# --- """ -python pwb.py cy/jsub -page:كريس_فروم -python pwb.py cy/jsub -ref:قالب:نتيجة_سباق_الدراجات/بداية +tfj run jsubx --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py cy/jsub -ns:10 -cat:تصنيف:سجل_فوز_دراج_من_ويكي_بيانات/قوالب" + +python3 core8/pwb.py cy/jsub -page:جيروين_بلايلفينس ask +python3 core8/pwb.py cy/jsub -page:إديتا_بوتشينسكايتي +python3 core8/pwb.py cy/jsub -page: +python3 core8/pwb.py cy/jsub -page:قالب:نتيجة_سباق_الدراجات/ميغيل_إندوراين +python3 core8/pwb.py cy/jsub -page:قالب:نتيجة_سباق_الدراجات/ألبيرتو_كونتادور +python3 core8/pwb.py cy/jsub -page: +python3 core8/pwb.py cy/jsub -page: +python3 core8/pwb.py cy/jsub -page:كريس_فروم + +python3 core8/pwb.py cy/jsub -ref:قالب:نتيجة_سباق_الدراجات/بداية +python3 core8/pwb.py cy/jsub -cat: +python3 core8/pwb.py cy/jsub -ns:10 -cat:تصنيف:سجل_فوز_دراج_من_ويكي_بيانات/قوالب +python3 core8/pwb.py cy/jsub -cat: +python3 core8/pwb.py cy/jsub -cat: python3 core8/pwb.py cy/jsub -cat:تصنيف:سجل_فوز_دراج_من_ويكي_بيانات https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Gathering_data_from_Arabic-Wikipedia """ # --- -# -# (C) Ibrahem Qasim, 2022 -# -# --- - -import pywikibot - -# --- -from cy.cy5 import * +from newapi.page import MainPage -# --- +from cy_bot.do_text import do_One_Page import gent +skip_titles = [ + "قالب:نتيجة سباق الدراجات", + "قالب:نتيجة سباق الدراجات/بداية", +] +def onep(title): + # --- + if title in skip_titles: + return + # --- + page = MainPage(title, "ar", family="wikipedia") + # --- + if not page.exists(): + return + # --- + if not page.can_edit(): + return + # --- + if page.isDisambiguation(): + return + # --- + if page.isRedirect(): + return + # --- + text = page.get_text() + # --- + item = page.get_qid() + # --- + new_text = do_One_Page(title, text, item) + # --- + if not new_text: + print(f"لا توجد نتائج لهذه الصفحة تأكد من صحة معرف ويكي بيانات: {item}.") + return + # --- + if new_text == text: + print("no changes") + return + # --- + if new_text: + page.save(newtext=new_text, summary="بوت:تجربة تحديث بيانات اللاعب") + def main2(*args): generator = gent.get_gent(listonly=True, *args) + # --- for numb, pagetitle in enumerate(generator, start=1): - pywikibot.output(f"page: {numb} : {pagetitle}") - StartOnePage(pagetitle) + print(f"page: {numb} : {pagetitle}") + onep(pagetitle) -# --- if __name__ == "__main__": main2() -# --- diff --git a/cy/mv.py b/cy/mv.py new file mode 100644 index 0000000..2b4a67d --- /dev/null +++ b/cy/mv.py @@ -0,0 +1,188 @@ +#!/usr/bin/python3 +""" + +python3 core8/pwb.py cy/mv -ns:0 -ref:قالب:نتيجة_سباق_الدراجات/بداية ask nodiff +python3 
core8/pwb.py cy/mv -ns:0 -cat:تصنيف:سجل_فوز_دراج_من_ويكي_بيانات nofa + +https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Gathering_data_from_Arabic-Wikipedia + +tfj run jsuw1 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py cy/mv -ns:0 -cat:تصنيف:سجل_فوز_دراج_من_ويكي_بيانات p1 nofa" +tfj run jsuw2 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py cy/mv -ns:0 -cat:تصنيف:سجل_فوز_دراج_من_ويكي_بيانات p2 nofa" +tfj run jsubp2 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py cy/mv -ns:0 -ref:قالب:نتيجة_سباق_الدراجات/بداية p2" +tfj run jsubp3 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py cy/mv -ns:0 -ref:قالب:نتيجة_سباق_الدراجات/بداية p3" +tfj run jsubp4 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py cy/mv -ns:0 -ref:قالب:نتيجة_سباق_الدراجات/بداية p4" + +""" +# --- +import wikitextparser as wtp +from newapi.page import MainPage +from newapi import printe + +import tqdm +import sys +import gent + + +def add_id_to_text(item, text): + parser = wtp.parse(text) + # --- + for template in parser.templates: + # --- + temp_str = template.string + # --- + if not temp_str or temp_str.strip() == "": + continue + # --- + name = str(template.normal_name()).strip() + # --- + if name == "نتيجة سباق الدراجات/بداية": + template.set_arg("id", item) + template.set_arg("قالب", "t") + break + # --- + text = parser.string + # --- + return text + + +def move_it_to_temp(title, item, text): + # --- + if not text: + return + # --- + temp_title = f"قالب:نتيجة سباق الدراجات/{title}" + # --- + text = add_id_to_text(item, text) + # --- + temp_page = MainPage(temp_title, "ar", family="wikipedia") + # --- + if temp_page.exists(): + do = temp_page.save(text, summary="بوت:تجربة تحديث بيانات اللاعب") + else: + do = temp_page.Create(text, summary="بوت:تجربة تحديث بيانات اللاعب") + # --- + return do + + +def find_cy_temp(text): + start = "{{نتيجة سباق الدراجات/بداية" + end = "{{نتيجة سباق الدراجات/نهاية}}" + # --- + start_pos = text.find(start) + if start_pos < 0: + return + # --- + end_pos = text.find(end) + if end_pos < 0: + return + # --- + if end_pos < start_pos: + return + # --- + end_pos += len(end) + # --- + return text[start_pos:end_pos] + + +def one_page_work(title, text, item): + # --- + cy_temp = find_cy_temp(text) + # --- + if not cy_temp: + printe.output(f"no cy temp on {title}") + return text + # --- + temp = move_it_to_temp(title, item, cy_temp) + # --- + if not temp: + return text + # --- + new_temp = f"{{{{نتيجة سباق الدراجات/{title}}}}}" + # --- + if text.find(cy_temp) != -1: + text = text.replace(cy_temp, new_temp) + # --- + return text + + +def onep(title): + # --- + page = MainPage(title, "ar", family="wikipedia") + # --- + if not page.exists(): + return + # --- + if not page.can_edit(): + return + # --- + if page.isDisambiguation(): + return + # --- + if page.isRedirect(): + return + # --- + ns = page.namespace() + # --- + if ns != 0: + printe.output(f"<> page:{title} {ns=} not in main namespace.") + return + # --- + text = page.get_text() + # --- + item = page.get_qid() + # --- + # new_text = make_new_text(item, title, text) + new_text = one_page_work(title, text, item) + # --- + if not new_text: + printe.output("no new text!!") + return + # --- + if new_text.find("{{نتيجة سباق الدراجات/بداية") != -1 or new_text.find("{{نتيجة سباق الدراجات/نهاية}}") != -1: + printe.output("error when replacing templates") + return + # --- + if new_text == text: + printe.output("no changes") + return + # --- + if 
new_text: + page.save(newtext=new_text, summary="بوت:تجربة تحديث بيانات اللاعب") + + +def split_pages(pages, parts=4): + length = len(pages) + part_size = length // parts + remaining = length % parts + + # Create a dictionary to store the split parts + parts_dict = {} + start_index = 0 + for i in range(parts): + end_index = start_index + part_size + (1 if i < remaining else 0) + parts_dict[f"p{i+1}"] = pages[start_index:end_index] + start_index = end_index + + # Return the specified part or the original list + for part_name, part_pages in parts_dict.items(): + if part_name in sys.argv: + printe.output(f"<> part: {part_name}: {len(part_pages):,}") + return part_pages + + return pages + + +def main2(*args): + generator = gent.get_gent(listonly=True, *args) + # --- + list_of_pages = [x for x in tqdm.tqdm(generator)] + # --- + list_of_pages = split_pages(list_of_pages) + # --- + for numb, pagetitle in enumerate(list_of_pages, start=1): + printe.output(f"<> page: {numb}/{len(list_of_pages)} : {pagetitle}") + onep(pagetitle) + + +if __name__ == "__main__": + main2() diff --git a/des/fam.py b/des/fam.py index 87287ae..a7f9314 100644 --- a/des/fam.py +++ b/des/fam.py @@ -10,6 +10,7 @@ """ # --- +import tqdm import sys import random from newapi import printe @@ -79,9 +80,15 @@ # --- if len(desc_table) > 1: # chose randomly 5 of the desc_table + # --- liste = list(desc_table.keys()) + # --- list2 = random.sample(liste, 10) + # --- print(list2) + # --- + random.shuffle(list2) + # --- for x in list2: temp_table[x] = desc_table[x] # --- @@ -112,72 +119,81 @@ ] # --- qlist_done = [] -# --- -# lenth of desc_table and quarry_list -all_lenth = len(quarry_list) * len(desc_table) -# --- -numb = 0 -# --- -for p31, p31_desc in desc_table.items(): - # --- - quarry_result_lenth = 0 + + +def work_one_json(json1, topic_ar, p31, p31_langs): # --- - qu_numb = 0 + json_lenth = len(json1) # --- - for quarry in quarry_list: - # --- - qu_numb += 1 - if quarry_result_lenth == 0 and qu_numb > 1: - printe.output("<> len of first quarry == 0 continue") - continue - # --- - numb += 1 + for num, item in tqdm.tqdm(enumerate(json1, start=1)): + num += 1 + q = "item" in item and item["item"].split("/entity/")[1] # --- - printe.output(f"work in {numb} from {all_lenth} querirs") + q_langs = item.get("langs", "").split(",") # --- - quarry = quarry.replace("wd:Q1457376", f"wd:{p31}") + lang_to_add = list(set(p31_langs) - set(q_langs)) # --- - if qu_numb == 1: - printe.output("<> first quarry") - printe.output(quarry) + tp = f'<>*mainfromQuarry: {num} from {json_lenth} p31:"{p31}", qid:"{q}":<>{topic_ar}' # --- - json1 = wd_bot.sparql_generator_url(quarry) - # --- - json_lenth = len(json1) + if not lang_to_add: + printe.output(tp) + continue # --- - quarry_result_lenth = len(json1) + if num % 50 == 0: + printe.output(tp) # --- - num = 0 + if p31 in railway_tables: + work_railway({}, p31, q=q) + # elif p31 in placesTable: + # work_railway( {}, p31, q=q ) + else: + newdesc.work22(q, p31, desc_table) + + +def work_one_quarry(quarry, p31, p31_desc): + json1 = wd_bot.sparql_generator_url(quarry) + # --- + quarry_result_lenth = len(json1) + # --- + topic_ar = p31_desc.get("ar") or p31_desc.get("en") or "" + # --- + p31_langs = list(p31_desc.keys()) + # --- + work_one_json(json1, topic_ar, p31, p31_langs) + # --- + return quarry_result_lenth + + +def main(): + # lenth of desc_table and quarry_list + all_lenth = len(quarry_list) * len(desc_table) + # --- + numb = 0 + # --- + for p31, p31_desc in desc_table.items(): # --- - topic_ar = 
p31_desc.get("ar") or p31_desc.get("en") or "" + quarry_result_lenth = 0 # --- - p31_langs = list(p31_desc.keys()) + random.shuffle(quarry_list) # --- - for item in json1: - num += 1 - q = "item" in item and item["item"].split("/entity/")[1] + for qu_numb, quarry in enumerate(quarry_list): # --- - q_langs = item.get("langs", "").split(",") - # --- - lang_to_add = list(set(p31_langs) - set(q_langs)) + if quarry_result_lenth == 0 and qu_numb > 1: + printe.output("<> len of first quarry == 0 continue") + continue # --- - tp = f'<>*mainfromQuarry: {num} from {json_lenth} p31:"{p31}", qid:"{q}":<>{topic_ar}' + numb += 1 # --- - if qu_numb == 1: - printe.output(tp) - # --- - if not lang_to_add: - printe.output(tp) - continue + printe.output(f"work in {numb} from {all_lenth} querirs") # --- - if num % 50 == 0: - printe.output(tp) + quarry = quarry.replace("wd:Q1457376", f"wd:{p31}") # --- - if p31 in railway_tables: - work_railway({}, p31, q=q) - # elif p31 in placesTable: - # work_railway( {}, p31, q=q ) - else: - newdesc.work22(q, p31, desc_table) + if qu_numb == 1: + printe.output("<> first quarry") + printe.output(quarry) # --- - # --- + quarry_result_lenth = work_one_quarry(quarry, p31, p31_desc) + + +if __name__ == "__main__": + main() diff --git a/nep/bots/tax_desc.py b/nep/bots/tax_desc.py index 72750d0..3b46e4f 100644 --- a/nep/bots/tax_desc.py +++ b/nep/bots/tax_desc.py @@ -59,8 +59,8 @@ def make_tax_des_new(item): # --- if bs != []: bs = bs[0] - printe.output("bs:") - printe.output(bs) + # printe.output("bs:") + # printe.output(bs) # --- # [ # {'P171': 'http://www.wikidata.org/entity/Q1390', diff --git a/nep/si3g.py b/nep/si3g.py index 0379fec..a10ca93 100644 --- a/nep/si3g.py +++ b/nep/si3g.py @@ -1,5 +1,6 @@ #!/usr/bin/python3 """ +tfj run dsdcc --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py nep/si3g -family:wikidata -lang:wikidata -start:Q500" tfj run ghu --mem 1Gi --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py nep/si3g -usercontribs:Ghuron" tfj run Q482994 --image python3.9 --command "$HOME/local/bin/python3 core8/pwb.py neq/nldes3 a2r sparql:Q482994" @@ -52,6 +53,7 @@ # --- import time from pathlib import Path + # --- from newapi import printe import gent @@ -130,7 +132,7 @@ def mainwithcat2(): lista = [x.strip() for x in oco if x.strip() != ""] # --- elif newpages: - lista = api_new.Get_Newpages(limit=newpages, namespace=namespaces, rcstart="", user="") + lista = api_new.Get_Newpages(limit=newpages, namespace=namespaces, offset_minutes=20) # --- elif user: lista = api_new.UserContribs(user, limit=user_limit, namespace=namespaces, ucshow="new")
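# A minimal, self-contained sketch (not code from this patch) of the slicing that
# split_pages() in cy/mv.py performs: the p1/p2/p3/p4 arguments passed to the
# "tfj run" jobs in the cy/mv.py docstring each select one near-equal slice of the
# page list so several Toolforge jobs can run in parallel. The demo_split name and
# the sample titles below are illustrative assumptions, not identifiers from the repo.


def demo_split(pages, parts=4):
    """Split pages into p1..pN slices of nearly equal size, mirroring cy/mv.py split_pages()."""
    part_size, remaining = divmod(len(pages), parts)
    parts_dict = {}
    start = 0
    for i in range(parts):
        # the first `remaining` slices each take one extra page
        end = start + part_size + (1 if i < remaining else 0)
        parts_dict[f"p{i + 1}"] = pages[start:end]
        start = end
    return parts_dict


if __name__ == "__main__":
    titles = [f"Page_{n}" for n in range(10)]
    for name, chunk in demo_split(titles).items():
        # with 10 titles and 4 parts: p1 and p2 get 3 titles each, p3 and p4 get 2
        print(name, chunk)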