Update for modify contents

project-polymorph · Nov 1, 2024 · dcbb656 · dcbb656
1 parent 2da9beb
commit dcbb656
Show file tree

Hide file tree

Showing 428 changed files with 78,712 additions and 2,749 deletions.
diff --git a/.github/build.sh b/.github/build.sh
@@ -4,6 +4,7 @@
 set -e
 
 rm -rf docs/*
+rm -rf workspace/download/*
 
 # rename files
 python .github/scripts/file/rename.py
@@ -35,6 +36,10 @@ python .github/scripts/others/get_md5_list.py
 
 echo "MD5 list generated successfully!"
 
+python .github/scripts/file/add_config.py
+
+echo "Metadata added successfully!"
+
 # generate page
 python .github/scripts/page/gen_page.py
 
@@ -52,10 +57,12 @@ mkdir -p docs
 cp -r "README.md" "docs/"
 cp -r "健康护理" "docs/"
 cp -r "新闻" "docs/"
-cp -r "法律" "docs/"
+cp -r "法律法规与条款" "docs/"
 cp -r "生活" "docs/"
 cp -r "社群与个人故事" "docs/"
 cp -r "文学与艺术作品" "docs/"
+cp -r "讨论与思考" "docs/"
+cp -r "未知" "docs/"
 # Copy all files from .github/site to root directory
 cp -r .github/site/* ./
 

diff --git a/.github/catalog.yml b/.github/catalog.yml
@@ -22,9 +22,33 @@
 新闻:
   name: 新闻
   description: 这个目录包含与跨性别相关的新闻报道和最新动态，旨在提供关于跨性别者的社会关注度、法律变化和重要事件的最新信息。
-法律:
-  name: 法律
+新闻/传统报纸与杂志:
+  name: 传统报纸与杂志
+  description: 本目录收录来自传统报纸与杂志的跨性别相关报道，涵盖变性经历、法律问题以及社会对跨性别者的态度等方面，展示了跨性别个体在社会中的生存现状和转变过程。
+新闻/网页:
+  name: 网页
+  description: 本目录涵盖与跨性别相关的新闻以及网页链接，旨在提供最新的跨性别议题报道和信息，共同促进社会对跨性别者的理解与支持。
+未知:
+  name: 未知
+  description: 此目录包含与跨性别主题相关的无法分类的文件
+法律法规与条款:
+  name: 法律法规与条款
+  description: 该目录包含与跨性别者相关的法律权益和政府法规的信息，旨在提供跨性别社区在法律政策方面的参考与指引，包括公证服务与法律法规的解读。
+法律法规与条款/保险:
+  name: 保险
+  description: 本目录包含有关跨性别者相关的保险条款，旨在提供保障和支持，帮助跨性别者在追求生活品质与个人兴致时，获得适当的医疗保障与意外险覆盖。
+法律法规与条款/公证:
+  name: 公证
   description: 本目录包含关于办理特殊公证的心得体会，探讨在跨性别法律事务中所遇到的挑战与经验，旨在为其他跨性别者提供参考与帮助。
+法律法规与条款/其他:
+  name: 其他
+  description: 本目录收录了与法律法规相关的跨性别话题，特别是关于反就业歧视的比较研究，探讨英国的相关法律与我国法律的对比及改进的可能性，旨在提升对跨性别权益的重视和理解。
+法律法规与条款/法律法规与政府文件:
+  name: 法律法规与政府文件
+  description: 本目录收录与跨性别相关的法律法规和政府文件，涵盖变性手术、性别变更、以及与跨性别权利和医疗相关的政策文件。这些文件对于跨性别人士的合法权益和社会接纳程度具有重要意义。
+法律法规与条款/法律评论与文献:
+  name: 法律评论与文献
+  description: 本目录包含有关跨性别人士的法律评论与研究文献，探讨代孕、婚姻权利、变性法律等相关法律问题，旨在推进对变性人权利的理解与保护。
 生活:
   name: 生活
   description: 该目录包含与跨性别者日常生活相关的文档，包括个人欲望、家庭关系以及父母的理解与支持，旨在分享生活中的真实经历与挑战。
@@ -39,7 +63,16 @@
   description: 本目录旨在为跨性别者的父母提供指导与支持，帮助他们理解和接纳孩子的性别认同；同时也希望包含孩子如何处理与父母的关系的一些相关资料。
 社群与个人故事:
   name: 社群与个人故事
-  description: 该目录收录了跨性别个体的生命故事与个人经历，展示了他们在性别认同和过渡过程中的真实自述，以及面对社会传统观念的挑战。通过这些故事，力求增进对跨性别群体的理解与支持。
+  description: 该目录收录了跨性别个体的生命故事与个人经历，展示了他们在性别认同和过渡过程中的真实自述，以及面对社会传统观念的挑战。这里更倾向于社群自媒体的发帖，如果是新闻报道，请放在新闻目录下。
 社群与个人故事/个人故事:
   name: 个人故事
-  description: 本目录收录了跨性别个体的真实生命故事与过渡经历，旨在呈现他们的内心感受和生活经历，以期增进对跨性别群体的理解和支持。
+  description: 本目录收录了跨性别个体的真实生命故事与生活经历，旨在呈现他们的内心感受。如果是新闻报道，请放在新闻目录而不是这里。
+社群与个人故事/书籍:
+  name: 书籍
+  description: 这一目录包含了关于跨性别群体的书籍，涉及个人的生命故事与过渡经历，以真实的生动故事反映跨性别者的生存现状，帮助人们更好地理解和认同这些经历。
+讨论与思考:
+  name: 讨论与思考
+  description: 本目录包含了关于跨性别话题的讨论与思考，涉及变性手术的病假问题以及对于东京2020奥运会相关事件的深入反思。
+讨论与思考/社科论文与期刊:
+  name: 社科论文与期刊
+  description: 本目录收录了与跨性别相关的社科论文与期刊，涉及性别伦理、婚姻家庭、社会行为等领域的讨论与研究，提供了对跨性别者经历和现状的深刻分析与见解。
diff --git a/.github/classify.sh b/.github/classify.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Runs the three workspace stages in order: classify, separate, merge.
+
+# Abort as soon as any stage exits non-zero
+set -e
+
+# Directory that holds the stage scripts
+scripts_dir=.github/scripts/workspace
+
+# run_stage SCRIPT MESSAGE: run one stage script, then print its message
+run_stage() {
+    python "$scripts_dir/$1"
+    echo "$2"
+}
+
+run_stage workspace_classify.py "Workspace classified successfully!"
+# "seperate" is kept as spelled: it is the script file name being invoked
+run_stage seperate_classify.py "Workspace separated successfully!"
+run_stage workspace_merge.py "Workspace merged successfully!"
diff --git a/.github/downloader/.gitignore b/.github/downloader/.gitignore
@@ -0,0 +1,7 @@
+node_modules
+res.md
+res.html
+output.md
+monolithic.html
+original.html
+original_clean.html
diff --git a/.github/downloader/ai/check_related.py b/.github/downloader/ai/check_related.py
@@ -0,0 +1,112 @@
+import yaml
+import json
+import tempfile
+import subprocess
+import os
+from pathlib import Path
+
+def load_template(template_path):
+    """Return the full text of the template at ``template_path``, read as UTF-8."""
+    return Path(template_path).read_text(encoding='utf-8')
+
+def get_ai_classification(title, link, snippet, gen_struct_path, template):
+    """Ask the AI helper script whether one link is related.
+
+    The prompt is built from ``template`` (``str.format`` with the fields
+    ``title``, ``link`` and ``snippet``), written to a temporary file and
+    passed to the script at ``gen_struct_path`` together with an output path
+    and a JSON-schema file. The JSON found at the output path afterwards must
+    contain an ``is_related`` key.
+
+    Args:
+        title: Title of the entry; a falsy value becomes "Untitled".
+        link: URL of the entry, inserted into the prompt as given.
+        snippet: Short excerpt; a falsy value becomes an empty string.
+        gen_struct_path: Script run as
+            ``<python> gen_struct_path <input> <output> <schema>``.
+        template: Prompt template text.
+
+    Returns:
+        The lower-cased ``is_related`` value from the script's output
+        ("true", "false" or "notsure" when the schema is honoured), or
+        "unknown" if running the script or reading its result fails.
+
+    Raises:
+        KeyError, IndexError, ValueError: if ``template`` cannot be formatted
+            with the three fields. This happens before any temporary file is
+            created.
+    """
+    import sys  # local import: only needed to locate the running interpreter
+
+    # JSON schema restricting the answer to one of three string values
+    schema = {
+        "type": "object",
+        "properties": {
+            "is_related": {
+                "type": "string",
+                "enum": ["True", "False", "NotSure"],
+                "description": "Whether the content is related to transgender/LGBTQ+ topics"
+            }
+        },
+        "required": ["is_related"],
+        "additionalProperties": False
+    }
+
+    # Build the prompt first so a malformed template cannot leave temporary
+    # files behind.
+    prompt = template.format(
+        title=title or "Untitled",
+        link=link,
+        snippet=snippet or ""
+    )
+    print(f"Prompt: {prompt}")
+
+    # Every temporary file is registered here as soon as it exists, so the
+    # cleanup below removes exactly the files that were created.
+    temp_paths = []
+    try:
+        # The result is read back as UTF-8 below, so the prompt is written as
+        # UTF-8 too instead of in the locale's default encoding.
+        # NOTE(review): confirm gen_struct.py reads its input file as UTF-8.
+        with tempfile.NamedTemporaryFile(
+            mode='w+', delete=False, suffix='.txt', encoding='utf-8'
+        ) as temp_input:
+            temp_input_path = temp_input.name
+            temp_paths.append(temp_input_path)
+            temp_input.write(prompt)
+
+        with tempfile.NamedTemporaryFile(
+            mode='w+', delete=False, suffix='.json', encoding='utf-8'
+        ) as temp_schema:
+            schema_file = temp_schema.name
+            temp_paths.append(schema_file)
+            json.dump(schema, temp_schema)
+
+        # Created empty; the helper script is expected to fill it in.
+        with tempfile.NamedTemporaryFile(
+            mode='w+', delete=False, suffix='.json'
+        ) as temp_output:
+            temp_output_path = temp_output.name
+            temp_paths.append(temp_output_path)
+
+        # Run gen_struct.py with the interpreter running this script, so a
+        # virtualenv (or a system that only provides "python3") still works.
+        subprocess.run([
+            sys.executable or 'python', gen_struct_path,
+            temp_input_path, temp_output_path, schema_file
+        ], check=True)
+
+        # Read the result
+        with open(temp_output_path, 'r', encoding='utf-8') as f:
+            result = json.load(f)
+        print(f"Result: {result}")
+        return result["is_related"].lower()  # Convert to lowercase to match YAML
+    except Exception as e:
+        # Best effort: one failed entry is reported and left as "unknown".
+        print(f"Error during AI classification: {e}")
+        return "unknown"
+    finally:
+        # Cleanup temporary files; a file that is already gone must not mask
+        # the value being returned.
+        for path in temp_paths:
+            try:
+                os.unlink(path)
+            except OSError:
+                pass
+
+def main():
+    """Classify every entry of links.yml whose relatedness is not yet known.
+
+    Loads ``.github/links.yml`` (a mapping of URL to entry data) and the
+    prompt template, then asks ``get_ai_classification`` about each entry
+    whose ``is_related`` value is missing, falsy or 'unknown'. Each new
+    result is stored in the entry and the whole file is rewritten at once,
+    so progress survives an interrupted run.
+    """
+    # File paths
+    links_path = Path('.github/links.yml')
+    template_path = Path('.github/prompts/check_related.md.template')
+    gen_struct_path = Path('.github/scripts/ai/gen_struct.py')
+
+    # Load files
+    with open(links_path, 'r', encoding='utf-8') as f:
+        # An empty YAML document loads as None; treat it as "no links".
+        links_data = yaml.safe_load(f) or {}
+
+    template = load_template(template_path)
+
+    # Process each unknown entry
+    modified = False
+    for url, data in links_data.items():
+        if not isinstance(data, dict):
+            # A key without a mapping body has nowhere to store a result.
+            print(f"Skipping {url} because the entry has no data")
+            continue
+
+        if data.get('is_related') and data.get('is_related') != 'unknown':
+            print(f"Skipping {url} because the result is already known")
+            continue
+
+        print(f"Processing: {url}")
+        result = get_ai_classification(
+            data.get('title'),
+            data.get('link'),
+            data.get('snippet'),
+            gen_struct_path,
+            template
+        )
+
+        if result == 'unknown':
+            print(f"Skipping {url} because the result is unknown")
+            continue
+
+        data['is_related'] = result
+        modified = True
+        print(f"Updated {url} to {result}")
+        # Write changes immediately
+        with open(links_path, 'w', encoding='utf-8') as f:
+            yaml.dump(links_data, f, allow_unicode=True)
+        print("Changes saved to links.yml")
+
+    # No final save is needed: the file is rewritten after every update.
+    if not modified:
+        print("No changes were necessary")
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/downloader/ai/gen.py b/.github/downloader/ai/gen.py
@@ -0,0 +1,66 @@
+import os
+import openai
+import argparse
+from openai import OpenAI
+from dotenv import load_dotenv
+
+# Module-level configuration, read once at import time.
+load_dotenv()
+openai.api_key = os.getenv('OPENAI_API_KEY')
+
+# Model to query; an unset or empty OPENAI_MODEL_NAME falls back to "gpt-4o".
+model_name = os.getenv('OPENAI_MODEL_NAME') or "gpt-4o"
+
+# Environment values are strings, so convert: `temperature` is always a float,
+# whether it came from OPENAI_TEMPERATURE or from the 0.7 default.
+try:
+    temperature = float(os.getenv('OPENAI_TEMPERATURE') or 0.7)
+except ValueError:
+    print("Invalid OPENAI_TEMPERATURE; falling back to 0.7")
+    temperature = 0.7
+
+client = OpenAI()
+
+def read_file(file_path):
+    """Return the whole text of ``file_path``, decoded as UTF-8."""
+    with open(file_path, mode='r', encoding='utf-8') as source:
+        text = source.read()
+    return text
+
+def write_file(file_path, content):
+    """Save ``content`` to ``file_path`` as UTF-8, replacing any existing file."""
+    with open(file_path, mode='w', encoding='utf-8') as sink:
+        sink.write(content)
+
+def generate_cleanup_content(content):
+    """Send ``content`` as a single user message and return the model's reply.
+
+    Args:
+        content: Text sent as the only message of the chat request.
+
+    Returns:
+        The text content of the first choice in the completion.
+
+    Raises:
+        RuntimeError: if the first choice carries no text content. Without
+            this check the literal string "None" would be returned and
+            written to the output file as if it were the model's answer.
+        Exception: any error raised by the client call propagates unchanged.
+    """
+    # NOTE(review): the module-level `temperature` setting is not sent with
+    # the request; confirm whether it is meant to be applied here.
+    completion = client.chat.completions.create(
+        model=model_name,
+        messages=[
+            {"role": "user", "content": content}
+        ]
+    )
+
+    reply = completion.choices[0].message.content
+    if reply is None:
+        raise RuntimeError("The model returned no text content")
+    return str(reply)
+
+def main():
+    """Command-line entry point: read a prompt file, query the model, save the reply.
+
+    Positional arguments are ``input_file`` (text sent to the model) and
+    ``output_file`` (where the reply is written).
+
+    Exits with status 1 when any step fails, so shell scripts and callers
+    using ``subprocess.run(..., check=True)`` can detect the failure instead
+    of seeing a successful exit with no usable output.
+    """
+    import sys  # local import: only needed for the failure exit status
+
+    # Set up command-line argument parsing
+    parser = argparse.ArgumentParser(
+        description="Generate a cleaned-up version of a text file using OpenAI's GPT-4."
+    )
+    parser.add_argument('input_file', help='Path to the input .txt file')
+    parser.add_argument('output_file', help='Path to save the cleaned output file')
+
+    args = parser.parse_args()
+
+    try:
+        # Read input file
+        input_content = read_file(args.input_file)
+
+        # Generate cleaned content
+        cleaned_content = generate_cleanup_content(input_content)
+
+        # Write to output file
+        write_file(args.output_file, cleaned_content)
+    except Exception as e:
+        # Top-level boundary: report the problem, then signal failure.
+        print(f"An error occurred: {e}")
+        sys.exit(1)
+    else:
+        print(f"Successfully processed '{args.input_file}' and saved to '{args.output_file}'.")
+
+if __name__ == "__main__":
+    main()