侧边栏壁纸
博主头像
ZOUXS的空间博主等级

人生苦短,我学python

  • 累计撰写 8 篇文章
  • 累计创建 18 个标签
  • 累计收到 0 条评论

目 录CONTENT

文章目录

python修改html文件内容的记事

zouxs
2024-02-21 / 0 评论 / 0 点赞 / 14 阅读 / 10293 字
温馨提示:
本文最后更新于 2024-02-21,若内容或图片失效,请留言反馈。部分素材来自网络,若不小心影响到您的利益,请联系我们删除。

一、背景

在我的项目中,需要修改已存在的html静态文件的内容,以达到增添内容或增加交互操作的目的。

二、思路

在设计时,存在三个思路

  • 直接修改源html文件

  • 在静态文件加载时修改返回给前端的html内容

  • 使用模板语法include引入html内容,并对其修改

三、实现

1、直接修改源html文件

使用python直接open并修改文件内容

# Path of the HTML file to modify (placeholder value).
filepath = "xxxxxxxx"

# Text to insert into the file (placeholder value).
insert_str = "xxxxxxxx"

# Closing tag after which the text is inserted.
marker = "</script>"

# Read the current contents of the file.
with open(filepath, "r") as f:
    content = f.read()

# Locate the end of the first closing script tag. str.find() returns -1 when
# the tag is absent, so check for that instead of blindly adding an offset
# (which would silently insert the text near the start of the file).
marker_pos = content.find(marker)
if marker_pos == -1:
    raise ValueError(f"{marker!r} not found in {filepath}")
index = marker_pos + len(marker)

# Build the new contents with the text spliced in right after the tag.
content = content[:index] + insert_str + content[index:]

# Write the modified contents back to the same file.
with open(filepath, "w") as f:
    f.write(content)

2、在静态文件加载时修改返回给前端的html内容

利用django的静态文件服务器,重写django的django.views.static中的serve函数

# task_html.py
import mimetypes
import posixpath
from io import StringIO, BytesIO, BufferedRandom
from pathlib import Path

from bs4 import BeautifulSoup
from django.http import Http404, HttpResponseNotModified, FileResponse
from django.utils._os import safe_join
from django.utils.http import http_date
from django.views.decorators.clickjacking import xframe_options_sameorigin
from django.views.static import directory_index, was_modified_since

from django.utils.translation import gettext as _

from ..views.constants import MERGE_PARTNER


def set_genefuse_html(fullpath):
    """Build an in-memory, modified copy of a genefuse HTML report.

    The file at ``fullpath`` is read and decoded, cleaned with BeautifulSoup
    (every ``<table>`` matched by ``class=None`` and every
    ``<div class="tips">`` whose string is ``"Supporting reads:"`` is removed),
    and an extra snippet is spliced in right after the first ``</script>``
    tag. When the document contains no ``</script>`` tag the snippet is
    appended at the end instead of being inserted at an arbitrary offset.

    :param fullpath: ``pathlib.Path``-like object for the HTML file; only its
        ``open()`` method and ``name`` attribute are used.
    :return: a ``BufferedRandom`` stream, rewound to offset 0, holding the
        encoded, modified HTML.
    """
    script_end = "</script>"
    add_str = "xxxxxx"  # placeholder for the snippet to inject

    # Helper that strips the unwanted DOM elements.
    def get_html(html):
        soup = BeautifulSoup(html, 'html.parser')
        for table_tag in soup.find_all('table', attrs={"class": None}):
            table_tag.decompose()

        # NOTE(review): newer bs4 releases prefer ``string=`` over ``text=``;
        # ``text=`` is kept here for compatibility with the installed version.
        for div_tag in soup.find_all('div', attrs={"class": "tips"}, text="Supporting reads:"):
            div_tag.decompose()

        return str(soup)

    # Read the whole file, then release the handle before any processing.
    with fullpath.open('rb') as f:
        content = f.read().decode()

    # Clean selected tags with bs4.
    content = get_html(content)

    # Work out where to insert. str.find() returns -1 when the tag is absent;
    # adding a fixed offset to that would corrupt the start of the document.
    marker_pos = content.find(script_end)
    if marker_pos == -1:
        index = len(content)
    else:
        index = marker_pos + len(script_end)
    content = content[:index] + add_str + content[index:]

    # Without a ``name`` attribute the response is treated as a file download;
    # with it the HTML page opens normally (this affects
    # FileResponse.set_headers).
    bytes_io = BytesIO()
    setattr(bytes_io, "name", fullpath.name)

    # Wrap the BytesIO in a BufferedRandom: per the original author, serving
    # the bare BytesIO stream loaded slowly.
    file_space = BufferedRandom(bytes_io)
    file_space.write(content.encode())
    file_space.seek(0)    # rewind so the response reads from the beginning
    return file_space


@xframe_options_sameorigin    # works around X_FRAME_OPTIONS=deny preventing the page from loading in a frame
def serve(request, path, document_root=None, show_indexes=False):
    """
    Serve a static file located below ``document_root``.

    This is a customised copy of ``django.views.static.serve``. Hook it into
    the URLconf the same way as the stock view::

        path('<path:path>', serve, {'document_root': '/path/to/my/files/'})

    ``document_root`` must be supplied. Pass ``show_indexes=True`` to get a
    basic directory listing for directories; otherwise requesting a directory
    raises ``Http404``. Files whose name contains ``"genefuse.html"`` are not
    streamed from disk as-is: the stream returned by ``set_genefuse_html`` is
    sent instead.
    """
    relative = posixpath.normpath(path).lstrip('/')
    target = Path(safe_join(document_root, relative))

    # Directories: list them only when explicitly allowed.
    if target.is_dir():
        if not show_indexes:
            raise Http404(_("Directory indexes are not allowed here."))
        return directory_index(relative, target)

    if not target.exists():
        raise Http404(_('“%(path)s” does not exist') % {'path': target})

    # Honour the If-Modified-Since request header.
    file_stat = target.stat()
    since_header = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if not was_modified_since(since_header, file_stat.st_mtime, file_stat.st_size):
        return HttpResponseNotModified()

    # For genefuse reports, inject the JS snippet that implements page logic.
    patched = set_genefuse_html(target) if "genefuse.html" in target.name else None

    guessed_type, encoding = mimetypes.guess_type(str(target))

    # Send the patched stream when there is one, else the file on disk.
    response = FileResponse(
        patched or target.open('rb'),
        content_type=guessed_type or 'application/octet-stream',
    )
    response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
    if encoding:
        response.headers["Content-Encoding"] = encoding
    return response
# url.py
import os

from django.conf import settings
from django.urls import path, re_path
from .task_html import serve


urlpatterns = [
    # ... other routes go here

    # Route /media/<path> to the customised ``serve`` view, rooted at
    # MEDIA_ROOT/NGS/analysisData/zip_file_dir.
    re_path(r'^media/(?P<path>.*)$', serve, {"document_root": os.path.join(
        settings.MEDIA_ROOT, "NGS/analysisData/zip_file_dir")}),
]

3、使用模板语法include引入html内容

利用fastapi与jinja2模板实现该功能,其他框架类似

在settings中定义templates模板文件夹

# config
import os

from jinja2 import Environment, FileSystemLoader
from pydantic import BaseSettings

class STMESettings(BaseSettings):
    """Base settings class that the Prod/Test/Dev settings classes extend."""
    ... # other settings

    # Jinja2 environment loading templates from the "templates" folder
    # (assumes the template files live in that folder).
    # NOTE(review): pydantic may require ``arbitrary_types_allowed`` in Config
    # to accept a field typed as jinja2.Environment — confirm.
    templates: Environment = Environment(loader=FileSystemLoader(["templates/", ]))


class ProdSettings(STMESettings):
    """Settings instantiated when APP_ENV is "prod"."""

    class Config:
        # Match environment variable names case-sensitively and point
        # pydantic at the production env file (UTF-8 encoded).
        case_sensitive = True
        env_file = "core/envs/prod.env"
        env_file_encoding = 'utf-8'



class TestSettings(STMESettings):
    """Settings instantiated when APP_ENV is "test"."""

    class Config:
        # Match environment variable names case-sensitively and point
        # pydantic at the test env file (UTF-8 encoded).
        case_sensitive = True
        env_file = "core/envs/test.env"
        env_file_encoding = 'utf-8'


class DevSettings(STMESettings):
    """Settings instantiated for any APP_ENV other than "prod" or "test" (the default is "dev")."""

    class Config:
        # Match environment variable names case-sensitively and point
        # pydantic at the development env file (UTF-8 encoded).
        case_sensitive = True
        env_file = "core/envs/dev.env"
        env_file_encoding = 'utf-8'


# Choose the settings class from the APP_ENV environment variable. The value
# is compared case-insensitively and defaults to "dev"; any value other than
# "prod" or "test" falls back to the development settings.
# Requires ``import os`` at the top of this module.
env = os.getenv('APP_ENV', 'dev').lower()

if env == 'prod':
    settings = ProdSettings()
elif env == 'test':
    settings = TestSettings()
else:
    settings = DevSettings()

编写模板处理api函数

# utils
import re
from fastapi import APIRouter
from starlette.requests import Request
from starlette.responses import HTMLResponse
from core.config import settings


router = APIRouter()


# Matches the main page of a mutscan report, i.e. a path ending in
# "<ID>x<ID>.mutscan.html.files/main.html". Written as a raw string so the
# backslashes reach the regex engine, with every literal dot escaped.
_MUTSCAN_MAIN_RE = re.compile(r'[0-9A-Z]+x[0-9A-Z]+\.mutscan\.html\.files/main\.html$')


@router.get("/mutscan/{filename:path}")
async def read_root(filename: str, request: Request) -> HTMLResponse:
    """Render a mutscan HTML page through the Jinja2 environment.

    When ``filename`` is the main page of a mutscan report, the wrapper
    template ``ngs/mutscan_main_base.html`` is rendered with ``include_path``
    set to ``cancer/<filename>``. Any other path is rendered directly as the
    template ``cancer/<filename>`` with ``include_path`` set to ``None``.

    :param filename: path captured from the URL after ``/mutscan/``.
    :param request: the incoming request, passed to the template context.
    :return: ``HTMLResponse`` with the rendered template and status 200.
    :raises jinja2.TemplateNotFound: from ``get_template`` when the selected
        template does not exist.
    """
    # NOTE(review): ``filename`` comes straight from the URL and is used as a
    # template path; this relies on the Jinja2 loader rejecting path
    # traversal — confirm.
    if _MUTSCAN_MAIN_RE.search(filename):
        template_name = "ngs/mutscan_main_base.html"
        include_path = f"cancer/{filename}"
    else:
        template_name = f"cancer/{filename}"
        include_path = None

    template = settings.templates.get_template(template_name)
    return HTMLResponse(
        content=template.render(request=request, include_path=include_path),
        status_code=200,
    )

创建template_exception_handler异常捕获

# exception.py
from fastapi import Request, status
from jinja2 import TemplateNotFound
from starlette.responses import JSONResponse


async def template_exception_handler(request: Request, exc: TemplateNotFound):
    """
    Catch ``TemplateNotFound`` and answer with a JSON 404 instead of a 500 error.

    :param request: the incoming request (not used).
    :param exc: the exception that was raised (not used).
    :return: ``JSONResponse`` with body ``{'detail': 'Template not found'}``
        and status ``HTTP_404_NOT_FOUND``.
    """
    body = {'detail': 'Template not found'}
    return JSONResponse(status_code=status.HTTP_404_NOT_FOUND, content=body)

在fastapi的main入口文件增加app.add_exception_handler(TemplateNotFound, template_exception_handler)用于捕获异常

# main.py

from fastapi.staticfiles import StaticFiles
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from jinja2 import TemplateNotFound

from apps.auth.api.v1.api import router as auth_router
from apps.wes.api.v1.api import router as wes_router
from apps.ngs.api.v1.api import router as ngs_router
from core.event.redis import register_redis
from core.config import settings
from core.exception import template_exception_handler

# Create the application and add the CORS middleware, taking the allowed
# origins and methods from settings.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.APP_ALLOW_ORIGINS,
    allow_credentials=True,
    allow_methods=settings.APP_ALLOW_METHODS,
    allow_headers=["*"],
)

# Register the handler for TemplateNotFound exceptions.
app.add_exception_handler(TemplateNotFound, template_exception_handler)

register_redis(app)

# Only the auth router is included here; wes_router and ngs_router are
# imported in this module but not included.
app.include_router(auth_router, prefix="/" + settings.API_V1_STR)

# Serve the local "static" directory under /static.
app.mount("/static", StaticFiles(directory="static"), name="static")



@app.get("/ping")
async def ping():
    """Answer every request with the fixed payload ``{"message": "pong"}``."""
    reply = {"message": "pong"}
    return reply


from fastapi import FastAPI, Request
from utils.limiter_util import register_slowapi, limiter
# NOTE(review): presumably attaches the slowapi rate limiter to the app;
# register_slowapi is defined in utils.limiter_util and not shown here — confirm.
register_slowapi(app)

@app.get("/")
@limiter.limit("5/minute")
async def index(request: Request):
    """Return a fixed welcome string; decorated with ``limiter.limit("5/minute")``."""
    greeting = "welcome baby!"
    return greeting

# When executed as a script, run the app with uvicorn on 0.0.0.0:8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

定义html模板文件

{# Pull in the template named by the include_path context variable, then append the script block below. #}
{% include include_path %}
<script type="text/javascript">
    // Insert JS here; innerHTML can be used at this point to add extra HTML content
	// testdiv = document.getElementById("menu").getElementsByTagName("div")[0]
    // var inputBoxStr = ` 排序:<input type="checkbox" id="my_input_box" oninput="boxChange(event)">`
    // testdiv.innerHTML=testdiv.innerHTML + inputBoxStr;
</script>

0

评论区