Source code for luci.archive

import logging
import re
import sys
import zipfile
from pathlib import Path
from subprocess import run
from tempfile import NamedTemporaryFile, TemporaryDirectory

# LaTeX command names (without the leading backslash) whose brace argument
# is scanned for a file path. This list is the default value of the
# ``commands_to_flatten`` parameter of ``flatten_latex``.
DEFAULT_COMMANDS = [
    "documentclass",
    "includegraphics",
    "addbibresource",
    "bibliography",
    "RequirePackage",
    "usepackage",
    "InputIfFileExists",
    "templatetype",
]


# Extensions to prefer, per command, when an extension-less argument matches
# several files on disk. Earlier entries win.
_PREFERRED_EXTENSIONS: dict[str, list[str]] = {
    "documentclass": [".cls"],
    "addbibresource": [".bib"],
    "bibliography": [".bib"],
    "RequirePackage": [".sty"],
    "usepackage": [".sty"],
    "templatetype": [".sty"],
    "InputIfFileExists": [".ldf", ".tex", ".sty"],
    "includegraphics": [
        ".pdf",
        ".png",
        ".jpg",
        ".jpeg",
        ".eps",
    ],
}


def _choose_candidate(command: str, candidates: list[Path]) -> Path:
    """Pick one file among several that match an extension-less argument."""
    for ext in _PREFERRED_EXTENSIONS.get(command, []):
        for candidate in candidates:
            if candidate.suffix.lower() == ext:
                return candidate
    # No preferred extension matched: fall back to the first candidate
    # deterministically.
    return sorted(candidates)[0]


def strip_paths_from_command(
    latex_text: str, command: str
) -> tuple[str, dict[str, Path]]:
    r"""
    Replace ``\command{path/to/file}`` with ``\command{file}``.

    Every occurrence of the command in ``latex_text`` is handled. The brace
    group that is rewritten is the one directly following the command name,
    an optional ``*`` and any ``[...]`` option groups.

    Arguments containing a macro (a backslash) are left untouched in the
    text; local files matching the static part of the file name are still
    recorded. An extension-less argument that does not exist on disk is
    resolved by globbing ``<name>.*`` next to it; when nothing matches, the
    occurrence is left unchanged and nothing is recorded.

    Args:
        latex_text: The LaTeX document (or a single line of it) as a string.
        command: The command name without backslash, e.g., 'includegraphics'.

    Returns:
        A tuple:
            - Updated LaTeX text with paths stripped
            - Mapping of file name (as it should appear in the archive) to
              the path of the file it was resolved to
    """
    # Trailing letters after the command name are tolerated, as before, so
    # e.g. ``\bibliographystyle`` is still scanned under 'bibliography'.
    pattern = re.compile(
        r"(\\"
        + re.escape(command)
        + r"[A-Za-z@]*\*?(?:\s*\[[^\]]*\])*\s*)\{([^}]+)\}"
    )
    replacements: dict[str, Path] = {}

    def replacer(match):
        prefix = match.group(1)
        arg = match.group(2).strip()

        # If the argument contains a macro, try to include matching local
        # files but don't modify the LaTeX content.
        if "\\" in arg:
            macro_path = Path(arg)
            name_part = macro_path.name
            static_prefix = name_part.split("\\", 1)[0]
            if static_prefix:
                static_suffix = Path(name_part).suffix
                for found in macro_path.parent.glob(
                    static_prefix + "*" + static_suffix
                ):
                    replacements[found.name] = found
            return match.group(0)

        full_path = Path(arg)
        filename = full_path.name

        if not full_path.exists() and full_path.suffix == "":
            candidates = list(full_path.parent.glob(filename + ".*"))
            if not candidates:
                logging.debug("No matches for %s", full_path)
                return match.group(0)
            if len(candidates) == 1:
                full_path = candidates[0]
            else:
                full_path = _choose_candidate(command, candidates)

        replacements[full_path.name] = full_path
        # The rewritten argument keeps the name exactly as the author wrote
        # it (without any extension that was resolved above).
        return prefix + "{" + filename + "}"

    updated_text = pattern.sub(replacer, latex_text)
    return updated_text, replacements
# A percent sign that is not escaped as ``\%`` starts a TeX comment.
_UNESCAPED_PERCENT = re.compile(r"(?<!\\)%")


def _resolve_input_path(root: Path, filename: str) -> Path:
    r"""Locate the file named by an ``\input``/``\include`` argument."""
    target = root.joinpath(filename)
    if target.suffix == ".tex":
        return target
    # TeX appends ".tex" to the name as written; try that first.
    appended = target.with_name(target.name + ".tex")
    if appended.is_file():
        return appended
    # A file with another extension (e.g. .pgf, .tikz) is used as given.
    if target.suffix and target.is_file():
        return target
    # Nothing found: keep the historical guess so the error names that path.
    return target.with_suffix(".tex")


def flatten_latex(
    file_path: Path,
    commands_to_flatten=DEFAULT_COMMANDS,
    root: Path | None = None,
    scratch=None,
) -> tuple[str, dict[str, Path]]:
    r"""
    Recursively flatten a LaTeX file by replacing ``\input`` and ``\include``
    with the content of the referenced files.

    Lines that start with ``%`` are dropped. On every other line the commands
    in ``commands_to_flatten`` have directory components stripped from their
    file argument, and the referenced files are collected as dependencies.
    Dependencies ending in ``.cls`` or ``.sty`` are flattened themselves and
    replaced by a flattened copy written into the scratch directory.

    Args:
        file_path: The LaTeX file to flatten.
        commands_to_flatten: Command names (without backslash) whose file
            argument is stripped of its directory part.
        root: Directory that ``\input``/``\include`` arguments are resolved
            against. Defaults to the directory containing ``file_path``.
        scratch: Directory for flattened class/style copies, given either as
            a path or as an object with a ``name`` attribute (such as a
            ``TemporaryDirectory``). When omitted, a directory is created
            with ``tempfile.mkdtemp`` the first time one is needed; it is
            NOT removed automatically, because the returned dependency
            paths point into it.

    Returns:
        A tuple of the flattened LaTeX text and a mapping from archive file
        name to the path of the file to store under that name.

    Raises:
        FileNotFoundError: If ``file_path`` or an included file is missing.
    """
    scratch_dir = getattr(scratch, "name", scratch) if scratch else None

    tex_path = Path(file_path).resolve()
    root = root or tex_path.parent
    if not tex_path.exists():
        raise FileNotFoundError(f"File not found: {tex_path}")

    with open(tex_path, encoding="utf-8") as f:
        lines = f.readlines()

    flattened_lines = []
    dependencies: dict[str, Path] = {}
    input_pattern = re.compile(r"^(.*?)\\(input|include)\{([^}]+)\}(.*)$")
    comment_line = re.compile(r"^\s*%")

    for line in lines:
        # Skip comments entirely when searching for commands
        if comment_line.match(line):
            continue

        # Flatten commands
        for cmd in commands_to_flatten:
            line, deps = strip_paths_from_command(line, cmd)
            dependencies.update(deps)

        match = input_pattern.match(line)
        # An \input that sits behind an unescaped % is commented out and must
        # not be expanded.
        if match and not _UNESCAPED_PERCENT.search(match.group(1)):
            pre, _command, filename, post = match.groups()
            inc_path = _resolve_input_path(root, filename)
            included_text, deps = flatten_latex(
                inc_path,
                root=root,
                commands_to_flatten=commands_to_flatten,
                scratch=scratch_dir,
            )
            dependencies.update(deps)
            # Keep any content that precedes the command on the same line
            if pre.strip():
                flattened_lines.append(pre)
            flattened_lines.append(included_text)
            # Add any trailing content after the command on the same line
            if post.strip():
                flattened_lines.append(post + "\n")
        else:
            flattened_lines.append(line)

    # Flatten class and style files to discover nested dependencies
    nested_deps = {}
    for name, file in list(dependencies.items()):
        if file.suffix not in {".cls", ".sty"}:
            continue
        if scratch_dir is None:
            from tempfile import mkdtemp

            # A TemporaryDirectory created here would be deleted as soon as
            # this function returns, leaving the returned paths dangling.
            scratch_dir = mkdtemp()
        text, deps = flatten_latex(file, scratch=scratch_dir)
        # Close the file before its path is handed out so the content is
        # guaranteed to be flushed to disk.
        with NamedTemporaryFile(dir=scratch_dir, delete=False) as fid:
            fid.write(text.encode("utf-8"))
        dependencies[name] = Path(fid.name)
        nested_deps.update(deps)
    dependencies.update(nested_deps)

    return "".join(flattened_lines), dependencies
def create_archive(archive: str, files: dict[str, Path]):
    """
    Write a new zip file at ``archive`` from a name-to-path mapping.

    Args:
        archive: Location of the zip file to create (opened in ``"w"`` mode).
        files: Maps the name an entry gets inside the zip to the file on
            disk whose content is stored under that name.

    A source file that does not exist is reported with a warning and left
    out of the zip; the remaining entries are still written.
    """
    with zipfile.ZipFile(archive, mode="w") as bundle:
        for arcname in files:
            source = files[arcname]
            try:
                bundle.write(filename=source, arcname=arcname)
            except FileNotFoundError as missing:
                logging.warning(
                    "%s not found and will not be added to the zip: %s",
                    source,
                    missing,
                )
def validate_archive(archive: Path, mainfile: str):
    """
    Check that an archive compiles on its own.

    The archive is extracted into a temporary directory and ``tectonic`` is
    run on ``mainfile`` there. On success the resulting LaTeX log is passed
    to ``scan_latex_log``.

    Args:
        archive: The zip file to validate.
        mainfile: Name of the main LaTeX file inside the archive.

    Raises:
        RuntimeError: If ``tectonic`` cannot be started or exits with a
            non-zero status. In the latter case its output and any ``*.log``
            files are printed first.
    """
    with TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(archive, "r") as zipf:
            zipf.extractall(temp_dir)

        try:
            # Pass an argument list instead of a shell string so that file
            # names with spaces or shell metacharacters are handled verbatim.
            result = run(
                ["tectonic", "--keep-logs", str(mainfile)],
                cwd=temp_dir,
                capture_output=True,
                encoding="utf-8",
            )
        except FileNotFoundError as err:
            # Without a shell a missing executable raises instead of giving
            # a non-zero exit status; report it the same way as before.
            logging.error("Archive valiation failed")
            raise RuntimeError("Archive valiation failed") from err

        if result.returncode != 0:
            logging.error("Archive valiation failed")
            print(result.stdout, file=sys.stdout)
            print(result.stderr, file=sys.stderr)
            for log_file in Path(temp_dir).glob("*.log"):
                print(f"{log_file.name}:", file=sys.stderr)
                print(log_file.read_text(), file=sys.stderr)
            raise RuntimeError("Archive valiation failed")

        scan_latex_log(Path(temp_dir).joinpath(mainfile).with_suffix(".log"))
def scan_latex_log(log_path: Path):
    """
    Scan a LaTeX log for common problems and report them via ``logging``.

    Undefined citations and undefined references are logged as warnings,
    missing files as errors. Each category produces at most one log record
    listing the distinct offending names in sorted order.

    Args:
        log_path: Path of the LaTeX log file to scan.
    """
    # LaTeX logs frequently contain bytes that are not valid UTF-8; replace
    # them rather than failing the whole scan.
    log_text = Path(log_path).read_text(encoding="utf-8", errors="replace")

    # Define patterns and their reporting levels (compile lazily here)
    patterns = {
        "Undefined Citations": {
            "pattern": r"(?:Package (?:natbib|biblatex) Warning: Citation|LaTeX Warning: Citation) [`'](.+?)['`].+undefined",
            "level": logging.WARNING,
        },
        "Undefined References": {
            "pattern": r"LaTeX Warning: Reference [`']([^`']+)[`'] on page",
            "level": logging.WARNING,
        },
        "Missing Files": {
            "pattern": r"! LaTeX Error: File [`'](.+?)['`] not found.",
            "level": logging.ERROR,
        },
    }

    for label, info in patterns.items():
        regex = re.compile(info["pattern"], re.MULTILINE)
        # Sort so the message is stable from run to run.
        unique_matches = sorted(set(regex.findall(log_text)))
        if unique_matches:
            logging.log(info["level"], "%s: %s", label, ", ".join(unique_matches))
def add_bbl_file(archive: Path, main: str, deps: dict[str, Path]):
    """
    Compile the archive's main file and append the generated ``.bbl`` files.

    The archive is extracted into a temporary directory, ``tectonic`` is run
    there with ``--keep-intermediates``, and every ``*.bbl`` file found at
    the top level of that directory afterwards is added to the archive.

    Args:
        archive: The zip file to extend (opened in append mode).
        main: Name of the main LaTeX file inside the archive.
        deps: Not used by this function; kept for interface compatibility.

    Raises:
        subprocess.CalledProcessError: If ``tectonic`` cannot be started or
            exits with a non-zero status.
    """
    from subprocess import CalledProcessError

    # Argument list instead of a shell string: file names with spaces or
    # shell metacharacters are passed through verbatim.
    command = ["tectonic", "--keep-intermediates", str(main)]

    with TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(archive, "a") as zipf:
            zipf.extractall(temp_dir)
            try:
                run(
                    command,
                    cwd=temp_dir,
                    capture_output=True,
                    check=True,
                )
            except FileNotFoundError as err:
                # Mirror what the shell reported for a missing executable so
                # callers keep seeing CalledProcessError.
                raise CalledProcessError(127, command) from err

            # Add bbl files to archive
            for file in Path(temp_dir).glob("*.bbl"):
                zipf.write(file, file.name)
def archive(
    main: Path, output: Path | None = None, validate: bool = True, bbl: bool = False
):
    """
    Bundle a LaTeX document and its dependencies into a zip archive.

    The main file is flattened with ``flatten_latex``; the flattened text is
    stored in the archive under the main file's name, next to every
    dependency that was discovered.

    Args:
        main: The main LaTeX file (a ``Path`` or a path string).
        output: Where to write the zip file. Defaults to ``main`` with its
            suffix replaced by ``.zip``.
        validate: When true, run ``validate_archive`` on the result.
        bbl: When true, run ``add_bbl_file`` on the result before validating.
    """
    # Normalise once so ``main.name`` below also works for string paths.
    main = Path(main)
    output = output or main.with_suffix(".zip")

    with TemporaryDirectory() as scratch:
        main_text, deps = flatten_latex(main, scratch=scratch)
        # Close the file before zipping: a NamedTemporaryFile that is still
        # open cannot be opened a second time on Windows. The scratch
        # directory cleanup removes the file afterwards.
        with NamedTemporaryFile(dir=scratch, delete=False) as fid:
            fid.write(main_text.encode("utf-8"))
        deps[main.name] = Path(fid.name)
        create_archive(output, deps)

    if bbl:
        add_bbl_file(output, main.name, deps)
    if validate:
        validate_archive(output, main.name)