import logging
import re
import sys
import zipfile
from collections.abc import Callable, Iterable
from enum import Enum
from pathlib import Path
from subprocess import run
from tempfile import NamedTemporaryFile, TemporaryDirectory
from .bibparse import parse_bibliographies
from .check import scan_logs
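
# Commands whose path-valued arguments are rewritten to bare filenames so the
# referenced files can be collected next to the flattened main document.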
DEFAULT_COMMANDS = [
    "documentclass",
    "includegraphics",
    "addbibresource",
    "bibliography",
    "RequirePackage",
    "usepackage",
    "InputIfFileExists",
    "templatetype",
]
def strip_paths_from_command(
    latex_text: str, command: str
) -> tuple[str, dict[str, Path]]:
    r"""
    Replace \command{path/to/file} with \command{file} and record the
    referenced files so they can be bundled next to the flattened document.

    Args:
        latex_text: The LaTeX document as a string.
        command: The command name without backslash, e.g., 'includegraphics'.

    Returns:
        A tuple of:
        - The updated LaTeX text with paths stripped to bare filenames.
        - A dict mapping each stripped filename to its original path.
    """
    pattern = re.compile(r"(\\" + command + r".*)\{([^}]+)}")
    replacements: dict[str, Path] = {}

    def replacer(match):
        prefix = match.group(1)
        arg = match.group(2).strip()
        # If the argument contains a macro, try to include matching local files
        # but don't modify the LaTeX content.
        if "\\" in arg:
            macro_path = Path(arg)
            parent = macro_path.parent
            name_part = macro_path.name
            static_prefix = name_part.split("\\", 1)[0]
            static_suffix = Path(name_part).suffix
            if static_prefix:
                for c in parent.glob(static_prefix + "*" + static_suffix):
                    replacements[c.name] = c
            return match.group(0)
        full_path = Path(arg)
        filename = full_path.name
        updated = prefix + "{" + filename + "}"
        if not full_path.exists() and full_path.suffix == "":
            candidates = list(full_path.parent.glob(filename + ".*"))
            if len(candidates) == 1:
                full_path = candidates[0]
            elif len(candidates) == 0:
                logging.debug("No matches for %s", full_path)
                return match.group(0)
            else:
                # Prefer an extension based on the command
                pref_exts: dict[str, list[str]] = {
                    "documentclass": [".cls"],
                    "addbibresource": [".bib"],
                    "bibliography": [".bib"],
                    "RequirePackage": [".sty"],
                    "usepackage": [".sty"],
                    "templatetype": [".sty"],
                    "InputIfFileExists": [".ldf", ".tex", ".sty"],
                    "includegraphics": [
                        ".pdf",
                        ".png",
                        ".jpg",
                        ".jpeg",
                        ".eps",
                    ],
                }
                exts = pref_exts.get(command, [])
                chosen = None
                for ext in exts:
                    for c in candidates:
                        if c.suffix.lower() == ext:
                            chosen = c
                            break
                    if chosen is not None:
                        break
                if chosen is not None:
                    full_path = chosen
                else:
                    # Fall back to the first candidate deterministically
                    full_path = sorted(candidates)[0]
        replacements[full_path.name] = full_path
        return updated

    updated_text = pattern.sub(replacer, latex_text)
    return updated_text, replacements
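
# Example (sketch): stripping the path from an \includegraphics argument.
# "figures/plot.png" is a hypothetical path; because it already carries an
# extension it is recorded as-is and the command is rewritten to use the
# bare filename.
#
#   text = r"\includegraphics[width=\linewidth]{figures/plot.png}"
#   new_text, files = strip_paths_from_command(text, "includegraphics")
#   # new_text == r"\includegraphics[width=\linewidth]{plot.png}"
#   # files == {"plot.png": Path("figures/plot.png")}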
def flatten_latex(
    file_path: Path,
    commands_to_flatten=DEFAULT_COMMANDS,
    root: Path | None = None,
    scratch=None,
):
    r"""
    Recursively flatten a LaTeX file by replacing \input and \include with the
    referenced content and stripping paths from the commands in
    ``commands_to_flatten``.

    Returns:
        A tuple of the flattened LaTeX text and a dict mapping bundled
        filenames to their original paths.
    """
    scratch = scratch or TemporaryDirectory()
    scratch_dir = getattr(scratch, "name", scratch)
    tex_path = Path(file_path).resolve()
    root = root or tex_path.parent
    if not tex_path.exists():
        raise FileNotFoundError(f"File not found: {tex_path}")
    with open(tex_path, encoding="utf-8") as f:
        lines = f.readlines()
    flattened_lines = []
    dependencies: dict[str, Path] = {}
    input_pattern = re.compile(r"^(.*?)\\(input|include)\{([^}]+)\}(.*)$")
    comment_line = re.compile(r"^\s*%")
    for line in lines:
        # Drop comment-only lines; they are not carried into the flattened output.
        if comment_line.match(line):
            continue
        # Strip paths from the configured commands and collect the referenced files.
        for cmd in commands_to_flatten:
            line, deps = strip_paths_from_command(line, cmd)
            dependencies.update(deps)
        match = input_pattern.match(line)
        if match:
            pre, _cmd, filename, post = match.groups()
            inc_path = root.joinpath(filename).with_suffix(".tex")
            included_text, deps = flatten_latex(
                inc_path,
                root=root,
                commands_to_flatten=commands_to_flatten,
                scratch=scratch,
            )
            dependencies.update(deps)
            # Preserve any prefix and postfix text on the line (e.g. macro wrappers)
            # by reconstructing the full line with the flattened include inserted.
            flattened_lines.append(pre + included_text + post + "\n")
        else:
            flattened_lines.append(line)
    # Flatten class and style files to discover nested dependencies
    nested_deps = {}
    for name, file in list(dependencies.items()):
        if file.suffix in {".cls", ".sty"}:
            text, deps = flatten_latex(file, scratch=scratch)
            with NamedTemporaryFile(dir=scratch_dir, delete=False) as fid:
                fid.write(text.encode("utf-8"))
            dependencies[name] = Path(fid.name)
            nested_deps.update(deps)
    dependencies.update(nested_deps)
    return "".join(flattened_lines), dependencies
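
# Example (sketch): flattening a hypothetical project whose main file pulls in
# chapters via \input. The call returns the merged text plus the local files
# (graphics, .bib, .cls/.sty, ...) that the archive will need. If the
# temporary copies of flattened .cls/.sty files are needed afterwards, pass a
# scratch directory you control so it is not cleaned up early.
#
#   text, deps = flatten_latex(Path("paper/main.tex"))
#   # "text" is main.tex with every \input/\include expanded in place;
#   # "deps" maps archive-relative filenames to the paths found on disk.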
def replace_citeauthor_commands(
    latex_text: str,
    bib_files: Iterable[Path],
    max_names: int = 2,
) -> str:
    """Replace \\citeauthor and \\citeauthorcite commands with author text.

    - ``\\citeauthor{foo}`` -> "<authors for foo>"
    - ``\\citeauthorcite{foo}`` -> "<authors for foo>\\cite{foo}"

    When an entry has more than ``max_names`` authors, only the first last name
    is kept, followed by " et al.". Otherwise, up to ``max_names`` last names
    are joined with " and ".

    Args:
        latex_text: The flattened LaTeX text.
        bib_files: Paths to .bib files referenced by the project.
        max_names: Maximum number of last names to display before using et al.

    Returns:
        Updated LaTeX text with citeauthor macros replaced.
    """
    # Remove an inline macro definition for \citeauthorcite if present.
    # Exact macro semantics: \newcommand{\citeauthorcite}[1]{\citeauthor{#1}\cite{#1}}
    macro_pat = re.compile(
        r"\\newcommand\{\\citeauthorcite\}\[1\]\{\s*\\citeauthor\{#1\}\s*\\cite\{#1\}\s*\}",
    )
    text = macro_pat.sub("", latex_text)
    # Build a key -> list[last names] map from the provided bib files
    authors = parse_bibliographies(bib_files)
    # Convert to key -> display-string per the max_names policy
    author_map: dict[str, str] = {}
    for key, last_names in authors.items():
        if len(last_names) > max_names:
            display = f"{last_names[0]} et al."
        else:
            display = " and ".join(last_names[:max_names])
        author_map[key] = display
    if not author_map:
        return text

    def render_authors(keys_str: str) -> str:
        keys = [k.strip() for k in keys_str.split(",") if k.strip()]
        rendered: list[str] = []
        for k in keys:
            if k in author_map:
                rendered.append(author_map[k])
            else:
                # Unknown key: keep the key itself rather than failing the build.
                logging.warning("No bibliography entry found for citation key %s", k)
                rendered.append(k)
        return ", ".join(rendered)

    def repl_citeauthor(m: re.Match) -> str:
        return render_authors(m.group(1))

    def repl_citeauthorcite(m: re.Match) -> str:
        keys_str = m.group(1)
        return f"{render_authors(keys_str)}\\cite{{{keys_str}}}"

    text = re.sub(r"\\citeauthor\{([^}]+)\}", repl_citeauthor, text)
    text = re.sub(r"\\citeauthorcite\{([^}]+)\}", repl_citeauthorcite, text)
    return text
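
# Example (sketch): with a hypothetical refs.bib that defines the key
# "smith2020" with three authors, only the first last name is kept:
#
#   tex = r"As shown by \citeauthorcite{smith2020}, ..."
#   out = replace_citeauthor_commands(tex, [Path("refs.bib")])
#   # out == r"As shown by Smith et al.\cite{smith2020}, ..."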
def create_archive(archive: str, files: dict[str, Path]):
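    """Write ``files`` into a zip archive at ``archive``.

    Each file is stored under its destination name (the dict key); source
    paths that do not exist are skipped with a warning.
    """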
    with zipfile.ZipFile(archive, "w") as zipf:
        for dst, src in files.items():
            try:
                zipf.write(src, dst)
            except FileNotFoundError as e:
                logging.warning(
                    "%s not found and will not be added to the zip: %s", src, e
                )
def validate_archive(archive: Path, mainfile: str):
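    """Check that the archive builds on its own.

    Extracts ``archive`` into a temporary directory, compiles ``mainfile``
    with ``tectonic --keep-logs``, and scans the resulting log with
    ``scan_logs``. Raises ``RuntimeError`` if the build fails.
    """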
    with TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(archive, "r") as zipf:
            zipf.extractall(temp_dir)
        result = run(
            f"tectonic --keep-logs {mainfile}",
            cwd=temp_dir,
            capture_output=True,
            shell=True,
            encoding="utf-8",
        )
        if result.returncode != 0:
            logging.error("Archive validation failed")
            print(result.stdout, file=sys.stdout)
            print(result.stderr, file=sys.stderr)
            for log_file in Path(temp_dir).glob("*.log"):
                print(f"{log_file.name}:", file=sys.stderr)
                print(log_file.read_text(), file=sys.stderr)
            raise RuntimeError("Archive validation failed")
        scan_logs(
            [Path(temp_dir).joinpath(mainfile).with_suffix(".log")],
            overflow_threshold_pt=10,
        )
def add_bbl_file(archive: Path, main: str, deps: dict[str, Path]):
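    """Compile the archived project and append any generated .bbl files.

    The archive is extracted to a temporary directory, built with
    ``tectonic --keep-intermediates``, and every resulting ``*.bbl`` file is
    added back into the zip. ``deps`` is currently unused.
    """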
    with TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(archive, "a") as zipf:
            zipf.extractall(temp_dir)
            run(
                f"tectonic --keep-intermediates {main}",
                cwd=temp_dir,
                capture_output=True,
                shell=True,
                check=True,
            )
            # Add generated .bbl files to the archive
            for file in Path(temp_dir).glob("*.bbl"):
                zipf.write(file, file.name)
class BibStyle(Enum):
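    """Supported bibliography backends."""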
    biblatex = "biblatex"
    bibtex = "bibtex"
def archive(
    main: Path,
    output: Path | None = None,
    validate: bool = True,
    bbl: bool = False,
    bibstyle: BibStyle = BibStyle.bibtex,
):
    """
    Create a zip archive of a LaTeX project by flattening inputs.

    Flattens includes and strips paths from common commands to collect local
    dependencies next to the main file, then zips them. With --validate, runs
    `tectonic` on the archive to ensure it compiles; with --bbl, includes
    generated .bbl files after a build.
    """
    output = output or Path(main).with_suffix(".zip")
    with TemporaryDirectory() as scratch:
        main_text, deps = flatten_latex(main, scratch=scratch)
        # Post-process flattened text with additional passes (extensible)
        bibs = [p for n, p in deps.items() if Path(n).suffix == ".bib"]
        passes: list[Callable[[str], str]] = []
        # Replace citeauthor-like commands using available bib files
        if bibs and bibstyle == BibStyle.bibtex:
            passes.append(lambda t: replace_citeauthor_commands(t, bibs))
        for fn in passes:
            main_text = fn(main_text)
        with NamedTemporaryFile(dir=scratch) as fid:
            fid.write(main_text.encode("utf-8"))
            fid.flush()
            deps[main.name] = Path(fid.name)
            create_archive(output, deps)
        if bbl:
            add_bbl_file(output, main.name, deps)
        if validate:
            validate_archive(output, main.name)
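
# Example (sketch): archiving a hypothetical project for submission. This
# flattens paper/main.tex, bundles its local dependencies into paper/main.zip,
# embeds the generated .bbl, and test-builds the resulting archive.
#
#   archive(Path("paper/main.tex"), bbl=True, validate=True)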