mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-19 18:58:47 +08:00
feat: enhance search, ranking, reranker and CLI tooling across ccw and codex-lens
Major improvements to smart-search, chain-search cascade, ranking pipeline, reranker factory, CLI history store, codex-lens integration, and uv-manager. Simplify command-generator skill by inlining phases. Add comprehensive tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
340
codex-lens/scripts/bootstrap_reranker_local.py
Normal file
340
codex-lens/scripts/bootstrap_reranker_local.py
Normal file
@@ -0,0 +1,340 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bootstrap a local-only ONNX reranker environment for CodexLens.
|
||||
|
||||
This script defaults to dry-run output so it can be used as a reproducible
|
||||
bootstrap manifest. When `--apply` is passed, it installs pinned reranker
|
||||
packages into the selected virtual environment and can optionally pre-download
|
||||
the ONNX reranker model into a repo-local Hugging Face cache.
|
||||
|
||||
Examples:
|
||||
python scripts/bootstrap_reranker_local.py --dry-run
|
||||
python scripts/bootstrap_reranker_local.py --apply --download-model
|
||||
python scripts/bootstrap_reranker_local.py --venv .venv --model Xenova/ms-marco-MiniLM-L-12-v2
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
# Location of this script; the paths below are derived from it.
_THIS_SCRIPT = Path(__file__)

# Parent of the directory that contains this script.
PROJECT_ROOT = _THIS_SCRIPT.resolve().parents[1]
# Pinned requirement manifest stored next to this script.
MANIFEST_PATH = _THIS_SCRIPT.with_name("requirements-reranker-local.txt")
# Model repo used when --model is not given.
DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"
# Hugging Face cache directory used when --hf-home is not given.
DEFAULT_HF_HOME = PROJECT_ROOT.joinpath(".cache", "huggingface")

# Optional explanatory note printed under a manifest section in the dry-run
# plan, keyed by section name.
STEP_NOTES: dict[str, str] = {
    "runtime": "Install the local ONNX runtime first so optimum/transformers do not backtrack over runtime wheels.",
    "hf-stack": "Pin the Hugging Face stack used by the ONNX reranker backend.",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RequirementStep:
    """One named group of requirement strings that is installed in a single pip call."""

    # Section name, taken from a "# [name]" header line of the manifest.
    name: str
    # Requirement specifiers listed under that header, in file order.
    packages: tuple[str, ...]
|
||||
|
||||
|
||||
def _normalize_venv_path(raw_path: str | Path) -> Path:
    """Return the absolute, user-expanded path of the target virtual environment.

    A falsy ``raw_path`` (for example an empty string) selects
    ``PROJECT_ROOT / ".venv"`` instead.
    """
    if raw_path:
        candidate = Path(raw_path)
    else:
        candidate = PROJECT_ROOT / ".venv"
    return candidate.expanduser().resolve()
|
||||
|
||||
|
||||
def _venv_python(venv_path: Path) -> Path:
    """Return the interpreter path inside ``venv_path`` for the current OS.

    Windows (``os.name == "nt"``) uses ``Scripts/python.exe``; every other
    platform uses ``bin/python``.
    """
    relative = ("Scripts", "python.exe") if os.name == "nt" else ("bin", "python")
    return venv_path.joinpath(*relative)
|
||||
|
||||
|
||||
def _venv_huggingface_cli(venv_path: Path) -> Path:
    """Return the path of the Hugging Face CLI executable inside ``venv_path``.

    The ``hf`` entry point is returned when it exists on disk. Otherwise the
    path of ``huggingface-cli`` is returned without checking that it exists.
    On Windows both names live under ``Scripts`` with an ``.exe`` suffix;
    elsewhere they live under ``bin`` with no suffix.
    """
    if os.name == "nt":
        scripts_dir, suffix = venv_path / "Scripts", ".exe"
    else:
        scripts_dir, suffix = venv_path / "bin", ""

    modern_cli = scripts_dir / f"hf{suffix}"
    if modern_cli.exists():
        return modern_cli
    return scripts_dir / f"huggingface-cli{suffix}"
|
||||
|
||||
|
||||
def _default_shell() -> str:
    """Return the default shell dialect: ``"powershell"`` on Windows, ``"bash"`` elsewhere."""
    if os.name == "nt":
        return "powershell"
    return "bash"
|
||||
|
||||
|
||||
def _shell_quote(value: str, shell: str) -> str:
    """Quote ``value`` as one literal argument for ``shell``.

    ``"bash"`` delegates to :func:`shlex.quote`. Any other ``shell`` value gets
    single-quote wrapping with embedded single quotes doubled.
    """
    if shell != "bash":
        escaped = value.replace("'", "''")
        return f"'{escaped}'"
    return shlex.quote(value)
|
||||
|
||||
|
||||
def _format_command(parts: Iterable[str], shell: str) -> str:
    """Render ``parts`` as one space-separated command line quoted for ``shell``.

    Each part is converted with ``str()`` before quoting.
    """
    quoted_parts = [_shell_quote(str(token), shell) for token in parts]
    return " ".join(quoted_parts)
|
||||
|
||||
|
||||
def _format_set_env(name: str, value: str, shell: str) -> str:
    """Render a statement that sets environment variable ``name`` to ``value``.

    ``"bash"`` yields ``export NAME=<quoted>``; any other ``shell`` value yields
    the ``$env:NAME = <quoted>`` form.
    """
    rendered_value = _shell_quote(value, shell)
    if shell != "bash":
        return f"$env:{name} = {rendered_value}"
    return f"export {name}={rendered_value}"
|
||||
|
||||
|
||||
def _model_local_dir(hf_home: Path, model_name: str) -> Path:
    """Return the download directory for ``model_name`` under ``hf_home``.

    The directory is ``hf_home / "models" / <slug>``, where the slug is
    ``model_name`` with every ``/`` replaced by ``--``.
    """
    slug = "--".join(model_name.split("/"))
    return hf_home.joinpath("models", slug)
|
||||
|
||||
|
||||
def _parse_manifest(manifest_path: Path) -> list[RequirementStep]:
    """Parse the ordered requirement manifest into install steps.

    Manifest format, one entry per line:

    * ``# [name]`` opens a new section called ``name``.
    * Any other line starting with ``#`` and every blank line is ignored.
    * Every remaining line is a requirement specifier that belongs to the most
      recently opened section. A trailing comment (``#`` preceded by
      whitespace, as in pip requirements files) is removed from it.

    The file is decoded as UTF-8 and a leading byte-order mark is tolerated.
    Sections that contain no requirement are dropped from the result.

    Args:
        manifest_path: Path of the manifest file to read.

    Returns:
        The non-empty sections in file order.

    Raises:
        ValueError: If a requirement appears before the first section header,
            if a section header has an empty name, or if the manifest yields
            no step at all.
        OSError: If the manifest cannot be read.
    """
    import re  # Local import: ``re`` is not part of the module-level imports.

    steps: list[RequirementStep] = []
    current_name: str | None = None
    current_packages: list[str] = []

    # "utf-8-sig" strips a BOM, which would otherwise hide the leading "#" of
    # the first line and make it look like a package entry.
    for raw_line in manifest_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("# [") and line.endswith("]"):
            section_name = line[3:-1].strip()
            if not section_name:
                # An unnamed section would silently swallow its packages.
                raise ValueError(f"Empty section header in {manifest_path}")
            if current_name and current_packages:
                steps.append(RequirementStep(current_name, tuple(current_packages)))
            current_name = section_name
            current_packages = []
            continue

        if line.startswith("#"):
            continue

        if current_name is None:
            raise ValueError(f"Package entry found before a section header in {manifest_path}")

        # Drop an inline comment so it is not handed to pip as part of the
        # requirement. "#" without preceding whitespace (e.g. a URL fragment)
        # is left untouched.
        requirement = re.split(r"\s+#", line, maxsplit=1)[0].strip()
        current_packages.append(requirement)

    if current_name and current_packages:
        steps.append(RequirementStep(current_name, tuple(current_packages)))

    if not steps:
        raise ValueError(f"No requirement steps found in {manifest_path}")
    return steps
|
||||
|
||||
|
||||
def _pip_install_command(python_path: Path, packages: Iterable[str]) -> list[str]:
    """Build the ``python -m pip install`` argument list for one manifest step.

    The command runs pip through ``python_path`` and passes ``--upgrade``,
    ``--disable-pip-version-check``, ``--upgrade-strategy only-if-needed`` and
    ``--only-binary=:all:`` ahead of ``packages``.
    """
    command = [str(python_path), "-m", "pip", "install"]
    command += [
        "--upgrade",
        "--disable-pip-version-check",
        "--upgrade-strategy",
        "only-if-needed",
        "--only-binary=:all:",
    ]
    command.extend(packages)
    return command
|
||||
|
||||
|
||||
def _probe_command(python_path: Path) -> list[str]:
    """Build the command that prints ``check_reranker_available('onnx')``.

    The snippet is executed with ``python_path -c`` and imports the check from
    ``codexlens.semantic.reranker.factory``.
    """
    statements = (
        "from codexlens.semantic.reranker.factory import check_reranker_available",
        "print(check_reranker_available('onnx'))",
    )
    return [str(python_path), "-c", "; ".join(statements)]
|
||||
|
||||
|
||||
def _download_command(huggingface_cli: Path, model_name: str, model_dir: Path) -> list[str]:
    """Build the CLI invocation ``<cli> download <model_name> --local-dir <model_dir>``."""
    executable = str(huggingface_cli)
    destination = str(model_dir)
    return [executable, "download", model_name, "--local-dir", destination]
|
||||
|
||||
|
||||
def _print_plan(
    shell: str,
    venv_path: Path,
    python_path: Path,
    huggingface_cli: Path,
    manifest_path: Path,
    steps: list[RequirementStep],
    model_name: str,
    hf_home: Path,
) -> None:
    """Print the dry-run bootstrap plan to stdout.

    The plan has four parts: a header describing the targets, the pinned
    manifest steps (with their ``STEP_NOTES`` entry when one exists), the
    numbered commands rendered for ``shell``, and the optional runtime
    environment variables. Nothing is executed.
    """
    model_dir = _model_local_dir(hf_home, model_name)

    header_lines = (
        "CodexLens local reranker bootstrap",
        f"manifest: {manifest_path}",
        f"target_venv: {venv_path}",
        f"target_python: {python_path}",
        "backend: onnx",
        f"model: {model_name}",
        f"hf_home: {hf_home}",
        "mode: dry-run",
        "notes:",
        "- Uses only the selected venv Python; no global pip commands are emitted.",
        "- Targets the local ONNX reranker backend only; no API or LiteLLM providers are involved.",
        "",
        "pinned_steps:",
    )
    for text in header_lines:
        print(text)

    for step in steps:
        print(f"- {step.name}: {', '.join(step.packages)}")
        note = STEP_NOTES.get(step.name)
        if note:
            print(f" note: {note}")

    print("")
    print("commands:")

    def _rendered_commands() -> Iterable[str]:
        # Yields lazily so each command is rendered right before it is printed.
        yield _format_command(
            [str(python_path), "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"],
            shell,
        )
        for step in steps:
            yield _format_command(_pip_install_command(python_path, step.packages), shell)
        yield _format_set_env("HF_HOME", str(hf_home), shell)
        yield _format_command(_download_command(huggingface_cli, model_name, model_dir), shell)
        yield _format_command(_probe_command(python_path), shell)

    for command_index, rendered in enumerate(_rendered_commands(), start=1):
        print(f"{command_index}. " + rendered)

    print("")
    print("optional_runtime_env:")
    runtime_env = (
        ("RERANKER_BACKEND", "onnx"),
        ("RERANKER_MODEL", str(model_dir)),
        ("HF_HOME", str(hf_home)),
    )
    for env_name, env_value in runtime_env:
        print(_format_set_env(env_name, env_value, shell))
|
||||
|
||||
|
||||
def _run_command(command: list[str], *, env: dict[str, str] | None = None) -> None:
    """Run ``command`` as a child process and raise if it exits non-zero.

    The child environment is the current process environment overlaid with
    ``env``. ``PYTHONUTF8=1`` and ``PYTHONIOENCODING=utf-8`` are supplied only
    when neither source already defines them.

    Raises:
        subprocess.CalledProcessError: If the command returns a non-zero code.
    """
    utf8_defaults = {"PYTHONUTF8": "1", "PYTHONIOENCODING": "utf-8"}
    # Later mappings win: defaults < current environment < explicit overrides.
    child_env = {**utf8_defaults, **os.environ, **(env or {})}
    subprocess.run(command, check=True, env=child_env)
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point for the local reranker bootstrap.

    Without ``--apply`` the bootstrap plan is printed and nothing is executed.
    With ``--apply`` pip, setuptools and wheel are upgraded in the target
    virtual environment, every manifest step is installed in file order, and,
    when requested, the model is downloaded (``--download-model``) and the
    availability probe is run (``--probe``). ``--apply`` and ``--dry-run``
    cannot be combined; argparse rejects that combination.

    Returns:
        ``0`` on success. ``1`` when the manifest cannot be loaded, the venv
        interpreter is missing, or the Hugging Face CLI is missing after the
        install steps. When an executed command fails, its positive exit code
        is returned (``1`` if the code is not positive).
    """
    parser = argparse.ArgumentParser(
        description="Bootstrap pinned local-only ONNX reranker dependencies for a CodexLens virtual environment.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--venv",
        type=Path,
        default=PROJECT_ROOT / ".venv",
        help="Path to the CodexLens virtual environment (default: ./.venv under codex-lens).",
    )
    parser.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help=f"Model repo to pre-download for local reranking (default: {DEFAULT_MODEL}).",
    )
    parser.add_argument(
        "--hf-home",
        type=Path,
        default=DEFAULT_HF_HOME,
        help="Repo-local Hugging Face cache directory used for optional model downloads.",
    )
    parser.add_argument(
        "--shell",
        choices=("powershell", "bash"),
        default=_default_shell(),
        help="Shell syntax to use when rendering dry-run commands.",
    )
    # The two modes contradict each other; previously "--apply --dry-run"
    # silently performed the installs.
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--apply",
        action="store_true",
        help="Execute the pinned install steps against the selected virtual environment.",
    )
    mode_group.add_argument(
        "--dry-run",
        action="store_true",
        help="Print the deterministic bootstrap plan. This is also the default when --apply is omitted.",
    )
    parser.add_argument(
        "--download-model",
        action="store_true",
        help="When used with --apply, pre-download the model into the configured HF_HOME directory.",
    )
    parser.add_argument(
        "--probe",
        action="store_true",
        help="When used with --apply, run a small reranker availability probe at the end.",
    )

    args = parser.parse_args()

    try:
        steps = _parse_manifest(MANIFEST_PATH)
    except (OSError, ValueError) as exc:
        print(f"Cannot load requirement manifest {MANIFEST_PATH}: {exc}", file=sys.stderr)
        return 1

    venv_path = _normalize_venv_path(args.venv)
    python_path = _venv_python(venv_path)
    hf_home = args.hf_home.expanduser().resolve()

    if not args.apply:
        _print_plan(
            shell=args.shell,
            venv_path=venv_path,
            python_path=python_path,
            huggingface_cli=_venv_huggingface_cli(venv_path),
            manifest_path=MANIFEST_PATH,
            steps=steps,
            model_name=args.model,
            hf_home=hf_home,
        )
        return 0

    if not python_path.exists():
        print(f"Target venv Python not found: {python_path}", file=sys.stderr)
        return 1

    model_dir = _model_local_dir(hf_home, args.model)

    try:
        _run_command(
            [str(python_path), "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"]
        )
        for step in steps:
            _run_command(_pip_install_command(python_path, step.packages))

        if args.download_model:
            # Resolve the CLI only now: which executable exists is decided by
            # the install steps above, so a path chosen earlier can be stale.
            huggingface_cli = _venv_huggingface_cli(venv_path)
            if not huggingface_cli.exists():
                print(f"Expected venv-local Hugging Face CLI not found: {huggingface_cli}", file=sys.stderr)
                return 1
            hf_home.mkdir(parents=True, exist_ok=True)
            # _run_command layers these overrides on top of os.environ.
            _run_command(
                _download_command(huggingface_cli, args.model, model_dir),
                env={"HF_HOME": str(hf_home)},
            )

        if args.probe:
            # Prefer the downloaded copy when it exists; values already set in
            # the caller's environment for the RERANKER_* variables win.
            default_model = str(model_dir if model_dir.exists() else args.model)
            probe_env = {
                "HF_HOME": str(hf_home),
                "RERANKER_BACKEND": os.environ.get("RERANKER_BACKEND", "onnx"),
                "RERANKER_MODEL": os.environ.get("RERANKER_MODEL", default_model),
            }
            _run_command(_probe_command(python_path), env=probe_env)
    except subprocess.CalledProcessError as exc:
        failed = _format_command(exc.cmd, args.shell)
        print(f"Command failed with exit code {exc.returncode}: {failed}", file=sys.stderr)
        return exc.returncode if exc.returncode > 0 else 1

    return 0
|
||||
|
||||
|
||||
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
||||
13
codex-lens/scripts/requirements-reranker-local.txt
Normal file
13
codex-lens/scripts/requirements-reranker-local.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
# Ordered local ONNX reranker pins for CodexLens.
|
||||
# Validated against the repo-local Python 3.13 virtualenv on Windows.
|
||||
# bootstrap_reranker_local.py installs each section in file order to keep
|
||||
# pip resolver work bounded and repeatable.
|
||||
|
||||
# [runtime]
|
||||
numpy==2.4.0
|
||||
onnxruntime==1.23.2
|
||||
|
||||
# [hf-stack]
|
||||
huggingface-hub==0.36.2
|
||||
transformers==4.53.3
|
||||
optimum[onnxruntime]==2.1.0
|
||||
Reference in New Issue
Block a user