pagenotes.app .csv to separated .txt files
#pagenotes.app `.csv` to separated `.txt` files

  #!/usr/bin/env bash
set -euo pipefail

# split_pagenotes_csv.sh
#
# Input: CSV with 4 quoted fields per record:
#   "<url>","<timestamp>","<note text possibly with newlines>","<flag>"
#
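# Output: one .txt per record in /home/rangelma/bash-csv/
#   - filename derived from field 1 (URL) to match your examples:
#       * strip the scheme (e.g. http:// or https://); when a scheme is present,
#         only host and path are kept (query string and fragment are dropped)
#       * split on '/' (or '\') and join with '>'
#       * drop empty leading/trailing segments (so a leading or trailing '/'
#         doesn't create a stray '>')
#       * sanitize unsafe characters (anything but letters, digits, '.', '-',
#         '_' and '>') to '_'
#       * append ".txt"; if that name already exists, "__1", "__2", ... is
#         added before ".txt" instead of overwriting
#   - file content is field 3 (preserving internal newlines), with a trailing
#     newline added when the text is non-empty and does not already end in one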

#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   six lines on stdout (the last one blank); callers redirect
#            to stderr when reporting a usage error.
#######################################
usage() {
  printf '%s\n' \
    'Usage:' \
    '  split_pagenotes_csv.sh /path/to/notes.csv' \
    '' \
    'Writes files to:' \
    '  /home/rangelma/bash-csv/' \
    ''
}

# A help flag in first position wins over every other check, so it works
# even when additional arguments follow it.
case "${1:-}" in
  -h | --help)
    usage
    exit 0
    ;;
esac

# Anything other than exactly one positional argument is a usage error;
# both the message and the help text go to stderr.
if (( $# != 1 )); then
  {
    echo "Error: expected exactly 1 argument (the .csv file)."
    usage
  } >&2
  exit 2
fi

outdir="/home/rangelma/bash-csv"
infile=$1

# Guard clause: the input must be an existing regular file.
[[ -f "$infile" ]] || {
  echo "Error: input file not found: $infile" >&2
  exit 2
}

# Make sure the destination exists before the converter runs.
mkdir -p "$outdir"

# Run the embedded Python program. "-" makes python3 read the program from
# stdin, which is the heredoc below; its delimiter is quoted ('PY'), so the
# shell expands nothing inside it. The input file and output directory are
# passed as arguments and read by the program as sys.argv[1] and sys.argv[2].
python3 - "$infile" "$outdir" <<'PY'
import csv
import os
import re
import sys
from urllib.parse import urlsplit

# Positional arguments supplied on the python3 command line: the CSV file to
# read and the directory that receives the generated .txt files.
infile, outdir = sys.argv[1], sys.argv[2]
# Create the output directory if it is missing; an existing one is not an error.
os.makedirs(outdir, exist_ok=True)

def url_to_filename(url: str, max_stem_len: int = 200) -> str:
    """Turn a record's URL into a flat, filesystem-safe ``.txt`` file name.

    The scheme is removed, path separators become ``>``, leading and trailing
    separators are dropped, and every character outside ``A-Za-z0-9._->`` is
    replaced with ``_``. When a scheme is present only host and path are kept
    (query string and fragment are discarded).

    Args:
        url: URL taken from the first CSV field; surrounding whitespace is
            ignored.
        max_stem_len: Maximum number of characters kept before ``.txt``.
            Longer names are truncated so that the final name (plus any
            collision suffix added by the caller) stays within common
            file-name length limits. Negative values are treated as 0.

    Returns:
        The file name (no directory part), always ending in ``.txt``;
        ``"untitled.txt"`` when nothing usable remains.
    """
    scheme_prefix = r'^[A-Za-z][A-Za-z0-9+.-]*://'
    url = url.strip()

    # A scheme-less "host:port/path" must not go through urlsplit(): it would
    # take "host" for the scheme and the host name would vanish from the name.
    is_bare_host_port = re.match(r'^[^/:?#\s]+:\d+(?:[/?#]|$)', url) is not None

    rest = None
    if not is_bare_host_port:
        try:
            parts = urlsplit(url)
        except ValueError:
            # urlsplit rejects some malformed authorities (e.g. unbalanced
            # "[" in the host); fall back to plain prefix stripping below.
            parts = None
        if parts is not None and parts.scheme:
            rest = parts.netloc + parts.path
    if rest is None:
        rest = re.sub(scheme_prefix, '', url)

    # Convert path separators (either slash direction) to '>'
    rest = rest.replace("\\", "/").replace("/", ">")

    # Drop empty leading/trailing segments (e.g., a trailing '/')
    rest = rest.strip(">")

    # Sanitize: keep letters, digits, '.', '-', '_' and '>'; everything else
    # becomes '_'
    rest = re.sub(r"[^A-Za-z0-9.\-_>]", "_", rest)

    # Very long URLs would otherwise yield a name the filesystem refuses to
    # create. After sanitizing the name is pure ASCII, so a character cap is
    # also a byte cap. Re-strip '>' in case the cut landed on a separator.
    limit = max(max_stem_len, 0)
    if len(rest) > limit:
        rest = rest[:limit].rstrip(">")

    if not rest:
        rest = "untitled"

    return rest + ".txt"

# Read every CSV record and write its note text (third field) to its own file
# inside outdir.
#
# "utf-8-sig" behaves like UTF-8 but also swallows a leading byte-order mark;
# with plain "utf-8" a BOM would be glued to the first record's opening quote
# and corrupt that record's URL field.
with open(infile, newline="", encoding="utf-8-sig") as f:
    reader = csv.reader(
        f,
        delimiter=",",
        quotechar='"',
        doublequote=True,
        strict=False,
    )

    for row_idx, row in enumerate(reader, start=1):
        # Blank lines yield an empty row; skip them quietly.
        if not row:
            continue

        # Expecting 4 fields; only the first 3 are needed, so extra fields are
        # tolerated. Shorter records cannot be processed: report and skip them
        # instead of dropping them silently.
        if len(row) < 3:
            print(
                f"Warning: skipping record {row_idx}: "
                f"expected at least 3 fields, got {len(row)}",
                file=sys.stderr,
            )
            continue

        url = row[0]
        note_text = row[2]

        filename = url_to_filename(url)
        path = os.path.join(outdir, filename)
        base, ext = os.path.splitext(path)

        # Avoid overwriting if multiple records map to the same filename.
        # Mode "x" creates the file only if it does not exist yet, so the
        # existence check and the creation are one atomic step; on a clash
        # try "<name>__1.txt", "<name>__2.txt", ...
        final_path = path
        n = 1
        while True:
            try:
                # newline="" writes the note text verbatim (no newline
                # translation), matching how it was read from the CSV.
                out = open(final_path, "x", encoding="utf-8", newline="")
            except FileExistsError:
                final_path = f"{base}__{n}{ext}"
                n += 1
            else:
                break

        with out:
            out.write(note_text)
            # Terminate non-empty notes with a newline if they lack one.
            if note_text and not note_text.endswith("\n"):
                out.write("\n")
PY

# Reached only when the converter above succeeded (the script runs with
# "set -e"); report where the files were written.
printf 'Done. Wrote files to: %s\n' "$outdir"
URL: https://ib.bsb.br/csv2files
#pagenotes.app .csv to separated .txt files

#pagenotes.app `.csv` to separated `.txt` files