# create_from_scratch_with_csv.py

from anki.collection import *
from anki.import_export_pb2 import *
import os
from libfaketime import fake_time, reexec_if_needed
import csv
from kanjiconv import KanjiConv

# Setup for faking the time later on; libfaketime may need to restart the
# interpreter for this, so it is done before any other work.
reexec_if_needed()

# Step 1: create a temporary collection
# Delete leftovers of a previous run. Every file is removed in its own
# try block: if the collection file is already gone, the media database
# must still be deleted instead of being skipped by the first failure.
for stale_file in ("temporary.anki2", "temporary.media.db2"):
    try:
        os.remove(stale_file)
    except OSError:
        # best effort: the file usually just does not exist yet
        pass
# then start fresh
col = Collection("temporary.anki2")

# Step 2: add a new note type
# Load the front/back HTML templates of both card types. The encoding is
# given explicitly so the result does not depend on the locale default of
# the machine running this script (assumes the built templates are UTF-8).
with open("dist/kanji/front.html", "r", encoding="utf-8") as front_file:
    kanji_front_html = front_file.read()
with open("dist/kanji/back.html", "r", encoding="utf-8") as back_file:
    kanji_back_html = back_file.read()
with open("dist/hiragana/front.html", "r", encoding="utf-8") as front_file:
    hiragana_front_html = front_file.read()
with open("dist/hiragana/back.html", "r", encoding="utf-8") as back_file:
    hiragana_back_html = back_file.read()
# Dedicated deck for the cards generated by the "Hiragana to English" template.
hiragana_deck_id = col.decks.add_normal_deck_with_name("Lots of Hiragana").id

# Definition of the note type: two card templates and four fields.
note_type = {
    # the 0 has no effect, the actual ID will be the timestamp at the time
    # of adding the note type
    'id': 0,
    'name': "My cool note type",
    'type': 0,
    'mod': 0,
    'usn': 0,
    'sortf': 0,
    'did': None,
    # One entry per card template; 'ord' is the position in the tuple below.
    'tmpls': [
        {
            'name': template_name,
            'ord': template_ord,
            'qfmt': front_html,
            'afmt': back_html,
            'bqfmt': '',
            'bafmt': '',
            'did': target_deck_id,
            'bfont': '',
            'bsize': 0,
        }
        for template_ord, (template_name, front_html, back_html, target_deck_id) in enumerate((
            # no deck override: these cards go to the deck the note is added to
            ('English to Japanese', kanji_front_html, kanji_back_html, None),
            # Hiragana cards ignore the deck chosen when adding the note and
            # are always stored in their own deck
            ('Hiragana to English', hiragana_front_html, hiragana_back_html, hiragana_deck_id),
        ))
    ],
    # One entry per field; 'ord' and 'id' both equal the position in the
    # tuple below. The first field ("Front") must always be non-empty, the
    # others can be empty.
    'flds': [
        {
            'name': field_name,
            'ord': field_index,
            # the purpose of the following settings is unclear, styling is
            # done with CSS instead
            'sticky': False,
            'rtl': False,
            'font': 'Arial',
            'size': 20,
            'description': '',
            'plainText': False,
            'collapsed': False,
            'excludeFromSearch': False,
            'id': field_index,
            'tag': None,
            'preventDeletion': False,
        }
        for field_index, field_name in enumerate(("Front", "Back", "Hiragana", "Fake images"))
    ],
    # CSS could be set here instead of being embedded in the HTML templates
    'css': '',
    # not sure what the remaining settings do
    'latexPre':'\\documentclass[12pt]{article}\n\\special{papersize=3in,5in}\n\\usepackage[utf8]{inputenc}\n\\usepackage{amssymb,amsmath}\n\\pagestyle{empty}\n\\setlength{\\parindent}{0in}\n\\begin{document}\n',
    'latexPost':'\\end{document}',
    'latexSvg': False,
    'req': [[0, 'any', [0]]],
    'originalStockKind': 1,
}

# Step 3: add the note type with a predictable ID
# The note type is added while the clock is faked to a fixed moment (which
# may lie in the past or the future), so the ID it receives is reproducible.
# NOTE(review): the timestamp string carries no timezone - confirm whether
# the resulting ID depends on the local timezone of the machine.
with fake_time("2025-04-07 18:45:00"):
    added_note_type = col.models.add_dict(note_type)
    note_type_id = added_note_type.id

# Step 4: add Kanji data from CSV, and calculate Hiragana
kanji_conv = KanjiConv(separator="/")
deck_id = col.decks.add_normal_deck_with_name("Lots of Kanji").id
# The file contains Kanji, so the encoding is stated explicitly instead of
# relying on the locale default (assumes the CSV is UTF-8); newline="" is
# what the csv module asks for when handing it a file object.
with open("jouyou-kanji.csv", "r", encoding="utf-8", newline="") as csv_file:
    # make sure to use space as the delimiter (interesting choice but very
    # readable in the raw form)
    # example line: '譜 "musical score" fu'
    csv_lines = csv.reader(csv_file, delimiter=" ")
    for line in csv_lines:
        if not line:
            # csv.reader yields an empty row for a blank line (e.g. a
            # trailing newline at the end of the file); nothing to import
            continue
        # first column is the Kanji, second the English translation
        kanji, english = line[0], line[1]
        note = Note(col, note_type_id)
        # make up a unique ID by assuming the Kanjis are unique
        note.guid = f'kanji-{kanji}'
        note["Back"] = kanji
        note["Front"] = english
        # calculate Hiragana automatically from Kanji
        note["Hiragana"] = kanji_conv.to_hiragana(kanji)
        # if HanziWriter supports the character, ask Anki to include the
        # JSON-P file for HanziWriter for this Kanji in the APKG export
        hanzi_writer_data = f'./media/{kanji}.js'
        if os.path.isfile(hanzi_writer_data):
            note["Fake images"] = f'<img src="{kanji}.js">'
            col.media.add_file(hanzi_writer_data)
        # save the note
        col.add_note(note, deck_id)

# Step 5: Done, now we just need to export an APKG
try:
    col.export_anki_package(
        # anki fails if this is just an apkg filename without a dir,
        # with ./ it's fine
        out_path="./lots_of_kanji.apkg",
        options=ExportAnkiPackageOptions(
            with_deck_configs=False,
            with_media=True,
            with_scheduling=False,
            legacy=True,
        ),
        limit=None)
finally:
    # Close the temporary collection whether or not the export succeeded,
    # instead of leaving it open until the interpreter exits.
    col.close()