From 2a2931ba8d67ec58100d0c0c53275820a972453c Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Thu, 4 Jul 2024 01:22:29 +0200 Subject: [PATCH] Fix extra EOS tokens (#91) * Fix extra EOS tokens * Fix pre-commit error --- .pre-commit-config.yaml | 2 +- Libraries/LLM/Evaluate.swift | 13 ++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c12932f..28c3445 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/slessans/pre-commit-swift-format - rev: "" + rev: "fd627de92bdf84a75c924ed95691336d14e94cf1" hooks: - id: swift-format args: ["--configuration", ".swift-format"] diff --git a/Libraries/LLM/Evaluate.swift b/Libraries/LLM/Evaluate.swift index f25d5b0..9a1ae63 100644 --- a/Libraries/LLM/Evaluate.swift +++ b/Libraries/LLM/Evaluate.swift @@ -183,19 +183,10 @@ public func generate( var start = Date.timeIntervalSinceReferenceDate var promptTime: TimeInterval = 0 - // build a set of additional stop tokens let additionalEOSTokenIds = Set( (extraEOSTokens ?? []) - .map { - tokenizer.encode(text: $0) - } - .filter { - // discard anything that is not a single token. sometimes - // the tokenizer will insert a token, so accept that too - $0.count == 1 || ($0.count == 2 && $0[0] == 1) - } - .map { - $0.last! + .compactMap { + tokenizer.convertTokenToId($0) }) var tokens = [Int]()