From 21219807a872734b51c86bdc79c91f90784edf3f Mon Sep 17 00:00:00 2001
From: carlospolop
Date: Sun, 8 Jun 2025 17:54:06 +0200
Subject: [PATCH] d

---
 src/AI/AI-llm-architecture/0.-basic-llm-concepts.md              | 1 -
 src/AI/AI-llm-architecture/1.-tokenizing.md                      | 1 -
 src/AI/AI-llm-architecture/2.-data-sampling.md                   | 1 -
 src/AI/AI-llm-architecture/3.-token-embeddings.md                | 1 -
 src/AI/AI-llm-architecture/4.-attention-mechanisms.md            | 1 -
 src/AI/AI-llm-architecture/5.-llm-architecture.md                | 1 -
 src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md | 1 -
 .../AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md | 1 -
 .../AI-llm-architecture/7.1.-fine-tuning-for-classification.md   | 1 -
 .../7.2.-fine-tuning-to-follow-instructions.md                   | 1 -
 src/AI/AI-llm-architecture/README.md                             | 1 -
 11 files changed, 11 deletions(-)

diff --git a/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md b/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md
index 61f7a8ff8..a6ca090d5 100644
--- a/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md
+++ b/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md
@@ -298,4 +298,3 @@ During the backward pass:
 - **Accuracy:** Provides exact derivatives up to machine precision.
 - **Ease of Use:** Eliminates manual computation of derivatives.
 
-
diff --git a/src/AI/AI-llm-architecture/1.-tokenizing.md b/src/AI/AI-llm-architecture/1.-tokenizing.md
index 6cf3b71af..228222f5d 100644
--- a/src/AI/AI-llm-architecture/1.-tokenizing.md
+++ b/src/AI/AI-llm-architecture/1.-tokenizing.md
@@ -96,4 +96,3 @@ print(token_ids[:50])
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/2.-data-sampling.md b/src/AI/AI-llm-architecture/2.-data-sampling.md
index 695f072ee..9909261e1 100644
--- a/src/AI/AI-llm-architecture/2.-data-sampling.md
+++ b/src/AI/AI-llm-architecture/2.-data-sampling.md
@@ -238,4 +238,3 @@ tensor([[ 367, 2885, 1464, 1807],
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/3.-token-embeddings.md b/src/AI/AI-llm-architecture/3.-token-embeddings.md
index a0f9514be..7db973e25 100644
--- a/src/AI/AI-llm-architecture/3.-token-embeddings.md
+++ b/src/AI/AI-llm-architecture/3.-token-embeddings.md
@@ -216,4 +216,3 @@ print(input_embeddings.shape) # torch.Size([8, 4, 256])
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/4.-attention-mechanisms.md b/src/AI/AI-llm-architecture/4.-attention-mechanisms.md
index 2698d24b7..f6febcfb6 100644
--- a/src/AI/AI-llm-architecture/4.-attention-mechanisms.md
+++ b/src/AI/AI-llm-architecture/4.-attention-mechanisms.md
@@ -429,4 +429,3 @@ For another compact and efficient implementation you could use the [`torch.nn.Mu
 
 
 
-
diff --git a/src/AI/AI-llm-architecture/5.-llm-architecture.md b/src/AI/AI-llm-architecture/5.-llm-architecture.md
index 0aacabd5d..56746a528 100644
--- a/src/AI/AI-llm-architecture/5.-llm-architecture.md
+++ b/src/AI/AI-llm-architecture/5.-llm-architecture.md
@@ -699,4 +699,3 @@ print("Output length:", len(out[0]))
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md b/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md
index 9250fd045..6b521cc36 100644
--- a/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md
+++ b/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md
@@ -970,4 +970,3 @@ There 2 quick scripts to load the GPT2 weights locally. For both you can clone t
 
 
 
-
diff --git a/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md b/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md
index b30cace1c..fa5817f83 100644
--- a/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md
+++ b/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md
@@ -62,4 +62,3 @@ def replace_linear_with_lora(model, rank, alpha):
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md b/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md
index 2cc13c089..8a283ac73 100644
--- a/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md
+++ b/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md
@@ -115,4 +115,3 @@ You can find all the code to fine-tune GPT2 to be a spam classifier in [https://
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md b/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md
index 05e138b75..13342ef1a 100644
--- a/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md
+++ b/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md
@@ -105,4 +105,3 @@ You can find an example of the code to perform this fine tuning in [https://gith
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/README.md b/src/AI/AI-llm-architecture/README.md
index 515c506e2..7a381e315 100644
--- a/src/AI/AI-llm-architecture/README.md
+++ b/src/AI/AI-llm-architecture/README.md
@@ -98,4 +98,3 @@ You should start by reading this post for some basic concepts you should know ab
 {{#endref}}
 
 
-