From 18e9ee8566674d6170c714cc1f638c292884ccaf Mon Sep 17 00:00:00 2001
From: carlospolop
Date: Sun, 8 Jun 2025 20:00:03 +0200
Subject: [PATCH] f

---
 src/AI/AI-llm-architecture/0.-basic-llm-concepts.md            | 1 -
 src/AI/AI-llm-architecture/1.-tokenizing.md                    | 1 -
 src/AI/AI-llm-architecture/2.-data-sampling.md                 | 1 -
 src/AI/AI-llm-architecture/3.-token-embeddings.md              | 1 -
 src/AI/AI-llm-architecture/4.-attention-mechanisms.md          | 1 -
 src/AI/AI-llm-architecture/5.-llm-architecture.md              | 2 +-
 .../AI-llm-architecture/6.-pre-training-and-loading-models.md  | 1 -
 .../7.0.-lora-improvements-in-fine-tuning.md                   | 2 +-
 .../AI-llm-architecture/7.1.-fine-tuning-for-classification.md | 2 +-
 .../7.2.-fine-tuning-to-follow-instructions.md                 | 2 +-
 src/AI/AI-llm-architecture/README.md                           | 1 -
 11 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md b/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md
index a6ca090d5..e7c37d045 100644
--- a/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md
+++ b/src/AI/AI-llm-architecture/0.-basic-llm-concepts.md
@@ -297,4 +297,3 @@ During the backward pass:
 - **Efficiency:** Avoids redundant calculations by reusing intermediate results.
 - **Accuracy:** Provides exact derivatives up to machine precision.
 - **Ease of Use:** Eliminates manual computation of derivatives.
-
diff --git a/src/AI/AI-llm-architecture/1.-tokenizing.md b/src/AI/AI-llm-architecture/1.-tokenizing.md
index 6cf3b71af..228222f5d 100644
--- a/src/AI/AI-llm-architecture/1.-tokenizing.md
+++ b/src/AI/AI-llm-architecture/1.-tokenizing.md
@@ -96,4 +96,3 @@ print(token_ids[:50])
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/2.-data-sampling.md b/src/AI/AI-llm-architecture/2.-data-sampling.md
index 695f072ee..9909261e1 100644
--- a/src/AI/AI-llm-architecture/2.-data-sampling.md
+++ b/src/AI/AI-llm-architecture/2.-data-sampling.md
@@ -238,4 +238,3 @@ tensor([[ 367, 2885, 1464, 1807],
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/3.-token-embeddings.md b/src/AI/AI-llm-architecture/3.-token-embeddings.md
index a0f9514be..7db973e25 100644
--- a/src/AI/AI-llm-architecture/3.-token-embeddings.md
+++ b/src/AI/AI-llm-architecture/3.-token-embeddings.md
@@ -216,4 +216,3 @@ print(input_embeddings.shape) # torch.Size([8, 4, 256])
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/4.-attention-mechanisms.md b/src/AI/AI-llm-architecture/4.-attention-mechanisms.md
index e485d4f04..5b0396952 100644
--- a/src/AI/AI-llm-architecture/4.-attention-mechanisms.md
+++ b/src/AI/AI-llm-architecture/4.-attention-mechanisms.md
@@ -427,4 +427,3 @@ For another compact and efficient implementation you could use the [`torch.nn.Mu
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/5.-llm-architecture.md b/src/AI/AI-llm-architecture/5.-llm-architecture.md
index 462bfd971..06f4489cf 100644
--- a/src/AI/AI-llm-architecture/5.-llm-architecture.md
+++ b/src/AI/AI-llm-architecture/5.-llm-architecture.md
@@ -697,4 +697,4 @@ print("Output length:", len(out[0]))
 
 ## References
 
-- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
+- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
\ No newline at end of file
diff --git a/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md b/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md
index eb92bc3fc..c605b6f16 100644
--- a/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md
+++ b/src/AI/AI-llm-architecture/6.-pre-training-and-loading-models.md
@@ -968,4 +968,3 @@ There 2 quick scripts to load the GPT2 weights locally. For both you can clone t
 
 - [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
 
-
diff --git a/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md b/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md
index 217248caf..5ebd89765 100644
--- a/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md
+++ b/src/AI/AI-llm-architecture/7.0.-lora-improvements-in-fine-tuning.md
@@ -60,4 +60,4 @@ def replace_linear_with_lora(model, rank, alpha):
 
 ## References
 
-- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
+- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
\ No newline at end of file
diff --git a/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md b/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md
index 0922decac..da3581f25 100644
--- a/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md
+++ b/src/AI/AI-llm-architecture/7.1.-fine-tuning-for-classification.md
@@ -113,4 +113,4 @@ You can find all the code to fine-tune GPT2 to be a spam classifier in [https://
 
 ## References
 
-- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
+- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
\ No newline at end of file
diff --git a/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md b/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md
index ee4f82407..2f546c8e7 100644
--- a/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md
+++ b/src/AI/AI-llm-architecture/7.2.-fine-tuning-to-follow-instructions.md
@@ -103,4 +103,4 @@ You can find an example of the code to perform this fine tuning in [https://gith
 
 ## References
 
-- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
+- [https://www.manning.com/books/build-a-large-language-model-from-scratch](https://www.manning.com/books/build-a-large-language-model-from-scratch)
\ No newline at end of file
diff --git a/src/AI/AI-llm-architecture/README.md b/src/AI/AI-llm-architecture/README.md
index 35cbc6ae9..cf1b3d825 100644
--- a/src/AI/AI-llm-architecture/README.md
+++ b/src/AI/AI-llm-architecture/README.md
@@ -96,4 +96,3 @@ You should start by reading this post for some basic concepts you should know ab
 {{#ref}}
 7.2.-fine-tuning-to-follow-instructions.md
 {{#endref}}
-