XiaoYun Zhang
committed on
Commit
·
750c19e
1
Parent(s):
dccd8ef
use tokenizer from mlnet
Browse files
- Program.cs +12 -19
- clip.csproj +9 -2
- merges.txt +0 -0
- vocab.json +0 -0
Program.cs
CHANGED
|
@@ -1,35 +1,28 @@
|
|
| 1 |
-
using
|
|
|
|
|
|
|
| 2 |
using System.Collections.Generic;
|
| 3 |
using System.IO;
|
| 4 |
using System.Linq;
|
| 5 |
using TorchSharp;
|
| 6 |
|
| 7 |
-
|
| 8 |
-
var
|
| 9 |
-
var
|
| 10 |
var start_token = 49406;
|
| 11 |
var end_token = 49407;
|
| 12 |
-
var dictionary = new Dictionary<string, long>(){
|
| 13 |
-
{"cat", 2368},
|
| 14 |
-
{"a", 320},
|
| 15 |
-
{"cute", 2242},
|
| 16 |
-
{"blue", 1746},
|
| 17 |
-
{"wild", 3220},
|
| 18 |
-
{"green", 1901},
|
| 19 |
-
};
|
| 20 |
-
|
| 21 |
-
var batch = 1;
|
| 22 |
-
|
| 23 |
var prompt = "a wild cute green cat";
|
| 24 |
-
var
|
| 25 |
-
tokens =
|
| 26 |
-
tokens = tokens.Append(end_token).ToList();
|
| 27 |
-
tokens = tokens.Concat(Enumerable.Repeat<long>(0, 77 - tokens.Count)).ToList();
|
| 28 |
var uncontional_tokens = new[]{start_token, end_token}.Concat(Enumerable.Repeat(0, 75)).ToList();
|
| 29 |
var tokenTensor = torch.tensor(tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
|
| 30 |
tokenTensor = tokenTensor.repeat(batch, 1);
|
| 31 |
var unconditional_tokenTensor = torch.tensor(uncontional_tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
|
| 32 |
unconditional_tokenTensor = unconditional_tokenTensor.repeat(batch, 1);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
var img = torch.randn(batch, 4, 64, 64, dtype: torch.ScalarType.Float32, device: device);
|
| 34 |
var t = torch.full(new[]{batch, 1L}, value: batch, dtype: torch.ScalarType.Int32, device: device);
|
| 35 |
var condition = clipEncoder.Forward(tokenTensor);
|
|
|
|
| 1 |
+
using Microsoft.ML;
|
| 2 |
+
using Microsoft.ML.Tokenizers;
|
| 3 |
+
using System;
|
| 4 |
using System.Collections.Generic;
|
| 5 |
using System.IO;
|
| 6 |
using System.Linq;
|
| 7 |
using TorchSharp;
|
| 8 |
|
| 9 |
+
var batch = 1;
|
| 10 |
+
var bpe = new Bpe("vocab.json", "merges.txt", endOfWordSuffix: "</w>");
|
| 11 |
+
var tokenier = new Tokenizer(bpe);
|
| 12 |
var start_token = 49406;
|
| 13 |
var end_token = 49407;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
var prompt = "a wild cute green cat";
|
| 15 |
+
var res = tokenier.Encode(prompt);
|
| 16 |
+
var tokens = new[] { start_token }.Concat(res.Ids.Concat(Enumerable.Repeat(0, 75 - res.Ids.Count))).Concat(new[] { end_token }).ToList();
|
|
|
|
|
|
|
| 17 |
var uncontional_tokens = new[]{start_token, end_token}.Concat(Enumerable.Repeat(0, 75)).ToList();
|
| 18 |
var tokenTensor = torch.tensor(tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
|
| 19 |
tokenTensor = tokenTensor.repeat(batch, 1);
|
| 20 |
var unconditional_tokenTensor = torch.tensor(uncontional_tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
|
| 21 |
unconditional_tokenTensor = unconditional_tokenTensor.repeat(batch, 1);
|
| 22 |
+
|
| 23 |
+
torchvision.io.DefaultImager = new torchvision.io.SkiaImager();
|
| 24 |
+
var device = TorchSharp.torch.device("cuda:0");
|
| 25 |
+
var clipEncoder = new ClipEncoder("clip_encoder.ckpt", device);
|
| 26 |
var img = torch.randn(batch, 4, 64, 64, dtype: torch.ScalarType.Float32, device: device);
|
| 27 |
var t = torch.full(new[]{batch, 1L}, value: batch, dtype: torch.ScalarType.Int32, device: device);
|
| 28 |
var condition = clipEncoder.Forward(tokenTensor);
|
clip.csproj
CHANGED
|
@@ -9,12 +9,19 @@
|
|
| 9 |
</PropertyGroup>
|
| 10 |
|
| 11 |
<ItemGroup>
|
|
|
|
|
|
|
| 12 |
<PackageReference Include="TorchVision" Version="$(TorchVersion)" />
|
| 13 |
<PackageReference Include="TorchSharp-cuda-linux" Version="$(TorchVersion)" />
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
| 16 |
</None>
|
| 17 |
-
<PackageReference Include="Microsoft.ML" Version="2.0.1" />
|
| 18 |
</ItemGroup>
|
| 19 |
|
| 20 |
</Project>
|
|
|
|
| 9 |
</PropertyGroup>
|
| 10 |
|
| 11 |
<ItemGroup>
|
| 12 |
+
<PackageReference Include="Microsoft.ML" Version="2.0.1" />
|
| 13 |
+
<PackageReference Include="Microsoft.ML.Tokenizers" Version="0.20.1" />
|
| 14 |
<PackageReference Include="TorchVision" Version="$(TorchVersion)" />
|
| 15 |
<PackageReference Include="TorchSharp-cuda-linux" Version="$(TorchVersion)" />
|
| 16 |
+
<None Update="*.ckpt">
|
| 17 |
+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
| 18 |
+
</None>
|
| 19 |
+
<None Update="merges.txt">
|
| 20 |
+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
| 21 |
+
</None>
|
| 22 |
+
<None Update="vocab.json">
|
| 23 |
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
| 24 |
</None>
|
|
|
|
| 25 |
</ItemGroup>
|
| 26 |
|
| 27 |
</Project>
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|