jglowa committed on
Commit
6d4f0f5
verified
1 Parent(s): 5f94c85

Update indeksator.cmd

Browse files
Files changed (1) hide show
  1. indeksator.cmd +11 -2
indeksator.cmd CHANGED
@@ -1,4 +1,4 @@
1
- :; # Indeksator Prosty RAG v0.1 - Jerzy Głowacki na licencji Apache 2.0
2
  :; # *NIX:
3
  :; OS=$(uname -s | sed -e 's/^Linux$/linux/' -e 's/^Darwin$/mac/')
4
  :; embedfile="bge-m3.embedfile"
@@ -8,9 +8,11 @@
8
  :; chunkWords=200
9
  :; overlapWords=10
10
  :; > $chunksFile
 
11
  :; [ ! -d $inputDir ] && echo Pobieranie przyk艂adowego pliku $inputDir/wikipedia.txt... && curl --create-dirs -Lo $inputDir/wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe!
12
  :; [ ! -f $embedfile ] && echo Pobieranie $embedfile... && curl -Lo $embedfile https://huggingface.co/asg017/embedfile/resolve/refs%2Fpr%2F2/bge-m3.embedfile?download=true && chmod +x $embedfile && echo Gotowe!
13
  :; [ ! -f pdftotext ] && echo Pobieranie pdftotext... && curl -LO https://dl.xpdfreader.com/xpdf-tools-$OS-4.05.tar.gz && tar --strip-components 2 -xzf xpdf-tools-$OS-4.05.tar.gz xpdf-tools-$OS-4.05/bin64/pdftotext && del xpdf-tools-$OS-4.05.tar.gz && echo Gotowe!
 
14
  :; echo "Indeksowanie plik贸w PDF/TXT/MD w folderze $inputDir..."
15
  :; shopt -s nullglob
16
  :; for pdf in $inputDir/*.pdf; do echo "Konwertowanie $(basename "$pdf")..." && pdftotext -nopgbrk -enc UTF-8 "$pdf"; done
@@ -41,9 +43,11 @@ set chunkWords=200
41
  set overlapWords=10
42
  for /l %%i in (1,1,%overlapWords%) do set buf[%%i]=
43
  break>%chunksFile%
 
44
  if not exist %inputDir% echo Pobieranie przyk艂adowego pliku %inputDir%\wikipedia.txt... && curl --create-dirs -Lo %inputDir%\wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe^!
45
  if not exist %embedfile% echo Pobieranie %embedfile%... && curl -Lo %embedfile% https://huggingface.co/asg017/embedfile/resolve/refs%2Fpr%2F2/bge-m3.embedfile?download=true && echo Gotowe^!
46
  if not exist pdftotext.exe echo Pobieranie pdftotext.exe... && curl -LO https://dl.xpdfreader.com/xpdf-tools-win-4.05.zip && tar --strip-components 2 -xf xpdf-tools-win-4.05.zip xpdf-tools-win-4.05/bin64/pdftotext.exe && del xpdf-tools-win-4.05.zip && echo Gotowe^!
 
47
  echo Indeksowanie plik贸w PDF/TXT/MD w folderze %inputDir%...
48
  for %%F in ("%inputDir%\*.pdf") do if not exist "%%~dpnF.txt" echo Konwertowanie %%~nxF... && pdftotext -nopgbrk -enc UTF-8 "%%~F"
49
  for %%F in ("%inputDir%\*.txt" "%inputDir%\*.md") do (
@@ -52,7 +56,11 @@ for %%F in ("%inputDir%\*.txt" "%inputDir%\*.md") do (
52
  set wordCount=0
53
  set /p =%%~nxF: <nul
54
  for /f "usebackq delims=" %%L in ("%%F") do (
55
- for %%W in (%%L) do (
 
 
 
 
56
  set /p =%%W <nul
57
  for /l %%i in (2,1,%overlapWords%) do (set /a j=%%i-1 && set buf[!j!]=!buf[%%i]!)
58
  set "buf[%overlapWords%]=%%W"
@@ -73,3 +81,4 @@ if exist %dbFile% del %dbFile%
73
  %embedfile% import %chunksFile% %dbFile% && echo Gotowe^! Po ka偶dej zmianie w folderze %inputDir% nale偶y uruchomi膰 ponownie indeksator.
74
  del %chunksFile%
75
  endlocal
 
 
1
+ :; # Indeksator Prosty RAG v0.2 - Jerzy Głowacki na licencji Apache 2.0
2
  :; # *NIX:
3
  :; OS=$(uname -s | sed -e 's/^Linux$/linux/' -e 's/^Darwin$/mac/')
4
  :; embedfile="bge-m3.embedfile"
 
8
  :; chunkWords=200
9
  :; overlapWords=10
10
  :; > $chunksFile
11
+ :; # Instalacja
12
  :; [ ! -d $inputDir ] && echo Pobieranie przyk艂adowego pliku $inputDir/wikipedia.txt... && curl --create-dirs -Lo $inputDir/wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe!
13
  :; [ ! -f $embedfile ] && echo Pobieranie $embedfile... && curl -Lo $embedfile https://huggingface.co/asg017/embedfile/resolve/refs%2Fpr%2F2/bge-m3.embedfile?download=true && chmod +x $embedfile && echo Gotowe!
14
  :; [ ! -f pdftotext ] && echo Pobieranie pdftotext... && curl -LO https://dl.xpdfreader.com/xpdf-tools-$OS-4.05.tar.gz && tar --strip-components 2 -xzf xpdf-tools-$OS-4.05.tar.gz xpdf-tools-$OS-4.05/bin64/pdftotext && del xpdf-tools-$OS-4.05.tar.gz && echo Gotowe!
15
+ :; # Uruchamianie
16
  :; echo "Indeksowanie plik贸w PDF/TXT/MD w folderze $inputDir..."
17
  :; shopt -s nullglob
18
  :; for pdf in $inputDir/*.pdf; do echo "Konwertowanie $(basename "$pdf")..." && pdftotext -nopgbrk -enc UTF-8 "$pdf"; done
 
43
  set overlapWords=10
44
  for /l %%i in (1,1,%overlapWords%) do set buf[%%i]=
45
  break>%chunksFile%
46
+ :; # Instalacja
47
  if not exist %inputDir% echo Pobieranie przyk艂adowego pliku %inputDir%\wikipedia.txt... && curl --create-dirs -Lo %inputDir%\wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe^!
48
  if not exist %embedfile% echo Pobieranie %embedfile%... && curl -Lo %embedfile% https://huggingface.co/asg017/embedfile/resolve/refs%2Fpr%2F2/bge-m3.embedfile?download=true && echo Gotowe^!
49
  if not exist pdftotext.exe echo Pobieranie pdftotext.exe... && curl -LO https://dl.xpdfreader.com/xpdf-tools-win-4.05.zip && tar --strip-components 2 -xf xpdf-tools-win-4.05.zip xpdf-tools-win-4.05/bin64/pdftotext.exe && del xpdf-tools-win-4.05.zip && echo Gotowe^!
50
+ :; # Uruchamianie
51
  echo Indeksowanie plik贸w PDF/TXT/MD w folderze %inputDir%...
52
  for %%F in ("%inputDir%\*.pdf") do if not exist "%%~dpnF.txt" echo Konwertowanie %%~nxF... && pdftotext -nopgbrk -enc UTF-8 "%%~F"
53
  for %%F in ("%inputDir%\*.txt" "%inputDir%\*.md") do (
 
56
  set wordCount=0
57
  set /p =%%~nxF: <nul
58
  for /f "usebackq delims=" %%L in ("%%F") do (
59
+ set line=%%L
60
+ set line=!line: =^
61
+
62
+ !
63
+ for /f "delims=" %%W in ("!line!") do (
64
  set /p =%%W <nul
65
  for /l %%i in (2,1,%overlapWords%) do (set /a j=%%i-1 && set buf[!j!]=!buf[%%i]!)
66
  set "buf[%overlapWords%]=%%W"
 
81
  %embedfile% import %chunksFile% %dbFile% && echo Gotowe^! Po ka偶dej zmianie w folderze %inputDir% nale偶y uruchomi膰 ponownie indeksator.
82
  del %chunksFile%
83
  endlocal
84
+ pause