Skip to content

Commit 18fed70

Browse files
authored
Merge pull request #354 from fasonju/main
Text files support using libreoffice
2 parents dd9d117 + efc4b3f commit 18fed70

File tree

4 files changed: 193 additions and 1 deletion.

Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ RUN apt-get update && apt-get install -y \
5656
inkscape \
5757
libheif-examples \
5858
libjxl-tools \
59+
libreoffice \
5960
libva2 \
6061
libvips-tools \
6162
lmodern \
@@ -81,4 +82,4 @@ EXPOSE 3000/tcp
8182
# used for calibre
8283
ENV QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox"
8384
ENV NODE_ENV=production
84-
ENTRYPOINT [ "bun", "run", "./src/index.tsx" ]
85+
ENTRYPOINT [ "bun", "run", "./src/index.tsx" ]

src/converters/libreoffice.ts

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import { execFile } from "node:child_process";
2+
3+
/**
 * File extensions this converter advertises, grouped by document category.
 *
 * `from` lists the extensions accepted as input and `to` lists the extensions
 * that can be requested as output. Only the `text` category is populated.
 *
 * Note: `pdf` is advertised as an input here even though its import filter is
 * commented out in this file's filter table, so no explicit filter is passed
 * for PDF input.
 */
export const properties = {
  from: {
    text: [
      "602",
      "abw",
      "csv",
      "cwk",
      "doc",
      "docm",
      "docx",
      "dot",
      "dotx",
      "dotm",
      "epub",
      "fb2",
      "fodt",
      "htm",
      "html",
      "hwp",
      "mcw",
      "mw",
      "mwd",
      "lwp",
      "lrf",
      "odt",
      "ott",
      "pages",
      "pdf",
      "psw",
      "rtf",
      "sdw",
      "stw",
      "sxw",
      "tab",
      "tsv",
      "txt",
      "wn",
      "wpd",
      "wps",
      "wpt",
      "wri",
      "xhtml",
      "xml",
      "zabw",
    ],
  },
  to: {
    text: [
      "csv",
      "doc",
      "docm",
      "docx",
      "dot",
      "dotx",
      "dotm",
      "epub",
      "fodt",
      "htm",
      "html",
      "odt",
      "ott",
      "pdf",
      "rtf",
      "tab",
      "tsv",
      "txt",
      "wps",
      "wpt",
      "xhtml",
      "xml",
    ],
  },
};
76+
77+
type FileCategories = "text" | "calc";

/**
 * LibreOffice filter names, keyed first by document category and then by file
 * extension. The same table is used for both the import (`--infilter`) and the
 * export (`--convert-to ext:filter`) side of a conversion.
 *
 * The `calc` category is declared but currently has no entries.
 */
const filters: Record<FileCategories, Record<string, string>> = {
  text: {
    "602": "T602Document",
    abw: "AbiWord",
    csv: "Text",
    doc: "MS Word 97",
    docm: "MS Word 2007 XML VBA",
    docx: "MS Word 2007 XML",
    dot: "MS Word 97 Vorlage",
    dotx: "MS Word 2007 XML Template",
    dotm: "MS Word 2007 XML Template",
    epub: "EPUB",
    fb2: "Fictionbook 2",
    fodt: "OpenDocument Text Flat XML",
    htm: "HTML (StarWriter)",
    html: "HTML (StarWriter)",
    hwp: "writer_MIZI_Hwp_97",
    mcw: "MacWrite",
    mw: "MacWrite",
    mwd: "Mariner_Write",
    lwp: "LotusWordPro",
    lrf: "BroadBand eBook",
    odt: "writer8",
    ott: "writer8_template",
    pages: "Apple Pages",
    // NOTE(review): the PDF import filter is intentionally left disabled here;
    // the reason is not recorded in this file.
    // pdf: "writer_pdf_import",
    psw: "PocketWord File",
    rtf: "Rich Text Format",
    sdw: "StarOffice_Writer",
    stw: "writer_StarOffice_XML_Writer_Template",
    sxw: "StarOffice XML (Writer)",
    tab: "Text",
    tsv: "Text",
    txt: "Text",
    wn: "WriteNow",
    wpd: "WordPerfect",
    wps: "MS Word 97",
    wpt: "MS Word 97 Vorlage",
    wri: "MS_Write",
    xhtml: "HTML (StarWriter)",
    xml: "OpenDocument Text Flat XML",
    zabw: "AbiWord",
  },
  calc: {},
};

/**
 * Returns the filter registered for `extension` in `table`, or `null`.
 *
 * Only own properties are considered: a plain `in` check would also match keys
 * inherited from `Object.prototype` (e.g. "constructor", "toString") and hand
 * back a function instead of a filter name.
 */
const lookupFilter = (table: Record<string, string>, extension: string): string | null =>
  Object.prototype.hasOwnProperty.call(table, extension) ? (table[extension] ?? null) : null;

/**
 * Resolves the import and export filter names for a conversion.
 *
 * Both extensions must be known within the same category (`text` is tried
 * first, then `calc`); otherwise no filter is returned for either side.
 *
 * @param fileType - Extension of the input file, without a leading dot.
 * @param converto - Extension of the requested output format.
 * @returns `[inFilter, outFilter]`, or `[null, null]` when no single category
 *   knows both extensions.
 */
const getFilters = (fileType: string, converto: string): [string, string] | [null, null] => {
  for (const table of [filters.text, filters.calc]) {
    const inFilter = lookupFilter(table, fileType);
    const outFilter = lookupFilter(table, converto);
    if (inFilter !== null && outFilter !== null) {
      return [inFilter, outFilter];
    }
  }
  return [null, null];
};
133+
134+
/**
 * Derives the directory handed to `--outdir` from the requested target path.
 *
 * Everything up to the last "/" is kept and a single leading "./" is dropped.
 * A target without a directory component resolves to "." and a target directly
 * under the root resolves to "/", so `--outdir` never receives an empty string.
 */
function outputDirectoryOf(targetPath: string): string {
  const lastSlash = targetPath.lastIndexOf("/");
  if (lastSlash === -1) {
    return ".";
  }
  if (lastSlash === 0) {
    return "/";
  }

  const directory = targetPath.slice(0, lastSlash);
  // Strip only a *leading* "./"; a "./" sequence elsewhere is part of the path.
  const withoutDotSlash = directory.startsWith("./") ? directory.slice(2) : directory;
  return withoutDotSlash === "" ? "." : withoutDotSlash;
}

/**
 * Converts a document by running `soffice --headless --convert-to …`.
 *
 * The output directory is the directory part of `targetPath`; the output file
 * name itself is chosen by `soffice`, not by this function.
 *
 * @param filePath - Path of the input file.
 * @param fileType - Extension of the input file, used to pick an import filter.
 * @param convertTo - Extension of the requested output format.
 * @param targetPath - Requested output file path; only its directory is used.
 * @param options - Unused; accepted so the signature matches the other converters.
 * @returns A promise resolving to `"Done"` when `soffice` exits successfully.
 *   On failure it rejects with a string of the form `error: <details>` (a
 *   string rather than an `Error`, kept for backward compatibility).
 */
export function convert(
  filePath: string,
  fileType: string,
  convertTo: string,
  targetPath: string,
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  options?: unknown,
): Promise<string> {
  const outputPath = outputDirectoryOf(targetPath);
  const [inFilter, outFilter] = getFilters(fileType, convertTo);

  const args: string[] = ["--headless"];

  if (inFilter) {
    // execFile does not go through a shell, so the value must not be wrapped
    // in quotes: they would be passed to soffice as part of the filter name.
    args.push(`--infilter=${inFilter}`);
  }

  const target = outFilter ? `${convertTo}:${outFilter}` : convertTo;
  args.push("--convert-to", target, "--outdir", outputPath, filePath);

  return new Promise((resolve, reject) => {
    execFile("soffice", args, (error, stdout, stderr) => {
      // Log whatever the process produced first so diagnostics are available
      // for failed runs as well.
      if (stdout) {
        console.log(`stdout: ${stdout}`);
      }

      if (stderr) {
        console.error(`stderr: ${stderr}`);
      }

      if (error) {
        reject(`error: ${error}`);
        return;
      }

      resolve("Done");
    });
  });
}

src/converters/main.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { convert as convertImagemagick, properties as propertiesImagemagick } fr
1111
import { convert as convertInkscape, properties as propertiesInkscape } from "./inkscape";
1212
import { convert as convertLibheif, properties as propertiesLibheif } from "./libheif";
1313
import { convert as convertLibjxl, properties as propertiesLibjxl } from "./libjxl";
14+
import { convert as convertLibreOffice, properties as propertiesLibreOffice } from "./libreoffice";
1415
import { convert as convertPandoc, properties as propertiesPandoc } from "./pandoc";
1516
import { convert as convertPotrace, properties as propertiesPotrace } from "./potrace";
1617
import { convert as convertresvg, properties as propertiesresvg } from "./resvg";
@@ -76,6 +77,10 @@ const properties: Record<
7677
properties: propertiesCalibre,
7778
converter: convertCalibre,
7879
},
80+
libreoffice: {
81+
properties: propertiesLibreOffice,
82+
converter: convertLibreOffice,
83+
},
7984
pandoc: {
8085
properties: propertiesPandoc,
8186
converter: convertPandoc,

src/helpers/printVersions.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,16 @@ if (process.env.NODE_ENV === "production") {
144144
}
145145
});
146146

147+
exec("soffice --version", (error, stdout) => {
148+
if (error) {
149+
console.error("libreoffice is not installed");
150+
}
151+
152+
if (stdout) {
153+
console.log(stdout.split("\n")[0]);
154+
}
155+
});
156+
147157
exec("bun -v", (error, stdout) => {
148158
if (error) {
149159
console.error("Bun is not installed. wait what");

0 commit comments

Comments
 (0)