Java: character encoding issue when exporting to a TXT file

  Kiến thức lập trình

I wrote a piece of code that reads a PDF file, extracts its content, and exports it to a TXT file.
However, I encountered character encoding issues as shown in the attached image. I tried specifying the encoding as UTF-8, but I still face the same issue. Could you please help me with this? Thanks

// Load the PDF at filePath, extract its text, clean it, echo it to stdout, and
// write it to outputFilePath. Encrypted documents are skipped without output.
// try-with-resources guarantees the document is closed even if text extraction
// or any later step throws.
try (PDDocument document = PDDocument.load(new File(filePath))) {
    if (!document.isEncrypted()) {
        PDFTextStripper tStripper = new PDFTextStripper();

        String pdfFileInText = tStripper.getText(document);

        String cleanedText = cleanText(pdfFileInText);
        // NOTE(review): what the console shows here presumably depends on the
        // console/stdout encoding, so it may differ from the UTF-8 file content — confirm.
        System.out.println("Cleaned text:");
        System.out.println(cleanedText);

        writeToFile(outputFilePath, cleanedText);
    }
} catch (IOException e) {
    e.printStackTrace();
}

/**
 * Removes every match of the cleanup regular expression from the given text.
 *
 * @param text the text to clean
 * @return {@code text} with all matches of the pattern replaced by the empty string
 */
private static String cleanText(String text) {
    final String replacement = "";
    return java.util.regex.Pattern.compile("//todosomething")
            .matcher(text)
            .replaceAll(replacement);
}

/**
 * Writes {@code content} to the file at {@code outputFilePath}, encoded as UTF-8.
 *
 * <p>If {@code content} is {@code null} or empty, a notice is printed and the file
 * is not touched. An {@link IOException} is not propagated: its message and stack
 * trace are printed instead.
 *
 * @param outputFilePath path of the file to write
 * @param content        text to write
 */
private static void writeToFile(String outputFilePath, String content) {
    final boolean nothingToWrite = (content == null) || content.isEmpty();
    if (nothingToWrite) {
        System.out.println("No content to write to file.");
        return;
    }

    try (BufferedWriter out =
            Files.newBufferedWriter(Paths.get(outputFilePath), StandardCharsets.UTF_8)) {
        out.write(content, 0, content.length());
        out.flush();
        System.out.println("Questions written to output file successfully.");
    } catch (IOException ioFailure) {
        System.out.println("Error writing to file: " + ioFailure.getMessage());
        ioFailure.printStackTrace();
    }
}

LEAVE A COMMENT