Googel Translate API – Missing Returned Data

  Kiến thức lập trình

I am creating a simple program, upload a SRT/txt file, then translate the contents using Google translate API.
The code is currently working for smaller files, but seems to have reach some kind of limit with larger files, there are no errors shown on the page I’m working on.

I have created batches which has helped but now I get random parts not returning.

How can I get google translate to return all results.

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SRT File Parser</title>
</head>
<body>

<h2>Upload SRT File</h2>
<form action="#" method="post" enctype="multipart/form-data">
    Select SRT file to upload:
    <input type="file" name="fileToUpload" id="fileToUpload">
    <input type="submit" value="Upload SRT File" name="submit">
</form>

<?php
error_reporting( E_ALL ); 
define('SRT_STATE_SUBNUMBER', 0);
define('SRT_STATE_TIME',      1);
define('SRT_STATE_TEXT',      2);
define('SRT_STATE_BLANK',     3);

$translateCount = 0;
$output = array();

function translateText($text, $targetLanguage) {
    global $translateCount;
    $translateCount++;

    $apiKey = 'GOOGLE_API_KEY'; // Replace 'GOOGLE_API_KEY' with your actual API key for testing obviously cant post mine
    $url = 'https://translation.googleapis.com/language/translate/v2?key=' . $apiKey;

    $data = array(
        'q' => $text,
        'target' => $targetLanguage
    );

    $data_string = json_encode($data);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data_string);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        'Content-Type: application/json',
        'Content-Length: ' . strlen($data_string))
    );

    $response = curl_exec($ch);
    curl_close($ch);

    if ($response === FALSE) {
        return "Error translating text: cURL error";
    }

    $json = json_decode($response, true);
    if(isset($json['error'])) {
        return "Error translating text: " . $json['error']['message'];
    }

    return $json['data']['translations'][0]['translatedText'];
}

function translate($batchNumber, &$batchSubs, $targetLanguage, &$output) {
    global $translateCount;

    $batchOutput = array();
    $batchOutput[] = "Batch Number: $batchNumber<br>";

    // Prepare text array for translation
    $text = "";
    foreach ($batchSubs as $sub) {
        $text .= $sub->text . "|"; // Add a separator between texts
    }
    // Translate the text
    $translatedText = translateText($text, $targetLanguage);

    // Split translated text into individual subtitles
    $translatedTextArray = explode("|", trim($translatedText));

    // Display subtitles with translated text
    $translatedTextIndex = 0;
    foreach ($batchSubs as $sub) {
        $subOutput = array();
        $subOutput['number'] = $sub->number;
        $subOutput['startTime'] = $sub->startTime;
        $subOutput['stopTime'] = $sub->stopTime;
        $subOutput['originalText'] = $sub->text;
        $subOutput['translatedText'] = $translatedTextArray[$translatedTextIndex];
        $output[] = $subOutput;
        $translatedTextIndex++;
    }

    $output[] = "<br>";
}

if(isset($_FILES['fileToUpload'])) {
    $file = $_FILES['fileToUpload']['tmp_name'];

    if(!file_exists($file)) {
        die("Error: File not found");
    }

    $lines = file($file);

    $subs    = array();
    $state   = SRT_STATE_SUBNUMBER;
    $subNum  = 0;
    $subText = '';
    $subTime = '';

    foreach($lines as $line) {
        switch($state) {
            case SRT_STATE_SUBNUMBER:
                $subNum = trim($line);
                if (!is_numeric($subNum)) {
                    continue 2; // Skip this subtitle if the number is not numeric
                }
                $state = SRT_STATE_TIME;
                break;

            case SRT_STATE_TIME:
                $subTime = trim($line);
                if (!preg_match('/^d{2}:d{2}:d{2},d{3} --> d{2}:d{2}:d{2},d{3}$/', $subTime)) {
                    continue 2; // Skip this subtitle if the timestamp format is incorrect
                }
                $state = SRT_STATE_TEXT;
                break;

            case SRT_STATE_TEXT:
                if (trim($line) == '') {
                    $sub = new stdClass;
                    $sub->number = $subNum;
                    list($sub->startTime, $sub->stopTime) = explode(' --> ', $subTime);
                    $sub->text = $subText;
                    $subText = '';
                    $state = SRT_STATE_SUBNUMBER;

                    $subs[] = $sub;
                } else {
                    if (preg_match('/^d{2}:d{2}:d{2},d{3}/', $line)) {
                        continue 2; // Skip this subtitle if the line starts with a timestamp-like pattern
                    }
                    $subText .= $line;
                }
                break;
        }
    }

    if ($state == SRT_STATE_TEXT) {
        // if file was missing the trailing newlines, we'll be in this
        // state here.  Append the last read text and add the last sub.
        $sub->text = $subText;
        $subs[] = $sub;
    }

    $output[] = "Line count: " . count($lines);
    $output[] = "Word count: " . str_word_count(implode(" ", $lines));
    $output[] = "Character count: " . strlen(implode("", $lines));
    $output[] = "Array count: " . count($subs) . "<br><br>";

    $totalChars = 0;
    $batchNumber = 1;
    $batchSubs = array();
    foreach($subs as $sub) {
        $batchSubs[] = $sub;
        $totalChars += strlen($sub->text);
        if ($totalChars >= 2000) {
            translate($batchNumber, $batchSubs, 'fr', $output); // Change 'fr' to your target language code
            $totalChars = 0;
            $batchSubs = array();
            $batchNumber++;
            sleep(2); // Add a 5-second delay
        }
    }
    if (!empty($batchSubs)) {
        translate($batchNumber, $batchSubs, 'fr', $output); // Change 'fr' to your target language code
        sleep(2); // Add a 5-second delay
    }
}

?>

<?php
foreach ($output as $section) {
    if (is_array($section)) {
        echo "Number: " . $section['number'] . "<br>";
        echo "Start Time: " . $section['startTime'] . "<br>";
        echo "Stop Time: " . $section['stopTime'] . "<br>";
        echo "Original Text: " . $section['originalText'] . "<br>";
        echo "Translated Text: " . $section['translatedText'] . "<br><br>";
    } else {
        echo "$section";
    }
}
echo "Translate function has been called $translateCount times.";
?>

</body>
</html>

I have tried

  • Batching
  • Sleep functions

New contributor

Chris Craven is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

LEAVE A COMMENT