deepl-python icon indicating copy to clipboard operation
deepl-python copied to clipboard

HTTPError 502 Bad Gateway for big number of strings

Open gtlc-ctrl opened this issue 1 year ago • 0 comments

When I use the API to translate a big array of strings ( 322 to be more specific ), after a while I receive HTTPError 502 Bad Gateway error. I tried to 'split' in smaller pieces, but the I still get the same error.

image


def parse_xml_file(file_path):
    """Parse the XML file and extract <source> and <translation> tag contents."""
    tree = ET.parse(file_path)
    root = tree.getroot()

    source_translation_pairs = []

    for context in root.findall('context'):
        for message in context.findall('message'):
            source = message.find('source')
            translation = message.find('translation')

            if source is not None and translation is not None:
                source_text = source.text
                translation_text = translation.text
                source_translation_pairs.append((source_text, translation_text))

    return source_translation_pairs, tree, root

def translate_batch(texts, target_lang='ES'):
    """Translate a batch of texts to the specified target language using the DeepL API."""
    data = {
        'auth_key': API_KEY,
        'target_lang': target_lang,
        'formality': 'default',  # Adjust formality if needed
    }
    translations = []

    for text in texts:
        data['text'] = text

        try:
            response = requests.post(DEEPL_API_URL, data=data, proxies=proxies)
            response.raise_for_status()  # Check for HTTP request errors

            # Extract translated text
            translation_result = response.json()
            translations.append(translation_result['translations'][0]['text'])
        except requests.exceptions.RequestException as e:
            print(f"Error during translation: {e}")
            translations.append("")  # Append empty string in case of failure

        time.sleep(2)  # Add a delay to avoid overloading the API

    return translations

def translate_texts(texts, target_lang='ES'):
    """Translate a list of texts in batches to avoid overloading the API."""
    translated_texts = []

    for i in range(0, len(texts), BATCH_SIZE):
        batch = texts[i:i + BATCH_SIZE]
        translated_batch = translate_batch(batch, target_lang)
        translated_texts.extend(translated_batch)

    return translated_texts

def update_spanish_file(english_file, spanish_file):
    """Translate English text and update Spanish XML file."""

    # Start timing
    start_time = time.time()

    # Step 1: Parse the English XML file and extract translations
    english_source_translation_pairs, _, _ = parse_xml_file(english_file)

    # Extract only English translations for translation via DeepL
    english_translations = [pair[1] for pair in english_source_translation_pairs if pair[1]]

    # Step 2: Translate the English translations into Spanish
    translated_texts = translate_texts(english_translations, target_lang='ES')

    # Step 3: Parse the Spanish XML file
    _, spanish_tree, spanish_root = parse_xml_file(spanish_file)

    # Step 4: Update the Spanish translation in the right <message> tags
    for (english_source, _), translated_text in zip(english_source_translation_pairs, translated_texts):
        for context in spanish_root.findall('context'):
            for message in context.findall('message'):
                source = message.find('source')
                translation = message.find('translation')

                if source is not None and translation is not None and source.text == english_source:
                    translation.text = translated_text

    # Step 5: Write the updated Spanish XML back to the file
    spanish_tree.write(spanish_file, encoding='utf-8', xml_declaration=True)
    write_xml_with_doctype(spanish_file, spanish_tree)

    # End timing
    end_time = time.time()

    # Calculate and print the execution time
    execution_time = end_time - start_time
    print(f"Execution time: {execution_time:.2f} seconds")

# Example usage:
if __name__ == "__main__":
    english_file = 'english_file.ts'
    spanish_file = 'spanish_file.ts'

    update_spanish_file(english_file, spanish_file)

Above is my code

Edit: I am using the free version for the moment, but I plan on upgrading to PRO. If I raise the sleep to be 2 seconds I receive code 504

gtlc-ctrl avatar Sep 12 '24 13:09 gtlc-ctrl