CivilのKeTCindy教室: 10月 2024

音声(mp3)のセンテンスのテキスト出力及び各センテンスのmp3出力(python,windows10)

[F:\英語の学習-実習\mp3-2-text\t1-mp3-to-sentense-mp3.py](作業場所)

■音声(mp3)のセンテンスのテキスト出力及び各センテンスのmp3出力の試み
上手くいかない場合があります。
改善が必要ですが覚書として記録したものです。

import speech_recognition as sr

from pydub import AudioSegment

from pydub.silence import split_on_silence

import os

# File names used throughout the script, defined in one place.
mp3_file = "practice.mp3"
wav_file = "converted.wav"

# Split the input path into its directory part and file-name part,
# e.g. "practice.mp3" -> dir_name "" and file_name "practice.mp3".
dir_name, file_name = os.path.split(mp3_file)

# Strip the extension from the bare file name, e.g. ("practice", ".mp3").
# basename is derived from file_name rather than mp3_file so that it never
# carries a directory component: it is used further down to build the
# per-chunk MP3 names inside the output directory, and a directory prefix
# there would point at a nested folder that does not exist.
basename, ext = os.path.splitext(file_name)
tmp2 = basename

# The transcript is saved as "<name>/<name>.txt", e.g. "practice/practice.txt".
txt_file = tmp2 + "/" + tmp2 + ".txt"
directory = os.path.dirname(txt_file)  # e.g. "practice"

# Create the output directory; exist_ok avoids the check-then-create race.
os.makedirs(directory, exist_ok=True)

# Length of silence in seconds between chunks of English audio.
silence_duration = 1

# Convert the MP3 file to WAV.
sound = AudioSegment.from_mp3(mp3_file)
sound.export(wav_file, format="wav")

# Audio type: 'e-only' for English only, 'j-only' for the Japanese
# translation only.
audio_type = 'e-only'

# Build a silent audio segment of the requested length.
def generate_silence(duration_sec):
    """Return a silent ``AudioSegment`` lasting ``duration_sec`` seconds."""
    # AudioSegment.silent is given its duration in milliseconds.
    duration_ms = duration_sec * 1000
    return AudioSegment.silent(duration=duration_ms)

# Split an audio file into chunks, cutting at the silent parts.
def split_audio(file_path):
    """Load the WAV file at ``file_path`` and split it on silence.

    Returns whatever ``split_on_silence`` produces for the loaded audio.
    """
    recording = AudioSegment.from_wav(file_path)
    split_options = {
        # Minimum length (ms) treated as silence; raised from 500 to 1200.
        # Tune this value to the material being split.
        "min_silence_len": 1200,
        # Loudness threshold below which audio counts as silence,
        # relative to the recording's own dBFS level.
        "silence_thresh": recording.dBFS - 14,
        # Silence (ms) kept at the start and end of every chunk.
        "keep_silence": 500,
    }
    return split_on_silence(recording, **split_options)

# Tidy a recognised sentence: trim it, capitalise the first letter of English
# text, and make sure it ends with sentence-final punctuation.
def format_sentence(sentence, is_english=False):
    """Return ``sentence`` trimmed and terminated with punctuation.

    Args:
        sentence: Raw sentence text.
        is_english: When True the first character is upper-cased and a
            missing terminator is filled in with ".". Otherwise the text is
            treated as Japanese and a missing terminator is filled in
            with "。".

    Returns:
        The formatted sentence. An empty or whitespace-only input yields "".
    """
    sentence = sentence.strip()  # drop surrounding whitespace
    if not sentence:
        return sentence

    if is_english:
        # Upper-case only the first character. str.capitalize() would also
        # lower-case the rest of the sentence and so destroy proper nouns
        # and acronyms ("I met John" -> "I met john").
        sentence = sentence[0].upper() + sentence[1:]
        terminators = ('.', '?', '!', '。', '？', '！')
        default_terminator = '.'
    else:
        terminators = ('。', '？', '！')
        default_terminator = '。'

    if not sentence.endswith(terminators):
        sentence += default_terminator
    return sentence

# Map each supported audio_type to (recognition language code, is_english).
recognition_settings = {
    'e-only': ("en-US", True),   # English audio
    'j-only': ("ja-JP", False),  # Japanese translation audio
}

# Validate audio_type before doing any work. Without this check an
# unsupported value would leave the formatted sentence unassigned inside the
# loop and fail with a NameError (or silently reuse a stale value).
if audio_type not in recognition_settings:
    raise ValueError(
        f"audio_type は 'e-only' または 'j-only' を指定してください: {audio_type!r}"
    )
language_code, is_english = recognition_settings[audio_type]

# Prepare the speech recognizer.
recognizer = sr.Recognizer()

# Split the WAV file into chunks.
audio_chunks = split_audio(wav_file)

# Recognised text, one formatted sentence per successfully recognised chunk.
recognized_sentences = []

# Names of the temporary per-chunk WAV files created below.
chunk_files = []

# Convert every chunk to text.
for i, chunk in enumerate(audio_chunks):
    chunk_filename = f"chunk{i}.wav"

    # Save the chunk so that it can be read back as an sr.AudioFile.
    chunk.export(chunk_filename, format="wav")
    chunk_files.append(chunk_filename)

    with sr.AudioFile(chunk_filename) as source:
        audio = recognizer.record(source)

    # Only the recognition call is guarded; formatting happens on success.
    try:
        text = recognizer.recognize_google(audio, language=language_code)
    except sr.UnknownValueError:
        print(f"チャンク{i} は認識できませんでした。")
    except sr.RequestError as e:
        print(f"Googleサービスへのリクエストに失敗しました; {e}")
    else:
        recognized_sentences.append(
            format_sentence(text, is_english=is_english)
        )

# Write the recognised sentences to the text file, one sentence per line.
with open(txt_file, mode='w', encoding='utf-8') as txtfile:
    txtfile.writelines(f"{line}\n" for line in recognized_sentences)

print(f"出力が完了しました。結果は {txt_file} に保存されました。")

# Convert every chunk to MP3.
# Each file is named with the basename of mp3_file, a "-" and a 3-digit
# serial number, e.g. practice-001.mp3.
output_dir = directory

# Create the output directory when it does not exist yet.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Serial numbers start at 1, hence start=1.
for serial, chunk in enumerate(audio_chunks, start=1):
    mp3_filename = f"{basename}-{serial:03d}.mp3"
    output_path = os.path.join(output_dir, mp3_filename)  # inside output_dir

    # Save the chunk as MP3.
    chunk.export(output_path, format="mp3")
    print(f"{output_path} に保存されました。")

# Delete the temporary files: first the converted WAV file, then every
# per-chunk WAV file, reporting each removal.
for temp_path in [wav_file, *chunk_files]:
    if os.path.exists(temp_path):
        os.remove(temp_path)
        print(f"{temp_path} を削除しました。")

ファイルからテキストを読み込み、文字数を数える関数(python)

# Read the text of a file and count its characters.
def count_characters_in_file(file_path, language="mixed"):
    """Count the characters in a UTF-8 text file.

    Args:
        file_path: Path of the text file to read (decoded as UTF-8).
        language: Counting mode.
            "english"  - count only the ASCII letters A-Z and a-z.
            "japanese" - count characters in U+3040..U+30FF (hiragana and
                         katakana blocks) and U+4E00..U+9FFF (kanji).
            any other value ("mixed") - count every character, including
                         whitespace and symbols.

    Returns:
        The number of characters that match the selected mode.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    if language == "english":
        # str.isalpha() is also True for kana, kanji and other non-ASCII
        # letters, so an explicit ASCII range check is used to count the
        # English alphabet only.
        return sum(
            1 for char in text
            if 'a' <= char <= 'z' or 'A' <= char <= 'Z'
        )

    if language == "japanese":
        return sum(
            1 for char in text
            if '\u3040' <= char <= '\u30FF' or '\u4E00' <= char <= '\u9FFF'
        )

    # Mixed: every character counts.
    return len(text)

# Run the counter for a given file path and language mode.
file_path = "example.txt"
language_mode = "english"  # one of 'english', 'japanese' or 'mixed'

character_count = count_characters_in_file(file_path, language=language_mode)
print(f"文字数: {character_count}")

複数のMP3ファイルを結合するpythonコード例

複数のMP3ファイルを結合するpythonコード例
・各ファイル間に無音時間を設定できる
・ディレクトリ内のすべてのMP3ファイルを結合
・ファイル名(concatenate_mp3_files.py)
----
import os

from pydub import AudioSegment

# Concatenate several MP3 files into a single MP3 file.
def concatenate_mp3_files(directory, output_file, silence_duration=1):
    """Join every MP3 file found in ``directory`` into ``output_file``.

    Files are joined in sorted file-name order with ``silence_duration``
    seconds of silence between consecutive files (none after the last one).

    Args:
        directory: Directory that holds the MP3 files to join.
        output_file: Path of the combined MP3 file to write.
        silence_duration: Gap between files, in seconds. 0 joins the files
            back to back.

    Returns:
        None. When no MP3 file is found a message is printed and nothing is
        written.
    """
    # A previous output written into the same directory must not be fed
    # back into the new output.
    output_key = os.path.normcase(os.path.abspath(output_file))

    # os.listdir returns names in arbitrary order, so sort them to get a
    # reproducible, name-ordered result. The extension check ignores case
    # so that "A.MP3" is picked up as well.
    mp3_files = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith('.mp3')
        and os.path.normcase(
            os.path.abspath(os.path.join(directory, name))
        ) != output_key
    )

    if not mp3_files:
        print(f"MP3ファイルが見つかりませんでした: {directory}")
        return

    # Silence inserted between files (seconds converted to milliseconds).
    silence = AudioSegment.silent(duration=silence_duration * 1000)

    # Insert the silence only *between* files. Appending it after every file
    # and slicing it off afterwards breaks for a zero-length gap, because
    # combined[:-0] is an empty segment.
    combined = AudioSegment.empty()
    for index, mp3_file in enumerate(mp3_files):
        if index:
            combined += silence
        combined += AudioSegment.from_mp3(os.path.join(directory, mp3_file))

    # Save the result.
    combined.export(output_file, format="mp3")
    print(f"MP3ファイルが結合されました。出力ファイル: {output_file}")

# Example usage
directory = "lesson"  # directory that holds the MP3 files
output_file = "combined_lessonn.mp3"  # name of the combined output file
silence_duration = 1  # silence between files, in seconds (1 second here)

concatenate_mp3_files(
    directory=directory,
    output_file=output_file,
    silence_duration=silence_duration,
)
----

画像のテキスト化

画像のテキスト化のpythonコード例

from PIL import Image

import pytesseract

# Tesseractのインストールパスを指定（例: Windowsの場合）

# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The OCR text with typographic quotes (’ “ ”) replaced by their
        plain ASCII equivalents.
    """
    # Open the image through a context manager so that the underlying file
    # handle is released as soon as OCR is done.
    with Image.open(image_path) as img:
        # lang='eng' recognises English; pass lang='jpn+eng' to recognise
        # both Japanese and English.
        text = pytesseract.image_to_string(img, lang='eng')

    # Replace curly quotes with straight ones; extend the chain as needed.
    return text.replace('’', "'").replace('“', '"').replace('”', '"')

# Example usage
image_path = "sample.jpg"

text = extract_text_from_image(image_path=image_path)
print(text)

PDFのテキスト化

■PDFのテキスト化のpythonコード例1

import fitz # PyMuPDF

import pytesseract

from PIL import Image

import io

# Tesseractのインストールパスを指定（例: Windowsの場合）

# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def pdf_to_text(pdf_path, language='eng'):
    """OCR every page of a PDF and return the combined text.

    Each page is rendered to an image at 300 dpi and passed to Tesseract.

    Args:
        pdf_path: Path of the PDF file to read.
        language: Tesseract language code(s), e.g. 'eng', 'jpn' or
            'eng+jpn'.

    Returns:
        The text of all pages, each page followed by a blank line, with
        typographic quotes (’ “ ”) replaced by plain ASCII quotes.
    """
    page_texts = []

    # Open the PDF; the finally clause guarantees it is closed even when
    # rendering or OCR raises part-way through.
    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(pdf_document.page_count):
            page = pdf_document[page_num]

            # Render the PDF page to an image.
            pix = page.get_pixmap(dpi=300)

            # Extract the text from the rendered image.
            with Image.open(io.BytesIO(pix.tobytes())) as img:
                page_text = pytesseract.image_to_string(img, lang=language)

            # Separate pages with a blank line.
            page_texts.append(page_text + "\n\n")
    finally:
        pdf_document.close()

    # Join once instead of growing a string inside the loop.
    text = "".join(page_texts)

    # Replace curly quotes with straight ones; extend the chain as needed.
    return text.replace('’', "'").replace('“', '"').replace('”', '"')

# Example usage
pdf_path = "sample.pdf"

# language: 'eng' is English, 'jpn' is Japanese, 'eng+jpn' is both.
text = pdf_to_text(pdf_path=pdf_path, language="eng")

# Show the text; write it to a file instead when needed.
print(text)

=======
■PDFのテキスト化のpythonコード例2

import fitz # PyMuPDF

import pytesseract

from PIL import Image

import io

# Tesseractのインストールパスを指定（例: Windowsの場合）

# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def pdf_to_text(pdf_path, language='eng'):
    """OCR every page of a PDF and return the combined text.

    Each page is rendered to an image at 300 dpi and passed to Tesseract.
    No character normalisation is applied to the result here.

    Args:
        pdf_path: Path of the PDF file to read.
        language: Tesseract language code(s), e.g. 'eng', 'jpn' or
            'eng+jpn'.

    Returns:
        The text of all pages, each page followed by a blank line.
    """
    page_texts = []

    # Open the PDF; the finally clause guarantees it is closed even when
    # rendering or OCR raises part-way through.
    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(pdf_document.page_count):
            page = pdf_document[page_num]

            # Render the PDF page to an image.
            pix = page.get_pixmap(dpi=300)

            # Extract the text from the rendered image.
            with Image.open(io.BytesIO(pix.tobytes())) as img:
                page_text = pytesseract.image_to_string(img, lang=language)

            # Separate pages with a blank line.
            page_texts.append(page_text + "\n\n")
    finally:
        pdf_document.close()

    # Join once instead of growing a string inside the loop.
    return "".join(page_texts)

# Translation table from full-width punctuation and symbols to their
# half-width counterparts.
_FULLWIDTH_TO_HALFWIDTH = str.maketrans({
    '，': ',',   # comma
    '。': '.',   # ideographic full stop -> period
    '：': ':',   # colon
    '；': ';',   # semicolon
    '！': '!',   # exclamation mark
    '？': '?',   # question mark
    '’': "'",   # apostrophe
    '“': '"',   # opening double quote
    '”': '"',   # closing double quote
    '（': '(',   # opening parenthesis
    '）': ')',   # closing parenthesis
    '［': '[',   # opening square bracket
    '］': ']',   # closing square bracket
    '｛': '{',   # opening curly brace
    '｝': '}',   # closing curly brace
    '・': '･',   # middle dot
    '～': '~',   # tilde
    '　': ' ',   # space
})


# Convert full-width punctuation and symbols to half-width ones.
def convert_fullwidth_to_halfwidth(text):
    """Return ``text`` with the listed full-width characters made half-width.

    Characters that are not in the table are left untouched.
    """
    return text.translate(_FULLWIDTH_TO_HALFWIDTH)

# Example usage:
# extract the text from the PDF, then convert full-width characters to
# half-width ones.
pdf_path = "sample.pdf"

text = convert_fullwidth_to_halfwidth(
    pdf_to_text(pdf_path, language="eng+jpn")
)
print(text)

メニュー

音声(mp3)のセンテンスのテキスト出力及び各センテンスのmp3出力(python,windows10)

ファイルからテキストを読み込み、文字数を数える関数(python)

複数のMP3ファイルを結合するpythonコード例

画像のテキスト化

PDFのテキスト化