fix: 修复tx源歌词与翻译错位的问题

This commit is contained in:
sukimon_qwq 2024-10-04 12:34:51 +08:00 committed by GitHub
parent 193618b34e
commit 3a2da26245
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -26,70 +26,66 @@ class ParseTools:
'timeLabelFixRxp': re.compile(r'(?:\.0+|0+)$'), 'timeLabelFixRxp': re.compile(r'(?:\.0+|0+)$'),
} }
def msFormat(self, timeMs):
    """Format a millisecond offset as an LRC time label ``[mm:ss.mmm]``.

    Args:
        timeMs: Offset in milliseconds (int, or float that is truncated).

    Returns:
        The bracketed label, or ``''`` when ``timeMs`` is a NaN float.
        A value of 0 is valid and yields ``[00:00.000]``.
    """
    import math  # local import: the file's import block is outside this view

    if isinstance(timeMs, float):
        # float has no ``is_nan`` method; math.isnan is the supported check.
        if math.isnan(timeMs):
            return ''
        # Work on whole milliseconds so the fraction never prints as "500.0".
        timeMs = int(timeMs)

    ms = timeMs % 1000
    totalSeconds = timeMs // 1000
    m = str(totalSeconds // 60).zfill(2)
    s = str(totalSeconds % 60).zfill(2)
    return f'[{m}:{s}.{str(ms).zfill(3)}]'
def parseLyric(self, lrc):
    """Convert word-timed lyric text into plain and word-timed LRC text.

    Lines that match ``self.rxps['lineTime']`` are rewritten with a label
    from ``self.msFormat``. Their ``(start,duration)`` word tags are
    removed for the plain output and converted to ``<offset,duration>``
    (offset relative to the line start, never negative) for the word-timed
    output.

    Args:
        lrc: Raw lyric text; surrounding whitespace and ``\\r`` are dropped.

    Returns:
        A dict with ``'lyric'`` (plain LRC) and ``'lxlyric'`` (word-timed
        LRC), both newline-joined strings.
    """
    lrc = lrc.strip().replace('\r', '')
    if not lrc:
        return {'lyric': '', 'lxlyric': ''}

    tagFieldsRxp = r"\((\d+),(\d+)\)"

    def toRelativeTag(rawTag, lineStart):
        # Turn one "(start,duration)" tag into "<start - lineStart,duration>".
        fields = re.search(tagFieldsRxp, rawTag)
        return f'''<{max(int(fields.group(1)) - lineStart, 0)},{fields.group(2)}>'''

    plainOut = []
    wordTimedOut = []
    for rawLine in lrc.split('\n'):
        text = rawLine.strip()
        head = self.rxps['lineTime'].match(text)

        if head is None:
            # Not a word-timed line: keep offset headers in both outputs and
            # already-formatted time-tag lines in the plain output only.
            if text.startswith('[offset'):
                wordTimedOut.append(text)
                plainOut.append(text)
            if self.rxps['lineTime2'].match(text):
                plainOut.append(text)
            continue

        lineStart = int(head.group(1))
        label = self.msFormat(lineStart)
        if not label:
            continue

        body = re.sub(self.rxps['lineTime'], '', text)
        plainOut.append(f'{label}{re.sub(self.rxps["wordTimeAll"], "", body)}')

        rawTags = re.findall(self.rxps['wordTimeAll'], body)
        if not rawTags:
            continue

        relTags = [toRelativeTag(rawTag, lineStart) for rawTag in rawTags]
        pieces = re.split(self.rxps['wordTime'], body)
        # Pair the n-th tag with the n-th text piece produced by the split.
        merged = ''.join(f'{relTags[pos]}{pieces[pos]}' for pos in range(len(relTags)))
        wordTimedOut.append(f'{label}{merged}')

    return {
        'lyric': '\n'.join(plainOut),
        'lxlyric': '\n'.join(wordTimedOut),
    }
def parse_rlyric(self, lrc): def parseRlyric(self, lrc):
lrc = lrc.strip() lrc = lrc.strip().replace('\r', '')
lrc = lrc.replace('\r', '')
if not lrc: if not lrc:
return {'lyric': '', 'lxlyric': ''} return {'lyric': '', 'lxlyric': ''}
lines = lrc.split('\n')
lyric_lines = [] lines = lrc.split('\n')
lrcLines = []
for line in lines: for line in lines:
line = line.strip() line = line.strip()
@ -97,91 +93,104 @@ class ParseTools:
if not result: if not result:
continue continue
start_ms_time = int(result.group(1)) startMsTime = int(result.group(1))
start_time_str = self.ms_format(start_ms_time) startTimeStr = self.msFormat(startMsTime)
if not start_time_str: if not startTimeStr:
continue continue
words = re.sub(self.rxps['lineTime'], '', line) words = re.sub(self.rxps['lineTime'], '', line)
lrcLines.append(f'{startTimeStr}{re.sub(self.rxps["wordTimeAll"], "", words)}')
lyric_lines.append(f"{start_time_str}{re.sub(self.rxps['wordTimeAll'], '', words)}") return '\n'.join(lrcLines)
def removeTag(self, string):
    """Extract the value of the ``LyricContent="..."`` attribute.

    Everything up to and including the first ``LyricContent="`` is dropped,
    then everything from the first following ``"/>`` to the end of the text.
    Text without those markers is returned unchanged.

    Args:
        string: Raw text that may wrap the lyric in a ``LyricContent``
            attribute.

    Returns:
        The text between the two markers.
    """
    content = re.sub(r'^[\S\s]*?LyricContent="', '', string)
    # The tail must be removed with a regex; str.replace would look for the
    # pattern as literal text and never match. \Z anchors at the very end so
    # a trailing newline is consumed as well.
    return re.sub(r'"/>[\S\s]*\Z', '', content, count=1)
def getIntv(self, interval):
    """Convert an LRC time label body such as ``'01:23.45'`` to milliseconds.

    The text is split on ``:`` and ``.`` into minutes, seconds and a
    fractional part; missing leading fields count as zero.

    Args:
        interval: Time text without brackets, e.g. ``'01:02.500'``,
            ``'01:02'`` or ``'2.5'``. Empty or ``None`` yields 0.

    Returns:
        The offset in whole milliseconds.

    Raises:
        ValueError: If the text has more than three fields or a field is
            not numeric.
    """
    if not interval:
        return 0
    if '.' not in interval:
        interval += '.0'

    arr = re.split(r':|\.', interval)
    while len(arr) < 3:
        arr.insert(0, '0')
    m, s, frac = arr

    # The fraction is a decimal part, not a millisecond count: ".5" is 500 ms
    # and ".50" is 500 ms too, so normalise it to exactly three digits.
    ms = frac[:3].ljust(3, '0')
    # One minute is 60000 ms.
    return int(m) * 60000 + int(s) * 1000 + int(ms)
def fixRlrcTimeTag(self, rlrc, lrc):
    """Replace the time tags in ``rlrc`` with the matching tags from ``lrc``.

    Both texts are scanned in order. For each ``rlrc`` line that has a time
    tag (``self.rxps['lineTime2']``) and non-blank text, the ``lrc`` lines
    are consumed until one is found whose time, as computed by
    ``self.getIntv``, differs by less than 100. The ``rlrc`` line is then
    emitted with that ``lrc`` tag.

    Args:
        rlrc: Newline-separated secondary lyric text with time tags.
        lrc: Newline-separated main lyric text with time tags.

    Returns:
        The re-tagged ``rlrc`` lines joined with newlines. Lines without a
        partner in ``lrc`` are omitted.
    """
    tagRxp = self.rxps['lineTime2']
    # One shared iterator instead of list.pop(0): every lrc line is visited
    # at most once and each search resumes where the previous one stopped.
    # NOTE(review): an rlrc line with no partner exhausts the iterator, so
    # all later rlrc lines are dropped too - confirm this is intended.
    lrcIter = iter(lrc.split('\n'))
    newLrc = []

    for line in rlrc.split('\n'):
        result = tagRxp.match(line)
        if not result:
            continue
        if not tagRxp.sub('', line).strip():
            continue  # tag only, no lyric text

        t1 = self.getIntv(result.group(1))
        for lrcLine in lrcIter:
            lrcLineResult = tagRxp.match(lrcLine)
            if not lrcLineResult:
                continue
            t2 = self.getIntv(lrcLineResult.group(1))
            if abs(t1 - t2) < 100:
                tag = lrcLineResult.group(0)
                # A callable replacement inserts the tag literally instead
                # of parsing it as a replacement template.
                newLrc.append(tagRxp.sub(lambda _m: tag, line))
                break

    return '\n'.join(newLrc)
tlrc_lines = tlrc.split('\n')
lrc_lines = lrc.split('\n')
new_lrc = []
time_tag_rxp = r'^\[[\d:.]+\]'
def fixTlrcTimeTag(self, tlrc, lrc):
    """Replace the time tags in ``tlrc`` with the matching tags from ``lrc``.

    Both texts are scanned in order. For each ``tlrc`` line that has a time
    tag (``self.rxps['lineTime2']``) and non-blank text, the ``lrc`` lines
    are consumed until one is found whose time, as computed by
    ``self.getIntv``, differs by less than 100. The ``tlrc`` line is then
    emitted with that ``lrc`` tag.

    Args:
        tlrc: Newline-separated translated lyric text with time tags.
        lrc: Newline-separated main lyric text with time tags.

    Returns:
        The re-tagged ``tlrc`` lines joined with newlines. Lines without a
        partner in ``lrc`` are omitted.
    """
    tagRxp = self.rxps['lineTime2']
    # One shared iterator instead of list.pop(0): every lrc line is visited
    # at most once and each search resumes where the previous one stopped.
    # NOTE(review): a tlrc line with no partner exhausts the iterator, so
    # all later tlrc lines are dropped too - confirm this is intended.
    lrcIter = iter(lrc.split('\n'))
    newLrc = []

    for line in tlrc.split('\n'):
        result = tagRxp.match(line)
        if not result:
            continue
        if not tagRxp.sub('', line).strip():
            continue  # tag only, no lyric text

        timeText = result.group(1)
        if '.' in timeText:
            # Pad the fraction to three digits so ".5" and ".500" compare equal.
            timeText += '0' * (3 - len(timeText.split('.')[1]))
        t1 = self.getIntv(timeText)

        for lrcLine in lrcIter:
            lrcLineResult = tagRxp.match(lrcLine)
            if not lrcLineResult:
                continue
            t2 = self.getIntv(lrcLineResult.group(1))
            if abs(t1 - t2) < 100:
                tag = lrcLineResult.group(0)
                # A callable replacement inserts the tag literally instead
                # of parsing it as a replacement template.
                newLrc.append(tagRxp.sub(lambda _m: tag, line))
                break

    return '\n'.join(newLrc)
def parse(self, lrc, tlrc=None, rlrc=None):
    """Parse the main lyric and align the optional companion lyrics to it.

    Args:
        lrc: Raw main lyric; passed through ``removeTag`` and ``parseLyric``.
        tlrc: Optional translated lyric; aligned with ``fixTlrcTimeTag``.
        rlrc: Optional secondary lyric; passed through ``removeTag`` and
            ``parseRlyric``, then aligned with ``fixRlrcTimeTag``.

    Returns:
        A dict with the keys ``'lyric'``, ``'tlyric'``, ``'rlyric'`` and
        ``'lxlyric'``; entries that could not be produced stay ``''``.
    """
    info = {
        'lyric': '',
        'tlyric': '',
        'rlyric': '',
        'lxlyric': '',
    }

    if lrc:
        parsed_lrc = self.parseLyric(self.removeTag(lrc))
        info['lyric'] = parsed_lrc['lyric']
        info['lxlyric'] = parsed_lrc['lxlyric']

    if rlrc:
        parsed_rlrc = self.parseRlyric(self.removeTag(rlrc))
        # parseRlyric returns a dict instead of a string when its input is
        # blank after stripping; only a string can be aligned line by line.
        if isinstance(parsed_rlrc, str):
            info['rlyric'] = self.fixRlrcTimeTag(parsed_rlrc, info['lyric'])

    if tlrc:
        info['tlyric'] = self.fixTlrcTimeTag(tlrc, info['lyric'])

    return info