Create arrays by searching for specific strings in a text file (and output them in a table)

I have a text file (about 10,000 lines); some of its lines are given below.

confusables.txt

1F110 ; 0028 0041 0029 ;    MA  #* ( 🄐 → (A) ) PARENTHESIZED LATIN CAPITAL LETTER A → LEFT PARENTHESIS, LATIN CAPITAL LETTER A, RIGHT PARENTHESIS  # 
FF21 ;  0041 ;  MA  # ( Ａ → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A   # →А→
FF22 ;  0042 ;  MA  # ( Ｂ → B ) FULLWIDTH LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B   # →Β→
212C ;  0042 ;  MA  # ( ℬ → B ) SCRIPT CAPITAL B → LATIN CAPITAL LETTER B   # 
1F110 ; 0028 0041 0029 ;    MA  #* ( 🄐 → (A) ) PARENTHESIZED LATIN CAPITAL LETTER A → LEFT PARENTHESIS, LATIN CAPITAL LETTER A, RIGHT PARENTHESIS  # 
1D435 ; 0042 ;  MA  # ( 𝐵 → B ) MATHEMATICAL ITALIC CAPITAL B → LATIN CAPITAL LETTER B # 
213B ;  0046 0041 0058 ;    MA  #* ( ℻ → FAX ) FACSIMILE SIGN → LATIN CAPITAL LETTER F, LATIN CAPITAL LETTER A, LATIN CAPITAL LETTER X  #

For every line that matches a search string, I want to get the first character after the opening parenthesis (the Unicode look-alike of the original character), e.g. ℬ for 'LATIN CAPITAL LETTER B' in the fourth line above. I can do this using the following code:

<?php

/**
 * Prints a one-row HTML table listing every character that confusables.txt
 * maps to a single, hard-coded Latin capital letter.
 *
 * A data line looks like
 *   FF21 ; 0041 ; MA # ( Ａ → A ) FULLWIDTH LATIN CAPITAL LETTER A → LATIN CAPITAL LETTER A # →А→
 * and the character collected from it is the one between "(" and the first "→".
 */

/**
 * Lazily yields the lines of confusables.txt, one per iteration.
 * Yields nothing when the file cannot be opened (fopen() reports the reason
 * as a warning).
 *
 * @return Generator
 */
$fileData = function () {
    // __DIR__ never ends in a slash, so the separator has to be written out:
    // __DIR__ . './confusables.txt' would resolve to "<dir>./confusables.txt".
    $file = fopen(__DIR__ . '/confusables.txt', 'r');

    if (!$file) {
        return;
    }

    try {
        while (($line = fgets($file)) !== false) {
            yield $line;
        }
    } finally {
        // Also runs when the consumer stops iterating early.
        fclose($file);
    }
};

// Escapes a value for use as HTML text.
$escape = function ($value) {
    return htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

$search_string = 'LATIN CAPITAL LETTER A';

// Quote the search text so it is matched literally, and refuse a following
// word character so that "LETTER A" does not also match "LETTER AE".
$search_pattern = '/' . preg_quote($search_string, '/') . '(?!\w)/i';

// The letter itself is whatever follows the word "LETTER" in the search string.
// (ltrim($text, 'LETTER') strips a *set of characters*, not a prefix.)
$letter_position = stripos($search_string, 'LETTER');
$real_char = $letter_position === false
    ? ''
    : trim(substr($search_string, $letter_position + strlen('LETTER')));

// Collect the look-alike characters in a single pass over the file; a second
// pass just to count the matches is not needed.
$unicode_chars = [];
foreach ($fileData() as $line) {
    if (preg_match($search_pattern, $line) !== 1) {
        continue;
    }
    // "( <char> → ..." : capture what sits between "(" and the first arrow.
    if (preg_match('/\( *(.+?) *→/u', $line, $match) === 1) {
        $unicode_chars[] = $match[1];
    }
}

$html = '<table>
            <tr>
                <th style="border:1px solid #000">ORIGINAL LETTERS</th>
                <th style="border:1px solid #000">UNICODE CHARACTER</th>
                <th style="border:1px solid #000">Description</th>
            </tr>';

// One row for the search string, only when at least one line matched.
if ($unicode_chars !== []) {
    $html .= '  <tr>
                            <td style=" border:1px solid black;"><pre>' . $escape($real_char) . '</pre></td>
                            <td style=" border:1px solid black;"><pre>' . $escape(implode(', ', $unicode_chars)) . '</pre></td>
                            <td style=" border:1px solid black;"><pre>' . $escape($search_string) . '</pre></td>
                        </tr>';
}

// The table is always closed and printed, even when nothing matched.
$html .= '</table>';
echo $html;

and I get output such as the following (shown here for the search string 'LATIN CAPITAL LETTER B'):

| ORIGINAL LETTER      | UNICODE CHARACTER         | Description                      |
| -------------------- | ------------------------- | -------------------------------- |
| B                    | Ｂ, ℬ ,  𝐵               | LATIN CAPITAL LETTER B           |

The output looks fine for a single hard-coded search string, but I have to automate the process for the entire file (about 10,000 lines). So far I have tried:

<?php

/**
 * Generator closure that yields confusables.txt one line at a time.
 * It yields nothing when the file cannot be opened.
 *
 * @return Generator
 */

// Each call to $fileData() re-opens the file, so every foreach below is a
// complete pass over it.
$fileData = function () {
    // NOTE(review): __DIR__ has no trailing slash, so this path is
    // "<dir>./confusables.txt"; '/confusables.txt' is probably what is meant.
    $file = fopen(__DIR__ . './confusables.txt', 'r');

    if (!$file) {
        return;
    }
    while (($line = fgets($file)) !== false) {
        yield $line;
    }

    // Only reached when the generator is iterated to the end.
    fclose($file);
};

$searchStringArray = array();

// Build the search strings 'LATIN CAPITAL LETTER A' and 'LATIN CAPITAL LETTER B'.
foreach (range('A', 'B') as $alphabet) {
    $alphabets = 'LATIN CAPITAL LETTER ' . $alphabet . "";
    array_push($searchStringArray, $alphabets);
}

// Accumulators for the output. They are initialised once here and never reset,
// so they keep growing across all search strings. 'des' is never written.
$output_string = [
    'uni-code' => '',
    'original' => '',
    'des' => '',
];

// Both counters are shared by all search strings (they are not reset inside
// the loop below).
$initial_line_count = 1; // starts at 1, incremented once per matching line
$final_count = 0; // ends up as 1 + number of matching lines over ALL search strings

// First pass: count the matching lines.
for ($i = 0; $i < count($searchStringArray); $i++) {
    $search_string = $searchStringArray[$i];
    // One full read of the file per search string.
    foreach ($fileData() as $line) {
        // $line contains current line; the search string is used as a
        // case-insensitive regular expression without quoting.
        if (preg_match_all("/{$search_string}/i", $line)) {
            $initial_line_count++;
            $final_count = $initial_line_count;
            // echo $final_count.'<br>';
        }
    }
}
// Counts matches in the second pass; also shared by all search strings.
$line_count = 1; // loop termination counter
$html = '<table>
            <tr>
                <th style="border:1px solid #000">ORIGINAL LETTERS</th>
                <th style="border:1px solid #000">UNICODE CHARACTER</th>
                <th style="border:1px solid #000">Description</th>
            </tr>';
            
// Second pass: extract the characters and emit the table.
for ($i = 0; $i < count($searchStringArray); $i++) {
    $search_string = $searchStringArray[$i];
    // One full read of the file per search string.
    foreach ($fileData() as $line) {
        // $line contains current line
        if (preg_match_all("/{$search_string}/i", $line)) {
            // Keep the part of the line that precedes the first ' ) '.
            $slice_after = substr($line, 0, strpos($line, ' ) '));
            // Keep what follows the first '(' (the '(' itself is stripped).
            $slice_before = ltrim(stristr($slice_after, '('), '(');
            // Keep what precedes the first "→": the look-alike character,
            // including the spaces around it.
            $first_char = substr($slice_before, 0, strpos($slice_before, "→"));
            // Text after 'LETTER' in the search string, e.g. ' B'. ltrim() treats
            // 'LETTER' as a set of characters to strip and stops at the space.
            $split_Real_char = ltrim(stristr($search_string, 'LETTER'), 'LETTER');
            // Appends to the shared accumulator; $real_Char is not used afterwards.
            $real_Char = $output_string['original'] .= $split_Real_char;
            // Appends to the shared accumulator, which still holds the characters
            // collected for the previous search strings.
            $split_Unicode_char = $output_string['uni-code'] .= $first_char . ',';
            $line_count++; // loop termination counter
            // $final_count is the total over all search strings, so this is true
            // only once: on the very last matching line of the whole run. That is
            // why a single row is produced, containing every collected character
            // but only the current (last) letter and search string.
            if ($line_count == $final_count) {
                $html .= '  <tr>
                            <td style=" border:1px solid black;"><pre>' . $split_Real_char . '</pre></td>
                            <td style=" border:1px solid black;"><pre>' . $split_Unicode_char . '</pre></td>
                            <td style=" border:1px solid black;"><pre>' . $search_string . '</pre></td>
                        </tr>';
                $html .= '</table>';
                echo $html;
                // Leaves only the inner foreach; the outer for loop continues.
                break;
            }
        }
    }
}

and I get output

| ORIGINAL LETTER      | UNICODE CHARACTER         | Description                 |
| -------------------- | ------------------------- | --------------------------- |
| B                    | Ａ, 🄐, Ｂ, ℬ,  𝐵, ℻     | LATIN CAPITAL LETTER B      |

I get all the Unicode characters, but the original letter and the search string (description) are wrong: every Unicode character ends up in a single table cell instead of being grouped by letter, and although the loop runs several times I get only one row.

Expected output

| ORIGINAL LETTER      | UNICODE CHARACTER | Description            |                 
| -------------------- | ------------------| ---------------------- |
| A                    | Ａ, 🄐, ℻        | LATIN CAPITAL LETTER A |      
| B                    | Ｂ, ℬ,  𝐵        | LATIN CAPITAL LETTER B |

Any suggestions how can I achieve this?

Solution

To answer my own question: I was able to get this working using a function and array_map().

<?php

/**
 * Builds an HTML table that lists, for every Latin capital letter A-Z, the
 * characters that confusables.txt maps to that letter.
 *
 * A data line of confusables.txt looks like
 *   FF22 ; 0042 ; MA # ( Ｂ → B ) FULLWIDTH LATIN CAPITAL LETTER B → LATIN CAPITAL LETTER B # →Β→
 * A line counts for a letter when it contains the search string (for example
 * "LATIN CAPITAL LETTER B"); the character shown in the table is the one
 * found between "(" and the first "→".
 */

// Opening markup of the result table; my_func() appends one <tr> per search string.
$html_table = '<table>
                    <thead>
                        <tr>
                            <th style="border:1px solid #000">Original Letters</th>
                            <th style="border:1px solid #000">Unicode Characters</th>
                            <th style="border:1px solid #000">Description</th>
                        </tr>
                    </thead>
                    <tbody>';

/**
 * Lazily yields the lines of a text file, so the file is never loaded whole.
 *
 * Yields nothing when the file cannot be opened (fopen() reports the reason
 * as a warning).
 *
 * @param string $path File to read.
 * @return Generator
 */
function confusable_lines(string $path): Generator
{
    $handle = fopen($path, 'r');

    if ($handle === false) {
        return;
    }

    try {
        while (($line = fgets($handle)) !== false) {
            yield $line;
        }
    } finally {
        // Also runs when the consumer stops iterating early.
        fclose($handle);
    }
}

/**
 * Returns the source character of one confusables.txt line: the text between
 * the first "(" and the first "→" after it, without the surrounding spaces.
 *
 * @param string $line One line of the file.
 * @return string|null The character, or null when the line has no "( x →" part.
 */
function extract_source_character(string $line): ?string
{
    // The "u" modifier makes the pattern work on UTF-8 characters, not bytes.
    if (preg_match('/\( *(.+?) *→/u', $line, $match) !== 1) {
        return null;
    }

    return $match[1];
}

/**
 * Collects the source characters of all lines that contain the search string.
 *
 * The search is case-insensitive and applied to the whole line. The search
 * text is quoted so it is matched literally, and it must not be followed by
 * a word character, so "LATIN CAPITAL LETTER A" does not match
 * "LATIN CAPITAL LETTER AE".
 *
 * @param string           $search Text to look for, e.g. "LATIN CAPITAL LETTER B".
 * @param iterable<string> $lines  Lines of confusables.txt.
 * @return string[] Source characters in file order.
 */
function find_confusables(string $search, iterable $lines): array
{
    $pattern = '/' . preg_quote($search, '/') . '(?!\w)/i';
    $characters = [];

    foreach ($lines as $line) {
        if (preg_match($pattern, $line) !== 1) {
            continue;
        }

        $character = extract_source_character($line);
        if ($character !== null) {
            $characters[] = $character;
        }
    }

    return $characters;
}

/**
 * Renders one table row: original letter, look-alike characters, search string.
 *
 * @param string   $search     Search string, e.g. "LATIN CAPITAL LETTER B".
 * @param string[] $characters Look-alike characters found for it.
 * @return string The <tr> markup with all cell values HTML-escaped.
 */
function render_confusable_row(string $search, array $characters): string
{
    // The letter is whatever follows the word "LETTER" in the search string.
    // (ltrim($text, 'LETTER') strips a set of characters, not a prefix.)
    $position = stripos($search, 'LETTER');
    $letter = $position === false
        ? ''
        : trim(substr($search, $position + strlen('LETTER')));

    $row = '<tr>';
    foreach ([$letter, implode(', ', $characters), $search] as $cell) {
        $row .= '<td style=" border:1px solid black;"><pre>'
            . htmlspecialchars($cell, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . '</pre></td>';
    }

    return $row . '</tr>';
}

/**
 * Appends one row for the given search string to the global $html_table.
 *
 * Nothing is appended when no line of the file matches, or when the file
 * cannot be opened.
 *
 * @param string      $search_map Search string, e.g. "LATIN CAPITAL LETTER B".
 * @param string|null $file_path  File to scan; defaults to confusables.txt
 *                                next to this script.
 * @return void
 */
function my_func($search_map, $file_path = null)
{
    global $html_table;

    if ($file_path === null) {
        // __DIR__ never ends in a slash, so the separator must be written out.
        $file_path = __DIR__ . '/confusables.txt';
    }

    // A single pass is enough: the row is built once all matches are known.
    $characters = find_confusables($search_map, confusable_lines($file_path));

    if ($characters === []) {
        return;
    }

    $html_table .= render_confusable_row($search_map, $characters);
}

// Common prefix of every search string.
$search_string_part = 'LATIN CAPITAL LETTER ';

// One search string per letter: "LATIN CAPITAL LETTER A" ... "LATIN CAPITAL LETTER Z".
$search_string_array = [];
foreach (range('A', 'Z') as $alphabet) {
    $search_string_array[] = $search_string_part . $alphabet;
}

// my_func() is called for its side effect on $html_table; the array that
// array_map() returns is not needed. A plain foreach would work equally well.
array_map("my_func", $search_string_array);

$html_table .= '    </tbody>
                </table>';
echo $html_table;

Output