User Tools

Site Tools


import_process

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
import_process [2019/03/24 16:05]
steve
import_process [2019/03/24 16:13]
steve
Line 6: Line 6:
  
   * Fetch all HTML sources   * Fetch all HTML sources
-  * Convert HTML sources to DOM+  * Convert HTML sources to DOM (google-chrome-stable --headless --dump-dom http://​scriptures.nephi.org)
   * Insert a custom JavaScript source file using JQuery to remove HTML tags and get scripture text   * Insert a custom JavaScript source file using JQuery to remove HTML tags and get scripture text
   * Iterate over the modified HTML files   * Iterate over the modified HTML files
Line 13: Line 13:
 That's it! That's it!
  
-Doing some code dumps here:+Doing some undocumented ​code dumps here:
  
 <​code>​ <​code>​
 <script src='​jquery-3.3.1.min.js'></​script><​script type='​text/​javascript'>​var v=1;var scripture=new Object();​for(v=1;​v<​$('​p.verse'​).length+1;​v+=1){$('#​p'​+v+'​ span.verse-number.verse'​).remove();​$('#​p'​+v+'​ span.para-mark'​).remove();​$('#​p'​+v+'​ sup.studyNoteMarker.dontHighlight'​).remove();​scripture.bcv=$('​title'​).text()+':'​+v;​scripture.text=$('#​p'​+v).text();​$.ajax({url:'​http://​localhost:​8080/',​type:'​POST',​async:​false,​data:​scripture})}</​script>​ <script src='​jquery-3.3.1.min.js'></​script><​script type='​text/​javascript'>​var v=1;var scripture=new Object();​for(v=1;​v<​$('​p.verse'​).length+1;​v+=1){$('#​p'​+v+'​ span.verse-number.verse'​).remove();​$('#​p'​+v+'​ span.para-mark'​).remove();​$('#​p'​+v+'​ sup.studyNoteMarker.dontHighlight'​).remove();​scripture.bcv=$('​title'​).text()+':'​+v;​scripture.text=$('#​p'​+v).text();​$.ajax({url:'​http://​localhost:​8080/',​type:'​POST',​async:​false,​data:​scripture})}</​script>​
 +</​code>​
 +
 +AJAX:
 +
 +<​code>​
 +<?php
 +
 + error_reporting(E_ALL & ~E_STRICT & ~E_DEPRECATED);​
 + ini_set('​error_log',​ '​error.log'​);​
 +
 + $insert = false;
 + $update = true;
 +
 + if($insert)
 + $pdo_dsn = "​sqlite:​import.db";​
 + elseif($update)
 + $pdo_dsn = "​sqlite:​update.db";​
 + else
 + exit;
 +
 + $legacy_db = new PDO("​sqlite:​lds-scriptures-sqlite3.db"​);​
 + $legacy_db->​setAttribute(PDO::​ATTR_DEFAULT_FETCH_MODE,​ PDO::​FETCH_ASSOC);​
 +
 + $update_db = new PDO("​sqlite:​update.db"​);​
 + $update_db->​setAttribute(PDO::​ATTR_DEFAULT_FETCH_MODE,​ PDO::​FETCH_ASSOC);​
 +
 + // Sample queries
 +
 + // getOne():
 + /*
 + $foo = $db->​query('​SELECT id FROM dvds ORDER BY id DESC LIMIT 1;');
 + $bar = $foo->​fetchColumn();​
 + var_dump($bar);​
 +
 + // getCol():
 + $foo = $db->​query('​SELECT id FROM dvds ORDER BY id DESC LIMIT 10;');
 + $bar = $foo->​fetchAll(PDO::​FETCH_COLUMN);​
 +
 + // getAll():
 + $foo = $db->​query('​SELECT * FROM dvds ORDER BY id DESC LIMIT 10;');
 + $bar = $foo->​fetchAll();​
 + print_r($bar);​
 +
 + // getRow():
 + $foo = $db->​query('​SELECT * FROM dvds ORDER BY id DESC LIMIT 10;');
 + $bar = current($foo->​fetchAll());​
 + print_r($bar);​
 + */
 +
 + try {
 + $db = new PDO($pdo_dsn);​
 + $db->​setAttribute(PDO::​ATTR_DEFAULT_FETCH_MODE,​ PDO::​FETCH_ASSOC);​
 + } catch (PDOException $e) {
 +    echo "* Error!: "​.$e->​getMessage()."​\n";​
 + exit(1);
 + }
 +
 + extract($_POST);​
 +
 + $bcv = htmlentities($bcv);​
 + $bcv = str_replace("&​nbsp;",​ " ", $bcv);
 + $bcv = str_replace("&​mdash;",​ "​--",​ $bcv); // "​Joseph Smith--History"​
 +
 + $text = htmlentities($text);​
 + $text = str_replace("&​nbsp;",​ " ", $text);
 + $text = str_replace("&​lsquo;",​ "'",​ $text);
 + $text = str_replace("&​rsquo;",​ "'",​ $text);
 + $text = str_replace("&​ldquo;",​ "​\"",​ $text);
 + $text = str_replace("&​rdquo;",​ "​\"",​ $text);
 + $text = str_replace("&​ndash;",​ "​-",​ $text);
 + $text = str_replace("&​mdash;",​ "​--",​ $text);
 + $text = str_replace("&​hellip;",​ "​...",​ $text); // Joseph Smith--History # 41910
 + $text = str_replace("&​aelig;",​ "​ae",​ $text);
 + $text = str_replace("&​AElig;",​ "​Ae",​ $text);
 +
 + // Legacy has '​Psalms 1:1' but new has 'Psalm 1:1' -- use the old one when doing a lookup
 + $bcv = str_replace("​Psalm ", "​Psalms ", $bcv);
 +
 + // "​Solomon'​s Song" changed to "Song of Solomon"​
 + $bcv = str_replace("​Song of Solomon",​ "​Solomon'​s Song", $bcv);
 +
 + if($bcv == "​Joseph Smith--History 1:71")
 + $text = str_replace("​commanded.*",​ "​commanded.",​ $text); // Joseph Smith--History 1:71
 +
 + if($update) {
 +
 + $q_bcv = $db->​quote($bcv);​
 + $q_scripture_text = $db->​quote($text);​
 +
 + $sql = "​SELECT verse_id, scripture_text FROM scriptures WHERE verse_title = $q_bcv;";​
 +
 + $rs = $legacy_db->​query($sql);​
 + $rows = $rs->​fetchAll();​
 +
 + if(count($rows) == 0) {
 + error_log("​Cannot find verse ID for $bcv"​);​
 + exit;
 + }
 +
 + $row = current($rows);​
 +
 + $legacy_id = $row['​verse_id'​];​
 + $legacy_text = $row['​scripture_text'​];​
 +
 + if($legacy_text != $text) {
 + $sql = "​UPDATE verses SET scripture_text = $q_scripture_text WHERE id = $legacy_id;";​
 + file_put_contents("​update_sqlite.sql",​ "​$sql\n",​ FILE_APPEND);​
 + // $update_db->​query($sql);​
 + error_log($bcv);​
 + }
 +
 + }
 </​code>​ </​code>​
import_process.txt ยท Last modified: 2019/03/24 16:13 by steve