Rohdatenerfassung zu den Locations via Instagram-API (finaler Ansatz)

Im Zuge des Roland-Berger-Projektes fand ich endlich Gelegenheit, den Locationrohdatenscraper zu optimieren. Hier nun der Quellcode:

(1) Scrapingprozedur
Erklärung:
get_loc_short(mytable: TStringGrid; locid: string; rounds: integer);
mytable -> Stringgridobjekt, verlangt die Übergabe des Objektnamens
locid -> LocationID aus Instagram (siehe: Locationparser)
rounds -> maximale Anzahl der Subprüfung je Durchlauf

procedure TForm1.get_loc_short(mytable: TStringGrid; locid: string; rounds: integer);
var
JSONArray: tJSONArray;
JSONValue,jvalue: tJSONValue;
JSONPair: TJSONPair;
JSON, json_sub: TJSONObject;
size: integer;
j_array: tJSONArray;
s: string;
i,j: integer;
next_id: string;
zaehl: integer;
begin
zaehl:=0;
try
debug.text:=idhttp1.Get('https://api.instagram.com/v1/locations/'+locid+'/media/recent?access_token='+token.text);
JSONValue := TJSONObject.ParseJSONValue(debug.text);
JSON := TJSONObject.ParseJSONValue(debug.Lines.Text) as TJSONObject;
JSONArray := TJSONArray(JSON.Get('data').JsonValue);
try next_id:= JSONValue.GetValue('pagination.next_url');
except
next_id:='N/A';
end;
for i := 0 to JSONArray.Size - 1 do
begin
with mytable do
begin
cells[0,rowcount]:=inttostr(rowcount);
cells[1,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('link')).JsonValue.Value);
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('tags')).ToString);
s:= StringReplace(s, '"tags":[', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, ']', '', [rfReplaceAll,rfIgnoreCase]);
cells[2,rowcount]:=s;
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('likes')).ToString);
s:= StringReplace(s, '"likes":{"count":', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '}', '', [rfReplaceAll,rfIgnoreCase]);
cells[3,rowcount]:=s;
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('comments')).ToString);
s:= StringReplace(s, '"comments":{"count":', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '}', '', [rfReplaceAll,rfIgnoreCase]);
cells[4,rowcount]:=s;
cells[5,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('created_time')).JsonValue.Value);
cells[5,rowcount]:=datetimetostr(UnixToDateTime(strtoint(cells[5,rowcount])));
cells[6,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('id')).JsonValue.Value);
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('user')).ToString);
s:= StringReplace(s, '"user":{"username":', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '}', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '"', '', [rfReplaceAll,rfIgnoreCase]);
cells[7,rowcount]:=s;
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('location')).ToString);
s:= StringReplace(s, '"location":', '', [rfReplaceAll,rfIgnoreCase]);
cells[8,rowcount]:=s;
cells[9,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('filter')).JsonValue.Value);
cells[10,rowcount]:=datetimetostr(now);
rowcount:=rowcount+1;
end;
grdColWidth(mytable, 40);
end;
except
end;
repeat
// -> tiefenpruefung
try
debug.text:=idhttp1.Get(next_id);
JSONValue := TJSONObject.ParseJSONValue(debug.text);
JSON := TJSONObject.ParseJSONValue(debug.Lines.Text) as TJSONObject;
JSONArray := TJSONArray(JSON.Get('data').JsonValue);
try next_id:= JSONValue.GetValue('pagination.next_url');
except
next_id:='N/A';
end;
for i := 0 to JSONArray.Size - 1 do
begin
with mytable do
begin
cells[0,rowcount]:=inttostr(rowcount);
cells[1,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('link')).JsonValue.Value);
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('tags')).ToString);
s:= StringReplace(s, '"tags":[', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, ']', '', [rfReplaceAll,rfIgnoreCase]);
cells[2,rowcount]:=escape(s);
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('likes')).ToString);
s:= StringReplace(s, '"likes":{"count":', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '}', '', [rfReplaceAll,rfIgnoreCase]);
cells[3,rowcount]:=s;
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('comments')).ToString);
s:= StringReplace(s, '"comments":{"count":', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '}', '', [rfReplaceAll,rfIgnoreCase]);
cells[4,rowcount]:=s;
cells[5,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('created_time')).JsonValue.Value);
cells[5,rowcount]:=datetimetostr(UnixToDateTime(strtoint(cells[5,rowcount])));
cells[6,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('id')).JsonValue.Value);
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('user')).ToString);
s:= StringReplace(s, '"user":{"username":', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '}', '', [rfReplaceAll,rfIgnoreCase]);
s:= StringReplace(s, '"', '', [rfReplaceAll,rfIgnoreCase]);
cells[7,rowcount]:=s;
s:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('location')).ToString);
s:= StringReplace(s, '"location":', '', [rfReplaceAll,rfIgnoreCase]);
cells[8,rowcount]:=s;
cells[9,rowcount]:=(TJSONPair(TJSONObject(JSONArray.Get(i)).Get('filter')).JsonValue.Value);
cells[10,rowcount]:=datetimetostr(now);
rowcount:=rowcount+1;
end;
grdColWidth(mytable, 40);
end;
except
end;
// -> tiefenpruefung, ende
zaehl:=zaehl+1;
until zaehl=rounds;
//uebertrag auf tabelle
grdColWidth(mytable, 40);
form1.Caption:=version+' alle Posts herunter geladen';
end;

(2) Aufruf der Scrapingprozedur
procedure TForm1.Button45Click(Sender: TObject);
var lauf: integer;
begin
with locmedia do
begin
cells[0,0]:='Nr.';
cells[1,0]:='URL';
cells[2,0]:='Tag';
cells[3,0]:='Likes';
cells[4,0]:='Comments';
cells[5,0]:='Erstellzeit';
cells[6,0]:='ID';
cells[7,0]:='User';
cells[8,0]:='Location';
cells[9,0]:='Filter';
colcount:=10;
rowcount:=1;
end;
for lauf := 1 to locroh.RowCount do
begin
randomize;
token.Text:=token.Items[random(token.Items.Count-1)];
get_loc_short(locmedia,locroh.Cells[1,lauf],20);
savetocsv(locmedia,verz+'\support\dummydata.csv');
form1.Caption:=version+' Locationscraper: '+inttostr(lauf)+' / '+inttostr(locroh.RowCount);
delay(1000);
end;
end;

Schreibe einen Kommentar