Skip to content

Commit

Permalink
replace exec with spawn to support large parquets. fixes llvm#1
Browse files Browse the repository at this point in the history
  • Loading branch information
Dvir Yitzchaki authored and dvirtz committed Apr 24, 2019
1 parent 03f5e3e commit 2f1d653
Show file tree
Hide file tree
Showing 5 changed files with 1,361 additions and 22 deletions.
13 changes: 8 additions & 5 deletions src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
// Import the module and reference it with the alias vscode in your code below
import * as vscode from 'vscode';
import { ParquetContentProvider } from './parquet_content_provider';
import { execFile } from 'child_process';
import { spawn } from 'child_process';

// this method is called when your extension is activated
// your extension is activated the very first time the command is executed
export function activate(context: vscode.ExtensionContext) {
console.log('parquet-viewer activated');

execFile('parquet-tools', ['-h'], err => {
vscode.window.showErrorMessage('parquet-tools not in PATH');
spawn('parquet-tools', ['-h']).on('error', (err) => {
vscode.window.showErrorMessage('parquet-tools not in PATH');
});

const scheme = 'parquet';
Expand All @@ -21,8 +21,8 @@ export function activate(context: vscode.ExtensionContext) {

let onFile = function (document: vscode.TextDocument) {
if (document.fileName.endsWith('parquet') && document.uri.scheme !== scheme) {
let uri = vscode.Uri.parse(scheme + '://' + document.uri.path);
vscode.window.showTextDocument(uri, { preview: true, viewColumn: vscode.window.activeTextEditor!.viewColumn });
let uri = vscode.Uri.parse(scheme + '://' + document.uri.path + ".as.json");
vscode.window.showTextDocument(uri);
}
};

Expand All @@ -36,6 +36,9 @@ export function activate(context: vscode.ExtensionContext) {
}));

context.subscriptions.push(vscode.workspace.onDidOpenTextDocument(onFile));
context.subscriptions.push(vscode.workspace.onDidChangeTextDocument((e) => {
onFile(e.document);
}));

if (vscode.window.activeTextEditor) {
onFile(vscode.window.activeTextEditor.document);
Expand Down
44 changes: 38 additions & 6 deletions src/parquet_content_provider.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,56 @@
import { TextDocumentContentProvider, EventEmitter, Uri, window } from "vscode";
import { exec } from "child_process";
import { spawn } from "child_process";
// import { Readable } from "stream";
// import { tmpdir } from "os";
// import { createWriteStream, readFile, readFileSync } from 'fs';
// import { sep } from 'path';


/**
 * Mutable string holder. The content provider keeps one instance per parquet
 * path and appends process output to `data` as it arrives.
 */
class Json {
  data: string;

  constructor() {
    // Starts empty; text is concatenated onto it later.
    this.data = "";
  }
}

// Text document content provider that produces JSON text for a parquet file by
// running the external parquet-tools program.
// NOTE(review): this span comes from a rendered diff, so lines removed by the
// commit (the exec based call) and lines added by it (the spawn based flow)
// appear together; confirm against the real file before editing.
export class ParquetContentProvider implements TextDocumentContentProvider {

// output collected so far, keyed by the uri path with the .as.json suffix removed
private jsons: Map<string, Json> = new Map();

// emitter and its event
onDidChangeEmitter = new EventEmitter<Uri>();
onDidChange = this.onDidChangeEmitter.event;

// Returns a promise for the text printed by parquet-tools cat -j for the path in uri.
// On a non-zero exit code an error message is shown and the promise is rejected
// with that message string.
async provideTextDocumentContent(uri: Uri): Promise<string> {
// run parquet-tools on the uri path and resolve with what it prints
return new Promise<string>((resolve, reject) => {
// NOTE(review): presumably the pre-commit implementation (exec with the path
// concatenated into the command string); it is never closed in this view.
exec('parquet-tools cat -j ' + uri.path, (error, stdout, stderr) => {
if (error) {
const message = `error when running parquet-tools ${error}:\n${stderr}`;

// strip the .as.json suffix to recover the parquet path
// NOTE(review): the pattern is built from a string literal, so the backslashes
// are dropped and each dot matches any character; a regex literal would match
// a literal dot.
const path = uri.path.replace(RegExp('\.as\.json$'), '');

// serve already collected output for this path
// NOTE(review): there is no return after this resolve, so execution continues,
// the cache entry is replaced and a new process is spawned even on a cache hit.
if (this.jsons.has(path)) {
resolve(this.jsons.get(path)!.data);
}

// fresh buffer registered before any output arrives
var json = new Json;
this.jsons.set(path, json);

// NOTE(review): no error listener is attached to the child process here, so a
// failure to start parquet-tools is not reported by this method; confirm.
const parquet_tools = spawn('parquet-tools', ['cat', '-j', path]);
// parquet_tools.stdout.pipe(stream)
// accumulate stderr so it can be included in the error message
var stderr: string = "";
parquet_tools.stderr.on('data', (data) => {
stderr += data;
});
// append each stdout chunk to the buffer and signal that the content for uri changed
parquet_tools.stdout.on('data', (data) => {
json.data += data;
this.onDidChangeEmitter.fire(uri);
});

// settle the promise once the process has closed
parquet_tools.on('close', (code) => {
if (code) {
const message = `error when running parquet-tools ${code}:\n${stderr}`;
window.showErrorMessage(message);
reject(message);
// NOTE(review): no return here; the resolve below still runs but cannot change
// an already rejected promise.
}

// NOTE(review): resolve(stdout) looks like the removed pre-commit line and
// resolve(json.data) its replacement; confirm which one the real file has.
resolve(stdout);
resolve(json.data);
});
});
}

}
24 changes: 13 additions & 11 deletions src/test/provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,27 @@ import { Uri } from 'vscode';

suite("Provider tests", () => {

test("Parquet to JSON", async () => {
test('Converts Parquet to JSON', (done) => {
const provider = new ParquetContentProvider();
const json = getUri("small.parquet").then(parquet => {
return provider.provideTextDocumentContent(parquet);
});
const expected = fileRead("small.json");
["small", "large"].forEach(async (name) => {
const json = getUri(`${name}.parquet`).then(parquet => {
return provider.provideTextDocumentContent(parquet);
});
const expected = fileRead(`${name}.json`);

return Promise.all([json, expected]).then(values => {
assert.strictEqual(values[0], values[1]);
Promise.all([json, expected]).then((values) =>
assert.strictEqual(values[0], values[1]))
.then(done, done);
});
});

test("Error on not existing file", async () => {
const provider = new ParquetContentProvider();

return provider.provideTextDocumentContent(Uri.parse("file://.")).then(data => {
assert(false, "should not get here");
}, (error: string) => {
assert(error.indexOf('error when running parquet-tools') !== -1);
});
assert(false, "should not get here");
}, (error: string) => {
assert(error.indexOf('error when running parquet-tools') !== -1);
});
});
});
Loading

0 comments on commit 2f1d653

Please sign in to comment.